% Conference paper record (citation key: xu-etal-2024-clffrd).
@inproceedings{xu-etal-2024-clffrd,
  title     = {{CLFFRD}: Curriculum Learning and Fine-grained Fusion for Multimodal Rumor Detection},
  author    = {Xu, Fan and Zeng, Lei and Zou, Bowei and Aw, Ai Ti and Rong, Huan},
  editor    = {Calzolari, Nicoletta and Kan, Min-Yen and Hoste, Veronique and Lenci, Alessandro and Sakti, Sakriani and Xue, Nianwen},
  booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)},
  month     = may,
  year      = {2024},
  address   = {Torino, Italia},
  publisher = {ELRA and ICCL},
  url       = {https://aclanthology.org/2024.lrec-main.294},
  pages     = {3314--3324},
  abstract  = {In an era where rumors can propagate rapidly across social media platforms such as Twitter and Weibo, automatic rumor detection has garnered considerable attention from both academia and industry. Existing multimodal rumor detection models often overlook the intricacies of sample difficulty, e.g., text-level difficulty, image-level difficulty, and multimodal-level difficulty, as well as their order when training. Inspired by the concept of curriculum learning, we propose the Curriculum Learning and Fine-grained Fusion-driven multimodal Rumor Detection (CLFFRD) framework, which employs curriculum learning to automatically select and train samples according to their difficulty at different training stages. Furthermore, we introduce a fine-grained fusion strategy that unifies entities from text and objects from images, enhancing their semantic cohesion. We also propose a novel data augmentation method that utilizes linear interpolation between textual and visual modalities to generate diverse data. Additionally, our approach incorporates deep fusion for both intra-modality (e.g., text entities and image objects) and inter-modality (e.g., CLIP and social graph) features. Extensive experimental results demonstrate that CLFFRD outperforms state-of-the-art models on both English and Chinese benchmark datasets for rumor detection in social media.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="xu-etal-2024-clffrd">
    <!-- Title of the paper -->
    <titleInfo>
      <title>CLFFRD: Curriculum Learning and Fine-grained Fusion for Multimodal Rumor Detection</title>
    </titleInfo>
    <!-- Authors, in publication order -->
    <name type="personal">
      <namePart type="given">Fan</namePart>
      <namePart type="family">Xu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Lei</namePart>
      <namePart type="family">Zeng</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Bowei</namePart>
      <namePart type="family">Zou</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ai</namePart>
      <namePart type="given">Ti</namePart>
      <namePart type="family">Aw</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Huan</namePart>
      <namePart type="family">Rong</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host volume: the proceedings, its editors, publisher and place -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Nicoletta</namePart>
        <namePart type="family">Calzolari</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Min-Yen</namePart>
        <namePart type="family">Kan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Veronique</namePart>
        <namePart type="family">Hoste</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Alessandro</namePart>
        <namePart type="family">Lenci</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sakriani</namePart>
        <namePart type="family">Sakti</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Nianwen</namePart>
        <namePart type="family">Xue</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>ELRA and ICCL</publisher>
        <place>
          <placeTerm type="text">Torino, Italia</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>In an era where rumors can propagate rapidly across social media platforms such as Twitter and Weibo, automatic rumor detection has garnered considerable attention from both academia and industry. Existing multimodal rumor detection models often overlook the intricacies of sample difficulty, e.g., text-level difficulty, image-level difficulty, and multimodal-level difficulty, as well as their order when training. Inspired by the concept of curriculum learning, we propose the Curriculum Learning and Fine-grained Fusion-driven multimodal Rumor Detection (CLFFRD) framework, which employs curriculum learning to automatically select and train samples according to their difficulty at different training stages. Furthermore, we introduce a fine-grained fusion strategy that unifies entities from text and objects from images, enhancing their semantic cohesion. We also propose a novel data augmentation method that utilizes linear interpolation between textual and visual modalities to generate diverse data. Additionally, our approach incorporates deep fusion for both intra-modality (e.g., text entities and image objects) and inter-modality (e.g., CLIP and social graph) features. Extensive experimental results demonstrate that CLFFRD outperforms state-of-the-art models on both English and Chinese benchmark datasets for rumor detection in social media.</abstract>
    <identifier type="citekey">xu-etal-2024-clffrd</identifier>
    <location>
      <url>https://aclanthology.org/2024.lrec-main.294</url>
    </location>
    <!-- Position of the paper within the host volume -->
    <part>
      <date>2024-05</date>
      <extent unit="page">
        <start>3314</start>
        <end>3324</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T CLFFRD: Curriculum Learning and Fine-grained Fusion for Multimodal Rumor Detection
%A Xu, Fan
%A Zeng, Lei
%A Zou, Bowei
%A Aw, Ai Ti
%A Rong, Huan
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F xu-etal-2024-clffrd
%X In an era where rumors can propagate rapidly across social media platforms such as Twitter and Weibo, automatic rumor detection has garnered considerable attention from both academia and industry. Existing multimodal rumor detection models often overlook the intricacies of sample difficulty, e.g., text-level difficulty, image-level difficulty, and multimodal-level difficulty, as well as their order when training. Inspired by the concept of curriculum learning, we propose the Curriculum Learning and Fine-grained Fusion-driven multimodal Rumor Detection (CLFFRD) framework, which employs curriculum learning to automatically select and train samples according to their difficulty at different training stages. Furthermore, we introduce a fine-grained fusion strategy that unifies entities from text and objects from images, enhancing their semantic cohesion. We also propose a novel data augmentation method that utilizes linear interpolation between textual and visual modalities to generate diverse data. Additionally, our approach incorporates deep fusion for both intra-modality (e.g., text entities and image objects) and inter-modality (e.g., CLIP and social graph) features. Extensive experimental results demonstrate that CLFFRD outperforms state-of-the-art models on both English and Chinese benchmark datasets for rumor detection in social media.
%U https://aclanthology.org/2024.lrec-main.294
%P 3314-3324
Markdown (Informal)
[CLFFRD: Curriculum Learning and Fine-grained Fusion for Multimodal Rumor Detection](https://aclanthology.org/2024.lrec-main.294) (Xu et al., LREC-COLING 2024)
ACL