@inproceedings{glazkova-2023-data,
title = "Data Augmentation for Fake News Detection by Combining Seq2seq and {NLI}",
author = "Glazkova, Anna",
editor = "Mitkov, Ruslan and
Angelova, Galia",
booktitle = "Proceedings of the 14th International Conference on Recent Advances in Natural Language Processing",
month = sep,
year = "2023",
address = "Varna, Bulgaria",
publisher = "INCOMA Ltd., Shoumen, Bulgaria",
url = "https://aclanthology.org/2023.ranlp-1.48",
pages = "429--439",
abstract = "State-of-the-art data augmentation methods help improve the generalization of deep learning models. However, these methods often generate examples that contradict the preserving class labels. This is crucial for some natural language processing tasks, such as fake news detection. In this work, we combine sequence-to-sequence and natural language inference models for data augmentation in the fake news detection domain using short news texts, such as tweets and news titles. This approach allows us to generate new training examples that do not contradict facts from the original texts. We use the non-entailment probability for the pair of the original and generated texts as a loss function for a transformer-based sequence-to-sequence model. The proposed approach has demonstrated the effectiveness on three classification benchmarks in fake news detection in terms of the F1-score macro and ROC AUC. Moreover, we showed that our approach retains the class label of the original text more accurately than other transformer-based methods.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="glazkova-2023-data">
<titleInfo>
<title>Data Augmentation for Fake News Detection by Combining Seq2seq and NLI</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Glazkova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 14th International Conference on Recent Advances in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Galia</namePart>
<namePart type="family">Angelova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
<place>
<placeTerm type="text">Varna, Bulgaria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>State-of-the-art data augmentation methods help improve the generalization of deep learning models. However, these methods often generate examples that contradict the preserving class labels. This is crucial for some natural language processing tasks, such as fake news detection. In this work, we combine sequence-to-sequence and natural language inference models for data augmentation in the fake news detection domain using short news texts, such as tweets and news titles. This approach allows us to generate new training examples that do not contradict facts from the original texts. We use the non-entailment probability for the pair of the original and generated texts as a loss function for a transformer-based sequence-to-sequence model. The proposed approach has demonstrated the effectiveness on three classification benchmarks in fake news detection in terms of the F1-score macro and ROC AUC. Moreover, we showed that our approach retains the class label of the original text more accurately than other transformer-based methods.</abstract>
<identifier type="citekey">glazkova-2023-data</identifier>
<location>
<url>https://aclanthology.org/2023.ranlp-1.48</url>
</location>
<part>
<date>2023-09</date>
<extent unit="page">
<start>429</start>
<end>439</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Data Augmentation for Fake News Detection by Combining Seq2seq and NLI
%A Glazkova, Anna
%Y Mitkov, Ruslan
%Y Angelova, Galia
%S Proceedings of the 14th International Conference on Recent Advances in Natural Language Processing
%D 2023
%8 September
%I INCOMA Ltd., Shoumen, Bulgaria
%C Varna, Bulgaria
%F glazkova-2023-data
%X State-of-the-art data augmentation methods help improve the generalization of deep learning models. However, these methods often generate examples that contradict the preserving class labels. This is crucial for some natural language processing tasks, such as fake news detection. In this work, we combine sequence-to-sequence and natural language inference models for data augmentation in the fake news detection domain using short news texts, such as tweets and news titles. This approach allows us to generate new training examples that do not contradict facts from the original texts. We use the non-entailment probability for the pair of the original and generated texts as a loss function for a transformer-based sequence-to-sequence model. The proposed approach has demonstrated the effectiveness on three classification benchmarks in fake news detection in terms of the F1-score macro and ROC AUC. Moreover, we showed that our approach retains the class label of the original text more accurately than other transformer-based methods.
%U https://aclanthology.org/2023.ranlp-1.48
%P 429-439
Markdown (Informal)
[Data Augmentation for Fake News Detection by Combining Seq2seq and NLI](https://aclanthology.org/2023.ranlp-1.48) (Glazkova, RANLP 2023)
ACL