% Conference paper entry; the proceedings editors are listed in the editor field.
@inproceedings{kim-etal-2021-robust,
  title     = {How Robust are Fact Checking Systems on Colloquial Claims?},
  author    = {Kim, Byeongchang and
               Kim, Hyunwoo and
               Hong, Seokhee and
               Kim, Gunhee},
  editor    = {Toutanova, Kristina and
               Rumshisky, Anna and
               Zettlemoyer, Luke and
               Hakkani-Tur, Dilek and
               Beltagy, Iz and
               Bethard, Steven and
               Cotterell, Ryan and
               Chakraborty, Tanmoy and
               Zhou, Yichao},
  booktitle = {Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
  month     = jun,
  year      = {2021},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2021.naacl-main.121},
  doi       = {10.18653/v1/2021.naacl-main.121},
  pages     = {1535--1548},
  abstract  = {Knowledge is now starting to power neural dialogue agents. At the same time, the risk of misinformation and disinformation from dialogue agents also rises. Verifying the veracity of information from formal sources are widely studied in computational fact checking. In this work, we ask: How robust are fact checking systems on claims in colloquial style? We aim to open up new discussions in the intersection of fact verification and dialogue safety. In order to investigate how fact checking systems behave on colloquial claims, we transfer the styles of claims from FEVER (Thorne et al., 2018) into colloquialism. We find that existing fact checking systems that perform well on claims in formal style significantly degenerate on colloquial claims with the same semantics. Especially, we show that document retrieval is the weakest spot in the system even vulnerable to filler words, such as {``}yeah{''} and {``}you know{''}. The document recall of WikiAPI retriever (Hanselowski et al., 2018) which is 90.0{\%} on FEVER, drops to 72.2{\%} on the colloquial claims. We compare the characteristics of colloquial claims to those of claims in formal style, and demonstrate the challenging issues in them.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the conference paper with citekey kim-etal-2021-robust. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="kim-etal-2021-robust">
    <titleInfo>
      <title>How Robust are Fact Checking Systems on Colloquial Claims?</title>
    </titleInfo>
    <!-- Paper authors, in listed order. -->
    <name type="personal">
      <namePart type="given">Byeongchang</namePart>
      <namePart type="family">Kim</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Hyunwoo</namePart>
      <namePart type="family">Kim</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Seokhee</namePart>
      <namePart type="family">Hong</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Gunhee</namePart>
      <namePart type="family">Kim</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2021-06</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors and publisher. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Kristina</namePart>
        <namePart type="family">Toutanova</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Anna</namePart>
        <namePart type="family">Rumshisky</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Luke</namePart>
        <namePart type="family">Zettlemoyer</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Dilek</namePart>
        <namePart type="family">Hakkani-Tur</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Iz</namePart>
        <namePart type="family">Beltagy</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Steven</namePart>
        <namePart type="family">Bethard</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ryan</namePart>
        <namePart type="family">Cotterell</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Tanmoy</namePart>
        <namePart type="family">Chakraborty</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yichao</namePart>
        <namePart type="family">Zhou</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Online</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Knowledge is now starting to power neural dialogue agents. At the same time, the risk of misinformation and disinformation from dialogue agents also rises. Verifying the veracity of information from formal sources are widely studied in computational fact checking. In this work, we ask: How robust are fact checking systems on claims in colloquial style? We aim to open up new discussions in the intersection of fact verification and dialogue safety. In order to investigate how fact checking systems behave on colloquial claims, we transfer the styles of claims from FEVER (Thorne et al., 2018) into colloquialism. We find that existing fact checking systems that perform well on claims in formal style significantly degenerate on colloquial claims with the same semantics. Especially, we show that document retrieval is the weakest spot in the system even vulnerable to filler words, such as “yeah” and “you know”. The document recall of WikiAPI retriever (Hanselowski et al., 2018) which is 90.0% on FEVER, drops to 72.2% on the colloquial claims. We compare the characteristics of colloquial claims to those of claims in formal style, and demonstrate the challenging issues in them.</abstract>
    <!-- Identifiers and landing page for the paper. -->
    <identifier type="citekey">kim-etal-2021-robust</identifier>
    <identifier type="doi">10.18653/v1/2021.naacl-main.121</identifier>
    <location>
      <url>https://aclanthology.org/2021.naacl-main.121</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2021-06</date>
      <extent unit="page">
        <start>1535</start>
        <end>1548</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T How Robust are Fact Checking Systems on Colloquial Claims?
%A Kim, Byeongchang
%A Kim, Hyunwoo
%A Hong, Seokhee
%A Kim, Gunhee
%Y Toutanova, Kristina
%Y Rumshisky, Anna
%Y Zettlemoyer, Luke
%Y Hakkani-Tur, Dilek
%Y Beltagy, Iz
%Y Bethard, Steven
%Y Cotterell, Ryan
%Y Chakraborty, Tanmoy
%Y Zhou, Yichao
%S Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies
%D 2021
%8 June
%I Association for Computational Linguistics
%C Online
%F kim-etal-2021-robust
%X Knowledge is now starting to power neural dialogue agents. At the same time, the risk of misinformation and disinformation from dialogue agents also rises. Verifying the veracity of information from formal sources are widely studied in computational fact checking. In this work, we ask: How robust are fact checking systems on claims in colloquial style? We aim to open up new discussions in the intersection of fact verification and dialogue safety. In order to investigate how fact checking systems behave on colloquial claims, we transfer the styles of claims from FEVER (Thorne et al., 2018) into colloquialism. We find that existing fact checking systems that perform well on claims in formal style significantly degenerate on colloquial claims with the same semantics. Especially, we show that document retrieval is the weakest spot in the system even vulnerable to filler words, such as “yeah” and “you know”. The document recall of WikiAPI retriever (Hanselowski et al., 2018) which is 90.0% on FEVER, drops to 72.2% on the colloquial claims. We compare the characteristics of colloquial claims to those of claims in formal style, and demonstrate the challenging issues in them.
%R 10.18653/v1/2021.naacl-main.121
%U https://aclanthology.org/2021.naacl-main.121
%U https://doi.org/10.18653/v1/2021.naacl-main.121
%P 1535-1548
Markdown (Informal)
[How Robust are Fact Checking Systems on Colloquial Claims?](https://aclanthology.org/2021.naacl-main.121) (Kim et al., NAACL 2021)
ACL
- Byeongchang Kim, Hyunwoo Kim, Seokhee Hong, and Gunhee Kim. 2021. How Robust are Fact Checking Systems on Colloquial Claims? In Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, pages 1535–1548, Online. Association for Computational Linguistics.