@inproceedings{chen-etal-2022-rationalization,
title = "Can Rationalization Improve Robustness?",
author = "Chen, Howard and
He, Jacqueline and
Narasimhan, Karthik and
Chen, Danqi",
editor = "Carpuat, Marine and
de Marneffe, Marie-Catherine and
Meza Ruiz, Ivan Vladimir",
booktitle = "Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies",
month = jul,
year = "2022",
address = "Seattle, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.naacl-main.278/",
doi = "10.18653/v1/2022.naacl-main.278",
pages = "3792--3805",
abstract = "A growing line of work has investigated the development of neural NLP models that can produce rationales {--} subsets of input that can explain their model predictions. In this paper, we ask whether such rationale models can provide robustness to adversarial attacks in addition to their interpretable nature. Since these models need to first generate rationales ({\textquotedblleft}rationalizer{\textquotedblright}) before making predictions ({\textquotedblleft}predictor{\textquotedblright}), they have the potential to ignore noise or adversarially added text by simply masking it out of the generated rationale. To this end, we systematically generate various types of {\textquoteleft}AddText' attacks for both token- and sentence-level rationalization tasks and perform an extensive empirical evaluation of state-of-the-art rationale models across five different tasks. Our experiments reveal that rationale models show promise in improving robustness over AddText attacks, while they struggle in certain scenarios {--} when the rationalizer is sensitive to position bias or lexical choices of attack text. Further, leveraging human rationales as supervision does not always translate to better performance. Our study is a first step towards exploring the interplay between interpretability and robustness in the rationalize-then-predict framework."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="chen-etal-2022-rationalization">
    <titleInfo>
      <title>Can Rationalization Improve Robustness?</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Howard</namePart>
      <namePart type="family">Chen</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jacqueline</namePart>
      <namePart type="family">He</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Karthik</namePart>
      <namePart type="family">Narasimhan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Danqi</namePart>
      <namePart type="family">Chen</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2022-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Marine</namePart>
        <namePart type="family">Carpuat</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Marie-Catherine</namePart>
        <namePart type="family">de Marneffe</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ivan</namePart>
        <namePart type="given">Vladimir</namePart>
        <namePart type="family">Meza Ruiz</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Seattle, United States</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>A growing line of work has investigated the development of neural NLP models that can produce rationales – subsets of input that can explain their model predictions. In this paper, we ask whether such rationale models can provide robustness to adversarial attacks in addition to their interpretable nature. Since these models need to first generate rationales (“rationalizer”) before making predictions (“predictor”), they have the potential to ignore noise or adversarially added text by simply masking it out of the generated rationale. To this end, we systematically generate various types of ‘AddText’ attacks for both token- and sentence-level rationalization tasks and perform an extensive empirical evaluation of state-of-the-art rationale models across five different tasks. Our experiments reveal that rationale models show promise in improving robustness over AddText attacks, while they struggle in certain scenarios – when the rationalizer is sensitive to position bias or lexical choices of attack text. Further, leveraging human rationales as supervision does not always translate to better performance. Our study is a first step towards exploring the interplay between interpretability and robustness in the rationalize-then-predict framework.</abstract>
    <identifier type="citekey">chen-etal-2022-rationalization</identifier>
    <identifier type="doi">10.18653/v1/2022.naacl-main.278</identifier>
    <location>
      <url>https://aclanthology.org/2022.naacl-main.278/</url>
    </location>
    <part>
      <date>2022-07</date>
      <extent unit="page">
        <start>3792</start>
        <end>3805</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Can Rationalization Improve Robustness?
%A Chen, Howard
%A He, Jacqueline
%A Narasimhan, Karthik
%A Chen, Danqi
%Y Carpuat, Marine
%Y de Marneffe, Marie-Catherine
%Y Meza Ruiz, Ivan Vladimir
%S Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies
%D 2022
%8 July
%I Association for Computational Linguistics
%C Seattle, United States
%F chen-etal-2022-rationalization
%X A growing line of work has investigated the development of neural NLP models that can produce rationales – subsets of input that can explain their model predictions. In this paper, we ask whether such rationale models can provide robustness to adversarial attacks in addition to their interpretable nature. Since these models need to first generate rationales (“rationalizer”) before making predictions (“predictor”), they have the potential to ignore noise or adversarially added text by simply masking it out of the generated rationale. To this end, we systematically generate various types of ‘AddText’ attacks for both token- and sentence-level rationalization tasks and perform an extensive empirical evaluation of state-of-the-art rationale models across five different tasks. Our experiments reveal that rationale models show promise in improving robustness over AddText attacks, while they struggle in certain scenarios – when the rationalizer is sensitive to position bias or lexical choices of attack text. Further, leveraging human rationales as supervision does not always translate to better performance. Our study is a first step towards exploring the interplay between interpretability and robustness in the rationalize-then-predict framework.
%R 10.18653/v1/2022.naacl-main.278
%U https://aclanthology.org/2022.naacl-main.278/
%U https://doi.org/10.18653/v1/2022.naacl-main.278
%P 3792-3805
Markdown (Informal):
[Can Rationalization Improve Robustness?](https://aclanthology.org/2022.naacl-main.278/) (Chen et al., NAACL 2022)
ACL:
Howard Chen, Jacqueline He, Karthik Narasimhan, and Danqi Chen. 2022. Can Rationalization Improve Robustness? In Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, pages 3792–3805, Seattle, United States. Association for Computational Linguistics.