@inproceedings{eger-benz-2020-hero,
title = "From Hero to Z{\'e}roe: A Benchmark of Low-Level Adversarial Attacks",
author = "Eger, Steffen and
Benz, Yannik",
editor = "Wong, Kam-Fai and
Knight, Kevin and
Wu, Hua",
booktitle = "Proceedings of the 1st Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 10th International Joint Conference on Natural Language Processing",
month = dec,
year = "2020",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.aacl-main.79/",
doi = "10.18653/v1/2020.aacl-main.79",
pages = "786--803",
abstract = "Adversarial attacks are label-preserving modifications to inputs of machine learning classifiers designed to fool machines but not humans. Natural Language Processing (NLP) has mostly focused on high-level attack scenarios such as paraphrasing input texts. We argue that these are less realistic in typical application scenarios such as in social media, and instead focus on low-level attacks on the character-level. Guided by human cognitive abilities and human robustness, we propose the first large-scale catalogue and benchmark of low-level adversarial attacks, which we dub Z{\'e}roe, encompassing nine different attack modes including visual and phonetic adversaries. We show that RoBERTa, NLP's current workhorse, fails on our attacks. Our dataset provides a benchmark for testing robustness of future more human-like NLP models."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="eger-benz-2020-hero">
<titleInfo>
<title>From Hero to Zéroe: A Benchmark of Low-Level Adversarial Attacks</title>
</titleInfo>
<name type="personal">
<namePart type="given">Steffen</namePart>
<namePart type="family">Eger</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yannik</namePart>
<namePart type="family">Benz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 10th International Joint Conference on Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kam-Fai</namePart>
<namePart type="family">Wong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kevin</namePart>
<namePart type="family">Knight</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hua</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Adversarial attacks are label-preserving modifications to inputs of machine learning classifiers designed to fool machines but not humans. Natural Language Processing (NLP) has mostly focused on high-level attack scenarios such as paraphrasing input texts. We argue that these are less realistic in typical application scenarios such as in social media, and instead focus on low-level attacks on the character-level. Guided by human cognitive abilities and human robustness, we propose the first large-scale catalogue and benchmark of low-level adversarial attacks, which we dub Zéroe, encompassing nine different attack modes including visual and phonetic adversaries. We show that RoBERTa, NLP’s current workhorse, fails on our attacks. Our dataset provides a benchmark for testing robustness of future more human-like NLP models.</abstract>
<identifier type="citekey">eger-benz-2020-hero</identifier>
<identifier type="doi">10.18653/v1/2020.aacl-main.79</identifier>
<location>
<url>https://aclanthology.org/2020.aacl-main.79/</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>786</start>
<end>803</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T From Hero to Zéroe: A Benchmark of Low-Level Adversarial Attacks
%A Eger, Steffen
%A Benz, Yannik
%Y Wong, Kam-Fai
%Y Knight, Kevin
%Y Wu, Hua
%S Proceedings of the 1st Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 10th International Joint Conference on Natural Language Processing
%D 2020
%8 December
%I Association for Computational Linguistics
%C Suzhou, China
%F eger-benz-2020-hero
%X Adversarial attacks are label-preserving modifications to inputs of machine learning classifiers designed to fool machines but not humans. Natural Language Processing (NLP) has mostly focused on high-level attack scenarios such as paraphrasing input texts. We argue that these are less realistic in typical application scenarios such as in social media, and instead focus on low-level attacks on the character-level. Guided by human cognitive abilities and human robustness, we propose the first large-scale catalogue and benchmark of low-level adversarial attacks, which we dub Zéroe, encompassing nine different attack modes including visual and phonetic adversaries. We show that RoBERTa, NLP’s current workhorse, fails on our attacks. Our dataset provides a benchmark for testing robustness of future more human-like NLP models.
%R 10.18653/v1/2020.aacl-main.79
%U https://aclanthology.org/2020.aacl-main.79/
%U https://doi.org/10.18653/v1/2020.aacl-main.79
%P 786-803
Markdown (Informal)
[From Hero to Zéroe: A Benchmark of Low-Level Adversarial Attacks](https://aclanthology.org/2020.aacl-main.79/) (Eger & Benz, AACL 2020)
ACL
- Steffen Eger and Yannik Benz. 2020. From Hero to Zéroe: A Benchmark of Low-Level Adversarial Attacks. In Proceedings of the 1st Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 10th International Joint Conference on Natural Language Processing, pages 786–803, Suzhou, China. Association for Computational Linguistics.