@inproceedings{excell-al-moubayed-2021-towards,
    title = "Towards Equal Gender Representation in the Annotations of Toxic Language Detection",
    author = "Excell, Elizabeth and
      Al Moubayed, Noura",
    editor = "Costa-jussa, Marta and
      Gonen, Hila and
      Hardmeier, Christian and
      Webster, Kellie",
    booktitle = "Proceedings of the 3rd Workshop on Gender Bias in Natural Language Processing",
    month = aug,
    year = "2021",
    address = "Online",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2021.gebnlp-1.7",
    doi = "10.18653/v1/2021.gebnlp-1.7",
    pages = "55--65",
    abstract = "Classifiers tend to propagate biases present in the data on which they are trained. Hence, it is important to understand how the demographic identities of the annotators of comments affect the fairness of the resulting model. In this paper, we focus on the differences in the ways men and women annotate comments for toxicity, investigating how these differences result in models that amplify the opinions of male annotators. We find that the BERT model associates toxic comments containing offensive words with male annotators, causing the model to predict 67.7{\%} of toxic comments as having been annotated by men. We show that this disparity between gender predictions can be mitigated by removing offensive words and highly toxic comments from the training data. We then apply the learned associations between gender and language to toxic language classifiers, finding that models trained exclusively on female-annotated data perform 1.8{\%} better than those trained solely on male-annotated data, and that training models on data after removing all offensive words reduces bias in the model by 55.5{\%} while increasing the sensitivity by 0.4{\%}.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="excell-al-moubayed-2021-towards">
    <titleInfo>
      <title>Towards Equal Gender Representation in the Annotations of Toxic Language Detection</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Elizabeth</namePart>
      <namePart type="family">Excell</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Noura</namePart>
      <namePart type="family">Al Moubayed</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2021-08</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 3rd Workshop on Gender Bias in Natural Language Processing</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Marta</namePart>
        <namePart type="family">Costa-jussa</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Hila</namePart>
        <namePart type="family">Gonen</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Christian</namePart>
        <namePart type="family">Hardmeier</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Kellie</namePart>
        <namePart type="family">Webster</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Online</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Classifiers tend to propagate biases present in the data on which they are trained. Hence, it is important to understand how the demographic identities of the annotators of comments affect the fairness of the resulting model. In this paper, we focus on the differences in the ways men and women annotate comments for toxicity, investigating how these differences result in models that amplify the opinions of male annotators. We find that the BERT model associates toxic comments containing offensive words with male annotators, causing the model to predict 67.7% of toxic comments as having been annotated by men. We show that this disparity between gender predictions can be mitigated by removing offensive words and highly toxic comments from the training data. We then apply the learned associations between gender and language to toxic language classifiers, finding that models trained exclusively on female-annotated data perform 1.8% better than those trained solely on male-annotated data, and that training models on data after removing all offensive words reduces bias in the model by 55.5% while increasing the sensitivity by 0.4%.</abstract>
    <identifier type="citekey">excell-al-moubayed-2021-towards</identifier>
    <identifier type="doi">10.18653/v1/2021.gebnlp-1.7</identifier>
    <location>
      <url>https://aclanthology.org/2021.gebnlp-1.7</url>
    </location>
    <part>
      <date>2021-08</date>
      <extent unit="page">
        <start>55</start>
        <end>65</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Towards Equal Gender Representation in the Annotations of Toxic Language Detection
%A Excell, Elizabeth
%A Al Moubayed, Noura
%Y Costa-jussa, Marta
%Y Gonen, Hila
%Y Hardmeier, Christian
%Y Webster, Kellie
%S Proceedings of the 3rd Workshop on Gender Bias in Natural Language Processing
%D 2021
%8 August
%I Association for Computational Linguistics
%C Online
%F excell-al-moubayed-2021-towards
%X Classifiers tend to propagate biases present in the data on which they are trained. Hence, it is important to understand how the demographic identities of the annotators of comments affect the fairness of the resulting model. In this paper, we focus on the differences in the ways men and women annotate comments for toxicity, investigating how these differences result in models that amplify the opinions of male annotators. We find that the BERT model associates toxic comments containing offensive words with male annotators, causing the model to predict 67.7% of toxic comments as having been annotated by men. We show that this disparity between gender predictions can be mitigated by removing offensive words and highly toxic comments from the training data. We then apply the learned associations between gender and language to toxic language classifiers, finding that models trained exclusively on female-annotated data perform 1.8% better than those trained solely on male-annotated data, and that training models on data after removing all offensive words reduces bias in the model by 55.5% while increasing the sensitivity by 0.4%.
%R 10.18653/v1/2021.gebnlp-1.7
%U https://aclanthology.org/2021.gebnlp-1.7
%U https://doi.org/10.18653/v1/2021.gebnlp-1.7
%P 55-65
Markdown (Informal)
[Towards Equal Gender Representation in the Annotations of Toxic Language Detection](https://aclanthology.org/2021.gebnlp-1.7) (Excell & Al Moubayed, GeBNLP 2021)
ACL
Elizabeth Excell and Noura Al Moubayed. 2021. [Towards Equal Gender Representation in the Annotations of Toxic Language Detection](https://aclanthology.org/2021.gebnlp-1.7). In *Proceedings of the 3rd Workshop on Gender Bias in Natural Language Processing*, pages 55–65, Online. Association for Computational Linguistics.