@inproceedings{raman-etal-2023-centering,
title = "Centering the Margins: Outlier-Based Identification of Harmed Populations in Toxicity Detection",
author = "Raman, Vyoma and
Fleisig, Eve and
Klein, Dan",
editor = "Bouamor, Houda and
Pino, Juan and
Bali, Kalika",
booktitle = "Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.emnlp-main.579",
doi = "10.18653/v1/2023.emnlp-main.579",
pages = "9316--9329",
abstract = "The impact of AI models on marginalized communities has traditionally been measured by identifying performance differences between specified demographic subgroups. Though this approach aims to center vulnerable groups, it risks obscuring patterns of harm faced by intersectional subgroups or shared across multiple groups. To address this, we draw on theories of marginalization from disability studies and related disciplines, which state that people farther from the norm face greater adversity, to consider the {``}margins{''} in the domain of toxicity detection. We operationalize the {``}margins{''} of a dataset by employing outlier detection to identify text about people with demographic attributes distant from the {``}norm{''}. We find that model performance is consistently worse for demographic outliers, with mean squared error (MSE) between outliers and non-outliers up to 70.4{\%} worse across toxicity types. It is also worse for text outliers, with a MSE up to 68.4{\%} higher for outliers than non-outliers. We also find text and demographic outliers to be particularly susceptible to errors in the classification of severe toxicity and identity attacks. Compared to analysis of disparities using traditional demographic breakdowns, we find that our outlier analysis frequently surfaces greater harms faced by a larger, more intersectional group, which suggests that outlier analysis is particularly beneficial for identifying harms against those groups.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="raman-etal-2023-centering">
<titleInfo>
<title>Centering the Margins: Outlier-Based Identification of Harmed Populations in Toxicity Detection</title>
</titleInfo>
<name type="personal">
<namePart type="given">Vyoma</namePart>
<namePart type="family">Raman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eve</namePart>
<namePart type="family">Fleisig</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dan</namePart>
<namePart type="family">Klein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Houda</namePart>
<namePart type="family">Bouamor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juan</namePart>
<namePart type="family">Pino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kalika</namePart>
<namePart type="family">Bali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The impact of AI models on marginalized communities has traditionally been measured by identifying performance differences between specified demographic subgroups. Though this approach aims to center vulnerable groups, it risks obscuring patterns of harm faced by intersectional subgroups or shared across multiple groups. To address this, we draw on theories of marginalization from disability studies and related disciplines, which state that people farther from the norm face greater adversity, to consider the “margins” in the domain of toxicity detection. We operationalize the “margins” of a dataset by employing outlier detection to identify text about people with demographic attributes distant from the “norm”. We find that model performance is consistently worse for demographic outliers, with mean squared error (MSE) between outliers and non-outliers up to 70.4% worse across toxicity types. It is also worse for text outliers, with an MSE up to 68.4% higher for outliers than non-outliers. We also find text and demographic outliers to be particularly susceptible to errors in the classification of severe toxicity and identity attacks. Compared to analysis of disparities using traditional demographic breakdowns, we find that our outlier analysis frequently surfaces greater harms faced by a larger, more intersectional group, which suggests that outlier analysis is particularly beneficial for identifying harms against those groups.</abstract>
<identifier type="citekey">raman-etal-2023-centering</identifier>
<identifier type="doi">10.18653/v1/2023.emnlp-main.579</identifier>
<location>
<url>https://aclanthology.org/2023.emnlp-main.579</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>9316</start>
<end>9329</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Centering the Margins: Outlier-Based Identification of Harmed Populations in Toxicity Detection
%A Raman, Vyoma
%A Fleisig, Eve
%A Klein, Dan
%Y Bouamor, Houda
%Y Pino, Juan
%Y Bali, Kalika
%S Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F raman-etal-2023-centering
%X The impact of AI models on marginalized communities has traditionally been measured by identifying performance differences between specified demographic subgroups. Though this approach aims to center vulnerable groups, it risks obscuring patterns of harm faced by intersectional subgroups or shared across multiple groups. To address this, we draw on theories of marginalization from disability studies and related disciplines, which state that people farther from the norm face greater adversity, to consider the “margins” in the domain of toxicity detection. We operationalize the “margins” of a dataset by employing outlier detection to identify text about people with demographic attributes distant from the “norm”. We find that model performance is consistently worse for demographic outliers, with mean squared error (MSE) between outliers and non-outliers up to 70.4% worse across toxicity types. It is also worse for text outliers, with an MSE up to 68.4% higher for outliers than non-outliers. We also find text and demographic outliers to be particularly susceptible to errors in the classification of severe toxicity and identity attacks. Compared to analysis of disparities using traditional demographic breakdowns, we find that our outlier analysis frequently surfaces greater harms faced by a larger, more intersectional group, which suggests that outlier analysis is particularly beneficial for identifying harms against those groups.
%R 10.18653/v1/2023.emnlp-main.579
%U https://aclanthology.org/2023.emnlp-main.579
%U https://doi.org/10.18653/v1/2023.emnlp-main.579
%P 9316-9329
Markdown (Informal)
[Centering the Margins: Outlier-Based Identification of Harmed Populations in Toxicity Detection](https://aclanthology.org/2023.emnlp-main.579) (Raman et al., EMNLP 2023)
ACL
Vyoma Raman, Eve Fleisig, and Dan Klein. 2023. Centering the Margins: Outlier-Based Identification of Harmed Populations in Toxicity Detection. In Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing, pages 9316–9329, Singapore. Association for Computational Linguistics.
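
The abstract describes the method only in prose: flag comments whose demographic attributes sit far from the dataset's "norm" using an outlier detector, then compare a toxicity model's mean squared error on outliers versus non-outliers. Below is a minimal illustrative sketch of that comparison, not the authors' released code; the choice of scikit-learn's IsolationForest, the contamination rate, and the synthetic data are assumptions standing in for whatever detector and dataset the paper actually uses.

import numpy as np
from sklearn.ensemble import IsolationForest
from sklearn.metrics import mean_squared_error

rng = np.random.default_rng(0)

# Assumed inputs, one row per comment (hypothetical stand-ins for the
# paper's data): demographic attribute vectors, gold toxicity scores,
# and a toxicity model's predicted scores.
n = 1000
demographics = rng.random((n, 8))
y_true = rng.random(n)
y_pred = np.clip(y_true + rng.normal(0.0, 0.1, n), 0.0, 1.0)

# Flag comments whose demographic attributes are far from the "norm";
# IsolationForest marks outliers with -1.
detector = IsolationForest(contamination=0.1, random_state=0)
is_outlier = detector.fit_predict(demographics) == -1

# Compare model error on demographic outliers vs. everyone else.
mse_out = mean_squared_error(y_true[is_outlier], y_pred[is_outlier])
mse_in = mean_squared_error(y_true[~is_outlier], y_pred[~is_outlier])
print(f"outlier MSE {mse_out:.4f} vs non-outlier MSE {mse_in:.4f} "
      f"({100 * (mse_out - mse_in) / mse_in:+.1f}% difference)")

Per the abstract, the analogous comparison for text outliers would swap text representations in for the demographic attribute vectors.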