@inproceedings{urchs-etal-2024-detecting,
title = "Detecting Gender Discrimination on Actor Level Using Linguistic Discourse Analysis",
author = "Urchs, Stefanie and
Thurner, Veronika and
A{\ss}enmacher, Matthias and
Heumann, Christian and
Thiemichen, Stephanie",
editor = "Fale{\'n}ska, Agnieszka and
Basta, Christine and
Costa-juss{\`a}, Marta and
Goldfarb-Tarrant, Seraphina and
Nozza, Debora",
booktitle = "Proceedings of the 5th Workshop on Gender Bias in Natural Language Processing (GeBNLP)",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.gebnlp-1.8/",
doi = "10.18653/v1/2024.gebnlp-1.8",
pages = "140--149",
abstract = "With the usage of tremendous amounts of text data for training powerful large language models such as ChatGPT, the issue of analysing and securing data quality has become more pressing than ever. Any biases, stereotypes and discriminatory patterns that exist in the training data can be reproduced, reinforced or broadly disseminated by the models in production. Therefore, it is crucial to carefully select and monitor the text data that is used as input to train the model. Due to the vast amount of training data, this process needs to be (at least partially) automated. In this work, we introduce a novel approach for automatically detecting gender discrimination in text data on the actor level based on linguistic discourse analysis. Specifically, we combine existing information extraction (IE) techniques to partly automate the qualitative research done in linguistic discourse analysis. We focus on two important steps: Identifying the respectiveperson-named-entity (an actor) and all forms it is referred to (Nomination), and detecting the characteristics it is ascribed (Predication). Asa proof of concept, we integrate these two steps into a pipeline for automated text analysis. The separate building blocks of the pipeline could be flexibly adapted, extended, and scaled for bigger datasets to accommodate a wide range of usage scenarios and specific ML tasks or help social scientists with analysis tasks. We showcase and evaluate our approach on several real and simulated exemplary texts."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="urchs-etal-2024-detecting">
<titleInfo>
<title>Detecting Gender Discrimination on Actor Level Using Linguistic Discourse Analysis</title>
</titleInfo>
<name type="personal">
<namePart type="given">Stefanie</namePart>
<namePart type="family">Urchs</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronika</namePart>
<namePart type="family">Thurner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matthias</namePart>
<namePart type="family">Aßenmacher</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christian</namePart>
<namePart type="family">Heumann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stephanie</namePart>
<namePart type="family">Thiemichen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 5th Workshop on Gender Bias in Natural Language Processing (GeBNLP)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Agnieszka</namePart>
<namePart type="family">Faleńska</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christine</namePart>
<namePart type="family">Basta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marta</namePart>
<namePart type="family">Costa-jussà</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Seraphina</namePart>
<namePart type="family">Goldfarb-Tarrant</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Debora</namePart>
<namePart type="family">Nozza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>With the usage of tremendous amounts of text data for training powerful large language models such as ChatGPT, the issue of analysing and securing data quality has become more pressing than ever. Any biases, stereotypes and discriminatory patterns that exist in the training data can be reproduced, reinforced or broadly disseminated by the models in production. Therefore, it is crucial to carefully select and monitor the text data that is used as input to train the model. Due to the vast amount of training data, this process needs to be (at least partially) automated. In this work, we introduce a novel approach for automatically detecting gender discrimination in text data on the actor level based on linguistic discourse analysis. Specifically, we combine existing information extraction (IE) techniques to partly automate the qualitative research done in linguistic discourse analysis. We focus on two important steps: Identifying the respectiveperson-named-entity (an actor) and all forms it is referred to (Nomination), and detecting the characteristics it is ascribed (Predication). Asa proof of concept, we integrate these two steps into a pipeline for automated text analysis. The separate building blocks of the pipeline could be flexibly adapted, extended, and scaled for bigger datasets to accommodate a wide range of usage scenarios and specific ML tasks or help social scientists with analysis tasks. We showcase and evaluate our approach on several real and simulated exemplary texts.</abstract>
<identifier type="citekey">urchs-etal-2024-detecting</identifier>
<identifier type="doi">10.18653/v1/2024.gebnlp-1.8</identifier>
<location>
<url>https://aclanthology.org/2024.gebnlp-1.8/</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>140</start>
<end>149</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Detecting Gender Discrimination on Actor Level Using Linguistic Discourse Analysis
%A Urchs, Stefanie
%A Thurner, Veronika
%A Aßenmacher, Matthias
%A Heumann, Christian
%A Thiemichen, Stephanie
%Y Faleńska, Agnieszka
%Y Basta, Christine
%Y Costa-jussà, Marta
%Y Goldfarb-Tarrant, Seraphina
%Y Nozza, Debora
%S Proceedings of the 5th Workshop on Gender Bias in Natural Language Processing (GeBNLP)
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F urchs-etal-2024-detecting
%X With the usage of tremendous amounts of text data for training powerful large language models such as ChatGPT, the issue of analysing and securing data quality has become more pressing than ever. Any biases, stereotypes and discriminatory patterns that exist in the training data can be reproduced, reinforced or broadly disseminated by the models in production. Therefore, it is crucial to carefully select and monitor the text data that is used as input to train the model. Due to the vast amount of training data, this process needs to be (at least partially) automated. In this work, we introduce a novel approach for automatically detecting gender discrimination in text data on the actor level based on linguistic discourse analysis. Specifically, we combine existing information extraction (IE) techniques to partly automate the qualitative research done in linguistic discourse analysis. We focus on two important steps: Identifying the respectiveperson-named-entity (an actor) and all forms it is referred to (Nomination), and detecting the characteristics it is ascribed (Predication). Asa proof of concept, we integrate these two steps into a pipeline for automated text analysis. The separate building blocks of the pipeline could be flexibly adapted, extended, and scaled for bigger datasets to accommodate a wide range of usage scenarios and specific ML tasks or help social scientists with analysis tasks. We showcase and evaluate our approach on several real and simulated exemplary texts.
%R 10.18653/v1/2024.gebnlp-1.8
%U https://aclanthology.org/2024.gebnlp-1.8/
%U https://doi.org/10.18653/v1/2024.gebnlp-1.8
%P 140-149
Markdown (Informal)
[Detecting Gender Discrimination on Actor Level Using Linguistic Discourse Analysis](https://aclanthology.org/2024.gebnlp-1.8/) (Urchs et al., GeBNLP 2024)
ACL