% Workshop paper in the GeBNLP 2021 proceedings (ACL Anthology export).
% Braced words in the title ({ASR}, {Librispeech}) keep their capitalisation under sentence-casing styles.
@inproceedings{garnerin-etal-2021-investigating,
  title     = {Investigating the Impact of Gender Representation in {ASR} Training Data: a Case Study on {Librispeech}},
  author    = {Garnerin, Mahault and
               Rossato, Solange and
               Besacier, Laurent},
  editor    = {Costa-jussa, Marta and
               Gonen, Hila and
               Hardmeier, Christian and
               Webster, Kellie},
  booktitle = {Proceedings of the 3rd Workshop on Gender Bias in Natural Language Processing},
  month     = aug,
  year      = {2021},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2021.gebnlp-1.10},
  doi       = {10.18653/v1/2021.gebnlp-1.10},
  pages     = {86--92},
  abstract  = {In this paper we question the impact of gender representation in training data on the performance of an end-to-end ASR system. We create an experiment based on the Librispeech corpus and build 3 different training corpora varying only the proportion of data produced by each gender category. We observe that if our system is overall robust to the gender balance or imbalance in training data, it is nonetheless dependant of the adequacy between the individuals present in the training and testing sets.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record: one workshop paper plus its host proceedings volume. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="garnerin-etal-2021-investigating">
    <!-- Paper title -->
    <titleInfo>
      <title>Investigating the Impact of Gender Representation in ASR Training Data: a Case Study on Librispeech</title>
    </titleInfo>
    <!-- Paper authors, in byline order -->
    <name type="personal">
      <namePart type="given">Mahault</namePart>
      <namePart type="family">Garnerin</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Solange</namePart>
      <namePart type="family">Rossato</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Laurent</namePart>
      <namePart type="family">Besacier</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2021-08</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume containing the paper, with its editors and publisher -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 3rd Workshop on Gender Bias in Natural Language Processing</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Marta</namePart>
        <namePart type="family">Costa-jussa</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Hila</namePart>
        <namePart type="family">Gonen</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Christian</namePart>
        <namePart type="family">Hardmeier</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Kellie</namePart>
        <namePart type="family">Webster</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Online</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>In this paper we question the impact of gender representation in training data on the performance of an end-to-end ASR system. We create an experiment based on the Librispeech corpus and build 3 different training corpora varying only the proportion of data produced by each gender category. We observe that if our system is overall robust to the gender balance or imbalance in training data, it is nonetheless dependant of the adequacy between the individuals present in the training and testing sets.</abstract>
    <!-- Identifiers and access location -->
    <identifier type="citekey">garnerin-etal-2021-investigating</identifier>
    <identifier type="doi">10.18653/v1/2021.gebnlp-1.10</identifier>
    <location>
      <url>https://aclanthology.org/2021.gebnlp-1.10</url>
    </location>
    <!-- Position of the paper within the host volume -->
    <part>
      <date>2021-08</date>
      <extent unit="page">
        <start>86</start>
        <end>92</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Investigating the Impact of Gender Representation in ASR Training Data: a Case Study on Librispeech
%A Garnerin, Mahault
%A Rossato, Solange
%A Besacier, Laurent
%Y Costa-jussa, Marta
%Y Gonen, Hila
%Y Hardmeier, Christian
%Y Webster, Kellie
%S Proceedings of the 3rd Workshop on Gender Bias in Natural Language Processing
%D 2021
%8 August
%I Association for Computational Linguistics
%C Online
%F garnerin-etal-2021-investigating
%X In this paper we question the impact of gender representation in training data on the performance of an end-to-end ASR system. We create an experiment based on the Librispeech corpus and build 3 different training corpora varying only the proportion of data produced by each gender category. We observe that if our system is overall robust to the gender balance or imbalance in training data, it is nonetheless dependant of the adequacy between the individuals present in the training and testing sets.
%R 10.18653/v1/2021.gebnlp-1.10
%U https://aclanthology.org/2021.gebnlp-1.10
%U https://doi.org/10.18653/v1/2021.gebnlp-1.10
%P 86-92
Markdown (Informal)
[Investigating the Impact of Gender Representation in ASR Training Data: a Case Study on Librispeech](https://aclanthology.org/2021.gebnlp-1.10) (Garnerin et al., GeBNLP 2021)
ACL