@inproceedings{salesky-etal-2021-sigtyp,
title = "{SIGTYP} 2021 Shared Task: Robust Spoken Language Identification",
author = "Salesky, Elizabeth and
Abdullah, Badr M. and
Mielke, Sabrina and
Klyachko, Elena and
Serikov, Oleg and
Ponti, Edoardo Maria and
Kumar, Ritesh and
Cotterell, Ryan and
Vylomova, Ekaterina",
editor = {Vylomova, Ekaterina and
Salesky, Elizabeth and
Mielke, Sabrina and
Lapesa, Gabriella and
Kumar, Ritesh and
Hammarstr{\"o}m, Harald and
Vuli{\'c}, Ivan and
Korhonen, Anna and
Reichart, Roi and
Ponti, Edoardo Maria and
Cotterell, Ryan},
booktitle = "Proceedings of the Third Workshop on Computational Typology and Multilingual NLP",
month = jun,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.sigtyp-1.11/",
doi = "10.18653/v1/2021.sigtyp-1.11",
pages = "122--129",
abstract = "While language identification is a fundamental speech and language processing task, for many languages and language families it remains a challenging task. For many low-resource and endangered languages this is in part due to resource availability: where larger datasets exist, they may be single-speaker or have different domains than desired application scenarios, demanding a need for domain and speaker-invariant language identification systems. This year`s shared task on robust spoken language identification sought to investigate just this scenario: systems were to be trained on largely single-speaker speech from one domain, but evaluated on data in other domains recorded from speakers under different recording circumstances, mimicking realistic low-resource scenarios. We see that domain and speaker mismatch proves very challenging for current methods which can perform above 95{\%} accuracy in-domain, which domain adaptation can address to some degree, but that these conditions merit further investigation to make spoken language identification accessible in many scenarios."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="salesky-etal-2021-sigtyp">
<titleInfo>
<title>SIGTYP 2021 Shared Task: Robust Spoken Language Identification</title>
</titleInfo>
<name type="personal">
<namePart type="given">Elizabeth</namePart>
<namePart type="family">Salesky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Badr</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Abdullah</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sabrina</namePart>
<namePart type="family">Mielke</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elena</namePart>
<namePart type="family">Klyachko</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Oleg</namePart>
<namePart type="family">Serikov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Edoardo</namePart>
<namePart type="given">Maria</namePart>
<namePart type="family">Ponti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ritesh</namePart>
<namePart type="family">Kumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ryan</namePart>
<namePart type="family">Cotterell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Vylomova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Third Workshop on Computational Typology and Multilingual NLP</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Vylomova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elizabeth</namePart>
<namePart type="family">Salesky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sabrina</namePart>
<namePart type="family">Mielke</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gabriella</namePart>
<namePart type="family">Lapesa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ritesh</namePart>
<namePart type="family">Kumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Harald</namePart>
<namePart type="family">Hammarström</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ivan</namePart>
<namePart type="family">Vulić</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Korhonen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Roi</namePart>
<namePart type="family">Reichart</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Edoardo</namePart>
<namePart type="given">Maria</namePart>
<namePart type="family">Ponti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ryan</namePart>
<namePart type="family">Cotterell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>While language identification is a fundamental speech and language processing task, for many languages and language families it remains a challenging task. For many low-resource and endangered languages this is in part due to resource availability: where larger datasets exist, they may be single-speaker or have different domains than desired application scenarios, demanding a need for domain and speaker-invariant language identification systems. This year‘s shared task on robust spoken language identification sought to investigate just this scenario: systems were to be trained on largely single-speaker speech from one domain, but evaluated on data in other domains recorded from speakers under different recording circumstances, mimicking realistic low-resource scenarios. We see that domain and speaker mismatch proves very challenging for current methods which can perform above 95% accuracy in-domain, which domain adaptation can address to some degree, but that these conditions merit further investigation to make spoken language identification accessible in many scenarios.</abstract>
<identifier type="citekey">salesky-etal-2021-sigtyp</identifier>
<identifier type="doi">10.18653/v1/2021.sigtyp-1.11</identifier>
<location>
<url>https://aclanthology.org/2021.sigtyp-1.11/</url>
</location>
<part>
<date>2021-06</date>
<extent unit="page">
<start>122</start>
<end>129</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T SIGTYP 2021 Shared Task: Robust Spoken Language Identification
%A Salesky, Elizabeth
%A Abdullah, Badr M.
%A Mielke, Sabrina
%A Klyachko, Elena
%A Serikov, Oleg
%A Ponti, Edoardo Maria
%A Kumar, Ritesh
%A Cotterell, Ryan
%A Vylomova, Ekaterina
%Y Vylomova, Ekaterina
%Y Salesky, Elizabeth
%Y Mielke, Sabrina
%Y Lapesa, Gabriella
%Y Kumar, Ritesh
%Y Hammarström, Harald
%Y Vulić, Ivan
%Y Korhonen, Anna
%Y Reichart, Roi
%Y Ponti, Edoardo Maria
%Y Cotterell, Ryan
%S Proceedings of the Third Workshop on Computational Typology and Multilingual NLP
%D 2021
%8 June
%I Association for Computational Linguistics
%C Online
%F salesky-etal-2021-sigtyp
%X While language identification is a fundamental speech and language processing task, for many languages and language families it remains a challenging task. For many low-resource and endangered languages this is in part due to resource availability: where larger datasets exist, they may be single-speaker or have different domains than desired application scenarios, demanding a need for domain and speaker-invariant language identification systems. This year‘s shared task on robust spoken language identification sought to investigate just this scenario: systems were to be trained on largely single-speaker speech from one domain, but evaluated on data in other domains recorded from speakers under different recording circumstances, mimicking realistic low-resource scenarios. We see that domain and speaker mismatch proves very challenging for current methods which can perform above 95% accuracy in-domain, which domain adaptation can address to some degree, but that these conditions merit further investigation to make spoken language identification accessible in many scenarios.
%R 10.18653/v1/2021.sigtyp-1.11
%U https://aclanthology.org/2021.sigtyp-1.11/
%U https://doi.org/10.18653/v1/2021.sigtyp-1.11
%P 122-129
Markdown (Informal)
[SIGTYP 2021 Shared Task: Robust Spoken Language Identification](https://aclanthology.org/2021.sigtyp-1.11/) (Salesky et al., SIGTYP 2021)
ACL
- Elizabeth Salesky, Badr M. Abdullah, Sabrina Mielke, Elena Klyachko, Oleg Serikov, Edoardo Maria Ponti, Ritesh Kumar, Ryan Cotterell, and Ekaterina Vylomova. 2021. SIGTYP 2021 Shared Task: Robust Spoken Language Identification. In Proceedings of the Third Workshop on Computational Typology and Multilingual NLP, pages 122–129, Online. Association for Computational Linguistics.