@inproceedings{mueller-etal-2020-analysis,
title = "An Analysis of Massively Multilingual Neural Machine Translation for Low-Resource Languages",
author = "Mueller, Aaron and
Nicolai, Garrett and
McCarthy, Arya D. and
Lewis, Dylan and
Wu, Winston and
Yarowsky, David",
editor = "Calzolari, Nicoletta and
B{\'e}chet, Fr{\'e}d{\'e}ric and
Blache, Philippe and
Choukri, Khalid and
Cieri, Christopher and
Declerck, Thierry and
Goggi, Sara and
Isahara, Hitoshi and
Maegaard, Bente and
Mariani, Joseph and
Mazo, H{\'e}l{\`e}ne and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Twelfth Language Resources and Evaluation Conference",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2020.lrec-1.458",
pages = "3710--3718",
abstract = "In this work, we explore massively multilingual low-resource neural machine translation. Using translations of the Bible (which have parallel structure across languages), we train models with up to 1,107 source languages. We create various multilingual corpora, varying the number and relatedness of source languages. Using these, we investigate the best ways to use this many-way aligned resource for multilingual machine translation. Our experiments employ a grammatically and phylogenetically diverse set of source languages during testing for more representative evaluations. We find that best practices in this domain are highly language-specific: adding more languages to a training set is often better, but too many harms performance{---}the best number depends on the source language. Furthermore, training on related languages can improve or degrade performance, depending on the language. As there is no one-size-fits-most answer, we find that it is critical to tailor one{'}s approach to the source language and its typology.",
language = "English",
ISBN = "979-10-95546-34-4",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mueller-etal-2020-analysis">
<titleInfo>
<title>An Analysis of Massively Multilingual Neural Machine Translation for Low-Resource Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Aaron</namePart>
<namePart type="family">Mueller</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Garrett</namePart>
<namePart type="family">Nicolai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arya</namePart>
<namePart type="given">D</namePart>
<namePart type="family">McCarthy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dylan</namePart>
<namePart type="family">Lewis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Winston</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Yarowsky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Twelfth Language Resources and Evaluation Conference</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Frédéric</namePart>
<namePart type="family">Béchet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philippe</namePart>
<namePart type="family">Blache</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Cieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Goggi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hitoshi</namePart>
<namePart type="family">Isahara</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hélène</namePart>
<namePart type="family">Mazo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-34-4</identifier>
</relatedItem>
<abstract>In this work, we explore massively multilingual low-resource neural machine translation. Using translations of the Bible (which have parallel structure across languages), we train models with up to 1,107 source languages. We create various multilingual corpora, varying the number and relatedness of source languages. Using these, we investigate the best ways to use this many-way aligned resource for multilingual machine translation. Our experiments employ a grammatically and phylogenetically diverse set of source languages during testing for more representative evaluations. We find that best practices in this domain are highly language-specific: adding more languages to a training set is often better, but too many harms performance—the best number depends on the source language. Furthermore, training on related languages can improve or degrade performance, depending on the language. As there is no one-size-fits-most answer, we find that it is critical to tailor one’s approach to the source language and its typology.</abstract>
<identifier type="citekey">mueller-etal-2020-analysis</identifier>
<location>
<url>https://aclanthology.org/2020.lrec-1.458</url>
</location>
<part>
<date>2020-05</date>
<extent unit="page">
<start>3710</start>
<end>3718</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T An Analysis of Massively Multilingual Neural Machine Translation for Low-Resource Languages
%A Mueller, Aaron
%A Nicolai, Garrett
%A McCarthy, Arya D.
%A Lewis, Dylan
%A Wu, Winston
%A Yarowsky, David
%Y Calzolari, Nicoletta
%Y Béchet, Frédéric
%Y Blache, Philippe
%Y Choukri, Khalid
%Y Cieri, Christopher
%Y Declerck, Thierry
%Y Goggi, Sara
%Y Isahara, Hitoshi
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Mazo, Hélène
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Twelfth Language Resources and Evaluation Conference
%D 2020
%8 May
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-34-4
%G English
%F mueller-etal-2020-analysis
%X In this work, we explore massively multilingual low-resource neural machine translation. Using translations of the Bible (which have parallel structure across languages), we train models with up to 1,107 source languages. We create various multilingual corpora, varying the number and relatedness of source languages. Using these, we investigate the best ways to use this many-way aligned resource for multilingual machine translation. Our experiments employ a grammatically and phylogenetically diverse set of source languages during testing for more representative evaluations. We find that best practices in this domain are highly language-specific: adding more languages to a training set is often better, but too many harms performance—the best number depends on the source language. Furthermore, training on related languages can improve or degrade performance, depending on the language. As there is no one-size-fits-most answer, we find that it is critical to tailor one’s approach to the source language and its typology.
%U https://aclanthology.org/2020.lrec-1.458
%P 3710-3718
Markdown (Informal)
[An Analysis of Massively Multilingual Neural Machine Translation for Low-Resource Languages](https://aclanthology.org/2020.lrec-1.458) (Mueller et al., LREC 2020)
ACL