@inproceedings{berard-etal-2021-efficient,
    title = "Efficient Inference for Multilingual Neural Machine Translation",
    author = "Berard, Alexandre and
      Lee, Dain and
      Clinchant, Stephane and
      Jung, Kweonwoo and
      Nikoulina, Vassilina",
    editor = "Moens, Marie-Francine and
      Huang, Xuanjing and
      Specia, Lucia and
      Yih, Scott Wen-tau",
    booktitle = "Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing",
    month = nov,
    year = "2021",
    address = "Online and Punta Cana, Dominican Republic",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2021.emnlp-main.674",
    doi = "10.18653/v1/2021.emnlp-main.674",
    pages = "8563--8583",
    abstract = "Multilingual NMT has become an attractive solution for MT deployment in production. But to match bilingual quality, it comes at the cost of larger and slower models. In this work, we consider several ways to make multilingual NMT faster at inference without degrading its quality. We experiment with several {``}light decoder{''} architectures in two 20-language multi-parallel settings: small-scale on TED Talks and large-scale on ParaCrawl. Our experiments demonstrate that combining a shallow decoder with vocabulary filtering leads to almost 2 times faster inference with no loss in translation quality. We validate our findings with BLEU and chrF (on 380 language pairs), robustness evaluation and human evaluation.",
}
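
To use this entry in a LaTeX document, save it to a .bib file and cite it by its citekey. A minimal sketch follows; the file name references.bib is illustrative, and "plain" stands in for whatever bibliography style your venue's template provides:

% Minimal sketch: assumes the BibTeX entry above was saved as references.bib
% (file name is illustrative); compile with pdflatex, then bibtex, then pdflatex twice.
\documentclass{article}
\begin{document}
Shallow decoders with vocabulary filtering can nearly halve multilingual
NMT inference time \cite{berard-etal-2021-efficient}.
\bibliographystyle{plain}  % substitute your venue's style file if it ships one
\bibliography{references}
\end{document}
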
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="berard-etal-2021-efficient">
    <titleInfo>
      <title>Efficient Inference for Multilingual Neural Machine Translation</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Alexandre</namePart>
      <namePart type="family">Berard</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Dain</namePart>
      <namePart type="family">Lee</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Stephane</namePart>
      <namePart type="family">Clinchant</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Kweonwoo</namePart>
      <namePart type="family">Jung</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Vassilina</namePart>
      <namePart type="family">Nikoulina</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2021-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Marie-Francine</namePart>
        <namePart type="family">Moens</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Xuanjing</namePart>
        <namePart type="family">Huang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Lucia</namePart>
        <namePart type="family">Specia</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Scott</namePart>
        <namePart type="given">Wen-tau</namePart>
        <namePart type="family">Yih</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Online and Punta Cana, Dominican Republic</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Multilingual NMT has become an attractive solution for MT deployment in production. But to match bilingual quality, it comes at the cost of larger and slower models. In this work, we consider several ways to make multilingual NMT faster at inference without degrading its quality. We experiment with several “light decoder” architectures in two 20-language multi-parallel settings: small-scale on TED Talks and large-scale on ParaCrawl. Our experiments demonstrate that combining a shallow decoder with vocabulary filtering leads to almost 2 times faster inference with no loss in translation quality. We validate our findings with BLEU and chrF (on 380 language pairs), robustness evaluation and human evaluation.</abstract>
    <identifier type="citekey">berard-etal-2021-efficient</identifier>
    <identifier type="doi">10.18653/v1/2021.emnlp-main.674</identifier>
    <location>
      <url>https://aclanthology.org/2021.emnlp-main.674</url>
    </location>
    <part>
      <date>2021-11</date>
      <extent unit="page">
        <start>8563</start>
        <end>8583</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Efficient Inference for Multilingual Neural Machine Translation
%A Berard, Alexandre
%A Lee, Dain
%A Clinchant, Stephane
%A Jung, Kweonwoo
%A Nikoulina, Vassilina
%Y Moens, Marie-Francine
%Y Huang, Xuanjing
%Y Specia, Lucia
%Y Yih, Scott Wen-tau
%S Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing
%D 2021
%8 November
%I Association for Computational Linguistics
%C Online and Punta Cana, Dominican Republic
%F berard-etal-2021-efficient
%X Multilingual NMT has become an attractive solution for MT deployment in production. But to match bilingual quality, it comes at the cost of larger and slower models. In this work, we consider several ways to make multilingual NMT faster at inference without degrading its quality. We experiment with several “light decoder” architectures in two 20-language multi-parallel settings: small-scale on TED Talks and large-scale on ParaCrawl. Our experiments demonstrate that combining a shallow decoder with vocabulary filtering leads to almost 2 times faster inference with no loss in translation quality. We validate our findings with BLEU and chrF (on 380 language pairs), robustness evaluation and human evaluation.
%R 10.18653/v1/2021.emnlp-main.674
%U https://aclanthology.org/2021.emnlp-main.674
%U https://doi.org/10.18653/v1/2021.emnlp-main.674
%P 8563-8583
Markdown (Informal)
[Efficient Inference for Multilingual Neural Machine Translation](https://aclanthology.org/2021.emnlp-main.674) (Berard et al., EMNLP 2021)

ACL
Alexandre Berard, Dain Lee, Stephane Clinchant, Kweonwoo Jung, and Vassilina Nikoulina. 2021. Efficient Inference for Multilingual Neural Machine Translation. In Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing, pages 8563–8583, Online and Punta Cana, Dominican Republic. Association for Computational Linguistics.