@inproceedings{nicolai-etal-2022-penalizing,
title = "Penalizing Divergence: Multi-Parallel Translation for Low-Resource Languages of {N}orth {A}merica",
author = "Nicolai, Garrett and
Yang, Changbing and
Silfverberg, Miikka",
editor = "Calzolari, Nicoletta and
Huang, Chu-Ren and
Kim, Hansaem and
Pustejovsky, James and
Wanner, Leo and
Choi, Key-Sun and
Ryu, Pum-Mo and
Chen, Hsin-Hsi and
Donatelli, Lucia and
Ji, Heng and
Kurohashi, Sadao and
Paggio, Patrizia and
Xue, Nianwen and
Kim, Seokhwan and
Hahm, Younggyun and
He, Zhong and
Lee, Tony Kyungil and
Santus, Enrico and
Bond, Francis and
Na, Seung-Hoon",
booktitle = "Proceedings of the 29th International Conference on Computational Linguistics",
month = oct,
year = "2022",
address = "Gyeongju, Republic of Korea",
publisher = "International Committee on Computational Linguistics",
url = "https://aclanthology.org/2022.coling-1.378/",
pages = "4292--4298",
abstract = "This paper explores a special case in multilingual machine translation: so called multi-parallel translation, where the target data for all language pairs are identical. While multi-parallelism offers benefits which are not available in a standard translation setting, translation models can easily overfit when training data are limited. We introduce a regularizer, the divergence penalty, which penalizes the translation model when it represents source sentences with identical target translations in divergent ways. Experiments on very low-resourced Indigenous North American languages show that an initially deficient multilingual translator can improve by 4.9 BLEU through mBART pre-training, and 5.5 BLEU points with the strategic addition of monolingual data, and that a divergence penalty leads to further increases of 0.4 BLEU. Further experiments on Germanic languages demonstrate a improvement of 0.5 BLEU when applying the divergence penalty. An investigation of the neural encoder representations learned by our translation models shows that the divergence penalty encourages models to learn a unified neural interlingua."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="nicolai-etal-2022-penalizing">
<titleInfo>
<title>Penalizing Divergence: Multi-Parallel Translation for Low-Resource Languages of North America</title>
</titleInfo>
<name type="personal">
<namePart type="given">Garrett</namePart>
<namePart type="family">Nicolai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Changbing</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Miikka</namePart>
<namePart type="family">Silfverberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-10</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 29th International Conference on Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chu-Ren</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hansaem</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">James</namePart>
<namePart type="family">Pustejovsky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leo</namePart>
<namePart type="family">Wanner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Key-Sun</namePart>
<namePart type="family">Choi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pum-Mo</namePart>
<namePart type="family">Ryu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hsin-Hsi</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lucia</namePart>
<namePart type="family">Donatelli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Heng</namePart>
<namePart type="family">Ji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sadao</namePart>
<namePart type="family">Kurohashi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Patrizia</namePart>
<namePart type="family">Paggio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Seokhwan</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Younggyun</namePart>
<namePart type="family">Hahm</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhong</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tony</namePart>
<namePart type="given">Kyungil</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Enrico</namePart>
<namePart type="family">Santus</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Francis</namePart>
<namePart type="family">Bond</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Seung-Hoon</namePart>
<namePart type="family">Na</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Committee on Computational Linguistics</publisher>
<place>
<placeTerm type="text">Gyeongju, Republic of Korea</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper explores a special case in multilingual machine translation: so-called multi-parallel translation, where the target data for all language pairs are identical. While multi-parallelism offers benefits which are not available in a standard translation setting, translation models can easily overfit when training data are limited. We introduce a regularizer, the divergence penalty, which penalizes the translation model when it represents source sentences with identical target translations in divergent ways. Experiments on very low-resourced Indigenous North American languages show that an initially deficient multilingual translator can improve by 4.9 BLEU points through mBART pre-training and by 5.5 BLEU points with the strategic addition of monolingual data, and that the divergence penalty leads to a further increase of 0.4 BLEU. Further experiments on Germanic languages demonstrate an improvement of 0.5 BLEU when applying the divergence penalty. An investigation of the neural encoder representations learned by our translation models shows that the divergence penalty encourages models to learn a unified neural interlingua.</abstract>
<identifier type="citekey">nicolai-etal-2022-penalizing</identifier>
<location>
<url>https://aclanthology.org/2022.coling-1.378/</url>
</location>
<part>
<date>2022-10</date>
<extent unit="page">
<start>4292</start>
<end>4298</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Penalizing Divergence: Multi-Parallel Translation for Low-Resource Languages of North America
%A Nicolai, Garrett
%A Yang, Changbing
%A Silfverberg, Miikka
%Y Calzolari, Nicoletta
%Y Huang, Chu-Ren
%Y Kim, Hansaem
%Y Pustejovsky, James
%Y Wanner, Leo
%Y Choi, Key-Sun
%Y Ryu, Pum-Mo
%Y Chen, Hsin-Hsi
%Y Donatelli, Lucia
%Y Ji, Heng
%Y Kurohashi, Sadao
%Y Paggio, Patrizia
%Y Xue, Nianwen
%Y Kim, Seokhwan
%Y Hahm, Younggyun
%Y He, Zhong
%Y Lee, Tony Kyungil
%Y Santus, Enrico
%Y Bond, Francis
%Y Na, Seung-Hoon
%S Proceedings of the 29th International Conference on Computational Linguistics
%D 2022
%8 October
%I International Committee on Computational Linguistics
%C Gyeongju, Republic of Korea
%F nicolai-etal-2022-penalizing
%X This paper explores a special case in multilingual machine translation: so-called multi-parallel translation, where the target data for all language pairs are identical. While multi-parallelism offers benefits which are not available in a standard translation setting, translation models can easily overfit when training data are limited. We introduce a regularizer, the divergence penalty, which penalizes the translation model when it represents source sentences with identical target translations in divergent ways. Experiments on very low-resourced Indigenous North American languages show that an initially deficient multilingual translator can improve by 4.9 BLEU points through mBART pre-training and by 5.5 BLEU points with the strategic addition of monolingual data, and that the divergence penalty leads to a further increase of 0.4 BLEU. Further experiments on Germanic languages demonstrate an improvement of 0.5 BLEU when applying the divergence penalty. An investigation of the neural encoder representations learned by our translation models shows that the divergence penalty encourages models to learn a unified neural interlingua.
%U https://aclanthology.org/2022.coling-1.378/
%P 4292-4298
Markdown (Informal)
[Penalizing Divergence: Multi-Parallel Translation for Low-Resource Languages of North America](https://aclanthology.org/2022.coling-1.378/) (Nicolai et al., COLING 2022)
ACL
Garrett Nicolai, Changbing Yang, and Miikka Silfverberg. 2022. Penalizing Divergence: Multi-Parallel Translation for Low-Resource Languages of North America. In Proceedings of the 29th International Conference on Computational Linguistics, pages 4292–4298, Gyeongju, Republic of Korea. International Committee on Computational Linguistics.
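
The divergence penalty described in the abstract can be pictured roughly as follows: a minimal PyTorch sketch that penalizes the distance between encoder representations of two multi-parallel source sentences sharing the same target translation. The mean-pooling, the squared-L2 distance, and the `penalty_weight` term are illustrative assumptions, not the paper's exact formulation.

```python
import torch

def divergence_penalty(enc_a: torch.Tensor, enc_b: torch.Tensor,
                       mask_a: torch.Tensor, mask_b: torch.Tensor) -> torch.Tensor:
    """enc_*: (batch, time, dim) encoder states for two source sentences that
    share an identical target translation; mask_*: (batch, time), 1 for real
    tokens, 0 for padding."""
    mask_a, mask_b = mask_a.float(), mask_b.float()
    # Mean-pool each sentence over its non-padding positions.
    pooled_a = (enc_a * mask_a.unsqueeze(-1)).sum(1) / mask_a.sum(1, keepdim=True)
    pooled_b = (enc_b * mask_b.unsqueeze(-1)).sum(1) / mask_b.sum(1, keepdim=True)
    # Penalize divergence as the squared L2 distance between pooled encodings,
    # averaged over the batch.
    return ((pooled_a - pooled_b) ** 2).sum(-1).mean()

# Assumed usage during training (penalty_weight is a hypothetical hyperparameter):
#   total_loss = cross_entropy_loss + penalty_weight * divergence_penalty(
#       enc_a, enc_b, mask_a, mask_b)
```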