@inproceedings{banerjee-etal-2021-scrambled,
title = "Scrambled Translation Problem: A Problem of Denoising {UNMT}",
author = "Banerjee, Tamali and
V Murthy, Rudra and
Bhattacharya, Pushpak",
editor = "Duh, Kevin and
Guzm{\'a}n, Francisco",
booktitle = "Proceedings of Machine Translation Summit XVIII: Research Track",
month = aug,
year = "2021",
address = "Virtual",
publisher = "Association for Machine Translation in the Americas",
url = "https://aclanthology.org/2021.mtsummit-research.11/",
pages = "127--138",
abstract = "In this paper and we identify an interesting kind of error in the output of Unsupervised Neural Machine Translation (UNMT) systems like Undreamt1. We refer to this error type as Scrambled Translation problem. We observe that UNMT models which use word shuffle noise (as in case of Undreamt) can generate correct words and but fail to stitch them together to form phrases. As a result and words of the translated sentence look scrambled and resulting in decreased BLEU. We hypothesise that the reason behind scrambled translation problem is `shuffling noise' which is introduced in every input sentence as a denoising strategy. To test our hypothesis and we experiment by retraining UNMT models with a simple retraining strategy. We stop the training of the Denoising UNMT model after a pre-decided number of iterations and resume the training for the remaining iterations- which number is also pre-decided- using original sentence as input without adding any noise. Our proposed solution achieves significant performance improvement UNMT models that train conventionally. We demonstrate these performance gains on four language pairs and viz. and English-French and English-German and English-Spanish and Hindi-Punjabi. Our qualitative and quantitative analysis shows that the retraining strategy helps achieve better alignment as observed by attention heatmap and better phrasal translation and leading to statistically significant improvement in BLEU scores."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="banerjee-etal-2021-scrambled">
<titleInfo>
<title>Scrambled Translation Problem: A Problem of Denoising UNMT</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tamali</namePart>
<namePart type="family">Banerjee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rudra</namePart>
<namePart type="family">V Murthy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pushpak</namePart>
<namePart type="family">Bhattacharya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of Machine Translation Summit XVIII: Research Track</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kevin</namePart>
<namePart type="family">Duh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Francisco</namePart>
<namePart type="family">Guzmán</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Machine Translation in the Americas</publisher>
<place>
<placeTerm type="text">Virtual</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we identify an interesting kind of error in the output of Unsupervised Neural Machine Translation (UNMT) systems like Undreamt. We refer to this error type as the Scrambled Translation problem. We observe that UNMT models which use word-shuffle noise (as in the case of Undreamt) can generate correct words but fail to stitch them together to form phrases. As a result, words of the translated sentence look scrambled, resulting in decreased BLEU. We hypothesise that the reason behind the scrambled translation problem is the ‘shuffling noise’ which is introduced in every input sentence as a denoising strategy. To test our hypothesis, we experiment with a simple retraining strategy: we stop the training of the denoising UNMT model after a pre-decided number of iterations and resume the training for the remaining iterations, whose number is also pre-decided, using the original sentences as input without adding any noise. Our proposed solution achieves significant performance improvement over UNMT models trained conventionally. We demonstrate these performance gains on four language pairs, viz., English-French, English-German, English-Spanish, and Hindi-Punjabi. Our qualitative and quantitative analysis shows that the retraining strategy helps achieve better alignment, as observed in attention heatmaps, and better phrasal translation, leading to statistically significant improvement in BLEU scores.</abstract>
<identifier type="citekey">banerjee-etal-2021-scrambled</identifier>
<location>
<url>https://aclanthology.org/2021.mtsummit-research.11/</url>
</location>
<part>
<date>2021-08</date>
<extent unit="page">
<start>127</start>
<end>138</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Scrambled Translation Problem: A Problem of Denoising UNMT
%A Banerjee, Tamali
%A V Murthy, Rudra
%A Bhattacharya, Pushpak
%Y Duh, Kevin
%Y Guzmán, Francisco
%S Proceedings of Machine Translation Summit XVIII: Research Track
%D 2021
%8 August
%I Association for Machine Translation in the Americas
%C Virtual
%F banerjee-etal-2021-scrambled
%X In this paper, we identify an interesting kind of error in the output of Unsupervised Neural Machine Translation (UNMT) systems like Undreamt. We refer to this error type as the Scrambled Translation problem. We observe that UNMT models which use word-shuffle noise (as in the case of Undreamt) can generate correct words but fail to stitch them together to form phrases. As a result, words of the translated sentence look scrambled, resulting in decreased BLEU. We hypothesise that the reason behind the scrambled translation problem is ‘shuffling noise’ which is introduced in every input sentence as a denoising strategy. To test our hypothesis, we experiment with a simple retraining strategy: we stop the training of the denoising UNMT model after a pre-decided number of iterations and resume the training for the remaining iterations, whose number is also pre-decided, using the original sentences as input without adding any noise. Our proposed solution achieves significant performance improvement over UNMT models trained conventionally. We demonstrate these performance gains on four language pairs, viz., English-French, English-German, English-Spanish, and Hindi-Punjabi. Our qualitative and quantitative analysis shows that the retraining strategy helps achieve better alignment, as observed in attention heatmaps, and better phrasal translation, leading to statistically significant improvement in BLEU scores.
%U https://aclanthology.org/2021.mtsummit-research.11/
%P 127-138
Markdown (Informal)
[Scrambled Translation Problem: A Problem of Denoising UNMT](https://aclanthology.org/2021.mtsummit-research.11/) (Banerjee et al., MTSummit 2021)
ACL
Tamali Banerjee, Rudra V Murthy, and Pushpak Bhattacharya. 2021. [Scrambled Translation Problem: A Problem of Denoising UNMT](https://aclanthology.org/2021.mtsummit-research.11/). In Proceedings of Machine Translation Summit XVIII: Research Track, pages 127–138, Virtual. Association for Machine Translation in the Americas.
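
The abstract above describes a two-phase schedule: train the denoising UNMT model on word-shuffled input for a pre-decided number of iterations, then resume training on the original, noise-free sentences. The following is a minimal, hypothetical Python sketch of that schedule, not the authors' implementation: `word_shuffle` uses the local-permutation noise commonly used in denoising UNMT systems such as Undreamt, and the toy `model_step` callback and sample data are illustrative stand-ins.

```python
# Hypothetical sketch of the two-phase retraining schedule from the abstract.
# Phase 1: denoising objective on word-shuffled input; Phase 2: clean input.
import random

def word_shuffle(tokens: list[str], k: int = 3) -> list[str]:
    """Locally permute tokens so each word moves roughly at most k positions
    (the local-shuffle noise commonly used in denoising UNMT)."""
    keys = [i + random.uniform(0, k) for i in range(len(tokens))]
    order = sorted(range(len(tokens)), key=lambda i: keys[i])
    return [tokens[i] for i in order]

def train(model_step, sentences: list[str], total_iters: int, noisy_iters: int) -> None:
    """Run total_iters reconstruction steps; drop the shuffle noise after
    the pre-decided noisy_iters and continue on the original sentences."""
    for it in range(total_iters):
        tgt = random.choice(sentences).split()
        src = word_shuffle(tgt) if it < noisy_iters else tgt  # phase switch
        model_step(src, tgt)  # reconstruction objective: src -> tgt

if __name__ == "__main__":
    random.seed(0)
    data = ["the cat sat on the mat", "birds fly south in winter"]
    # model_step is a no-op placeholder standing in for a real update step.
    train(lambda src, tgt: None, data, total_iters=10, noisy_iters=7)
```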