@inproceedings{jaiswal-etal-2020-improving,
title = "Improving {NMT} via Filtered Back Translation",
author = "Jaiswal, Nikhil and
Patidar, Mayur and
Kumari, Surabhi and
Patwardhan, Manasi and
Karande, Shirish and
Agarwal, Puneet and
Vig, Lovekesh",
editor = "Nakazawa, Toshiaki and
Nakayama, Hideki and
Ding, Chenchen and
Dabre, Raj and
Kunchukuttan, Anoop and
Pa, Win Pa and
Bojar, Ond{\v{r}}ej and
Parida, Shantipriya and
Goto, Isao and
Mino, Hidaya and
Manabe, Hiroshi and
Sudoh, Katsuhito and
Kurohashi, Sadao and
Bhattacharyya, Pushpak",
booktitle = "Proceedings of the 7th Workshop on Asian Translation",
month = dec,
year = "2020",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.wat-1.19/",
doi = "10.18653/v1/2020.wat-1.19",
pages = "154--159",
abstract = "Document-Level Machine Translation (MT) has become an active research area among the NLP community in recent years. Unlike sentence-level MT, which translates the sentences independently, document-level MT aims to utilize contextual information while translating a given source sentence. This paper demonstrates our submission (Team ID - DEEPNLP) to the Document-Level Translation task organized by WAT 2020. This task focuses on translating texts from a business dialog corpus while optionally utilizing the context present in the dialog. In our proposed approach, we utilize publicly available parallel corpus from different domains to train an open domain base NMT model. We then use monolingual target data to create filtered pseudo parallel data and employ Back-Translation to fine-tune the base model. This is further followed by fine-tuning on the domain-specific corpus. We also ensemble various models to improvise the translation performance. Our best models achieve a BLEU score of 26.59 and 22.83 in an unconstrained setting and 15.10 and 10.91 in the constrained settings for En-{\ensuremath{>}}Ja {\&} Ja-{\ensuremath{>}}En direction, respectively."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="jaiswal-etal-2020-improving">
<titleInfo>
<title>Improving NMT via Filtered Back Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nikhil</namePart>
<namePart type="family">Jaiswal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mayur</namePart>
<namePart type="family">Patidar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Surabhi</namePart>
<namePart type="family">Kumari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manasi</namePart>
<namePart type="family">Patwardhan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shirish</namePart>
<namePart type="family">Karande</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Puneet</namePart>
<namePart type="family">Agarwal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lovekesh</namePart>
<namePart type="family">Vig</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 7th Workshop on Asian Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Toshiaki</namePart>
<namePart type="family">Nakazawa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hideki</namePart>
<namePart type="family">Nakayama</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chenchen</namePart>
<namePart type="family">Ding</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Raj</namePart>
<namePart type="family">Dabre</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anoop</namePart>
<namePart type="family">Kunchukuttan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Win</namePart>
<namePart type="given">Pa</namePart>
<namePart type="family">Pa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ondřej</namePart>
<namePart type="family">Bojar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shantipriya</namePart>
<namePart type="family">Parida</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Isao</namePart>
<namePart type="family">Goto</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hidaya</namePart>
<namePart type="family">Mino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hiroshi</namePart>
<namePart type="family">Manabe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Katsuhito</namePart>
<namePart type="family">Sudoh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sadao</namePart>
<namePart type="family">Kurohashi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pushpak</namePart>
<namePart type="family">Bhattacharyya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Document-Level Machine Translation (MT) has become an active research area among the NLP community in recent years. Unlike sentence-level MT, which translates the sentences independently, document-level MT aims to utilize contextual information while translating a given source sentence. This paper demonstrates our submission (Team ID - DEEPNLP) to the Document-Level Translation task organized by WAT 2020. This task focuses on translating texts from a business dialog corpus while optionally utilizing the context present in the dialog. In our proposed approach, we utilize publicly available parallel corpus from different domains to train an open domain base NMT model. We then use monolingual target data to create filtered pseudo parallel data and employ Back-Translation to fine-tune the base model. This is further followed by fine-tuning on the domain-specific corpus. We also ensemble various models to improvise the translation performance. Our best models achieve a BLEU score of 26.59 and 22.83 in an unconstrained setting and 15.10 and 10.91 in the constrained settings for En-\ensuremath>Ja & Ja-\ensuremath>En direction, respectively.</abstract>
<identifier type="citekey">jaiswal-etal-2020-improving</identifier>
<identifier type="doi">10.18653/v1/2020.wat-1.19</identifier>
<location>
<url>https://aclanthology.org/2020.wat-1.19/</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>154</start>
<end>159</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Improving NMT via Filtered Back Translation
%A Jaiswal, Nikhil
%A Patidar, Mayur
%A Kumari, Surabhi
%A Patwardhan, Manasi
%A Karande, Shirish
%A Agarwal, Puneet
%A Vig, Lovekesh
%Y Nakazawa, Toshiaki
%Y Nakayama, Hideki
%Y Ding, Chenchen
%Y Dabre, Raj
%Y Kunchukuttan, Anoop
%Y Pa, Win Pa
%Y Bojar, Ondřej
%Y Parida, Shantipriya
%Y Goto, Isao
%Y Mino, Hidaya
%Y Manabe, Hiroshi
%Y Sudoh, Katsuhito
%Y Kurohashi, Sadao
%Y Bhattacharyya, Pushpak
%S Proceedings of the 7th Workshop on Asian Translation
%D 2020
%8 December
%I Association for Computational Linguistics
%C Suzhou, China
%F jaiswal-etal-2020-improving
%X Document-Level Machine Translation (MT) has become an active research area among the NLP community in recent years. Unlike sentence-level MT, which translates the sentences independently, document-level MT aims to utilize contextual information while translating a given source sentence. This paper demonstrates our submission (Team ID - DEEPNLP) to the Document-Level Translation task organized by WAT 2020. This task focuses on translating texts from a business dialog corpus while optionally utilizing the context present in the dialog. In our proposed approach, we utilize publicly available parallel corpus from different domains to train an open domain base NMT model. We then use monolingual target data to create filtered pseudo parallel data and employ Back-Translation to fine-tune the base model. This is further followed by fine-tuning on the domain-specific corpus. We also ensemble various models to improvise the translation performance. Our best models achieve a BLEU score of 26.59 and 22.83 in an unconstrained setting and 15.10 and 10.91 in the constrained settings for En-\ensuremath>Ja & Ja-\ensuremath>En direction, respectively.
%R 10.18653/v1/2020.wat-1.19
%U https://aclanthology.org/2020.wat-1.19/
%U https://doi.org/10.18653/v1/2020.wat-1.19
%P 154-159
Markdown (Informal)
[Improving NMT via Filtered Back Translation](https://aclanthology.org/2020.wat-1.19/) (Jaiswal et al., WAT 2020)
ACL
- Nikhil Jaiswal, Mayur Patidar, Surabhi Kumari, Manasi Patwardhan, Shirish Karande, Puneet Agarwal, and Lovekesh Vig. 2020. Improving NMT via Filtered Back Translation. In Proceedings of the 7th Workshop on Asian Translation, pages 154–159, Suzhou, China. Association for Computational Linguistics.