@inproceedings{appicharla-etal-2021-iitp-mt,
title = "{IITP}-{MT} at {WAT}2021: Indic-{E}nglish Multilingual Neural Machine Translation using {R}omanized Vocabulary",
author = "Appicharla, Ramakrishna and
Gupta, Kamal Kumar and
Ekbal, Asif and
Bhattacharyya, Pushpak",
editor = "Nakazawa, Toshiaki and
Nakayama, Hideki and
Goto, Isao and
Mino, Hideya and
Ding, Chenchen and
Dabre, Raj and
Kunchukuttan, Anoop and
Higashiyama, Shohei and
Manabe, Hiroshi and
Pa, Win Pa and
Parida, Shantipriya and
Bojar, Ond{\v{r}}ej and
Chu, Chenhui and
Eriguchi, Akiko and
Abe, Kaori and
Oda, Yusuke and
Sudoh, Katsuhito and
Kurohashi, Sadao and
Bhattacharyya, Pushpak",
booktitle = "Proceedings of the 8th Workshop on Asian Translation (WAT2021)",
month = aug,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.wat-1.29",
doi = "10.18653/v1/2021.wat-1.29",
pages = "238--243",
abstract = "This paper describes the systems submitted to WAT 2021 MultiIndicMT shared task by IITP-MT team. We submit two multilingual Neural Machine Translation (NMT) systems (Indic-to-English and English-to-Indic). We romanize all Indic data and create subword vocabulary which is shared between all Indic languages. We use back-translation approach to generate synthetic data which is appended to parallel corpus and used to train our models. The models are evaluated using BLEU, RIBES and AMFM scores with Indic-to-English model achieving 40.08 BLEU for Hindi-English pair and English-to-Indic model achieving 34.48 BLEU for English-Hindi pair. However, we observe that the shared romanized subword vocabulary is not helping English-to-Indic model at the time of generation, leading it to produce poor quality translations for Tamil, Telugu and Malayalam to English pairs with BLEU score of 8.51, 6.25 and 3.79 respectively.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="appicharla-etal-2021-iitp-mt">
<titleInfo>
<title>IITP-MT at WAT2021: Indic-English Multilingual Neural Machine Translation using Romanized Vocabulary</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ramakrishna</namePart>
<namePart type="family">Appicharla</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kamal</namePart>
<namePart type="given">Kumar</namePart>
<namePart type="family">Gupta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asif</namePart>
<namePart type="family">Ekbal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pushpak</namePart>
<namePart type="family">Bhattacharyya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 8th Workshop on Asian Translation (WAT2021)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Toshiaki</namePart>
<namePart type="family">Nakazawa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hideki</namePart>
<namePart type="family">Nakayama</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Isao</namePart>
<namePart type="family">Goto</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hideya</namePart>
<namePart type="family">Mino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chenchen</namePart>
<namePart type="family">Ding</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Raj</namePart>
<namePart type="family">Dabre</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anoop</namePart>
<namePart type="family">Kunchukuttan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shohei</namePart>
<namePart type="family">Higashiyama</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hiroshi</namePart>
<namePart type="family">Manabe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Win</namePart>
<namePart type="given">Pa</namePart>
<namePart type="family">Pa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shantipriya</namePart>
<namePart type="family">Parida</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ondřej</namePart>
<namePart type="family">Bojar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chenhui</namePart>
<namePart type="family">Chu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Akiko</namePart>
<namePart type="family">Eriguchi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kaori</namePart>
<namePart type="family">Abe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yusuke</namePart>
<namePart type="family">Oda</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Katsuhito</namePart>
<namePart type="family">Sudoh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sadao</namePart>
<namePart type="family">Kurohashi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pushpak</namePart>
<namePart type="family">Bhattacharyya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper describes the systems submitted to WAT 2021 MultiIndicMT shared task by IITP-MT team. We submit two multilingual Neural Machine Translation (NMT) systems (Indic-to-English and English-to-Indic). We romanize all Indic data and create subword vocabulary which is shared between all Indic languages. We use back-translation approach to generate synthetic data which is appended to parallel corpus and used to train our models. The models are evaluated using BLEU, RIBES and AMFM scores with Indic-to-English model achieving 40.08 BLEU for Hindi-English pair and English-to-Indic model achieving 34.48 BLEU for English-Hindi pair. However, we observe that the shared romanized subword vocabulary is not helping English-to-Indic model at the time of generation, leading it to produce poor quality translations for Tamil, Telugu and Malayalam to English pairs with BLEU score of 8.51, 6.25 and 3.79 respectively.</abstract>
<identifier type="citekey">appicharla-etal-2021-iitp-mt</identifier>
<identifier type="doi">10.18653/v1/2021.wat-1.29</identifier>
<location>
<url>https://aclanthology.org/2021.wat-1.29</url>
</location>
<part>
<date>2021-08</date>
<extent unit="page">
<start>238</start>
<end>243</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T IITP-MT at WAT2021: Indic-English Multilingual Neural Machine Translation using Romanized Vocabulary
%A Appicharla, Ramakrishna
%A Gupta, Kamal Kumar
%A Ekbal, Asif
%A Bhattacharyya, Pushpak
%Y Nakazawa, Toshiaki
%Y Nakayama, Hideki
%Y Goto, Isao
%Y Mino, Hideya
%Y Ding, Chenchen
%Y Dabre, Raj
%Y Kunchukuttan, Anoop
%Y Higashiyama, Shohei
%Y Manabe, Hiroshi
%Y Pa, Win Pa
%Y Parida, Shantipriya
%Y Bojar, Ondřej
%Y Chu, Chenhui
%Y Eriguchi, Akiko
%Y Abe, Kaori
%Y Oda, Yusuke
%Y Sudoh, Katsuhito
%Y Kurohashi, Sadao
%Y Bhattacharyya, Pushpak
%S Proceedings of the 8th Workshop on Asian Translation (WAT2021)
%D 2021
%8 August
%I Association for Computational Linguistics
%C Online
%F appicharla-etal-2021-iitp-mt
%X This paper describes the systems submitted to WAT 2021 MultiIndicMT shared task by IITP-MT team. We submit two multilingual Neural Machine Translation (NMT) systems (Indic-to-English and English-to-Indic). We romanize all Indic data and create subword vocabulary which is shared between all Indic languages. We use back-translation approach to generate synthetic data which is appended to parallel corpus and used to train our models. The models are evaluated using BLEU, RIBES and AMFM scores with Indic-to-English model achieving 40.08 BLEU for Hindi-English pair and English-to-Indic model achieving 34.48 BLEU for English-Hindi pair. However, we observe that the shared romanized subword vocabulary is not helping English-to-Indic model at the time of generation, leading it to produce poor quality translations for Tamil, Telugu and Malayalam to English pairs with BLEU score of 8.51, 6.25 and 3.79 respectively.
%R 10.18653/v1/2021.wat-1.29
%U https://aclanthology.org/2021.wat-1.29
%U https://doi.org/10.18653/v1/2021.wat-1.29
%P 238-243
Markdown (Informal)
[IITP-MT at WAT2021: Indic-English Multilingual Neural Machine Translation using Romanized Vocabulary](https://aclanthology.org/2021.wat-1.29) (Appicharla et al., WAT 2021)
ACL