@inproceedings{patel-etal-2020-leveraging,
title = "Leveraging Alignment and Phonology for low-resource Indic to {E}nglish Neural Machine Transliteration",
author = "Patel, Parth and
Mehta, Manthan and
Bhattacharya, Pushpak and
Atreya, Arjun",
editor = "Bhattacharyya, Pushpak and
Sharma, Dipti Misra and
Sangal, Rajeev",
booktitle = "Proceedings of the 17th International Conference on Natural Language Processing (ICON)",
month = dec,
year = "2020",
address = "Indian Institute of Technology Patna, Patna, India",
publisher = "NLP Association of India (NLPAI)",
url = "https://aclanthology.org/2020.icon-main.51/",
pages = "373--378",
abstract = "In this paper we present a novel transliteration technique based on Orthographic Syllable(OS) segmentation for low-resource Indian languages (ILs). Given that alignment has produced promising results in Statistical Machine Transliteration systems and phonology plays an important role in transliteration, we introduce a new model which uses alignment representation similar to that of IBM model 3 to pre-process the tokenized input sequence and then use pre-trained source and target OS-embeddings for training. We apply our model for transliteration from ILs to English and report our accuracy based on Top-1 Exact Match. We also compare our accuracy with a previously proposed Phrase-Based model and report improvements."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="patel-etal-2020-leveraging">
<titleInfo>
<title>Leveraging Alignment and Phonology for low-resource Indic to English Neural Machine Transliteration</title>
</titleInfo>
<name type="personal">
<namePart type="given">Parth</namePart>
<namePart type="family">Patel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manthan</namePart>
<namePart type="family">Mehta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pushpak</namePart>
<namePart type="family">Bhattacharya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arjun</namePart>
<namePart type="family">Atreya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 17th International Conference on Natural Language Processing (ICON)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Pushpak</namePart>
<namePart type="family">Bhattacharyya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dipti</namePart>
<namePart type="given">Misra</namePart>
<namePart type="family">Sharma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rajeev</namePart>
<namePart type="family">Sangal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>NLP Association of India (NLPAI)</publisher>
<place>
<placeTerm type="text">Indian Institute of Technology Patna, Patna, India</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper we present a novel transliteration technique based on Orthographic Syllable(OS) segmentation for low-resource Indian languages (ILs). Given that alignment has produced promising results in Statistical Machine Transliteration systems and phonology plays an important role in transliteration, we introduce a new model which uses alignment representation similar to that of IBM model 3 to pre-process the tokenized input sequence and then use pre-trained source and target OS-embeddings for training. We apply our model for transliteration from ILs to English and report our accuracy based on Top-1 Exact Match. We also compare our accuracy with a previously proposed Phrase-Based model and report improvements.</abstract>
<identifier type="citekey">patel-etal-2020-leveraging</identifier>
<location>
<url>https://aclanthology.org/2020.icon-main.51/</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>373</start>
<end>378</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Leveraging Alignment and Phonology for low-resource Indic to English Neural Machine Transliteration
%A Patel, Parth
%A Mehta, Manthan
%A Bhattacharya, Pushpak
%A Atreya, Arjun
%Y Bhattacharyya, Pushpak
%Y Sharma, Dipti Misra
%Y Sangal, Rajeev
%S Proceedings of the 17th International Conference on Natural Language Processing (ICON)
%D 2020
%8 December
%I NLP Association of India (NLPAI)
%C Indian Institute of Technology Patna, Patna, India
%F patel-etal-2020-leveraging
%X In this paper we present a novel transliteration technique based on Orthographic Syllable(OS) segmentation for low-resource Indian languages (ILs). Given that alignment has produced promising results in Statistical Machine Transliteration systems and phonology plays an important role in transliteration, we introduce a new model which uses alignment representation similar to that of IBM model 3 to pre-process the tokenized input sequence and then use pre-trained source and target OS-embeddings for training. We apply our model for transliteration from ILs to English and report our accuracy based on Top-1 Exact Match. We also compare our accuracy with a previously proposed Phrase-Based model and report improvements.
%U https://aclanthology.org/2020.icon-main.51/
%P 373-378
Markdown (Informal)
[Leveraging Alignment and Phonology for low-resource Indic to English Neural Machine Transliteration](https://aclanthology.org/2020.icon-main.51/) (Patel et al., ICON 2020)
ACL