@inproceedings{ren-2023-back,
title = "Back-Transliteration of {E}nglish Loanwords in {J}apanese",
author = "Ren, Yuying",
editor = "Gorman, Kyle and
Sproat, Richard and
Roark, Brian",
booktitle = "Proceedings of the Workshop on Computation and Written Language (CAWL 2023)",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.cawl-1.6/",
doi = "10.18653/v1/2023.cawl-1.6",
pages = "43--49",
abstract = "We propose methods for transliterating English loanwords in Japanese from their Japanese written form (katakana/romaji) to their original English written form. Our data is a Japanese-English loanwords dictionary that we have created ourselves. We employ two approaches: direct transliteration, which directly converts words from katakana to English, and indirect transliteration, which utilizes the English pronunciation as a means to convert katakana words into their corresponding English sound representations, which are subsequently converted into English words. Additionally, we compare the effectiveness of using katakana versus romaji as input characters. We develop 6 models of 2 types for our experiments: one with an English lexicon-filter, and the other without. For each type, we built 3 models, including a pair n-gram based on WFSTs and two sequence-to-sequence models leveraging LSTM and transformer. Our best performing model was the pair n-gram model with a lexicon-filter, directly transliterating from katakana to English."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ren-2023-back">
<titleInfo>
<title>Back-Transliteration of English Loanwords in Japanese</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yuying</namePart>
<namePart type="family">Ren</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Workshop on Computation and Written Language (CAWL 2023)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kyle</namePart>
<namePart type="family">Gorman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Richard</namePart>
<namePart type="family">Sproat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Brian</namePart>
<namePart type="family">Roark</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We propose methods for transliterating English loanwords in Japanese from their Japanese written form (katakana/romaji) to their original English written form. Our data is a Japanese-English loanwords dictionary that we have created ourselves. We employ two approaches: direct transliteration, which directly converts words from katakana to English, and indirect transliteration, which utilizes the English pronunciation as a means to convert katakana words into their corresponding English sound representations, which are subsequently converted into English words. Additionally, we compare the effectiveness of using katakana versus romaji as input characters. We develop 6 models of 2 types for our experiments: one with an English lexicon-filter, and the other without. For each type, we built 3 models, including a pair n-gram based on WFSTs and two sequence-to-sequence models leveraging LSTM and transformer. Our best performing model was the pair n-gram model with a lexicon-filter, directly transliterating from katakana to English.</abstract>
<identifier type="citekey">ren-2023-back</identifier>
<identifier type="doi">10.18653/v1/2023.cawl-1.6</identifier>
<location>
<url>https://aclanthology.org/2023.cawl-1.6/</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>43</start>
<end>49</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Back-Transliteration of English Loanwords in Japanese
%A Ren, Yuying
%Y Gorman, Kyle
%Y Sproat, Richard
%Y Roark, Brian
%S Proceedings of the Workshop on Computation and Written Language (CAWL 2023)
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F ren-2023-back
%X We propose methods for transliterating English loanwords in Japanese from their Japanese written form (katakana/romaji) to their original English written form. Our data is a Japanese-English loanwords dictionary that we have created ourselves. We employ two approaches: direct transliteration, which directly converts words from katakana to English, and indirect transliteration, which utilizes the English pronunciation as a means to convert katakana words into their corresponding English sound representations, which are subsequently converted into English words. Additionally, we compare the effectiveness of using katakana versus romaji as input characters. We develop 6 models of 2 types for our experiments: one with an English lexicon-filter, and the other without. For each type, we built 3 models, including a pair n-gram based on WFSTs and two sequence-to-sequence models leveraging LSTM and transformer. Our best performing model was the pair n-gram model with a lexicon-filter, directly transliterating from katakana to English.
%R 10.18653/v1/2023.cawl-1.6
%U https://aclanthology.org/2023.cawl-1.6/
%U https://doi.org/10.18653/v1/2023.cawl-1.6
%P 43-49
Markdown (Informal)
[Back-Transliteration of English Loanwords in Japanese](https://aclanthology.org/2023.cawl-1.6/) (Ren, CAWL 2023)
ACL