@inproceedings{zhang-2023-pronunciation,
title = "Pronunciation Ambiguities in {J}apanese Kanji",
author = "Zhang, Wen",
editor = "Gorman, Kyle and
Sproat, Richard and
Roark, Brian",
booktitle = "Proceedings of the Workshop on Computation and Written Language (CAWL 2023)",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.cawl-1.7/",
doi = "10.18653/v1/2023.cawl-1.7",
pages = "50--60",
abstract = "Japanese writing is a complex system, and a large part of the complexity resides in the use of kanji. A single kanji character in modern Japanese may have multiple pronunciations, either as native vocabulary or as words borrowed from Chinese. This causes a problem for text-to-speech synthesis (TTS) because the system has to predict which pronunciation of each kanji character is appropriate in the context. The problem is called homograph disambiguation. To solve the problem, this research provides a new annotated Japanese single kanji character pronunciation data set and describes an experiment using the logistic regression (LR) classifier. A baseline is computed to compare with the LR classifier accuracy. This experiment provides the first experimental research in Japanese single kanji homograph disambiguation. The annotated Japanese data is freely released to the public to support further work."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhang-2023-pronunciation">
<titleInfo>
<title>Pronunciation Ambiguities in Japanese Kanji</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wen</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Workshop on Computation and Written Language (CAWL 2023)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kyle</namePart>
<namePart type="family">Gorman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Richard</namePart>
<namePart type="family">Sproat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Brian</namePart>
<namePart type="family">Roark</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Japanese writing is a complex system, and a large part of the complexity resides in the use of kanji. A single kanji character in modern Japanese may have multiple pronunciations, either as native vocabulary or as words borrowed from Chinese. This causes a problem for text-to-speech synthesis (TTS) because the system has to predict which pronunciation of each kanji character is appropriate in the context. The problem is called homograph disambiguation. To solve the problem, this research provides a new annotated Japanese single kanji character pronunciation data set and describes an experiment using the logistic regression (LR) classifier. A baseline is computed to compare with the LR classifier accuracy. This experiment provides the first experimental research in Japanese single kanji homograph disambiguation. The annotated Japanese data is freely released to the public to support further work.</abstract>
<identifier type="citekey">zhang-2023-pronunciation</identifier>
<identifier type="doi">10.18653/v1/2023.cawl-1.7</identifier>
<location>
<url>https://aclanthology.org/2023.cawl-1.7/</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>50</start>
<end>60</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Pronunciation Ambiguities in Japanese Kanji
%A Zhang, Wen
%Y Gorman, Kyle
%Y Sproat, Richard
%Y Roark, Brian
%S Proceedings of the Workshop on Computation and Written Language (CAWL 2023)
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F zhang-2023-pronunciation
%X Japanese writing is a complex system, and a large part of the complexity resides in the use of kanji. A single kanji character in modern Japanese may have multiple pronunciations, either as native vocabulary or as words borrowed from Chinese. This causes a problem for text-to-speech synthesis (TTS) because the system has to predict which pronunciation of each kanji character is appropriate in the context. The problem is called homograph disambiguation. To solve the problem, this research provides a new annotated Japanese single kanji character pronunciation data set and describes an experiment using the logistic regression (LR) classifier. A baseline is computed to compare with the LR classifier accuracy. This experiment provides the first experimental research in Japanese single kanji homograph disambiguation. The annotated Japanese data is freely released to the public to support further work.
%R 10.18653/v1/2023.cawl-1.7
%U https://aclanthology.org/2023.cawl-1.7/
%U https://doi.org/10.18653/v1/2023.cawl-1.7
%P 50-60
Markdown (Informal)
[Pronunciation Ambiguities in Japanese Kanji](https://aclanthology.org/2023.cawl-1.7/) (Zhang, CAWL 2023)
ACL