@inproceedings{den-etal-2008-proper,
title = "A Proper Approach to {J}apanese Morphological Analysis: Dictionary, Model, and Evaluation",
author = "Den, Yasuharu and
Nakamura, Junpei and
Ogiso, Toshinobu and
Ogura, Hideki",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Maegaard, Bente and
Mariani, Joseph and
Odijk, Jan and
Piperidis, Stelios and
Tapias, Daniel",
booktitle = "Proceedings of the Sixth International Conference on Language Resources and Evaluation ({LREC}'08)",
month = may,
year = "2008",
address = "Marrakech, Morocco",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2008/pdf/258_paper.pdf",
abstract = "In this paper, we discuss lemma identification in Japanese morphological analysis, which is crucial for a proper formulation of morphological analysis that benefits not only NLP researchers but also corpus linguists. Since Japanese words often have variation in orthography and the vocabulary of Japanese consists of words of several different origins, it sometimes happens that more than one writing form corresponds to the same lemma and that a single writing form corresponds to two or more lemmas with different readings and/or meanings. The mapping from a writing form onto a lemma is important in linguistic analysis of corpora. The current study focuses on disambiguation of heteronyms, words with the same writing form but with different word forms. To resolve heteronym ambiguity, we make use of goshu information, the classification of words based on their origin. Founded on the fact that words of some goshu classes are more likely to combine into compound words than words of other classes, we employ a statistical model based on CRFs using goshu information. Experimental results show that the use of goshu information considerably improves the performance of heteronym disambiguation and lemma identification, suggesting that goshu information solves the lemma identification task very effectively.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="den-etal-2008-proper">
<titleInfo>
<title>A Proper Approach to Japanese Morphological Analysis: Dictionary, Model, and Evaluation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yasuharu</namePart>
<namePart type="family">Den</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junpei</namePart>
<namePart type="family">Nakamura</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Toshinobu</namePart>
<namePart type="family">Ogiso</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hideki</namePart>
<namePart type="family">Ogura</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2008-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC’08)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Tapias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Marrakech, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we discuss lemma identification in Japanese morphological analysis, which is crucial for a proper formulation of morphological analysis that benefits not only NLP researchers but also corpus linguists. Since Japanese words often have variation in orthography and the vocabulary of Japanese consists of words of several different origins, it sometimes happens that more than one writing form corresponds to the same lemma and that a single writing form corresponds to two or more lemmas with different readings and/or meanings. The mapping from a writing form onto a lemma is important in linguistic analysis of corpora. The current study focuses on disambiguation of heteronyms, words with the same writing form but with different word forms. To resolve heteronym ambiguity, we make use of goshu information, the classification of words based on their origin. Founded on the fact that words of some goshu classes are more likely to combine into compound words than words of other classes, we employ a statistical model based on CRFs using goshu information. Experimental results show that the use of goshu information considerably improves the performance of heteronym disambiguation and lemma identification, suggesting that goshu information solves the lemma identification task very effectively.</abstract>
<identifier type="citekey">den-etal-2008-proper</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/258_paper.pdf</url>
</location>
<part>
<date>2008-05</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Proper Approach to Japanese Morphological Analysis: Dictionary, Model, and Evaluation
%A Den, Yasuharu
%A Nakamura, Junpei
%A Ogiso, Toshinobu
%A Ogura, Hideki
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Odijk, Jan
%Y Piperidis, Stelios
%Y Tapias, Daniel
%S Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC’08)
%D 2008
%8 May
%I European Language Resources Association (ELRA)
%C Marrakech, Morocco
%F den-etal-2008-proper
%X In this paper, we discuss lemma identification in Japanese morphological analysis, which is crucial for a proper formulation of morphological analysis that benefits not only NLP researchers but also corpus linguists. Since Japanese words often have variation in orthography and the vocabulary of Japanese consists of words of several different origins, it sometimes happens that more than one writing form corresponds to the same lemma and that a single writing form corresponds to two or more lemmas with different readings and/or meanings. The mapping from a writing form onto a lemma is important in linguistic analysis of corpora. The current study focuses on disambiguation of heteronyms, words with the same writing form but with different word forms. To resolve heteronym ambiguity, we make use of goshu information, the classification of words based on their origin. Founded on the fact that words of some goshu classes are more likely to combine into compound words than words of other classes, we employ a statistical model based on CRFs using goshu information. Experimental results show that the use of goshu information considerably improves the performance of heteronym disambiguation and lemma identification, suggesting that goshu information solves the lemma identification task very effectively.
%U http://www.lrec-conf.org/proceedings/lrec2008/pdf/258_paper.pdf
Markdown (Informal)
[A Proper Approach to Japanese Morphological Analysis: Dictionary, Model, and Evaluation](http://www.lrec-conf.org/proceedings/lrec2008/pdf/258_paper.pdf) (Den et al., LREC 2008)
ACL