@inproceedings{asahara-etal-2022-chj,
title = "{CHJ}-{WLSP}: Annotation of {\textquoteleft}Word List by Semantic Principles' Labels for the Corpus of Historical {J}apanese",
author = "Asahara, Masayuki and
Ikegami, Nao and
Suzuki, Tai and
Ichimura, Taro and
Kondo, Asuko and
Kato, Sachi and
Yamazaki, Makoto",
editor = "Sprugnoli, Rachele and
Passarotti, Marco",
booktitle = "Proceedings of the Second Workshop on Language Technologies for Historical and Ancient Languages",
month = jun,
year = "2022",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2022.lt4hala-1.5/",
pages = "31--37",
abstract = "This article presents a word-sense annotation for the Corpus of Historical Japanese: a mashed-up Japanese lexicon based on the {\textquoteleft}Word List by Semantic Principles' (WLSP). The WLSP is a large-scale Japanese thesaurus that includes 98,241 entries with syntactic and hierarchical semantic categories. The historical WLSP is also compiled for the words in ancient Japanese. We utilized a morpheme-word sense alignment table to extract all possible word sense candidates for each word appearing in the target corpus. Then, we manually disambiguated the word senses for 647,751 words in the texts from the 10th century to 1910."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="asahara-etal-2022-chj">
<titleInfo>
<title>CHJ-WLSP: Annotation of ‘Word List by Semantic Principles’ Labels for the Corpus of Historical Japanese</title>
</titleInfo>
<name type="personal">
<namePart type="given">Masayuki</namePart>
<namePart type="family">Asahara</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nao</namePart>
<namePart type="family">Ikegami</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tai</namePart>
<namePart type="family">Suzuki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Taro</namePart>
<namePart type="family">Ichimura</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuko</namePart>
<namePart type="family">Kondo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sachi</namePart>
<namePart type="family">Kato</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Makoto</namePart>
<namePart type="family">Yamazaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second Workshop on Language Technologies for Historical and Ancient Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Rachele</namePart>
<namePart type="family">Sprugnoli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marco</namePart>
<namePart type="family">Passarotti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This article presents a word-sense annotation for the Corpus of Historical Japanese: a mashed-up Japanese lexicon based on the ‘Word List by Semantic Principles’ (WLSP). The WLSP is a large-scale Japanese thesaurus that includes 98,241 entries with syntactic and hierarchical semantic categories. The historical WLSP is also compiled for the words in ancient Japanese. We utilized a morpheme-word sense alignment table to extract all possible word sense candidates for each word appearing in the target corpus. Then, we manually disambiguated the word senses for 647,751 words in the texts from the 10th century to 1910.</abstract>
<identifier type="citekey">asahara-etal-2022-chj</identifier>
<location>
<url>https://aclanthology.org/2022.lt4hala-1.5/</url>
</location>
<part>
<date>2022-06</date>
<extent unit="page">
<start>31</start>
<end>37</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T CHJ-WLSP: Annotation of ‘Word List by Semantic Principles’ Labels for the Corpus of Historical Japanese
%A Asahara, Masayuki
%A Ikegami, Nao
%A Suzuki, Tai
%A Ichimura, Taro
%A Kondo, Asuko
%A Kato, Sachi
%A Yamazaki, Makoto
%Y Sprugnoli, Rachele
%Y Passarotti, Marco
%S Proceedings of the Second Workshop on Language Technologies for Historical and Ancient Languages
%D 2022
%8 June
%I European Language Resources Association
%C Marseille, France
%F asahara-etal-2022-chj
%X This article presents a word-sense annotation for the Corpus of Historical Japanese: a mashed-up Japanese lexicon based on the ‘Word List by Semantic Principles’ (WLSP). The WLSP is a large-scale Japanese thesaurus that includes 98,241 entries with syntactic and hierarchical semantic categories. The historical WLSP is also compiled for the words in ancient Japanese. We utilized a morpheme-word sense alignment table to extract all possible word sense candidates for each word appearing in the target corpus. Then, we manually disambiguated the word senses for 647,751 words in the texts from the 10th century to 1910.
%U https://aclanthology.org/2022.lt4hala-1.5/
%P 31-37
Markdown (Informal)
[CHJ-WLSP: Annotation of ‘Word List by Semantic Principles’ Labels for the Corpus of Historical Japanese](https://aclanthology.org/2022.lt4hala-1.5/) (Asahara et al., LT4HALA 2022)
ACL