% Conference paper from the LREC-COLING 2024 proceedings.
% String values are brace-delimited; month uses the predefined three-letter macro.
@inproceedings{wang-etal-2024-morpheme,
  author    = {Wang, Yue and
               Zheng, Hua and
               Yin, Yaqi and
               Wang, Hansi and
               Liang, Qiliang and
               Liu, Yang},
  title     = {Morpheme Sense Disambiguation: A New Task Aiming for Understanding the Language at Character Level},
  editor    = {Calzolari, Nicoletta and
               Kan, Min-Yen and
               Hoste, Veronique and
               Lenci, Alessandro and
               Sakti, Sakriani and
               Xue, Nianwen},
  booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)},
  month     = may,
  year      = {2024},
  address   = {Torino, Italia},
  publisher = {ELRA and ICCL},
  pages     = {11605--11618},
  url       = {https://aclanthology.org/2024.lrec-main.1014/},
  abstract  = {Morphemes serve as a strong linguistic feature to capture lexical semantics, with higher coverage than words and more natural than sememes. However, due to the lack of morpheme-informed resources and the expense of manual annotation, morpheme-enhanced methods remain largely unexplored in Computational Linguistics. To address this issue, we propose the task of Morpheme Sense Disambiguation (MSD), with two subtasks in-text and in-word, similar to Word Sense Disambiguation (WSD) and Sememe Prediction (SP), to generalize morpheme features on more tasks. We first build the MorDis resource for Chinese, including MorInv as a morpheme inventory, MorTxt and MorWrd as two types of morpheme-annotated datasets. Next, we provide two baselines in each evaluation; the best model yields a promising precision of 77.66{\%} on in-text MSD and 88.19{\%} on in-word MSD, indicating its comparability with WSD and superiority over SP. Finally, we demonstrate that predicted morphemes achieve comparable performance with the ground-truth ones on a downstream application of Definition Generation (DG). This validates the feasibility and applicability of our proposed tasks. The resources and workflow of MSD will provide new insights and solutions for downstream tasks, including DG as well as WSD, training pre-trained models, etc.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record, ID wang-etal-2024-morpheme. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="wang-etal-2024-morpheme">
    <titleInfo>
      <title>Morpheme Sense Disambiguation: A New Task Aiming for Understanding the Language at Character Level</title>
    </titleInfo>
    <!-- Six personal names carrying the "author" role. -->
    <name type="personal">
      <namePart type="given">Yue</namePart>
      <namePart type="family">Wang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Hua</namePart>
      <namePart type="family">Zheng</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yaqi</namePart>
      <namePart type="family">Yin</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Hansi</namePart>
      <namePart type="family">Wang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Qiliang</namePart>
      <namePart type="family">Liang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yang</namePart>
      <namePart type="family">Liu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its six editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Nicoletta</namePart>
        <namePart type="family">Calzolari</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Min-Yen</namePart>
        <namePart type="family">Kan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Veronique</namePart>
        <namePart type="family">Hoste</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Alessandro</namePart>
        <namePart type="family">Lenci</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sakriani</namePart>
        <namePart type="family">Sakti</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Nianwen</namePart>
        <namePart type="family">Xue</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>ELRA and ICCL</publisher>
        <place>
          <placeTerm type="text">Torino, Italia</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Morphemes serve as a strong linguistic feature to capture lexical semantics, with higher coverage than words and more natural than sememes. However, due to the lack of morpheme-informed resources and the expense of manual annotation, morpheme-enhanced methods remain largely unexplored in Computational Linguistics. To address this issue, we propose the task of Morpheme Sense Disambiguation (MSD), with two subtasks in-text and in-word, similar to Word Sense Disambiguation (WSD) and Sememe Prediction (SP), to generalize morpheme features on more tasks. We first build the MorDis resource for Chinese, including MorInv as a morpheme inventory, MorTxt and MorWrd as two types of morpheme-annotated datasets. Next, we provide two baselines in each evaluation; the best model yields a promising precision of 77.66% on in-text MSD and 88.19% on in-word MSD, indicating its comparability with WSD and superiority over SP. Finally, we demonstrate that predicted morphemes achieve comparable performance with the ground-truth ones on a downstream application of Definition Generation (DG). This validates the feasibility and applicability of our proposed tasks. The resources and workflow of MSD will provide new insights and solutions for downstream tasks, including DG as well as WSD, training pre-trained models, etc.</abstract>
    <identifier type="citekey">wang-etal-2024-morpheme</identifier>
    <location>
      <url>https://aclanthology.org/2024.lrec-main.1014/</url>
    </location>
    <!-- Position within the host item: issue date and page extent. -->
    <part>
      <date>2024-05</date>
      <extent unit="page">
        <start>11605</start>
        <end>11618</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Morpheme Sense Disambiguation: A New Task Aiming for Understanding the Language at Character Level
%A Wang, Yue
%A Zheng, Hua
%A Yin, Yaqi
%A Wang, Hansi
%A Liang, Qiliang
%A Liu, Yang
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F wang-etal-2024-morpheme
%X Morphemes serve as a strong linguistic feature to capture lexical semantics, with higher coverage than words and more natural than sememes. However, due to the lack of morpheme-informed resources and the expense of manual annotation, morpheme-enhanced methods remain largely unexplored in Computational Linguistics. To address this issue, we propose the task of Morpheme Sense Disambiguation (MSD), with two subtasks in-text and in-word, similar to Word Sense Disambiguation (WSD) and Sememe Prediction (SP), to generalize morpheme features on more tasks. We first build the MorDis resource for Chinese, including MorInv as a morpheme inventory, MorTxt and MorWrd as two types of morpheme-annotated datasets. Next, we provide two baselines in each evaluation; the best model yields a promising precision of 77.66% on in-text MSD and 88.19% on in-word MSD, indicating its comparability with WSD and superiority over SP. Finally, we demonstrate that predicted morphemes achieve comparable performance with the ground-truth ones on a downstream application of Definition Generation (DG). This validates the feasibility and applicability of our proposed tasks. The resources and workflow of MSD will provide new insights and solutions for downstream tasks, including DG as well as WSD, training pre-trained models, etc.
%U https://aclanthology.org/2024.lrec-main.1014/
%P 11605-11618
Markdown (Informal)
[Morpheme Sense Disambiguation: A New Task Aiming for Understanding the Language at Character Level](https://aclanthology.org/2024.lrec-main.1014/) (Wang et al., LREC-COLING 2024)
ACL