@inproceedings{yen-etal-2020-msd,
title = "{MSD}-1030: A Well-built Multi-Sense Evaluation Dataset for Sense Representation Models",
author = "Yen, Ting-Yu and
Lee, Yang-Yin and
Shiue, Yow-Ting and
Huang, Hen-Hsen and
Chen, Hsin-Hsi",
editor = "Calzolari, Nicoletta and
B{\'e}chet, Fr{\'e}d{\'e}ric and
Blache, Philippe and
Choukri, Khalid and
Cieri, Christopher and
Declerck, Thierry and
Goggi, Sara and
Isahara, Hitoshi and
Maegaard, Bente and
Mariani, Joseph and
Mazo, H{\'e}l{\`e}ne and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Twelfth Language Resources and Evaluation Conference",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2020.lrec-1.711",
pages = "5802--5809",
abstract = "Sense embedding models handle polysemy by giving each distinct meaning of a word form a separate representation. They are considered improvements over word models, and their effectiveness is usually judged with benchmarks such as semantic similarity datasets. However, most of these datasets are not designed for evaluating sense embeddings. In this research, we show that there are at least six concerns about evaluating sense embeddings with existing benchmark datasets, including the large proportions of single-sense words and the unexpected inferior performance of several multi-sense models to their single-sense counterparts. These observations call into serious question whether evaluations based on these datasets can reflect the sense model{'}s ability to capture different meanings. To address the issues, we propose the Multi-Sense Dataset (MSD-1030), which contains a high ratio of multi-sense word pairs. A series of analyses and experiments show that MSD-1030 serves as a more reliable benchmark for sense embeddings. The dataset is available at \url{http://nlg.csie.ntu.edu.tw/nlpresource/MSD-1030/}.",
language = "English",
ISBN = "979-10-95546-34-4",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="yen-etal-2020-msd">
<titleInfo>
<title>MSD-1030: A Well-built Multi-Sense Evaluation Dataset for Sense Representation Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ting-Yu</namePart>
<namePart type="family">Yen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yang-Yin</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yow-Ting</namePart>
<namePart type="family">Shiue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hen-Hsen</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hsin-Hsi</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Twelfth Language Resources and Evaluation Conference</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Frédéric</namePart>
<namePart type="family">Béchet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philippe</namePart>
<namePart type="family">Blache</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Cieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Goggi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hitoshi</namePart>
<namePart type="family">Isahara</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hélène</namePart>
<namePart type="family">Mazo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-34-4</identifier>
</relatedItem>
<abstract>Sense embedding models handle polysemy by giving each distinct meaning of a word form a separate representation. They are considered improvements over word models, and their effectiveness is usually judged with benchmarks such as semantic similarity datasets. However, most of these datasets are not designed for evaluating sense embeddings. In this research, we show that there are at least six concerns about evaluating sense embeddings with existing benchmark datasets, including the large proportions of single-sense words and the unexpected inferior performance of several multi-sense models to their single-sense counterparts. These observations call into serious question whether evaluations based on these datasets can reflect the sense model’s ability to capture different meanings. To address the issues, we propose the Multi-Sense Dataset (MSD-1030), which contains a high ratio of multi-sense word pairs. A series of analyses and experiments show that MSD-1030 serves as a more reliable benchmark for sense embeddings. The dataset is available at http://nlg.csie.ntu.edu.tw/nlpresource/MSD-1030/.</abstract>
<identifier type="citekey">yen-etal-2020-msd</identifier>
<location>
<url>https://aclanthology.org/2020.lrec-1.711</url>
</location>
<part>
<date>2020-05</date>
<extent unit="page">
<start>5802</start>
<end>5809</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T MSD-1030: A Well-built Multi-Sense Evaluation Dataset for Sense Representation Models
%A Yen, Ting-Yu
%A Lee, Yang-Yin
%A Shiue, Yow-Ting
%A Huang, Hen-Hsen
%A Chen, Hsin-Hsi
%Y Calzolari, Nicoletta
%Y Béchet, Frédéric
%Y Blache, Philippe
%Y Choukri, Khalid
%Y Cieri, Christopher
%Y Declerck, Thierry
%Y Goggi, Sara
%Y Isahara, Hitoshi
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Mazo, Hélène
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Twelfth Language Resources and Evaluation Conference
%D 2020
%8 May
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-34-4
%G English
%F yen-etal-2020-msd
%X Sense embedding models handle polysemy by giving each distinct meaning of a word form a separate representation. They are considered improvements over word models, and their effectiveness is usually judged with benchmarks such as semantic similarity datasets. However, most of these datasets are not designed for evaluating sense embeddings. In this research, we show that there are at least six concerns about evaluating sense embeddings with existing benchmark datasets, including the large proportions of single-sense words and the unexpected inferior performance of several multi-sense models to their single-sense counterparts. These observations call into serious question whether evaluations based on these datasets can reflect the sense model’s ability to capture different meanings. To address the issues, we propose the Multi-Sense Dataset (MSD-1030), which contains a high ratio of multi-sense word pairs. A series of analyses and experiments show that MSD-1030 serves as a more reliable benchmark for sense embeddings. The dataset is available at http://nlg.csie.ntu.edu.tw/nlpresource/MSD-1030/.
%U https://aclanthology.org/2020.lrec-1.711
%P 5802-5809
Markdown (Informal)
[MSD-1030: A Well-built Multi-Sense Evaluation Dataset for Sense Representation Models](https://aclanthology.org/2020.lrec-1.711) (Yen et al., LREC 2020)
ACL