@inproceedings{rapp-2003-word,
title = "Word sense discovery based on sense descriptor dissimilarity",
author = "Rapp, Reinhard",
booktitle = "Proceedings of Machine Translation Summit IX: Papers",
month = sep # " 23-27",
year = "2003",
address = "New Orleans, USA",
url = "https://aclanthology.org/2003.mtsummit-papers.42",
abstract = "In machine translation, information on word ambiguities is usually provided by the lexicographers who construct the lexicon. In this paper we propose an automatic method for word sense induction, i.e. for the discovery of a set of sense descriptors to a given ambiguous word. The approach is based on the statistics of the distributional similarity between the words in a corpus. Our algorithm works as follows: The 20 strongest first-order associations to the ambiguous word are considered as sense descriptor candidates. All pairs of these candidates are ranked according to the following two criteria: First, the two words in a pair should be as dissimilar as possible. Second, although being dissimilar their co-occurrence vectors should add up to the co-occurrence vector of the ambiguous word scaled by two. Both conditions together have the effect that preference is given to pairs whose co-occurring words are complementary. For best results, our implementation uses singular value decomposition, entropy-based weights, and second-order similarity metrics.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="rapp-2003-word">
<titleInfo>
<title>Word sense discovery based on sense descriptor dissimilarity</title>
</titleInfo>
<name type="personal">
<namePart type="given">Reinhard</namePart>
<namePart type="family">Rapp</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2003-sep 23-27</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of Machine Translation Summit IX: Papers</title>
</titleInfo>
<originInfo>
<place>
<placeTerm type="text">New Orleans, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In machine translation, information on word ambiguities is usually provided by the lexicographers who construct the lexicon. In this paper we propose an automatic method for word sense induction, i.e. for the discovery of a set of sense descriptors to a given ambiguous word. The approach is based on the statistics of the distributional similarity between the words in a corpus. Our algorithm works as follows: The 20 strongest first-order associations to the ambiguous word are considered as sense descriptor candidates. All pairs of these candidates are ranked according to the following two criteria: First, the two words in a pair should be as dissimilar as possible. Second, although being dissimilar their co-occurrence vectors should add up to the co-occurrence vector of the ambiguous word scaled by two. Both conditions together have the effect that preference is given to pairs whose co-occurring words are complementary. For best results, our implementation uses singular value decomposition, entropy-based weights, and second-order similarity metrics.</abstract>
<identifier type="citekey">rapp-2003-word</identifier>
<location>
<url>https://aclanthology.org/2003.mtsummit-papers.42</url>
</location>
<part>
<date>2003-sep 23-27</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Word sense discovery based on sense descriptor dissimilarity
%A Rapp, Reinhard
%S Proceedings of Machine Translation Summit IX: Papers
%D 2003
%8 sep 23 27
%C New Orleans, USA
%F rapp-2003-word
%X In machine translation, information on word ambiguities is usually provided by the lexicographers who construct the lexicon. In this paper we propose an automatic method for word sense induction, i.e. for the discovery of a set of sense descriptors to a given ambiguous word. The approach is based on the statistics of the distributional similarity between the words in a corpus. Our algorithm works as follows: The 20 strongest first-order associations to the ambiguous word are considered as sense descriptor candidates. All pairs of these candidates are ranked according to the following two criteria: First, the two words in a pair should be as dissimilar as possible. Second, although being dissimilar their co-occurrence vectors should add up to the co-occurrence vector of the ambiguous word scaled by two. Both conditions together have the effect that preference is given to pairs whose co-occurring words are complementary. For best results, our implementation uses singular value decomposition, entropy-based weights, and second-order similarity metrics.
%U https://aclanthology.org/2003.mtsummit-papers.42
Markdown (Informal)
[Word sense discovery based on sense descriptor dissimilarity](https://aclanthology.org/2003.mtsummit-papers.42) (Rapp, MTSummit 2003)
ACL