@inproceedings{espinosa-anke-etal-2021-evaluating,
title = "Evaluating language models for the retrieval and categorization of lexical collocations",
author = "Espinosa Anke, Luis and
Codina-Filba, Joan and
Wanner, Leo",
editor = "Merlo, Paola and
Tiedemann, Jorg and
Tsarfaty, Reut",
booktitle = "Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume",
month = apr,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.eacl-main.120/",
doi = "10.18653/v1/2021.eacl-main.120",
pages = "1406--1417",
abstract = "Lexical collocations are idiosyncratic combinations of two syntactically bound lexical items (e.g., {\textquotedblleft}heavy rain{\textquotedblright} or {\textquotedblleft}take a step{\textquotedblright}). Understanding their degree of compositionality and idiosyncrasy, as well their underlying semantics, is crucial for language learners, lexicographers and downstream NLP applications. In this paper, we perform an exhaustive analysis of current language models for collocation understanding. We first construct a dataset of apparitions of lexical collocations in context, categorized into 17 representative semantic categories. Then, we perform two experiments: (1) unsupervised collocate retrieval using BERT, and (2) supervised collocation classification in context. We find that most models perform well in distinguishing light verb constructions, especially if the collocation`s first argument acts as subject, but often fail to distinguish, first, different syntactic structures within the same semantic category, and second, fine-grained semantic categories which restrict the use of small sets of valid collocates for a given base."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="espinosa-anke-etal-2021-evaluating">
<titleInfo>
<title>Evaluating language models for the retrieval and categorization of lexical collocations</title>
</titleInfo>
<name type="personal">
<namePart type="given">Luis</namePart>
<namePart type="family">Espinosa Anke</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joan</namePart>
<namePart type="family">Codina-Filba</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leo</namePart>
<namePart type="family">Wanner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume</title>
</titleInfo>
<name type="personal">
<namePart type="given">Paola</namePart>
<namePart type="family">Merlo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jorg</namePart>
<namePart type="family">Tiedemann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Reut</namePart>
<namePart type="family">Tsarfaty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Lexical collocations are idiosyncratic combinations of two syntactically bound lexical items (e.g., “heavy rain” or “take a step”). Understanding their degree of compositionality and idiosyncrasy, as well their underlying semantics, is crucial for language learners, lexicographers and downstream NLP applications. In this paper, we perform an exhaustive analysis of current language models for collocation understanding. We first construct a dataset of apparitions of lexical collocations in context, categorized into 17 representative semantic categories. Then, we perform two experiments: (1) unsupervised collocate retrieval using BERT, and (2) supervised collocation classification in context. We find that most models perform well in distinguishing light verb constructions, especially if the collocation‘s first argument acts as subject, but often fail to distinguish, first, different syntactic structures within the same semantic category, and second, fine-grained semantic categories which restrict the use of small sets of valid collocates for a given base.</abstract>
<identifier type="citekey">espinosa-anke-etal-2021-evaluating</identifier>
<identifier type="doi">10.18653/v1/2021.eacl-main.120</identifier>
<location>
<url>https://aclanthology.org/2021.eacl-main.120/</url>
</location>
<part>
<date>2021-04</date>
<extent unit="page">
<start>1406</start>
<end>1417</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Evaluating language models for the retrieval and categorization of lexical collocations
%A Espinosa Anke, Luis
%A Codina-Filba, Joan
%A Wanner, Leo
%Y Merlo, Paola
%Y Tiedemann, Jorg
%Y Tsarfaty, Reut
%S Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume
%D 2021
%8 April
%I Association for Computational Linguistics
%C Online
%F espinosa-anke-etal-2021-evaluating
%X Lexical collocations are idiosyncratic combinations of two syntactically bound lexical items (e.g., “heavy rain” or “take a step”). Understanding their degree of compositionality and idiosyncrasy, as well their underlying semantics, is crucial for language learners, lexicographers and downstream NLP applications. In this paper, we perform an exhaustive analysis of current language models for collocation understanding. We first construct a dataset of apparitions of lexical collocations in context, categorized into 17 representative semantic categories. Then, we perform two experiments: (1) unsupervised collocate retrieval using BERT, and (2) supervised collocation classification in context. We find that most models perform well in distinguishing light verb constructions, especially if the collocation‘s first argument acts as subject, but often fail to distinguish, first, different syntactic structures within the same semantic category, and second, fine-grained semantic categories which restrict the use of small sets of valid collocates for a given base.
%R 10.18653/v1/2021.eacl-main.120
%U https://aclanthology.org/2021.eacl-main.120/
%U https://doi.org/10.18653/v1/2021.eacl-main.120
%P 1406-1417
Markdown (Informal)
[Evaluating language models for the retrieval and categorization of lexical collocations](https://aclanthology.org/2021.eacl-main.120/) (Espinosa Anke et al., EACL 2021)
ACL