@inproceedings{amin-etal-2021-data,
title = "Data-driven Identification of Idioms in Song Lyrics",
author = "Amin, Miriam and
Fankhauser, Peter and
Kupietz, Marc and
Schneider, Roman",
editor = "Cook, Paul and
Mitrovi{\'c}, Jelena and
Escart{\'\i}n, Carla Parra and
Vaidya, Ashwini and
Osenova, Petya and
Taslimipoor, Shiva and
Ramisch, Carlos",
booktitle = "Proceedings of the 17th Workshop on Multiword Expressions (MWE 2021)",
month = aug,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.mwe-1.3",
doi = "10.18653/v1/2021.mwe-1.3",
pages = "13--22",
abstract = "The automatic recognition of idioms poses a challenging problem for NLP applications. Whereas native speakers can intuitively handle multiword expressions whose compositional meanings are hard to trace back to individual word semantics, there is still ample scope for improvement regarding computational approaches. We assume that idiomatic constructions can be characterized by gradual intensities of semantic non-compositionality, formal fixedness, and unusual usage context, and introduce a number of measures for these characteristics, comprising count-based and predictive collocation measures together with measures of context (un)similarity. We evaluate our approach on a manually labelled gold standard, derived from a corpus of German pop lyrics. To this end, we apply a Random Forest classifier to analyze the individual contribution of features for automatically detecting idioms, and study the trade-off between recall and precision. Finally, we evaluate the classifier on an independent dataset of idioms extracted from a list of Wikipedia idioms, achieving state-of-the art accuracy.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="amin-etal-2021-data">
<titleInfo>
<title>Data-driven Identification of Idioms in Song Lyrics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Miriam</namePart>
<namePart type="family">Amin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Peter</namePart>
<namePart type="family">Fankhauser</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marc</namePart>
<namePart type="family">Kupietz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Roman</namePart>
<namePart type="family">Schneider</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 17th Workshop on Multiword Expressions (MWE 2021)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Paul</namePart>
<namePart type="family">Cook</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jelena</namePart>
<namePart type="family">Mitrović</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carla</namePart>
<namePart type="given">Parra</namePart>
<namePart type="family">Escartín</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ashwini</namePart>
<namePart type="family">Vaidya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Petya</namePart>
<namePart type="family">Osenova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shiva</namePart>
<namePart type="family">Taslimipoor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carlos</namePart>
<namePart type="family">Ramisch</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The automatic recognition of idioms poses a challenging problem for NLP applications. Whereas native speakers can intuitively handle multiword expressions whose compositional meanings are hard to trace back to individual word semantics, there is still ample scope for improvement regarding computational approaches. We assume that idiomatic constructions can be characterized by gradual intensities of semantic non-compositionality, formal fixedness, and unusual usage context, and introduce a number of measures for these characteristics, comprising count-based and predictive collocation measures together with measures of context (un)similarity. We evaluate our approach on a manually labelled gold standard, derived from a corpus of German pop lyrics. To this end, we apply a Random Forest classifier to analyze the individual contribution of features for automatically detecting idioms, and study the trade-off between recall and precision. Finally, we evaluate the classifier on an independent dataset of idioms extracted from a list of Wikipedia idioms, achieving state-of-the art accuracy.</abstract>
<identifier type="citekey">amin-etal-2021-data</identifier>
<identifier type="doi">10.18653/v1/2021.mwe-1.3</identifier>
<location>
<url>https://aclanthology.org/2021.mwe-1.3</url>
</location>
<part>
<date>2021-08</date>
<extent unit="page">
<start>13</start>
<end>22</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Data-driven Identification of Idioms in Song Lyrics
%A Amin, Miriam
%A Fankhauser, Peter
%A Kupietz, Marc
%A Schneider, Roman
%Y Cook, Paul
%Y Mitrović, Jelena
%Y Escartín, Carla Parra
%Y Vaidya, Ashwini
%Y Osenova, Petya
%Y Taslimipoor, Shiva
%Y Ramisch, Carlos
%S Proceedings of the 17th Workshop on Multiword Expressions (MWE 2021)
%D 2021
%8 August
%I Association for Computational Linguistics
%C Online
%F amin-etal-2021-data
%X The automatic recognition of idioms poses a challenging problem for NLP applications. Whereas native speakers can intuitively handle multiword expressions whose compositional meanings are hard to trace back to individual word semantics, there is still ample scope for improvement regarding computational approaches. We assume that idiomatic constructions can be characterized by gradual intensities of semantic non-compositionality, formal fixedness, and unusual usage context, and introduce a number of measures for these characteristics, comprising count-based and predictive collocation measures together with measures of context (un)similarity. We evaluate our approach on a manually labelled gold standard, derived from a corpus of German pop lyrics. To this end, we apply a Random Forest classifier to analyze the individual contribution of features for automatically detecting idioms, and study the trade-off between recall and precision. Finally, we evaluate the classifier on an independent dataset of idioms extracted from a list of Wikipedia idioms, achieving state-of-the art accuracy.
%R 10.18653/v1/2021.mwe-1.3
%U https://aclanthology.org/2021.mwe-1.3
%U https://doi.org/10.18653/v1/2021.mwe-1.3
%P 13-22
Markdown (Informal)
[Data-driven Identification of Idioms in Song Lyrics](https://aclanthology.org/2021.mwe-1.3) (Amin et al., MWE 2021)
ACL
- Miriam Amin, Peter Fankhauser, Marc Kupietz, and Roman Schneider. 2021. Data-driven Identification of Idioms in Song Lyrics. In Proceedings of the 17th Workshop on Multiword Expressions (MWE 2021), pages 13–22, Online. Association for Computational Linguistics.