% ACL Anthology export for the paper in NEJLT, Volume 8 (see the url field).
% The doi field holds the bare DOI, without the https://doi.org/ resolver
% prefix, so that styles and exporters can build the link themselves.
@inproceedings{kutuzov-etal-2022-contextualized,
    title     = {Contextualized embeddings for semantic change detection: Lessons learned},
    author    = {Kutuzov, Andrey and
                 Velldal, Erik and
                 {\O}vrelid, Lilja},
    editor    = {Derczynski, Leon},
    booktitle = {Northern European Journal of Language Technology, Volume 8},
    year      = {2022},
    address   = {Copenhagen, Denmark},
    publisher = {Northern European Association of Language Technology},
    url       = {https://aclanthology.org/2022.nejlt-1.9},
    doi       = {10.3384/nejlt.2000-1533.2022.3478},
    abstract  = {We present a qualitative analysis of the (potentially erroneous) outputs of contextualized embedding-based methods for detecting diachronic semantic change. First, we introduce an ensemble method outperforming previously described contextualized approaches. This method is used as a basis for an in-depth analysis of the degrees of semantic change predicted for English words across 5 decades. Our findings show that contextualized methods can often predict high change scores for words which are not undergoing any real diachronic semantic shift in the lexicographic sense of the term (or at least the status of these shifts is questionable). Such challenging cases are discussed in detail with examples, and their linguistic categorization is proposed. Our conclusion is that pre-trained contextualized language models are prone to confound changes in lexicographic senses and changes in contextual variance, which naturally stem from their distributional nature, but is different from the types of issues observed in methods based on static embeddings. Additionally, they often merge together syntactic and semantic aspects of lexical entities. We propose a range of possible future solutions to these issues.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS record for the citation key kutuzov-etal-2022-contextualized. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="kutuzov-etal-2022-contextualized">
    <titleInfo>
      <title>Contextualized embeddings for semantic change detection: Lessons learned</title>
    </titleInfo>
    <!-- Authors, in publication order. -->
    <name type="personal">
      <namePart type="given">Andrey</namePart>
      <namePart type="family">Kutuzov</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Erik</namePart>
      <namePart type="family">Velldal</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Lilja</namePart>
      <namePart type="family">Øvrelid</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2022</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host publication: the volume this paper appeared in, with its editor and publisher. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Northern European Journal of Language Technology, Volume 8</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Leon</namePart>
        <namePart type="family">Derczynski</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Northern European Association of Language Technology</publisher>
        <place>
          <placeTerm type="text">Copenhagen, Denmark</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>We present a qualitative analysis of the (potentially erroneous) outputs of contextualized embedding-based methods for detecting diachronic semantic change. First, we introduce an ensemble method outperforming previously described contextualized approaches. This method is used as a basis for an in-depth analysis of the degrees of semantic change predicted for English words across 5 decades. Our findings show that contextualized methods can often predict high change scores for words which are not undergoing any real diachronic semantic shift in the lexicographic sense of the term (or at least the status of these shifts is questionable). Such challenging cases are discussed in detail with examples, and their linguistic categorization is proposed. Our conclusion is that pre-trained contextualized language models are prone to confound changes in lexicographic senses and changes in contextual variance, which naturally stem from their distributional nature, but is different from the types of issues observed in methods based on static embeddings. Additionally, they often merge together syntactic and semantic aspects of lexical entities. We propose a range of possible future solutions to these issues.</abstract>
    <identifier type="citekey">kutuzov-etal-2022-contextualized</identifier>
    <!-- The doi identifier holds the bare DOI; the resolvable landing page is under <location>. -->
    <identifier type="doi">10.3384/nejlt.2000-1533.2022.3478</identifier>
    <location>
      <url>https://aclanthology.org/2022.nejlt-1.9</url>
    </location>
    <part>
      <date>2022</date>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Contextualized embeddings for semantic change detection: Lessons learned
%A Kutuzov, Andrey
%A Velldal, Erik
%A Øvrelid, Lilja
%Y Derczynski, Leon
%S Northern European Journal of Language Technology, Volume 8
%D 2022
%I Northern European Association of Language Technology
%C Copenhagen, Denmark
%F kutuzov-etal-2022-contextualized
%X We present a qualitative analysis of the (potentially erroneous) outputs of contextualized embedding-based methods for detecting diachronic semantic change. First, we introduce an ensemble method outperforming previously described contextualized approaches. This method is used as a basis for an in-depth analysis of the degrees of semantic change predicted for English words across 5 decades. Our findings show that contextualized methods can often predict high change scores for words which are not undergoing any real diachronic semantic shift in the lexicographic sense of the term (or at least the status of these shifts is questionable). Such challenging cases are discussed in detail with examples, and their linguistic categorization is proposed. Our conclusion is that pre-trained contextualized language models are prone to confound changes in lexicographic senses and changes in contextual variance, which naturally stem from their distributional nature, but is different from the types of issues observed in methods based on static embeddings. Additionally, they often merge together syntactic and semantic aspects of lexical entities. We propose a range of possible future solutions to these issues.
%R 10.3384/nejlt.2000-1533.2022.3478
%U https://aclanthology.org/2022.nejlt-1.9
%U https://doi.org/10.3384/nejlt.2000-1533.2022.3478
Markdown (Informal)
[Contextualized embeddings for semantic change detection: Lessons learned](https://aclanthology.org/2022.nejlt-1.9) (Kutuzov et al., NEJLT 2022)
ACL