@inproceedings{ehrenworth-keith-2023-literary-intertextual,
title = "Literary Intertextual Semantic Change Detection: Application and Motivation for Evaluating Models on Small Corpora",
author = "Ehrenworth, Jackson and
Keith, Katherine",
editor = "Tahmasebi, Nina and
Montariol, Syrielle and
Dubossarsky, Haim and
Kutuzov, Andrey and
Hengchen, Simon and
Alfter, David and
Periti, Francesco and
Cassotti, Pierluigi",
booktitle = "Proceedings of the 4th Workshop on Computational Approaches to Historical Language Change",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.lchange-1.1",
doi = "10.18653/v1/2023.lchange-1.1",
pages = "1--14",
abstract = "Lexical semantic change detection is the study of how words change meaning between corpora. While Schlechtweg et al. (2020) standardized both datasets and evaluation metrics for this shared task, for those interested in applying semantic change detection models to small corpora{---}e.g., in the digital humanities{---}there is a need for evaluation involving much smaller datasets. We present a method and open-source code pipeline for downsampling the SemEval-2020 Task 1 corpora while preserving gold standard measures of semantic change. We then evaluate several state-of-the-art models trained on these downsampled corpora and find both dramatically decreased performance (average 67{\%} decrease) and high variance. We also propose a novel application to the digital humanities and provide a case study demonstrating that semantic change detection can be used in an exploratory manner to produce insightful avenues of investigation for literary scholars.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ehrenworth-keith-2023-literary-intertextual">
<titleInfo>
<title>Literary Intertextual Semantic Change Detection: Application and Motivation for Evaluating Models on Small Corpora</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jackson</namePart>
<namePart type="family">Ehrenworth</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Katherine</namePart>
<namePart type="family">Keith</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 4th Workshop on Computational Approaches to Historical Language Change</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nina</namePart>
<namePart type="family">Tahmasebi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Syrielle</namePart>
<namePart type="family">Montariol</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Haim</namePart>
<namePart type="family">Dubossarsky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andrey</namePart>
<namePart type="family">Kutuzov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Simon</namePart>
<namePart type="family">Hengchen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Alfter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Francesco</namePart>
<namePart type="family">Periti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pierluigi</namePart>
<namePart type="family">Cassotti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Lexical semantic change detection is the study of how words change meaning between corpora. While Schlechtweg et al. (2020) standardized both datasets and evaluation metrics for this shared task, for those interested in applying semantic change detection models to small corpora—e.g., in the digital humanities—there is a need for evaluation involving much smaller datasets. We present a method and open-source code pipeline for downsampling the SemEval-2020 Task 1 corpora while preserving gold standard measures of semantic change. We then evaluate several state-of-the-art models trained on these downsampled corpora and find both dramatically decreased performance (average 67% decrease) and high variance. We also propose a novel application to the digital humanities and provide a case study demonstrating that semantic change detection can be used in an exploratory manner to produce insightful avenues of investigation for literary scholars.</abstract>
<identifier type="citekey">ehrenworth-keith-2023-literary-intertextual</identifier>
<identifier type="doi">10.18653/v1/2023.lchange-1.1</identifier>
<location>
<url>https://aclanthology.org/2023.lchange-1.1</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>1</start>
<end>14</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Literary Intertextual Semantic Change Detection: Application and Motivation for Evaluating Models on Small Corpora
%A Ehrenworth, Jackson
%A Keith, Katherine
%Y Tahmasebi, Nina
%Y Montariol, Syrielle
%Y Dubossarsky, Haim
%Y Kutuzov, Andrey
%Y Hengchen, Simon
%Y Alfter, David
%Y Periti, Francesco
%Y Cassotti, Pierluigi
%S Proceedings of the 4th Workshop on Computational Approaches to Historical Language Change
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F ehrenworth-keith-2023-literary-intertextual
%X Lexical semantic change detection is the study of how words change meaning between corpora. While Schlechtweg et al. (2020) standardized both datasets and evaluation metrics for this shared task, for those interested in applying semantic change detection models to small corpora—e.g., in the digital humanities—there is a need for evaluation involving much smaller datasets. We present a method and open-source code pipeline for downsampling the SemEval-2020 Task 1 corpora while preserving gold standard measures of semantic change. We then evaluate several state-of-the-art models trained on these downsampled corpora and find both dramatically decreased performance (average 67% decrease) and high variance. We also propose a novel application to the digital humanities and provide a case study demonstrating that semantic change detection can be used in an exploratory manner to produce insightful avenues of investigation for literary scholars.
%R 10.18653/v1/2023.lchange-1.1
%U https://aclanthology.org/2023.lchange-1.1
%U https://doi.org/10.18653/v1/2023.lchange-1.1
%P 1-14
Markdown (Informal)
[Literary Intertextual Semantic Change Detection: Application and Motivation for Evaluating Models on Small Corpora](https://aclanthology.org/2023.lchange-1.1) (Ehrenworth & Keith, LChange 2023)
ACL