@inproceedings{tudor-pettersson-2024-people,
title = "People and Places of the Past - Named Entity Recognition in {S}wedish Labour Movement Documents from Historical Sources",
author = "Tudor, Crina and
Pettersson, Eva",
editor = "Bizzoni, Yuri and
Degaetano-Ortlieb, Stefania and
Kazantseva, Anna and
Szpakowicz, Stan",
booktitle = "Proceedings of the 8th Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature (LaTeCH-CLfL 2024)",
month = mar,
year = "2024",
address = "St. Julians, Malta",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.latechclfl-1.17",
pages = "185--195",
abstract = "Named Entity Recognition (NER) is an important step in many Natural Language Processing tasks. The existing state-of-the-art NER systems are however typically developed based on contemporary data, and not very well suited for analyzing historical text. In this paper, we present a comparative analysis of the performance of several language models when applied to Named Entity Recognition for historical Swedish text. The source texts we work with are documents from Swedish labour unions from the 19th and 20th century. We experiment with three off-the-shelf models for contemporary Swedish text, and one language model built on historical Swedish text that we fine-tune with labelled data for adaptation to the NER task. Lastly, we propose a hybrid approach by combining the results of two models in order to maximize usability. We show that, even though historical Swedish is a low-resource language with data sparsity issues affecting overall performance, historical language models still show very promising results. Further contributions of our paper are the release of our newly trained model for NER of historical Swedish text, along with a manually annotated corpus of over 650 named entities.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="tudor-pettersson-2024-people">
<titleInfo>
<title>People and Places of the Past - Named Entity Recognition in Swedish Labour Movement Documents from Historical Sources</title>
</titleInfo>
<name type="personal">
<namePart type="given">Crina</namePart>
<namePart type="family">Tudor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eva</namePart>
<namePart type="family">Pettersson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 8th Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature (LaTeCH-CLfL 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yuri</namePart>
<namePart type="family">Bizzoni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stefania</namePart>
<namePart type="family">Degaetano-Ortlieb</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Kazantseva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stan</namePart>
<namePart type="family">Szpakowicz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">St. Julians, Malta</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Named Entity Recognition (NER) is an important step in many Natural Language Processing tasks. The existing state-of-the-art NER systems are however typically developed based on contemporary data, and not very well suited for analyzing historical text. In this paper, we present a comparative analysis of the performance of several language models when applied to Named Entity Recognition for historical Swedish text. The source texts we work with are documents from Swedish labour unions from the 19th and 20th century. We experiment with three off-the-shelf models for contemporary Swedish text, and one language model built on historical Swedish text that we fine-tune with labelled data for adaptation to the NER task. Lastly, we propose a hybrid approach by combining the results of two models in order to maximize usability. We show that, even though historical Swedish is a low-resource language with data sparsity issues affecting overall performance, historical language models still show very promising results. Further contributions of our paper are the release of our newly trained model for NER of historical Swedish text, along with a manually annotated corpus of over 650 named entities.</abstract>
<identifier type="citekey">tudor-pettersson-2024-people</identifier>
<location>
<url>https://aclanthology.org/2024.latechclfl-1.17</url>
</location>
<part>
<date>2024-03</date>
<extent unit="page">
<start>185</start>
<end>195</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T People and Places of the Past - Named Entity Recognition in Swedish Labour Movement Documents from Historical Sources
%A Tudor, Crina
%A Pettersson, Eva
%Y Bizzoni, Yuri
%Y Degaetano-Ortlieb, Stefania
%Y Kazantseva, Anna
%Y Szpakowicz, Stan
%S Proceedings of the 8th Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature (LaTeCH-CLfL 2024)
%D 2024
%8 March
%I Association for Computational Linguistics
%C St. Julians, Malta
%F tudor-pettersson-2024-people
%X Named Entity Recognition (NER) is an important step in many Natural Language Processing tasks. The existing state-of-the-art NER systems are however typically developed based on contemporary data, and not very well suited for analyzing historical text. In this paper, we present a comparative analysis of the performance of several language models when applied to Named Entity Recognition for historical Swedish text. The source texts we work with are documents from Swedish labour unions from the 19th and 20th century. We experiment with three off-the-shelf models for contemporary Swedish text, and one language model built on historical Swedish text that we fine-tune with labelled data for adaptation to the NER task. Lastly, we propose a hybrid approach by combining the results of two models in order to maximize usability. We show that, even though historical Swedish is a low-resource language with data sparsity issues affecting overall performance, historical language models still show very promising results. Further contributions of our paper are the release of our newly trained model for NER of historical Swedish text, along with a manually annotated corpus of over 650 named entities.
%U https://aclanthology.org/2024.latechclfl-1.17
%P 185-195
Markdown (Informal)
[People and Places of the Past - Named Entity Recognition in Swedish Labour Movement Documents from Historical Sources](https://aclanthology.org/2024.latechclfl-1.17) (Tudor & Pettersson, LaTeCHCLfL-WS 2024)
ACL