@inproceedings{hamalainen-etal-2021-lemmatization,
title = "Lemmatization of Historical Old Literary {F}innish Texts in Modern Orthography",
author = {H{\"a}m{\"a}l{\"a}inen, Mika and
Partanen, Niko and
Alnajjar, Khalid},
editor = "Denis, Pascal and
Grabar, Natalia and
Fraisse, Amel and
Cardon, R{\'e}mi and
Jacquemin, Bernard and
Kergosien, Eric and
Balvet, Antonio",
booktitle = "Actes de la 28e Conf{\'e}rence sur le Traitement Automatique des Langues Naturelles. Volume 1 : conf{\'e}rence principale",
month = jun,
year = "2021",
address = "Lille, France",
publisher = "ATALA",
url = "https://aclanthology.org/2021.jeptalnrecital-taln.18/",
pages = "189--198",
abstract = "Texts written in Old Literary Finnish represent the first literary work ever written in Finnish starting from the 16th century. There have been several projects in Finland that have digitized old publications and made them available for research use. However, using modern NLP methods in such data poses great challenges. In this paper we propose an approach for simultaneously normalizing and lemmatizing Old Literary Finnish into modern spelling. Our best model reaches to 96.3{\%} accuracy in texts written by Agricola and 87.7{\%} accuracy in other contemporary out-of-domain text. Our method has been made freely available on Zenodo and Github."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hamalainen-etal-2021-lemmatization">
<titleInfo>
<title>Lemmatization of Historical Old Literary Finnish Texts in Modern Orthography</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mika</namePart>
<namePart type="family">Hämäläinen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Niko</namePart>
<namePart type="family">Partanen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Alnajjar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Actes de la 28e Conférence sur le Traitement Automatique des Langues Naturelles. Volume 1 : conférence principale</title>
</titleInfo>
<name type="personal">
<namePart type="given">Pascal</namePart>
<namePart type="family">Denis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Natalia</namePart>
<namePart type="family">Grabar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amel</namePart>
<namePart type="family">Fraisse</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rémi</namePart>
<namePart type="family">Cardon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bernard</namePart>
<namePart type="family">Jacquemin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eric</namePart>
<namePart type="family">Kergosien</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Antonio</namePart>
<namePart type="family">Balvet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ATALA</publisher>
<place>
<placeTerm type="text">Lille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Texts written in Old Literary Finnish represent the first literary work ever written in Finnish starting from the 16th century. There have been several projects in Finland that have digitized old publications and made them available for research use. However, using modern NLP methods in such data poses great challenges. In this paper we propose an approach for simultaneously normalizing and lemmatizing Old Literary Finnish into modern spelling. Our best model reaches to 96.3% accuracy in texts written by Agricola and 87.7% accuracy in other contemporary out-of-domain text. Our method has been made freely available on Zenodo and Github.</abstract>
<identifier type="citekey">hamalainen-etal-2021-lemmatization</identifier>
<location>
<url>https://aclanthology.org/2021.jeptalnrecital-taln.18/</url>
</location>
<part>
<date>2021-06</date>
<extent unit="page">
<start>189</start>
<end>198</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Lemmatization of Historical Old Literary Finnish Texts in Modern Orthography
%A Hämäläinen, Mika
%A Partanen, Niko
%A Alnajjar, Khalid
%Y Denis, Pascal
%Y Grabar, Natalia
%Y Fraisse, Amel
%Y Cardon, Rémi
%Y Jacquemin, Bernard
%Y Kergosien, Eric
%Y Balvet, Antonio
%S Actes de la 28e Conférence sur le Traitement Automatique des Langues Naturelles. Volume 1 : conférence principale
%D 2021
%8 June
%I ATALA
%C Lille, France
%F hamalainen-etal-2021-lemmatization
%X Texts written in Old Literary Finnish represent the first literary work ever written in Finnish starting from the 16th century. There have been several projects in Finland that have digitized old publications and made them available for research use. However, using modern NLP methods in such data poses great challenges. In this paper we propose an approach for simultaneously normalizing and lemmatizing Old Literary Finnish into modern spelling. Our best model reaches to 96.3% accuracy in texts written by Agricola and 87.7% accuracy in other contemporary out-of-domain text. Our method has been made freely available on Zenodo and Github.
%U https://aclanthology.org/2021.jeptalnrecital-taln.18/
%P 189-198
Markdown (Informal)
[Lemmatization of Historical Old Literary Finnish Texts in Modern Orthography](https://aclanthology.org/2021.jeptalnrecital-taln.18/) (Hämäläinen et al., JEP/TALN/RECITAL 2021)
ACL