@inproceedings{dudzic-etal-2024-two,
title = "Two Approaches to Diachronic Normalization of {P}olish Texts",
author = "Dudzic, Kacper and
Gralinski, Filip and
Jassem, Krzysztof and
Kubis, Marek and
Wierzchon, Piotr",
editor = "Bizzoni, Yuri and
Degaetano-Ortlieb, Stefania and
Kazantseva, Anna and
Szpakowicz, Stan",
booktitle = "Proceedings of the 8th Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature (LaTeCH-CLfL 2024)",
month = mar,
year = "2024",
address = "St. Julians, Malta",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.latechclfl-1.19/",
pages = "207--212",
abstract = "This paper discusses two approaches to the diachronic normalization of Polish texts: a rule-based solution that relies on a set of handcrafted patterns, and a neural normalization model based on the text-to-text transfer transformer architecture. The training and evaluation data prepared for the task are discussed in detail, along with experiments conducted to compare the proposed normalization solutions. A quantitative and qualitative analysis is made. It is shown that at the current stage of inquiry into the problem, the rule-based solution outperforms the neural one on 3 out of 4 variants of the prepared dataset, although in practice both approaches have distinct advantages and disadvantages."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="dudzic-etal-2024-two">
<titleInfo>
<title>Two Approaches to Diachronic Normalization of Polish Texts</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kacper</namePart>
<namePart type="family">Dudzic</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Filip</namePart>
<namePart type="family">Gralinski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Krzysztof</namePart>
<namePart type="family">Jassem</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marek</namePart>
<namePart type="family">Kubis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Piotr</namePart>
<namePart type="family">Wierzchon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 8th Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature (LaTeCH-CLfL 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yuri</namePart>
<namePart type="family">Bizzoni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stefania</namePart>
<namePart type="family">Degaetano-Ortlieb</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Kazantseva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stan</namePart>
<namePart type="family">Szpakowicz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">St. Julians, Malta</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper discusses two approaches to the diachronic normalization of Polish texts: a rule-based solution that relies on a set of handcrafted patterns, and a neural normalization model based on the text-to-text transfer transformer architecture. The training and evaluation data prepared for the task are discussed in detail, along with experiments conducted to compare the proposed normalization solutions. A quantitative and qualitative analysis is made. It is shown that at the current stage of inquiry into the problem, the rule-based solution outperforms the neural one on 3 out of 4 variants of the prepared dataset, although in practice both approaches have distinct advantages and disadvantages.</abstract>
<identifier type="citekey">dudzic-etal-2024-two</identifier>
<location>
<url>https://aclanthology.org/2024.latechclfl-1.19/</url>
</location>
<part>
<date>2024-03</date>
<extent unit="page">
<start>207</start>
<end>212</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Two Approaches to Diachronic Normalization of Polish Texts
%A Dudzic, Kacper
%A Gralinski, Filip
%A Jassem, Krzysztof
%A Kubis, Marek
%A Wierzchon, Piotr
%Y Bizzoni, Yuri
%Y Degaetano-Ortlieb, Stefania
%Y Kazantseva, Anna
%Y Szpakowicz, Stan
%S Proceedings of the 8th Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature (LaTeCH-CLfL 2024)
%D 2024
%8 March
%I Association for Computational Linguistics
%C St. Julians, Malta
%F dudzic-etal-2024-two
%X This paper discusses two approaches to the diachronic normalization of Polish texts: a rule-based solution that relies on a set of handcrafted patterns, and a neural normalization model based on the text-to-text transfer transformer architecture. The training and evaluation data prepared for the task are discussed in detail, along with experiments conducted to compare the proposed normalization solutions. A quantitative and qualitative analysis is made. It is shown that at the current stage of inquiry into the problem, the rule-based solution outperforms the neural one on 3 out of 4 variants of the prepared dataset, although in practice both approaches have distinct advantages and disadvantages.
%U https://aclanthology.org/2024.latechclfl-1.19/
%P 207-212
Markdown (Informal)
[Two Approaches to Diachronic Normalization of Polish Texts](https://aclanthology.org/2024.latechclfl-1.19/) (Dudzic et al., LaTeCHCLfL 2024)
ACL
- Kacper Dudzic, Filip Gralinski, Krzysztof Jassem, Marek Kubis, and Piotr Wierzchon. 2024. Two Approaches to Diachronic Normalization of Polish Texts. In Proceedings of the 8th Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature (LaTeCH-CLfL 2024), pages 207–212, St. Julians, Malta. Association for Computational Linguistics.