% Favaro et al., LT4HALA 2022 workshop paper (record taken from aclanthology.org).
% Braces in the title protect words whose capitalisation must survive style recasing.
@inproceedings{favaro-etal-2022-towards,
  title     = {Towards the Creation of a Diachronic Corpus for {Italian}: A Case Study on the {GDLI} Quotations},
  author    = {Favaro, Manuel and
               Guadagnini, Elisa and
               Sassolini, Eva and
               Biffi, Marco and
               Montemagni, Simonetta},
  editor    = {Sprugnoli, Rachele and
               Passarotti, Marco},
  booktitle = {Proceedings of the Second Workshop on Language Technologies for Historical and Ancient Languages},
  month     = jun,
  year      = {2022},
  address   = {Marseille, France},
  publisher = {European Language Resources Association},
  url       = {https://aclanthology.org/2022.lt4hala-1.13/},
  pages     = {94--100},
  abstract  = {In this paper we describe some experiments related to a corpus derived from an authoritative historical Italian dictionary, namely the Grande dizionario della lingua italiana ({\textquoteleft}Great Dictionary of Italian Language{\textquoteright}, in short GDLI). Thanks to the digitization and structuring of this dictionary, we have been able to set up the first nucleus of a diachronic annotated corpus that selects{---}according to specific criteria, and distinguishing between prose and poetry{---}some of the quotations that within the entries illustrate the different definitions and sub-definitions. In fact, the GDLI presents a huge collection of quotations covering the entire history of the Italian language and thus ranging from the Middle Ages to the present day. The corpus was enriched with linguistic annotation and used to train and evaluate NLP models for POS tagging and lemmatization, with promising results.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for citekey favaro-etal-2022-towards: five authors, the host
     proceedings with its two editors, the abstract, the landing URL and the page range. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="favaro-etal-2022-towards">
    <titleInfo>
      <title>Towards the Creation of a Diachronic Corpus for Italian: A Case Study on the GDLI Quotations</title>
    </titleInfo>
    <!-- Authors, in publication order -->
    <name type="personal">
      <namePart type="given">Manuel</namePart>
      <namePart type="family">Favaro</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Elisa</namePart>
      <namePart type="family">Guadagnini</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Eva</namePart>
      <namePart type="family">Sassolini</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Marco</namePart>
      <namePart type="family">Biffi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Simonetta</namePart>
      <namePart type="family">Montemagni</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2022-06</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors and publisher -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Second Workshop on Language Technologies for Historical and Ancient Languages</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Rachele</namePart>
        <namePart type="family">Sprugnoli</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Marco</namePart>
        <namePart type="family">Passarotti</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>European Language Resources Association</publisher>
        <place>
          <placeTerm type="text">Marseille, France</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>In this paper we describe some experiments related to a corpus derived from an authoritative historical Italian dictionary, namely the Grande dizionario della lingua italiana (‘Great Dictionary of Italian Language’, in short GDLI). Thanks to the digitization and structuring of this dictionary, we have been able to set up the first nucleus of a diachronic annotated corpus that selects—according to specific criteria, and distinguishing between prose and poetry—some of the quotations that within the entries illustrate the different definitions and sub-definitions. In fact, the GDLI presents a huge collection of quotations covering the entire history of the Italian language and thus ranging from the Middle Ages to the present day. The corpus was enriched with linguistic annotation and used to train and evaluate NLP models for POS tagging and lemmatization, with promising results.</abstract>
    <identifier type="citekey">favaro-etal-2022-towards</identifier>
    <location>
      <url>https://aclanthology.org/2022.lt4hala-1.13/</url>
    </location>
    <part>
      <date>2022-06</date>
      <extent unit="page">
        <start>94</start>
        <end>100</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Towards the Creation of a Diachronic Corpus for Italian: A Case Study on the GDLI Quotations
%A Favaro, Manuel
%A Guadagnini, Elisa
%A Sassolini, Eva
%A Biffi, Marco
%A Montemagni, Simonetta
%Y Sprugnoli, Rachele
%Y Passarotti, Marco
%S Proceedings of the Second Workshop on Language Technologies for Historical and Ancient Languages
%D 2022
%8 June
%I European Language Resources Association
%C Marseille, France
%F favaro-etal-2022-towards
%X In this paper we describe some experiments related to a corpus derived from an authoritative historical Italian dictionary, namely the Grande dizionario della lingua italiana (‘Great Dictionary of Italian Language’, in short GDLI). Thanks to the digitization and structuring of this dictionary, we have been able to set up the first nucleus of a diachronic annotated corpus that selects—according to specific criteria, and distinguishing between prose and poetry—some of the quotations that within the entries illustrate the different definitions and sub-definitions. In fact, the GDLI presents a huge collection of quotations covering the entire history of the Italian language and thus ranging from the Middle Ages to the present day. The corpus was enriched with linguistic annotation and used to train and evaluate NLP models for POS tagging and lemmatization, with promising results.
%U https://aclanthology.org/2022.lt4hala-1.13/
%P 94-100
Markdown (Informal)
[Towards the Creation of a Diachronic Corpus for Italian: A Case Study on the GDLI Quotations](https://aclanthology.org/2022.lt4hala-1.13/) (Favaro et al., LT4HALA 2022)
ACL