% ACL Anthology export: workshop paper by Blum et al. (SIGUL 2024), pages 300--306.
@inproceedings{blum-etal-2024-resource,
  author    = {Blum, Frederic and
               Englisch, Johannes and
               Hermida Rodriguez, Alba and
               van Gijn, Rik and
               List, Johann-Mattis},
  title     = {Resource Acquisition for Understudied Languages: Extracting Wordlists from Dictionaries for Computer-assisted Language Comparison},
  editor    = {Melero, Maite and
               Sakti, Sakriani and
               Soria, Claudia},
  booktitle = {Proceedings of the 3rd Annual Meeting of the Special Interest Group on Under-resourced Languages @ LREC-COLING 2024},
  publisher = {ELRA and ICCL},
  address   = {Torino, Italia},
  month     = may,
  year      = {2024},
  pages     = {300--306},
  url       = {https://aclanthology.org/2024.sigul-1.36/},
  abstract  = {Comparative wordlists play a crucial role for historical language comparison. They are regularly used for the identification of related words and languages, or for the reconstruction of language phylogenies and proto-languages. While automated solutions exist for the majority of methods used for this purpose, no standardized computational or computer-assisted approaches for the compilation of comparative wordlists have been proposed so far. Up to today, scholars compile wordlists by sifting manually through dictionaries or similar language resources and typing them into spreadsheets. In this study we present a semi-automatic approach to extract wordlists from machine-readable dictionaries. The transparent workflow allows to build user-defined wordlists for individual languages in a standardized format. By automating the search for translation equivalents in dictionaries, our approach greatly facilitates the aggregation of individual resources into multilingual comparative wordlists that can be used for a variety of purposes.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for citekey blum-etal-2024-resource: five authors, hosted in a proceedings volume with three editors. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="blum-etal-2024-resource">
    <!-- Paper title -->
    <titleInfo>
      <title>Resource Acquisition for Understudied Languages: Extracting Wordlists from Dictionaries for Computer-assisted Language Comparison</title>
    </titleInfo>
    <!-- Authors, in publication order -->
    <name type="personal">
      <namePart type="given">Frederic</namePart>
      <namePart type="family">Blum</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Johannes</namePart>
      <namePart type="family">Englisch</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Alba</namePart>
      <namePart type="family">Hermida Rodriguez</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Rik</namePart>
      <namePart type="family">van Gijn</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Johann-Mattis</namePart>
      <namePart type="family">List</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors, publisher and place -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 3rd Annual Meeting of the Special Interest Group on Under-resourced Languages @ LREC-COLING 2024</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Maite</namePart>
        <namePart type="family">Melero</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sakriani</namePart>
        <namePart type="family">Sakti</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Claudia</namePart>
        <namePart type="family">Soria</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>ELRA and ICCL</publisher>
        <place>
          <placeTerm type="text">Torino, Italia</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Comparative wordlists play a crucial role for historical language comparison. They are regularly used for the identification of related words and languages, or for the reconstruction of language phylogenies and proto-languages. While automated solutions exist for the majority of methods used for this purpose, no standardized computational or computer-assisted approaches for the compilation of comparative wordlists have been proposed so far. Up to today, scholars compile wordlists by sifting manually through dictionaries or similar language resources and typing them into spreadsheets. In this study we present a semi-automatic approach to extract wordlists from machine-readable dictionaries. The transparent workflow allows to build user-defined wordlists for individual languages in a standardized format. By automating the search for translation equivalents in dictionaries, our approach greatly facilitates the aggregation of individual resources into multilingual comparative wordlists that can be used for a variety of purposes.</abstract>
    <identifier type="citekey">blum-etal-2024-resource</identifier>
    <location>
      <url>https://aclanthology.org/2024.sigul-1.36/</url>
    </location>
    <!-- Page range of the paper within the host volume -->
    <part>
      <date>2024-05</date>
      <extent unit="page">
        <start>300</start>
        <end>306</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Resource Acquisition for Understudied Languages: Extracting Wordlists from Dictionaries for Computer-assisted Language Comparison
%A Blum, Frederic
%A Englisch, Johannes
%A Hermida Rodriguez, Alba
%A van Gijn, Rik
%A List, Johann-Mattis
%Y Melero, Maite
%Y Sakti, Sakriani
%Y Soria, Claudia
%S Proceedings of the 3rd Annual Meeting of the Special Interest Group on Under-resourced Languages @ LREC-COLING 2024
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F blum-etal-2024-resource
%X Comparative wordlists play a crucial role for historical language comparison. They are regularly used for the identification of related words and languages, or for the reconstruction of language phylogenies and proto-languages. While automated solutions exist for the majority of methods used for this purpose, no standardized computational or computer-assisted approaches for the compilation of comparative wordlists have been proposed so far. Up to today, scholars compile wordlists by sifting manually through dictionaries or similar language resources and typing them into spreadsheets. In this study we present a semi-automatic approach to extract wordlists from machine-readable dictionaries. The transparent workflow allows to build user-defined wordlists for individual languages in a standardized format. By automating the search for translation equivalents in dictionaries, our approach greatly facilitates the aggregation of individual resources into multilingual comparative wordlists that can be used for a variety of purposes.
%U https://aclanthology.org/2024.sigul-1.36/
%P 300-306
Markdown (Informal)
[Resource Acquisition for Understudied Languages: Extracting Wordlists from Dictionaries for Computer-assisted Language Comparison](https://aclanthology.org/2024.sigul-1.36/) (Blum et al., SIGUL 2024)
ACL