@inproceedings{blum-list-2023-trimming,
title = "Trimming Phonetic Alignments Improves the Inference of Sound Correspondence Patterns from Multilingual Wordlists",
author = "Blum, Frederic and
List, Johann-Mattis",
editor = "Beinborn, Lisa and
Goswami, Koustava and
Murado{\u{g}}lu, Saliha and
Sorokin, Alexey and
Kumar, Ritesh and
Shcherbakov, Andreas and
Ponti, Edoardo M. and
Cotterell, Ryan and
Vylomova, Ekaterina",
booktitle = "Proceedings of the 5th Workshop on Research in Computational Linguistic Typology and Multilingual NLP",
month = may,
year = "2023",
address = "Dubrovnik, Croatia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.sigtyp-1.6",
doi = "10.18653/v1/2023.sigtyp-1.6",
pages = "52--64",
abstract = "Sound correspondence patterns form the basis of cognate detection and phonological reconstruction in historical language comparison. Methods for the automatic inference of correspondence patterns from phonetically aligned cognate sets have been proposed, but their application to multilingual wordlists requires extremely well annotated datasets. Since annotation is tedious and time consuming, it would be desirable to find ways to improve aligned cognate data automatically. Taking inspiration from trimming techniques in evolutionary biology, which improve alignments by excluding problematic sites, we propose a workflow that trims phonetic alignments in comparative linguistics prior to the inference of correspondence patterns. Testing these techniques on a large standardized collection of ten datasets with expert annotations from different language families, we find that the best trimming technique substantially improves the overall consistency of the alignments, showing a clear increase in the proportion of frequent correspondence patterns and words exhibiting regular cognate relations.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="blum-list-2023-trimming">
<titleInfo>
<title>Trimming Phonetic Alignments Improves the Inference of Sound Correspondence Patterns from Multilingual Wordlists</title>
</titleInfo>
<name type="personal">
<namePart type="given">Frederic</namePart>
<namePart type="family">Blum</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Johann-Mattis</namePart>
<namePart type="family">List</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 5th Workshop on Research in Computational Linguistic Typology and Multilingual NLP</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lisa</namePart>
<namePart type="family">Beinborn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Koustava</namePart>
<namePart type="family">Goswami</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Saliha</namePart>
<namePart type="family">Muradoğlu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexey</namePart>
<namePart type="family">Sorokin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ritesh</namePart>
<namePart type="family">Kumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andreas</namePart>
<namePart type="family">Shcherbakov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Edoardo</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Ponti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ryan</namePart>
<namePart type="family">Cotterell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Vylomova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dubrovnik, Croatia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Sound correspondence patterns form the basis of cognate detection and phonological reconstruction in historical language comparison. Methods for the automatic inference of correspondence patterns from phonetically aligned cognate sets have been proposed, but their application to multilingual wordlists requires extremely well annotated datasets. Since annotation is tedious and time consuming, it would be desirable to find ways to improve aligned cognate data automatically. Taking inspiration from trimming techniques in evolutionary biology, which improve alignments by excluding problematic sites, we propose a workflow that trims phonetic alignments in comparative linguistics prior to the inference of correspondence patterns. Testing these techniques on a large standardized collection of ten datasets with expert annotations from different language families, we find that the best trimming technique substantially improves the overall consistency of the alignments, showing a clear increase in the proportion of frequent correspondence patterns and words exhibiting regular cognate relations.</abstract>
<identifier type="citekey">blum-list-2023-trimming</identifier>
<identifier type="doi">10.18653/v1/2023.sigtyp-1.6</identifier>
<location>
<url>https://aclanthology.org/2023.sigtyp-1.6</url>
</location>
<part>
<date>2023-05</date>
<extent unit="page">
<start>52</start>
<end>64</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Trimming Phonetic Alignments Improves the Inference of Sound Correspondence Patterns from Multilingual Wordlists
%A Blum, Frederic
%A List, Johann-Mattis
%Y Beinborn, Lisa
%Y Goswami, Koustava
%Y Muradoğlu, Saliha
%Y Sorokin, Alexey
%Y Kumar, Ritesh
%Y Shcherbakov, Andreas
%Y Ponti, Edoardo M.
%Y Cotterell, Ryan
%Y Vylomova, Ekaterina
%S Proceedings of the 5th Workshop on Research in Computational Linguistic Typology and Multilingual NLP
%D 2023
%8 May
%I Association for Computational Linguistics
%C Dubrovnik, Croatia
%F blum-list-2023-trimming
%X Sound correspondence patterns form the basis of cognate detection and phonological reconstruction in historical language comparison. Methods for the automatic inference of correspondence patterns from phonetically aligned cognate sets have been proposed, but their application to multilingual wordlists requires extremely well annotated datasets. Since annotation is tedious and time consuming, it would be desirable to find ways to improve aligned cognate data automatically. Taking inspiration from trimming techniques in evolutionary biology, which improve alignments by excluding problematic sites, we propose a workflow that trims phonetic alignments in comparative linguistics prior to the inference of correspondence patterns. Testing these techniques on a large standardized collection of ten datasets with expert annotations from different language families, we find that the best trimming technique substantially improves the overall consistency of the alignments, showing a clear increase in the proportion of frequent correspondence patterns and words exhibiting regular cognate relations.
%R 10.18653/v1/2023.sigtyp-1.6
%U https://aclanthology.org/2023.sigtyp-1.6
%U https://doi.org/10.18653/v1/2023.sigtyp-1.6
%P 52-64
Markdown (Informal)
[Trimming Phonetic Alignments Improves the Inference of Sound Correspondence Patterns from Multilingual Wordlists](https://aclanthology.org/2023.sigtyp-1.6) (Blum & List, SIGTYP 2023)
ACL