% Workshop paper from the ACL Anthology (2021.americasnlp-1.6).
% The language name in the title is brace-protected as a whole word so that
% sentence-casing bibliography styles keep its capital K.
@inproceedings{tyers-howell-2021-survey,
    author    = {Tyers, Francis and
                 Howell, Nick},
    title     = {A survey of part-of-speech tagging approaches applied to {K'iche'}},
    editor    = {Mager, Manuel and
                 Oncevay, Arturo and
                 Rios, Annette and
                 Ruiz, Ivan Vladimir Meza and
                 Palmer, Alexis and
                 Neubig, Graham and
                 Kann, Katharina},
    booktitle = {Proceedings of the First Workshop on Natural Language Processing for Indigenous Languages of the Americas},
    month     = jun,
    year      = {2021},
    address   = {Online},
    publisher = {Association for Computational Linguistics},
    pages     = {44--52},
    doi       = {10.18653/v1/2021.americasnlp-1.6},
    url       = {https://aclanthology.org/2021.americasnlp-1.6},
    abstract  = {We study the performance of several popular neural part-of-speech taggers from the Universal Dependencies ecosystem on Mayan languages using a small corpus of 1435 annotated K{'}iche{'} sentences consisting of approximately 10,000 tokens, with encouraging results: $F_1$ scores 93{\%}+ on lemmatisation, part-of-speech and morphological feature assignment. The high performance motivates a cross-language part-of-speech tagging study, where K{'}iche{'}-trained models are evaluated on two other Mayan languages, Kaqchikel and Uspanteko: performance on Kaqchikel is good, 63-85{\%}, and on Uspanteko modest, 60-71{\%}. Supporting experiments lead us to conclude the relative diversity of morphological features as a plausible explanation for the limiting factors in cross-language tagging performance, providing some direction for future sentence annotation and collection work to support these and other Mayan languages.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record, ID "tyers-howell-2021-survey". -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="tyers-howell-2021-survey">
    <!-- The paper itself: title, authors, issue date. -->
    <titleInfo>
      <title>A survey of part-of-speech tagging approaches applied to K’iche’</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Francis</namePart>
      <namePart type="family">Tyers</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Nick</namePart>
      <namePart type="family">Howell</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2021-06</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the First Workshop on Natural Language Processing for Indigenous Languages of the Americas</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Manuel</namePart>
        <namePart type="family">Mager</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Arturo</namePart>
        <namePart type="family">Oncevay</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Annette</namePart>
        <namePart type="family">Rios</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ivan</namePart>
        <namePart type="given">Vladimir</namePart>
        <namePart type="given">Meza</namePart>
        <namePart type="family">Ruiz</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Alexis</namePart>
        <namePart type="family">Palmer</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Graham</namePart>
        <namePart type="family">Neubig</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Katharina</namePart>
        <namePart type="family">Kann</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Online</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>We study the performance of several popular neural part-of-speech taggers from the Universal Dependencies ecosystem on Mayan languages using a small corpus of 1435 annotated K’iche’ sentences consisting of approximately 10,000 tokens, with encouraging results: F₁ scores 93%+ on lemmatisation, part-of-speech and morphological feature assignment. The high performance motivates a cross-language part-of-speech tagging study, where K’iche’-trained models are evaluated on two other Mayan languages, Kaqchikel and Uspanteko: performance on Kaqchikel is good, 63-85%, and on Uspanteko modest, 60-71%. Supporting experiments lead us to conclude the relative diversity of morphological features as a plausible explanation for the limiting factors in cross-language tagging performance, providing some direction for future sentence annotation and collection work to support these and other Mayan languages.</abstract>
    <!-- Identifiers and access location. -->
    <identifier type="citekey">tyers-howell-2021-survey</identifier>
    <identifier type="doi">10.18653/v1/2021.americasnlp-1.6</identifier>
    <location>
      <url>https://aclanthology.org/2021.americasnlp-1.6</url>
    </location>
    <!-- Position within the host volume: date and page extent. -->
    <part>
      <date>2021-06</date>
      <extent unit="page">
        <start>44</start>
        <end>52</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T A survey of part-of-speech tagging approaches applied to K’iche’
%A Tyers, Francis
%A Howell, Nick
%Y Mager, Manuel
%Y Oncevay, Arturo
%Y Rios, Annette
%Y Ruiz, Ivan Vladimir Meza
%Y Palmer, Alexis
%Y Neubig, Graham
%Y Kann, Katharina
%S Proceedings of the First Workshop on Natural Language Processing for Indigenous Languages of the Americas
%D 2021
%8 June
%I Association for Computational Linguistics
%C Online
%F tyers-howell-2021-survey
%X We study the performance of several popular neural part-of-speech taggers from the Universal Dependencies ecosystem on Mayan languages using a small corpus of 1435 annotated K’iche’ sentences consisting of approximately 10,000 tokens, with encouraging results: F₁ scores 93%+ on lemmatisation, part-of-speech and morphological feature assignment. The high performance motivates a cross-language part-of-speech tagging study, where K’iche’-trained models are evaluated on two other Mayan languages, Kaqchikel and Uspanteko: performance on Kaqchikel is good, 63-85%, and on Uspanteko modest, 60-71%. Supporting experiments lead us to conclude the relative diversity of morphological features as a plausible explanation for the limiting factors in cross-language tagging performance, providing some direction for future sentence annotation and collection work to support these and other Mayan languages.
%R 10.18653/v1/2021.americasnlp-1.6
%U https://aclanthology.org/2021.americasnlp-1.6
%U https://doi.org/10.18653/v1/2021.americasnlp-1.6
%P 44-52
Markdown (Informal)
[A survey of part-of-speech tagging approaches applied to K’iche’](https://aclanthology.org/2021.americasnlp-1.6) (Tyers & Howell, AmericasNLP 2021)
ACL