@inproceedings{khandagale-etal-2022-towards,
title = "Towards Unsupervised Morphological Analysis of Polysynthetic Languages",
author = "Khandagale, Sujay and
L{\'e}veill{\'e}, Yoann and
Miller, Samuel and
Pham, Derek and
Eskander, Ramy and
Lowry, Cass and
Compton, Richard and
Klavans, Judith and
Polinsky, Maria and
Muresan, Smaranda",
editor = "He, Yulan and
Ji, Heng and
Li, Sujian and
Liu, Yang and
Chang, Chua-Hui",
booktitle = "Proceedings of the 2nd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 12th International Joint Conference on Natural Language Processing (Volume 2: Short Papers)",
month = nov,
year = "2022",
address = "Online only",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.aacl-short.41/",
doi = "10.18653/v1/2022.aacl-short.41",
pages = "334--340",
abstract = "Polysynthetic languages present a challenge for morphological analysis due to the complexity of their words and the lack of high-quality annotated datasets needed to build and/or evaluate computational models. The contribution of this work is twofold. First, using linguists' help, we generate and contribute high-quality annotated data for two low-resource polysynthetic languages for two tasks: morphological segmentation and part-of-speech (POS) tagging. Second, we present the results of state-of-the-art unsupervised approaches for these two tasks on Adyghe and Inuktitut. Our findings show that for these polysynthetic languages, using linguistic priors helps the task of morphological segmentation and that using stems rather than words as the core unit of abstraction leads to superior performance on POS tagging."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="khandagale-etal-2022-towards">
<titleInfo>
<title>Towards Unsupervised Morphological Analysis of Polysynthetic Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sujay</namePart>
<namePart type="family">Khandagale</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yoann</namePart>
<namePart type="family">Léveillé</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Samuel</namePart>
<namePart type="family">Miller</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Derek</namePart>
<namePart type="family">Pham</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ramy</namePart>
<namePart type="family">Eskander</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cass</namePart>
<namePart type="family">Lowry</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Richard</namePart>
<namePart type="family">Compton</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Judith</namePart>
<namePart type="family">Klavans</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Polinsky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Smaranda</namePart>
<namePart type="family">Muresan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2nd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 12th International Joint Conference on Natural Language Processing (Volume 2: Short Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yulan</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Heng</namePart>
<namePart type="family">Ji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sujian</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chua-Hui</namePart>
<namePart type="family">Chang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online only</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Polysynthetic languages present a challenge for morphological analysis due to the complexity of their words and the lack of high-quality annotated datasets needed to build and/or evaluate computational models. The contribution of this work is twofold. First, using linguists’ help, we generate and contribute high-quality annotated data for two low-resource polysynthetic languages for two tasks: morphological segmentation and part-of-speech (POS) tagging. Second, we present the results of state-of-the-art unsupervised approaches for these two tasks on Adyghe and Inuktitut. Our findings show that for these polysynthetic languages, using linguistic priors helps the task of morphological segmentation and that using stems rather than words as the core unit of abstraction leads to superior performance on POS tagging.</abstract>
<identifier type="citekey">khandagale-etal-2022-towards</identifier>
<identifier type="doi">10.18653/v1/2022.aacl-short.41</identifier>
<location>
<url>https://aclanthology.org/2022.aacl-short.41/</url>
</location>
<part>
<date>2022-11</date>
<extent unit="page">
<start>334</start>
<end>340</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Towards Unsupervised Morphological Analysis of Polysynthetic Languages
%A Khandagale, Sujay
%A Léveillé, Yoann
%A Miller, Samuel
%A Pham, Derek
%A Eskander, Ramy
%A Lowry, Cass
%A Compton, Richard
%A Klavans, Judith
%A Polinsky, Maria
%A Muresan, Smaranda
%Y He, Yulan
%Y Ji, Heng
%Y Li, Sujian
%Y Liu, Yang
%Y Chang, Chua-Hui
%S Proceedings of the 2nd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 12th International Joint Conference on Natural Language Processing (Volume 2: Short Papers)
%D 2022
%8 November
%I Association for Computational Linguistics
%C Online only
%F khandagale-etal-2022-towards
%X Polysynthetic languages present a challenge for morphological analysis due to the complexity of their words and the lack of high-quality annotated datasets needed to build and/or evaluate computational models. The contribution of this work is twofold. First, using linguists’ help, we generate and contribute high-quality annotated data for two low-resource polysynthetic languages for two tasks: morphological segmentation and part-of-speech (POS) tagging. Second, we present the results of state-of-the-art unsupervised approaches for these two tasks on Adyghe and Inuktitut. Our findings show that for these polysynthetic languages, using linguistic priors helps the task of morphological segmentation and that using stems rather than words as the core unit of abstraction leads to superior performance on POS tagging.
%R 10.18653/v1/2022.aacl-short.41
%U https://aclanthology.org/2022.aacl-short.41/
%U https://doi.org/10.18653/v1/2022.aacl-short.41
%P 334-340
Markdown (Informal)
[Towards Unsupervised Morphological Analysis of Polysynthetic Languages](https://aclanthology.org/2022.aacl-short.41/) (Khandagale et al., AACL-IJCNLP 2022)
ACL
- Sujay Khandagale, Yoann Léveillé, Samuel Miller, Derek Pham, Ramy Eskander, Cass Lowry, Richard Compton, Judith Klavans, Maria Polinsky, and Smaranda Muresan. 2022. Towards Unsupervised Morphological Analysis of Polysynthetic Languages. In Proceedings of the 2nd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 12th International Joint Conference on Natural Language Processing (Volume 2: Short Papers), pages 334–340, Online only. Association for Computational Linguistics.