% Workshop paper from the SLTU-CCURL 2020 joint proceedings (ACL Anthology, 2020.sltu-1.5).
@inproceedings{hamalainen-wiechetek-2020-morphological,
  title     = {Morphological Disambiguation of {S}outh {S}{\'a}mi with {FST}s and Neural Networks},
  author    = {H{\"a}m{\"a}l{\"a}inen, Mika and
               Wiechetek, Linda},
  editor    = {Beermann, Dorothee and
               Besacier, Laurent and
               Sakti, Sakriani and
               Soria, Claudia},
  booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages ({SLTU}) and Collaboration and Computing for Under-Resourced Languages ({CCURL})},
  month     = may,
  year      = {2020},
  address   = {Marseille, France},
  publisher = {European Language Resources Association},
  url       = {https://aclanthology.org/2020.sltu-1.5},
  pages     = {36--40},
  abstract  = {We present a method for conducting morphological disambiguation for South S{\'a}mi, which is an endangered language. Our method uses an FST-based morphological analyzer to produce an ambiguous set of morphological readings for each word in a sentence. These readings are disambiguated with a Bi-RNN model trained on the related North S{\'a}mi UD Treebank and some synthetically generated South S{\'a}mi data. The disambiguation is done on the level of morphological tags ignoring word forms and lemmas; this makes it possible to use North S{\'a}mi training data for South S{\'a}mi without the need for a bilingual dictionary or aligned word embeddings. Our approach requires only minimal resources for South S{\'a}mi, which makes it usable and applicable in the contexts of any other endangered language as well.},
  language  = {English},
  isbn      = {979-10-95546-35-1},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS record for the paper; the host relatedItem describes the proceedings volume it appears in. -->
  <mods ID="hamalainen-wiechetek-2020-morphological">
    <titleInfo>
      <title>Morphological Disambiguation of South Sámi with FSTs and Neural Networks</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Mika</namePart>
      <namePart type="family">Hämäläinen</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Linda</namePart>
      <namePart type="family">Wiechetek</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2020-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <language>
      <languageTerm type="text">English</languageTerm>
      <languageTerm type="code" authority="iso639-2b">eng</languageTerm>
    </language>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Dorothee</namePart>
        <namePart type="family">Beermann</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Laurent</namePart>
        <namePart type="family">Besacier</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sakriani</namePart>
        <namePart type="family">Sakti</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Claudia</namePart>
        <namePart type="family">Soria</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>European Language Resources Association</publisher>
        <place>
          <placeTerm type="text">Marseille, France</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-10-95546-35-1</identifier>
    </relatedItem>
    <abstract>We present a method for conducting morphological disambiguation for South Sámi, which is an endangered language. Our method uses an FST-based morphological analyzer to produce an ambiguous set of morphological readings for each word in a sentence. These readings are disambiguated with a Bi-RNN model trained on the related North Sámi UD Treebank and some synthetically generated South Sámi data. The disambiguation is done on the level of morphological tags ignoring word forms and lemmas; this makes it possible to use North Sámi training data for South Sámi without the need for a bilingual dictionary or aligned word embeddings. Our approach requires only minimal resources for South Sámi, which makes it usable and applicable in the contexts of any other endangered language as well.</abstract>
    <identifier type="citekey">hamalainen-wiechetek-2020-morphological</identifier>
    <location>
      <url>https://aclanthology.org/2020.sltu-1.5</url>
    </location>
    <part>
      <date>2020-05</date>
      <extent unit="page">
        <start>36</start>
        <end>40</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Morphological Disambiguation of South Sámi with FSTs and Neural Networks
%A Hämäläinen, Mika
%A Wiechetek, Linda
%Y Beermann, Dorothee
%Y Besacier, Laurent
%Y Sakti, Sakriani
%Y Soria, Claudia
%S Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)
%D 2020
%8 May
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-35-1
%G English
%F hamalainen-wiechetek-2020-morphological
%X We present a method for conducting morphological disambiguation for South Sámi, which is an endangered language. Our method uses an FST-based morphological analyzer to produce an ambiguous set of morphological readings for each word in a sentence. These readings are disambiguated with a Bi-RNN model trained on the related North Sámi UD Treebank and some synthetically generated South Sámi data. The disambiguation is done on the level of morphological tags ignoring word forms and lemmas; this makes it possible to use North Sámi training data for South Sámi without the need for a bilingual dictionary or aligned word embeddings. Our approach requires only minimal resources for South Sámi, which makes it usable and applicable in the contexts of any other endangered language as well.
%U https://aclanthology.org/2020.sltu-1.5
%P 36-40
Markdown (Informal)
[Morphological Disambiguation of South Sámi with FSTs and Neural Networks](https://aclanthology.org/2020.sltu-1.5) (Hämäläinen & Wiechetek, SLTU 2020)
ACL
- Mika Hämäläinen and Linda Wiechetek. 2020. Morphological Disambiguation of South Sámi with FSTs and Neural Networks. In Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL), pages 36–40, Marseille, France. European Language Resources Association.