@comment{Conference paper in the AmericasNLP workshop proceedings (2023), as listed on aclanthology.org.
  Person names use the "von Last, First" form so that the lowercase particle of
  "van Genabith" is parsed as the von part and the hyphenated surname keeps its casing.}
@inproceedings{graichen-etal-2023-enriching,
  title     = {Enriching {W}ay{\'u}unaiki-{S}panish Neural Machine Translation with Linguistic Information},
  author    = {Graichen, Nora and
               van Genabith, Josef and
               Espa{\~n}a-Bonet, Cristina},
  editor    = {Mager, Manuel and
               Ebrahimi, Abteen and
               Oncevay, Arturo and
               Rice, Enora and
               Rijhwani, Shruti and
               Palmer, Alexis and
               Kann, Katharina},
  booktitle = {Proceedings of the Workshop on Natural Language Processing for Indigenous Languages of the Americas (AmericasNLP)},
  month     = jul,
  year      = {2023},
  address   = {Toronto, Canada},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2023.americasnlp-1.9},
  doi       = {10.18653/v1/2023.americasnlp-1.9},
  pages     = {67--83},
  abstract  = {We present the first neural machine translation system for the low-resource language pair Way{\'u}unaiki{--}Spanish and explore strategies to inject linguistic knowledge into the model to improve translation quality. We explore a wide range of methods and combine complementary approaches. Results indicate that incorporating linguistic information through linguistically motivated subword segmentation, factored models, and pretrained embeddings helps the system to generate improved translations, with the segmentation contributing most. In order to evaluate translation quality in a general domain and go beyond the available religious domain data, we gather and make publicly available a new test set and supplementary material. Although translation quality as measured with automatic metrics is low, we hope these resources will facilitate and support further research on Way{\'u}unaiki.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!--
  MODS v3 record for the paper with citekey graichen-etal-2023-enriching.
  Top-level <name> elements are the paper's authors; the <relatedItem type="host">
  element describes the proceedings volume, its editors, publisher and place.
-->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="graichen-etal-2023-enriching">
    <titleInfo>
      <title>Enriching Wayúunaiki-Spanish Neural Machine Translation with Linguistic Information</title>
    </titleInfo>
    <!-- Authors of the paper, in publication order -->
    <name type="personal">
      <namePart type="given">Nora</namePart>
      <namePart type="family">Graichen</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Josef</namePart>
      <namePart type="family">van Genabith</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Cristina</namePart>
      <namePart type="family">España-Bonet</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2023-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume this paper appears in -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Workshop on Natural Language Processing for Indigenous Languages of the Americas (AmericasNLP)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Manuel</namePart>
        <namePart type="family">Mager</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Abteen</namePart>
        <namePart type="family">Ebrahimi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Arturo</namePart>
        <namePart type="family">Oncevay</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Enora</namePart>
        <namePart type="family">Rice</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Shruti</namePart>
        <namePart type="family">Rijhwani</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Alexis</namePart>
        <namePart type="family">Palmer</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Katharina</namePart>
        <namePart type="family">Kann</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Toronto, Canada</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>We present the first neural machine translation system for the low-resource language pair Wayúunaiki–Spanish and explore strategies to inject linguistic knowledge into the model to improve translation quality. We explore a wide range of methods and combine complementary approaches. Results indicate that incorporating linguistic information through linguistically motivated subword segmentation, factored models, and pretrained embeddings helps the system to generate improved translations, with the segmentation contributing most. In order to evaluate translation quality in a general domain and go beyond the available religious domain data, we gather and make publicly available a new test set and supplementary material. Although translation quality as measured with automatic metrics is low, we hope these resources will facilitate and support further research on Wayúunaiki.</abstract>
    <identifier type="citekey">graichen-etal-2023-enriching</identifier>
    <identifier type="doi">10.18653/v1/2023.americasnlp-1.9</identifier>
    <location>
      <url>https://aclanthology.org/2023.americasnlp-1.9</url>
    </location>
    <!-- Position of the paper within the host volume -->
    <part>
      <date>2023-07</date>
      <extent unit="page">
        <start>67</start>
        <end>83</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Enriching Wayúunaiki-Spanish Neural Machine Translation with Linguistic Information
%A Graichen, Nora
%A van Genabith, Josef
%A España-Bonet, Cristina
%Y Mager, Manuel
%Y Ebrahimi, Abteen
%Y Oncevay, Arturo
%Y Rice, Enora
%Y Rijhwani, Shruti
%Y Palmer, Alexis
%Y Kann, Katharina
%S Proceedings of the Workshop on Natural Language Processing for Indigenous Languages of the Americas (AmericasNLP)
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F graichen-etal-2023-enriching
%X We present the first neural machine translation system for the low-resource language pair Wayúunaiki–Spanish and explore strategies to inject linguistic knowledge into the model to improve translation quality. We explore a wide range of methods and combine complementary approaches. Results indicate that incorporating linguistic information through linguistically motivated subword segmentation, factored models, and pretrained embeddings helps the system to generate improved translations, with the segmentation contributing most. In order to evaluate translation quality in a general domain and go beyond the available religious domain data, we gather and make publicly available a new test set and supplementary material. Although translation quality as measured with automatic metrics is low, we hope these resources will facilitate and support further research on Wayúunaiki.
%R 10.18653/v1/2023.americasnlp-1.9
%U https://aclanthology.org/2023.americasnlp-1.9
%U https://doi.org/10.18653/v1/2023.americasnlp-1.9
%P 67-83
Markdown (Informal)
[Enriching Wayúunaiki-Spanish Neural Machine Translation with Linguistic Information](https://aclanthology.org/2023.americasnlp-1.9) (Graichen et al., AmericasNLP 2023)
ACL