@inproceedings{stap-araabi-2023-chatgpt,
title = "{C}hat{GPT} is not a good indigenous translator",
author = "Stap, David and
Araabi, Ali",
editor = "Mager, Manuel and
Ebrahimi, Abteen and
Oncevay, Arturo and
Rice, Enora and
Rijhwani, Shruti and
Palmer, Alexis and
Kann, Katharina",
booktitle = "Proceedings of the Workshop on Natural Language Processing for Indigenous Languages of the Americas (AmericasNLP)",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.americasnlp-1.17",
doi = "10.18653/v1/2023.americasnlp-1.17",
pages = "163--167",
abstract = "This report investigates the continuous challenges of Machine Translation (MT) systems on indigenous and extremely low-resource language pairs. Despite the notable achievements of Large Language Models (LLMs) that excel in various tasks, their applicability to low-resource languages remains questionable. In this study, we leveraged the AmericasNLP competition to evaluate the translation performance of different systems for Spanish to 11 indigenous languages from South America. Our team, LTLAmsterdam, submitted a total of four systems including GPT-4, a bilingual model, fine-tuned M2M100, and a combination of fine-tuned M2M100 with {\$}k{\$}NN-MT. We found that even large language models like GPT-4 are not well-suited for extremely low-resource languages. Our results suggest that fine-tuning M2M100 models can offer significantly better performance for extremely low-resource translation.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="stap-araabi-2023-chatgpt">
<titleInfo>
<title>ChatGPT is not a good indigenous translator</title>
</titleInfo>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Stap</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ali</namePart>
<namePart type="family">Araabi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Workshop on Natural Language Processing for Indigenous Languages of the Americas (AmericasNLP)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Manuel</namePart>
<namePart type="family">Mager</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abteen</namePart>
<namePart type="family">Ebrahimi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arturo</namePart>
<namePart type="family">Oncevay</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Enora</namePart>
<namePart type="family">Rice</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shruti</namePart>
<namePart type="family">Rijhwani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexis</namePart>
<namePart type="family">Palmer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Katharina</namePart>
<namePart type="family">Kann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This report investigates the continuous challenges of Machine Translation (MT) systems on indigenous and extremely low-resource language pairs. Despite the notable achievements of Large Language Models (LLMs) that excel in various tasks, their applicability to low-resource languages remains questionable. In this study, we leveraged the AmericasNLP competition to evaluate the translation performance of different systems for Spanish to 11 indigenous languages from South America. Our team, LTLAmsterdam, submitted a total of four systems including GPT-4, a bilingual model, fine-tuned M2M100, and a combination of fine-tuned M2M100 with $k$NN-MT. We found that even large language models like GPT-4 are not well-suited for extremely low-resource languages. Our results suggest that fine-tuning M2M100 models can offer significantly better performance for extremely low-resource translation.</abstract>
<identifier type="citekey">stap-araabi-2023-chatgpt</identifier>
<identifier type="doi">10.18653/v1/2023.americasnlp-1.17</identifier>
<location>
<url>https://aclanthology.org/2023.americasnlp-1.17</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>163</start>
<end>167</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T ChatGPT is not a good indigenous translator
%A Stap, David
%A Araabi, Ali
%Y Mager, Manuel
%Y Ebrahimi, Abteen
%Y Oncevay, Arturo
%Y Rice, Enora
%Y Rijhwani, Shruti
%Y Palmer, Alexis
%Y Kann, Katharina
%S Proceedings of the Workshop on Natural Language Processing for Indigenous Languages of the Americas (AmericasNLP)
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F stap-araabi-2023-chatgpt
%X This report investigates the continuous challenges of Machine Translation (MT) systems on indigenous and extremely low-resource language pairs. Despite the notable achievements of Large Language Models (LLMs) that excel in various tasks, their applicability to low-resource languages remains questionable. In this study, we leveraged the AmericasNLP competition to evaluate the translation performance of different systems for Spanish to 11 indigenous languages from South America. Our team, LTLAmsterdam, submitted a total of four systems including GPT-4, a bilingual model, fine-tuned M2M100, and a combination of fine-tuned M2M100 with $k$NN-MT. We found that even large language models like GPT-4 are not well-suited for extremely low-resource languages. Our results suggest that fine-tuning M2M100 models can offer significantly better performance for extremely low-resource translation.
%R 10.18653/v1/2023.americasnlp-1.17
%U https://aclanthology.org/2023.americasnlp-1.17
%U https://doi.org/10.18653/v1/2023.americasnlp-1.17
%P 163-167
Markdown (Informal)
[ChatGPT is not a good indigenous translator](https://aclanthology.org/2023.americasnlp-1.17) (Stap & Araabi, AmericasNLP 2023)
ACL
- David Stap and Ali Araabi. 2023. ChatGPT is not a good indigenous translator. In Proceedings of the Workshop on Natural Language Processing for Indigenous Languages of the Americas (AmericasNLP), pages 163–167, Toronto, Canada. Association for Computational Linguistics.