@inproceedings{kuulmets-fishel-2023-translated,
title = "Translated Benchmarks Can Be Misleading: the Case of {E}stonian Question Answering",
author = "Kuulmets, Hele-Andra and
Fishel, Mark",
editor = {Alum{\"a}e, Tanel and
Fishel, Mark},
booktitle = "Proceedings of the 24th Nordic Conference on Computational Linguistics (NoDaLiDa)",
month = may,
year = "2023",
address = "T{\'o}rshavn, Faroe Islands",
publisher = "University of Tartu Library",
url = "https://aclanthology.org/2023.nodalida-1.71/",
pages = "710--716",
abstract = "Translated test datasets are a popular and cheaper alternative to native test datasets. However, one of the properties of translated data is the existence of cultural knowledge unfamiliar to the target language speakers. This can make translated test datasets differ significantly from native target datasets. As a result, we might inaccurately estimate the performance of the models in the target language. In this paper, we use both native and translated Estonian QA datasets to study this topic more closely. We discover that relying on the translated test dataset results in an overestimation of the model's performance on native Estonian data."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kuulmets-fishel-2023-translated">
<titleInfo>
<title>Translated Benchmarks Can Be Misleading: the Case of Estonian Question Answering</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hele-Andra</namePart>
<namePart type="family">Kuulmets</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mark</namePart>
<namePart type="family">Fishel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 24th Nordic Conference on Computational Linguistics (NoDaLiDa)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tanel</namePart>
<namePart type="family">Alumäe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mark</namePart>
<namePart type="family">Fishel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>University of Tartu Library</publisher>
<place>
<placeTerm type="text">Tórshavn, Faroe Islands</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Translated test datasets are a popular and cheaper alternative to native test datasets. However, one of the properties of translated data is the existence of cultural knowledge unfamiliar to the target language speakers. This can make translated test datasets differ significantly from native target datasets. As a result, we might inaccurately estimate the performance of the models in the target language. In this paper, we use both native and translated Estonian QA datasets to study this topic more closely. We discover that relying on the translated test dataset results in an overestimation of the model's performance on native Estonian data.</abstract>
<identifier type="citekey">kuulmets-fishel-2023-translated</identifier>
<location>
<url>https://aclanthology.org/2023.nodalida-1.71/</url>
</location>
<part>
<date>2023-05</date>
<extent unit="page">
<start>710</start>
<end>716</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Translated Benchmarks Can Be Misleading: the Case of Estonian Question Answering
%A Kuulmets, Hele-Andra
%A Fishel, Mark
%Y Alumäe, Tanel
%Y Fishel, Mark
%S Proceedings of the 24th Nordic Conference on Computational Linguistics (NoDaLiDa)
%D 2023
%8 May
%I University of Tartu Library
%C Tórshavn, Faroe Islands
%F kuulmets-fishel-2023-translated
%X Translated test datasets are a popular and cheaper alternative to native test datasets. However, one of the properties of translated data is the existence of cultural knowledge unfamiliar to the target language speakers. This can make translated test datasets differ significantly from native target datasets. As a result, we might inaccurately estimate the performance of the models in the target language. In this paper, we use both native and translated Estonian QA datasets to study this topic more closely. We discover that relying on the translated test dataset results in an overestimation of the model's performance on native Estonian data.
%U https://aclanthology.org/2023.nodalida-1.71/
%P 710-716
Markdown (Informal)
[Translated Benchmarks Can Be Misleading: the Case of Estonian Question Answering](https://aclanthology.org/2023.nodalida-1.71/) (Kuulmets & Fishel, NoDaLiDa 2023)
ACL