@inproceedings{dereza-etal-2023-trust,
title = "Do not Trust the Experts - How the Lack of Standard Complicates {NLP} for Historical {I}rish",
author = "Dereza, Oksana and
Fransen, Theodorus and
McCrae, John P.",
editor = "Tafreshi, Shabnam and
Akula, Arjun and
Sedoc, Jo{\~a}o and
Drozd, Aleksandr and
Rogers, Anna and
Rumshisky, Anna",
booktitle = "Proceedings of the Fourth Workshop on Insights from Negative Results in NLP",
month = may,
year = "2023",
address = "Dubrovnik, Croatia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.insights-1.10",
doi = "10.18653/v1/2023.insights-1.10",
pages = "82--87",
abstract = "In this paper, we describe how we unearthed some fundamental problems while building an analogy dataset modelled on BATS (Gladkova et al., 2016) to evaluate historical Irish embeddings on their ability to detect orthographic, morphological and semantic similarity. The performance of our models in the analogy task was extremely poor regardless of the architecture, hyperparameters and evaluation metrics, while the qualitative evaluation revealed positive tendencies. We argue that low agreement between field experts on fundamental lexical and orthographic issues, and the lack of a unified editorial standard in available resources make it impossible to build reliable evaluation datasets for computational models and obtain interpretable results. We emphasise the need for such a standard, particularly for NLP applications, and prompt Celticists and historical linguists to engage in further discussion. We would also like to draw NLP scholars{'} attention to the role of data and its (extra)linguistic properties in testing new models, technologies and evaluation scenarios.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="dereza-etal-2023-trust">
<titleInfo>
<title>Do not Trust the Experts - How the Lack of Standard Complicates NLP for Historical Irish</title>
</titleInfo>
<name type="personal">
<namePart type="given">Oksana</namePart>
<namePart type="family">Dereza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Theodorus</namePart>
<namePart type="family">Fransen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">John</namePart>
<namePart type="given">P</namePart>
<namePart type="family">McCrae</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fourth Workshop on Insights from Negative Results in NLP</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shabnam</namePart>
<namePart type="family">Tafreshi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arjun</namePart>
<namePart type="family">Akula</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">João</namePart>
<namePart type="family">Sedoc</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aleksandr</namePart>
<namePart type="family">Drozd</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Rogers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Rumshisky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dubrovnik, Croatia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we describe how we unearthed some fundamental problems while building an analogy dataset modelled on BATS (Gladkova et al., 2016) to evaluate historical Irish embeddings on their ability to detect orthographic, morphological and semantic similarity. The performance of our models in the analogy task was extremely poor regardless of the architecture, hyperparameters and evaluation metrics, while the qualitative evaluation revealed positive tendencies. We argue that low agreement between field experts on fundamental lexical and orthographic issues, and the lack of a unified editorial standard in available resources make it impossible to build reliable evaluation datasets for computational models and obtain interpretable results. We emphasise the need for such a standard, particularly for NLP applications, and prompt Celticists and historical linguists to engage in further discussion. We would also like to draw NLP scholars’ attention to the role of data and its (extra)linguistic properties in testing new models, technologies and evaluation scenarios.</abstract>
<identifier type="citekey">dereza-etal-2023-trust</identifier>
<identifier type="doi">10.18653/v1/2023.insights-1.10</identifier>
<location>
<url>https://aclanthology.org/2023.insights-1.10</url>
</location>
<part>
<date>2023-05</date>
<extent unit="page">
<start>82</start>
<end>87</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Do not Trust the Experts - How the Lack of Standard Complicates NLP for Historical Irish
%A Dereza, Oksana
%A Fransen, Theodorus
%A McCrae, John P.
%Y Tafreshi, Shabnam
%Y Akula, Arjun
%Y Sedoc, João
%Y Drozd, Aleksandr
%Y Rogers, Anna
%Y Rumshisky, Anna
%S Proceedings of the Fourth Workshop on Insights from Negative Results in NLP
%D 2023
%8 May
%I Association for Computational Linguistics
%C Dubrovnik, Croatia
%F dereza-etal-2023-trust
%X In this paper, we describe how we unearthed some fundamental problems while building an analogy dataset modelled on BATS (Gladkova et al., 2016) to evaluate historical Irish embeddings on their ability to detect orthographic, morphological and semantic similarity. The performance of our models in the analogy task was extremely poor regardless of the architecture, hyperparameters and evaluation metrics, while the qualitative evaluation revealed positive tendencies. We argue that low agreement between field experts on fundamental lexical and orthographic issues, and the lack of a unified editorial standard in available resources make it impossible to build reliable evaluation datasets for computational models and obtain interpretable results. We emphasise the need for such a standard, particularly for NLP applications, and prompt Celticists and historical linguists to engage in further discussion. We would also like to draw NLP scholars’ attention to the role of data and its (extra)linguistic properties in testing new models, technologies and evaluation scenarios.
%R 10.18653/v1/2023.insights-1.10
%U https://aclanthology.org/2023.insights-1.10
%U https://doi.org/10.18653/v1/2023.insights-1.10
%P 82-87
Markdown (Informal)
[Do not Trust the Experts - How the Lack of Standard Complicates NLP for Historical Irish](https://aclanthology.org/2023.insights-1.10) (Dereza et al., insights-WS 2023)
ACL