@inproceedings{scherrer-etal-2023-helsinki,
title = "The {H}elsinki-{NLP} Submissions at {NADI} 2023 Shared Task: Walking the Baseline",
author = "Scherrer, Yves and
Mileti{\'c}, Aleksandra and
Kuparinen, Olli",
editor = "Sawaf, Hassan and
El-Beltagy, Samhaa and
Zaghouani, Wajdi and
Magdy, Walid and
Abdelali, Ahmed and
Tomeh, Nadi and
Abu Farha, Ibrahim and
Habash, Nizar and
Khalifa, Salam and
Keleg, Amr and
Haddad, Hatem and
Zitouni, Imed and
Mrini, Khalil and
Almatham, Rawan",
booktitle = "Proceedings of ArabicNLP 2023",
month = dec,
year = "2023",
address = "Singapore (Hybrid)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.arabicnlp-1.73",
doi = "10.18653/v1/2023.arabicnlp-1.73",
pages = "670--677",
abstract = "The Helsinki-NLP team participated in the NADI 2023 shared tasks on Arabic dialect translation with seven submissions. We used statistical (SMT) and neural machine translation (NMT) methods and explored character- and subword-based data preprocessing. Our submissions placed second in both tracks. In the open track, our winning submission is a character-level SMT system with additional Modern Standard Arabic language models. In the closed track, our best BLEU scores were obtained with the leave-as-is baseline, a simple copy of the input, and narrowly followed by SMT systems. In both tracks, fine-tuning existing multilingual models such as AraT5 or ByT5 did not yield superior performance compared to SMT.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="scherrer-etal-2023-helsinki">
<titleInfo>
<title>The Helsinki-NLP Submissions at NADI 2023 Shared Task: Walking the Baseline</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yves</namePart>
<namePart type="family">Scherrer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aleksandra</namePart>
<namePart type="family">Miletić</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Olli</namePart>
<namePart type="family">Kuparinen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of ArabicNLP 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hassan</namePart>
<namePart type="family">Sawaf</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Samhaa</namePart>
<namePart type="family">El-Beltagy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wajdi</namePart>
<namePart type="family">Zaghouani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Walid</namePart>
<namePart type="family">Magdy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ahmed</namePart>
<namePart type="family">Abdelali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nadi</namePart>
<namePart type="family">Tomeh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ibrahim</namePart>
<namePart type="family">Abu Farha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nizar</namePart>
<namePart type="family">Habash</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Salam</namePart>
<namePart type="family">Khalifa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amr</namePart>
<namePart type="family">Keleg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hatem</namePart>
<namePart type="family">Haddad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Imed</namePart>
<namePart type="family">Zitouni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalil</namePart>
<namePart type="family">Mrini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rawan</namePart>
<namePart type="family">Almatham</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore (Hybrid)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The Helsinki-NLP team participated in the NADI 2023 shared tasks on Arabic dialect translation with seven submissions. We used statistical (SMT) and neural machine translation (NMT) methods and explored character- and subword-based data preprocessing. Our submissions placed second in both tracks. In the open track, our winning submission is a character-level SMT system with additional Modern Standard Arabic language models. In the closed track, our best BLEU scores were obtained with the leave-as-is baseline, a simple copy of the input, and narrowly followed by SMT systems. In both tracks, fine-tuning existing multilingual models such as AraT5 or ByT5 did not yield superior performance compared to SMT.</abstract>
<identifier type="citekey">scherrer-etal-2023-helsinki</identifier>
<identifier type="doi">10.18653/v1/2023.arabicnlp-1.73</identifier>
<location>
<url>https://aclanthology.org/2023.arabicnlp-1.73</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>670</start>
<end>677</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T The Helsinki-NLP Submissions at NADI 2023 Shared Task: Walking the Baseline
%A Scherrer, Yves
%A Miletić, Aleksandra
%A Kuparinen, Olli
%Y Sawaf, Hassan
%Y El-Beltagy, Samhaa
%Y Zaghouani, Wajdi
%Y Magdy, Walid
%Y Abdelali, Ahmed
%Y Tomeh, Nadi
%Y Abu Farha, Ibrahim
%Y Habash, Nizar
%Y Khalifa, Salam
%Y Keleg, Amr
%Y Haddad, Hatem
%Y Zitouni, Imed
%Y Mrini, Khalil
%Y Almatham, Rawan
%S Proceedings of ArabicNLP 2023
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore (Hybrid)
%F scherrer-etal-2023-helsinki
%X The Helsinki-NLP team participated in the NADI 2023 shared tasks on Arabic dialect translation with seven submissions. We used statistical (SMT) and neural machine translation (NMT) methods and explored character- and subword-based data preprocessing. Our submissions placed second in both tracks. In the open track, our winning submission is a character-level SMT system with additional Modern Standard Arabic language models. In the closed track, our best BLEU scores were obtained with the leave-as-is baseline, a simple copy of the input, and narrowly followed by SMT systems. In both tracks, fine-tuning existing multilingual models such as AraT5 or ByT5 did not yield superior performance compared to SMT.
%R 10.18653/v1/2023.arabicnlp-1.73
%U https://aclanthology.org/2023.arabicnlp-1.73
%U https://doi.org/10.18653/v1/2023.arabicnlp-1.73
%P 670-677
Markdown (Informal)
[The Helsinki-NLP Submissions at NADI 2023 Shared Task: Walking the Baseline](https://aclanthology.org/2023.arabicnlp-1.73) (Scherrer et al., ArabicNLP-WS 2023)
ACL