@inproceedings{vamvas-etal-2024-modular,
title = "Modular Adaptation of Multilingual Encoders to Written {S}wiss {G}erman Dialect",
author = {Vamvas, Jannis and
Aepli, No{\"e}mi and
Sennrich, Rico},
editor = {V{\'a}zquez, Ra{\'u}l and
Mickus, Timothee and
Tiedemann, J{\"o}rg and
Vuli{\'c}, Ivan and
{\"U}st{\"u}n, Ahmet},
booktitle = "Proceedings of the 1st Workshop on Modular and Open Multilingual NLP (MOOMIN 2024)",
month = mar,
year = "2024",
    address = "St. Julian's, Malta",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.moomin-1.3",
pages = "16--23",
abstract = "Creating neural text encoders for written Swiss German is challenging due to a dearth of training data combined with dialectal variation. In this paper, we build on several existing multilingual encoders and adapt them to Swiss German using continued pre-training. Evaluation on three diverse downstream tasks shows that simply adding a Swiss German adapter to a modular encoder achieves 97.5{\%} of fully monolithic adaptation performance. We further find that for the task of retrieving Swiss German sentences given Standard German queries, adapting a character-level model is more effective than the other adaptation strategies. We release our code and the models trained for our experiments.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="vamvas-etal-2024-modular">
<titleInfo>
<title>Modular Adaptation of Multilingual Encoders to Written Swiss German Dialect</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jannis</namePart>
<namePart type="family">Vamvas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Noëmi</namePart>
<namePart type="family">Aepli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rico</namePart>
<namePart type="family">Sennrich</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st Workshop on Modular and Open Multilingual NLP (MOOMIN 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Raúl</namePart>
<namePart type="family">Vázquez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Timothee</namePart>
<namePart type="family">Mickus</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jörg</namePart>
<namePart type="family">Tiedemann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ivan</namePart>
<namePart type="family">Vulić</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ahmet</namePart>
<namePart type="family">Üstün</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
      <placeTerm type="text">St. Julian's, Malta</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Creating neural text encoders for written Swiss German is challenging due to a dearth of training data combined with dialectal variation. In this paper, we build on several existing multilingual encoders and adapt them to Swiss German using continued pre-training. Evaluation on three diverse downstream tasks shows that simply adding a Swiss German adapter to a modular encoder achieves 97.5% of fully monolithic adaptation performance. We further find that for the task of retrieving Swiss German sentences given Standard German queries, adapting a character-level model is more effective than the other adaptation strategies. We release our code and the models trained for our experiments.</abstract>
<identifier type="citekey">vamvas-etal-2024-modular</identifier>
<location>
<url>https://aclanthology.org/2024.moomin-1.3</url>
</location>
<part>
<date>2024-03</date>
<extent unit="page">
<start>16</start>
<end>23</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Modular Adaptation of Multilingual Encoders to Written Swiss German Dialect
%A Vamvas, Jannis
%A Aepli, Noëmi
%A Sennrich, Rico
%Y Vázquez, Raúl
%Y Mickus, Timothee
%Y Tiedemann, Jörg
%Y Vulić, Ivan
%Y Üstün, Ahmet
%S Proceedings of the 1st Workshop on Modular and Open Multilingual NLP (MOOMIN 2024)
%D 2024
%8 March
%I Association for Computational Linguistics
%C St. Julian's, Malta
%F vamvas-etal-2024-modular
%X Creating neural text encoders for written Swiss German is challenging due to a dearth of training data combined with dialectal variation. In this paper, we build on several existing multilingual encoders and adapt them to Swiss German using continued pre-training. Evaluation on three diverse downstream tasks shows that simply adding a Swiss German adapter to a modular encoder achieves 97.5% of fully monolithic adaptation performance. We further find that for the task of retrieving Swiss German sentences given Standard German queries, adapting a character-level model is more effective than the other adaptation strategies. We release our code and the models trained for our experiments.
%U https://aclanthology.org/2024.moomin-1.3
%P 16-23
Markdown (Informal)
[Modular Adaptation of Multilingual Encoders to Written Swiss German Dialect](https://aclanthology.org/2024.moomin-1.3) (Vamvas et al., MOOMIN-WS 2024)
ACL