BibTeX
@inproceedings{choenni-etal-2023-languages,
title = "How do languages influence each other? Studying cross-lingual data sharing during {LM} fine-tuning",
author = "Choenni, Rochelle and
Garrette, Dan and
Shutova, Ekaterina",
editor = "Bouamor, Houda and
Pino, Juan and
Bali, Kalika",
booktitle = "Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.emnlp-main.818",
doi = "10.18653/v1/2023.emnlp-main.818",
pages = "13244--13257",
abstract = "Multilingual language models (MLMs) are jointly trained on data from many different languages such that representation of individual languages can benefit from other languages{'} data. Impressive performance in zero-shot cross-lingual transfer shows that these models are able to exploit this property. Yet, it remains unclear to what extent, and under which conditions, languages rely on each other{'}s data. To answer this question, we use TracIn (Pruthi et al., 2020), a training data attribution (TDA) method, to retrieve training samples from multilingual data that are most influential for test predictions in a given language. This allows us to analyse cross-lingual sharing mechanisms of MLMs from a new perspective. While previous work studied cross-lingual sharing at the model parameter level, we present the first approach to study it at the data level. We find that MLMs rely on data from multiple languages during fine-tuning and this reliance increases as fine-tuning progresses. We further find that training samples from other languages can both reinforce and complement the knowledge acquired from data of the test language itself.",
}
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="choenni-etal-2023-languages">
    <titleInfo>
      <title>How do languages influence each other? Studying cross-lingual data sharing during LM fine-tuning</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Rochelle</namePart>
      <namePart type="family">Choenni</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Dan</namePart>
      <namePart type="family">Garrette</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ekaterina</namePart>
      <namePart type="family">Shutova</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2023-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Houda</namePart>
        <namePart type="family">Bouamor</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Juan</namePart>
        <namePart type="family">Pino</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Kalika</namePart>
        <namePart type="family">Bali</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Singapore</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Multilingual language models (MLMs) are jointly trained on data from many different languages such that representation of individual languages can benefit from other languages’ data. Impressive performance in zero-shot cross-lingual transfer shows that these models are able to exploit this property. Yet, it remains unclear to what extent, and under which conditions, languages rely on each other’s data. To answer this question, we use TracIn (Pruthi et al., 2020), a training data attribution (TDA) method, to retrieve training samples from multilingual data that are most influential for test predictions in a given language. This allows us to analyse cross-lingual sharing mechanisms of MLMs from a new perspective. While previous work studied cross-lingual sharing at the model parameter level, we present the first approach to study it at the data level. We find that MLMs rely on data from multiple languages during fine-tuning and this reliance increases as fine-tuning progresses. We further find that training samples from other languages can both reinforce and complement the knowledge acquired from data of the test language itself.</abstract>
    <identifier type="citekey">choenni-etal-2023-languages</identifier>
    <identifier type="doi">10.18653/v1/2023.emnlp-main.818</identifier>
    <location>
      <url>https://aclanthology.org/2023.emnlp-main.818</url>
    </location>
    <part>
      <date>2023-12</date>
      <extent unit="page">
        <start>13244</start>
        <end>13257</end>
      </extent>
    </part>
  </mods>
</modsCollection>
Endnote
%0 Conference Proceedings
%T How do languages influence each other? Studying cross-lingual data sharing during LM fine-tuning
%A Choenni, Rochelle
%A Garrette, Dan
%A Shutova, Ekaterina
%Y Bouamor, Houda
%Y Pino, Juan
%Y Bali, Kalika
%S Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F choenni-etal-2023-languages
%X Multilingual language models (MLMs) are jointly trained on data from many different languages such that representation of individual languages can benefit from other languages’ data. Impressive performance in zero-shot cross-lingual transfer shows that these models are able to exploit this property. Yet, it remains unclear to what extent, and under which conditions, languages rely on each other’s data. To answer this question, we use TracIn (Pruthi et al., 2020), a training data attribution (TDA) method, to retrieve training samples from multilingual data that are most influential for test predictions in a given language. This allows us to analyse cross-lingual sharing mechanisms of MLMs from a new perspective. While previous work studied cross-lingual sharing at the model parameter level, we present the first approach to study it at the data level. We find that MLMs rely on data from multiple languages during fine-tuning and this reliance increases as fine-tuning progresses. We further find that training samples from other languages can both reinforce and complement the knowledge acquired from data of the test language itself.
%R 10.18653/v1/2023.emnlp-main.818
%U https://aclanthology.org/2023.emnlp-main.818
%U https://doi.org/10.18653/v1/2023.emnlp-main.818
%P 13244-13257
Markdown (Informal)
[How do languages influence each other? Studying cross-lingual data sharing during LM fine-tuning](https://aclanthology.org/2023.emnlp-main.818) (Choenni et al., EMNLP 2023)
ACL
Rochelle Choenni, Dan Garrette, and Ekaterina Shutova. 2023. How do languages influence each other? Studying cross-lingual data sharing during LM fine-tuning. In Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing, pages 13244–13257, Singapore. Association for Computational Linguistics.