@inproceedings{wu-etal-2022-zero,
title = "Zero-shot Cross-lingual Transfer is Under-specified Optimization",
author = "Wu, Shijie and
Van Durme, Benjamin and
Dredze, Mark",
editor = "Gella, Spandana and
He, He and
Majumder, Bodhisattwa Prasad and
Can, Burcu and
Giunchiglia, Eleonora and
Cahyawijaya, Samuel and
Min, Sewon and
Mozes, Maximilian and
Li, Xiang Lorraine and
Augenstein, Isabelle and
Rogers, Anna and
Cho, Kyunghyun and
Grefenstette, Edward and
Rimell, Laura and
Dyer, Chris",
booktitle = "Proceedings of the 7th Workshop on Representation Learning for NLP",
month = may,
year = "2022",
address = "Dublin, Ireland",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.repl4nlp-1.25/",
doi = "10.18653/v1/2022.repl4nlp-1.25",
pages = "236--248",
abstract = "Pretrained multilingual encoders enable zero-shot cross-lingual transfer, but often produce unreliable models that exhibit high performance variance on the target language. We postulate that this high variance results from zero-shot cross-lingual transfer solving an under-specified optimization problem. We show that any linear-interpolated model between the source language monolingual model and source + target bilingual model has equally low source language generalization error, yet the target language generalization error reduces smoothly and linearly as we move from the monolingual to bilingual model, suggesting that the model struggles to identify good solutions for both source and target languages using the source language alone. Additionally, we show that zero-shot solution lies in non-flat region of target language error generalization surface, causing the high variance."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wu-etal-2022-zero">
<titleInfo>
<title>Zero-shot Cross-lingual Transfer is Under-specified Optimization</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shijie</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Benjamin</namePart>
<namePart type="family">Van Durme</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mark</namePart>
<namePart type="family">Dredze</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 7th Workshop on Representation Learning for NLP</title>
</titleInfo>
<name type="personal">
<namePart type="given">Spandana</namePart>
<namePart type="family">Gella</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">He</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bodhisattwa</namePart>
<namePart type="given">Prasad</namePart>
<namePart type="family">Majumder</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Burcu</namePart>
<namePart type="family">Can</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eleonora</namePart>
<namePart type="family">Giunchiglia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Samuel</namePart>
<namePart type="family">Cahyawijaya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sewon</namePart>
<namePart type="family">Min</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maximilian</namePart>
<namePart type="family">Mozes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiang</namePart>
<namePart type="given">Lorraine</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Isabelle</namePart>
<namePart type="family">Augenstein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Rogers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kyunghyun</namePart>
<namePart type="family">Cho</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Edward</namePart>
<namePart type="family">Grefenstette</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Laura</namePart>
<namePart type="family">Rimell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chris</namePart>
<namePart type="family">Dyer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dublin, Ireland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Pretrained multilingual encoders enable zero-shot cross-lingual transfer, but often produce unreliable models that exhibit high performance variance on the target language. We postulate that this high variance results from zero-shot cross-lingual transfer solving an under-specified optimization problem. We show that any linear-interpolated model between the source language monolingual model and source + target bilingual model has equally low source language generalization error, yet the target language generalization error reduces smoothly and linearly as we move from the monolingual to bilingual model, suggesting that the model struggles to identify good solutions for both source and target languages using the source language alone. Additionally, we show that zero-shot solution lies in non-flat region of target language error generalization surface, causing the high variance.</abstract>
<identifier type="citekey">wu-etal-2022-zero</identifier>
<identifier type="doi">10.18653/v1/2022.repl4nlp-1.25</identifier>
<location>
<url>https://aclanthology.org/2022.repl4nlp-1.25/</url>
</location>
<part>
<date>2022-05</date>
<extent unit="page">
<start>236</start>
<end>248</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Zero-shot Cross-lingual Transfer is Under-specified Optimization
%A Wu, Shijie
%A Van Durme, Benjamin
%A Dredze, Mark
%Y Gella, Spandana
%Y He, He
%Y Majumder, Bodhisattwa Prasad
%Y Can, Burcu
%Y Giunchiglia, Eleonora
%Y Cahyawijaya, Samuel
%Y Min, Sewon
%Y Mozes, Maximilian
%Y Li, Xiang Lorraine
%Y Augenstein, Isabelle
%Y Rogers, Anna
%Y Cho, Kyunghyun
%Y Grefenstette, Edward
%Y Rimell, Laura
%Y Dyer, Chris
%S Proceedings of the 7th Workshop on Representation Learning for NLP
%D 2022
%8 May
%I Association for Computational Linguistics
%C Dublin, Ireland
%F wu-etal-2022-zero
%X Pretrained multilingual encoders enable zero-shot cross-lingual transfer, but often produce unreliable models that exhibit high performance variance on the target language. We postulate that this high variance results from zero-shot cross-lingual transfer solving an under-specified optimization problem. We show that any linear-interpolated model between the source language monolingual model and source + target bilingual model has equally low source language generalization error, yet the target language generalization error reduces smoothly and linearly as we move from the monolingual to bilingual model, suggesting that the model struggles to identify good solutions for both source and target languages using the source language alone. Additionally, we show that zero-shot solution lies in non-flat region of target language error generalization surface, causing the high variance.
%R 10.18653/v1/2022.repl4nlp-1.25
%U https://aclanthology.org/2022.repl4nlp-1.25/
%U https://doi.org/10.18653/v1/2022.repl4nlp-1.25
%P 236-248
Markdown (Informal)
[Zero-shot Cross-lingual Transfer is Under-specified Optimization](https://aclanthology.org/2022.repl4nlp-1.25/) (Wu et al., RepL4NLP 2022)
ACL