@inproceedings{wang-etal-2022-helping,
title = "Helping the Weak Makes You Strong: Simple Multi-Task Learning Improves Non-Autoregressive Translators",
author = "Wang, Xinyou and
Zheng, Zaixiang and
Huang, Shujian",
editor = "Goldberg, Yoav and
Kozareva, Zornitsa and
Zhang, Yue",
booktitle = "Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.emnlp-main.371",
doi = "10.18653/v1/2022.emnlp-main.371",
pages = "5513--5519",
abstract = "Recently, non-autoregressive (NAR) neural machine translation models have received increasing attention due to their efficient parallel decoding.However, the probabilistic framework of NAR models necessitates conditional independence assumption on target sequences, falling short of characterizing human language data.This drawback results in less informative learning signals for NAR models under conventional MLE training, thereby yielding unsatisfactory accuracy compared to their autoregressive (AR) counterparts.In this paper, we propose a simple and model-agnostic multi-task learning framework to provide more informative learning signals.During training stage, we introduce a set of sufficiently weak AR decoders that solely rely on the information provided by NAR decoder to make prediction, forcing the NAR decoder to become stronger or else it will be unable to support its weak AR partners.Experiments on WMT and IWSLT datasets show that our approach can consistently improve accuracy of multiple NAR baselines without adding any additional decoding overhead.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wang-etal-2022-helping">
<titleInfo>
<title>Helping the Weak Makes You Strong: Simple Multi-Task Learning Improves Non-Autoregressive Translators</title>
</titleInfo>
<name type="personal">
<namePart type="given">Xinyou</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zaixiang</namePart>
<namePart type="family">Zheng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shujian</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yoav</namePart>
<namePart type="family">Goldberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zornitsa</namePart>
<namePart type="family">Kozareva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yue</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, United Arab Emirates</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Recently, non-autoregressive (NAR) neural machine translation models have received increasing attention due to their efficient parallel decoding.However, the probabilistic framework of NAR models necessitates conditional independence assumption on target sequences, falling short of characterizing human language data.This drawback results in less informative learning signals for NAR models under conventional MLE training, thereby yielding unsatisfactory accuracy compared to their autoregressive (AR) counterparts.In this paper, we propose a simple and model-agnostic multi-task learning framework to provide more informative learning signals.During training stage, we introduce a set of sufficiently weak AR decoders that solely rely on the information provided by NAR decoder to make prediction, forcing the NAR decoder to become stronger or else it will be unable to support its weak AR partners.Experiments on WMT and IWSLT datasets show that our approach can consistently improve accuracy of multiple NAR baselines without adding any additional decoding overhead.</abstract>
<identifier type="citekey">wang-etal-2022-helping</identifier>
<identifier type="doi">10.18653/v1/2022.emnlp-main.371</identifier>
<location>
<url>https://aclanthology.org/2022.emnlp-main.371</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>5513</start>
<end>5519</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Helping the Weak Makes You Strong: Simple Multi-Task Learning Improves Non-Autoregressive Translators
%A Wang, Xinyou
%A Zheng, Zaixiang
%A Huang, Shujian
%Y Goldberg, Yoav
%Y Kozareva, Zornitsa
%Y Zhang, Yue
%S Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates
%F wang-etal-2022-helping
%X Recently, non-autoregressive (NAR) neural machine translation models have received increasing attention due to their efficient parallel decoding.However, the probabilistic framework of NAR models necessitates conditional independence assumption on target sequences, falling short of characterizing human language data.This drawback results in less informative learning signals for NAR models under conventional MLE training, thereby yielding unsatisfactory accuracy compared to their autoregressive (AR) counterparts.In this paper, we propose a simple and model-agnostic multi-task learning framework to provide more informative learning signals.During training stage, we introduce a set of sufficiently weak AR decoders that solely rely on the information provided by NAR decoder to make prediction, forcing the NAR decoder to become stronger or else it will be unable to support its weak AR partners.Experiments on WMT and IWSLT datasets show that our approach can consistently improve accuracy of multiple NAR baselines without adding any additional decoding overhead.
%R 10.18653/v1/2022.emnlp-main.371
%U https://aclanthology.org/2022.emnlp-main.371
%U https://doi.org/10.18653/v1/2022.emnlp-main.371
%P 5513-5519
Markdown (Informal)
[Helping the Weak Makes You Strong: Simple Multi-Task Learning Improves Non-Autoregressive Translators](https://aclanthology.org/2022.emnlp-main.371) (Wang et al., EMNLP 2022)
ACL