@inproceedings{zhou-etal-2020-improving,
title = "Improving Autoregressive {NMT} with Non-Autoregressive Model",
author = "Zhou, Long and
Zhang, Jiajun and
Zong, Chengqing",
editor = "Wu, Hua and
Cherry, Colin and
Huang, Liang and
He, Zhongjun and
Liberman, Mark and
Cross, James and
Liu, Yang",
booktitle = "Proceedings of the First Workshop on Automatic Simultaneous Translation",
month = jul,
year = "2020",
address = "Seattle, Washington",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.autosimtrans-1.4",
doi = "10.18653/v1/2020.autosimtrans-1.4",
pages = "24--29",
abstract = "Autoregressive neural machine translation (NMT) models are often used to teach non-autoregressive models via knowledge distillation. However, there are few studies on improving the quality of autoregressive translation (AT) using non-autoregressive translation (NAT). In this work, we propose a novel Encoder-NAD-AD framework for NMT, aiming at boosting AT with global information produced by NAT model. Specifically, under the semantic guidance of source-side context captured by the encoder, the non-autoregressive decoder (NAD) first learns to generate target-side hidden state sequence in parallel. Then the autoregressive decoder (AD) performs translation from left to right, conditioned on source-side and target-side hidden states. Since AD has global information generated by low-latency NAD, it is more likely to produce a better translation with less time delay. Experiments on WMT14 En-De, WMT16 En-Ro, and IWSLT14 De-En translation tasks demonstrate that our framework achieves significant improvements with only 8{\%} speed degeneration over the autoregressive NMT.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhou-etal-2020-improving">
<titleInfo>
<title>Improving Autoregressive NMT with Non-Autoregressive Model</title>
</titleInfo>
<name type="personal">
<namePart type="given">Long</namePart>
<namePart type="family">Zhou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiajun</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chengqing</namePart>
<namePart type="family">Zong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Automatic Simultaneous Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hua</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Colin</namePart>
<namePart type="family">Cherry</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Liang</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhongjun</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mark</namePart>
<namePart type="family">Liberman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">James</namePart>
<namePart type="family">Cross</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Seattle, Washington</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Autoregressive neural machine translation (NMT) models are often used to teach non-autoregressive models via knowledge distillation. However, there are few studies on improving the quality of autoregressive translation (AT) using non-autoregressive translation (NAT). In this work, we propose a novel Encoder-NAD-AD framework for NMT, aiming at boosting AT with global information produced by NAT model. Specifically, under the semantic guidance of source-side context captured by the encoder, the non-autoregressive decoder (NAD) first learns to generate target-side hidden state sequence in parallel. Then the autoregressive decoder (AD) performs translation from left to right, conditioned on source-side and target-side hidden states. Since AD has global information generated by low-latency NAD, it is more likely to produce a better translation with less time delay. Experiments on WMT14 En-De, WMT16 En-Ro, and IWSLT14 De-En translation tasks demonstrate that our framework achieves significant improvements with only 8% speed degeneration over the autoregressive NMT.</abstract>
<identifier type="citekey">zhou-etal-2020-improving</identifier>
<identifier type="doi">10.18653/v1/2020.autosimtrans-1.4</identifier>
<location>
<url>https://aclanthology.org/2020.autosimtrans-1.4</url>
</location>
<part>
<date>2020-07</date>
<extent unit="page">
<start>24</start>
<end>29</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Improving Autoregressive NMT with Non-Autoregressive Model
%A Zhou, Long
%A Zhang, Jiajun
%A Zong, Chengqing
%Y Wu, Hua
%Y Cherry, Colin
%Y Huang, Liang
%Y He, Zhongjun
%Y Liberman, Mark
%Y Cross, James
%Y Liu, Yang
%S Proceedings of the First Workshop on Automatic Simultaneous Translation
%D 2020
%8 July
%I Association for Computational Linguistics
%C Seattle, Washington
%F zhou-etal-2020-improving
%X Autoregressive neural machine translation (NMT) models are often used to teach non-autoregressive models via knowledge distillation. However, there are few studies on improving the quality of autoregressive translation (AT) using non-autoregressive translation (NAT). In this work, we propose a novel Encoder-NAD-AD framework for NMT, aiming at boosting AT with global information produced by NAT model. Specifically, under the semantic guidance of source-side context captured by the encoder, the non-autoregressive decoder (NAD) first learns to generate target-side hidden state sequence in parallel. Then the autoregressive decoder (AD) performs translation from left to right, conditioned on source-side and target-side hidden states. Since AD has global information generated by low-latency NAD, it is more likely to produce a better translation with less time delay. Experiments on WMT14 En-De, WMT16 En-Ro, and IWSLT14 De-En translation tasks demonstrate that our framework achieves significant improvements with only 8% speed degeneration over the autoregressive NMT.
%R 10.18653/v1/2020.autosimtrans-1.4
%U https://aclanthology.org/2020.autosimtrans-1.4
%U https://doi.org/10.18653/v1/2020.autosimtrans-1.4
%P 24-29
Markdown (Informal)
[Improving Autoregressive NMT with Non-Autoregressive Model](https://aclanthology.org/2020.autosimtrans-1.4) (Zhou et al., AutoSimTrans 2020)
ACL