@inproceedings{zeng-etal-2022-end,
    title = "End-to-End Simultaneous Speech Translation with Pretraining and Distillation: Huawei {N}oah{'}s System for {A}uto{S}im{T}ran{S} 2022",
    author = "Zeng, Xingshan  and
      Li, Pengfei  and
      Li, Liangyou  and
      Liu, Qun",
    editor = "Ive, Julia  and
      Zhang, Ruiqing",
    booktitle = "Proceedings of the Third Workshop on Automatic Simultaneous Translation",
    month = jul,
    year = "2022",
    address = "Online",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2022.autosimtrans-1.5/",
    doi = "10.18653/v1/2022.autosimtrans-1.5",
    pages = "25--33",
    abstract = "This paper describes the system submitted to AutoSimTrans 2022 from Huawei Noah's Ark Lab, which won first place in the audio input track of the Chinese-English translation task. Our system is based on RealTranS, an end-to-end simultaneous speech translation model. We enhance the model with pretraining, initializing the acoustic encoder with an ASR encoder and the semantic encoder and decoder with an NMT encoder and decoder, respectively. To relieve data scarcity, we further construct a pseudo training corpus, as a form of knowledge distillation, from ASR data and the pretrained NMT model. We also apply several techniques to improve robustness and domain generalizability, including punctuation removal, token-level knowledge distillation, and multi-domain finetuning. Experiments show that our system significantly outperforms the baselines at all latency levels and verify the effectiveness of our proposed methods."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zeng-etal-2022-end">
    <titleInfo>
        <title>End-to-End Simultaneous Speech Translation with Pretraining and Distillation: Huawei Noah’s System for AutoSimTranS 2022</title>
    </titleInfo>
    <name type="personal">
        <namePart type="given">Xingshan</namePart>
        <namePart type="family">Zeng</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Pengfei</namePart>
        <namePart type="family">Li</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Liangyou</namePart>
        <namePart type="family">Li</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Qun</namePart>
        <namePart type="family">Liu</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <originInfo>
        <dateIssued>2022-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
        <titleInfo>
            <title>Proceedings of the Third Workshop on Automatic Simultaneous Translation</title>
        </titleInfo>
        <name type="personal">
            <namePart type="given">Julia</namePart>
            <namePart type="family">Ive</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Ruiqing</namePart>
            <namePart type="family">Zhang</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <originInfo>
            <publisher>Association for Computational Linguistics</publisher>
            <place>
                <placeTerm type="text">Online</placeTerm>
            </place>
        </originInfo>
        <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>This paper describes the system submitted to AutoSimTrans 2022 from Huawei Noah’s Ark Lab, which won first place in the audio input track of the Chinese-English translation task. Our system is based on RealTranS, an end-to-end simultaneous speech translation model. We enhance the model with pretraining, initializing the acoustic encoder with an ASR encoder and the semantic encoder and decoder with an NMT encoder and decoder, respectively. To relieve data scarcity, we further construct a pseudo training corpus, as a form of knowledge distillation, from ASR data and the pretrained NMT model. We also apply several techniques to improve robustness and domain generalizability, including punctuation removal, token-level knowledge distillation, and multi-domain finetuning. Experiments show that our system significantly outperforms the baselines at all latency levels and verify the effectiveness of our proposed methods.</abstract>
    <identifier type="citekey">zeng-etal-2022-end</identifier>
    <identifier type="doi">10.18653/v1/2022.autosimtrans-1.5</identifier>
    <location>
        <url>https://aclanthology.org/2022.autosimtrans-1.5/</url>
    </location>
    <part>
        <date>2022-07</date>
        <extent unit="page">
            <start>25</start>
            <end>33</end>
        </extent>
    </part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T End-to-End Simultaneous Speech Translation with Pretraining and Distillation: Huawei Noah’s System for AutoSimTranS 2022
%A Zeng, Xingshan
%A Li, Pengfei
%A Li, Liangyou
%A Liu, Qun
%Y Ive, Julia
%Y Zhang, Ruiqing
%S Proceedings of the Third Workshop on Automatic Simultaneous Translation
%D 2022
%8 July
%I Association for Computational Linguistics
%C Online
%F zeng-etal-2022-end
%X This paper describes the system submitted to AutoSimTrans 2022 from Huawei Noah’s Ark Lab, which won first place in the audio input track of the Chinese-English translation task. Our system is based on RealTranS, an end-to-end simultaneous speech translation model. We enhance the model with pretraining, initializing the acoustic encoder with an ASR encoder and the semantic encoder and decoder with an NMT encoder and decoder, respectively. To relieve data scarcity, we further construct a pseudo training corpus, as a form of knowledge distillation, from ASR data and the pretrained NMT model. We also apply several techniques to improve robustness and domain generalizability, including punctuation removal, token-level knowledge distillation, and multi-domain finetuning. Experiments show that our system significantly outperforms the baselines at all latency levels and verify the effectiveness of our proposed methods.
%R 10.18653/v1/2022.autosimtrans-1.5
%U https://aclanthology.org/2022.autosimtrans-1.5/
%U https://doi.org/10.18653/v1/2022.autosimtrans-1.5
%P 25-33
Markdown (Informal)
[End-to-End Simultaneous Speech Translation with Pretraining and Distillation: Huawei Noah’s System for AutoSimTranS 2022](https://aclanthology.org/2022.autosimtrans-1.5/) (Zeng et al., AutoSimTrans 2022)
ACL
Xingshan Zeng, Pengfei Li, Liangyou Li, and Qun Liu. 2022. End-to-End Simultaneous Speech Translation with Pretraining and Distillation: Huawei Noah’s System for AutoSimTranS 2022. In Proceedings of the Third Workshop on Automatic Simultaneous Translation, pages 25–33, Online. Association for Computational Linguistics.