@comment{Conference paper in the IWSLT 2022 proceedings: YiTrans offline speech translation system description. Acronyms and the system name in the title are brace-protected as whole words so sentence-casing styles keep their capitalisation.}
@inproceedings{zhang-ao-2022-yitrans,
    title = "The {YiTrans} Speech Translation System for {IWSLT} 2022 Offline Shared Task",
    author = "Zhang, Ziqiang and
      Ao, Junyi",
    editor = "Salesky, Elizabeth and
      Federico, Marcello and
      Costa-juss{\`a}, Marta",
    booktitle = "Proceedings of the 19th International Conference on Spoken Language Translation (IWSLT 2022)",
    month = may,
    year = "2022",
    address = "Dublin, Ireland (in-person and online)",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2022.iwslt-1.11/",
    doi = "10.18653/v1/2022.iwslt-1.11",
    pages = "158--168",
    abstract = "This paper describes the submission of our end-to-end YiTrans speech translation system for the IWSLT 2022 offline task, which translates from English audio to German, Chinese, and Japanese. The YiTrans system is built on large-scale pre-trained encoder-decoder models. More specifically, we first design a multi-stage pre-training strategy to build a multi-modality model with a large amount of labeled and unlabeled data. We then fine-tune the corresponding components of the model for the downstream speech translation tasks. Moreover, we make various efforts to improve performance, such as data filtering, data augmentation, speech segmentation, model ensemble, and so on. Experimental results show that our YiTrans system obtains a significant improvement than the strong baseline on three translation directions, and it achieves +5.2 BLEU improvements over last year's optimal end-to-end system on tst2021 English-German."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhang-ao-2022-yitrans">
<titleInfo>
<title>The YiTrans Speech Translation System for IWSLT 2022 Offline Shared Task</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ziqiang</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junyi</namePart>
<namePart type="family">Ao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 19th International Conference on Spoken Language Translation (IWSLT 2022)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Elizabeth</namePart>
<namePart type="family">Salesky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcello</namePart>
<namePart type="family">Federico</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marta</namePart>
<namePart type="family">Costa-jussà</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dublin, Ireland (in-person and online)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper describes the submission of our end-to-end YiTrans speech translation system for the IWSLT 2022 offline task, which translates from English audio to German, Chinese, and Japanese. The YiTrans system is built on large-scale pre-trained encoder-decoder models. More specifically, we first design a multi-stage pre-training strategy to build a multi-modality model with a large amount of labeled and unlabeled data. We then fine-tune the corresponding components of the model for the downstream speech translation tasks. Moreover, we make various efforts to improve performance, such as data filtering, data augmentation, speech segmentation, model ensemble, and so on. Experimental results show that our YiTrans system obtains a significant improvement than the strong baseline on three translation directions, and it achieves +5.2 BLEU improvements over last year’s optimal end-to-end system on tst2021 English-German.</abstract>
<identifier type="citekey">zhang-ao-2022-yitrans</identifier>
<identifier type="doi">10.18653/v1/2022.iwslt-1.11</identifier>
<location>
<url>https://aclanthology.org/2022.iwslt-1.11/</url>
</location>
<part>
<date>2022-05</date>
<extent unit="page">
<start>158</start>
<end>168</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T The YiTrans Speech Translation System for IWSLT 2022 Offline Shared Task
%A Zhang, Ziqiang
%A Ao, Junyi
%Y Salesky, Elizabeth
%Y Federico, Marcello
%Y Costa-jussà, Marta
%S Proceedings of the 19th International Conference on Spoken Language Translation (IWSLT 2022)
%D 2022
%8 May
%I Association for Computational Linguistics
%C Dublin, Ireland (in-person and online)
%F zhang-ao-2022-yitrans
%X This paper describes the submission of our end-to-end YiTrans speech translation system for the IWSLT 2022 offline task, which translates from English audio to German, Chinese, and Japanese. The YiTrans system is built on large-scale pre-trained encoder-decoder models. More specifically, we first design a multi-stage pre-training strategy to build a multi-modality model with a large amount of labeled and unlabeled data. We then fine-tune the corresponding components of the model for the downstream speech translation tasks. Moreover, we make various efforts to improve performance, such as data filtering, data augmentation, speech segmentation, model ensemble, and so on. Experimental results show that our YiTrans system obtains a significant improvement than the strong baseline on three translation directions, and it achieves +5.2 BLEU improvements over last year’s optimal end-to-end system on tst2021 English-German.
%R 10.18653/v1/2022.iwslt-1.11
%U https://aclanthology.org/2022.iwslt-1.11/
%U https://doi.org/10.18653/v1/2022.iwslt-1.11
%P 158-168
Markdown (Informal)
[The YiTrans Speech Translation System for IWSLT 2022 Offline Shared Task](https://aclanthology.org/2022.iwslt-1.11/) (Zhang & Ao, IWSLT 2022)
ACL