@inproceedings{di-gangi-etal-2018-fine,
  title     = "Fine-tuning on Clean Data for End-to-End Speech Translation: {FBK} @ {IWSLT} 2018",
  author    = "Di Gangi, Mattia Antonino and
               Dess{\`\i}, Roberto and
               Cattoni, Roldano and
               Negri, Matteo and
               Turchi, Marco",
  editor    = "Turchi, Marco and
               Niehues, Jan and
               Federico, Marcello",
  booktitle = "Proceedings of the 15th International Conference on Spoken Language Translation",
  month     = oct # " 29-30",
  year      = "2018",
  address   = "Brussels",
  publisher = "International Conference on Spoken Language Translation",
  url       = "https://aclanthology.org/2018.iwslt-1.22",
  pages     = "147--152",
  abstract  = "This paper describes FBK{'}s submission to the end-to-end English-German speech translation task at IWSLT 2018. Our system relies on a state-of-the-art model based on LSTMs and CNNs, where the CNNs are used to reduce the temporal dimension of the audio input, which is in general much higher than machine translation input. Our model was trained only on the audio-to-text parallel data released for the task, and fine-tuned on cleaned subsets of the original training corpus. The addition of weight normalization and label smoothing improved the baseline system by 1.0 BLEU point on our validation set. The final submission also featured checkpoint averaging within a training run and ensemble decoding of models trained during multiple runs. On test data, our best single model obtained a BLEU score of 9.7, while the ensemble obtained a BLEU score of 10.24.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="di-gangi-etal-2018-fine">
<titleInfo>
<title>Fine-tuning on Clean Data for End-to-End Speech Translation: FBK @ IWSLT 2018</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mattia</namePart>
<namePart type="given">Antonino</namePart>
<namePart type="family">Di Gangi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Roberto</namePart>
<namePart type="family">Dessì</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Roldano</namePart>
<namePart type="family">Cattoni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matteo</namePart>
<namePart type="family">Negri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marco</namePart>
<namePart type="family">Turchi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-10</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 15th International Conference on Spoken Language Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marco</namePart>
<namePart type="family">Turchi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Niehues</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcello</namePart>
<namePart type="family">Federico</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Conference on Spoken Language Translation</publisher>
<place>
<placeTerm type="text">Brussels</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper describes FBK’s submission to the end-to-end English-German speech translation task at IWSLT 2018. Our system relies on a state-of-the-art model based on LSTMs and CNNs, where the CNNs are used to reduce the temporal dimension of the audio input, which is in general much higher than machine translation input. Our model was trained only on the audio-to-text parallel data released for the task, and fine-tuned on cleaned subsets of the original training corpus. The addition of weight normalization and label smoothing improved the baseline system by 1.0 BLEU point on our validation set. The final submission also featured checkpoint averaging within a training run and ensemble decoding of models trained during multiple runs. On test data, our best single model obtained a BLEU score of 9.7, while the ensemble obtained a BLEU score of 10.24.</abstract>
<identifier type="citekey">di-gangi-etal-2018-fine</identifier>
<location>
<url>https://aclanthology.org/2018.iwslt-1.22</url>
</location>
<part>
<date>2018-10</date>
<extent unit="page">
<start>147</start>
<end>152</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Fine-tuning on Clean Data for End-to-End Speech Translation: FBK @ IWSLT 2018
%A Di Gangi, Mattia Antonino
%A Dessì, Roberto
%A Cattoni, Roldano
%A Negri, Matteo
%A Turchi, Marco
%Y Turchi, Marco
%Y Niehues, Jan
%Y Federico, Marcello
%S Proceedings of the 15th International Conference on Spoken Language Translation
%D 2018
%8 oct 29 30
%I International Conference on Spoken Language Translation
%C Brussels
%F di-gangi-etal-2018-fine
%X This paper describes FBK’s submission to the end-to-end English-German speech translation task at IWSLT 2018. Our system relies on a state-of-the-art model based on LSTMs and CNNs, where the CNNs are used to reduce the temporal dimension of the audio input, which is in general much higher than machine translation input. Our model was trained only on the audio-to-text parallel data released for the task, and fine-tuned on cleaned subsets of the original training corpus. The addition of weight normalization and label smoothing improved the baseline system by 1.0 BLEU point on our validation set. The final submission also featured checkpoint averaging within a training run and ensemble decoding of models trained during multiple runs. On test data, our best single model obtained a BLEU score of 9.7, while the ensemble obtained a BLEU score of 10.24.
%U https://aclanthology.org/2018.iwslt-1.22
%P 147-152
Markdown (Informal)
[Fine-tuning on Clean Data for End-to-End Speech Translation: FBK @ IWSLT 2018](https://aclanthology.org/2018.iwslt-1.22) (Di Gangi et al., IWSLT 2018)
ACL