@inproceedings{kilgour-etal-2014-2014,
title = "The 2014 {KIT} {IWSLT} speech-to-text systems for {E}nglish, {G}erman and {I}talian",
author = {Kilgour, Kevin and
Heck, Michael and
M{\"u}ller, Markus and
Sperber, Matthias and
St{\"u}ker, Sebastian and
Waibel, Alex},
editor = {Federico, Marcello and
St{\"u}ker, Sebastian and
Yvon, Fran{\c{c}}ois},
booktitle = "Proceedings of the 11th International Workshop on Spoken Language Translation: Evaluation Campaign",
month = dec # " 4-5",
year = "2014",
address = "Lake Tahoe, California",
url = "https://aclanthology.org/2014.iwslt-evaluation.9",
pages = "73--79",
abstract = "This paper describes our German, Italian and English Speech-to-Text (STT) systems for the 2014 IWSLT TED ASR track. Our setup uses ROVER and confusion network combination from various subsystems to achieve a good overall performance. The individual subsystems are built by using different front-ends, (e.g., MVDR-MFCC or lMel), acoustic models (GMM or modular DNN) and phone sets and by training on various subsets of the training data. Decoding is performed in two stages, where the GMM systems are adapted in an unsupervised manner on the combination of the first stage outputs using VTLN, MLLR, and cMLLR. The combination setup produces a final hypothesis that has a significantly lower WER than any of the individual subsystems.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kilgour-etal-2014-2014">
<titleInfo>
<title>The 2014 KIT IWSLT speech-to-text systems for English, German and Italian</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kevin</namePart>
<namePart type="family">Kilgour</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="family">Heck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Markus</namePart>
<namePart type="family">Müller</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matthias</namePart>
<namePart type="family">Sperber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Stüker</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alex</namePart>
<namePart type="family">Waibel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2014-dec 4-5</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 11th International Workshop on Spoken Language Translation: Evaluation Campaign</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marcello</namePart>
<namePart type="family">Federico</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Stüker</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">François</namePart>
<namePart type="family">Yvon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<place>
<placeTerm type="text">Lake Tahoe, California</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper describes our German, Italian and English Speech-to-Text (STT) systems for the 2014 IWSLT TED ASR track. Our setup uses ROVER and confusion network combination from various subsystems to achieve a good overall performance. The individual subsystems are built by using different front-ends, (e.g., MVDR-MFCC or lMel), acoustic models (GMM or modular DNN) and phone sets and by training on various subsets of the training data. Decoding is performed in two stages, where the GMM systems are adapted in an unsupervised manner on the combination of the first stage outputs using VTLN, MLLR, and cMLLR. The combination setup produces a final hypothesis that has a significantly lower WER than any of the individual subsystems.</abstract>
<identifier type="citekey">kilgour-etal-2014-2014</identifier>
<location>
<url>https://aclanthology.org/2014.iwslt-evaluation.9</url>
</location>
<part>
<date>2014-dec 4-5</date>
<extent unit="page">
<start>73</start>
<end>79</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T The 2014 KIT IWSLT speech-to-text systems for English, German and Italian
%A Kilgour, Kevin
%A Heck, Michael
%A Müller, Markus
%A Sperber, Matthias
%A Stüker, Sebastian
%A Waibel, Alex
%Y Federico, Marcello
%Y Stüker, Sebastian
%Y Yvon, François
%S Proceedings of the 11th International Workshop on Spoken Language Translation: Evaluation Campaign
%D 2014
%8 dec 4 5
%C Lake Tahoe, California
%F kilgour-etal-2014-2014
%X This paper describes our German, Italian and English Speech-to-Text (STT) systems for the 2014 IWSLT TED ASR track. Our setup uses ROVER and confusion network combination from various subsystems to achieve a good overall performance. The individual subsystems are built by using different front-ends, (e.g., MVDR-MFCC or lMel), acoustic models (GMM or modular DNN) and phone sets and by training on various subsets of the training data. Decoding is performed in two stages, where the GMM systems are adapted in an unsupervised manner on the combination of the first stage outputs using VTLN, MLLR, and cMLLR. The combination setup produces a final hypothesis that has a significantly lower WER than any of the individual subsystems.
%U https://aclanthology.org/2014.iwslt-evaluation.9
%P 73-79
Markdown (Informal)
[The 2014 KIT IWSLT speech-to-text systems for English, German and Italian](https://aclanthology.org/2014.iwslt-evaluation.9) (Kilgour et al., IWSLT 2014)
ACL