@inproceedings{post-etal-2013-improved,
title = "Improved speech-to-text translation with the Fisher and Callhome {S}panish-{E}nglish speech translation corpus",
author = "Post, Matt and
Kumar, Gaurav and
Lopez, Adam and
Karakos, Damianos and
Callison-Burch, Chris and
Khudanpur, Sanjeev",
editor = "Zhang, Joy Ying",
booktitle = "Proceedings of the 10th International Workshop on Spoken Language Translation: Papers",
month = dec # " 5-6",
year = "2013",
address = "Heidelberg, Germany",
url = "https://aclanthology.org/2013.iwslt-papers.14",
abstract = "Research into the translation of the output of automatic speech recognition (ASR) systems is hindered by the dearth of datasets developed for that explicit purpose. For SpanishEnglish translation, in particular, most parallel data available exists only in vastly different domains and registers. In order to support research on cross-lingual speech applications, we introduce the Fisher and Callhome Spanish-English Speech Translation Corpus, supplementing existing LDC audio and transcripts with (a) ASR 1-best, lattice, and oracle output produced by the Kaldi recognition system and (b) English translations obtained on Amazon{'}s Mechanical Turk. The result is a four-way parallel dataset of Spanish audio, transcriptions, ASR lattices, and English translations of approximately 38 hours of speech, with defined training, development, and held-out test sets. We conduct baseline machine translation experiments using models trained on the provided training data, and validate the dataset by corroborating a number of known results in the field, including the utility of in-domain (information, conversational) training data, increased performance translating lattices (instead of recognizer 1-best output), and the relationship between word error rate and BLEU score.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="post-etal-2013-improved">
<titleInfo>
<title>Improved speech-to-text translation with the Fisher and Callhome Spanish-English speech translation corpus</title>
</titleInfo>
<name type="personal">
<namePart type="given">Matt</namePart>
<namePart type="family">Post</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gaurav</namePart>
<namePart type="family">Kumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Adam</namePart>
<namePart type="family">Lopez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Damianos</namePart>
<namePart type="family">Karakos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chris</namePart>
<namePart type="family">Callison-Burch</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sanjeev</namePart>
<namePart type="family">Khudanpur</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2013-dec 5-6</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 10th International Workshop on Spoken Language Translation: Papers</title>
</titleInfo>
<name type="personal">
<namePart type="given">Joy</namePart>
<namePart type="given">Ying</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<place>
<placeTerm type="text">Heidelberg, Germany</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Research into the translation of the output of automatic speech recognition (ASR) systems is hindered by the dearth of datasets developed for that explicit purpose. For SpanishEnglish translation, in particular, most parallel data available exists only in vastly different domains and registers. In order to support research on cross-lingual speech applications, we introduce the Fisher and Callhome Spanish-English Speech Translation Corpus, supplementing existing LDC audio and transcripts with (a) ASR 1-best, lattice, and oracle output produced by the Kaldi recognition system and (b) English translations obtained on Amazon’s Mechanical Turk. The result is a four-way parallel dataset of Spanish audio, transcriptions, ASR lattices, and English translations of approximately 38 hours of speech, with defined training, development, and held-out test sets. We conduct baseline machine translation experiments using models trained on the provided training data, and validate the dataset by corroborating a number of known results in the field, including the utility of in-domain (information, conversational) training data, increased performance translating lattices (instead of recognizer 1-best output), and the relationship between word error rate and BLEU score.</abstract>
<identifier type="citekey">post-etal-2013-improved</identifier>
<location>
<url>https://aclanthology.org/2013.iwslt-papers.14</url>
</location>
<part>
<date>2013-dec 5-6</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Improved speech-to-text translation with the Fisher and Callhome Spanish-English speech translation corpus
%A Post, Matt
%A Kumar, Gaurav
%A Lopez, Adam
%A Karakos, Damianos
%A Callison-Burch, Chris
%A Khudanpur, Sanjeev
%Y Zhang, Joy Ying
%S Proceedings of the 10th International Workshop on Spoken Language Translation: Papers
%D 2013
%8 dec 5 6
%C Heidelberg, Germany
%F post-etal-2013-improved
%X Research into the translation of the output of automatic speech recognition (ASR) systems is hindered by the dearth of datasets developed for that explicit purpose. For SpanishEnglish translation, in particular, most parallel data available exists only in vastly different domains and registers. In order to support research on cross-lingual speech applications, we introduce the Fisher and Callhome Spanish-English Speech Translation Corpus, supplementing existing LDC audio and transcripts with (a) ASR 1-best, lattice, and oracle output produced by the Kaldi recognition system and (b) English translations obtained on Amazon’s Mechanical Turk. The result is a four-way parallel dataset of Spanish audio, transcriptions, ASR lattices, and English translations of approximately 38 hours of speech, with defined training, development, and held-out test sets. We conduct baseline machine translation experiments using models trained on the provided training data, and validate the dataset by corroborating a number of known results in the field, including the utility of in-domain (information, conversational) training data, increased performance translating lattices (instead of recognizer 1-best output), and the relationship between word error rate and BLEU score.
%U https://aclanthology.org/2013.iwslt-papers.14
Markdown (Informal)
[Improved speech-to-text translation with the Fisher and Callhome Spanish-English speech translation corpus](https://aclanthology.org/2013.iwslt-papers.14) (Post et al., IWSLT 2013)
ACL