@inproceedings{heck-etal-2013-incremental,
title = "Incremental unsupervised training for university lecture recognition",
author = {Heck, Michael and
St{\"u}ker, Sebastian and
Sakti, Sakriani and
Waibel, Alex and
Nakamura, Satoshi},
editor = "Zhang, Joy Ying",
booktitle = "Proceedings of the 10th International Workshop on Spoken Language Translation: Papers",
month = dec # " 5-6",
year = "2013",
address = "Heidelberg, Germany",
url = "https://aclanthology.org/2013.iwslt-papers.8/",
abstract = "In this paper we describe our work on unsupervised adaptation of the acoustic model of our simultaneous lecture translation system. We trained a speaker independent acoustic model, with which we produce automatic transcriptions of new lectures in order to improve the system for a specific lecturer. We compare our results against a model that was trained in a supervised way on an exact manual transcription. We examine four different ways of processing the decoder outputs of the automatic transcription with respect to the treatment of pronunciation variants and noise words. We will show that, instead of fixating the latter informations in the transcriptions, it is of advantage to let the Viterbi algorithm during training decide which pronunciations to use and where to insert which noise words. Further, we utilize word level posterior probabilities obtained during decoding by weighting and thresholding the words of a transcription."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="heck-etal-2013-incremental">
<titleInfo>
<title>Incremental unsupervised training for university lecture recognition</title>
</titleInfo>
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="family">Heck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Stüker</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alex</namePart>
<namePart type="family">Waibel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Satoshi</namePart>
<namePart type="family">Nakamura</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>December 5-6, 2013</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 10th International Workshop on Spoken Language Translation: Papers</title>
</titleInfo>
<name type="personal">
<namePart type="given">Joy</namePart>
<namePart type="given">Ying</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<place>
<placeTerm type="text">Heidelberg, Germany</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper we describe our work on unsupervised adaptation of the acoustic model of our simultaneous lecture translation system. We trained a speaker-independent acoustic model, with which we produce automatic transcriptions of new lectures in order to improve the system for a specific lecturer. We compare our results against a model that was trained in a supervised way on an exact manual transcription. We examine four different ways of processing the decoder outputs of the automatic transcription with respect to the treatment of pronunciation variants and noise words. We show that, instead of fixing this information in the transcriptions, it is advantageous to let the Viterbi algorithm decide during training which pronunciations to use and where to insert which noise words. Further, we utilize word-level posterior probabilities obtained during decoding by weighting and thresholding the words of a transcription.</abstract>
<identifier type="citekey">heck-etal-2013-incremental</identifier>
<location>
<url>https://aclanthology.org/2013.iwslt-papers.8/</url>
</location>
<part>
<date>December 5-6, 2013</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Incremental unsupervised training for university lecture recognition
%A Heck, Michael
%A Stüker, Sebastian
%A Sakti, Sakriani
%A Waibel, Alex
%A Nakamura, Satoshi
%Y Zhang, Joy Ying
%S Proceedings of the 10th International Workshop on Spoken Language Translation: Papers
%D 2013
%8 December 5-6
%C Heidelberg, Germany
%F heck-etal-2013-incremental
%X In this paper we describe our work on unsupervised adaptation of the acoustic model of our simultaneous lecture translation system. We trained a speaker-independent acoustic model, with which we produce automatic transcriptions of new lectures in order to improve the system for a specific lecturer. We compare our results against a model that was trained in a supervised way on an exact manual transcription. We examine four different ways of processing the decoder outputs of the automatic transcription with respect to the treatment of pronunciation variants and noise words. We show that, instead of fixing this information in the transcriptions, it is advantageous to let the Viterbi algorithm decide during training which pronunciations to use and where to insert which noise words. Further, we utilize word-level posterior probabilities obtained during decoding by weighting and thresholding the words of a transcription.
%U https://aclanthology.org/2013.iwslt-papers.8/
Markdown (Informal)
[Incremental unsupervised training for university lecture recognition](https://aclanthology.org/2013.iwslt-papers.8/) (Heck et al., IWSLT 2013)
ACL
Michael Heck, Sebastian Stüker, Sakriani Sakti, Alex Waibel, and Satoshi Nakamura. 2013. [Incremental unsupervised training for university lecture recognition](https://aclanthology.org/2013.iwslt-papers.8/). In Proceedings of the 10th International Workshop on Spoken Language Translation: Papers, Heidelberg, Germany.