@inproceedings{serriere-etal-2016-weakly,
title = "Weakly-supervised text-to-speech alignment confidence measure",
author = "Serri{\`e}re, Guillaume and
Cerisara, Christophe and
Fohr, Dominique and
Mella, Odile",
editor = "Matsumoto, Yuji and
Prasad, Rashmi",
booktitle = "Proceedings of {COLING} 2016, the 26th International Conference on Computational Linguistics: Technical Papers",
month = dec,
year = "2016",
address = "Osaka, Japan",
publisher = "The COLING 2016 Organizing Committee",
url = "https://aclanthology.org/C16-1192",
pages = "2042--2050",
abstract = "This work proposes a new confidence measure for evaluating text-to-speech alignment systems outputs, which is a key component for many applications, such as semi-automatic corpus anonymization, lips syncing, film dubbing, corpus preparation for speech synthesis and speech recognition acoustic models training. This confidence measure exploits deep neural networks that are trained on large corpora without direct supervision. It is evaluated on an open-source spontaneous speech corpus and outperforms a confidence score derived from a state-of-the-art text-to-speech aligner. We further show that this confidence measure can be used to fine-tune the output of this aligner and improve the quality of the resulting alignment.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="serriere-etal-2016-weakly">
<titleInfo>
<title>Weakly-supervised text-to-speech alignment confidence measure</title>
</titleInfo>
<name type="personal">
<namePart type="given">Guillaume</namePart>
<namePart type="family">Serrière</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christophe</namePart>
<namePart type="family">Cerisara</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dominique</namePart>
<namePart type="family">Fohr</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Odile</namePart>
<namePart type="family">Mella</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2016-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yuji</namePart>
<namePart type="family">Matsumoto</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rashmi</namePart>
<namePart type="family">Prasad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>The COLING 2016 Organizing Committee</publisher>
<place>
<placeTerm type="text">Osaka, Japan</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This work proposes a new confidence measure for evaluating text-to-speech alignment systems outputs, which is a key component for many applications, such as semi-automatic corpus anonymization, lips syncing, film dubbing, corpus preparation for speech synthesis and speech recognition acoustic models training. This confidence measure exploits deep neural networks that are trained on large corpora without direct supervision. It is evaluated on an open-source spontaneous speech corpus and outperforms a confidence score derived from a state-of-the-art text-to-speech aligner. We further show that this confidence measure can be used to fine-tune the output of this aligner and improve the quality of the resulting alignment.</abstract>
<identifier type="citekey">serriere-etal-2016-weakly</identifier>
<location>
<url>https://aclanthology.org/C16-1192</url>
</location>
<part>
<date>2016-12</date>
<extent unit="page">
<start>2042</start>
<end>2050</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Weakly-supervised text-to-speech alignment confidence measure
%A Serrière, Guillaume
%A Cerisara, Christophe
%A Fohr, Dominique
%A Mella, Odile
%Y Matsumoto, Yuji
%Y Prasad, Rashmi
%S Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers
%D 2016
%8 December
%I The COLING 2016 Organizing Committee
%C Osaka, Japan
%F serriere-etal-2016-weakly
%X This work proposes a new confidence measure for evaluating text-to-speech alignment systems outputs, which is a key component for many applications, such as semi-automatic corpus anonymization, lips syncing, film dubbing, corpus preparation for speech synthesis and speech recognition acoustic models training. This confidence measure exploits deep neural networks that are trained on large corpora without direct supervision. It is evaluated on an open-source spontaneous speech corpus and outperforms a confidence score derived from a state-of-the-art text-to-speech aligner. We further show that this confidence measure can be used to fine-tune the output of this aligner and improve the quality of the resulting alignment.
%U https://aclanthology.org/C16-1192
%P 2042-2050
Markdown (Informal)
[Weakly-supervised text-to-speech alignment confidence measure](https://aclanthology.org/C16-1192) (Serrière et al., COLING 2016)
ACL
- Guillaume Serrière, Christophe Cerisara, Dominique Fohr, and Odile Mella. 2016. Weakly-supervised text-to-speech alignment confidence measure. In Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers, pages 2042–2050, Osaka, Japan. The COLING 2016 Organizing Committee.