@inproceedings{hurlimann-cieliebak-2023-reproducing,
title = "Reproducing a Comparative Evaluation of {G}erman Text-to-Speech Systems",
author = {H{\"u}rlimann, Manuela and
Cieliebak, Mark},
editor = "Belz, Anya and
Popovi{\'c}, Maja and
Reiter, Ehud and
Thomson, Craig and
Sedoc, Jo{\~a}o",
booktitle = "Proceedings of the 3rd Workshop on Human Evaluation of NLP Systems",
month = sep,
year = "2023",
address = "Varna, Bulgaria",
publisher = "INCOMA Ltd., Shoumen, Bulgaria",
url = "https://aclanthology.org/2023.humeval-1.12",
pages = "136--144",
abstract = "This paper describes the reproduction of a human evaluation in Language-Agnostic Meta-Learning for Low-Resource Text-to-Speech with Articulatory Features reported in Lux and Vu (2022). It is a contribution to the ReproNLP 2023 Shared Task on Reproducibility of Evaluations in NLP. The original evaluation assessed the naturalness of audio generated by different Text-to-Speech (TTS) systems for German, and our goal was to repeat the experiment with a different set of evaluators. We reproduced the evaluation based on data and instructions provided by the original authors, with some uncertainty concerning the randomisation of question order. Evaluators were recruited via email to relevant mailing lists and we received 157 responses over the course of three weeks. Our initial results show low reproducibility, but when we assume that the systems of the original and repeat evaluation experiment have been transposed, the reproducibility assessment improves markedly. We do not know if and at what point such a transposition happened; however, an initial analysis of our audio and video files provides some evidence that the system assignment in our repeat experiment is correct.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hurlimann-cieliebak-2023-reproducing">
<titleInfo>
<title>Reproducing a Comparative Evaluation of German Text-to-Speech Systems</title>
</titleInfo>
<name type="personal">
<namePart type="given">Manuela</namePart>
<namePart type="family">Hürlimann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mark</namePart>
<namePart type="family">Cieliebak</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 3rd Workshop on Human Evaluation of NLP Systems</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anya</namePart>
<namePart type="family">Belz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maja</namePart>
<namePart type="family">Popović</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ehud</namePart>
<namePart type="family">Reiter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Craig</namePart>
<namePart type="family">Thomson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">João</namePart>
<namePart type="family">Sedoc</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
<place>
<placeTerm type="text">Varna, Bulgaria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper describes the reproduction of a human evaluation in Language-Agnostic Meta-Learning for Low-Resource Text-to-Speech with Articulatory Features reported in Lux and Vu (2022). It is a contribution to the ReproNLP 2023 Shared Task on Reproducibility of Evaluations in NLP. The original evaluation assessed the naturalness of audio generated by different Text-to-Speech (TTS) systems for German, and our goal was to repeat the experiment with a different set of evaluators. We reproduced the evaluation based on data and instructions provided by the original authors, with some uncertainty concerning the randomisation of question order. Evaluators were recruited via email to relevant mailing lists and we received 157 responses over the course of three weeks. Our initial results show low reproducibility, but when we assume that the systems of the original and repeat evaluation experiment have been transposed, the reproducibility assessment improves markedly. We do not know if and at what point such a transposition happened; however, an initial analysis of our audio and video files provides some evidence that the system assignment in our repeat experiment is correct.</abstract>
<identifier type="citekey">hurlimann-cieliebak-2023-reproducing</identifier>
<location>
<url>https://aclanthology.org/2023.humeval-1.12</url>
</location>
<part>
<date>2023-09</date>
<extent unit="page">
<start>136</start>
<end>144</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Reproducing a Comparative Evaluation of German Text-to-Speech Systems
%A Hürlimann, Manuela
%A Cieliebak, Mark
%Y Belz, Anya
%Y Popović, Maja
%Y Reiter, Ehud
%Y Thomson, Craig
%Y Sedoc, João
%S Proceedings of the 3rd Workshop on Human Evaluation of NLP Systems
%D 2023
%8 September
%I INCOMA Ltd., Shoumen, Bulgaria
%C Varna, Bulgaria
%F hurlimann-cieliebak-2023-reproducing
%X This paper describes the reproduction of a human evaluation in Language-Agnostic Meta-Learning for Low-Resource Text-to-Speech with Articulatory Features reported in Lux and Vu (2022). It is a contribution to the ReproNLP 2023 Shared Task on Reproducibility of Evaluations in NLP. The original evaluation assessed the naturalness of audio generated by different Text-to-Speech (TTS) systems for German, and our goal was to repeat the experiment with a different set of evaluators. We reproduced the evaluation based on data and instructions provided by the original authors, with some uncertainty concerning the randomisation of question order. Evaluators were recruited via email to relevant mailing lists and we received 157 responses over the course of three weeks. Our initial results show low reproducibility, but when we assume that the systems of the original and repeat evaluation experiment have been transposed, the reproducibility assessment improves markedly. We do not know if and at what point such a transposition happened; however, an initial analysis of our audio and video files provides some evidence that the system assignment in our repeat experiment is correct.
%U https://aclanthology.org/2023.humeval-1.12
%P 136-144
Markdown (Informal)
[Reproducing a Comparative Evaluation of German Text-to-Speech Systems](https://aclanthology.org/2023.humeval-1.12) (Hürlimann & Cieliebak, HumEval-WS 2023)
ACL