@inproceedings{mieskes-benz-2023-h,
title = "h{\_}da@{R}epro{H}umn {--} Reproduction of Human Evaluation and Technical Pipeline",
author = "Mieskes, Margot and
Benz, Jacob Georg",
editor = "Belz, Anya and
Popovi{\'c}, Maja and
Reiter, Ehud and
Thomson, Craig and
Sedoc, Jo{\~a}o",
booktitle = "Proceedings of the 3rd Workshop on Human Evaluation of NLP Systems",
month = sep,
year = "2023",
address = "Varna, Bulgaria",
publisher = "INCOMA Ltd., Shoumen, Bulgaria",
url = "https://aclanthology.org/2023.humeval-1.11",
pages = "130--135",
abstract = "How reliable are human evaluation results? Is it possible to replicate human evaluation? This work takes a closer look at the evaluation of the output of a Text-to-Speech (TTS) system. Unfortunately, our results indicate that human evaluation is not as straightforward to replicate as expected. Additionally, we also present results on reproducing the technical background of the TTS system and discuss potential reasons for the reproduction failure.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mieskes-benz-2023-h">
<titleInfo>
<title>h_da@ReproHumn – Reproduction of Human Evaluation and Technical Pipeline</title>
</titleInfo>
<name type="personal">
<namePart type="given">Margot</namePart>
<namePart type="family">Mieskes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jacob</namePart>
<namePart type="given">Georg</namePart>
<namePart type="family">Benz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 3rd Workshop on Human Evaluation of NLP Systems</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anya</namePart>
<namePart type="family">Belz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maja</namePart>
<namePart type="family">Popović</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ehud</namePart>
<namePart type="family">Reiter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Craig</namePart>
<namePart type="family">Thomson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">João</namePart>
<namePart type="family">Sedoc</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
<place>
<placeTerm type="text">Varna, Bulgaria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>How reliable are human evaluation results? Is it possible to replicate human evaluation? This work takes a closer look at the evaluation of the output of a Text-to-Speech (TTS) system. Unfortunately, our results indicate that human evaluation is not as straightforward to replicate as expected. Additionally, we also present results on reproducing the technical background of the TTS system and discuss potential reasons for the reproduction failure.</abstract>
<identifier type="citekey">mieskes-benz-2023-h</identifier>
<location>
<url>https://aclanthology.org/2023.humeval-1.11</url>
</location>
<part>
<date>2023-09</date>
<extent unit="page">
<start>130</start>
<end>135</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T h_da@ReproHumn – Reproduction of Human Evaluation and Technical Pipeline
%A Mieskes, Margot
%A Benz, Jacob Georg
%Y Belz, Anya
%Y Popović, Maja
%Y Reiter, Ehud
%Y Thomson, Craig
%Y Sedoc, João
%S Proceedings of the 3rd Workshop on Human Evaluation of NLP Systems
%D 2023
%8 September
%I INCOMA Ltd., Shoumen, Bulgaria
%C Varna, Bulgaria
%F mieskes-benz-2023-h
%X How reliable are human evaluation results? Is it possible to replicate human evaluation? This work takes a closer look at the evaluation of the output of a Text-to-Speech (TTS) system. Unfortunately, our results indicate that human evaluation is not as straightforward to replicate as expected. Additionally, we also present results on reproducing the technical background of the TTS system and discuss potential reasons for the reproduction failure.
%U https://aclanthology.org/2023.humeval-1.11
%P 130-135
Markdown (Informal)
[h_da@ReproHumn – Reproduction of Human Evaluation and Technical Pipeline](https://aclanthology.org/2023.humeval-1.11) (Mieskes & Benz, HumEval-WS 2023)
ACL