@inproceedings{briakou-etal-2021-review,
title = "A Review of Human Evaluation for Style Transfer",
author = "Briakou, Eleftheria and
Agrawal, Sweta and
Zhang, Ke and
Tetreault, Joel and
Carpuat, Marine",
editor = "Bosselut, Antoine and
Durmus, Esin and
Gangal, Varun Prashant and
Gehrmann, Sebastian and
Jernite, Yacine and
Perez-Beltrachini, Laura and
Shaikh, Samira and
Xu, Wei",
booktitle = "Proceedings of the 1st Workshop on Natural Language Generation, Evaluation, and Metrics (GEM 2021)",
month = aug,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.gem-1.6",
doi = "10.18653/v1/2021.gem-1.6",
pages = "58--67",
abstract = "This paper reviews and summarizes human evaluation practices described in 97 style transfer papers with respect to three main evaluation aspects: style transfer, meaning preservation, and fluency. In principle, evaluations by human raters should be the most reliable. However, in style transfer papers, we find that protocols for human evaluations are often underspecified and not standardized, which hampers the reproducibility of research in this field and progress toward better human and automatic evaluation methods.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="briakou-etal-2021-review">
<titleInfo>
<title>A Review of Human Evaluation for Style Transfer</title>
</titleInfo>
<name type="personal">
<namePart type="given">Eleftheria</namePart>
<namePart type="family">Briakou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sweta</namePart>
<namePart type="family">Agrawal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ke</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joel</namePart>
<namePart type="family">Tetreault</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marine</namePart>
<namePart type="family">Carpuat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st Workshop on Natural Language Generation, Evaluation, and Metrics (GEM 2021)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Antoine</namePart>
<namePart type="family">Bosselut</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Esin</namePart>
<namePart type="family">Durmus</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Varun</namePart>
<namePart type="given">Prashant</namePart>
<namePart type="family">Gangal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Gehrmann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yacine</namePart>
<namePart type="family">Jernite</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Laura</namePart>
<namePart type="family">Perez-Beltrachini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Samira</namePart>
<namePart type="family">Shaikh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wei</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper reviews and summarizes human evaluation practices described in 97 style transfer papers with respect to three main evaluation aspects: style transfer, meaning preservation, and fluency. In principle, evaluations by human raters should be the most reliable. However, in style transfer papers, we find that protocols for human evaluations are often underspecified and not standardized, which hampers the reproducibility of research in this field and progress toward better human and automatic evaluation methods.</abstract>
<identifier type="citekey">briakou-etal-2021-review</identifier>
<identifier type="doi">10.18653/v1/2021.gem-1.6</identifier>
<location>
<url>https://aclanthology.org/2021.gem-1.6</url>
</location>
<part>
<date>2021-08</date>
<extent unit="page">
<start>58</start>
<end>67</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Review of Human Evaluation for Style Transfer
%A Briakou, Eleftheria
%A Agrawal, Sweta
%A Zhang, Ke
%A Tetreault, Joel
%A Carpuat, Marine
%Y Bosselut, Antoine
%Y Durmus, Esin
%Y Gangal, Varun Prashant
%Y Gehrmann, Sebastian
%Y Jernite, Yacine
%Y Perez-Beltrachini, Laura
%Y Shaikh, Samira
%Y Xu, Wei
%S Proceedings of the 1st Workshop on Natural Language Generation, Evaluation, and Metrics (GEM 2021)
%D 2021
%8 August
%I Association for Computational Linguistics
%C Online
%F briakou-etal-2021-review
%X This paper reviews and summarizes human evaluation practices described in 97 style transfer papers with respect to three main evaluation aspects: style transfer, meaning preservation, and fluency. In principle, evaluations by human raters should be the most reliable. However, in style transfer papers, we find that protocols for human evaluations are often underspecified and not standardized, which hampers the reproducibility of research in this field and progress toward better human and automatic evaluation methods.
%R 10.18653/v1/2021.gem-1.6
%U https://aclanthology.org/2021.gem-1.6
%U https://doi.org/10.18653/v1/2021.gem-1.6
%P 58-67
Markdown (Informal)
[A Review of Human Evaluation for Style Transfer](https://aclanthology.org/2021.gem-1.6) (Briakou et al., GEM 2021)
ACL
- Eleftheria Briakou, Sweta Agrawal, Ke Zhang, Joel Tetreault, and Marine Carpuat. 2021. A Review of Human Evaluation for Style Transfer. In Proceedings of the 1st Workshop on Natural Language Generation, Evaluation, and Metrics (GEM 2021), pages 58–67, Online. Association for Computational Linguistics.