@inproceedings{schoch-etal-2020-problem,
    title     = {{\textquotedblleft}This is a Problem, Don't You Agree?{\textquotedblright} Framing and Bias in Human Evaluation for Natural Language Generation},
    author    = {Schoch, Stephanie and
      Yang, Diyi and
      Ji, Yangfeng},
    editor    = {Agarwal, Shubham and
      Du{\v{s}}ek, Ond{\v{r}}ej and
      Gehrmann, Sebastian and
      Gkatzia, Dimitra and
      Konstas, Ioannis and
      Van Miltenburg, Emiel and
      Santhanam, Sashank},
    booktitle = {Proceedings of the 1st Workshop on Evaluating NLG Evaluation},
    month     = dec,
    year      = {2020},
    address   = {Online (Dublin, Ireland)},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2020.evalnlgeval-1.2/},
    pages     = {10--16},
    abstract  = {Despite recent efforts reviewing current human evaluation practices for natural language generation (NLG) research, the lack of reported question wording and potential for framing effects or cognitive biases influencing results has been widely overlooked. In this opinion paper, we detail three possible framing effects and cognitive biases that could be imposed on human evaluation in NLG. Based on this, we make a call for increased transparency for human evaluation in NLG and propose the concept of human evaluation statements. We make several recommendations for design details to report that could potentially influence results, such as question wording, and suggest that reporting pertinent design details can help increase comparability across studies as well as reproducibility of results.}
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="schoch-etal-2020-problem">
<titleInfo>
<title>“This is a Problem, Don’t You Agree?” Framing and Bias in Human Evaluation for Natural Language Generation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Stephanie</namePart>
<namePart type="family">Schoch</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Diyi</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yangfeng</namePart>
<namePart type="family">Ji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st Workshop on Evaluating NLG Evaluation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shubham</namePart>
<namePart type="family">Agarwal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ondřej</namePart>
<namePart type="family">Dušek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Gehrmann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dimitra</namePart>
<namePart type="family">Gkatzia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ioannis</namePart>
<namePart type="family">Konstas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Emiel</namePart>
<namePart type="family">Van Miltenburg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sashank</namePart>
<namePart type="family">Santhanam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online (Dublin, Ireland)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Despite recent efforts reviewing current human evaluation practices for natural language generation (NLG) research, the lack of reported question wording and potential for framing effects or cognitive biases influencing results has been widely overlooked. In this opinion paper, we detail three possible framing effects and cognitive biases that could be imposed on human evaluation in NLG. Based on this, we make a call for increased transparency for human evaluation in NLG and propose the concept of human evaluation statements. We make several recommendations for design details to report that could potentially influence results, such as question wording, and suggest that reporting pertinent design details can help increase comparability across studies as well as reproducibility of results.</abstract>
<identifier type="citekey">schoch-etal-2020-problem</identifier>
<location>
<url>https://aclanthology.org/2020.evalnlgeval-1.2/</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>10</start>
<end>16</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T “This is a Problem, Don’t You Agree?” Framing and Bias in Human Evaluation for Natural Language Generation
%A Schoch, Stephanie
%A Yang, Diyi
%A Ji, Yangfeng
%Y Agarwal, Shubham
%Y Dušek, Ondřej
%Y Gehrmann, Sebastian
%Y Gkatzia, Dimitra
%Y Konstas, Ioannis
%Y Van Miltenburg, Emiel
%Y Santhanam, Sashank
%S Proceedings of the 1st Workshop on Evaluating NLG Evaluation
%D 2020
%8 December
%I Association for Computational Linguistics
%C Online (Dublin, Ireland)
%F schoch-etal-2020-problem
%X Despite recent efforts reviewing current human evaluation practices for natural language generation (NLG) research, the lack of reported question wording and potential for framing effects or cognitive biases influencing results has been widely overlooked. In this opinion paper, we detail three possible framing effects and cognitive biases that could be imposed on human evaluation in NLG. Based on this, we make a call for increased transparency for human evaluation in NLG and propose the concept of human evaluation statements. We make several recommendations for design details to report that could potentially influence results, such as question wording, and suggest that reporting pertinent design details can help increase comparability across studies as well as reproducibility of results.
%U https://aclanthology.org/2020.evalnlgeval-1.2/
%P 10-16
Markdown (Informal)
[“This is a Problem, Don’t You Agree?” Framing and Bias in Human Evaluation for Natural Language Generation](https://aclanthology.org/2020.evalnlgeval-1.2/) (Schoch et al., EvalNLGEval 2020)
ACL