@inproceedings{thomson-reiter-2020-gold,
title = "A Gold Standard Methodology for Evaluating Accuracy in Data-To-Text Systems",
author = "Thomson, Craig and
Reiter, Ehud",
editor = "Davis, Brian and
Graham, Yvette and
Kelleher, John and
Sripada, Yaji",
booktitle = "Proceedings of the 13th International Conference on Natural Language Generation",
month = dec,
year = "2020",
address = "Dublin, Ireland",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.inlg-1.22/",
doi = "10.18653/v1/2020.inlg-1.22",
pages = "158--168",
abstract = "Most Natural Language Generation systems need to produce accurate texts. We propose a methodology for high-quality human evaluation of the accuracy of generated texts, which is intended to serve as a gold-standard for accuracy evaluations of data-to-text systems. We use our methodology to evaluate the accuracy of computer generated basketball summaries. We then show how our gold standard evaluation can be used to validate automated metrics."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="thomson-reiter-2020-gold">
<titleInfo>
<title>A Gold Standard Methodology for Evaluating Accuracy in Data-To-Text Systems</title>
</titleInfo>
<name type="personal">
<namePart type="given">Craig</namePart>
<namePart type="family">Thomson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ehud</namePart>
<namePart type="family">Reiter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 13th International Conference on Natural Language Generation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Brian</namePart>
<namePart type="family">Davis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yvette</namePart>
<namePart type="family">Graham</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">John</namePart>
<namePart type="family">Kelleher</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yaji</namePart>
<namePart type="family">Sripada</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dublin, Ireland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Most Natural Language Generation systems need to produce accurate texts. We propose a methodology for high-quality human evaluation of the accuracy of generated texts, which is intended to serve as a gold-standard for accuracy evaluations of data-to-text systems. We use our methodology to evaluate the accuracy of computer generated basketball summaries. We then show how our gold standard evaluation can be used to validate automated metrics.</abstract>
<identifier type="citekey">thomson-reiter-2020-gold</identifier>
<identifier type="doi">10.18653/v1/2020.inlg-1.22</identifier>
<location>
<url>https://aclanthology.org/2020.inlg-1.22/</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>158</start>
<end>168</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Gold Standard Methodology for Evaluating Accuracy in Data-To-Text Systems
%A Thomson, Craig
%A Reiter, Ehud
%Y Davis, Brian
%Y Graham, Yvette
%Y Kelleher, John
%Y Sripada, Yaji
%S Proceedings of the 13th International Conference on Natural Language Generation
%D 2020
%8 December
%I Association for Computational Linguistics
%C Dublin, Ireland
%F thomson-reiter-2020-gold
%X Most Natural Language Generation systems need to produce accurate texts. We propose a methodology for high-quality human evaluation of the accuracy of generated texts, which is intended to serve as a gold-standard for accuracy evaluations of data-to-text systems. We use our methodology to evaluate the accuracy of computer generated basketball summaries. We then show how our gold standard evaluation can be used to validate automated metrics.
%R 10.18653/v1/2020.inlg-1.22
%U https://aclanthology.org/2020.inlg-1.22/
%U https://doi.org/10.18653/v1/2020.inlg-1.22
%P 158-168
Markdown (Informal)
[A Gold Standard Methodology for Evaluating Accuracy in Data-To-Text Systems](https://aclanthology.org/2020.inlg-1.22/) (Thomson & Reiter, INLG 2020)
ACL