BibTeX
@inproceedings{gooding-etal-2023-study,
title = "A Study on Annotation Interfaces for Summary Comparison",
author = "Gooding, Sian and
Werner, Lucas and
C{\u{a}}rbune, Victor",
editor = "Prange, Jakob and
Friedrich, Annemarie",
booktitle = "Proceedings of the 17th Linguistic Annotation Workshop (LAW-XVII)",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.law-1.18/",
doi = "10.18653/v1/2023.law-1.18",
pages = "179--187",
abstract = "The task of summarisation is notoriously difficult to evaluate, with agreement even between expert raters unlikely to be perfect. One technique for summary evaluation relies on collecting comparison data by presenting annotators with generated summaries and tasking them with selecting the best one. This paradigm is currently being exploited in reinforcement learning using human feedback, whereby a reward function is trained using pairwise choice data. Comparisons are an easier way to elicit human feedback for summarisation, however, such decisions can be bottle necked by the usability of the annotator interface. In this paper, we present the results of a pilot study exploring how the user interface impacts annotator agreement when judging summary quality."
}
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="gooding-etal-2023-study">
<titleInfo>
<title>A Study on Annotation Interfaces for Summary Comparison</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sian</namePart>
<namePart type="family">Gooding</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lucas</namePart>
<namePart type="family">Werner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Victor</namePart>
<namePart type="family">Cărbune</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 17th Linguistic Annotation Workshop (LAW-XVII)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jakob</namePart>
<namePart type="family">Prange</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Annemarie</namePart>
<namePart type="family">Friedrich</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The task of summarisation is notoriously difficult to evaluate, with agreement even between expert raters unlikely to be perfect. One technique for summary evaluation relies on collecting comparison data by presenting annotators with generated summaries and tasking them with selecting the best one. This paradigm is currently being exploited in reinforcement learning using human feedback, whereby a reward function is trained using pairwise choice data. Comparisons are an easier way to elicit human feedback for summarisation; however, such decisions can be bottlenecked by the usability of the annotator interface. In this paper, we present the results of a pilot study exploring how the user interface impacts annotator agreement when judging summary quality.</abstract>
<identifier type="citekey">gooding-etal-2023-study</identifier>
<identifier type="doi">10.18653/v1/2023.law-1.18</identifier>
<location>
<url>https://aclanthology.org/2023.law-1.18/</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>179</start>
<end>187</end>
</extent>
</part>
</mods>
</modsCollection>
Endnote
%0 Conference Proceedings
%T A Study on Annotation Interfaces for Summary Comparison
%A Gooding, Sian
%A Werner, Lucas
%A Cărbune, Victor
%Y Prange, Jakob
%Y Friedrich, Annemarie
%S Proceedings of the 17th Linguistic Annotation Workshop (LAW-XVII)
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F gooding-etal-2023-study
%X The task of summarisation is notoriously difficult to evaluate, with agreement even between expert raters unlikely to be perfect. One technique for summary evaluation relies on collecting comparison data by presenting annotators with generated summaries and tasking them with selecting the best one. This paradigm is currently being exploited in reinforcement learning using human feedback, whereby a reward function is trained using pairwise choice data. Comparisons are an easier way to elicit human feedback for summarisation; however, such decisions can be bottlenecked by the usability of the annotator interface. In this paper, we present the results of a pilot study exploring how the user interface impacts annotator agreement when judging summary quality.
%R 10.18653/v1/2023.law-1.18
%U https://aclanthology.org/2023.law-1.18/
%U https://doi.org/10.18653/v1/2023.law-1.18
%P 179-187
Markdown (Informal)
[A Study on Annotation Interfaces for Summary Comparison](https://aclanthology.org/2023.law-1.18/) (Gooding et al., LAW 2023)
ACL
Sian Gooding, Lucas Werner, and Victor Cărbune. 2023. A Study on Annotation Interfaces for Summary Comparison. In Proceedings of the 17th Linguistic Annotation Workshop (LAW-XVII), pages 179–187, Toronto, Canada. Association for Computational Linguistics.
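
The abstract describes a reward function being trained from the pairwise choice data that annotators produce when comparing summaries. Purely as an illustrative sketch of that general setup (not code from the paper), the snippet below fits a scalar reward head with a standard Bradley-Terry pairwise objective; the RewardModel class, the embedding dimension, and the random placeholder embeddings are all assumptions made for the example.

```python
# Illustrative sketch only: reward-model training from pairwise summary
# preferences, in the style of the RLHF setup mentioned in the abstract.
# Real features would come from a summarisation model; here we use
# random placeholder embeddings.
import torch
import torch.nn as nn

class RewardModel(nn.Module):
    """Maps a summary embedding to a single scalar reward."""
    def __init__(self, dim: int):
        super().__init__()
        self.score = nn.Linear(dim, 1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.score(x).squeeze(-1)

def pairwise_loss(r_chosen: torch.Tensor, r_rejected: torch.Tensor) -> torch.Tensor:
    # Bradley-Terry style objective: the summary the annotator preferred
    # should receive a higher reward than the one they rejected.
    return -torch.nn.functional.logsigmoid(r_chosen - r_rejected).mean()

# Toy usage with placeholder embeddings standing in for real summary features.
dim = 16
model = RewardModel(dim)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

chosen = torch.randn(8, dim)    # embeddings of annotator-preferred summaries
rejected = torch.randn(8, dim)  # embeddings of the summaries passed over

optimizer.zero_grad()
loss = pairwise_loss(model(chosen), model(rejected))
loss.backward()
optimizer.step()
print(f"pairwise loss: {loss.item():.4f}")
```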