@inproceedings{mattei-etal-2020-interaction,
title = "On the interaction of automatic evaluation and task framing in headline style transfer",
author = "De Mattei, Lorenzo and
Cafagna, Michele and
Lai, Huiyuan and
Dell{'}Orletta, Felice and
Nissim, Malvina and
Gatt, Albert",
editor = "Agarwal, Shubham and
Du{\v{s}}ek, Ond{\v{r}}ej and
Gehrmann, Sebastian and
Gkatzia, Dimitra and
Konstas, Ioannis and
Van Miltenburg, Emiel and
Santhanam, Sashank",
booktitle = "Proceedings of the 1st Workshop on Evaluating NLG Evaluation",
month = dec,
year = "2020",
address = "Online (Dublin, Ireland)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.evalnlgeval-1.5",
pages = "38--43",
abstract = "An ongoing debate in the NLG community concerns the best way to evaluate systems, with human evaluation often being considered the most reliable method, compared to corpus-based metrics. However, tasks involving subtle textual differences, such as style transfer, tend to be hard for humans to perform. In this paper, we propose an evaluation method for this task based on purposely-trained classifiers, showing that it better reflects system differences than traditional metrics such as BLEU.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mattei-etal-2020-interaction">
<titleInfo>
<title>On the interaction of automatic evaluation and task framing in headline style transfer</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lorenzo</namePart>
<namePart type="family">De Mattei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michele</namePart>
<namePart type="family">Cafagna</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Huiyuan</namePart>
<namePart type="family">Lai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Felice</namePart>
<namePart type="family">Dell’Orletta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Malvina</namePart>
<namePart type="family">Nissim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Albert</namePart>
<namePart type="family">Gatt</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st Workshop on Evaluating NLG Evaluation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shubham</namePart>
<namePart type="family">Agarwal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ondřej</namePart>
<namePart type="family">Dušek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Gehrmann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dimitra</namePart>
<namePart type="family">Gkatzia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ioannis</namePart>
<namePart type="family">Konstas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Emiel</namePart>
<namePart type="family">Van Miltenburg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sashank</namePart>
<namePart type="family">Santhanam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online (Dublin, Ireland)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>An ongoing debate in the NLG community concerns the best way to evaluate systems, with human evaluation often being considered the most reliable method, compared to corpus-based metrics. However, tasks involving subtle textual differences, such as style transfer, tend to be hard for humans to perform. In this paper, we propose an evaluation method for this task based on purposely-trained classifiers, showing that it better reflects system differences than traditional metrics such as BLEU.</abstract>
<identifier type="citekey">mattei-etal-2020-interaction</identifier>
<location>
<url>https://aclanthology.org/2020.evalnlgeval-1.5</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>38</start>
<end>43</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T On the interaction of automatic evaluation and task framing in headline style transfer
%A De Mattei, Lorenzo
%A Cafagna, Michele
%A Lai, Huiyuan
%A Dell’Orletta, Felice
%A Nissim, Malvina
%A Gatt, Albert
%Y Agarwal, Shubham
%Y Dušek, Ondřej
%Y Gehrmann, Sebastian
%Y Gkatzia, Dimitra
%Y Konstas, Ioannis
%Y Van Miltenburg, Emiel
%Y Santhanam, Sashank
%S Proceedings of the 1st Workshop on Evaluating NLG Evaluation
%D 2020
%8 December
%I Association for Computational Linguistics
%C Online (Dublin, Ireland)
%F mattei-etal-2020-interaction
%X An ongoing debate in the NLG community concerns the best way to evaluate systems, with human evaluation often being considered the most reliable method, compared to corpus-based metrics. However, tasks involving subtle textual differences, such as style transfer, tend to be hard for humans to perform. In this paper, we propose an evaluation method for this task based on purposely-trained classifiers, showing that it better reflects system differences than traditional metrics such as BLEU.
%U https://aclanthology.org/2020.evalnlgeval-1.5
%P 38-43
Markdown (Informal)
[On the interaction of automatic evaluation and task framing in headline style transfer](https://aclanthology.org/2020.evalnlgeval-1.5) (De Mattei et al., EvalNLGEval 2020)
ACL