@inproceedings{van-miltenburg-etal-2020-evaluation,
title = "Evaluation rules! On the use of grammars and rule-based systems for {NLG} evaluation",
author = "van Miltenburg, Emiel and
van der Lee, Chris and
Castro-Ferreira, Thiago and
Krahmer, Emiel",
editor = "Agarwal, Shubham and
Du{\v{s}}ek, Ond{\v{r}}ej and
Gehrmann, Sebastian and
Gkatzia, Dimitra and
Konstas, Ioannis and
van Miltenburg, Emiel and
Santhanam, Sashank",
booktitle = "Proceedings of the 1st Workshop on Evaluating NLG Evaluation",
month = dec,
year = "2020",
address = "Online (Dublin, Ireland)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.evalnlgeval-1.3/",
pages = "17--27",
abstract = "NLG researchers often use uncontrolled corpora to train and evaluate their systems, using textual similarity metrics, such as BLEU. This position paper argues in favour of two alternative evaluation strategies, using grammars or rule-based systems. These strategies are particularly useful to identify the strengths and weaknesses of different systems. We contrast our proposals with the (extended) WebNLG dataset, which is revealed to have a skewed distribution of predicates. We predict that this distribution affects the quality of the predictions for systems trained on this data. However, this hypothesis can only be thoroughly tested (without any confounds) once we are able to systematically manipulate the skewness of the data, using a rule-based approach."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="van-miltenburg-etal-2020-evaluation">
<titleInfo>
<title>Evaluation rules! On the use of grammars and rule-based systems for NLG evaluation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Emiel</namePart>
<namePart type="family">van Miltenburg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chris</namePart>
<namePart type="family">van der Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thiago</namePart>
<namePart type="family">Castro-Ferreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Emiel</namePart>
<namePart type="family">Krahmer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st Workshop on Evaluating NLG Evaluation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shubham</namePart>
<namePart type="family">Agarwal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ondřej</namePart>
<namePart type="family">Dušek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Gehrmann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dimitra</namePart>
<namePart type="family">Gkatzia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ioannis</namePart>
<namePart type="family">Konstas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Emiel</namePart>
<namePart type="family">van Miltenburg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sashank</namePart>
<namePart type="family">Santhanam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online (Dublin, Ireland)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>NLG researchers often use uncontrolled corpora to train and evaluate their systems, using textual similarity metrics, such as BLEU. This position paper argues in favour of two alternative evaluation strategies, using grammars or rule-based systems. These strategies are particularly useful to identify the strengths and weaknesses of different systems. We contrast our proposals with the (extended) WebNLG dataset, which is revealed to have a skewed distribution of predicates. We predict that this distribution affects the quality of the predictions for systems trained on this data. However, this hypothesis can only be thoroughly tested (without any confounds) once we are able to systematically manipulate the skewness of the data, using a rule-based approach.</abstract>
<identifier type="citekey">van-miltenburg-etal-2020-evaluation</identifier>
<location>
<url>https://aclanthology.org/2020.evalnlgeval-1.3/</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>17</start>
<end>27</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Evaluation rules! On the use of grammars and rule-based systems for NLG evaluation
%A van Miltenburg, Emiel
%A van der Lee, Chris
%A Castro-Ferreira, Thiago
%A Krahmer, Emiel
%Y Agarwal, Shubham
%Y Dušek, Ondřej
%Y Gehrmann, Sebastian
%Y Gkatzia, Dimitra
%Y Konstas, Ioannis
%Y van Miltenburg, Emiel
%Y Santhanam, Sashank
%S Proceedings of the 1st Workshop on Evaluating NLG Evaluation
%D 2020
%8 December
%I Association for Computational Linguistics
%C Online (Dublin, Ireland)
%F van-miltenburg-etal-2020-evaluation
%X NLG researchers often use uncontrolled corpora to train and evaluate their systems, using textual similarity metrics, such as BLEU. This position paper argues in favour of two alternative evaluation strategies, using grammars or rule-based systems. These strategies are particularly useful to identify the strengths and weaknesses of different systems. We contrast our proposals with the (extended) WebNLG dataset, which is revealed to have a skewed distribution of predicates. We predict that this distribution affects the quality of the predictions for systems trained on this data. However, this hypothesis can only be thoroughly tested (without any confounds) once we are able to systematically manipulate the skewness of the data, using a rule-based approach.
%U https://aclanthology.org/2020.evalnlgeval-1.3/
%P 17-27
Markdown (Informal)
[Evaluation rules! On the use of grammars and rule-based systems for NLG evaluation](https://aclanthology.org/2020.evalnlgeval-1.3/) (van Miltenburg et al., EvalNLGEval 2020)
ACL