@inproceedings{khapra-sai-2021-tutorial,
title = "A Tutorial on Evaluation Metrics used in Natural Language Generation",
author = "Khapra, Mitesh M. and
Sai, Ananya B.",
editor = "Kondrak, Greg and
Bontcheva, Kalina and
Gillick, Dan",
booktitle = "Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Tutorials",
month = jun,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.naacl-tutorials.4/",
doi = "10.18653/v1/2021.naacl-tutorials.4",
pages = "15--19",
abstract = "The advent of Deep Learning and the availability of large-scale datasets have accelerated research on Natural Language Generation with a focus on newer tasks and better models. With such rapid progress, it is vital to assess the extent of scientific progress made and identify the areas/components that need improvement. To accomplish this in an automatic and reliable manner, the NLP community has actively pursued the development of automatic evaluation metrics. Especially in the last few years, there has been an increasing focus on evaluation metrics, with several criticisms of existing metrics and proposals for several new metrics. This tutorial presents the evolution of automatic evaluation metrics to their current state along with the emerging trends in this field by specifically addressing the following questions: (i) What makes NLG evaluation challenging? (ii) Why do we need automatic evaluation metrics? (iii) What are the existing automatic evaluation metrics and how can they be organised in a coherent taxonomy? (iv) What are the criticisms and shortcomings of existing metrics? (v) What are the possible future directions of research?"
}
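
For programmatic reuse, the double-quoted fields of the entry above can be pulled out with a few lines of Python. This is a minimal sketch, not a real BibTeX parser: it assumes every value is wrapped in double quotes (so the unquoted `month = jun` is deliberately skipped), it ignores brace-delimited values entirely, and the `BIBTEX` constant is an abbreviated copy of the full record.

import re

# Abbreviated copy of the entry above; in practice, paste the full record.
BIBTEX = '''@inproceedings{khapra-sai-2021-tutorial,
    title = "A Tutorial on Evaluation Metrics used in Natural Language Generation",
    author = "Khapra, Mitesh M. and
      Sai, Ananya B.",
    year = "2021",
    pages = "15--19"
}'''

# Naive pattern: a field name, '=', then a double-quoted value that may
# span several lines (re.DOTALL).  Unquoted values such as `month = jun`
# are not matched, by design.
FIELD = re.compile(r'(\w+)\s*=\s*"(.*?)"', re.DOTALL)

fields = {name.lower(): re.sub(r'\s+', ' ', value)
          for name, value in FIELD.findall(BIBTEX)}
print(fields['author'])  # Khapra, Mitesh M. and Sai, Ananya B.
print(fields['pages'])   # 15--19
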
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="khapra-sai-2021-tutorial">
<titleInfo>
<title>A Tutorial on Evaluation Metrics used in Natural Language Generation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mitesh</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Khapra</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ananya</namePart>
<namePart type="given">B</namePart>
<namePart type="family">Sai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Tutorials</title>
</titleInfo>
<name type="personal">
<namePart type="given">Greg</namePart>
<namePart type="family">Kondrak</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kalina</namePart>
<namePart type="family">Bontcheva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dan</namePart>
<namePart type="family">Gillick</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The advent of Deep Learning and the availability of large-scale datasets have accelerated research on Natural Language Generation with a focus on newer tasks and better models. With such rapid progress, it is vital to assess the extent of scientific progress made and identify the areas/components that need improvement. To accomplish this in an automatic and reliable manner, the NLP community has actively pursued the development of automatic evaluation metrics. Especially in the last few years, there has been an increasing focus on evaluation metrics, with several criticisms of existing metrics and proposals for several new metrics. This tutorial presents the evolution of automatic evaluation metrics to their current state along with the emerging trends in this field by specifically addressing the following questions: (i) What makes NLG evaluation challenging? (ii) Why do we need automatic evaluation metrics? (iii) What are the existing automatic evaluation metrics and how can they be organised in a coherent taxonomy? (iv) What are the criticisms and shortcomings of existing metrics? (v) What are the possible future directions of research?</abstract>
<identifier type="citekey">khapra-sai-2021-tutorial</identifier>
<identifier type="doi">10.18653/v1/2021.naacl-tutorials.4</identifier>
<location>
<url>https://aclanthology.org/2021.naacl-tutorials.4/</url>
</location>
<part>
<date>2021-06</date>
<extent unit="page">
<start>15</start>
<end>19</end>
</extent>
</part>
</mods>
</modsCollection>
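
The MODS record is plain namespaced XML, so Python's standard-library ElementTree is enough to read it back. The sketch below runs on a trimmed copy of the record (one author kept; the second author, the editors, and the host proceedings dropped) purely to stay short.

import xml.etree.ElementTree as ET

# Trimmed copy of the MODS record above.
MODS = '''<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="khapra-sai-2021-tutorial">
    <titleInfo>
      <title>A Tutorial on Evaluation Metrics used in Natural Language Generation</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Mitesh</namePart>
      <namePart type="given">M</namePart>
      <namePart type="family">Khapra</namePart>
      <role><roleTerm authority="marcrelator" type="text">author</roleTerm></role>
    </name>
  </mods>
</modsCollection>'''

NS = {'m': 'http://www.loc.gov/mods/v3'}

root = ET.fromstring(MODS)
mods = root.find('m:mods', NS)
title = mods.find('m:titleInfo/m:title', NS).text

# Join the namePart pieces of every top-level personal name whose role
# is 'author' (editors live inside relatedItem, so they are not matched).
authors = [
    ' '.join(part.text for part in name.findall('m:namePart', NS))
    for name in mods.findall('m:name', NS)
    if name.findtext('m:role/m:roleTerm', namespaces=NS) == 'author'
]
print(title)
print(authors)  # ['Mitesh M Khapra']
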
%0 Conference Proceedings
%T A Tutorial on Evaluation Metrics used in Natural Language Generation
%A Khapra, Mitesh M.
%A Sai, Ananya B.
%Y Kondrak, Greg
%Y Bontcheva, Kalina
%Y Gillick, Dan
%S Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Tutorials
%D 2021
%8 June
%I Association for Computational Linguistics
%C Online
%F khapra-sai-2021-tutorial
%X The advent of Deep Learning and the availability of large-scale datasets have accelerated research on Natural Language Generation with a focus on newer tasks and better models. With such rapid progress, it is vital to assess the extent of scientific progress made and identify the areas/components that need improvement. To accomplish this in an automatic and reliable manner, the NLP community has actively pursued the development of automatic evaluation metrics. Especially in the last few years, there has been an increasing focus on evaluation metrics, with several criticisms of existing metrics and proposals for several new metrics. This tutorial presents the evolution of automatic evaluation metrics to their current state along with the emerging trends in this field by specifically addressing the following questions: (i) What makes NLG evaluation challenging? (ii) Why do we need automatic evaluation metrics? (iii) What are the existing automatic evaluation metrics and how can they be organised in a coherent taxonomy? (iv) What are the criticisms and shortcomings of existing metrics? (v) What are the possible future directions of research?
%R 10.18653/v1/2021.naacl-tutorials.4
%U https://aclanthology.org/2021.naacl-tutorials.4/
%U https://doi.org/10.18653/v1/2021.naacl-tutorials.4
%P 15-19
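
The Endnote export above uses the tagged "refer" layout: every line opens with a two-character tag (%T title, %A author, %Y editor, %U URL, and so on), and repeatable tags simply recur. Below is a minimal reader, again only a sketch, that collects each tag into a list; the function and variable names are illustrative.

from collections import defaultdict

def parse_refer(text: str) -> dict[str, list[str]]:
    # Repeatable tags such as %A, %Y, and %U accumulate into lists;
    # every other tag ends up as a one-item list.
    record = defaultdict(list)
    for line in text.splitlines():
        if line.startswith('%') and len(line) > 2:
            tag, value = line[:2], line[3:].strip()
            record[tag].append(value)
    return dict(record)

sample = """%0 Conference Proceedings
%T A Tutorial on Evaluation Metrics used in Natural Language Generation
%A Khapra, Mitesh M.
%A Sai, Ananya B.
%D 2021
%P 15-19"""

rec = parse_refer(sample)
print(rec['%T'][0])  # the title
print(rec['%A'])     # ['Khapra, Mitesh M.', 'Sai, Ananya B.']
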
Markdown (Informal)
[A Tutorial on Evaluation Metrics used in Natural Language Generation](https://aclanthology.org/2021.naacl-tutorials.4/) (Khapra & Sai, NAACL 2021)

ACL
Mitesh M. Khapra and Ananya B. Sai. 2021. A Tutorial on Evaluation Metrics used in Natural Language Generation. In Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Tutorials, pages 15–19, Online. Association for Computational Linguistics.