@inproceedings{xu-etal-2022-endex,
title = "{E}n{D}ex: Evaluation of Dialogue Engagingness at Scale",
author = "Xu, Guangxuan and
Liu, Ruibo and
Harel-Canada, Fabrice and
Chandra, Nischal Reddy and
Peng, Nanyun",
editor = "Goldberg, Yoav and
Kozareva, Zornitsa and
Zhang, Yue",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2022",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.findings-emnlp.359/",
doi = "10.18653/v1/2022.findings-emnlp.359",
pages = "4884--4893",
abstract = "We propose EnDex, the first human-reaction based model to evaluate dialogue engagingness. EnDex is trained on 80k Reddit-based Engagement Dataset (RED) curated using a novel distant-supervision framework. Engagingness is a key measure that captures high-level quality of AI dialogue systems and closely reflects actual user experience. However, data shortage, plus the abstract and extensive definition of engagingness makes it challenging to develop an automatic metric. Our work departs from mainstream approaches that use synthetic negative examples to train binary classifiers, and instead, proposes a solution using distant-supervision from human-reaction feedback. To support the soundness of our EnDex metric, we offer a theoretical foundation for engagement, an extensive ablation study, and empirical evidence of high correlation on five engagingness related datasets. We will release code, off-the-shelf EnDex model, and a large-scale dataset upon paper publication to facilitate future research."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="xu-etal-2022-endex">
<titleInfo>
<title>EnDex: Evaluation of Dialogue Engagingness at Scale</title>
</titleInfo>
<name type="personal">
<namePart type="given">Guangxuan</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruibo</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fabrice</namePart>
<namePart type="family">Harel-Canada</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nischal</namePart>
<namePart type="given">Reddy</namePart>
<namePart type="family">Chandra</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nanyun</namePart>
<namePart type="family">Peng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2022</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yoav</namePart>
<namePart type="family">Goldberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zornitsa</namePart>
<namePart type="family">Kozareva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yue</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, United Arab Emirates</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We propose EnDex, the first human-reaction based model to evaluate dialogue engagingness. EnDex is trained on 80k Reddit-based Engagement Dataset (RED) curated using a novel distant-supervision framework. Engagingness is a key measure that captures high-level quality of AI dialogue systems and closely reflects actual user experience. However, data shortage, plus the abstract and extensive definition of engagingness makes it challenging to develop an automatic metric. Our work departs from mainstream approaches that use synthetic negative examples to train binary classifiers, and instead, proposes a solution using distant-supervision from human-reaction feedback. To support the soundness of our EnDex metric, we offer a theoretical foundation for engagement, an extensive ablation study, and empirical evidence of high correlation on five engagingness related datasets. We will release code, off-the-shelf EnDex model, and a large-scale dataset upon paper publication to facilitate future research.</abstract>
<identifier type="citekey">xu-etal-2022-endex</identifier>
<identifier type="doi">10.18653/v1/2022.findings-emnlp.359</identifier>
<location>
<url>https://aclanthology.org/2022.findings-emnlp.359/</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>4884</start>
<end>4893</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T EnDex: Evaluation of Dialogue Engagingness at Scale
%A Xu, Guangxuan
%A Liu, Ruibo
%A Harel-Canada, Fabrice
%A Chandra, Nischal Reddy
%A Peng, Nanyun
%Y Goldberg, Yoav
%Y Kozareva, Zornitsa
%Y Zhang, Yue
%S Findings of the Association for Computational Linguistics: EMNLP 2022
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates
%F xu-etal-2022-endex
%X We propose EnDex, the first human-reaction based model to evaluate dialogue engagingness. EnDex is trained on 80k Reddit-based Engagement Dataset (RED) curated using a novel distant-supervision framework. Engagingness is a key measure that captures high-level quality of AI dialogue systems and closely reflects actual user experience. However, data shortage, plus the abstract and extensive definition of engagingness makes it challenging to develop an automatic metric. Our work departs from mainstream approaches that use synthetic negative examples to train binary classifiers, and instead, proposes a solution using distant-supervision from human-reaction feedback. To support the soundness of our EnDex metric, we offer a theoretical foundation for engagement, an extensive ablation study, and empirical evidence of high correlation on five engagingness related datasets. We will release code, off-the-shelf EnDex model, and a large-scale dataset upon paper publication to facilitate future research.
%R 10.18653/v1/2022.findings-emnlp.359
%U https://aclanthology.org/2022.findings-emnlp.359/
%U https://doi.org/10.18653/v1/2022.findings-emnlp.359
%P 4884-4893
Markdown (Informal)
[EnDex: Evaluation of Dialogue Engagingness at Scale](https://aclanthology.org/2022.findings-emnlp.359/) (Xu et al., Findings 2022)
ACL
- Guangxuan Xu, Ruibo Liu, Fabrice Harel-Canada, Nischal Reddy Chandra, and Nanyun Peng. 2022. EnDex: Evaluation of Dialogue Engagingness at Scale. In Findings of the Association for Computational Linguistics: EMNLP 2022, pages 4884–4893, Abu Dhabi, United Arab Emirates. Association for Computational Linguistics.