@inproceedings{ultes-maier-2021-blending,
title = "Blending Task Success and User Satisfaction: Analysis of Learned Dialogue Behaviour with Multiple Rewards",
author = "Ultes, Stefan and
Maier, Wolfgang",
editor = "Li, Haizhou and
Levow, Gina-Anne and
Yu, Zhou and
Gupta, Chitralekha and
Sisman, Berrak and
Cai, Siqi and
Vandyke, David and
Dethlefs, Nina and
Wu, Yan and
Li, Junyi Jessy",
booktitle = "Proceedings of the 22nd Annual Meeting of the Special Interest Group on Discourse and Dialogue",
month = jul,
year = "2021",
address = "Singapore and Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.sigdial-1.42/",
doi = "10.18653/v1/2021.sigdial-1.42",
pages = "403--410",
abstract = "Recently, principal reward components for dialogue policy reinforcement learning use task success and user satisfaction independently and neither the resulting learned behaviour has been analysed nor a suitable proper analysis method even existed. In this work, we employ both principal reward components jointly and propose a method to analyse the resulting behaviour through a structured way of probing the learned policy. We show that blending both reward components increases user satisfaction without sacrificing task success in more hostile environments and provide insight about actions chosen by the learned policies."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ultes-maier-2021-blending">
<titleInfo>
<title>Blending Task Success and User Satisfaction: Analysis of Learned Dialogue Behaviour with Multiple Rewards</title>
</titleInfo>
<name type="personal">
<namePart type="given">Stefan</namePart>
<namePart type="family">Ultes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wolfgang</namePart>
<namePart type="family">Maier</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 22nd Annual Meeting of the Special Interest Group on Discourse and Dialogue</title>
</titleInfo>
<name type="personal">
<namePart type="given">Haizhou</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gina-Anne</namePart>
<namePart type="family">Levow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhou</namePart>
<namePart type="family">Yu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chitralekha</namePart>
<namePart type="family">Gupta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Berrak</namePart>
<namePart type="family">Sisman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Siqi</namePart>
<namePart type="family">Cai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Vandyke</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nina</namePart>
<namePart type="family">Dethlefs</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yan</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junyi</namePart>
<namePart type="given">Jessy</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore and Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Recently, principal reward components for dialogue policy reinforcement learning use task success and user satisfaction independently and neither the resulting learned behaviour has been analysed nor a suitable proper analysis method even existed. In this work, we employ both principal reward components jointly and propose a method to analyse the resulting behaviour through a structured way of probing the learned policy. We show that blending both reward components increases user satisfaction without sacrificing task success in more hostile environments and provide insight about actions chosen by the learned policies.</abstract>
<identifier type="citekey">ultes-maier-2021-blending</identifier>
<identifier type="doi">10.18653/v1/2021.sigdial-1.42</identifier>
<location>
<url>https://aclanthology.org/2021.sigdial-1.42/</url>
</location>
<part>
<date>2021-07</date>
<extent unit="page">
<start>403</start>
<end>410</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Blending Task Success and User Satisfaction: Analysis of Learned Dialogue Behaviour with Multiple Rewards
%A Ultes, Stefan
%A Maier, Wolfgang
%Y Li, Haizhou
%Y Levow, Gina-Anne
%Y Yu, Zhou
%Y Gupta, Chitralekha
%Y Sisman, Berrak
%Y Cai, Siqi
%Y Vandyke, David
%Y Dethlefs, Nina
%Y Wu, Yan
%Y Li, Junyi Jessy
%S Proceedings of the 22nd Annual Meeting of the Special Interest Group on Discourse and Dialogue
%D 2021
%8 July
%I Association for Computational Linguistics
%C Singapore and Online
%F ultes-maier-2021-blending
%X Recently, principal reward components for dialogue policy reinforcement learning use task success and user satisfaction independently and neither the resulting learned behaviour has been analysed nor a suitable proper analysis method even existed. In this work, we employ both principal reward components jointly and propose a method to analyse the resulting behaviour through a structured way of probing the learned policy. We show that blending both reward components increases user satisfaction without sacrificing task success in more hostile environments and provide insight about actions chosen by the learned policies.
%R 10.18653/v1/2021.sigdial-1.42
%U https://aclanthology.org/2021.sigdial-1.42/
%U https://doi.org/10.18653/v1/2021.sigdial-1.42
%P 403-410
Markdown (Informal)
[Blending Task Success and User Satisfaction: Analysis of Learned Dialogue Behaviour with Multiple Rewards](https://aclanthology.org/2021.sigdial-1.42/) (Ultes & Maier, SIGDIAL 2021)
ACL