% Conference paper in the SereTOD 2022 proceedings; abstract text repaired for extraction artifacts.
@inproceedings{liu-etal-2022-generative,
    title     = {A Generative User Simulator with {GPT}-based Architecture and Goal State Tracking for Reinforced Multi-Domain Dialog Systems},
    author    = {Liu, Hong and
                 Cai, Yucheng and
                 Ou, Zhijian and
                 Huang, Yi and
                 Feng, Junlan},
    editor    = {Ou, Zhijian and
                 Feng, Junlan and
                 Li, Juanzi},
    booktitle = {Proceedings of the Towards Semi-Supervised and Reinforced Task-Oriented Dialog Systems ({SereTOD})},
    month     = dec,
    year      = {2022},
    address   = {Abu Dhabi, Beijing (Hybrid)},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2022.seretod-1.10/},
    doi       = {10.18653/v1/2022.seretod-1.10},
    pages     = {85--97},
    abstract  = {Building user simulators (USs) for reinforcement learning (RL) of task-oriented dialog systems (DSs) has gained more and more attention, which, however, still faces several fundamental challenges. First, it is unclear whether we can leverage pretrained language models to design, for example, GPT-2 based USs, to catch up and interact with the recently advanced GPT-2 based DSs. Second, an important ingredient in a US is that the user goal can be effectively incorporated and tracked; but how to flexibly integrate goal state tracking and develop an end-to-end trainable US for multi-domains has remained to be a challenge. In this work, we propose a generative user simulator (GUS) with GPT-2 based architecture and goal state tracking towards addressing the above two challenges. Extensive experiments are conducted on MultiWOZ2.1. Different DSs are trained via RL with GUS, the classic agenda-based user simulator (ABUS) and other ablation simulators respectively, and are compared for cross-model evaluation, corpus-based evaluation and human evaluation. The GUS achieves superior results in all three evaluation tasks.}
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<!-- MODS record for the paper with citekey liu-etal-2022-generative. -->
<mods ID="liu-etal-2022-generative">
    <titleInfo>
        <title>A Generative User Simulator with GPT-based Architecture and Goal State Tracking for Reinforced Multi-Domain Dialog Systems</title>
    </titleInfo>
    <name type="personal">
        <namePart type="given">Hong</namePart>
        <namePart type="family">Liu</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Yucheng</namePart>
        <namePart type="family">Cai</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Zhijian</namePart>
        <namePart type="family">Ou</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Yi</namePart>
        <namePart type="family">Huang</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Junlan</namePart>
        <namePart type="family">Feng</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <originInfo>
        <dateIssued>2022-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher and place. -->
    <relatedItem type="host">
        <titleInfo>
            <title>Proceedings of the Towards Semi-Supervised and Reinforced Task-Oriented Dialog Systems (SereTOD)</title>
        </titleInfo>
        <name type="personal">
            <namePart type="given">Zhijian</namePart>
            <namePart type="family">Ou</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Junlan</namePart>
            <namePart type="family">Feng</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Juanzi</namePart>
            <namePart type="family">Li</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <originInfo>
            <publisher>Association for Computational Linguistics</publisher>
            <place>
                <placeTerm type="text">Abu Dhabi, Beijing (Hybrid)</placeTerm>
            </place>
        </originInfo>
        <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Building user simulators (USs) for reinforcement learning (RL) of task-oriented dialog systems (DSs) has gained more and more attention, which, however, still faces several fundamental challenges. First, it is unclear whether we can leverage pretrained language models to design, for example, GPT-2 based USs, to catch up and interact with the recently advanced GPT-2 based DSs. Second, an important ingredient in a US is that the user goal can be effectively incorporated and tracked; but how to flexibly integrate goal state tracking and develop an end-to-end trainable US for multi-domains has remained to be a challenge. In this work, we propose a generative user simulator (GUS) with GPT-2 based architecture and goal state tracking towards addressing the above two challenges. Extensive experiments are conducted on MultiWOZ2.1. Different DSs are trained via RL with GUS, the classic agenda-based user simulator (ABUS) and other ablation simulators respectively, and are compared for cross-model evaluation, corpus-based evaluation and human evaluation. The GUS achieves superior results in all three evaluation tasks.</abstract>
    <identifier type="citekey">liu-etal-2022-generative</identifier>
    <identifier type="doi">10.18653/v1/2022.seretod-1.10</identifier>
    <location>
        <url>https://aclanthology.org/2022.seretod-1.10/</url>
    </location>
    <part>
        <date>2022-12</date>
        <extent unit="page">
            <start>85</start>
            <end>97</end>
        </extent>
    </part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Generative User Simulator with GPT-based Architecture and Goal State Tracking for Reinforced Multi-Domain Dialog Systems
%A Liu, Hong
%A Cai, Yucheng
%A Ou, Zhijian
%A Huang, Yi
%A Feng, Junlan
%Y Ou, Zhijian
%Y Feng, Junlan
%Y Li, Juanzi
%S Proceedings of the Towards Semi-Supervised and Reinforced Task-Oriented Dialog Systems (SereTOD)
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, Beijing (Hybrid)
%F liu-etal-2022-generative
%X Building user simulators (USs) for reinforcement learning (RL) of task-oriented dialog systems (DSs) has gained more and more attention, which, however, still faces several fundamental challenges. First, it is unclear whether we can leverage pretrained language models to design, for example, GPT-2 based USs, to catch up and interact with the recently advanced GPT-2 based DSs. Second, an important ingredient in a US is that the user goal can be effectively incorporated and tracked; but how to flexibly integrate goal state tracking and develop an end-to-end trainable US for multi-domains has remained to be a challenge. In this work, we propose a generative user simulator (GUS) with GPT-2 based architecture and goal state tracking towards addressing the above two challenges. Extensive experiments are conducted on MultiWOZ2.1. Different DSs are trained via RL with GUS, the classic agenda-based user simulator (ABUS) and other ablation simulators respectively, and are compared for cross-model evaluation, corpus-based evaluation and human evaluation. The GUS achieves superior results in all three evaluation tasks.
%R 10.18653/v1/2022.seretod-1.10
%U https://aclanthology.org/2022.seretod-1.10/
%U https://doi.org/10.18653/v1/2022.seretod-1.10
%P 85-97
Markdown (Informal)
[A Generative User Simulator with GPT-based Architecture and Goal State Tracking for Reinforced Multi-Domain Dialog Systems](https://aclanthology.org/2022.seretod-1.10/) (Liu et al., SereTOD 2022)
ACL