@inproceedings{xiang-etal-2024-diffusiondialog,
title = "{D}iffusion{D}ialog: A Diffusion Model for Diverse Dialog Generation with Latent Space",
author = "Xiang, Jianxiang and
Liu, Zhenhua and
Liu, Haodong and
Bai, Yin and
Cheng, Jia and
Chen, Wenliang",
editor = "Calzolari, Nicoletta and
Kan, Min-Yen and
Hoste, Veronique and
Lenci, Alessandro and
Sakti, Sakriani and
Xue, Nianwen",
booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.lrec-main.440/",
pages = "4912--4921",
abstract = "In real-life conversations, the content is diverse, and there exist one-to-many problems that require diverse generation. Previous studies attempted to introduce discrete or Gaussian-based latent variables to address the one-to-many problem, but the diversity is limited. Recently, diffusion models have made breakthroughs in computer vision and some attempts have been made in natural language processing. In this paper, we propose DiffusionDialog, a novel approach to enhance the diversity of dialogue generation with the help of diffusion model. In our approach, we introduce the continuous latent variables in the diffusion model instead of the discrete ones or VAE, which are often used in the previous studies. The problem of using discrete variables in dialog task is how to build a effective prior of latent space and inferring process to infer the proper latent given the context. Combining the encoder and latent-based diffusion model, we encode the latent of response in a continuous space as the prior instead of fixed Gaussian distribution in VAE or simply discrete ones, and we infer the latent by denoising step by step with diffusion model. The experimental results show that our model greatly enhance the diversity of dialog response while keeping the coherence. In further analysis, we find that our diffusion model achieved high inference efficiency which is the main challenge of applying diffusion model in natural language processing."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="xiang-etal-2024-diffusiondialog">
<titleInfo>
<title>DiffusionDialog: A Diffusion Model for Diverse Dialog Generation with Latent Space</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jianxiang</namePart>
<namePart type="family">Xiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhenhua</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Haodong</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yin</namePart>
<namePart type="family">Bai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jia</namePart>
<namePart type="family">Cheng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wenliang</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In real-life conversations, the content is diverse, and there exist one-to-many problems that require diverse generation. Previous studies attempted to introduce discrete or Gaussian-based latent variables to address the one-to-many problem, but the diversity is limited. Recently, diffusion models have made breakthroughs in computer vision and some attempts have been made in natural language processing. In this paper, we propose DiffusionDialog, a novel approach to enhance the diversity of dialogue generation with the help of diffusion model. In our approach, we introduce the continuous latent variables in the diffusion model instead of the discrete ones or VAE, which are often used in the previous studies. The problem of using discrete variables in dialog task is how to build a effective prior of latent space and inferring process to infer the proper latent given the context. Combining the encoder and latent-based diffusion model, we encode the latent of response in a continuous space as the prior instead of fixed Gaussian distribution in VAE or simply discrete ones, and we infer the latent by denoising step by step with diffusion model. The experimental results show that our model greatly enhance the diversity of dialog response while keeping the coherence. In further analysis, we find that our diffusion model achieved high inference efficiency which is the main challenge of applying diffusion model in natural language processing.</abstract>
<identifier type="citekey">xiang-etal-2024-diffusiondialog</identifier>
<location>
<url>https://aclanthology.org/2024.lrec-main.440/</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>4912</start>
<end>4921</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T DiffusionDialog: A Diffusion Model for Diverse Dialog Generation with Latent Space
%A Xiang, Jianxiang
%A Liu, Zhenhua
%A Liu, Haodong
%A Bai, Yin
%A Cheng, Jia
%A Chen, Wenliang
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F xiang-etal-2024-diffusiondialog
%X In real-life conversations, the content is diverse, and there exist one-to-many problems that require diverse generation. Previous studies attempted to introduce discrete or Gaussian-based latent variables to address the one-to-many problem, but the diversity is limited. Recently, diffusion models have made breakthroughs in computer vision and some attempts have been made in natural language processing. In this paper, we propose DiffusionDialog, a novel approach to enhance the diversity of dialogue generation with the help of diffusion model. In our approach, we introduce the continuous latent variables in the diffusion model instead of the discrete ones or VAE, which are often used in the previous studies. The problem of using discrete variables in dialog task is how to build a effective prior of latent space and inferring process to infer the proper latent given the context. Combining the encoder and latent-based diffusion model, we encode the latent of response in a continuous space as the prior instead of fixed Gaussian distribution in VAE or simply discrete ones, and we infer the latent by denoising step by step with diffusion model. The experimental results show that our model greatly enhance the diversity of dialog response while keeping the coherence. In further analysis, we find that our diffusion model achieved high inference efficiency which is the main challenge of applying diffusion model in natural language processing.
%U https://aclanthology.org/2024.lrec-main.440/
%P 4912-4921
Markdown (Informal)
[DiffusionDialog: A Diffusion Model for Diverse Dialog Generation with Latent Space](https://aclanthology.org/2024.lrec-main.440/) (Xiang et al., LREC-COLING 2024)
ACL