BibTeX
@inproceedings{razumovskaia-etal-2024-dial,
    title = "Dial {B}e{I}nfo for Faithfulness: Improving Factuality of Information-Seeking Dialogue via Behavioural Fine-Tuning",
    author = "Razumovskaia, Evgeniia and
      Vuli{\'c}, Ivan and
      Markovi{\'c}, Pavle and
      Cichy, Tomasz and
      Zheng, Qian and
      Wen, Tsung-Hsien and
      Budzianowski, Pawe{\l}",
    editor = "Al-Onaizan, Yaser and
      Bansal, Mohit and
      Chen, Yun-Nung",
    booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2024",
    month = nov,
    year = "2024",
    address = "Miami, Florida, USA",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.findings-emnlp.998/",
    doi = "10.18653/v1/2024.findings-emnlp.998",
    pages = "17139--17152",
    abstract = "Factual faithfulness is a crucial requirement in information-seeking dialogue: the system should respond to user queries so that the responses are meaningful and aligned with the knowledge provided to the system. However, most modern large language models (LLMs) suffer from hallucinations, that is, they generate responses not supported by or even contradicting the knowledge source. To mitigate the issue and increase the faithfulness of information-seeking dialogue systems supported by LLMs, we introduce BeInfo, a simple yet effective method that applies {\textquoteleft}behavioural tuning{\textquoteright} to LLMs to aid information-seeking dialogue. Relying on three standard information-seeking dialogue datasets, we show that models tuned with BeInfo become considerably more faithful to the knowledge source, both on datasets and domains seen during BeInfo-tuning and on unseen domains when applied in a zero-shot manner. In addition, we present a {\textquoteleft}real-life{\textquoteright} case study on conversations with real users, showcasing that models with 3B parameters (e.g., Flan-T5) tuned with BeInfo demonstrate strong performance on data from real {\textquoteleft}production{\textquoteright} conversations: when tuned on a limited amount of such realistic in-domain dialogues, they surpass much larger LLMs used {\textquoteleft}off-the-shelf{\textquoteright}, on both automatic and human evaluation metrics."
}
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="razumovskaia-etal-2024-dial">
    <titleInfo>
      <title>Dial BeInfo for Faithfulness: Improving Factuality of Information-Seeking Dialogue via Behavioural Fine-Tuning</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Evgeniia</namePart>
      <namePart type="family">Razumovskaia</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ivan</namePart>
      <namePart type="family">Vulić</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Pavle</namePart>
      <namePart type="family">Marković</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Tomasz</namePart>
      <namePart type="family">Cichy</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Qian</namePart>
      <namePart type="family">Zheng</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Tsung-Hsien</namePart>
      <namePart type="family">Wen</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Paweł</namePart>
      <namePart type="family">Budzianowski</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: EMNLP 2024</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Yaser</namePart>
        <namePart type="family">Al-Onaizan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mohit</namePart>
        <namePart type="family">Bansal</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yun-Nung</namePart>
        <namePart type="family">Chen</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Miami, Florida, USA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Factual faithfulness is a crucial requirement in information-seeking dialogue: the system should respond to user queries so that the responses are meaningful and aligned with the knowledge provided to the system. However, most modern large language models (LLMs) suffer from hallucinations, that is, they generate responses not supported by or even contradicting the knowledge source. To mitigate the issue and increase the faithfulness of information-seeking dialogue systems supported by LLMs, we introduce BeInfo, a simple yet effective method that applies ‘behavioural tuning’ to LLMs to aid information-seeking dialogue. Relying on three standard information-seeking dialogue datasets, we show that models tuned with BeInfo become considerably more faithful to the knowledge source, both on datasets and domains seen during BeInfo-tuning and on unseen domains when applied in a zero-shot manner. In addition, we present a ‘real-life’ case study on conversations with real users, showcasing that models with 3B parameters (e.g., Flan-T5) tuned with BeInfo demonstrate strong performance on data from real ‘production’ conversations: when tuned on a limited amount of such realistic in-domain dialogues, they surpass much larger LLMs used ‘off-the-shelf’, on both automatic and human evaluation metrics.</abstract>
<identifier type="citekey">razumovskaia-etal-2024-dial</identifier>
<identifier type="doi">10.18653/v1/2024.findings-emnlp.998</identifier>
<location>
<url>https://aclanthology.org/2024.findings-emnlp.998/</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>17139</start>
<end>17152</end>
</extent>
</part>
</mods>
</modsCollection>
Endnote
%0 Conference Proceedings
%T Dial BeInfo for Faithfulness: Improving Factuality of Information-Seeking Dialogue via Behavioural Fine-Tuning
%A Razumovskaia, Evgeniia
%A Vulić, Ivan
%A Marković, Pavle
%A Cichy, Tomasz
%A Zheng, Qian
%A Wen, Tsung-Hsien
%A Budzianowski, Paweł
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Findings of the Association for Computational Linguistics: EMNLP 2024
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F razumovskaia-etal-2024-dial
%X Factual faithfulness is a crucial requirement in information-seeking dialogue: the system should respond to user queries so that the responses are meaningful and aligned with the knowledge provided to the system. However, most modern large language models (LLMs) suffer from hallucinations, that is, they generate responses not supported by or even contradicting the knowledge source. To mitigate the issue and increase the faithfulness of information-seeking dialogue systems supported by LLMs, we introduce BeInfo, a simple yet effective method that applies ‘behavioural tuning’ to LLMs to aid information-seeking dialogue. Relying on three standard information-seeking dialogue datasets, we show that models tuned with BeInfo become considerably more faithful to the knowledge source, both on datasets and domains seen during BeInfo-tuning and on unseen domains when applied in a zero-shot manner. In addition, we present a ‘real-life’ case study on conversations with real users, showcasing that models with 3B parameters (e.g., Flan-T5) tuned with BeInfo demonstrate strong performance on data from real ‘production’ conversations: when tuned on a limited amount of such realistic in-domain dialogues, they surpass much larger LLMs used ‘off-the-shelf’, on both automatic and human evaluation metrics.
%R 10.18653/v1/2024.findings-emnlp.998
%U https://aclanthology.org/2024.findings-emnlp.998/
%U https://doi.org/10.18653/v1/2024.findings-emnlp.998
%P 17139-17152
Markdown (Informal)
[Dial BeInfo for Faithfulness: Improving Factuality of Information-Seeking Dialogue via Behavioural Fine-Tuning](https://aclanthology.org/2024.findings-emnlp.998/) (Razumovskaia et al., Findings 2024)
ACL
Evgeniia Razumovskaia, Ivan Vulić, Pavle Marković, Tomasz Cichy, Qian Zheng, Tsung-Hsien Wen, and Paweł Budzianowski. 2024. Dial BeInfo for Faithfulness: Improving Factuality of Information-Seeking Dialogue via Behavioural Fine-Tuning. In Findings of the Association for Computational Linguistics: EMNLP 2024, pages 17139–17152, Miami, Florida, USA. Association for Computational Linguistics.