@inproceedings{li-etal-2024-traq,
title = "{TRAQ}: Trustworthy Retrieval Augmented Question Answering via Conformal Prediction",
author = "Li, Shuo and
Park, Sangdon and
Lee, Insup and
Bastani, Osbert",
editor = "Duh, Kevin and
Gomez, Helena and
Bethard, Steven",
booktitle = "Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.naacl-long.210/",
doi = "10.18653/v1/2024.naacl-long.210",
pages = "3799--3821",
abstract = "When applied to open-domain question answering, large language models (LLMs) frequently generate incorrect responses based on made-up facts, which are called \textit{hallucinations}. Retrieval augmented generation (RAG) is a promising strategy to avoid hallucinations, but it does not provide guarantees on its correctness. To address this challenge, we propose the Trustworthy Retrieval Augmented Question Answering, or *TRAQ*, which provides the first end-to-end statistical correctness guarantee for RAG. TRAQ uses conformal prediction, a statistical technique for constructing prediction sets that are guaranteed to contain the semantically correct response with high probability. Additionally, TRAQ leverages Bayesian optimization to minimize the size of the constructed sets. In an extensive experimental evaluation, we demonstrate that TRAQ provides the desired correctness guarantee while reducing prediction set size by 16.2{\%} on average compared to an ablation. The implementation is available: [https://github.com/shuoli90/TRAQ](https://github.com/shuoli90/TRAQ)."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="li-etal-2024-traq">
<titleInfo>
<title>TRAQ: Trustworthy Retrieval Augmented Question Answering via Conformal Prediction</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shuo</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sangdon</namePart>
<namePart type="family">Park</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Insup</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Osbert</namePart>
<namePart type="family">Bastani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kevin</namePart>
<namePart type="family">Duh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Helena</namePart>
<namePart type="family">Gomez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Bethard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mexico City, Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>When applied to open-domain question answering, large language models (LLMs) frequently generate incorrect responses based on made-up facts, which are called hallucinations. Retrieval augmented generation (RAG) is a promising strategy to avoid hallucinations, but it does not provide guarantees on its correctness. To address this challenge, we propose the Trustworthy Retrieval Augmented Question Answering, or *TRAQ*, which provides the first end-to-end statistical correctness guarantee for RAG. TRAQ uses conformal prediction, a statistical technique for constructing prediction sets that are guaranteed to contain the semantically correct response with high probability. Additionally, TRAQ leverages Bayesian optimization to minimize the size of the constructed sets. In an extensive experimental evaluation, we demonstrate that TRAQ provides the desired correctness guarantee while reducing prediction set size by 16.2% on average compared to an ablation. The implementation is available: [https://github.com/shuoli90/TRAQ](https://github.com/shuoli90/TRAQ).</abstract>
<identifier type="citekey">li-etal-2024-traq</identifier>
<identifier type="doi">10.18653/v1/2024.naacl-long.210</identifier>
<location>
<url>https://aclanthology.org/2024.naacl-long.210/</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>3799</start>
<end>3821</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T TRAQ: Trustworthy Retrieval Augmented Question Answering via Conformal Prediction
%A Li, Shuo
%A Park, Sangdon
%A Lee, Insup
%A Bastani, Osbert
%Y Duh, Kevin
%Y Gomez, Helena
%Y Bethard, Steven
%S Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F li-etal-2024-traq
%X When applied to open-domain question answering, large language models (LLMs) frequently generate incorrect responses based on made-up facts, which are called hallucinations. Retrieval augmented generation (RAG) is a promising strategy to avoid hallucinations, but it does not provide guarantees on its correctness. To address this challenge, we propose the Trustworthy Retrieval Augmented Question Answering, or *TRAQ*, which provides the first end-to-end statistical correctness guarantee for RAG. TRAQ uses conformal prediction, a statistical technique for constructing prediction sets that are guaranteed to contain the semantically correct response with high probability. Additionally, TRAQ leverages Bayesian optimization to minimize the size of the constructed sets. In an extensive experimental evaluation, we demonstrate that TRAQ provides the desired correctness guarantee while reducing prediction set size by 16.2% on average compared to an ablation. The implementation is available: [https://github.com/shuoli90/TRAQ](https://github.com/shuoli90/TRAQ).
%R 10.18653/v1/2024.naacl-long.210
%U https://aclanthology.org/2024.naacl-long.210/
%U https://doi.org/10.18653/v1/2024.naacl-long.210
%P 3799-3821
Markdown (Informal)
[TRAQ: Trustworthy Retrieval Augmented Question Answering via Conformal Prediction](https://aclanthology.org/2024.naacl-long.210/) (Li et al., NAACL 2024)
ACL