@inproceedings{lechner-etal-2023-challenges,
    title = "Challenges of {GPT}-3-Based Conversational Agents for Healthcare",
    author = "Lechner, Fabian  and
      Lahnala, Allison  and
      Welch, Charles  and
      Flek, Lucie",
    editor = "Mitkov, Ruslan  and
      Angelova, Galia",
    booktitle = "Proceedings of the 14th International Conference on Recent Advances in Natural Language Processing",
    month = sep,
    year = "2023",
    address = "Varna, Bulgaria",
    publisher = "INCOMA Ltd., Shoumen, Bulgaria",
    url = "https://aclanthology.org/2023.ranlp-1.67",
    pages = "619--630",
    abstract = "The potential of medical domain dialogue agents lies in their ability to provide patients with faster information access while enabling medical specialists to concentrate on critical tasks. However, the integration of large language models (LLMs) into these agents presents certain limitations that may result in serious consequences. This paper investigates the challenges and risks of using GPT-3-based models for medical question-answering (MedQA). We perform several evaluations contextualized in terms of standard medical principles. We provide a procedure for manually designing patient queries to stress-test high-risk limitations of LLMs in MedQA systems. Our analysis reveals that LLMs fail to respond adequately to these queries, generating erroneous medical information, unsafe recommendations, and content that may be considered offensive.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="lechner-etal-2023-challenges">
    <titleInfo>
      <title>Challenges of GPT-3-Based Conversational Agents for Healthcare</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Fabian</namePart>
      <namePart type="family">Lechner</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Allison</namePart>
      <namePart type="family">Lahnala</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Charles</namePart>
      <namePart type="family">Welch</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Lucie</namePart>
      <namePart type="family">Flek</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2023-09</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 14th International Conference on Recent Advances in Natural Language Processing</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Ruslan</namePart>
        <namePart type="family">Mitkov</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Galia</namePart>
        <namePart type="family">Angelova</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
        <place>
          <placeTerm type="text">Varna, Bulgaria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>The potential of medical domain dialogue agents lies in their ability to provide patients with faster information access while enabling medical specialists to concentrate on critical tasks. However, the integration of large language models (LLMs) into these agents presents certain limitations that may result in serious consequences. This paper investigates the challenges and risks of using GPT-3-based models for medical question-answering (MedQA). We perform several evaluations contextualized in terms of standard medical principles. We provide a procedure for manually designing patient queries to stress-test high-risk limitations of LLMs in MedQA systems. Our analysis reveals that LLMs fail to respond adequately to these queries, generating erroneous medical information, unsafe recommendations, and content that may be considered offensive.</abstract>
    <identifier type="citekey">lechner-etal-2023-challenges</identifier>
    <location>
      <url>https://aclanthology.org/2023.ranlp-1.67</url>
    </location>
    <part>
      <date>2023-09</date>
      <extent unit="page">
        <start>619</start>
        <end>630</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Challenges of GPT-3-Based Conversational Agents for Healthcare
%A Lechner, Fabian
%A Lahnala, Allison
%A Welch, Charles
%A Flek, Lucie
%Y Mitkov, Ruslan
%Y Angelova, Galia
%S Proceedings of the 14th International Conference on Recent Advances in Natural Language Processing
%D 2023
%8 September
%I INCOMA Ltd., Shoumen, Bulgaria
%C Varna, Bulgaria
%F lechner-etal-2023-challenges
%X The potential of medical domain dialogue agents lies in their ability to provide patients with faster information access while enabling medical specialists to concentrate on critical tasks. However, the integration of large language models (LLMs) into these agents presents certain limitations that may result in serious consequences. This paper investigates the challenges and risks of using GPT-3-based models for medical question-answering (MedQA). We perform several evaluations contextualized in terms of standard medical principles. We provide a procedure for manually designing patient queries to stress-test high-risk limitations of LLMs in MedQA systems. Our analysis reveals that LLMs fail to respond adequately to these queries, generating erroneous medical information, unsafe recommendations, and content that may be considered offensive.
%U https://aclanthology.org/2023.ranlp-1.67
%P 619-630
Markdown (Informal)
[Challenges of GPT-3-Based Conversational Agents for Healthcare](https://aclanthology.org/2023.ranlp-1.67) (Lechner et al., RANLP 2023)
ACL
Fabian Lechner, Allison Lahnala, Charles Welch, and Lucie Flek. 2023. Challenges of GPT-3-Based Conversational Agents for Healthcare. In Proceedings of the 14th International Conference on Recent Advances in Natural Language Processing, pages 619–630, Varna, Bulgaria. INCOMA Ltd., Shoumen, Bulgaria.