@inproceedings{zhao-etal-2024-llms,
title = "Can {LLM}s Replace Clinical Doctors? Exploring Bias in Disease Diagnosis by Large Language Models",
author = "Zhao, Yutian and
Wang, Huimin and
Liu, Yuqi and
Suhuang, Wu and
Wu, Xian and
Zheng, Yefeng",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2024",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-emnlp.814/",
doi = "10.18653/v1/2024.findings-emnlp.814",
pages = "13914--13935",
abstract = "The bias of disease prediction in Large Language Models (LLMs) is a critical yet underexplored issue, with potential implications for healthcare outcomes and equity. As LLMs increasingly find applications in healthcare, understanding and addressing their biases becomes paramount. This study focuses on this crucial topic, investigating the bias of disease prediction in models such as GPT-4, ChatGPT, and Qwen1.5-72b across gender, age range, and disease judgment behaviors. Utilizing a comprehensive real-clinical health record dataset of over 330,000 entries, we uncover that all three models exhibit distinct biases, indicating a pervasive issue of unfairness. To measure this, we introduce a novel metric{--}the diagnosis bias score, which reflects the ratio of prediction numbers to label numbers. Our in-depth analysis, based on this score, sheds light on the inherent biases in these models. In response to these findings, we propose a simple yet effective prompt-based solution to alleviate the observed bias in disease prediction with LLMs. This research underscores the importance of fairness in AI, particularly in healthcare applications, and offers a practical approach to enhance the equity of disease prediction models."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhao-etal-2024-llms">
<titleInfo>
<title>Can LLMs Replace Clinical Doctors? Exploring Bias in Disease Diagnosis by Large Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yutian</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Huimin</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuqi</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wu</namePart>
<namePart type="family">Suhuang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xian</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yefeng</namePart>
<namePart type="family">Zheng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The bias of disease prediction in Large Language Models (LLMs) is a critical yet underexplored issue, with potential implications for healthcare outcomes and equity. As LLMs increasingly find applications in healthcare, understanding and addressing their biases becomes paramount. This study focuses on this crucial topic, investigating the bias of disease prediction in models such as GPT-4, ChatGPT, and Qwen1.5-72b across gender, age range, and disease judgment behaviors. Utilizing a comprehensive real-clinical health record dataset of over 330,000 entries, we uncover that all three models exhibit distinct biases, indicating a pervasive issue of unfairness. To measure this, we introduce a novel metric–the diagnosis bias score, which reflects the ratio of prediction numbers to label numbers. Our in-depth analysis, based on this score, sheds light on the inherent biases in these models. In response to these findings, we propose a simple yet effective prompt-based solution to alleviate the observed bias in disease prediction with LLMs. This research underscores the importance of fairness in AI, particularly in healthcare applications, and offers a practical approach to enhance the equity of disease prediction models.</abstract>
<identifier type="citekey">zhao-etal-2024-llms</identifier>
<identifier type="doi">10.18653/v1/2024.findings-emnlp.814</identifier>
<location>
<url>https://aclanthology.org/2024.findings-emnlp.814/</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>13914</start>
<end>13935</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Can LLMs Replace Clinical Doctors? Exploring Bias in Disease Diagnosis by Large Language Models
%A Zhao, Yutian
%A Wang, Huimin
%A Liu, Yuqi
%A Suhuang, Wu
%A Wu, Xian
%A Zheng, Yefeng
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Findings of the Association for Computational Linguistics: EMNLP 2024
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F zhao-etal-2024-llms
%X The bias of disease prediction in Large Language Models (LLMs) is a critical yet underexplored issue, with potential implications for healthcare outcomes and equity. As LLMs increasingly find applications in healthcare, understanding and addressing their biases becomes paramount. This study focuses on this crucial topic, investigating the bias of disease prediction in models such as GPT-4, ChatGPT, and Qwen1.5-72b across gender, age range, and disease judgment behaviors. Utilizing a comprehensive real-clinical health record dataset of over 330,000 entries, we uncover that all three models exhibit distinct biases, indicating a pervasive issue of unfairness. To measure this, we introduce a novel metric–the diagnosis bias score, which reflects the ratio of prediction numbers to label numbers. Our in-depth analysis, based on this score, sheds light on the inherent biases in these models. In response to these findings, we propose a simple yet effective prompt-based solution to alleviate the observed bias in disease prediction with LLMs. This research underscores the importance of fairness in AI, particularly in healthcare applications, and offers a practical approach to enhance the equity of disease prediction models.
%R 10.18653/v1/2024.findings-emnlp.814
%U https://aclanthology.org/2024.findings-emnlp.814/
%U https://doi.org/10.18653/v1/2024.findings-emnlp.814
%P 13914-13935
Markdown (Informal)
[Can LLMs Replace Clinical Doctors? Exploring Bias in Disease Diagnosis by Large Language Models](https://aclanthology.org/2024.findings-emnlp.814/) (Zhao et al., Findings 2024)
ACL