BibTeX
@inproceedings{xu-etal-2024-walking,
title = "Walking in Others' Shoes: How Perspective-Taking Guides Large Language Models in Reducing Toxicity and Bias",
author = "Xu, Rongwu and
Zhou, Zian and
Zhang, Tianwei and
Qi, Zehan and
Yao, Su and
Xu, Ke and
Xu, Wei and
Qiu, Han",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.emnlp-main.476/",
doi = "10.18653/v1/2024.emnlp-main.476",
pages = "8341--8368",
abstract = "The common toxicity and societal bias in contents generated by large language models (LLMs) necessitate strategies to reduce harm. Present solutions often demand white-box access to the model or substantial training, which is impractical for cutting-edge commercial LLMs. Moreover, prevailing prompting methods depend on external tool feedback and fail to simultaneously lessen toxicity and bias. Motivated by social psychology principles, we propose a novel strategy named perspective-taking prompting (PeT) that inspires LLMs to integrate diverse human perspectives and self-regulate their responses. This self-correction mechanism can significantly diminish toxicity (up to 89{\%}) and bias (up to 73{\%}) in LLMs' responses. Rigorous evaluations and ablation studies are conducted on two commercial LLMs (ChatGPT and GLM) and three open-source LLMs, revealing PeT`s superiority in producing less harmful responses, outperforming five strong baselines."
}
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="xu-etal-2024-walking">
<titleInfo>
<title>Walking in Others’ Shoes: How Perspective-Taking Guides Large Language Models in Reducing Toxicity and Bias</title>
</titleInfo>
<name type="personal">
<namePart type="given">Rongwu</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zian</namePart>
<namePart type="family">Zhou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tianwei</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zehan</namePart>
<namePart type="family">Qi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Su</namePart>
<namePart type="family">Yao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ke</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wei</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Han</namePart>
<namePart type="family">Qiu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The common toxicity and societal bias in contents generated by large language models (LLMs) necessitate strategies to reduce harm. Present solutions often demand white-box access to the model or substantial training, which is impractical for cutting-edge commercial LLMs. Moreover, prevailing prompting methods depend on external tool feedback and fail to simultaneously lessen toxicity and bias. Motivated by social psychology principles, we propose a novel strategy named perspective-taking prompting (PeT) that inspires LLMs to integrate diverse human perspectives and self-regulate their responses. This self-correction mechanism can significantly diminish toxicity (up to 89%) and bias (up to 73%) in LLMs’ responses. Rigorous evaluations and ablation studies are conducted on two commercial LLMs (ChatGPT and GLM) and three open-source LLMs, revealing PeT’s superiority in producing less harmful responses, outperforming five strong baselines.</abstract>
<identifier type="citekey">xu-etal-2024-walking</identifier>
<identifier type="doi">10.18653/v1/2024.emnlp-main.476</identifier>
<location>
<url>https://aclanthology.org/2024.emnlp-main.476/</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>8341</start>
<end>8368</end>
</extent>
</part>
</mods>
</modsCollection>
Endnote
%0 Conference Proceedings
%T Walking in Others’ Shoes: How Perspective-Taking Guides Large Language Models in Reducing Toxicity and Bias
%A Xu, Rongwu
%A Zhou, Zian
%A Zhang, Tianwei
%A Qi, Zehan
%A Yao, Su
%A Xu, Ke
%A Xu, Wei
%A Qiu, Han
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F xu-etal-2024-walking
%X The common toxicity and societal bias in contents generated by large language models (LLMs) necessitate strategies to reduce harm. Present solutions often demand white-box access to the model or substantial training, which is impractical for cutting-edge commercial LLMs. Moreover, prevailing prompting methods depend on external tool feedback and fail to simultaneously lessen toxicity and bias. Motivated by social psychology principles, we propose a novel strategy named perspective-taking prompting (PeT) that inspires LLMs to integrate diverse human perspectives and self-regulate their responses. This self-correction mechanism can significantly diminish toxicity (up to 89%) and bias (up to 73%) in LLMs’ responses. Rigorous evaluations and ablation studies are conducted on two commercial LLMs (ChatGPT and GLM) and three open-source LLMs, revealing PeT’s superiority in producing less harmful responses, outperforming five strong baselines.
%R 10.18653/v1/2024.emnlp-main.476
%U https://aclanthology.org/2024.emnlp-main.476/
%U https://doi.org/10.18653/v1/2024.emnlp-main.476
%P 8341-8368
Markdown (Informal)
[Walking in Others’ Shoes: How Perspective-Taking Guides Large Language Models in Reducing Toxicity and Bias](https://aclanthology.org/2024.emnlp-main.476/) (Xu et al., EMNLP 2024)
ACL
Rongwu Xu, Zian Zhou, Tianwei Zhang, Zehan Qi, Su Yao, Ke Xu, Wei Xu, and Han Qiu. 2024. Walking in Others’ Shoes: How Perspective-Taking Guides Large Language Models in Reducing Toxicity and Bias. In Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing, pages 8341–8368, Miami, Florida, USA. Association for Computational Linguistics.