@inproceedings{sun-etal-2024-exploring,
title = "Exploring and Mitigating Shortcut Learning for Generative Large Language Models",
author = "Sun, Zechen and
Xiao, Yisheng and
Li, Juntao and
Ji, Yixin and
Chen, Wenliang and
Zhang, Min",
editor = "Calzolari, Nicoletta and
Kan, Min-Yen and
Hoste, Veronique and
Lenci, Alessandro and
Sakti, Sakriani and
Xue, Nianwen",
booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.lrec-main.602/",
pages = "6883--6893",
abstract = "Recent generative large language models (LLMs) have exhibited incredible instruction-following capabilities while keeping strong task completion ability, even without task-specific fine-tuning. Some works attribute this to the bonus of the new scaling law, in which the continuous improvement of model capacity yields emergent capabilities, e.g., reasoning and universal generalization. However, we point out that recent LLMs still show shortcut learning behavior, where the models tend to exploit spurious correlations between non-robust features and labels for prediction, which might lead to overestimating model capabilities. LLMs memorize more complex spurious correlations (i.e., task $\leftrightarrow$ feature $\leftrightarrow$ label) compared with that learned from previous pre-training and task-specific fine-tuning paradigm (i.e., feature $\leftrightarrow$ label). Based on our findings, we propose FSLI, a framework for encouraging LLMs to \textbf{F}orget \textbf{S}purious correlations and \textbf{L}earn from \textbf{I}n-context information. Experiments on three tasks show that FSFI can effectively mitigate shortcut learning. Besides, we argue not to overestimate the capabilities of LLMs and conduct evaluations in more challenging and complete test scenarios."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sun-etal-2024-exploring">
<titleInfo>
<title>Exploring and Mitigating Shortcut Learning for Generative Large Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Zechen</namePart>
<namePart type="family">Sun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yisheng</namePart>
<namePart type="family">Xiao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juntao</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yixin</namePart>
<namePart type="family">Ji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wenliang</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Recent generative large language models (LLMs) have exhibited incredible instruction-following capabilities while keeping strong task completion ability, even without task-specific fine-tuning. Some works attribute this to the bonus of the new scaling law, in which the continuous improvement of model capacity yields emergent capabilities, e.g., reasoning and universal generalization. However, we point out that recent LLMs still show shortcut learning behavior, where the models tend to exploit spurious correlations between non-robust features and labels for prediction, which might lead to overestimating model capabilities. LLMs memorize more complex spurious correlations (i.e., task łeftrightarrow feature łeftrightarrow label) compared with that learned from previous pre-training and task-specific fine-tuning paradigm (i.e., feature łeftrightarrow label). Based on our findings, we propose FSLI, a framework for encouraging LLMs to Forget Spurious correlations and Learn from In-context information. Experiments on three tasks show that FSFI can effectively mitigate shortcut learning. Besides, we argue not to overestimate the capabilities of LLMs and conduct evaluations in more challenging and complete test scenarios.</abstract>
<identifier type="citekey">sun-etal-2024-exploring</identifier>
<location>
<url>https://aclanthology.org/2024.lrec-main.602/</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>6883</start>
<end>6893</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Exploring and Mitigating Shortcut Learning for Generative Large Language Models
%A Sun, Zechen
%A Xiao, Yisheng
%A Li, Juntao
%A Ji, Yixin
%A Chen, Wenliang
%A Zhang, Min
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F sun-etal-2024-exploring
%X Recent generative large language models (LLMs) have exhibited incredible instruction-following capabilities while keeping strong task completion ability, even without task-specific fine-tuning. Some works attribute this to the bonus of the new scaling law, in which the continuous improvement of model capacity yields emergent capabilities, e.g., reasoning and universal generalization. However, we point out that recent LLMs still show shortcut learning behavior, where the models tend to exploit spurious correlations between non-robust features and labels for prediction, which might lead to overestimating model capabilities. LLMs memorize more complex spurious correlations (i.e., task łeftrightarrow feature łeftrightarrow label) compared with that learned from previous pre-training and task-specific fine-tuning paradigm (i.e., feature łeftrightarrow label). Based on our findings, we propose FSLI, a framework for encouraging LLMs to Forget Spurious correlations and Learn from In-context information. Experiments on three tasks show that FSFI can effectively mitigate shortcut learning. Besides, we argue not to overestimate the capabilities of LLMs and conduct evaluations in more challenging and complete test scenarios.
%U https://aclanthology.org/2024.lrec-main.602/
%P 6883-6893
Markdown (Informal)
[Exploring and Mitigating Shortcut Learning for Generative Large Language Models](https://aclanthology.org/2024.lrec-main.602/) (Sun et al., LREC-COLING 2024)
ACL