@inproceedings{su-etal-2024-unveiling,
title = "Unveiling Narrative Reasoning Limits of Large Language Models with Trope in Movie Synopses",
author = "Su, Hung-Ting and
Hsu, Ya-Ching and
Lin, Xudong and
Shi, Xiang-Qian and
Niu, Yulei and
Hsu, Han-Yuan and
Lee, Hung-yi and
Hsu, Winston H.",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2024",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-emnlp.872/",
doi = "10.18653/v1/2024.findings-emnlp.872",
pages = "14839--14854",
abstract = "Large language models (LLMs) equipped with chain-of-thoughts (CoT) prompting have shown significant multi-step reasoning capabilities in factual content like mathematics, commonsense, and logic. However, their performance in narrative reasoning, which demands greater abstraction capabilities, remains unexplored. This study utilizes tropes in movie synopses to assess the abstract reasoning abilities of state-of-the-art LLMs and uncovers their low performance. We introduce a trope-wise querying approach to address these challenges and boost the F1 score by 11.8 points. Moreover, while prior studies suggest that CoT enhances multi-step reasoning, this study shows CoT can cause hallucinations in narrative content, reducing GPT-4's performance. We also introduce an Adversarial Injection method to embed trope-related text tokens into movie synopses without explicit tropes, revealing CoT's heightened sensitivity to such injections. Our comprehensive analysis provides insights for future research directions."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="su-etal-2024-unveiling">
<titleInfo>
<title>Unveiling Narrative Reasoning Limits of Large Language Models with Trope in Movie Synopses</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hung-Ting</namePart>
<namePart type="family">Su</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ya-Ching</namePart>
<namePart type="family">Hsu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xudong</namePart>
<namePart type="family">Lin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiang-Qian</namePart>
<namePart type="family">Shi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yulei</namePart>
<namePart type="family">Niu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Han-Yuan</namePart>
<namePart type="family">Hsu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hung-yi</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Winston</namePart>
<namePart type="given">H</namePart>
<namePart type="family">Hsu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Large language models (LLMs) equipped with chain-of-thoughts (CoT) prompting have shown significant multi-step reasoning capabilities in factual content like mathematics, commonsense, and logic. However, their performance in narrative reasoning, which demands greater abstraction capabilities, remains unexplored. This study utilizes tropes in movie synopses to assess the abstract reasoning abilities of state-of-the-art LLMs and uncovers their low performance. We introduce a trope-wise querying approach to address these challenges and boost the F1 score by 11.8 points. Moreover, while prior studies suggest that CoT enhances multi-step reasoning, this study shows CoT can cause hallucinations in narrative content, reducing GPT-4's performance. We also introduce an Adversarial Injection method to embed trope-related text tokens into movie synopses without explicit tropes, revealing CoT's heightened sensitivity to such injections. Our comprehensive analysis provides insights for future research directions.</abstract>
<identifier type="citekey">su-etal-2024-unveiling</identifier>
<identifier type="doi">10.18653/v1/2024.findings-emnlp.872</identifier>
<location>
<url>https://aclanthology.org/2024.findings-emnlp.872/</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>14839</start>
<end>14854</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Unveiling Narrative Reasoning Limits of Large Language Models with Trope in Movie Synopses
%A Su, Hung-Ting
%A Hsu, Ya-Ching
%A Lin, Xudong
%A Shi, Xiang-Qian
%A Niu, Yulei
%A Hsu, Han-Yuan
%A Lee, Hung-yi
%A Hsu, Winston H.
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Findings of the Association for Computational Linguistics: EMNLP 2024
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F su-etal-2024-unveiling
%X Large language models (LLMs) equipped with chain-of-thoughts (CoT) prompting have shown significant multi-step reasoning capabilities in factual content like mathematics, commonsense, and logic. However, their performance in narrative reasoning, which demands greater abstraction capabilities, remains unexplored. This study utilizes tropes in movie synopses to assess the abstract reasoning abilities of state-of-the-art LLMs and uncovers their low performance. We introduce a trope-wise querying approach to address these challenges and boost the F1 score by 11.8 points. Moreover, while prior studies suggest that CoT enhances multi-step reasoning, this study shows CoT can cause hallucinations in narrative content, reducing GPT-4's performance. We also introduce an Adversarial Injection method to embed trope-related text tokens into movie synopses without explicit tropes, revealing CoT's heightened sensitivity to such injections. Our comprehensive analysis provides insights for future research directions.
%R 10.18653/v1/2024.findings-emnlp.872
%U https://aclanthology.org/2024.findings-emnlp.872/
%U https://doi.org/10.18653/v1/2024.findings-emnlp.872
%P 14839-14854
Markdown (Informal)
[Unveiling Narrative Reasoning Limits of Large Language Models with Trope in Movie Synopses](https://aclanthology.org/2024.findings-emnlp.872/) (Su et al., Findings 2024)
ACL
- Hung-Ting Su, Ya-Ching Hsu, Xudong Lin, Xiang-Qian Shi, Yulei Niu, Han-Yuan Hsu, Hung-yi Lee, and Winston H. Hsu. 2024. Unveiling Narrative Reasoning Limits of Large Language Models with Trope in Movie Synopses. In Findings of the Association for Computational Linguistics: EMNLP 2024, pages 14839–14854, Miami, Florida, USA. Association for Computational Linguistics.