@inproceedings{toroghi-etal-2024-right,
    title = "Right for Right Reasons: Large Language Models for Verifiable Commonsense Knowledge Graph Question Answering",
    author = "Toroghi, Armin and
      Guo, Willis and
      Abdollah Pour, Mohammad Mahdi and
      Sanner, Scott",
    editor = "Al-Onaizan, Yaser and
      Bansal, Mohit and
      Chen, Yun-Nung",
    booktitle = "Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing",
    month = nov,
    year = "2024",
    address = "Miami, Florida, USA",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.emnlp-main.378/",
    doi = "10.18653/v1/2024.emnlp-main.378",
    pages = "6601--6633",
    abstract = "Knowledge Graph Question Answering (KGQA) methods seek to answer Natural Language questions using the relational information stored in Knowledge Graphs (KGs). With the recent advancements of Large Language Models (LLMs) and their remarkable reasoning abilities, there is a growing trend to leverage them for KGQA. However, existing methodologies have only focused on answering factual questions, e.g., *{\textquotedblleft}In which city was Silvio Berlusconi's first wife born?{\textquotedblright}*, leaving questions involving commonsense reasoning that real-world users may pose more often, e.g., *{\textquotedblleft}Do I need separate visas to see the Venus of Willendorf and attend the Olympics this summer?{\textquotedblright}* unaddressed. In this work, we first observe that existing LLM-based methods for KGQA struggle with hallucination on such questions, especially on queries targeting long-tail entities (e.g., non-mainstream and recent entities), thus hindering their applicability in real-world applications especially since their reasoning processes are not easily verifiable. In response, we propose Right for Right Reasons ($R^3$), a commonsense KGQA methodology that allows for a verifiable reasoning procedure by axiomatically surfacing intrinsic commonsense knowledge of LLMs and grounding every factual reasoning step on KG triples. Through experimental evaluations across three different tasks{---}question answering, claim verification, and preference matching{---}our findings showcase $R^3$ as a superior approach, outperforming existing methodologies and notably reducing instances of hallucination and reasoning errors."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="toroghi-etal-2024-right">
    <titleInfo>
      <title>Right for Right Reasons: Large Language Models for Verifiable Commonsense Knowledge Graph Question Answering</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Armin</namePart>
      <namePart type="family">Toroghi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Willis</namePart>
      <namePart type="family">Guo</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Mohammad</namePart>
      <namePart type="given">Mahdi</namePart>
      <namePart type="family">Abdollah Pour</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Scott</namePart>
      <namePart type="family">Sanner</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Yaser</namePart>
        <namePart type="family">Al-Onaizan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mohit</namePart>
        <namePart type="family">Bansal</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yun-Nung</namePart>
        <namePart type="family">Chen</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Miami, Florida, USA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Knowledge Graph Question Answering (KGQA) methods seek to answer Natural Language questions using the relational information stored in Knowledge Graphs (KGs). With the recent advancements of Large Language Models (LLMs) and their remarkable reasoning abilities, there is a growing trend to leverage them for KGQA. However, existing methodologies have only focused on answering factual questions, e.g., *“In which city was Silvio Berlusconi’s first wife born?”*, leaving questions involving commonsense reasoning that real-world users may pose more often, e.g., *“Do I need separate visas to see the Venus of Willendorf and attend the Olympics this summer?”* unaddressed. In this work, we first observe that existing LLM-based methods for KGQA struggle with hallucination on such questions, especially on queries targeting long-tail entities (e.g., non-mainstream and recent entities), thus hindering their applicability in real-world applications especially since their reasoning processes are not easily verifiable. In response, we propose Right for Right Reasons (R³), a commonsense KGQA methodology that allows for a verifiable reasoning procedure by axiomatically surfacing intrinsic commonsense knowledge of LLMs and grounding every factual reasoning step on KG triples. Through experimental evaluations across three different tasks—question answering, claim verification, and preference matching—our findings showcase R³ as a superior approach, outperforming existing methodologies and notably reducing instances of hallucination and reasoning errors.</abstract>
    <identifier type="citekey">toroghi-etal-2024-right</identifier>
    <identifier type="doi">10.18653/v1/2024.emnlp-main.378</identifier>
    <location>
      <url>https://aclanthology.org/2024.emnlp-main.378/</url>
    </location>
    <part>
      <date>2024-11</date>
      <extent unit="page">
        <start>6601</start>
        <end>6633</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Right for Right Reasons: Large Language Models for Verifiable Commonsense Knowledge Graph Question Answering
%A Toroghi, Armin
%A Guo, Willis
%A Abdollah Pour, Mohammad Mahdi
%A Sanner, Scott
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F toroghi-etal-2024-right
%X Knowledge Graph Question Answering (KGQA) methods seek to answer Natural Language questions using the relational information stored in Knowledge Graphs (KGs). With the recent advancements of Large Language Models (LLMs) and their remarkable reasoning abilities, there is a growing trend to leverage them for KGQA. However, existing methodologies have only focused on answering factual questions, e.g., *“In which city was Silvio Berlusconi’s first wife born?”*, leaving questions involving commonsense reasoning that real-world users may pose more often, e.g., *“Do I need separate visas to see the Venus of Willendorf and attend the Olympics this summer?”* unaddressed. In this work, we first observe that existing LLM-based methods for KGQA struggle with hallucination on such questions, especially on queries targeting long-tail entities (e.g., non-mainstream and recent entities), thus hindering their applicability in real-world applications especially since their reasoning processes are not easily verifiable. In response, we propose Right for Right Reasons (R³), a commonsense KGQA methodology that allows for a verifiable reasoning procedure by axiomatically surfacing intrinsic commonsense knowledge of LLMs and grounding every factual reasoning step on KG triples. Through experimental evaluations across three different tasks—question answering, claim verification, and preference matching—our findings showcase R³ as a superior approach, outperforming existing methodologies and notably reducing instances of hallucination and reasoning errors.
%R 10.18653/v1/2024.emnlp-main.378
%U https://aclanthology.org/2024.emnlp-main.378/
%U https://doi.org/10.18653/v1/2024.emnlp-main.378
%P 6601-6633
Markdown (Informal)
[Right for Right Reasons: Large Language Models for Verifiable Commonsense Knowledge Graph Question Answering](https://aclanthology.org/2024.emnlp-main.378/) (Toroghi et al., EMNLP 2024)
ACL
Armin Toroghi, Willis Guo, Mohammad Mahdi Abdollah Pour, and Scott Sanner. 2024. [Right for Right Reasons: Large Language Models for Verifiable Commonsense Knowledge Graph Question Answering](https://aclanthology.org/2024.emnlp-main.378/). In *Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing*, pages 6601–6633, Miami, Florida, USA. Association for Computational Linguistics.