BibTeX
@inproceedings{prato-etal-2024-large,
    title = "Do Large Language Models Know How Much They Know?",
    author = "Prato, Gabriele  and
      Huang, Jerry  and
      Parthasarathi, Prasanna  and
      Sodhani, Shagun  and
      Chandar, Sarath",
    editor = "Al-Onaizan, Yaser  and
      Bansal, Mohit  and
      Chen, Yun-Nung",
    booktitle = "Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing",
    month = nov,
    year = "2024",
    address = "Miami, Florida, USA",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.emnlp-main.348/",
    doi = "10.18653/v1/2024.emnlp-main.348",
    pages = "6054--6070",
    abstract = "Large Language Models (LLMs) have emerged as highly capable systems and are increasingly being integrated into various uses. Nevertheless, the rapid advancement in their deployment trails a comprehensive understanding of their internal mechanisms, as well as a delineation of their capabilities and limitations. A desired characteristic of an intelligent system is its ability to recognize the scope of its own knowledge. To investigate whether LLMs embody this attribute, we develop a benchmark that challenges these models to enumerate all information they possess on specific topics. This benchmark assesses whether the models recall excessive, insufficient, or the precise amount of required information, thereby indicating their awareness of how much they know about the given topic. Our findings reveal that the emergence of this property varies across different architectures and manifests at diverse rates. However, with sufficient scaling, all tested models are ultimately capable of performing this task. The insights gained from this research advance our understanding of LLMs, shedding light on their operational capabilities and contributing to the ongoing exploration of their intricate dynamics."
}
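
If the BibTeX record above is consumed programmatically, a minimal sketch of reading it, assuming the third-party bibtexparser package (v1 API) and a hypothetical file name prato2024.bib holding the entry:

# Sketch only: parse the BibTeX record with bibtexparser (v1 API).
# "prato2024.bib" is an assumed file name for the entry above.
import bibtexparser

with open("prato2024.bib") as f:
    db = bibtexparser.load(f)

entry = db.entries[0]        # each entry is a plain dict of fields
print(entry["ID"])           # citekey: prato-etal-2024-large
print(entry["title"])        # paper title
print(entry["pages"])        # 6054--6070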
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="prato-etal-2024-large">
  <titleInfo>
    <title>Do Large Language Models Know How Much They Know?</title>
  </titleInfo>
  <name type="personal">
    <namePart type="given">Gabriele</namePart>
    <namePart type="family">Prato</namePart>
    <role>
      <roleTerm authority="marcrelator" type="text">author</roleTerm>
    </role>
  </name>
  <name type="personal">
    <namePart type="given">Jerry</namePart>
    <namePart type="family">Huang</namePart>
    <role>
      <roleTerm authority="marcrelator" type="text">author</roleTerm>
    </role>
  </name>
  <name type="personal">
    <namePart type="given">Prasanna</namePart>
    <namePart type="family">Parthasarathi</namePart>
    <role>
      <roleTerm authority="marcrelator" type="text">author</roleTerm>
    </role>
  </name>
  <name type="personal">
    <namePart type="given">Shagun</namePart>
    <namePart type="family">Sodhani</namePart>
    <role>
      <roleTerm authority="marcrelator" type="text">author</roleTerm>
    </role>
  </name>
  <name type="personal">
    <namePart type="given">Sarath</namePart>
    <namePart type="family">Chandar</namePart>
    <role>
      <roleTerm authority="marcrelator" type="text">author</roleTerm>
    </role>
  </name>
  <originInfo>
    <dateIssued>2024-11</dateIssued>
  </originInfo>
  <typeOfResource>text</typeOfResource>
  <relatedItem type="host">
    <titleInfo>
      <title>Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Yaser</namePart>
      <namePart type="family">Al-Onaizan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">editor</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Mohit</namePart>
      <namePart type="family">Bansal</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">editor</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yun-Nung</namePart>
      <namePart type="family">Chen</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">editor</roleTerm>
      </role>
    </name>
    <originInfo>
      <publisher>Association for Computational Linguistics</publisher>
      <place>
        <placeTerm type="text">Miami, Florida, USA</placeTerm>
      </place>
    </originInfo>
    <genre authority="marcgt">conference publication</genre>
  </relatedItem>
  <abstract>Large Language Models (LLMs) have emerged as highly capable systems and are increasingly being integrated into various uses. Nevertheless, the rapid advancement in their deployment trails a comprehensive understanding of their internal mechanisms, as well as a delineation of their capabilities and limitations. A desired characteristic of an intelligent system is its ability to recognize the scope of its own knowledge. To investigate whether LLMs embody this attribute, we develop a benchmark that challenges these models to enumerate all information they possess on specific topics. This benchmark assesses whether the models recall excessive, insufficient, or the precise amount of required information, thereby indicating their awareness of how much they know about the given topic. Our findings reveal that the emergence of this property varies across different architectures and manifests at diverse rates. However, with sufficient scaling, all tested models are ultimately capable of performing this task. The insights gained from this research advance our understanding of LLMs, shedding light on their operational capabilities and contributing to the ongoing exploration of their intricate dynamics.</abstract>
  <identifier type="citekey">prato-etal-2024-large</identifier>
  <identifier type="doi">10.18653/v1/2024.emnlp-main.348</identifier>
  <location>
    <url>https://aclanthology.org/2024.emnlp-main.348/</url>
  </location>
  <part>
    <date>2024-11</date>
    <extent unit="page">
      <start>6054</start>
      <end>6070</end>
    </extent>
  </part>
</mods>
</modsCollection>
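
The same fields can be pulled from the MODS record with the Python standard library alone; the only subtlety is the MODS v3 namespace. A minimal sketch, assuming the record above is saved under the hypothetical file name mods.xml:

# Sketch only: read title, authors, and DOI from the MODS XML above.
import xml.etree.ElementTree as ET

NS = {"m": "http://www.loc.gov/mods/v3"}  # MODS v3 namespace
mods = ET.parse("mods.xml").getroot().find("m:mods", NS)

title = mods.findtext("m:titleInfo/m:title", namespaces=NS)
authors = [
    " ".join(p.text for p in name.findall("m:namePart", NS))
    # "m:name" matches direct children only, so the editors nested
    # under <relatedItem> are not picked up here
    for name in mods.findall("m:name", NS)
]
doi = mods.findtext("m:identifier[@type='doi']", namespaces=NS)
print(title, authors, doi, sep="\n")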
Endnote
%0 Conference Proceedings
%T Do Large Language Models Know How Much They Know?
%A Prato, Gabriele
%A Huang, Jerry
%A Parthasarathi, Prasanna
%A Sodhani, Shagun
%A Chandar, Sarath
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F prato-etal-2024-large
%X Large Language Models (LLMs) have emerged as highly capable systems and are increasingly being integrated into various uses. Nevertheless, the rapid advancement in their deployment trails a comprehensive understanding of their internal mechanisms, as well as a delineation of their capabilities and limitations. A desired characteristic of an intelligent system is its ability to recognize the scope of its own knowledge. To investigate whether LLMs embody this attribute, we develop a benchmark that challenges these models to enumerate all information they possess on specific topics. This benchmark assesses whether the models recall excessive, insufficient, or the precise amount of required information, thereby indicating their awareness of how much they know about the given topic. Our findings reveal that the emergence of this property varies across different architectures and manifests at diverse rates. However, with sufficient scaling, all tested models are ultimately capable of performing this task. The insights gained from this research advance our understanding of LLMs, shedding light on their operational capabilities and contributing to the ongoing exploration of their intricate dynamics.
%R 10.18653/v1/2024.emnlp-main.348
%U https://aclanthology.org/2024.emnlp-main.348/
%U https://doi.org/10.18653/v1/2024.emnlp-main.348
%P 6054-6070
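
The Endnote/Refer block is a flat tag-value format in which tags such as %A and %U repeat, so a dict of lists is the natural target. A minimal sketch, assuming the block above is saved under the hypothetical file name prato2024.enw:

# Sketch only: parse the Refer/Endnote tags above; repeated tags accumulate.
from collections import defaultdict

record = defaultdict(list)
with open("prato2024.enw") as f:
    for line in f:
        line = line.rstrip("\n")
        if line.startswith("%") and len(line) >= 3:
            record[line[:2]].append(line[3:])  # e.g. "%T" -> title text

print(record["%T"][0])  # title
print(record["%A"])     # all five author lines
print(record["%U"])     # both the Anthology URL and the DOI URL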
Markdown (Informal)
[Do Large Language Models Know How Much They Know?](https://aclanthology.org/2024.emnlp-main.348/) (Prato et al., EMNLP 2024)
ACL
Gabriele Prato, Jerry Huang, Prasanna Parthasarathi, Shagun Sodhani, and Sarath Chandar. 2024. Do Large Language Models Know How Much They Know?. In Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing, pages 6054–6070, Miami, Florida, USA. Association for Computational Linguistics.