@inproceedings{murty-etal-2023-pseudointelligence,
title = "Pseudointelligence: A Unifying Lens on Language Model Evaluation",
author = "Murty, Shikhar and
Paradise, Orr and
Sharma, Pratyusha",
editor = "Bouamor, Houda and
Pino, Juan and
Bali, Kalika",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2023",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-emnlp.485/",
doi = "10.18653/v1/2023.findings-emnlp.485",
pages = "7284--7290",
abstract = "With large language models surpassing human performance on an increasing number of benchmarks, we must take a principled approach for targeted evaluation of model capabilities. Inspired by pseudorandomness, we propose pseudointelligence, which captures the maxim that {\textquotedblleft}(perceived) intelligence lies in the eye of the beholder.{\textquotedblright} That is, that claims of intelligence are meaningful only when their evaluator is taken into account. Concretely, we propose a complexity-theoretic framework of model evaluation cast as a dynamic interaction between a model and a learned evaluator. We demonstrate that this framework can be used to reason about two case studies in language model evaluation, as well as analyze existing evaluation methods."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="murty-etal-2023-pseudointelligence">
<titleInfo>
<title>Pseudointelligence: A Unifying Lens on Language Model Evaluation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shikhar</namePart>
<namePart type="family">Murty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Orr</namePart>
<namePart type="family">Paradise</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pratyusha</namePart>
<namePart type="family">Sharma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Houda</namePart>
<namePart type="family">Bouamor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juan</namePart>
<namePart type="family">Pino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kalika</namePart>
<namePart type="family">Bali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>With large language models surpassing human performance on an increasing number of benchmarks, we must take a principled approach for targeted evaluation of model capabilities. Inspired by pseudorandomness, we propose pseudointelligence, which captures the maxim that “(perceived) intelligence lies in the eye of the beholder.” That is, that claims of intelligence are meaningful only when their evaluator is taken into account. Concretely, we propose a complexity-theoretic framework of model evaluation cast as a dynamic interaction between a model and a learned evaluator. We demonstrate that this framework can be used to reason about two case studies in language model evaluation, as well as analyze existing evaluation methods.</abstract>
<identifier type="citekey">murty-etal-2023-pseudointelligence</identifier>
<identifier type="doi">10.18653/v1/2023.findings-emnlp.485</identifier>
<location>
<url>https://aclanthology.org/2023.findings-emnlp.485/</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>7284</start>
<end>7290</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Pseudointelligence: A Unifying Lens on Language Model Evaluation
%A Murty, Shikhar
%A Paradise, Orr
%A Sharma, Pratyusha
%Y Bouamor, Houda
%Y Pino, Juan
%Y Bali, Kalika
%S Findings of the Association for Computational Linguistics: EMNLP 2023
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F murty-etal-2023-pseudointelligence
%X With large language models surpassing human performance on an increasing number of benchmarks, we must take a principled approach for targeted evaluation of model capabilities. Inspired by pseudorandomness, we propose pseudointelligence, which captures the maxim that “(perceived) intelligence lies in the eye of the beholder.” That is, that claims of intelligence are meaningful only when their evaluator is taken into account. Concretely, we propose a complexity-theoretic framework of model evaluation cast as a dynamic interaction between a model and a learned evaluator. We demonstrate that this framework can be used to reason about two case studies in language model evaluation, as well as analyze existing evaluation methods.
%R 10.18653/v1/2023.findings-emnlp.485
%U https://aclanthology.org/2023.findings-emnlp.485/
%U https://doi.org/10.18653/v1/2023.findings-emnlp.485
%P 7284-7290
Markdown (Informal)
[Pseudointelligence: A Unifying Lens on Language Model Evaluation](https://aclanthology.org/2023.findings-emnlp.485/) (Murty et al., Findings 2023)
ACL