@inproceedings{shani-etal-2023-towards,
title = "Towards Concept-Aware Large Language Models",
author = "Shani, Chen and
Vreeken, Jilles and
Shahaf, Dafna",
editor = "Bouamor, Houda and
Pino, Juan and
Bali, Kalika",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2023",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-emnlp.877/",
doi = "10.18653/v1/2023.findings-emnlp.877",
pages = "13158--13170",
abstract = "Concepts play a pivotal role in various human cognitive functions, including learning, reasoning and communication. However, there is very little work on endowing machines with the ability to form and reason with concepts. In particular, state-of-the-art large language models (LLMs) work at the level of tokens, not concepts. In this work, we analyze how well contemporary LLMs capture human concepts and their structure. We then discuss ways to develop concept-aware LLMs, taking place at different stages of the pipeline. We sketch a method for pretraining LLMs using concepts, and also explore the simpler approach that uses the output of existing LLMs. Despite its simplicity, our proof-of-concept is shown to better match human intuition, as well as improve the robustness of predictions. These preliminary results underscore the promise of concept-aware LLMs."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="shani-etal-2023-towards">
<titleInfo>
<title>Towards Concept-Aware Large Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Chen</namePart>
<namePart type="family">Shani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jilles</namePart>
<namePart type="family">Vreeken</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dafna</namePart>
<namePart type="family">Shahaf</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Houda</namePart>
<namePart type="family">Bouamor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juan</namePart>
<namePart type="family">Pino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kalika</namePart>
<namePart type="family">Bali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Concepts play a pivotal role in various human cognitive functions, including learning, reasoning and communication. However, there is very little work on endowing machines with the ability to form and reason with concepts. In particular, state-of-the-art large language models (LLMs) work at the level of tokens, not concepts. In this work, we analyze how well contemporary LLMs capture human concepts and their structure. We then discuss ways to develop concept-aware LLMs, taking place at different stages of the pipeline. We sketch a method for pretraining LLMs using concepts, and also explore the simpler approach that uses the output of existing LLMs. Despite its simplicity, our proof-of-concept is shown to better match human intuition, as well as improve the robustness of predictions. These preliminary results underscore the promise of concept-aware LLMs.</abstract>
<identifier type="citekey">shani-etal-2023-towards</identifier>
<identifier type="doi">10.18653/v1/2023.findings-emnlp.877</identifier>
<location>
<url>https://aclanthology.org/2023.findings-emnlp.877/</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>13158</start>
<end>13170</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Towards Concept-Aware Large Language Models
%A Shani, Chen
%A Vreeken, Jilles
%A Shahaf, Dafna
%Y Bouamor, Houda
%Y Pino, Juan
%Y Bali, Kalika
%S Findings of the Association for Computational Linguistics: EMNLP 2023
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F shani-etal-2023-towards
%X Concepts play a pivotal role in various human cognitive functions, including learning, reasoning and communication. However, there is very little work on endowing machines with the ability to form and reason with concepts. In particular, state-of-the-art large language models (LLMs) work at the level of tokens, not concepts. In this work, we analyze how well contemporary LLMs capture human concepts and their structure. We then discuss ways to develop concept-aware LLMs, taking place at different stages of the pipeline. We sketch a method for pretraining LLMs using concepts, and also explore the simpler approach that uses the output of existing LLMs. Despite its simplicity, our proof-of-concept is shown to better match human intuition, as well as improve the robustness of predictions. These preliminary results underscore the promise of concept-aware LLMs.
%R 10.18653/v1/2023.findings-emnlp.877
%U https://aclanthology.org/2023.findings-emnlp.877/
%U https://doi.org/10.18653/v1/2023.findings-emnlp.877
%P 13158-13170
Markdown (Informal)
[Towards Concept-Aware Large Language Models](https://aclanthology.org/2023.findings-emnlp.877/) (Shani et al., Findings 2023)

ACL
Chen Shani, Jilles Vreeken, and Dafna Shahaf. 2023. Towards Concept-Aware Large Language Models. In Findings of the Association for Computational Linguistics: EMNLP 2023, pages 13158–13170, Singapore. Association for Computational Linguistics.