@inproceedings{gong-etal-2024-mindagent,
title = "{M}ind{A}gent: Emergent Gaming Interaction",
author = "Gong, Ran and
Huang, Qiuyuan and
Ma, Xiaojian and
Noda, Yusuke and
Durante, Zane and
Zheng, Zilong and
Terzopoulos, Demetri and
Fei-Fei, Li and
Gao, Jianfeng and
Vo, Hoi",
editor = "Duh, Kevin and
Gomez, Helena and
Bethard, Steven",
booktitle = "Findings of the Association for Computational Linguistics: NAACL 2024",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-naacl.200",
doi = "10.18653/v1/2024.findings-naacl.200",
pages = "3154--3183",
abstract = "Large Foundation Models (LFMs) can perform complex scheduling in a multi-agent system and can coordinate agents to complete sophisticated tasks that require extensive collaboration.However, despite the introduction of numerous gaming frameworks, the community lacks adequate benchmarks that support the implementation of a general multi-agent infrastructure encompassing collaboration between LFMs and human-NPCs. We propose a novel infrastructure{---}Mindagent{---}for evaluating planning and coordination capabilities in the context of gaming interaction. In particular, our infrastructure leverages an existing gaming framework to (i) act as the coordinator for a multi-agent system, (ii) collaborate with human players via instructions, and (iii) enable in-context learning based on few-shot prompting with feedback.Furthermore, we introduce {``}Cuisineworld{''}, a new gaming scenario and its related benchmark that supervises multiple agents playing the game simultaneously and measures multi-agent collaboration efficiency. We have conducted comprehensive evaluations with a new auto-metric Collaboration Score: CoS for assessing the collaboration efficiency. Finally, Mindagent can be deployed in real-world gaming scenarios in a customized VR version of Cuisineworld and adapted in the {``}Minecraft{''} domain. Our work involving LFMs within our new infrastructure for general-purpose scheduling and coordination can elucidate how such skills may be obtained by learning from large language corpora.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="gong-etal-2024-mindagent">
<titleInfo>
<title>MindAgent: Emergent Gaming Interaction</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ran</namePart>
<namePart type="family">Gong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Qiuyuan</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaojian</namePart>
<namePart type="family">Ma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yusuke</namePart>
<namePart type="family">Noda</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zane</namePart>
<namePart type="family">Durante</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zilong</namePart>
<namePart type="family">Zheng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Demetri</namePart>
<namePart type="family">Terzopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Li</namePart>
<namePart type="family">Fei-Fei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jianfeng</namePart>
<namePart type="family">Gao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hoi</namePart>
<namePart type="family">Vo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: NAACL 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kevin</namePart>
<namePart type="family">Duh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Helena</namePart>
<namePart type="family">Gomez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Bethard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mexico City, Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Large Foundation Models (LFMs) can perform complex scheduling in a multi-agent system and can coordinate agents to complete sophisticated tasks that require extensive collaboration.However, despite the introduction of numerous gaming frameworks, the community lacks adequate benchmarks that support the implementation of a general multi-agent infrastructure encompassing collaboration between LFMs and human-NPCs. We propose a novel infrastructure—Mindagent—for evaluating planning and coordination capabilities in the context of gaming interaction. In particular, our infrastructure leverages an existing gaming framework to (i) act as the coordinator for a multi-agent system, (ii) collaborate with human players via instructions, and (iii) enable in-context learning based on few-shot prompting with feedback.Furthermore, we introduce “Cuisineworld”, a new gaming scenario and its related benchmark that supervises multiple agents playing the game simultaneously and measures multi-agent collaboration efficiency. We have conducted comprehensive evaluations with a new auto-metric Collaboration Score: CoS for assessing the collaboration efficiency. Finally, Mindagent can be deployed in real-world gaming scenarios in a customized VR version of Cuisineworld and adapted in the “Minecraft” domain. Our work involving LFMs within our new infrastructure for general-purpose scheduling and coordination can elucidate how such skills may be obtained by learning from large language corpora.</abstract>
<identifier type="citekey">gong-etal-2024-mindagent</identifier>
<identifier type="doi">10.18653/v1/2024.findings-naacl.200</identifier>
<location>
<url>https://aclanthology.org/2024.findings-naacl.200</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>3154</start>
<end>3183</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T MindAgent: Emergent Gaming Interaction
%A Gong, Ran
%A Huang, Qiuyuan
%A Ma, Xiaojian
%A Noda, Yusuke
%A Durante, Zane
%A Zheng, Zilong
%A Terzopoulos, Demetri
%A Fei-Fei, Li
%A Gao, Jianfeng
%A Vo, Hoi
%Y Duh, Kevin
%Y Gomez, Helena
%Y Bethard, Steven
%S Findings of the Association for Computational Linguistics: NAACL 2024
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F gong-etal-2024-mindagent
%X Large Foundation Models (LFMs) can perform complex scheduling in a multi-agent system and can coordinate agents to complete sophisticated tasks that require extensive collaboration.However, despite the introduction of numerous gaming frameworks, the community lacks adequate benchmarks that support the implementation of a general multi-agent infrastructure encompassing collaboration between LFMs and human-NPCs. We propose a novel infrastructure—Mindagent—for evaluating planning and coordination capabilities in the context of gaming interaction. In particular, our infrastructure leverages an existing gaming framework to (i) act as the coordinator for a multi-agent system, (ii) collaborate with human players via instructions, and (iii) enable in-context learning based on few-shot prompting with feedback.Furthermore, we introduce “Cuisineworld”, a new gaming scenario and its related benchmark that supervises multiple agents playing the game simultaneously and measures multi-agent collaboration efficiency. We have conducted comprehensive evaluations with a new auto-metric Collaboration Score: CoS for assessing the collaboration efficiency. Finally, Mindagent can be deployed in real-world gaming scenarios in a customized VR version of Cuisineworld and adapted in the “Minecraft” domain. Our work involving LFMs within our new infrastructure for general-purpose scheduling and coordination can elucidate how such skills may be obtained by learning from large language corpora.
%R 10.18653/v1/2024.findings-naacl.200
%U https://aclanthology.org/2024.findings-naacl.200
%U https://doi.org/10.18653/v1/2024.findings-naacl.200
%P 3154-3183
Markdown (Informal)
[MindAgent: Emergent Gaming Interaction](https://aclanthology.org/2024.findings-naacl.200) (Gong et al., Findings 2024)
ACL
- Ran Gong, Qiuyuan Huang, Xiaojian Ma, Yusuke Noda, Zane Durante, Zilong Zheng, Demetri Terzopoulos, Li Fei-Fei, Jianfeng Gao, and Hoi Vo. 2024. MindAgent: Emergent Gaming Interaction. In Findings of the Association for Computational Linguistics: NAACL 2024, pages 3154–3183, Mexico City, Mexico. Association for Computational Linguistics.