@inproceedings{dainese-etal-2023-reader,
title = "Reader: Model-based language-instructed reinforcement learning",
author = "Dainese, Nicola and
Marttinen, Pekka and
Ilin, Alexander",
editor = "Bouamor, Houda and
Pino, Juan and
Bali, Kalika",
booktitle = "Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.emnlp-main.1032/",
doi = "10.18653/v1/2023.emnlp-main.1032",
pages = "16583--16599",
abstract = "We explore how we can build accurate world models, which are partially specified by language, and how we can plan with them in the face of novelty and uncertainty. We propose the first model-based reinforcement learning approach to tackle the environment Read To Fight Monsters (Zhong et al., 2019), a grounded policy learning problem. In RTFM an agent has to reason over a set of rules and a goal, both described in a language manual, and the observations, while taking into account the uncertainty arising from the stochasticity of the environment, in order to generalize successfully its policy to test episodes. We demonstrate the superior performance and sample efficiency of our model-based approach to the existing model-free SOTA agents in eight variants of RTFM. Furthermore, we show how the agent`s plans can be inspected, which represents progress towards more interpretable agents."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="dainese-etal-2023-reader">
<titleInfo>
<title>Reader: Model-based language-instructed reinforcement learning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicola</namePart>
<namePart type="family">Dainese</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pekka</namePart>
<namePart type="family">Marttinen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexander</namePart>
<namePart type="family">Ilin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Houda</namePart>
<namePart type="family">Bouamor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juan</namePart>
<namePart type="family">Pino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kalika</namePart>
<namePart type="family">Bali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We explore how we can build accurate world models, which are partially specified by language, and how we can plan with them in the face of novelty and uncertainty. We propose the first model-based reinforcement learning approach to tackle the environment Read To Fight Monsters (Zhong et al., 2019), a grounded policy learning problem. In RTFM an agent has to reason over a set of rules and a goal, both described in a language manual, and the observations, while taking into account the uncertainty arising from the stochasticity of the environment, in order to generalize successfully its policy to test episodes. We demonstrate the superior performance and sample efficiency of our model-based approach to the existing model-free SOTA agents in eight variants of RTFM. Furthermore, we show how the agent‘s plans can be inspected, which represents progress towards more interpretable agents.</abstract>
<identifier type="citekey">dainese-etal-2023-reader</identifier>
<identifier type="doi">10.18653/v1/2023.emnlp-main.1032</identifier>
<location>
<url>https://aclanthology.org/2023.emnlp-main.1032/</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>16583</start>
<end>16599</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Reader: Model-based language-instructed reinforcement learning
%A Dainese, Nicola
%A Marttinen, Pekka
%A Ilin, Alexander
%Y Bouamor, Houda
%Y Pino, Juan
%Y Bali, Kalika
%S Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F dainese-etal-2023-reader
%X We explore how we can build accurate world models, which are partially specified by language, and how we can plan with them in the face of novelty and uncertainty. We propose the first model-based reinforcement learning approach to tackle the environment Read To Fight Monsters (Zhong et al., 2019), a grounded policy learning problem. In RTFM an agent has to reason over a set of rules and a goal, both described in a language manual, and the observations, while taking into account the uncertainty arising from the stochasticity of the environment, in order to generalize successfully its policy to test episodes. We demonstrate the superior performance and sample efficiency of our model-based approach to the existing model-free SOTA agents in eight variants of RTFM. Furthermore, we show how the agent's plans can be inspected, which represents progress towards more interpretable agents.
%R 10.18653/v1/2023.emnlp-main.1032
%U https://aclanthology.org/2023.emnlp-main.1032/
%U https://doi.org/10.18653/v1/2023.emnlp-main.1032
%P 16583-16599
Markdown (Informal)
[Reader: Model-based language-instructed reinforcement learning](https://aclanthology.org/2023.emnlp-main.1032/) (Dainese et al., EMNLP 2023)
ACL
Nicola Dainese, Pekka Marttinen, and Alexander Ilin. 2023. [Reader: Model-based language-instructed reinforcement learning](https://aclanthology.org/2023.emnlp-main.1032/). In *Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing*, pages 16583–16599, Singapore. Association for Computational Linguistics.