@inproceedings{corona-etal-2021-modular,
title = "Modular Networks for Compositional Instruction Following",
author = "Corona, Rodolfo and
Fried, Daniel and
Devin, Coline and
Klein, Dan and
Darrell, Trevor",
editor = "Toutanova, Kristina and
Rumshisky, Anna and
Zettlemoyer, Luke and
Hakkani-Tur, Dilek and
Beltagy, Iz and
Bethard, Steven and
Cotterell, Ryan and
Chakraborty, Tanmoy and
Zhou, Yichao",
booktitle = "Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies",
month = jun,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.naacl-main.81/",
doi = "10.18653/v1/2021.naacl-main.81",
pages = "1033--1040",
abstract = "Standard architectures used in instruction following often struggle on novel compositions of subgoals (e.g. navigating to landmarks or picking up objects) observed during training. We propose a modular architecture for following natural language instructions that describe sequences of diverse subgoals. In our approach, subgoal modules each carry out natural language instructions for a specific subgoal type. A sequence of modules to execute is chosen by learning to segment the instructions and predicting a subgoal type for each segment. When compared to standard, non-modular sequence-to-sequence approaches on ALFRED, a challenging instruction following benchmark, we find that modularization improves generalization to novel subgoal compositions, as well as to environments unseen in training."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="corona-etal-2021-modular">
<titleInfo>
<title>Modular Networks for Compositional Instruction Following</title>
</titleInfo>
<name type="personal">
<namePart type="given">Rodolfo</namePart>
<namePart type="family">Corona</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Fried</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Coline</namePart>
<namePart type="family">Devin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dan</namePart>
<namePart type="family">Klein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Trevor</namePart>
<namePart type="family">Darrell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kristina</namePart>
<namePart type="family">Toutanova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Rumshisky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luke</namePart>
<namePart type="family">Zettlemoyer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dilek</namePart>
<namePart type="family">Hakkani-Tur</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Iz</namePart>
<namePart type="family">Beltagy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Bethard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ryan</namePart>
<namePart type="family">Cotterell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tanmoy</namePart>
<namePart type="family">Chakraborty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yichao</namePart>
<namePart type="family">Zhou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Standard architectures used in instruction following often struggle on novel compositions of subgoals (e.g. navigating to landmarks or picking up objects) observed during training. We propose a modular architecture for following natural language instructions that describe sequences of diverse subgoals. In our approach, subgoal modules each carry out natural language instructions for a specific subgoal type. A sequence of modules to execute is chosen by learning to segment the instructions and predicting a subgoal type for each segment. When compared to standard, non-modular sequence-to-sequence approaches on ALFRED, a challenging instruction following benchmark, we find that modularization improves generalization to novel subgoal compositions, as well as to environments unseen in training.</abstract>
<identifier type="citekey">corona-etal-2021-modular</identifier>
<identifier type="doi">10.18653/v1/2021.naacl-main.81</identifier>
<location>
<url>https://aclanthology.org/2021.naacl-main.81/</url>
</location>
<part>
<date>2021-06</date>
<extent unit="page">
<start>1033</start>
<end>1040</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Modular Networks for Compositional Instruction Following
%A Corona, Rodolfo
%A Fried, Daniel
%A Devin, Coline
%A Klein, Dan
%A Darrell, Trevor
%Y Toutanova, Kristina
%Y Rumshisky, Anna
%Y Zettlemoyer, Luke
%Y Hakkani-Tur, Dilek
%Y Beltagy, Iz
%Y Bethard, Steven
%Y Cotterell, Ryan
%Y Chakraborty, Tanmoy
%Y Zhou, Yichao
%S Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies
%D 2021
%8 June
%I Association for Computational Linguistics
%C Online
%F corona-etal-2021-modular
%X Standard architectures used in instruction following often struggle on novel compositions of subgoals (e.g. navigating to landmarks or picking up objects) observed during training. We propose a modular architecture for following natural language instructions that describe sequences of diverse subgoals. In our approach, subgoal modules each carry out natural language instructions for a specific subgoal type. A sequence of modules to execute is chosen by learning to segment the instructions and predicting a subgoal type for each segment. When compared to standard, non-modular sequence-to-sequence approaches on ALFRED, a challenging instruction following benchmark, we find that modularization improves generalization to novel subgoal compositions, as well as to environments unseen in training.
%R 10.18653/v1/2021.naacl-main.81
%U https://aclanthology.org/2021.naacl-main.81/
%U https://doi.org/10.18653/v1/2021.naacl-main.81
%P 1033-1040
Markdown (Informal)
[Modular Networks for Compositional Instruction Following](https://aclanthology.org/2021.naacl-main.81/) (Corona et al., NAACL 2021)
ACL
- Rodolfo Corona, Daniel Fried, Coline Devin, Dan Klein, and Trevor Darrell. 2021. Modular Networks for Compositional Instruction Following. In Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, pages 1033–1040, Online. Association for Computational Linguistics.