@inproceedings{el-naggar-etal-2023-theoretical,
    title = "Theoretical Conditions and Empirical Failure of Bracket Counting on Long Sequences with Linear Recurrent Networks",
    author = "El-Naggar, Nadine and
      Madhyastha, Pranava and
      Weyde, Tillman",
    editor = "Bassignana, Elisa and
      Lindemann, Matthias and
      Petit, Alban",
    booktitle = "Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics: Student Research Workshop",
    month = may,
    year = "2023",
    address = "Dubrovnik, Croatia",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2023.eacl-srw.15/",
    doi = "10.18653/v1/2023.eacl-srw.15",
    pages = "143--148",
    abstract = "Previous work has established that RNNs with an unbounded activation function have the capacity to count exactly. However, it has also been shown that RNNs are challenging to train effectively and generally do not learn exact counting behaviour. In this paper, we focus on this problem by studying the simplest possible RNN, a linear single-cell network. We conduct a theoretical analysis of linear RNNs and identify conditions for the models to exhibit exact counting behaviour. We provide a formal proof that these conditions are necessary and sufficient. We also conduct an empirical analysis using tasks involving a Dyck-1-like Balanced Bracket language under two different settings. We observe that linear RNNs generally do not meet the necessary and sufficient conditions for counting behaviour when trained with the standard approach. We investigate how varying the length of training sequences and utilising different target classes impacts model behaviour during training and the ability of linear RNN models to effectively approximate the indicator conditions."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="el-naggar-etal-2023-theoretical">
    <titleInfo>
      <title>Theoretical Conditions and Empirical Failure of Bracket Counting on Long Sequences with Linear Recurrent Networks</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Nadine</namePart>
      <namePart type="family">El-Naggar</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Pranava</namePart>
      <namePart type="family">Madhyastha</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Tillman</namePart>
      <namePart type="family">Weyde</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2023-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics: Student Research Workshop</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Elisa</namePart>
        <namePart type="family">Bassignana</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Matthias</namePart>
        <namePart type="family">Lindemann</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Alban</namePart>
        <namePart type="family">Petit</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Dubrovnik, Croatia</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Previous work has established that RNNs with an unbounded activation function have the capacity to count exactly. However, it has also been shown that RNNs are challenging to train effectively and generally do not learn exact counting behaviour. In this paper, we focus on this problem by studying the simplest possible RNN, a linear single-cell network. We conduct a theoretical analysis of linear RNNs and identify conditions for the models to exhibit exact counting behaviour. We provide a formal proof that these conditions are necessary and sufficient. We also conduct an empirical analysis using tasks involving a Dyck-1-like Balanced Bracket language under two different settings. We observe that linear RNNs generally do not meet the necessary and sufficient conditions for counting behaviour when trained with the standard approach. We investigate how varying the length of training sequences and utilising different target classes impacts model behaviour during training and the ability of linear RNN models to effectively approximate the indicator conditions.</abstract>
    <identifier type="citekey">el-naggar-etal-2023-theoretical</identifier>
    <identifier type="doi">10.18653/v1/2023.eacl-srw.15</identifier>
    <location>
      <url>https://aclanthology.org/2023.eacl-srw.15/</url>
    </location>
    <part>
      <date>2023-05</date>
      <extent unit="page">
        <start>143</start>
        <end>148</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Theoretical Conditions and Empirical Failure of Bracket Counting on Long Sequences with Linear Recurrent Networks
%A El-Naggar, Nadine
%A Madhyastha, Pranava
%A Weyde, Tillman
%Y Bassignana, Elisa
%Y Lindemann, Matthias
%Y Petit, Alban
%S Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics: Student Research Workshop
%D 2023
%8 May
%I Association for Computational Linguistics
%C Dubrovnik, Croatia
%F el-naggar-etal-2023-theoretical
%X Previous work has established that RNNs with an unbounded activation function have the capacity to count exactly. However, it has also been shown that RNNs are challenging to train effectively and generally do not learn exact counting behaviour. In this paper, we focus on this problem by studying the simplest possible RNN, a linear single-cell network. We conduct a theoretical analysis of linear RNNs and identify conditions for the models to exhibit exact counting behaviour. We provide a formal proof that these conditions are necessary and sufficient. We also conduct an empirical analysis using tasks involving a Dyck-1-like Balanced Bracket language under two different settings. We observe that linear RNNs generally do not meet the necessary and sufficient conditions for counting behaviour when trained with the standard approach. We investigate how varying the length of training sequences and utilising different target classes impacts model behaviour during training and the ability of linear RNN models to effectively approximate the indicator conditions.
%R 10.18653/v1/2023.eacl-srw.15
%U https://aclanthology.org/2023.eacl-srw.15/
%U https://doi.org/10.18653/v1/2023.eacl-srw.15
%P 143-148
Markdown (Informal)
[Theoretical Conditions and Empirical Failure of Bracket Counting on Long Sequences with Linear Recurrent Networks](https://aclanthology.org/2023.eacl-srw.15/) (El-Naggar et al., EACL 2023)
ACL
Nadine El-Naggar, Pranava Madhyastha, and Tillman Weyde. 2023. Theoretical Conditions and Empirical Failure of Bracket Counting on Long Sequences with Linear Recurrent Networks. In Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics: Student Research Workshop, pages 143–148, Dubrovnik, Croatia. Association for Computational Linguistics.
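
Note: the counting setup described in the abstract can be illustrated with a minimal sketch. This is not the paper's code; the function name `linear_rnn_count` and the specific weight values (recurrent weight 1, input weights +1/-1, zero bias) are illustrative assumptions chosen so that a single-cell linear recurrence h_t = u * h_{t-1} + w * x_t + b tracks the bracket count of a Dyck-1-like string exactly.

```python
# Minimal sketch (assumed weights, not the paper's stated conditions):
# a single-cell linear RNN whose hidden state counts unmatched open brackets.

def linear_rnn_count(sequence, u=1.0, w_open=1.0, w_close=-1.0, b=0.0):
    """Run h_t = u * h_{t-1} + w * x_t + b over a bracket string.

    With u = 1, w_open = +1, w_close = -1, b = 0, the hidden state at
    step t equals the number of unmatched '(' in the prefix, so a
    balanced string ends with h = 0.
    """
    h = 0.0
    states = []
    for ch in sequence:
        w = w_open if ch == "(" else w_close  # weighted input for this symbol
        h = u * h + w + b
        states.append(h)
    return states

if __name__ == "__main__":
    # "(()())" is balanced: the count returns to 0 at the final step.
    print(linear_rnn_count("(()())"))  # [1.0, 2.0, 1.0, 2.0, 1.0, 0.0]
    # A small deviation in the recurrent weight (e.g. u = 0.99) drifts
    # away from the exact count, and the error compounds with length.
    print(linear_rnn_count("(" * 5 + ")" * 5, u=0.99)[-1])
```

Under these assumed weights the hidden state performs the exact counting behaviour the abstract refers to, while any drift in the recurrent weight accumulates over long sequences, which is consistent with the length sensitivity the title highlights.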