@article{rogers-etal-2020-primer,
title = "A Primer in {BERT}ology: What We Know About How {BERT} Works",
author = "Rogers, Anna and
Kovaleva, Olga and
Rumshisky, Anna",
editor = "Johnson, Mark and
Roark, Brian and
Nenkova, Ani",
journal = "Transactions of the Association for Computational Linguistics",
volume = "8",
year = "2020",
address = "Cambridge, MA",
publisher = "MIT Press",
url = "https://aclanthology.org/2020.tacl-1.54/",
doi = "10.1162/tacl_a_00349",
pages = "842--866",
abstract = "Transformer-based models have pushed state of the art in many areas of NLP, but our understanding of what is behind their success is still limited. This paper is the first survey of over 150 studies of the popular BERT model. We review the current state of knowledge about how BERT works, what kind of information it learns and how it is represented, common modifications to its training objectives and architecture, the overparameterization issue, and approaches to compression. We then outline directions for future research."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="rogers-etal-2020-primer">
<titleInfo>
<title>A Primer in BERTology: What We Know About How BERT Works</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Rogers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Olga</namePart>
<namePart type="family">Kovaleva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Rumshisky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre authority="bibutilsgt">journal article</genre>
<relatedItem type="host">
<titleInfo>
<title>Transactions of the Association for Computational Linguistics</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
<publisher>MIT Press</publisher>
<place>
<placeTerm type="text">Cambridge, MA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre authority="bibutilsgt">academic journal</genre>
</relatedItem>
<abstract>Transformer-based models have pushed state of the art in many areas of NLP, but our understanding of what is behind their success is still limited. This paper is the first survey of over 150 studies of the popular BERT model. We review the current state of knowledge about how BERT works, what kind of information it learns and how it is represented, common modifications to its training objectives and architecture, the overparameterization issue, and approaches to compression. We then outline directions for future research.</abstract>
<identifier type="citekey">rogers-etal-2020-primer</identifier>
<identifier type="doi">10.1162/tacl_a_00349</identifier>
<location>
<url>https://aclanthology.org/2020.tacl-1.54/</url>
</location>
<part>
<date>2020</date>
<detail type="volume"><number>8</number></detail>
<extent unit="page">
<start>842</start>
<end>866</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Journal Article
%T A Primer in BERTology: What We Know About How BERT Works
%A Rogers, Anna
%A Kovaleva, Olga
%A Rumshisky, Anna
%J Transactions of the Association for Computational Linguistics
%D 2020
%V 8
%I MIT Press
%C Cambridge, MA
%F rogers-etal-2020-primer
%X Transformer-based models have pushed state of the art in many areas of NLP, but our understanding of what is behind their success is still limited. This paper is the first survey of over 150 studies of the popular BERT model. We review the current state of knowledge about how BERT works, what kind of information it learns and how it is represented, common modifications to its training objectives and architecture, the overparameterization issue, and approaches to compression. We then outline directions for future research.
%R 10.1162/tacl_a_00349
%U https://aclanthology.org/2020.tacl-1.54/
%U https://doi.org/10.1162/tacl_a_00349
%P 842-866
Markdown (Informal)
[A Primer in BERTology: What We Know About How BERT Works](https://aclanthology.org/2020.tacl-1.54/) (Rogers et al., TACL 2020)
ACL
Anna Rogers, Olga Kovaleva, and Anna Rumshisky. 2020. A Primer in BERTology: What We Know About How BERT Works. Transactions of the Association for Computational Linguistics, 8:842–866.