@inproceedings{chaudhary-etal-2020-topicbert,
title = "{T}opic{BERT} for Energy Efficient Document Classification",
author = {Chaudhary, Yatin and
Gupta, Pankaj and
Saxena, Khushbu and
Kulkarni, Vivek and
Runkler, Thomas and
Sch{\"u}tze, Hinrich},
editor = "Cohn, Trevor and
He, Yulan and
Liu, Yang",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2020",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.findings-emnlp.152/",
doi = "10.18653/v1/2020.findings-emnlp.152",
pages = "1682--1690",
abstract = "Prior research notes that BERT`s computational cost grows quadratically with sequence length thus leading to longer training times, higher GPU memory constraints and carbon emissions. While recent work seeks to address these scalability issues at pre-training, these issues are also prominent in fine-tuning especially for long sequence tasks like document classification. Our work thus focuses on optimizing the computational cost of fine-tuning for document classification. We achieve this by complementary learning of both topic and language models in a unified framework, named TopicBERT. This significantly reduces the number of self-attention operations {--} a main performance bottleneck. Consequently, our model achieves a 1.4x ( 40{\%}) speedup with 40{\%} reduction in CO2 emission while retaining 99.9{\%} performance over 5 datasets."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="chaudhary-etal-2020-topicbert">
<titleInfo>
<title>TopicBERT for Energy Efficient Document Classification</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yatin</namePart>
<namePart type="family">Chaudhary</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pankaj</namePart>
<namePart type="family">Gupta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khushbu</namePart>
<namePart type="family">Saxena</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vivek</namePart>
<namePart type="family">Kulkarni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thomas</namePart>
<namePart type="family">Runkler</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hinrich</namePart>
<namePart type="family">Schütze</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2020</title>
</titleInfo>
<name type="personal">
<namePart type="given">Trevor</namePart>
<namePart type="family">Cohn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yulan</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Prior research notes that BERT's computational cost grows quadratically with sequence length thus leading to longer training times, higher GPU memory constraints and carbon emissions. While recent work seeks to address these scalability issues at pre-training, these issues are also prominent in fine-tuning especially for long sequence tasks like document classification. Our work thus focuses on optimizing the computational cost of fine-tuning for document classification. We achieve this by complementary learning of both topic and language models in a unified framework, named TopicBERT. This significantly reduces the number of self-attention operations – a main performance bottleneck. Consequently, our model achieves a 1.4x (∼40%) speedup with 40% reduction in CO2 emission while retaining 99.9% performance over 5 datasets.</abstract>
<identifier type="citekey">chaudhary-etal-2020-topicbert</identifier>
<identifier type="doi">10.18653/v1/2020.findings-emnlp.152</identifier>
<location>
<url>https://aclanthology.org/2020.findings-emnlp.152/</url>
</location>
<part>
<date>2020-11</date>
<extent unit="page">
<start>1682</start>
<end>1690</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T TopicBERT for Energy Efficient Document Classification
%A Chaudhary, Yatin
%A Gupta, Pankaj
%A Saxena, Khushbu
%A Kulkarni, Vivek
%A Runkler, Thomas
%A Schütze, Hinrich
%Y Cohn, Trevor
%Y He, Yulan
%Y Liu, Yang
%S Findings of the Association for Computational Linguistics: EMNLP 2020
%D 2020
%8 November
%I Association for Computational Linguistics
%C Online
%F chaudhary-etal-2020-topicbert
%X Prior research notes that BERT's computational cost grows quadratically with sequence length thus leading to longer training times, higher GPU memory constraints and carbon emissions. While recent work seeks to address these scalability issues at pre-training, these issues are also prominent in fine-tuning especially for long sequence tasks like document classification. Our work thus focuses on optimizing the computational cost of fine-tuning for document classification. We achieve this by complementary learning of both topic and language models in a unified framework, named TopicBERT. This significantly reduces the number of self-attention operations – a main performance bottleneck. Consequently, our model achieves a 1.4x (∼40%) speedup with 40% reduction in CO2 emission while retaining 99.9% performance over 5 datasets.
%R 10.18653/v1/2020.findings-emnlp.152
%U https://aclanthology.org/2020.findings-emnlp.152/
%U https://doi.org/10.18653/v1/2020.findings-emnlp.152
%P 1682-1690
Markdown (Informal)
[TopicBERT for Energy Efficient Document Classification](https://aclanthology.org/2020.findings-emnlp.152/) (Chaudhary et al., Findings 2020)
ACL
Yatin Chaudhary, Pankaj Gupta, Khushbu Saxena, Vivek Kulkarni, Thomas Runkler, and Hinrich Schütze. 2020. TopicBERT for Energy Efficient Document Classification. In Findings of the Association for Computational Linguistics: EMNLP 2020, pages 1682–1690, Online. Association for Computational Linguistics.
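
The abstract's core idea is to pair a cheap topic-model summary of the full document with BERT run over a shorter input, so that fewer quadratic self-attention operations are needed during fine-tuning. The sketch below only illustrates that fusion idea under assumed names and choices (a TopicAugmentedClassifier class, a precomputed topic_proportions vector, concatenation followed by a linear layer); it is not the paper's published TopicBERT architecture, in which the topic and language models are learned jointly in one framework.

```python
# Illustrative sketch only: fuse a document-topic vector with a BERT [CLS]
# embedding for document classification. Names, dimensions, and the
# concatenation-based fusion are assumptions, not the authors' exact design.
import torch
import torch.nn as nn
from transformers import AutoModel


class TopicAugmentedClassifier(nn.Module):
    def __init__(self, num_topics: int, num_labels: int,
                 bert_name: str = "bert-base-uncased"):
        super().__init__()
        self.bert = AutoModel.from_pretrained(bert_name)
        hidden = self.bert.config.hidden_size
        # Classify from the concatenation of the contextual [CLS] vector and a
        # document-topic proportion vector (e.g., from LDA or a neural topic model).
        self.classifier = nn.Linear(hidden + num_topics, num_labels)

    def forward(self, input_ids, attention_mask, topic_proportions):
        # Shorter BERT inputs keep the quadratic self-attention cost down,
        # while the topic vector summarizes the full document cheaply.
        cls = self.bert(input_ids=input_ids,
                        attention_mask=attention_mask).last_hidden_state[:, 0]
        fused = torch.cat([cls, topic_proportions], dim=-1)
        return self.classifier(fused)
```

In this simplified reading the topic proportions are treated as a fixed, precomputed feature purely to keep the sketch short; the paper instead trains the topic and language models complementarily within a unified framework.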