@inproceedings{puvis-de-chavannes-etal-2021-hyperparameter,
title = "Hyperparameter Power Impact in Transformer Language Model Training",
author = "Puvis de Chavannes, Lucas H{\o}yberg and
Kongsbak, Mads Guldborg Kjeldgaard and
Rantzau, Timmie and
Derczynski, Leon",
editor = "Moosavi, Nafise Sadat and
Gurevych, Iryna and
Fan, Angela and
Wolf, Thomas and
Hou, Yufang and
Marasovi{\'c}, Ana and
Ravi, Sujith",
booktitle = "Proceedings of the Second Workshop on Simple and Efficient Natural Language Processing",
month = nov,
year = "2021",
address = "Virtual",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.sustainlp-1.12/",
doi = "10.18653/v1/2021.sustainlp-1.12",
pages = "96--118",
abstract = "Training large language models can consume a large amount of energy. We hypothesize that the language model`s configuration impacts its energy consumption, and that there is room for power consumption optimisation in modern large language models. To investigate these claims, we introduce a power consumption factor to the objective function, and explore the range of models and hyperparameter configurations that affect power. We identify multiple configuration factors that can reduce power consumption during language model training while retaining model quality."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="puvis-de-chavannes-etal-2021-hyperparameter">
<titleInfo>
<title>Hyperparameter Power Impact in Transformer Language Model Training</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lucas</namePart>
<namePart type="given">Høyberg</namePart>
<namePart type="family">Puvis de Chavannes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mads</namePart>
<namePart type="given">Guldborg</namePart>
<namePart type="given">Kjeldgaard</namePart>
<namePart type="family">Kongsbak</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Timmie</namePart>
<namePart type="family">Rantzau</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leon</namePart>
<namePart type="family">Derczynski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second Workshop on Simple and Efficient Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nafise</namePart>
<namePart type="given">Sadat</namePart>
<namePart type="family">Moosavi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Iryna</namePart>
<namePart type="family">Gurevych</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Angela</namePart>
<namePart type="family">Fan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thomas</namePart>
<namePart type="family">Wolf</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yufang</namePart>
<namePart type="family">Hou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ana</namePart>
<namePart type="family">Marasović</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sujith</namePart>
<namePart type="family">Ravi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Virtual</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Training large language models can consume a large amount of energy. We hypothesize that the language model's configuration impacts its energy consumption, and that there is room for power consumption optimisation in modern large language models. To investigate these claims, we introduce a power consumption factor to the objective function, and explore the range of models and hyperparameter configurations that affect power. We identify multiple configuration factors that can reduce power consumption during language model training while retaining model quality.</abstract>
<identifier type="citekey">puvis-de-chavannes-etal-2021-hyperparameter</identifier>
<identifier type="doi">10.18653/v1/2021.sustainlp-1.12</identifier>
<location>
<url>https://aclanthology.org/2021.sustainlp-1.12/</url>
</location>
<part>
<date>2021-11</date>
<extent unit="page">
<start>96</start>
<end>118</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Hyperparameter Power Impact in Transformer Language Model Training
%A Puvis de Chavannes, Lucas Høyberg
%A Kongsbak, Mads Guldborg Kjeldgaard
%A Rantzau, Timmie
%A Derczynski, Leon
%Y Moosavi, Nafise Sadat
%Y Gurevych, Iryna
%Y Fan, Angela
%Y Wolf, Thomas
%Y Hou, Yufang
%Y Marasović, Ana
%Y Ravi, Sujith
%S Proceedings of the Second Workshop on Simple and Efficient Natural Language Processing
%D 2021
%8 November
%I Association for Computational Linguistics
%C Virtual
%F puvis-de-chavannes-etal-2021-hyperparameter
%X Training large language models can consume a large amount of energy. We hypothesize that the language model's configuration impacts its energy consumption, and that there is room for power consumption optimisation in modern large language models. To investigate these claims, we introduce a power consumption factor to the objective function, and explore the range of models and hyperparameter configurations that affect power. We identify multiple configuration factors that can reduce power consumption during language model training while retaining model quality.
%R 10.18653/v1/2021.sustainlp-1.12
%U https://aclanthology.org/2021.sustainlp-1.12/
%U https://doi.org/10.18653/v1/2021.sustainlp-1.12
%P 96-118
Markdown (Informal)
[Hyperparameter Power Impact in Transformer Language Model Training](https://aclanthology.org/2021.sustainlp-1.12/) (Puvis de Chavannes et al., sustainlp 2021)
ACL
Lucas Høyberg Puvis de Chavannes, Mads Guldborg Kjeldgaard Kongsbak, Timmie Rantzau, and Leon Derczynski. 2021. Hyperparameter Power Impact in Transformer Language Model Training. In Proceedings of the Second Workshop on Simple and Efficient Natural Language Processing, pages 96–118, Virtual. Association for Computational Linguistics.