@inproceedings{lakim-etal-2022-holistic,
title = "A Holistic Assessment of the Carbon Footprint of Noor, a Very Large {A}rabic Language Model",
author = "Lakim, Imad and
Almazrouei, Ebtesam and
Abualhaol, Ibrahim and
Debbah, Merouane and
Launay, Julien",
editor = "Fan, Angela and
Ilic, Suzana and
Wolf, Thomas and
Gall{\'e}, Matthias",
booktitle = "Proceedings of BigScience Episode {\#}5 -- Workshop on Challenges {\&} Perspectives in Creating Large Language Models",
month = may,
year = "2022",
address = "virtual+Dublin",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.bigscience-1.8/",
doi = "10.18653/v1/2022.bigscience-1.8",
pages = "84--94",
abstract = "As ever larger language models grow more ubiquitous, it is crucial to consider their environmental impact. Characterised by extreme size and resource use, recent generations of models have been criticised for their voracious appetite for compute, and thus significant carbon footprint. Although reporting of carbon impact has grown more common in machine learning papers, this reporting is usually limited to compute resources used strictly for training. In this work, we propose a holistic assessment of the footprint of an extreme-scale language model, Noor. Noor is an ongoing project aiming to develop the largest multi-task Arabic language models{--}with up to 13B parameters{--}leveraging zero-shot generalisation to enable a wide range of downstream tasks via natural language instructions. We assess the total carbon bill of the entire project: starting with data collection and storage costs, including research and development budgets, pretraining costs, future serving estimates, and other exogenous costs necessary for this international cooperation. Notably, we find that inference costs and exogenous factors can have a significant impact on total budget. Finally, we discuss pathways to reduce the carbon footprint of extreme-scale models."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="lakim-etal-2022-holistic">
<titleInfo>
<title>A Holistic Assessment of the Carbon Footprint of Noor, a Very Large Arabic Language Model</title>
</titleInfo>
<name type="personal">
<namePart type="given">Imad</namePart>
<namePart type="family">Lakim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ebtesam</namePart>
<namePart type="family">Almazrouei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ibrahim</namePart>
<namePart type="family">Abualhaol</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Merouane</namePart>
<namePart type="family">Debbah</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Julien</namePart>
<namePart type="family">Launay</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of BigScience Episode #5 – Workshop on Challenges &amp; Perspectives in Creating Large Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Angela</namePart>
<namePart type="family">Fan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Suzana</namePart>
<namePart type="family">Ilic</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thomas</namePart>
<namePart type="family">Wolf</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matthias</namePart>
<namePart type="family">Gallé</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">virtual+Dublin</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>As ever larger language models grow more ubiquitous, it is crucial to consider their environmental impact. Characterised by extreme size and resource use, recent generations of models have been criticised for their voracious appetite for compute, and thus significant carbon footprint. Although reporting of carbon impact has grown more common in machine learning papers, this reporting is usually limited to compute resources used strictly for training. In this work, we propose a holistic assessment of the footprint of an extreme-scale language model, Noor. Noor is an ongoing project aiming to develop the largest multi-task Arabic language models–with up to 13B parameters–leveraging zero-shot generalisation to enable a wide range of downstream tasks via natural language instructions. We assess the total carbon bill of the entire project: starting with data collection and storage costs, including research and development budgets, pretraining costs, future serving estimates, and other exogenous costs necessary for this international cooperation. Notably, we find that inference costs and exogenous factors can have a significant impact on total budget. Finally, we discuss pathways to reduce the carbon footprint of extreme-scale models.</abstract>
<identifier type="citekey">lakim-etal-2022-holistic</identifier>
<identifier type="doi">10.18653/v1/2022.bigscience-1.8</identifier>
<location>
<url>https://aclanthology.org/2022.bigscience-1.8/</url>
</location>
<part>
<date>2022-05</date>
<extent unit="page">
<start>84</start>
<end>94</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Holistic Assessment of the Carbon Footprint of Noor, a Very Large Arabic Language Model
%A Lakim, Imad
%A Almazrouei, Ebtesam
%A Abualhaol, Ibrahim
%A Debbah, Merouane
%A Launay, Julien
%Y Fan, Angela
%Y Ilic, Suzana
%Y Wolf, Thomas
%Y Gallé, Matthias
%S Proceedings of BigScience Episode #5 – Workshop on Challenges & Perspectives in Creating Large Language Models
%D 2022
%8 May
%I Association for Computational Linguistics
%C virtual+Dublin
%F lakim-etal-2022-holistic
%X As ever larger language models grow more ubiquitous, it is crucial to consider their environmental impact. Characterised by extreme size and resource use, recent generations of models have been criticised for their voracious appetite for compute, and thus significant carbon footprint. Although reporting of carbon impact has grown more common in machine learning papers, this reporting is usually limited to compute resources used strictly for training. In this work, we propose a holistic assessment of the footprint of an extreme-scale language model, Noor. Noor is an ongoing project aiming to develop the largest multi-task Arabic language models–with up to 13B parameters–leveraging zero-shot generalisation to enable a wide range of downstream tasks via natural language instructions. We assess the total carbon bill of the entire project: starting with data collection and storage costs, including research and development budgets, pretraining costs, future serving estimates, and other exogenous costs necessary for this international cooperation. Notably, we find that inference costs and exogenous factors can have a significant impact on total budget. Finally, we discuss pathways to reduce the carbon footprint of extreme-scale models.
%R 10.18653/v1/2022.bigscience-1.8
%U https://aclanthology.org/2022.bigscience-1.8/
%U https://doi.org/10.18653/v1/2022.bigscience-1.8
%P 84-94
Markdown (Informal)
[A Holistic Assessment of the Carbon Footprint of Noor, a Very Large Arabic Language Model](https://aclanthology.org/2022.bigscience-1.8/) (Lakim et al., BigScience 2022)
ACL
Imad Lakim, Ebtesam Almazrouei, Ibrahim Abualhaol, Merouane Debbah, and Julien Launay. 2022. A Holistic Assessment of the Carbon Footprint of Noor, a Very Large Arabic Language Model. In Proceedings of BigScience Episode #5 – Workshop on Challenges & Perspectives in Creating Large Language Models, pages 84–94, virtual+Dublin. Association for Computational Linguistics.
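The abstract describes a lifecycle-style accounting: the project's total carbon bill is the sum of emissions from data collection and storage, research and development, pretraining, projected serving, and exogenous costs. The sketch below is a minimal, hypothetical illustration of that kind of bookkeeping; the category names, energy figures, and grid intensities are placeholders for illustration only and are not numbers from the paper.

```python
# Illustrative lifecycle carbon accounting in the spirit of the paper's
# "holistic assessment". All values below are hypothetical placeholders.

from dataclasses import dataclass


@dataclass
class EmissionItem:
    label: str
    energy_kwh: float            # electricity consumed by this activity
    grid_intensity: float        # kgCO2e emitted per kWh on the hosting grid
    direct_kgco2e: float = 0.0   # non-electric emissions (e.g. travel, hardware)

    def kgco2e(self) -> float:
        # Electric emissions plus any direct (non-electric) emissions.
        return self.energy_kwh * self.grid_intensity + self.direct_kgco2e


# Hypothetical project breakdown mirroring the cost categories in the abstract.
project = [
    EmissionItem("data collection & storage", energy_kwh=20_000, grid_intensity=0.4),
    EmissionItem("research & development",    energy_kwh=50_000, grid_intensity=0.4),
    EmissionItem("pretraining",               energy_kwh=300_000, grid_intensity=0.4),
    EmissionItem("serving (projected)",       energy_kwh=150_000, grid_intensity=0.4),
    EmissionItem("exogenous (travel, etc.)",  energy_kwh=0, grid_intensity=0.0,
                 direct_kgco2e=30_000),
]

total = sum(item.kgco2e() for item in project)
for item in project:
    print(f"{item.label:<30} {item.kgco2e() / 1000:8.1f} tCO2e")
print(f"{'total':<30} {total / 1000:8.1f} tCO2e")
```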