@inproceedings{zhang-etal-2022-bmcook,
title = "{BMC}ook: A Task-agnostic Compression Toolkit for Big Models",
author = "Zhang, Zhengyan and
Gong, Baitao and
Chen, Yingfa and
Han, Xu and
Zeng, Guoyang and
Zhao, Weilin and
Chen, Yanxu and
Liu, Zhiyuan and
Sun, Maosong",
editor = "Che, Wanxiang and
Shutova, Ekaterina",
booktitle = "Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing: System Demonstrations",
month = dec,
year = "2022",
address = "Abu Dhabi, UAE",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.emnlp-demos.40/",
doi = "10.18653/v1/2022.emnlp-demos.40",
pages = "396--405",
abstract = "Recently, pre-trained language models (PLMs) have achieved great success on various NLP tasks and have shown a trend of exponential growth in model size. To alleviate the unaffordable computational costs brought by the size growth, model compression has been widely explored. Existing efforts have achieved promising results in compressing medium-sized models for specific tasks, while task-agnostic compression for big models with over billions of parameters is rarely studied. Task-agnostic compression can provide an efficient and versatile big model for both prompting and delta tuning, leading to a more general impact than task-specific compression. Hence, we introduce a task-agnostic compression toolkit BMCook for big models. In BMCook, we implement four representative compression methods, including quantization, pruning, distillation, and MoEfication. Developers can easily combine these methods towards better efficiency. To evaluate BMCook, we apply it to compress T5-3B (a PLM with 3 billion parameters). We achieve nearly 12x efficiency improvement while maintaining over 97{\%} of the original T5-3B performance on three typical NLP benchmarks. Moreover, the final compressed model also significantly outperforms T5-base (a PLM with 220 million parameters), which has a similar computational cost. BMCook is publicly available at \url{https://github.com/OpenBMB/BMCook}."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhang-etal-2022-bmcook">
<titleInfo>
<title>BMCook: A Task-agnostic Compression Toolkit for Big Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Zhengyan</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Baitao</namePart>
<namePart type="family">Gong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yingfa</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xu</namePart>
<namePart type="family">Han</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Guoyang</namePart>
<namePart type="family">Zeng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Weilin</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yanxu</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhiyuan</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maosong</namePart>
<namePart type="family">Sun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing: System Demonstrations</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Recently, pre-trained language models (PLMs) have achieved great success on various NLP tasks and have shown a trend of exponential growth in model size. To alleviate the unaffordable computational costs brought by the size growth, model compression has been widely explored. Existing efforts have achieved promising results in compressing medium-sized models for specific tasks, while task-agnostic compression for big models with over billions of parameters is rarely studied. Task-agnostic compression can provide an efficient and versatile big model for both prompting and delta tuning, leading to a more general impact than task-specific compression. Hence, we introduce a task-agnostic compression toolkit BMCook for big models. In BMCook, we implement four representative compression methods, including quantization, pruning, distillation, and MoEfication. Developers can easily combine these methods towards better efficiency. To evaluate BMCook, we apply it to compress T5-3B (a PLM with 3 billion parameters). We achieve nearly 12x efficiency improvement while maintaining over 97% of the original T5-3B performance on three typical NLP benchmarks. Moreover, the final compressed model also significantly outperforms T5-base (a PLM with 220 million parameters), which has a similar computational cost. BMCook is publicly available at https://github.com/OpenBMB/BMCook.</abstract>
<identifier type="citekey">zhang-etal-2022-bmcook</identifier>
<identifier type="doi">10.18653/v1/2022.emnlp-demos.40</identifier>
<location>
<url>https://aclanthology.org/2022.emnlp-demos.40/</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>396</start>
<end>405</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T BMCook: A Task-agnostic Compression Toolkit for Big Models
%A Zhang, Zhengyan
%A Gong, Baitao
%A Chen, Yingfa
%A Han, Xu
%A Zeng, Guoyang
%A Zhao, Weilin
%A Chen, Yanxu
%A Liu, Zhiyuan
%A Sun, Maosong
%Y Che, Wanxiang
%Y Shutova, Ekaterina
%S Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing: System Demonstrations
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F zhang-etal-2022-bmcook
%X Recently, pre-trained language models (PLMs) have achieved great success on various NLP tasks and have shown a trend of exponential growth in model size. To alleviate the unaffordable computational costs brought by the size growth, model compression has been widely explored. Existing efforts have achieved promising results in compressing medium-sized models for specific tasks, while task-agnostic compression for big models with over billions of parameters is rarely studied. Task-agnostic compression can provide an efficient and versatile big model for both prompting and delta tuning, leading to a more general impact than task-specific compression. Hence, we introduce a task-agnostic compression toolkit BMCook for big models. In BMCook, we implement four representative compression methods, including quantization, pruning, distillation, and MoEfication. Developers can easily combine these methods towards better efficiency. To evaluate BMCook, we apply it to compress T5-3B (a PLM with 3 billion parameters). We achieve nearly 12x efficiency improvement while maintaining over 97% of the original T5-3B performance on three typical NLP benchmarks. Moreover, the final compressed model also significantly outperforms T5-base (a PLM with 220 million parameters), which has a similar computational cost. BMCook is publicly available at https://github.com/OpenBMB/BMCook.
%R 10.18653/v1/2022.emnlp-demos.40
%U https://aclanthology.org/2022.emnlp-demos.40/
%U https://doi.org/10.18653/v1/2022.emnlp-demos.40
%P 396-405
Markdown (Informal)
[BMCook: A Task-agnostic Compression Toolkit for Big Models](https://aclanthology.org/2022.emnlp-demos.40/) (Zhang et al., EMNLP 2022)
ACL
- Zhengyan Zhang, Baitao Gong, Yingfa Chen, Xu Han, Guoyang Zeng, Weilin Zhao, Yanxu Chen, Zhiyuan Liu, and Maosong Sun. 2022. BMCook: A Task-agnostic Compression Toolkit for Big Models. In Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing: System Demonstrations, pages 396–405, Abu Dhabi, UAE. Association for Computational Linguistics.