@inproceedings{ben-noach-goldberg-2020-compressing,
title = "Compressing Pre-trained Language Models by Matrix Decomposition",
author = "Ben Noach, Matan and
Goldberg, Yoav",
editor = "Wong, Kam-Fai and
Knight, Kevin and
Wu, Hua",
booktitle = "Proceedings of the 1st Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 10th International Joint Conference on Natural Language Processing",
month = dec,
year = "2020",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.aacl-main.88/",
doi = "10.18653/v1/2020.aacl-main.88",
pages = "884--889",
abstract = "Large pre-trained language models reach state-of-the-art results on many different NLP tasks when fine-tuned individually; They also come with a significant memory and computational requirements, calling for methods to reduce model sizes (green AI). We propose a two-stage model-compression method to reduce a model`s inference time cost. We first decompose the matrices in the model into smaller matrices and then perform feature distillation on the internal representation to recover from the decomposition. This approach has the benefit of reducing the number of parameters while preserving much of the information within the model. We experimented on BERT-base model with the GLUE benchmark dataset and show that we can reduce the number of parameters by a factor of 0.4x, and increase inference speed by a factor of 1.45x, while maintaining a minimal loss in metric performance."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ben-noach-goldberg-2020-compressing">
<titleInfo>
<title>Compressing Pre-trained Language Models by Matrix Decomposition</title>
</titleInfo>
<name type="personal">
<namePart type="given">Matan</namePart>
<namePart type="family">Ben Noach</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yoav</namePart>
<namePart type="family">Goldberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 10th International Joint Conference on Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kam-Fai</namePart>
<namePart type="family">Wong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kevin</namePart>
<namePart type="family">Knight</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hua</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Large pre-trained language models reach state-of-the-art results on many different NLP tasks when fine-tuned individually; they also come with significant memory and computational requirements, calling for methods to reduce model sizes (green AI). We propose a two-stage model-compression method to reduce a model's inference time cost. We first decompose the matrices in the model into smaller matrices and then perform feature distillation on the internal representation to recover from the decomposition. This approach has the benefit of reducing the number of parameters while preserving much of the information within the model. We experimented on the BERT-base model with the GLUE benchmark dataset and show that we can reduce the number of parameters by a factor of 0.4x, and increase inference speed by a factor of 1.45x, while maintaining a minimal loss in metric performance.</abstract>
<identifier type="citekey">ben-noach-goldberg-2020-compressing</identifier>
<identifier type="doi">10.18653/v1/2020.aacl-main.88</identifier>
<location>
<url>https://aclanthology.org/2020.aacl-main.88/</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>884</start>
<end>889</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Compressing Pre-trained Language Models by Matrix Decomposition
%A Ben Noach, Matan
%A Goldberg, Yoav
%Y Wong, Kam-Fai
%Y Knight, Kevin
%Y Wu, Hua
%S Proceedings of the 1st Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 10th International Joint Conference on Natural Language Processing
%D 2020
%8 December
%I Association for Computational Linguistics
%C Suzhou, China
%F ben-noach-goldberg-2020-compressing
%X Large pre-trained language models reach state-of-the-art results on many different NLP tasks when fine-tuned individually; they also come with significant memory and computational requirements, calling for methods to reduce model sizes (green AI). We propose a two-stage model-compression method to reduce a model's inference time cost. We first decompose the matrices in the model into smaller matrices and then perform feature distillation on the internal representation to recover from the decomposition. This approach has the benefit of reducing the number of parameters while preserving much of the information within the model. We experimented on the BERT-base model with the GLUE benchmark dataset and show that we can reduce the number of parameters by a factor of 0.4x, and increase inference speed by a factor of 1.45x, while maintaining a minimal loss in metric performance.
%R 10.18653/v1/2020.aacl-main.88
%U https://aclanthology.org/2020.aacl-main.88/
%U https://doi.org/10.18653/v1/2020.aacl-main.88
%P 884-889
[Compressing Pre-trained Language Models by Matrix Decomposition](https://aclanthology.org/2020.aacl-main.88/) (Ben Noach & Goldberg, AACL 2020)
Matan Ben Noach and Yoav Goldberg. 2020. Compressing Pre-trained Language Models by Matrix Decomposition. In Proceedings of the 1st Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 10th International Joint Conference on Natural Language Processing, pages 884–889, Suzhou, China. Association for Computational Linguistics.
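
The two-stage method the abstract describes starts by factorizing a model's weight matrices into pairs of smaller matrices. Below is a minimal sketch of that first stage, assuming a truncated-SVD factorization of a single linear layer in PyTorch; the paper's exact factorization and its second-stage feature distillation may differ, and the `factorize_linear` helper and `rank` parameter are illustrative names, not the authors' code.

```python
import torch

def factorize_linear(linear: torch.nn.Linear, rank: int) -> torch.nn.Sequential:
    """Approximate a (d_out x d_in) linear layer by two smaller layers
    (d_in -> rank -> d_out) via truncated SVD. Parameter count shrinks
    whenever rank * (d_in + d_out) < d_in * d_out."""
    W = linear.weight.data                      # shape (d_out, d_in)
    U, S, Vh = torch.linalg.svd(W, full_matrices=False)
    # Keep only the top-`rank` singular components: W ~= (U_r * S_r) @ V_r.
    A = U[:, :rank] * S[:rank]                  # (d_out, rank)
    B = Vh[:rank, :]                            # (rank, d_in)

    first = torch.nn.Linear(W.shape[1], rank, bias=False)
    second = torch.nn.Linear(rank, W.shape[0], bias=linear.bias is not None)
    first.weight.data = B.contiguous()
    second.weight.data = A.contiguous()
    if linear.bias is not None:
        second.bias.data = linear.bias.data.clone()
    return torch.nn.Sequential(first, second)

# Example: compress one feed-forward matrix of a BERT-base-sized layer.
layer = torch.nn.Linear(768, 3072)
compressed = factorize_linear(layer, rank=256)
x = torch.randn(4, 768)
print(torch.dist(layer(x), compressed(x)))     # error introduced by truncation
```

Per the abstract, the factorized model would then be trained with feature distillation so that its internal representations match the original model's, recovering much of the accuracy lost to the decomposition.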