@inproceedings{zhao-etal-2022-compressing,
    title = "Compressing Sentence Representation for Semantic Retrieval via Homomorphic Projective Distillation",
    author = "Zhao, Xuandong and
      Yu, Zhiguo and
      Wu, Ming and
      Li, Lei",
    editor = "Muresan, Smaranda and
      Nakov, Preslav and
      Villavicencio, Aline",
    booktitle = "Findings of the Association for Computational Linguistics: ACL 2022",
    month = may,
    year = "2022",
    address = "Dublin, Ireland",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2022.findings-acl.64",
    doi = "10.18653/v1/2022.findings-acl.64",
    pages = "774--781",
    abstract = "How to learn highly compact yet effective sentence representation? Pre-trained language models have been effective in many NLP tasks. However, these models are often huge and produce large sentence embeddings. Moreover, there is a big performance gap between large and small models. In this paper, we propose Homomorphic Projective Distillation (HPD) to learn compressed sentence embeddings. Our method augments a small Transformer encoder model with learnable projection layers to produce compact representations while mimicking a large pre-trained language model to retain the sentence representation quality. We evaluate our method with different model sizes on both semantic textual similarity (STS) and semantic retrieval (SR) tasks. Experiments show that our method achieves 2.7-4.5 points performance gain on STS tasks compared with previous best representations of the same size. In SR tasks, our method improves retrieval speed (8.2{\mbox{$\times$}}) and memory usage (8.0{\mbox{$\times$}}) compared with state-of-the-art large models. Our implementation is available at \url{https://github.com/XuandongZhao/HPD}.",
}
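The abstract describes the core HPD idea: a small Transformer student with a learnable projection layer is trained to mimic a large teacher's sentence embeddings. Below is a minimal sketch of that idea, not the paper's implementation (see the linked repository for that); the model checkpoints, 128-dimensional target, mean pooling, the fixed random teacher-side reduction, and the MSE objective are all illustrative assumptions.

```python
import torch
import torch.nn as nn
from transformers import AutoModel, AutoTokenizer

# Assumed checkpoints: any small-student / large-teacher pair works for the sketch.
STUDENT = "nreimers/MiniLM-L6-H384-uncased"
TEACHER = "princeton-nlp/sup-simcse-roberta-large"

tok_s = AutoTokenizer.from_pretrained(STUDENT)
tok_t = AutoTokenizer.from_pretrained(TEACHER)
student = AutoModel.from_pretrained(STUDENT)
teacher = AutoModel.from_pretrained(TEACHER).eval()   # frozen teacher

TARGET_DIM = 128                                      # compact embedding size (assumed)
proj = nn.Linear(student.config.hidden_size, TARGET_DIM)  # learnable projection layer
reduce_teacher = nn.Linear(teacher.config.hidden_size, TARGET_DIM, bias=False)
for p in reduce_teacher.parameters():
    p.requires_grad_(False)  # fixed stand-in for the teacher-side dimension reduction

opt = torch.optim.AdamW(list(student.parameters()) + list(proj.parameters()), lr=2e-5)

def encode(model, tokenizer, sentences):
    """Mean-pooled sentence embeddings (pooling choice is an assumption)."""
    batch = tokenizer(sentences, padding=True, truncation=True, return_tensors="pt")
    hidden = model(**batch).last_hidden_state
    mask = batch["attention_mask"].unsqueeze(-1).float()
    return (hidden * mask).sum(1) / mask.sum(1)

sentences = ["A man is playing a guitar.", "Someone plays a guitar."]
with torch.no_grad():                                 # teacher target, no gradients
    target = reduce_teacher(encode(teacher, tok_t, sentences))

pred = proj(encode(student, tok_s, sentences))        # compact student embedding
loss = nn.functional.mse_loss(pred, target)           # distillation loss (assumed MSE)
loss.backward()
opt.step()
```

After training, only the small student and its projection are kept, so every sentence is stored and compared as a 128-dimensional vector.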
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="zhao-etal-2022-compressing">
    <titleInfo>
      <title>Compressing Sentence Representation for Semantic Retrieval via Homomorphic Projective Distillation</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Xuandong</namePart>
      <namePart type="family">Zhao</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Zhiguo</namePart>
      <namePart type="family">Yu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ming</namePart>
      <namePart type="family">Wu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Lei</namePart>
      <namePart type="family">Li</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2022-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: ACL 2022</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Smaranda</namePart>
        <namePart type="family">Muresan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Preslav</namePart>
        <namePart type="family">Nakov</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Aline</namePart>
        <namePart type="family">Villavicencio</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Dublin, Ireland</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>How to learn highly compact yet effective sentence representation? Pre-trained language models have been effective in many NLP tasks. However, these models are often huge and produce large sentence embeddings. Moreover, there is a big performance gap between large and small models. In this paper, we propose Homomorphic Projective Distillation (HPD) to learn compressed sentence embeddings. Our method augments a small Transformer encoder model with learnable projection layers to produce compact representations while mimicking a large pre-trained language model to retain the sentence representation quality. We evaluate our method with different model sizes on both semantic textual similarity (STS) and semantic retrieval (SR) tasks. Experiments show that our method achieves 2.7-4.5 points performance gain on STS tasks compared with previous best representations of the same size. In SR tasks, our method improves retrieval speed (8.2\times) and memory usage (8.0\times) compared with state-of-the-art large models. Our implementation is available at https://github.com/XuandongZhao/HPD.</abstract>
    <identifier type="citekey">zhao-etal-2022-compressing</identifier>
    <identifier type="doi">10.18653/v1/2022.findings-acl.64</identifier>
    <location>
      <url>https://aclanthology.org/2022.findings-acl.64</url>
    </location>
    <part>
      <date>2022-05</date>
      <extent unit="page">
        <start>774</start>
        <end>781</end>
      </extent>
    </part>
  </mods>
</modsCollection>
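The SR gains quoted in the abstract (8.2× faster retrieval, 8.0× lower memory) follow directly from the embedding size: brute-force similarity search over N vectors of dimension d costs O(Nd) in both storage and per-query work, so shrinking vectors from roughly 1024 to 128 dimensions cuts both by about 8×. A back-of-the-envelope NumPy sketch, with assumed corpus size and dimensions:

```python
import numpy as np

rng = np.random.default_rng(0)
N, D_LARGE, D_SMALL = 100_000, 1024, 128              # corpus size and dims (assumed)
corpus = rng.standard_normal((N, D_SMALL)).astype(np.float32)

def search(index, query, k=5):
    """Brute-force cosine search: one (N, d) @ (d,) product per query."""
    index_n = index / np.linalg.norm(index, axis=1, keepdims=True)
    q = query / np.linalg.norm(query)
    scores = index_n @ q
    top = np.argpartition(-scores, k)[:k]             # unordered top-k
    return top[np.argsort(-scores[top])]              # sorted by score

query = rng.standard_normal(D_SMALL).astype(np.float32)
print("top hits:", search(corpus, query))
print(f"index: {corpus.nbytes / 2**20:.0f} MiB at d={D_SMALL} "
      f"vs {N * D_LARGE * 4 / 2**20:.0f} MiB at d={D_LARGE} (~8x)")
```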
%0 Conference Proceedings
%T Compressing Sentence Representation for Semantic Retrieval via Homomorphic Projective Distillation
%A Zhao, Xuandong
%A Yu, Zhiguo
%A Wu, Ming
%A Li, Lei
%Y Muresan, Smaranda
%Y Nakov, Preslav
%Y Villavicencio, Aline
%S Findings of the Association for Computational Linguistics: ACL 2022
%D 2022
%8 May
%I Association for Computational Linguistics
%C Dublin, Ireland
%F zhao-etal-2022-compressing
%X How to learn highly compact yet effective sentence representation? Pre-trained language models have been effective in many NLP tasks. However, these models are often huge and produce large sentence embeddings. Moreover, there is a big performance gap between large and small models. In this paper, we propose Homomorphic Projective Distillation (HPD) to learn compressed sentence embeddings. Our method augments a small Transformer encoder model with learnable projection layers to produce compact representations while mimicking a large pre-trained language model to retain the sentence representation quality. We evaluate our method with different model sizes on both semantic textual similarity (STS) and semantic retrieval (SR) tasks. Experiments show that our method achieves 2.7-4.5 points performance gain on STS tasks compared with previous best representations of the same size. In SR tasks, our method improves retrieval speed (8.2\times) and memory usage (8.0\times) compared with state-of-the-art large models. Our implementation is available at https://github.com/XuandongZhao/HPD.
%R 10.18653/v1/2022.findings-acl.64
%U https://aclanthology.org/2022.findings-acl.64
%U https://doi.org/10.18653/v1/2022.findings-acl.64
%P 774-781
Markdown (Informal)
[Compressing Sentence Representation for Semantic Retrieval via Homomorphic Projective Distillation](https://aclanthology.org/2022.findings-acl.64) (Zhao et al., Findings 2022)
ACL
Xuandong Zhao, Zhiguo Yu, Ming Wu, and Lei Li. 2022. [Compressing Sentence Representation for Semantic Retrieval via Homomorphic Projective Distillation](https://aclanthology.org/2022.findings-acl.64). In *Findings of the Association for Computational Linguistics: ACL 2022*, pages 774–781, Dublin, Ireland. Association for Computational Linguistics.