@inproceedings{chvasta-etal-2022-lost,
title = "Lost in Distillation: A Case Study in Toxicity Modeling",
author = "Chvasta, Alyssa and
Lees, Alyssa and
Sorensen, Jeffrey and
Vasserman, Lucy and
Goyal, Nitesh",
editor = "Narang, Kanika and
Mostafazadeh Davani, Aida and
Mathias, Lambert and
Vidgen, Bertie and
Talat, Zeerak",
booktitle = "Proceedings of the Sixth Workshop on Online Abuse and Harms (WOAH)",
month = jul,
year = "2022",
address = "Seattle, Washington (Hybrid)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.woah-1.9",
doi = "10.18653/v1/2022.woah-1.9",
pages = "92--101",
abstract = "In an era of increasingly large pre-trained language models, knowledge distillation is a powerful tool for transferring information from a large model to a smaller one. In particular, distillation is of tremendous benefit when it comes to real-world constraints such as serving latency or serving at scale. However, a loss of robustness in language understanding may be hidden in the process and not immediately revealed when looking at high-level evaluation metrics. In this work, we investigate the hidden costs: what is {``}lost in distillation{''}, especially in regards to identity-based model bias using the case study of toxicity modeling. With reproducible models using open source training sets, we investigate models distilled from a BERT teacher baseline. Using both open source and proprietary big data models, we investigate these hidden performance costs.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="chvasta-etal-2022-lost">
<titleInfo>
<title>Lost in Distillation: A Case Study in Toxicity Modeling</title>
</titleInfo>
<name type="personal">
<namePart type="given">Alyssa</namePart>
<namePart type="family">Chvasta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alyssa</namePart>
<namePart type="family">Lees</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jeffrey</namePart>
<namePart type="family">Sorensen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lucy</namePart>
<namePart type="family">Vasserman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nitesh</namePart>
<namePart type="family">Goyal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Sixth Workshop on Online Abuse and Harms (WOAH)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kanika</namePart>
<namePart type="family">Narang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aida</namePart>
<namePart type="family">Mostafazadeh Davani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lambert</namePart>
<namePart type="family">Mathias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bertie</namePart>
<namePart type="family">Vidgen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zeerak</namePart>
<namePart type="family">Talat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Seattle, Washington (Hybrid)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In an era of increasingly large pre-trained language models, knowledge distillation is a powerful tool for transferring information from a large model to a smaller one. In particular, distillation is of tremendous benefit when it comes to real-world constraints such as serving latency or serving at scale. However, a loss of robustness in language understanding may be hidden in the process and not immediately revealed when looking at high-level evaluation metrics. In this work, we investigate the hidden costs: what is “lost in distillation”, especially in regards to identity-based model bias using the case study of toxicity modeling. With reproducible models using open source training sets, we investigate models distilled from a BERT teacher baseline. Using both open source and proprietary big data models, we investigate these hidden performance costs.</abstract>
<identifier type="citekey">chvasta-etal-2022-lost</identifier>
<identifier type="doi">10.18653/v1/2022.woah-1.9</identifier>
<location>
<url>https://aclanthology.org/2022.woah-1.9</url>
</location>
<part>
<date>2022-07</date>
<extent unit="page">
<start>92</start>
<end>101</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Lost in Distillation: A Case Study in Toxicity Modeling
%A Chvasta, Alyssa
%A Lees, Alyssa
%A Sorensen, Jeffrey
%A Vasserman, Lucy
%A Goyal, Nitesh
%Y Narang, Kanika
%Y Mostafazadeh Davani, Aida
%Y Mathias, Lambert
%Y Vidgen, Bertie
%Y Talat, Zeerak
%S Proceedings of the Sixth Workshop on Online Abuse and Harms (WOAH)
%D 2022
%8 July
%I Association for Computational Linguistics
%C Seattle, Washington (Hybrid)
%F chvasta-etal-2022-lost
%X In an era of increasingly large pre-trained language models, knowledge distillation is a powerful tool for transferring information from a large model to a smaller one. In particular, distillation is of tremendous benefit when it comes to real-world constraints such as serving latency or serving at scale. However, a loss of robustness in language understanding may be hidden in the process and not immediately revealed when looking at high-level evaluation metrics. In this work, we investigate the hidden costs: what is “lost in distillation”, especially in regards to identity-based model bias using the case study of toxicity modeling. With reproducible models using open source training sets, we investigate models distilled from a BERT teacher baseline. Using both open source and proprietary big data models, we investigate these hidden performance costs.
%R 10.18653/v1/2022.woah-1.9
%U https://aclanthology.org/2022.woah-1.9
%U https://doi.org/10.18653/v1/2022.woah-1.9
%P 92-101
Markdown (Informal)
[Lost in Distillation: A Case Study in Toxicity Modeling](https://aclanthology.org/2022.woah-1.9) (Chvasta et al., WOAH 2022)
ACL
- Alyssa Chvasta, Alyssa Lees, Jeffrey Sorensen, Lucy Vasserman, and Nitesh Goyal. 2022. Lost in Distillation: A Case Study in Toxicity Modeling. In Proceedings of the Sixth Workshop on Online Abuse and Harms (WOAH), pages 92–101, Seattle, Washington (Hybrid). Association for Computational Linguistics.