@inproceedings{hidey-etal-2022-reducing,
title = "Reducing Model Churn: Stable Re-training of Conversational Agents",
author = "Hidey, Christopher and
Liu, Fei and
Goel, Rahul",
editor = "Lemon, Oliver and
Hakkani-Tur, Dilek and
Li, Junyi Jessy and
Ashrafzadeh, Arash and
Garcia, Daniel Hern{\'a}ndez and
Alikhani, Malihe and
Vandyke, David and
Du{\v{s}}ek, Ond{\v{r}}ej",
booktitle = "Proceedings of the 23rd Annual Meeting of the Special Interest Group on Discourse and Dialogue",
month = sep,
year = "2022",
address = "Edinburgh, UK",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.sigdial-1.2",
doi = "10.18653/v1/2022.sigdial-1.2",
pages = "14--25",
abstract = "Retraining modern deep learning systems can lead to variations in model performance even when trained using the same data and hyper-parameters by simply using different random seeds. This phenomenon is known as model churn or model jitter. This issue is often exacerbated in real world settings, where noise may be introduced in the data collection process. In this work we tackle the problem of stable retraining with a novel focus on structured prediction for conversational semantic parsing. We first quantify the model churn by introducing metrics for agreement between predictions across multiple retrainings. Next, we devise realistic scenarios for noise injection and demonstrate the effectiveness of various churn reduction techniques such as ensembling and distillation. Lastly, we discuss practical trade-offs between such techniques and show that co-distillation provides a sweet spot in terms of churn reduction with only a modest increase in resource usage.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hidey-etal-2022-reducing">
<titleInfo>
<title>Reducing Model Churn: Stable Re-training of Conversational Agents</title>
</titleInfo>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Hidey</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fei</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rahul</namePart>
<namePart type="family">Goel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 23rd Annual Meeting of the Special Interest Group on Discourse and Dialogue</title>
</titleInfo>
<name type="personal">
<namePart type="given">Oliver</namePart>
<namePart type="family">Lemon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dilek</namePart>
<namePart type="family">Hakkani-Tur</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junyi</namePart>
<namePart type="given">Jessy</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arash</namePart>
<namePart type="family">Ashrafzadeh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="given">Hernández</namePart>
<namePart type="family">Garcia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Malihe</namePart>
<namePart type="family">Alikhani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Vandyke</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ondřej</namePart>
<namePart type="family">Dušek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Edinburgh, UK</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Retraining modern deep learning systems can lead to variations in model performance even when trained using the same data and hyper-parameters by simply using different random seeds. This phenomenon is known as model churn or model jitter. This issue is often exacerbated in real world settings, where noise may be introduced in the data collection process. In this work we tackle the problem of stable retraining with a novel focus on structured prediction for conversational semantic parsing. We first quantify the model churn by introducing metrics for agreement between predictions across multiple retrainings. Next, we devise realistic scenarios for noise injection and demonstrate the effectiveness of various churn reduction techniques such as ensembling and distillation. Lastly, we discuss practical trade-offs between such techniques and show that co-distillation provides a sweet spot in terms of churn reduction with only a modest increase in resource usage.</abstract>
<identifier type="citekey">hidey-etal-2022-reducing</identifier>
<identifier type="doi">10.18653/v1/2022.sigdial-1.2</identifier>
<location>
<url>https://aclanthology.org/2022.sigdial-1.2</url>
</location>
<part>
<date>2022-09</date>
<extent unit="page">
<start>14</start>
<end>25</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Reducing Model Churn: Stable Re-training of Conversational Agents
%A Hidey, Christopher
%A Liu, Fei
%A Goel, Rahul
%Y Lemon, Oliver
%Y Hakkani-Tur, Dilek
%Y Li, Junyi Jessy
%Y Ashrafzadeh, Arash
%Y Garcia, Daniel Hernández
%Y Alikhani, Malihe
%Y Vandyke, David
%Y Dušek, Ondřej
%S Proceedings of the 23rd Annual Meeting of the Special Interest Group on Discourse and Dialogue
%D 2022
%8 September
%I Association for Computational Linguistics
%C Edinburgh, UK
%F hidey-etal-2022-reducing
%X Retraining modern deep learning systems can lead to variations in model performance even when trained using the same data and hyper-parameters by simply using different random seeds. This phenomenon is known as model churn or model jitter. This issue is often exacerbated in real world settings, where noise may be introduced in the data collection process. In this work we tackle the problem of stable retraining with a novel focus on structured prediction for conversational semantic parsing. We first quantify the model churn by introducing metrics for agreement between predictions across multiple retrainings. Next, we devise realistic scenarios for noise injection and demonstrate the effectiveness of various churn reduction techniques such as ensembling and distillation. Lastly, we discuss practical trade-offs between such techniques and show that co-distillation provides a sweet spot in terms of churn reduction with only a modest increase in resource usage.
%R 10.18653/v1/2022.sigdial-1.2
%U https://aclanthology.org/2022.sigdial-1.2
%U https://doi.org/10.18653/v1/2022.sigdial-1.2
%P 14-25
Markdown (Informal)
[Reducing Model Churn: Stable Re-training of Conversational Agents](https://aclanthology.org/2022.sigdial-1.2) (Hidey et al., SIGDIAL 2022)
ACL