@inproceedings{xu-etal-2023-federated,
title = "Federated Learning of Gboard Language Models with Differential Privacy",
author = "Xu, Zheng and
Zhang, Yanxiang and
Andrew, Galen and
Choquette, Christopher and
Kairouz, Peter and
Mcmahan, Brendan and
Rosenstock, Jesse and
Zhang, Yuanbo",
editor = "Sitaram, Sunayana and
Beigman Klebanov, Beata and
Williams, Jason D",
booktitle = "Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 5: Industry Track)",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.acl-industry.60/",
doi = "10.18653/v1/2023.acl-industry.60",
pages = "629--639",
abstract = "We train and deploy language models (LMs) with federated learning (FL) and differential privacy (DP) in Google Keyboard (Gboard). The recent DP-Follow the Regularized Leader (DP-FTRL) algorithm is applied to achieve meaningfully formal DP guarantees without requiring uniform sampling of clients. To provide favorable privacy-utility trade-offs, we introduce a new client participation criterion and discuss the implication of its configuration in large scale systems. We show how quantile-based clip estimation can be combined with DP-FTRL to adaptively choose the clip norm during training or reduce the hyperparameter tuning in preparation of training. With the help of pretraining on public data, we trained and deployed more than fifteen Gboard LMs that achieve high utility and {\$}{\textbackslash}rho-{\$}zCDP privacy guarantees with {\$}{\textbackslash}rho {\textbackslash}in (0.3, 2){\$}, with one model additionally trained with secure aggregation. We summarize our experience and provide concrete suggestions on DP training for practitioners."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="xu-etal-2023-federated">
<titleInfo>
<title>Federated Learning of Gboard Language Models with Differential Privacy</title>
</titleInfo>
<name type="personal">
<namePart type="given">Zheng</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yanxiang</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Galen</namePart>
<namePart type="family">Andrew</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Choquette</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Peter</namePart>
<namePart type="family">Kairouz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Brendan</namePart>
<namePart type="family">Mcmahan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jesse</namePart>
<namePart type="family">Rosenstock</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuanbo</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 5: Industry Track)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sunayana</namePart>
<namePart type="family">Sitaram</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Beata</namePart>
<namePart type="family">Beigman Klebanov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jason</namePart>
<namePart type="given">D</namePart>
<namePart type="family">Williams</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We train and deploy language models (LMs) with federated learning (FL) and differential privacy (DP) in Google Keyboard (Gboard). The recent DP-Follow the Regularized Leader (DP-FTRL) algorithm is applied to achieve meaningfully formal DP guarantees without requiring uniform sampling of clients. To provide favorable privacy-utility trade-offs, we introduce a new client participation criterion and discuss the implication of its configuration in large scale systems. We show how quantile-based clip estimation can be combined with DP-FTRL to adaptively choose the clip norm during training or reduce the hyperparameter tuning in preparation of training. With the help of pretraining on public data, we trained and deployed more than fifteen Gboard LMs that achieve high utility and $\textbackslashrho-$zCDP privacy guarantees with $\textbackslashrho \textbackslashin (0.3, 2)$, with one model additionally trained with secure aggregation. We summarize our experience and provide concrete suggestions on DP training for practitioners.</abstract>
<identifier type="citekey">xu-etal-2023-federated</identifier>
<identifier type="doi">10.18653/v1/2023.acl-industry.60</identifier>
<location>
<url>https://aclanthology.org/2023.acl-industry.60/</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>629</start>
<end>639</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Federated Learning of Gboard Language Models with Differential Privacy
%A Xu, Zheng
%A Zhang, Yanxiang
%A Andrew, Galen
%A Choquette, Christopher
%A Kairouz, Peter
%A Mcmahan, Brendan
%A Rosenstock, Jesse
%A Zhang, Yuanbo
%Y Sitaram, Sunayana
%Y Beigman Klebanov, Beata
%Y Williams, Jason D.
%S Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 5: Industry Track)
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F xu-etal-2023-federated
%X We train and deploy language models (LMs) with federated learning (FL) and differential privacy (DP) in Google Keyboard (Gboard). The recent DP-Follow the Regularized Leader (DP-FTRL) algorithm is applied to achieve meaningfully formal DP guarantees without requiring uniform sampling of clients. To provide favorable privacy-utility trade-offs, we introduce a new client participation criterion and discuss the implication of its configuration in large scale systems. We show how quantile-based clip estimation can be combined with DP-FTRL to adaptively choose the clip norm during training or reduce the hyperparameter tuning in preparation of training. With the help of pretraining on public data, we trained and deployed more than fifteen Gboard LMs that achieve high utility and $\textbackslashrho-$zCDP privacy guarantees with $\textbackslashrho \textbackslashin (0.3, 2)$, with one model additionally trained with secure aggregation. We summarize our experience and provide concrete suggestions on DP training for practitioners.
%R 10.18653/v1/2023.acl-industry.60
%U https://aclanthology.org/2023.acl-industry.60/
%U https://doi.org/10.18653/v1/2023.acl-industry.60
%P 629-639
Markdown (Informal)
[Federated Learning of Gboard Language Models with Differential Privacy](https://aclanthology.org/2023.acl-industry.60/) (Xu et al., ACL 2023)
ACL
- Zheng Xu, Yanxiang Zhang, Galen Andrew, Christopher Choquette, Peter Kairouz, Brendan Mcmahan, Jesse Rosenstock, and Yuanbo Zhang. 2023. Federated Learning of Gboard Language Models with Differential Privacy. In Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 5: Industry Track), pages 629–639, Toronto, Canada. Association for Computational Linguistics.