@inproceedings{mattern-etal-2022-limits,
title = "The Limits of Word Level Differential Privacy",
author = "Mattern, Justus and
Weggenmann, Benjamin and
Kerschbaum, Florian",
editor = "Carpuat, Marine and
de Marneffe, Marie-Catherine and
Meza Ruiz, Ivan Vladimir",
booktitle = "Findings of the Association for Computational Linguistics: NAACL 2022",
month = jul,
year = "2022",
address = "Seattle, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.findings-naacl.65/",
doi = "10.18653/v1/2022.findings-naacl.65",
pages = "867--881",
abstract = "As the issues of privacy and trust are receiving increasing attention within the research community, various attempts have been made to anonymize textual data. A significant subset of these approaches incorporate differentially private mechanims to perturb word embeddings, thus replacing individual words in a sentence. While these methods represent very important contributions, have various advantages over other techniques and do show anonymization capabilities,they have several shortcomings. In this paper, we investigate these weaknesses and demonstrate significant mathematical constraints diminishing the theoretical privacy guaranteeas well as major practical shortcomings with regard to the protection against deanonymization attacks, the preservation of content of the original sentences as well as the quality of the language output. Finally, we propose a new method for text anonymization based on transformer based language models fine-tuned for paraphrasing that circumvents most of the identified weaknesses and also offers a formal privacy guarantee. We evaluate the performance of our method via thourough experimentation and demonstrate superior performance over the discussed mechanisms."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mattern-etal-2022-limits">
<titleInfo>
<title>The Limits of Word Level Differential Privacy</title>
</titleInfo>
<name type="personal">
<namePart type="given">Justus</namePart>
<namePart type="family">Mattern</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Benjamin</namePart>
<namePart type="family">Weggenmann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Florian</namePart>
<namePart type="family">Kerschbaum</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: NAACL 2022</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marine</namePart>
<namePart type="family">Carpuat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marie-Catherine</namePart>
<namePart type="family">de Marneffe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ivan</namePart>
<namePart type="given">Vladimir</namePart>
<namePart type="family">Meza Ruiz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Seattle, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>As the issues of privacy and trust are receiving increasing attention within the research community, various attempts have been made to anonymize textual data. A significant subset of these approaches incorporate differentially private mechanisms to perturb word embeddings, thus replacing individual words in a sentence. While these methods represent very important contributions, have various advantages over other techniques, and do show anonymization capabilities, they have several shortcomings. In this paper, we investigate these weaknesses and demonstrate significant mathematical constraints diminishing the theoretical privacy guarantee, as well as major practical shortcomings with regard to the protection against deanonymization attacks, the preservation of the content of the original sentences, and the quality of the language output. Finally, we propose a new method for text anonymization based on transformer-based language models fine-tuned for paraphrasing that circumvents most of the identified weaknesses and also offers a formal privacy guarantee. We evaluate the performance of our method via thorough experimentation and demonstrate superior performance over the discussed mechanisms.</abstract>
<identifier type="citekey">mattern-etal-2022-limits</identifier>
<identifier type="doi">10.18653/v1/2022.findings-naacl.65</identifier>
<location>
<url>https://aclanthology.org/2022.findings-naacl.65/</url>
</location>
<part>
<date>2022-07</date>
<extent unit="page">
<start>867</start>
<end>881</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T The Limits of Word Level Differential Privacy
%A Mattern, Justus
%A Weggenmann, Benjamin
%A Kerschbaum, Florian
%Y Carpuat, Marine
%Y de Marneffe, Marie-Catherine
%Y Meza Ruiz, Ivan Vladimir
%S Findings of the Association for Computational Linguistics: NAACL 2022
%D 2022
%8 July
%I Association for Computational Linguistics
%C Seattle, United States
%F mattern-etal-2022-limits
%X As the issues of privacy and trust are receiving increasing attention within the research community, various attempts have been made to anonymize textual data. A significant subset of these approaches incorporate differentially private mechanisms to perturb word embeddings, thus replacing individual words in a sentence. While these methods represent very important contributions, have various advantages over other techniques, and do show anonymization capabilities, they have several shortcomings. In this paper, we investigate these weaknesses and demonstrate significant mathematical constraints diminishing the theoretical privacy guarantee, as well as major practical shortcomings with regard to the protection against deanonymization attacks, the preservation of the content of the original sentences, and the quality of the language output. Finally, we propose a new method for text anonymization based on transformer-based language models fine-tuned for paraphrasing that circumvents most of the identified weaknesses and also offers a formal privacy guarantee. We evaluate the performance of our method via thorough experimentation and demonstrate superior performance over the discussed mechanisms.
%R 10.18653/v1/2022.findings-naacl.65
%U https://aclanthology.org/2022.findings-naacl.65/
%U https://doi.org/10.18653/v1/2022.findings-naacl.65
%P 867-881
Markdown (Informal)
[The Limits of Word Level Differential Privacy](https://aclanthology.org/2022.findings-naacl.65/) (Mattern et al., Findings 2022)

ACL
Justus Mattern, Benjamin Weggenmann, and Florian Kerschbaum. 2022. The Limits of Word Level Differential Privacy. In Findings of the Association for Computational Linguistics: NAACL 2022, pages 867–881, Seattle, United States. Association for Computational Linguistics.