@inproceedings{musa-etal-2024-c3pa,
title = "{C}3{PA}: An Open Dataset of Expert-Annotated and Regulation-Aware Privacy Policies to Enable Scalable Regulatory Compliance Audits",
author = "Musa, Maaz Bin and
Winston, Steven M. and
Allen, Garrison and
Schiller, Jacob and
Moore, Kevin and
Quick, Sean and
Melvin, Johnathan and
Srinivasan, Padmini and
Diamantis, Mihailis E. and
Nithyanand, Rishab",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.emnlp-main.217/",
doi = "10.18653/v1/2024.emnlp-main.217",
pages = "3710--3722",
abstract = "The development of tools and techniques to analyze and extract organizations' data habits from privacy policies are critical for scalable regulatory compliance audits. Unfortunately, these tools are becoming increasingly limited in their ability to identify compliance issues and fixes. After all, most were developed using regulation-agnostic datasets of annotated privacy policies obtained from a time before the introduction of landmark privacy regulations such as EU`s GDPR and California`s CCPA. In this paper, we describe the first open regulation-aware dataset of expert-annotated privacy policies, C3PA (CCPA Privacy Policy Provision Annotations), aimed to address this challenge. C3PA contains over 48K expert-labeled privacy policy text segments associated with responses to CCPA-specific disclosure mandates from 411 unique organizations. We demonstrate that the C3PA dataset is uniquely suited for aiding automated audits of compliance with CCPA-related disclosure mandates."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="musa-etal-2024-c3pa">
<titleInfo>
<title>C3PA: An Open Dataset of Expert-Annotated and Regulation-Aware Privacy Policies to Enable Scalable Regulatory Compliance Audits</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maaz</namePart>
<namePart type="given">Bin</namePart>
<namePart type="family">Musa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Winston</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Garrison</namePart>
<namePart type="family">Allen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jacob</namePart>
<namePart type="family">Schiller</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kevin</namePart>
<namePart type="family">Moore</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sean</namePart>
<namePart type="family">Quick</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Johnathan</namePart>
<namePart type="family">Melvin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Padmini</namePart>
<namePart type="family">Srinivasan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mihailis</namePart>
<namePart type="given">E</namePart>
<namePart type="family">Diamantis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rishab</namePart>
<namePart type="family">Nithyanand</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The development of tools and techniques to analyze and extract organizations’ data habits from privacy policies are critical for scalable regulatory compliance audits. Unfortunately, these tools are becoming increasingly limited in their ability to identify compliance issues and fixes. After all, most were developed using regulation-agnostic datasets of annotated privacy policies obtained from a time before the introduction of landmark privacy regulations such as EU‘s GDPR and California‘s CCPA. In this paper, we describe the first open regulation-aware dataset of expert-annotated privacy policies, C3PA (CCPA Privacy Policy Provision Annotations), aimed to address this challenge. C3PA contains over 48K expert-labeled privacy policy text segments associated with responses to CCPA-specific disclosure mandates from 411 unique organizations. We demonstrate that the C3PA dataset is uniquely suited for aiding automated audits of compliance with CCPA-related disclosure mandates.</abstract>
<identifier type="citekey">musa-etal-2024-c3pa</identifier>
<identifier type="doi">10.18653/v1/2024.emnlp-main.217</identifier>
<location>
<url>https://aclanthology.org/2024.emnlp-main.217/</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>3710</start>
<end>3722</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T C3PA: An Open Dataset of Expert-Annotated and Regulation-Aware Privacy Policies to Enable Scalable Regulatory Compliance Audits
%A Musa, Maaz Bin
%A Winston, Steven M.
%A Allen, Garrison
%A Schiller, Jacob
%A Moore, Kevin
%A Quick, Sean
%A Melvin, Johnathan
%A Srinivasan, Padmini
%A Diamantis, Mihailis E.
%A Nithyanand, Rishab
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F musa-etal-2024-c3pa
%X The development of tools and techniques to analyze and extract organizations’ data habits from privacy policies are critical for scalable regulatory compliance audits. Unfortunately, these tools are becoming increasingly limited in their ability to identify compliance issues and fixes. After all, most were developed using regulation-agnostic datasets of annotated privacy policies obtained from a time before the introduction of landmark privacy regulations such as EU‘s GDPR and California‘s CCPA. In this paper, we describe the first open regulation-aware dataset of expert-annotated privacy policies, C3PA (CCPA Privacy Policy Provision Annotations), aimed to address this challenge. C3PA contains over 48K expert-labeled privacy policy text segments associated with responses to CCPA-specific disclosure mandates from 411 unique organizations. We demonstrate that the C3PA dataset is uniquely suited for aiding automated audits of compliance with CCPA-related disclosure mandates.
%R 10.18653/v1/2024.emnlp-main.217
%U https://aclanthology.org/2024.emnlp-main.217/
%U https://doi.org/10.18653/v1/2024.emnlp-main.217
%P 3710-3722
Markdown (Informal)
[C3PA: An Open Dataset of Expert-Annotated and Regulation-Aware Privacy Policies to Enable Scalable Regulatory Compliance Audits](https://aclanthology.org/2024.emnlp-main.217/) (Musa et al., EMNLP 2024)
ACL
- Maaz Bin Musa, Steven M. Winston, Garrison Allen, Jacob Schiller, Kevin Moore, Sean Quick, Johnathan Melvin, Padmini Srinivasan, Mihailis E. Diamantis, and Rishab Nithyanand. 2024. C3PA: An Open Dataset of Expert-Annotated and Regulation-Aware Privacy Policies to Enable Scalable Regulatory Compliance Audits. In Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing, pages 3710–3722, Miami, Florida, USA. Association for Computational Linguistics.