@inproceedings{zoubi-etal-2024-privat5,
title = "{P}riva{T}5: A Generative Language Model for Privacy Policies",
author = "Zoubi, Mohammad and
T.y.s.s, Santosh and
Rosas, Edgar and
Grabmair, Matthias",
editor = "Habernal, Ivan and
Ghanavati, Sepideh and
Ravichander, Abhilasha and
Jain, Vijayanta and
Thaine, Patricia and
Igamberdiev, Timour and
Mireshghallah, Niloofar and
Feyisetan, Oluwaseyi",
booktitle = "Proceedings of the Fifth Workshop on Privacy in Natural Language Processing",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.privatenlp-1.16/",
pages = "159--169",
abstract = "In the era of digital privacy, users often neglect to read privacy policies due to their complexity. To bridge this gap, NLP models have emerged to assist in understanding privacy policies. While recent generative language models like BART and T5 have shown prowess in text generation and discriminative tasks being framed as generative ones, their application to privacy policy domain tasks remains unexplored. To address that, we introduce PrivaT5, a T5-based model that is further pre-trained on privacy policy text. We evaluate PrivaT5 over a diverse set of privacy policy related tasks and notice its superior performance over T5, showing the utility of continued domain-specific pre-training. Our results also highlight challenges faced by these generative models in complex structured output label space, especially in sequence tagging tasks, where they fall short compared to lighter encoder-only models."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zoubi-etal-2024-privat5">
<titleInfo>
<title>PrivaT5: A Generative Language Model for Privacy Policies</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="family">Zoubi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Santosh</namePart>
<namePart type="family">T.y.s.s</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Edgar</namePart>
<namePart type="family">Rosas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matthias</namePart>
<namePart type="family">Grabmair</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth Workshop on Privacy in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ivan</namePart>
<namePart type="family">Habernal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sepideh</namePart>
<namePart type="family">Ghanavati</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abhilasha</namePart>
<namePart type="family">Ravichander</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vijayanta</namePart>
<namePart type="family">Jain</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Patricia</namePart>
<namePart type="family">Thaine</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Timour</namePart>
<namePart type="family">Igamberdiev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Niloofar</namePart>
<namePart type="family">Mireshghallah</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Oluwaseyi</namePart>
<namePart type="family">Feyisetan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In the era of digital privacy, users often neglect to read privacy policies due to their complexity. To bridge this gap, NLP models have emerged to assist in understanding privacy policies. While recent generative language models like BART and T5 have shown prowess in text generation and discriminative tasks being framed as generative ones, their application to privacy policy domain tasks remains unexplored. To address that, we introduce PrivaT5, a T5-based model that is further pre-trained on privacy policy text. We evaluate PrivaT5 over a diverse set of privacy policy related tasks and notice its superior performance over T5, showing the utility of continued domain-specific pre-training. Our results also highlight challenges faced by these generative models in complex structured output label space, especially in sequence tagging tasks, where they fall short compared to lighter encoder-only models.</abstract>
<identifier type="citekey">zoubi-etal-2024-privat5</identifier>
<location>
<url>https://aclanthology.org/2024.privatenlp-1.16/</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>159</start>
<end>169</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T PrivaT5: A Generative Language Model for Privacy Policies
%A Zoubi, Mohammad
%A T.y.s.s, Santosh
%A Rosas, Edgar
%A Grabmair, Matthias
%Y Habernal, Ivan
%Y Ghanavati, Sepideh
%Y Ravichander, Abhilasha
%Y Jain, Vijayanta
%Y Thaine, Patricia
%Y Igamberdiev, Timour
%Y Mireshghallah, Niloofar
%Y Feyisetan, Oluwaseyi
%S Proceedings of the Fifth Workshop on Privacy in Natural Language Processing
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F zoubi-etal-2024-privat5
%X In the era of digital privacy, users often neglect to read privacy policies due to their complexity. To bridge this gap, NLP models have emerged to assist in understanding privacy policies. While recent generative language models like BART and T5 have shown prowess in text generation and discriminative tasks being framed as generative ones, their application to privacy policy domain tasks remains unexplored. To address that, we introduce PrivaT5, a T5-based model that is further pre-trained on privacy policy text. We evaluate PrivaT5 over a diverse set of privacy policy related tasks and notice its superior performance over T5, showing the utility of continued domain-specific pre-training. Our results also highlight challenges faced by these generative models in complex structured output label space, especially in sequence tagging tasks, where they fall short compared to lighter encoder-only models.
%U https://aclanthology.org/2024.privatenlp-1.16/
%P 159-169
Markdown (Informal)
[PrivaT5: A Generative Language Model for Privacy Policies](https://aclanthology.org/2024.privatenlp-1.16/) (Zoubi et al., PrivateNLP 2024)
ACL
- Mohammad Zoubi, Santosh T.y.s.s, Edgar Rosas, and Matthias Grabmair. 2024. PrivaT5: A Generative Language Model for Privacy Policies. In Proceedings of the Fifth Workshop on Privacy in Natural Language Processing, pages 159–169, Bangkok, Thailand. Association for Computational Linguistics.