@inproceedings{painter-etal-2022-utilizing,
title = "Utilizing Weak Supervision to Create {S}3{D}: A Sarcasm Annotated Dataset",
author = "Painter, Jordan and
Treharne, Helen and
Kanojia, Diptesh",
editor = "Bamman, David and
Hovy, Dirk and
Jurgens, David and
Keith, Katherine and
O'Connor, Brendan and
Volkova, Svitlana",
booktitle = "Proceedings of the Fifth Workshop on Natural Language Processing and Computational Social Science (NLP+CSS)",
month = nov,
year = "2022",
address = "Abu Dhabi, UAE",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.nlpcss-1.22/",
doi = "10.18653/v1/2022.nlpcss-1.22",
pages = "197--206",
abstract = "Sarcasm is prevalent in all corners of social media, posing many challenges within Natural Language Processing (NLP), particularly for sentiment analysis. Sarcasm detection remains a largely unsolved problem in many NLP tasks due to its contradictory and typically derogatory nature as a figurative language construct. With recent strides in NLP, many pre-trained language models exist that have been trained on data from specific social media platforms, i.e., Twitter. In this paper, we evaluate the efficacy of multiple sarcasm detection datasets using machine and deep learning models. We create two new datasets - a manually annotated gold standard Sarcasm Annotated Dataset (SAD) and a Silver-Standard Sarcasm-annotated Dataset (S3D). Using a combination of existing sarcasm datasets with SAD, we train a sarcasm detection model over a social-media domain pre-trained language model, BERTweet, which yields an F1-score of 78.29{\%}. Using an Ensemble model with an underlying majority technique, we further label S3D to produce a weakly supervised dataset containing over 100,000 tweets. We publicly release all the code, our manually annotated and weakly supervised datasets, and fine-tuned models for further research."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="painter-etal-2022-utilizing">
<titleInfo>
<title>Utilizing Weak Supervision to Create S3D: A Sarcasm Annotated Dataset</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jordan</namePart>
<namePart type="family">Painter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Helen</namePart>
<namePart type="family">Treharne</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Diptesh</namePart>
<namePart type="family">Kanojia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth Workshop on Natural Language Processing and Computational Social Science (NLP+CSS)</title>
</titleInfo>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Bamman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dirk</namePart>
<namePart type="family">Hovy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Jurgens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Katherine</namePart>
<namePart type="family">Keith</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Brendan</namePart>
<namePart type="family">O’Connor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Svitlana</namePart>
<namePart type="family">Volkova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Sarcasm is prevalent in all corners of social media, posing many challenges within Natural Language Processing (NLP), particularly for sentiment analysis. Sarcasm detection remains a largely unsolved problem in many NLP tasks due to its contradictory and typically derogatory nature as a figurative language construct. With recent strides in NLP, many pre-trained language models exist that have been trained on data from specific social media platforms, i.e., Twitter. In this paper, we evaluate the efficacy of multiple sarcasm detection datasets using machine and deep learning models. We create two new datasets - a manually annotated gold standard Sarcasm Annotated Dataset (SAD) and a Silver-Standard Sarcasm-annotated Dataset (S3D). Using a combination of existing sarcasm datasets with SAD, we train a sarcasm detection model over a social-media domain pre-trained language model, BERTweet, which yields an F1-score of 78.29%. Using an Ensemble model with an underlying majority technique, we further label S3D to produce a weakly supervised dataset containing over 100,000 tweets. We publicly release all the code, our manually annotated and weakly supervised datasets, and fine-tuned models for further research.</abstract>
<identifier type="citekey">painter-etal-2022-utilizing</identifier>
<identifier type="doi">10.18653/v1/2022.nlpcss-1.22</identifier>
<location>
<url>https://aclanthology.org/2022.nlpcss-1.22/</url>
</location>
<part>
<date>2022-11</date>
<extent unit="page">
<start>197</start>
<end>206</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Utilizing Weak Supervision to Create S3D: A Sarcasm Annotated Dataset
%A Painter, Jordan
%A Treharne, Helen
%A Kanojia, Diptesh
%Y Bamman, David
%Y Hovy, Dirk
%Y Jurgens, David
%Y Keith, Katherine
%Y O’Connor, Brendan
%Y Volkova, Svitlana
%S Proceedings of the Fifth Workshop on Natural Language Processing and Computational Social Science (NLP+CSS)
%D 2022
%8 November
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F painter-etal-2022-utilizing
%X Sarcasm is prevalent in all corners of social media, posing many challenges within Natural Language Processing (NLP), particularly for sentiment analysis. Sarcasm detection remains a largely unsolved problem in many NLP tasks due to its contradictory and typically derogatory nature as a figurative language construct. With recent strides in NLP, many pre-trained language models exist that have been trained on data from specific social media platforms, i.e., Twitter. In this paper, we evaluate the efficacy of multiple sarcasm detection datasets using machine and deep learning models. We create two new datasets - a manually annotated gold standard Sarcasm Annotated Dataset (SAD) and a Silver-Standard Sarcasm-annotated Dataset (S3D). Using a combination of existing sarcasm datasets with SAD, we train a sarcasm detection model over a social-media domain pre-trained language model, BERTweet, which yields an F1-score of 78.29%. Using an Ensemble model with an underlying majority technique, we further label S3D to produce a weakly supervised dataset containing over 100,000 tweets. We publicly release all the code, our manually annotated and weakly supervised datasets, and fine-tuned models for further research.
%R 10.18653/v1/2022.nlpcss-1.22
%U https://aclanthology.org/2022.nlpcss-1.22/
%U https://doi.org/10.18653/v1/2022.nlpcss-1.22
%P 197-206
Markdown (Informal)
[Utilizing Weak Supervision to Create S3D: A Sarcasm Annotated Dataset](https://aclanthology.org/2022.nlpcss-1.22/) (Painter et al., NLP+CSS 2022)
ACL