@comment{ACL Anthology record 2024.woah-1.13. Braced words in the title and booktitle keep their capitalisation under styles that sentence-case titles.}
@inproceedings{reyes-ramirez-etal-2024-improving,
    title     = {Improving aggressiveness detection using a data augmentation technique based on a {Diffusion} {Language} {Model}},
    author    = {Reyes-Ram{\'i}rez, Antonio D. and
                 Arag{\'o}n, Mario Ezra and
                 S{\'a}nchez-Vega, Fernando and
                 L{\'o}pez-Monroy, A. Pastor},
    editor    = {Chung, Yi-Ling and
                 Talat, Zeerak and
                 Nozza, Debora and
                 Plaza-del-Arco, Flor Miriam and
                 R{\"o}ttger, Paul and
                 Mostafazadeh Davani, Aida and
                 Calabrese, Agostina},
    booktitle = {Proceedings of the 8th Workshop on Online Abuse and Harms ({WOAH} 2024)},
    month     = jun,
    year      = {2024},
    address   = {Mexico City, Mexico},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2024.woah-1.13/},
    doi       = {10.18653/v1/2024.woah-1.13},
    pages     = {171--177},
    abstract  = {Cyberbullying has grown in recent years, largely attributed to the proliferation of social media users. This phenomenon manifests in various forms, such as hate speech and offensive language, increasing the necessity of effective detection models to tackle this problem. Most approaches focus on supervised algorithms, which have an important drawback{---}they heavily depend on the availability of ample training data. This paper attempts to tackle this insufficient data problem using data augmentation (DA) techniques. Concretely, we propose a novel data augmentation technique based on a Diffusion Language Model (DLA). We compare our proposed method against well-known DA techniques, such as contextual augmentation and Easy Data Augmentation (EDA). Our findings reveal a slight but promising improvement, leading to more robust results with very low variance. Additionally, we provide a comprehensive qualitative analysis using classification errors, and complementary analysis, shedding light on the nuances of our approach.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for citekey reyes-ramirez-etal-2024-improving: four authors, host proceedings with seven editors, pages 171-177. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="reyes-ramirez-etal-2024-improving">
    <titleInfo>
      <title>Improving aggressiveness detection using a data augmentation technique based on a Diffusion Language Model</title>
    </titleInfo>
    <!-- Authors, in publication order -->
    <name type="personal">
      <namePart type="given">Antonio</namePart>
      <namePart type="given">D</namePart>
      <namePart type="family">Reyes-Ramírez</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Mario</namePart>
      <namePart type="given">Ezra</namePart>
      <namePart type="family">Aragón</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Fernando</namePart>
      <namePart type="family">Sánchez-Vega</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">A</namePart>
      <namePart type="given">Pastor</namePart>
      <namePart type="family">López-Monroy</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-06</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume and its editors -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 8th Workshop on Online Abuse and Harms (WOAH 2024)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Yi-Ling</namePart>
        <namePart type="family">Chung</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Zeerak</namePart>
        <namePart type="family">Talat</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Debora</namePart>
        <namePart type="family">Nozza</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Flor</namePart>
        <namePart type="given">Miriam</namePart>
        <namePart type="family">Plaza-del-Arco</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Paul</namePart>
        <namePart type="family">Röttger</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Aida</namePart>
        <namePart type="family">Mostafazadeh Davani</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Agostina</namePart>
        <namePart type="family">Calabrese</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Mexico City, Mexico</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Cyberbullying has grown in recent years, largely attributed to the proliferation of social media users. This phenomenon manifests in various forms, such as hate speech and offensive language, increasing the necessity of effective detection models to tackle this problem. Most approaches focus on supervised algorithms, which have an important drawback—they heavily depend on the availability of ample training data. This paper attempts to tackle this insufficient data problem using data augmentation (DA) techniques. Concretely, we propose a novel data augmentation technique based on a Diffusion Language Model (DLA). We compare our proposed method against well-known DA techniques, such as contextual augmentation and Easy Data Augmentation (EDA). Our findings reveal a slight but promising improvement, leading to more robust results with very low variance. Additionally, we provide a comprehensive qualitative analysis using classification errors, and complementary analysis, shedding light on the nuances of our approach.</abstract>
    <identifier type="citekey">reyes-ramirez-etal-2024-improving</identifier>
    <identifier type="doi">10.18653/v1/2024.woah-1.13</identifier>
    <location>
      <url>https://aclanthology.org/2024.woah-1.13/</url>
    </location>
    <part>
      <date>2024-06</date>
      <extent unit="page">
        <start>171</start>
        <end>177</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Improving aggressiveness detection using a data augmentation technique based on a Diffusion Language Model
%A Reyes-Ramírez, Antonio D.
%A Aragón, Mario Ezra
%A Sánchez-Vega, Fernando
%A López-Monroy, A. Pastor
%Y Chung, Yi-Ling
%Y Talat, Zeerak
%Y Nozza, Debora
%Y Plaza-del-Arco, Flor Miriam
%Y Röttger, Paul
%Y Mostafazadeh Davani, Aida
%Y Calabrese, Agostina
%S Proceedings of the 8th Workshop on Online Abuse and Harms (WOAH 2024)
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F reyes-ramirez-etal-2024-improving
%X Cyberbullying has grown in recent years, largely attributed to the proliferation of social media users. This phenomenon manifests in various forms, such as hate speech and offensive language, increasing the necessity of effective detection models to tackle this problem. Most approaches focus on supervised algorithms, which have an important drawback—they heavily depend on the availability of ample training data. This paper attempts to tackle this insufficient data problem using data augmentation (DA) techniques. Concretely, we propose a novel data augmentation technique based on a Diffusion Language Model (DLA). We compare our proposed method against well-known DA techniques, such as contextual augmentation and Easy Data Augmentation (EDA). Our findings reveal a slight but promising improvement, leading to more robust results with very low variance. Additionally, we provide a comprehensive qualitative analysis using classification errors, and complementary analysis, shedding light on the nuances of our approach.
%R 10.18653/v1/2024.woah-1.13
%U https://aclanthology.org/2024.woah-1.13/
%U https://doi.org/10.18653/v1/2024.woah-1.13
%P 171-177
Markdown (Informal)
[Improving aggressiveness detection using a data augmentation technique based on a Diffusion Language Model](https://aclanthology.org/2024.woah-1.13/) (Reyes-Ramírez et al., WOAH 2024)
ACL