% Conference paper published in the RANLP 2021 proceedings; the url field points to its landing page.
% Case-sensitive words in the title and booktitle are protected with whole-word braces.
@inproceedings{plaza-del-arco-etal-2021-offendes,
  title     = {{OffendES}: A New Corpus in {Spanish} for Offensive Language Research},
  author    = {Plaza-del-Arco, Flor Miriam and
               Montejo-R{\'a}ez, Arturo and
               Ure{\~n}a-L{\'o}pez, L. Alfonso and
               Mart{\'\i}n-Valdivia, Mar{\'\i}a-Teresa},
  editor    = {Mitkov, Ruslan and
               Angelova, Galia},
  booktitle = {Proceedings of the International Conference on Recent Advances in Natural Language Processing ({RANLP} 2021)},
  month     = sep,
  year      = {2021},
  address   = {Held Online},
  publisher = {INCOMA Ltd.},
  url       = {https://aclanthology.org/2021.ranlp-1.123},
  pages     = {1096--1108},
  abstract  = {Offensive language detection and analysis has become a major area of research in Natural Language Processing. The freedom of participation in social media has exposed online users to posts designed to denigrate, insult or hurt them according to gender, race, religion, ideology, or other personal characteristics. Focusing on young influencers from the well-known social platforms of Twitter, Instagram, and YouTube, we have collected a corpus composed of 47,128 Spanish comments manually labeled on offensive pre-defined categories. A subset of the corpus attaches a degree of confidence to each label, so both multi-class classification and multi-output regression studies are possible. In this paper, we introduce the corpus, discuss its building process, novelties, and some preliminary experiments with it to serve as a baseline for the research community.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record describing the paper; the ID matches the citekey identifier below. -->
  <mods ID="plaza-del-arco-etal-2021-offendes">
    <titleInfo>
      <title>OffendES: A New Corpus in Spanish for Offensive Language Research</title>
    </titleInfo>
    <!-- Authors of the paper, in byline order. -->
    <name type="personal">
      <namePart type="given">Flor</namePart>
      <namePart type="given">Miriam</namePart>
      <namePart type="family">Plaza-del-Arco</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Arturo</namePart>
      <namePart type="family">Montejo-Ráez</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">L</namePart>
      <namePart type="given">Alfonso</namePart>
      <namePart type="family">Ureña-López</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">María-Teresa</namePart>
      <namePart type="family">Martín-Valdivia</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2021-09</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host publication: the proceedings volume with its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Ruslan</namePart>
        <namePart type="family">Mitkov</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Galia</namePart>
        <namePart type="family">Angelova</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>INCOMA Ltd.</publisher>
        <place>
          <placeTerm type="text">Held Online</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Offensive language detection and analysis has become a major area of research in Natural Language Processing. The freedom of participation in social media has exposed online users to posts designed to denigrate, insult or hurt them according to gender, race, religion, ideology, or other personal characteristics. Focusing on young influencers from the well-known social platforms of Twitter, Instagram, and YouTube, we have collected a corpus composed of 47,128 Spanish comments manually labeled on offensive pre-defined categories. A subset of the corpus attaches a degree of confidence to each label, so both multi-class classification and multi-output regression studies are possible. In this paper, we introduce the corpus, discuss its building process, novelties, and some preliminary experiments with it to serve as a baseline for the research community.</abstract>
    <identifier type="citekey">plaza-del-arco-etal-2021-offendes</identifier>
    <location>
      <url>https://aclanthology.org/2021.ranlp-1.123</url>
    </location>
    <!-- Date and page range of the paper within the host volume. -->
    <part>
      <date>2021-09</date>
      <extent unit="page">
        <start>1096</start>
        <end>1108</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T OffendES: A New Corpus in Spanish for Offensive Language Research
%A Plaza-del-Arco, Flor Miriam
%A Montejo-Ráez, Arturo
%A Ureña-López, L. Alfonso
%A Martín-Valdivia, María-Teresa
%Y Mitkov, Ruslan
%Y Angelova, Galia
%S Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)
%D 2021
%8 September
%I INCOMA Ltd.
%C Held Online
%F plaza-del-arco-etal-2021-offendes
%X Offensive language detection and analysis has become a major area of research in Natural Language Processing. The freedom of participation in social media has exposed online users to posts designed to denigrate, insult or hurt them according to gender, race, religion, ideology, or other personal characteristics. Focusing on young influencers from the well-known social platforms of Twitter, Instagram, and YouTube, we have collected a corpus composed of 47,128 Spanish comments manually labeled on offensive pre-defined categories. A subset of the corpus attaches a degree of confidence to each label, so both multi-class classification and multi-output regression studies are possible. In this paper, we introduce the corpus, discuss its building process, novelties, and some preliminary experiments with it to serve as a baseline for the research community.
%U https://aclanthology.org/2021.ranlp-1.123
%P 1096-1108
Markdown (Informal)
[OffendES: A New Corpus in Spanish for Offensive Language Research](https://aclanthology.org/2021.ranlp-1.123) (Plaza-del-Arco et al., RANLP 2021)
ACL
- Flor Miriam Plaza-del-Arco, Arturo Montejo-Ráez, L. Alfonso Ureña-López, and María-Teresa Martín-Valdivia. 2021. OffendES: A New Corpus in Spanish for Offensive Language Research. In Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021), pages 1096–1108, Held Online. INCOMA Ltd.