@inproceedings{vargas-etal-2024-hausahate,
title = "{H}ausa{H}ate: An Expert Annotated Corpus for {H}ausa Hate Speech Detection",
author = "Vargas, Francielle and
Guimar{\~a}es, Samuel and
Muhammad, Shamsuddeen Hassan and
Alves, Diego and
Ahmad, Ibrahim Said and
Abdulmumin, Idris and
Mohamed, Diallo and
Pardo, Thiago and
Benevenuto, Fabr{\'i}cio",
editor = {Chung, Yi-Ling and
Talat, Zeerak and
Nozza, Debora and
Plaza-del-Arco, Flor Miriam and
R{\"o}ttger, Paul and
Mostafazadeh Davani, Aida and
Calabrese, Agostina},
booktitle = "Proceedings of the 8th Workshop on Online Abuse and Harms (WOAH 2024)",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.woah-1.5/",
doi = "10.18653/v1/2024.woah-1.5",
pages = "52--58",
abstract = "We introduce the first expert annotated corpus of Facebook comments for Hausa hate speech detection. The corpus titled HausaHate comprises 2,000 comments extracted from Western African Facebook pages and manually annotated by three Hausa native speakers, who are also NLP experts. Our corpus was annotated using two different layers. We first labeled each comment according to a binary classification: offensive versus non-offensive. Then, offensive comments were also labeled according to hate speech targets: race, gender and none. Lastly, a baseline model using fine-tuned LLM for Hausa hate speech detection is presented, highlighting the challenges of hate speech detection tasks for indigenous languages in Africa, as well as future advances."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="vargas-etal-2024-hausahate">
<titleInfo>
<title>HausaHate: An Expert Annotated Corpus for Hausa Hate Speech Detection</title>
</titleInfo>
<name type="personal">
<namePart type="given">Francielle</namePart>
<namePart type="family">Vargas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Samuel</namePart>
<namePart type="family">Guimarães</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shamsuddeen</namePart>
<namePart type="given">Hassan</namePart>
<namePart type="family">Muhammad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Diego</namePart>
<namePart type="family">Alves</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ibrahim</namePart>
<namePart type="given">Said</namePart>
<namePart type="family">Ahmad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Idris</namePart>
<namePart type="family">Abdulmumin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Diallo</namePart>
<namePart type="family">Mohamed</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thiago</namePart>
<namePart type="family">Pardo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fabrício</namePart>
<namePart type="family">Benevenuto</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 8th Workshop on Online Abuse and Harms (WOAH 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yi-Ling</namePart>
<namePart type="family">Chung</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zeerak</namePart>
<namePart type="family">Talat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Debora</namePart>
<namePart type="family">Nozza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Flor</namePart>
<namePart type="given">Miriam</namePart>
<namePart type="family">Plaza-del-Arco</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Paul</namePart>
<namePart type="family">Röttger</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aida</namePart>
<namePart type="family">Mostafazadeh Davani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Agostina</namePart>
<namePart type="family">Calabrese</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mexico City, Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We introduce the first expert annotated corpus of Facebook comments for Hausa hate speech detection. The corpus titled HausaHate comprises 2,000 comments extracted from Western African Facebook pages and manually annotated by three Hausa native speakers, who are also NLP experts. Our corpus was annotated using two different layers. We first labeled each comment according to a binary classification: offensive versus non-offensive. Then, offensive comments were also labeled according to hate speech targets: race, gender and none. Lastly, a baseline model using fine-tuned LLM for Hausa hate speech detection is presented, highlighting the challenges of hate speech detection tasks for indigenous languages in Africa, as well as future advances.</abstract>
<identifier type="citekey">vargas-etal-2024-hausahate</identifier>
<identifier type="doi">10.18653/v1/2024.woah-1.5</identifier>
<location>
<url>https://aclanthology.org/2024.woah-1.5/</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>52</start>
<end>58</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T HausaHate: An Expert Annotated Corpus for Hausa Hate Speech Detection
%A Vargas, Francielle
%A Guimarães, Samuel
%A Muhammad, Shamsuddeen Hassan
%A Alves, Diego
%A Ahmad, Ibrahim Said
%A Abdulmumin, Idris
%A Mohamed, Diallo
%A Pardo, Thiago
%A Benevenuto, Fabrício
%Y Chung, Yi-Ling
%Y Talat, Zeerak
%Y Nozza, Debora
%Y Plaza-del-Arco, Flor Miriam
%Y Röttger, Paul
%Y Mostafazadeh Davani, Aida
%Y Calabrese, Agostina
%S Proceedings of the 8th Workshop on Online Abuse and Harms (WOAH 2024)
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F vargas-etal-2024-hausahate
%X We introduce the first expert annotated corpus of Facebook comments for Hausa hate speech detection. The corpus titled HausaHate comprises 2,000 comments extracted from Western African Facebook pages and manually annotated by three Hausa native speakers, who are also NLP experts. Our corpus was annotated using two different layers. We first labeled each comment according to a binary classification: offensive versus non-offensive. Then, offensive comments were also labeled according to hate speech targets: race, gender and none. Lastly, a baseline model using fine-tuned LLM for Hausa hate speech detection is presented, highlighting the challenges of hate speech detection tasks for indigenous languages in Africa, as well as future advances.
%R 10.18653/v1/2024.woah-1.5
%U https://aclanthology.org/2024.woah-1.5/
%U https://doi.org/10.18653/v1/2024.woah-1.5
%P 52-58
Markdown (Informal)
[HausaHate: An Expert Annotated Corpus for Hausa Hate Speech Detection](https://aclanthology.org/2024.woah-1.5/) (Vargas et al., WOAH 2024)
ACL
- Francielle Vargas, Samuel Guimarães, Shamsuddeen Hassan Muhammad, Diego Alves, Ibrahim Said Ahmad, Idris Abdulmumin, Diallo Mohamed, Thiago Pardo, and Fabrício Benevenuto. 2024. HausaHate: An Expert Annotated Corpus for Hausa Hate Speech Detection. In Proceedings of the 8th Workshop on Online Abuse and Harms (WOAH 2024), pages 52–58, Mexico City, Mexico. Association for Computational Linguistics.