@inproceedings{korre-etal-2024-challenges-creating,
title = "The Challenges of Creating a Parallel Multilingual Hate Speech Corpus: An Exploration",
author = "Korre, Katerina and
Muti, Arianna and
Barr{\'o}n-Cede{\~n}o, Alberto",
editor = "Calzolari, Nicoletta and
Kan, Min-Yen and
Hoste, Veronique and
Lenci, Alessandro and
Sakti, Sakriani and
Xue, Nianwen",
booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.lrec-main.1376",
pages = "15842--15853",
abstract = "Hate speech is infamously one of the most demanding topics in Natural Language Processing, as its multifacetedness is accompanied by a handful of challenges, such as multilinguality and cross-linguality. Hate speech has a subjective aspect that intensifies when referring to different cultures and different languages. In this respect, we design a pipeline that will help us explore the possibility of the creation of a parallel multilingual hate speech dataset, using machine translation. In this paper, we evaluate how/whether this is feasible by assessing the quality of the translations, calculating the toxicity levels of original and target texts, and calculating correlations between the newly obtained scores. Finally, we perform a qualitative analysis to gain further semantic and grammatical insights. With this pipeline we aim at exploring ways of filtering hate speech texts in order to parallelize sentences in multiple languages, examining the challenges of the task.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="korre-etal-2024-challenges-creating">
<titleInfo>
<title>The Challenges of Creating a Parallel Multilingual Hate Speech Corpus: An Exploration</title>
</titleInfo>
<name type="personal">
<namePart type="given">Katerina</namePart>
<namePart type="family">Korre</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arianna</namePart>
<namePart type="family">Muti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alberto</namePart>
<namePart type="family">Barrón-Cedeño</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Hate speech is infamously one of the most demanding topics in Natural Language Processing, as its multifacetedness is accompanied by a handful of challenges, such as multilinguality and cross-linguality. Hate speech has a subjective aspect that intensifies when referring to different cultures and different languages. In this respect, we design a pipeline that will help us explore the possibility of the creation of a parallel multilingual hate speech dataset, using machine translation. In this paper, we evaluate how/whether this is feasible by assessing the quality of the translations, calculating the toxicity levels of original and target texts, and calculating correlations between the newly obtained scores. Finally, we perform a qualitative analysis to gain further semantic and grammatical insights. With this pipeline we aim at exploring ways of filtering hate speech texts in order to parallelize sentences in multiple languages, examining the challenges of the task.</abstract>
<identifier type="citekey">korre-etal-2024-challenges-creating</identifier>
<location>
<url>https://aclanthology.org/2024.lrec-main.1376</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>15842</start>
<end>15853</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T The Challenges of Creating a Parallel Multilingual Hate Speech Corpus: An Exploration
%A Korre, Katerina
%A Muti, Arianna
%A Barrón-Cedeño, Alberto
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F korre-etal-2024-challenges-creating
%X Hate speech is infamously one of the most demanding topics in Natural Language Processing, as its multifacetedness is accompanied by a handful of challenges, such as multilinguality and cross-linguality. Hate speech has a subjective aspect that intensifies when referring to different cultures and different languages. In this respect, we design a pipeline that will help us explore the possibility of the creation of a parallel multilingual hate speech dataset, using machine translation. In this paper, we evaluate how/whether this is feasible by assessing the quality of the translations, calculating the toxicity levels of original and target texts, and calculating correlations between the newly obtained scores. Finally, we perform a qualitative analysis to gain further semantic and grammatical insights. With this pipeline we aim at exploring ways of filtering hate speech texts in order to parallelize sentences in multiple languages, examining the challenges of the task.
%U https://aclanthology.org/2024.lrec-main.1376
%P 15842-15853
Markdown (Informal)
[The Challenges of Creating a Parallel Multilingual Hate Speech Corpus: An Exploration](https://aclanthology.org/2024.lrec-main.1376) (Korre et al., LREC-COLING 2024)
ACL