@inproceedings{elkaref-abu-elkheir-2022-guct,
title = "{GUCT} at {A}rabic Hate Speech 2022: Towards a Better Isotropy for Hatespeech Detection",
author = "Elkaref, Nehal and
Abu-Elkheir, Mervat",
editor = "Al-Khalifa, Hend and
Elsayed, Tamer and
Mubarak, Hamdy and
Al-Thubaity, Abdulmohsen and
Magdy, Walid and
Darwish, Kareem",
booktitle = "Proceedings of the 5th Workshop on Open-Source Arabic Corpora and Processing Tools with Shared Tasks on Qur'an QA and Fine-Grained Hate Speech Detection",
month = jun,
year = "2022",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2022.osact-1.27",
pages = "209--213",
abstract = "Hate Speech is an increasingly common occurrence in verbal and textual exchanges on online platforms, where many users, especially those from vulnerable minorities, are in danger of being attacked or harassed via text messages, posts, comments, or articles. Therefore, it is crucial to detect and filter out hate speech in the various forms of text encountered on online and social platforms. In this paper, we present our work on the shared task of detecting hate speech in dialectical Arabic tweets as part of the OSACT shared task on Fine-grained Hate Speech Detection. Normally, tweets have a short length, and hence do not have sufficient context for language models, which in turn makes a classification task challenging. To contribute to sub-task A, we leverage MARBERT{'}s pre-trained contextual word representations and aim to improve their semantic quality using a cluster-based approach. Our work explores MARBERT{'}s embedding space and assess its geometric properties in-order to achieve better representations and subsequently better classification performance. We propose to improve the isotropic word representations of MARBERT via clustering. we compare the word representations generated by our approach to MARBERT{'}s default word representations via feeding each to a bidirectional LSTM to detect offensive and non-offensive tweets. Our results show that enhancing the isotropy of an embedding space can boost performance. Our system scores 81.2{\%} on accuracy and a macro-averaged F1 score of 79.1{\%} on sub-task A{'}s development set and achieves 76.5{\%} for accuracy and an F1 score of 74.2{\%} on the test set.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="elkaref-abu-elkheir-2022-guct">
<titleInfo>
<title>GUCT at Arabic Hate Speech 2022: Towards a Better Isotropy for Hatespeech Detection</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nehal</namePart>
<namePart type="family">Elkaref</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mervat</namePart>
<namePart type="family">Abu-Elkheir</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 5th Workshop on Open-Source Arabic Corpora and Processing Tools with Shared Tasks on Qur’an QA and Fine-Grained Hate Speech Detection</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hend</namePart>
<namePart type="family">Al-Khalifa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tamer</namePart>
<namePart type="family">Elsayed</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hamdy</namePart>
<namePart type="family">Mubarak</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abdulmohsen</namePart>
<namePart type="family">Al-Thubaity</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Walid</namePart>
<namePart type="family">Magdy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kareem</namePart>
<namePart type="family">Darwish</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Hate Speech is an increasingly common occurrence in verbal and textual exchanges on online platforms, where many users, especially those from vulnerable minorities, are in danger of being attacked or harassed via text messages, posts, comments, or articles. Therefore, it is crucial to detect and filter out hate speech in the various forms of text encountered on online and social platforms. In this paper, we present our work on the shared task of detecting hate speech in dialectical Arabic tweets as part of the OSACT shared task on Fine-grained Hate Speech Detection. Normally, tweets have a short length, and hence do not have sufficient context for language models, which in turn makes a classification task challenging. To contribute to sub-task A, we leverage MARBERT’s pre-trained contextual word representations and aim to improve their semantic quality using a cluster-based approach. Our work explores MARBERT’s embedding space and assess its geometric properties in-order to achieve better representations and subsequently better classification performance. We propose to improve the isotropic word representations of MARBERT via clustering. we compare the word representations generated by our approach to MARBERT’s default word representations via feeding each to a bidirectional LSTM to detect offensive and non-offensive tweets. Our results show that enhancing the isotropy of an embedding space can boost performance. Our system scores 81.2% on accuracy and a macro-averaged F1 score of 79.1% on sub-task A’s development set and achieves 76.5% for accuracy and an F1 score of 74.2% on the test set.</abstract>
<identifier type="citekey">elkaref-abu-elkheir-2022-guct</identifier>
<location>
<url>https://aclanthology.org/2022.osact-1.27</url>
</location>
<part>
<date>2022-06</date>
<extent unit="page">
<start>209</start>
<end>213</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T GUCT at Arabic Hate Speech 2022: Towards a Better Isotropy for Hatespeech Detection
%A Elkaref, Nehal
%A Abu-Elkheir, Mervat
%Y Al-Khalifa, Hend
%Y Elsayed, Tamer
%Y Mubarak, Hamdy
%Y Al-Thubaity, Abdulmohsen
%Y Magdy, Walid
%Y Darwish, Kareem
%S Proceedings of the 5th Workshop on Open-Source Arabic Corpora and Processing Tools with Shared Tasks on Qur’an QA and Fine-Grained Hate Speech Detection
%D 2022
%8 June
%I European Language Resources Association
%C Marseille, France
%F elkaref-abu-elkheir-2022-guct
%X Hate Speech is an increasingly common occurrence in verbal and textual exchanges on online platforms, where many users, especially those from vulnerable minorities, are in danger of being attacked or harassed via text messages, posts, comments, or articles. Therefore, it is crucial to detect and filter out hate speech in the various forms of text encountered on online and social platforms. In this paper, we present our work on the shared task of detecting hate speech in dialectical Arabic tweets as part of the OSACT shared task on Fine-grained Hate Speech Detection. Normally, tweets have a short length, and hence do not have sufficient context for language models, which in turn makes a classification task challenging. To contribute to sub-task A, we leverage MARBERT’s pre-trained contextual word representations and aim to improve their semantic quality using a cluster-based approach. Our work explores MARBERT’s embedding space and assess its geometric properties in-order to achieve better representations and subsequently better classification performance. We propose to improve the isotropic word representations of MARBERT via clustering. we compare the word representations generated by our approach to MARBERT’s default word representations via feeding each to a bidirectional LSTM to detect offensive and non-offensive tweets. Our results show that enhancing the isotropy of an embedding space can boost performance. Our system scores 81.2% on accuracy and a macro-averaged F1 score of 79.1% on sub-task A’s development set and achieves 76.5% for accuracy and an F1 score of 74.2% on the test set.
%U https://aclanthology.org/2022.osact-1.27
%P 209-213
Markdown (Informal)
[GUCT at Arabic Hate Speech 2022: Towards a Better Isotropy for Hatespeech Detection](https://aclanthology.org/2022.osact-1.27) (Elkaref & Abu-Elkheir, OSACT 2022)
ACL