@inproceedings{hegde-etal-2023-mucs,
title = "{MUCS}@{D}ravidian{L}ang{T}ech2023: Leveraging Learning Models to Identify Abusive Comments in Code-mixed {D}ravidian Languages",
author = "Hegde, Asha and
G, Kavya and
Coelho, Sharal and
Shashirekha, Hosahalli Lakshmaiah",
editor = "Chakravarthi, Bharathi R. and
Priyadharshini, Ruba and
M, Anand Kumar and
Thavareesan, Sajeetha and
Sherly, Elizabeth",
booktitle = "Proceedings of the Third Workshop on Speech and Language Technologies for Dravidian Languages",
month = sep,
year = "2023",
address = "Varna, Bulgaria",
publisher = "INCOMA Ltd., Shoumen, Bulgaria",
url = "https://aclanthology.org/2023.dravidianlangtech-1.39",
pages = "266--274",
abstract = "Abusive language detection in user-generated online content has become a pressing concern due to its negative impact on users and challenges for policy makers. Online platforms are faced with the task of moderating abusive content to mitigate societal harm, adhere to legal requirements, and foster inclusivity. Despite numerous methods developed for automated detection of abusive language, the problem continues to persist. This ongoing challenge necessitates further research and development to enhance the effectiveness of abusive content detection systems and implement proactive measures to create safer and more respectful online spaces. To address the automatic detection of abusive languages in social media platforms, this paper describes the models submitted by our team - MUCS to the shared task {``}Abusive Comment Detection in Tamil and Telugu{''} at DravidianLangTech - in Recent Advances in Natural Language Processing (RANLP) 2023. This shared task addresses the abusive comment detection in code-mixed Tamil, Telugu, and romanized Tamil (Tamil-English) texts. Two distinct models: i) AbusiveML - a model implemented utilizing Linear Support Vector Classifier (LinearSVC) algorithm fed with n-grams of words and character sequences within word boundary (char{\_}wb) features and ii) AbusiveTL - a Transfer Learning (TL ) model with three different Bidirectional Encoder Representations from Transformers (BERT) models along with random oversampling to deal with data imbalance, are submitted to the shared task for detecting abusive language in the given code-mixed texts. The AbusiveTL model fared well among these two models, with macro F1 scores of 0.46, 0.74, and 0.49 for code-mixed Tamil, Telugu, and Tamil-English texts respectively.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hegde-etal-2023-mucs">
<titleInfo>
<title>MUCS@DravidianLangTech2023: Leveraging Learning Models to Identify Abusive Comments in Code-mixed Dravidian Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Asha</namePart>
<namePart type="family">Hegde</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kavya</namePart>
<namePart type="family">G</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sharal</namePart>
<namePart type="family">Coelho</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hosahalli</namePart>
<namePart type="given">Lakshmaiah</namePart>
<namePart type="family">Shashirekha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Third Workshop on Speech and Language Technologies for Dravidian Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bharathi</namePart>
<namePart type="given">R</namePart>
<namePart type="family">Chakravarthi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruba</namePart>
<namePart type="family">Priyadharshini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anand</namePart>
<namePart type="given">Kumar</namePart>
<namePart type="family">M</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sajeetha</namePart>
<namePart type="family">Thavareesan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elizabeth</namePart>
<namePart type="family">Sherly</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
<place>
<placeTerm type="text">Varna, Bulgaria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Abusive language detection in user-generated online content has become a pressing concern due to its negative impact on users and challenges for policy makers. Online platforms are faced with the task of moderating abusive content to mitigate societal harm, adhere to legal requirements, and foster inclusivity. Despite numerous methods developed for automated detection of abusive language, the problem continues to persist. This ongoing challenge necessitates further research and development to enhance the effectiveness of abusive content detection systems and implement proactive measures to create safer and more respectful online spaces. To address the automatic detection of abusive languages in social media platforms, this paper describes the models submitted by our team - MUCS to the shared task “Abusive Comment Detection in Tamil and Telugu” at DravidianLangTech - in Recent Advances in Natural Language Processing (RANLP) 2023. This shared task addresses the abusive comment detection in code-mixed Tamil, Telugu, and romanized Tamil (Tamil-English) texts. Two distinct models: i) AbusiveML - a model implemented utilizing Linear Support Vector Classifier (LinearSVC) algorithm fed with n-grams of words and character sequences within word boundary (char_wb) features and ii) AbusiveTL - a Transfer Learning (TL ) model with three different Bidirectional Encoder Representations from Transformers (BERT) models along with random oversampling to deal with data imbalance, are submitted to the shared task for detecting abusive language in the given code-mixed texts. The AbusiveTL model fared well among these two models, with macro F1 scores of 0.46, 0.74, and 0.49 for code-mixed Tamil, Telugu, and Tamil-English texts respectively.</abstract>
<identifier type="citekey">hegde-etal-2023-mucs</identifier>
<location>
<url>https://aclanthology.org/2023.dravidianlangtech-1.39</url>
</location>
<part>
<date>2023-09</date>
<extent unit="page">
<start>266</start>
<end>274</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T MUCS@DravidianLangTech2023: Leveraging Learning Models to Identify Abusive Comments in Code-mixed Dravidian Languages
%A Hegde, Asha
%A G, Kavya
%A Coelho, Sharal
%A Shashirekha, Hosahalli Lakshmaiah
%Y Chakravarthi, Bharathi R.
%Y Priyadharshini, Ruba
%Y M, Anand Kumar
%Y Thavareesan, Sajeetha
%Y Sherly, Elizabeth
%S Proceedings of the Third Workshop on Speech and Language Technologies for Dravidian Languages
%D 2023
%8 September
%I INCOMA Ltd., Shoumen, Bulgaria
%C Varna, Bulgaria
%F hegde-etal-2023-mucs
%X Abusive language detection in user-generated online content has become a pressing concern due to its negative impact on users and challenges for policy makers. Online platforms are faced with the task of moderating abusive content to mitigate societal harm, adhere to legal requirements, and foster inclusivity. Despite numerous methods developed for automated detection of abusive language, the problem continues to persist. This ongoing challenge necessitates further research and development to enhance the effectiveness of abusive content detection systems and implement proactive measures to create safer and more respectful online spaces. To address the automatic detection of abusive languages in social media platforms, this paper describes the models submitted by our team - MUCS to the shared task “Abusive Comment Detection in Tamil and Telugu” at DravidianLangTech - in Recent Advances in Natural Language Processing (RANLP) 2023. This shared task addresses the abusive comment detection in code-mixed Tamil, Telugu, and romanized Tamil (Tamil-English) texts. Two distinct models: i) AbusiveML - a model implemented utilizing Linear Support Vector Classifier (LinearSVC) algorithm fed with n-grams of words and character sequences within word boundary (char_wb) features and ii) AbusiveTL - a Transfer Learning (TL ) model with three different Bidirectional Encoder Representations from Transformers (BERT) models along with random oversampling to deal with data imbalance, are submitted to the shared task for detecting abusive language in the given code-mixed texts. The AbusiveTL model fared well among these two models, with macro F1 scores of 0.46, 0.74, and 0.49 for code-mixed Tamil, Telugu, and Tamil-English texts respectively.
%U https://aclanthology.org/2023.dravidianlangtech-1.39
%P 266-274
Markdown (Informal)
[MUCS@DravidianLangTech2023: Leveraging Learning Models to Identify Abusive Comments in Code-mixed Dravidian Languages](https://aclanthology.org/2023.dravidianlangtech-1.39) (Hegde et al., DravidianLangTech-WS 2023)
ACL