@inproceedings{pahwa-2022-bphigh,
title = "{B}p{H}igh@{T}amil{NLP}-{ACL}2022: Effects of Data Augmentation on Indic-Transformer based classifier for Abusive Comments Detection in {T}amil",
author = "Pahwa, Bhavish",
editor = "Chakravarthi, Bharathi Raja and
Priyadharshini, Ruba and
Madasamy, Anand Kumar and
Krishnamurthy, Parameswari and
Sherly, Elizabeth and
Mahesan, Sinnathamby",
booktitle = "Proceedings of the Second Workshop on Speech and Language Technologies for Dravidian Languages",
month = may,
year = "2022",
address = "Dublin, Ireland",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.dravidianlangtech-1.22/",
doi = "10.18653/v1/2022.dravidianlangtech-1.22",
pages = "138--144",
abstract = "Social Media platforms have grown their reach worldwide. As an effect of this growth, many vernacular social media platforms have also emerged, focusing more on the diverse languages in the specific regions. Tamil has also emerged as a popular language for use on social media platforms due to the increasing penetration of vernacular media like Sharechat and Moj, which focus more on local Indian languages than English and encourage their users to converse in Indic languages. Abusive language remains a significant challenge in the social media framework and more so when we consider languages like Tamil, which are low-resource languages and have poor performance on multilingual models and lack language-specific models. Based on this shared task, {\textquotedblleft}Abusive Comment detection in Tamil@DravidianLangTech-ACL 2022{\textquotedblright}, we present an exploration of different techniques used to tackle and increase the accuracy of our models using data augmentation in NLP. We also show the results of these techniques."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="pahwa-2022-bphigh">
<titleInfo>
<title>BpHigh@TamilNLP-ACL2022: Effects of Data Augmentation on Indic-Transformer based classifier for Abusive Comments Detection in Tamil</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bhavish</namePart>
<namePart type="family">Pahwa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second Workshop on Speech and Language Technologies for Dravidian Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bharathi</namePart>
<namePart type="given">Raja</namePart>
<namePart type="family">Chakravarthi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruba</namePart>
<namePart type="family">Priyadharshini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anand</namePart>
<namePart type="given">Kumar</namePart>
<namePart type="family">Madasamy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Parameswari</namePart>
<namePart type="family">Krishnamurthy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elizabeth</namePart>
<namePart type="family">Sherly</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sinnathamby</namePart>
<namePart type="family">Mahesan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dublin, Ireland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Social Media platforms have grown their reach worldwide. As an effect of this growth, many vernacular social media platforms have also emerged, focusing more on the diverse languages in the specific regions. Tamil has also emerged as a popular language for use on social media platforms due to the increasing penetration of vernacular media like Sharechat and Moj, which focus more on local Indian languages than English and encourage their users to converse in Indic languages. Abusive language remains a significant challenge in the social media framework and more so when we consider languages like Tamil, which are low-resource languages and have poor performance on multilingual models and lack language-specific models. Based on this shared task, “Abusive Comment detection in Tamil@DravidianLangTech-ACL 2022”, we present an exploration of different techniques used to tackle and increase the accuracy of our models using data augmentation in NLP. We also show the results of these techniques.</abstract>
<identifier type="citekey">pahwa-2022-bphigh</identifier>
<identifier type="doi">10.18653/v1/2022.dravidianlangtech-1.22</identifier>
<location>
<url>https://aclanthology.org/2022.dravidianlangtech-1.22/</url>
</location>
<part>
<date>2022-05</date>
<extent unit="page">
<start>138</start>
<end>144</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T BpHigh@TamilNLP-ACL2022: Effects of Data Augmentation on Indic-Transformer based classifier for Abusive Comments Detection in Tamil
%A Pahwa, Bhavish
%Y Chakravarthi, Bharathi Raja
%Y Priyadharshini, Ruba
%Y Madasamy, Anand Kumar
%Y Krishnamurthy, Parameswari
%Y Sherly, Elizabeth
%Y Mahesan, Sinnathamby
%S Proceedings of the Second Workshop on Speech and Language Technologies for Dravidian Languages
%D 2022
%8 May
%I Association for Computational Linguistics
%C Dublin, Ireland
%F pahwa-2022-bphigh
%X Social Media platforms have grown their reach worldwide. As an effect of this growth, many vernacular social media platforms have also emerged, focusing more on the diverse languages in the specific regions. Tamil has also emerged as a popular language for use on social media platforms due to the increasing penetration of vernacular media like Sharechat and Moj, which focus more on local Indian languages than English and encourage their users to converse in Indic languages. Abusive language remains a significant challenge in the social media framework and more so when we consider languages like Tamil, which are low-resource languages and have poor performance on multilingual models and lack language-specific models. Based on this shared task, “Abusive Comment detection in Tamil@DravidianLangTech-ACL 2022”, we present an exploration of different techniques used to tackle and increase the accuracy of our models using data augmentation in NLP. We also show the results of these techniques.
%R 10.18653/v1/2022.dravidianlangtech-1.22
%U https://aclanthology.org/2022.dravidianlangtech-1.22/
%U https://doi.org/10.18653/v1/2022.dravidianlangtech-1.22
%P 138-144
Markdown (Informal)
[BpHigh@TamilNLP-ACL2022: Effects of Data Augmentation on Indic-Transformer based classifier for Abusive Comments Detection in Tamil](https://aclanthology.org/2022.dravidianlangtech-1.22/) (Pahwa, DravidianLangTech 2022)
ACL