@inproceedings{kumari-etal-2023-ml,
title = "{ML}{\&}{AI}{\_}{IIITR}anchi@{D}ravidian{L}ang{T}ech: Fine-Tuning {I}ndic{BERT} for Exploring Language-specific Features for Sentiment Classification in Code-Mixed {D}ravidian Languages",
author = "Kumari, Kirti and
Jha, Shirish Shekhar and
Dayanand, Zarikunte Kunal and
Sharma, Praneesh",
editor = "Chakravarthi, Bharathi R. and
Priyadharshini, Ruba and
M, Anand Kumar and
Thavareesan, Sajeetha and
Sherly, Elizabeth",
booktitle = "Proceedings of the Third Workshop on Speech and Language Technologies for Dravidian Languages",
month = sep,
year = "2023",
address = "Varna, Bulgaria",
publisher = "INCOMA Ltd., Shoumen, Bulgaria",
url = "https://aclanthology.org/2023.dravidianlangtech-1.27/",
pages = "192--197",
abstract = "Code-mixing presents challenges to sentiment analysis due to limited availability of annotated data found on low-resource languages such as Tulu. To address this issue, comprehensive work was done in creating a gold-standard labeled corpus that incorporates both languages while facilitating accurate analyses of sentiments involved. Encapsulated within this research was the employed use of varied techniques including data collection, cleaning processes as well as preprocessing leading up to effective annotation along with finding results using fine tuning indic bert and performing experiments over tf-idf plus bag of words. The outcome is an invaluable resource for developing custom-tailored models meant solely for analyzing sentiments involved with code mixed texts across Tamil and Tulu domain limits; allowing a focused insight into what makes up such expressions. Remarkably, the adoption of hybrid models yielded promising outcomes, culminating in a 10th rank achievement for Tulu, and a 14thrank achievement for Tamil, supported by an macro F1 score of 0.471 and 0.124 respectively."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kumari-etal-2023-ml">
<titleInfo>
<title>ML&AI_IIITRanchi@DravidianLangTech: Fine-Tuning IndicBERT for Exploring Language-specific Features for Sentiment Classification in Code-Mixed Dravidian Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kirti</namePart>
<namePart type="family">Kumari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shirish</namePart>
<namePart type="given">Shekhar</namePart>
<namePart type="family">Jha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zarikunte</namePart>
<namePart type="given">Kunal</namePart>
<namePart type="family">Dayanand</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Praneesh</namePart>
<namePart type="family">Sharma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Third Workshop on Speech and Language Technologies for Dravidian Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bharathi</namePart>
<namePart type="given">R</namePart>
<namePart type="family">Chakravarthi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruba</namePart>
<namePart type="family">Priyadharshini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anand</namePart>
<namePart type="given">Kumar</namePart>
<namePart type="family">M</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sajeetha</namePart>
<namePart type="family">Thavareesan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elizabeth</namePart>
<namePart type="family">Sherly</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
<place>
<placeTerm type="text">Varna, Bulgaria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Code-mixing presents challenges to sentiment analysis due to limited availability of annotated data found on low-resource languages such as Tulu. To address this issue, comprehensive work was done in creating a gold-standard labeled corpus that incorporates both languages while facilitating accurate analyses of sentiments involved. Encapsulated within this research was the employed use of varied techniques including data collection, cleaning processes as well as preprocessing leading up to effective annotation along with finding results using fine tuning indic bert and performing experiments over tf-idf plus bag of words. The outcome is an invaluable resource for developing custom-tailored models meant solely for analyzing sentiments involved with code mixed texts across Tamil and Tulu domain limits; allowing a focused insight into what makes up such expressions. Remarkably, the adoption of hybrid models yielded promising outcomes, culminating in a 10th rank achievement for Tulu, and a 14thrank achievement for Tamil, supported by an macro F1 score of 0.471 and 0.124 respectively.</abstract>
<identifier type="citekey">kumari-etal-2023-ml</identifier>
<location>
<url>https://aclanthology.org/2023.dravidianlangtech-1.27/</url>
</location>
<part>
<date>2023-09</date>
<extent unit="page">
<start>192</start>
<end>197</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T ML&AI_IIITRanchi@DravidianLangTech: Fine-Tuning IndicBERT for Exploring Language-specific Features for Sentiment Classification in Code-Mixed Dravidian Languages
%A Kumari, Kirti
%A Jha, Shirish Shekhar
%A Dayanand, Zarikunte Kunal
%A Sharma, Praneesh
%Y Chakravarthi, Bharathi R.
%Y Priyadharshini, Ruba
%Y M, Anand Kumar
%Y Thavareesan, Sajeetha
%Y Sherly, Elizabeth
%S Proceedings of the Third Workshop on Speech and Language Technologies for Dravidian Languages
%D 2023
%8 September
%I INCOMA Ltd., Shoumen, Bulgaria
%C Varna, Bulgaria
%F kumari-etal-2023-ml
%X Code-mixing presents challenges to sentiment analysis due to limited availability of annotated data found on low-resource languages such as Tulu. To address this issue, comprehensive work was done in creating a gold-standard labeled corpus that incorporates both languages while facilitating accurate analyses of sentiments involved. Encapsulated within this research was the employed use of varied techniques including data collection, cleaning processes as well as preprocessing leading up to effective annotation along with finding results using fine tuning indic bert and performing experiments over tf-idf plus bag of words. The outcome is an invaluable resource for developing custom-tailored models meant solely for analyzing sentiments involved with code mixed texts across Tamil and Tulu domain limits; allowing a focused insight into what makes up such expressions. Remarkably, the adoption of hybrid models yielded promising outcomes, culminating in a 10th rank achievement for Tulu, and a 14thrank achievement for Tamil, supported by an macro F1 score of 0.471 and 0.124 respectively.
%U https://aclanthology.org/2023.dravidianlangtech-1.27/
%P 192-197
Markdown (Informal)
[ML&AI_IIITRanchi@DravidianLangTech: Fine-Tuning IndicBERT for Exploring Language-specific Features for Sentiment Classification in Code-Mixed Dravidian Languages](https://aclanthology.org/2023.dravidianlangtech-1.27/) (Kumari et al., DravidianLangTech 2023)
ACL