% ACL Anthology record for a paper in the LT-EDI 2022 workshop proceedings.
% Braces inside the title protect acronyms and the proper noun (whole word)
% from style-driven lowercasing. Editor initials carry a period, matching the
% EndNote export of this same record.
@inproceedings{srinivasan-etal-2022-ssncse,
  author    = {Srinivasan, Dhanya and
               B, Bharathi and
               Durairaj, Thenmozhi and
               B, Senthil Kumar},
  title     = {{SSNCSE}{\_}{NLP}@{LT}-{EDI}-{ACL}2022: Speech Recognition for Vulnerable Individuals in {Tamil} using pre-trained {XLSR} models},
  editor    = {Chakravarthi, Bharathi Raja and
               Bharathi, B. and
               McCrae, John P. and
               Zarrouk, Manel and
               Bali, Kalika and
               Buitelaar, Paul},
  booktitle = {Proceedings of the Second Workshop on Language Technology for Equality, Diversity and Inclusion},
  month     = may,
  year      = {2022},
  address   = {Dublin, Ireland},
  publisher = {Association for Computational Linguistics},
  pages     = {317--320},
  doi       = {10.18653/v1/2022.ltedi-1.48},
  url       = {https://aclanthology.org/2022.ltedi-1.48/},
  abstract  = {Automatic speech recognition is a tool used to transform human speech into a written form. It is used in a variety of avenues, such as in voice commands, customer, service and more. It has emerged as an essential tool in the digitisation of daily life. It has been known to be of vital importance in making the lives of elderly and disabled people much easier. In this paper we describe an automatic speech recognition model, determined by using three pre-trained models, fine-tuned from the Facebook XLSR Wav2Vec2 model, which was trained using the Common Voice Dataset. The best model for speech recognition in Tamil is determined by finding the word error rate of the data. This work explains the submission made by SSNCSE{\_}NLP in the shared task organized by LT-EDI at ACL 2022. A word error rate of 39.4512 is achieved.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record, keyed by its citation key. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="srinivasan-etal-2022-ssncse">

    <!-- Title of the paper -->
    <titleInfo>
      <title>SSNCSE_NLP@LT-EDI-ACL2022: Speech Recognition for Vulnerable Individuals in Tamil using pre-trained XLSR models</title>
    </titleInfo>

    <!-- Authors, in listed order -->
    <name type="personal">
      <namePart type="given">Dhanya</namePart>
      <namePart type="family">Srinivasan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Bharathi</namePart>
      <namePart type="family">B</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Thenmozhi</namePart>
      <namePart type="family">Durairaj</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Senthil</namePart>
      <namePart type="given">Kumar</namePart>
      <namePart type="family">B</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <!-- Issue date of the paper (year-month) -->
    <originInfo>
      <dateIssued>2022-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume, with its editors and publisher -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Second Workshop on Language Technology for Equality, Diversity and Inclusion</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Bharathi</namePart>
        <namePart type="given">Raja</namePart>
        <namePart type="family">Chakravarthi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">B</namePart>
        <namePart type="family">Bharathi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">John</namePart>
        <namePart type="given">P</namePart>
        <namePart type="family">McCrae</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Manel</namePart>
        <namePart type="family">Zarrouk</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Kalika</namePart>
        <namePart type="family">Bali</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Paul</namePart>
        <namePart type="family">Buitelaar</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Dublin, Ireland</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>

    <abstract>Automatic speech recognition is a tool used to transform human speech into a written form. It is used in a variety of avenues, such as in voice commands, customer, service and more. It has emerged as an essential tool in the digitisation of daily life. It has been known to be of vital importance in making the lives of elderly and disabled people much easier. In this paper we describe an automatic speech recognition model, determined by using three pre-trained models, fine-tuned from the Facebook XLSR Wav2Vec2 model, which was trained using the Common Voice Dataset. The best model for speech recognition in Tamil is determined by finding the word error rate of the data. This work explains the submission made by SSNCSE_NLP in the shared task organized by LT-EDI at ACL 2022. A word error rate of 39.4512 is achieved.</abstract>

    <!-- Identifiers and landing page -->
    <identifier type="citekey">srinivasan-etal-2022-ssncse</identifier>
    <identifier type="doi">10.18653/v1/2022.ltedi-1.48</identifier>
    <location>
      <url>https://aclanthology.org/2022.ltedi-1.48/</url>
    </location>

    <!-- Page extent within the host volume -->
    <part>
      <date>2022-05</date>
      <extent unit="page">
        <start>317</start>
        <end>320</end>
      </extent>
    </part>

  </mods>
</modsCollection>
%0 Conference Proceedings
%T SSNCSE_NLP@LT-EDI-ACL2022: Speech Recognition for Vulnerable Individuals in Tamil using pre-trained XLSR models
%A Srinivasan, Dhanya
%A B, Bharathi
%A Durairaj, Thenmozhi
%A B, Senthil Kumar
%Y Chakravarthi, Bharathi Raja
%Y Bharathi, B.
%Y McCrae, John P.
%Y Zarrouk, Manel
%Y Bali, Kalika
%Y Buitelaar, Paul
%S Proceedings of the Second Workshop on Language Technology for Equality, Diversity and Inclusion
%D 2022
%8 May
%I Association for Computational Linguistics
%C Dublin, Ireland
%F srinivasan-etal-2022-ssncse
%X Automatic speech recognition is a tool used to transform human speech into a written form. It is used in a variety of avenues, such as in voice commands, customer, service and more. It has emerged as an essential tool in the digitisation of daily life. It has been known to be of vital importance in making the lives of elderly and disabled people much easier. In this paper we describe an automatic speech recognition model, determined by using three pre-trained models, fine-tuned from the Facebook XLSR Wav2Vec2 model, which was trained using the Common Voice Dataset. The best model for speech recognition in Tamil is determined by finding the word error rate of the data. This work explains the submission made by SSNCSE_NLP in the shared task organized by LT-EDI at ACL 2022. A word error rate of 39.4512 is achieved.
%R 10.18653/v1/2022.ltedi-1.48
%U https://aclanthology.org/2022.ltedi-1.48/
%U https://doi.org/10.18653/v1/2022.ltedi-1.48
%P 317-320
Markdown (Informal)
[SSNCSE_NLP@LT-EDI-ACL2022: Speech Recognition for Vulnerable Individuals in Tamil using pre-trained XLSR models](https://aclanthology.org/2022.ltedi-1.48/) (Srinivasan et al., LTEDI 2022)
ACL