% AfriSpeech-200, journal article in TACL volume 11 (2023).
% Case-sensitive terms in the title are protected by bracing whole words
% (not single letters) so that sentence-casing styles keep their capitals
% without disturbing kerning or hyphenation.
@article{olatunji-etal-2023-afrispeech,
  title     = {{AfriSpeech-200}: {Pan-African} Accented Speech Dataset for Clinical and General Domain {ASR}},
  author    = {Olatunji, Tobi and
               Afonja, Tejumade and
               Yadavalli, Aditya and
               Emezue, Chris Chinenye and
               Singh, Sahib and
               Dossou, Bonaventure F. P. and
               Osuchukwu, Joanne and
               Osei, Salomey and
               Tonja, Atnafu Lambebo and
               Etori, Naome and
               Mbataku, Clinton},
  journal   = {Transactions of the Association for Computational Linguistics},
  volume    = {11},
  year      = {2023},
  address   = {Cambridge, MA},
  publisher = {MIT Press},
  url       = {https://aclanthology.org/2023.tacl-1.93},
  doi       = {10.1162/tacl_a_00627},
  pages     = {1669--1685},
  abstract  = {Africa has a very poor doctor-to-patient ratio. At very busy clinics, doctors could see 30+ patients per day{---}a heavy patient burden compared with developed countries{---}but productivity tools such as clinical automatic speech recognition (ASR) are lacking for these overworked clinicians. However, clinical ASR is mature, even ubiquitous, in developed nations, and clinician-reported performance of commercial clinical ASR systems is generally satisfactory. Furthermore, the recent performance of general domain ASR is approaching human accuracy. However, several gaps exist. Several publications have highlighted racial bias with speech-to-text algorithms and performance on minority accents lags significantly. To our knowledge, there is no publicly available research or benchmark on accented African clinical ASR, and speech data is non-existent for the majority of African accents. We release AfriSpeech, 200hrs of Pan-African English speech, 67,577 clips from 2,463 unique speakers across 120 indigenous accents from 13 countries for clinical and general domain ASR, a benchmark test set, with publicly available pre-trained models with SOTA performance on the AfriSpeech benchmark.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record. The record ID is the same string as the citekey identifier further down. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="olatunji-etal-2023-afrispeech">
<!-- Title of the article itself; the journal title lives in the host relatedItem. -->
<titleInfo>
<title>AfriSpeech-200: Pan-African Accented Speech Dataset for Clinical and General Domain ASR</title>
</titleInfo>
<!-- One personal name element per author (eleven in total), each carrying the marcrelator role "author". -->
<name type="personal">
<namePart type="given">Tobi</namePart>
<namePart type="family">Olatunji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tejumade</namePart>
<namePart type="family">Afonja</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aditya</namePart>
<namePart type="family">Yadavalli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<!-- Given names made of several tokens are split into one given namePart per token. -->
<name type="personal">
<namePart type="given">Chris</namePart>
<namePart type="given">Chinenye</namePart>
<namePart type="family">Emezue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sahib</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<!-- Initials are stored bare here (F, P), without trailing periods. -->
<name type="personal">
<namePart type="given">Bonaventure</namePart>
<namePart type="given">F</namePart>
<namePart type="given">P</namePart>
<namePart type="family">Dossou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joanne</namePart>
<namePart type="family">Osuchukwu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Salomey</namePart>
<namePart type="family">Osei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Atnafu</namePart>
<namePart type="given">Lambebo</namePart>
<namePart type="family">Tonja</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Naome</namePart>
<namePart type="family">Etori</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Clinton</namePart>
<namePart type="family">Mbataku</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<!-- Issue date of the article (year only). -->
<originInfo>
<dateIssued>2023</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre authority="bibutilsgt">journal article</genre>
<!-- Host item: the journal that contains the article, with its publisher and place of publication. -->
<relatedItem type="host">
<titleInfo>
<title>Transactions of the Association for Computational Linguistics</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
<publisher>MIT Press</publisher>
<place>
<placeTerm type="text">Cambridge, MA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre authority="bibutilsgt">academic journal</genre>
</relatedItem>
<abstract>Africa has a very poor doctor-to-patient ratio. At very busy clinics, doctors could see 30+ patients per day—a heavy patient burden compared with developed countries—but productivity tools such as clinical automatic speech recognition (ASR) are lacking for these overworked clinicians. However, clinical ASR is mature, even ubiquitous, in developed nations, and clinician-reported performance of commercial clinical ASR systems is generally satisfactory. Furthermore, the recent performance of general domain ASR is approaching human accuracy. However, several gaps exist. Several publications have highlighted racial bias with speech-to-text algorithms and performance on minority accents lags significantly. To our knowledge, there is no publicly available research or benchmark on accented African clinical ASR, and speech data is non-existent for the majority of African accents. We release AfriSpeech, 200hrs of Pan-African English speech, 67,577 clips from 2,463 unique speakers across 120 indigenous accents from 13 countries for clinical and general domain ASR, a benchmark test set, with publicly available pre-trained models with SOTA performance on the AfriSpeech benchmark.</abstract>
<!-- Identifiers: the citation key and the bare DOI (no resolver prefix); the landing-page URL sits under location. -->
<identifier type="citekey">olatunji-etal-2023-afrispeech</identifier>
<identifier type="doi">10.1162/tacl_a_00627</identifier>
<location>
<url>https://aclanthology.org/2023.tacl-1.93</url>
</location>
<!-- Position inside the host: date, volume number, and first and last page. -->
<part>
<date>2023</date>
<detail type="volume"><number>11</number></detail>
<extent unit="page">
<start>1669</start>
<end>1685</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Journal Article
%T AfriSpeech-200: Pan-African Accented Speech Dataset for Clinical and General Domain ASR
%A Olatunji, Tobi
%A Afonja, Tejumade
%A Yadavalli, Aditya
%A Emezue, Chris Chinenye
%A Singh, Sahib
%A Dossou, Bonaventure F. P.
%A Osuchukwu, Joanne
%A Osei, Salomey
%A Tonja, Atnafu Lambebo
%A Etori, Naome
%A Mbataku, Clinton
%J Transactions of the Association for Computational Linguistics
%D 2023
%V 11
%I MIT Press
%C Cambridge, MA
%F olatunji-etal-2023-afrispeech
%X Africa has a very poor doctor-to-patient ratio. At very busy clinics, doctors could see 30+ patients per day—a heavy patient burden compared with developed countries—but productivity tools such as clinical automatic speech recognition (ASR) are lacking for these overworked clinicians. However, clinical ASR is mature, even ubiquitous, in developed nations, and clinician-reported performance of commercial clinical ASR systems is generally satisfactory. Furthermore, the recent performance of general domain ASR is approaching human accuracy. However, several gaps exist. Several publications have highlighted racial bias with speech-to-text algorithms and performance on minority accents lags significantly. To our knowledge, there is no publicly available research or benchmark on accented African clinical ASR, and speech data is non-existent for the majority of African accents. We release AfriSpeech, 200hrs of Pan-African English speech, 67,577 clips from 2,463 unique speakers across 120 indigenous accents from 13 countries for clinical and general domain ASR, a benchmark test set, with publicly available pre-trained models with SOTA performance on the AfriSpeech benchmark.
%R 10.1162/tacl_a_00627
%U https://aclanthology.org/2023.tacl-1.93
%U https://doi.org/10.1162/tacl_a_00627
%P 1669-1685
Markdown (Informal)
[AfriSpeech-200: Pan-African Accented Speech Dataset for Clinical and General Domain ASR](https://aclanthology.org/2023.tacl-1.93) (Olatunji et al., TACL 2023)
ACL
- Tobi Olatunji, Tejumade Afonja, Aditya Yadavalli, Chris Chinenye Emezue, Sahib Singh, Bonaventure F. P. Dossou, Joanne Osuchukwu, Salomey Osei, Atnafu Lambebo Tonja, Naome Etori, and Clinton Mbataku. 2023. AfriSpeech-200: Pan-African Accented Speech Dataset for Clinical and General Domain ASR. Transactions of the Association for Computational Linguistics, 11:1669–1685.