@inproceedings{kabongo-kabenamualu-etal-2022-listra,
title = "{L}i{ST}ra Automatic Speech Translation: {E}nglish to {L}ingala Case Study",
author = "Kabongo Kabenamualu, Salomon and
Marivate, Vukosi and
Kamper, Herman",
editor = {S{\"a}lev{\"a}, Jonne and
Lignos, Constantine},
booktitle = "Proceedings of the Workshop on Dataset Creation for Lower-Resourced Languages within the 13th Language Resources and Evaluation Conference",
month = jun,
year = "2022",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2022.dclrl-1.8/",
pages = "63--67",
abstract = "In recent years there has been great interest in addressing the data scarcity of African languages and providing baseline models for different Natural Language Processing tasks (Orife et al., 2020). Several initiatives (Nekoto et al., 2020) on the continent use the Bible as a data source to provide proof of concept for some NLP tasks. In this work, we present the Lingala Speech Translation (LiSTra) dataset, release a full pipeline for the construction of such datasets in other languages, and report baselines using both the traditional cascade approach (Automatic Speech Recognition - Machine Translation), and a revolutionary transformer-based End-2-End architecture (Liu et al., 2020) with a custom interactive attention that allows information sharing between the recognition decoder and the translation decoder."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kabongo-kabenamualu-etal-2022-listra">
<titleInfo>
<title>LiSTra Automatic Speech Translation: English to Lingala Case Study</title>
</titleInfo>
<name type="personal">
<namePart type="given">Salomon</namePart>
<namePart type="family">Kabongo Kabenamualu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vukosi</namePart>
<namePart type="family">Marivate</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Herman</namePart>
<namePart type="family">Kamper</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Workshop on Dataset Creation for Lower-Resourced Languages within the 13th Language Resources and Evaluation Conference</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jonne</namePart>
<namePart type="family">Sälevä</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Constantine</namePart>
<namePart type="family">Lignos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In recent years there has been great interest in addressing the data scarcity of African languages and providing baseline models for different Natural Language Processing tasks (Orife et al., 2020). Several initiatives (Nekoto et al., 2020) on the continent use the Bible as a data source to provide proof of concept for some NLP tasks. In this work, we present the Lingala Speech Translation (LiSTra) dataset, release a full pipeline for the construction of such datasets in other languages, and report baselines using both the traditional cascade approach (Automatic Speech Recognition - Machine Translation), and a revolutionary transformer-based End-2-End architecture (Liu et al., 2020) with a custom interactive attention that allows information sharing between the recognition decoder and the translation decoder.</abstract>
<identifier type="citekey">kabongo-kabenamualu-etal-2022-listra</identifier>
<location>
<url>https://aclanthology.org/2022.dclrl-1.8/</url>
</location>
<part>
<date>2022-06</date>
<extent unit="page">
<start>63</start>
<end>67</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T LiSTra Automatic Speech Translation: English to Lingala Case Study
%A Kabongo Kabenamualu, Salomon
%A Marivate, Vukosi
%A Kamper, Herman
%Y Sälevä, Jonne
%Y Lignos, Constantine
%S Proceedings of the Workshop on Dataset Creation for Lower-Resourced Languages within the 13th Language Resources and Evaluation Conference
%D 2022
%8 June
%I European Language Resources Association
%C Marseille, France
%F kabongo-kabenamualu-etal-2022-listra
%X In recent years there has been great interest in addressing the data scarcity of African languages and providing baseline models for different Natural Language Processing tasks (Orife et al., 2020). Several initiatives (Nekoto et al., 2020) on the continent use the Bible as a data source to provide proof of concept for some NLP tasks. In this work, we present the Lingala Speech Translation (LiSTra) dataset, release a full pipeline for the construction of such datasets in other languages, and report baselines using both the traditional cascade approach (Automatic Speech Recognition - Machine Translation), and a revolutionary transformer-based End-2-End architecture (Liu et al., 2020) with a custom interactive attention that allows information sharing between the recognition decoder and the translation decoder.
%U https://aclanthology.org/2022.dclrl-1.8/
%P 63-67
Markdown (Informal)
[LiSTra Automatic Speech Translation: English to Lingala Case Study](https://aclanthology.org/2022.dclrl-1.8/) (Kabongo Kabenamualu et al., DCLRL 2022)
ACL
- Salomon Kabongo Kabenamualu, Vukosi Marivate, and Herman Kamper. 2022. LiSTra Automatic Speech Translation: English to Lingala Case Study. In Proceedings of the Workshop on Dataset Creation for Lower-Resourced Languages within the 13th Language Resources and Evaluation Conference, pages 63–67, Marseille, France. European Language Resources Association.