@inproceedings{lonergan-etal-2022-automatic,
title = "Automatic Speech Recognition for {I}rish: the {ABAIR}-{{\'E}IST} System",
author = "Lonergan, Liam and
Qian, Mengjie and
Berthelsen, Harald and
Murphy, Andy and
Wendler, Christoph and
N{\'\i} Chiar{\'a}in, Neasa and
Gobl, Christer and
N{\'\i} Chasaide, Ailbhe",
editor = "Fransen, Theodorus and
Lamb, William and
Prys, Delyth",
booktitle = "Proceedings of the 4th Celtic Language Technology Workshop within LREC2022",
month = jun,
year = "2022",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2022.cltw-1.7",
pages = "47--51",
abstract = "This paper describes {\'E}IST, an automatic speech recogniser for Irish, developed as part of the ongoing ABAIR initiative, combining (1) acoustic models, (2) pronunciation lexicons and (3) language models into a hybrid system. A priority for now is a system that can deal with the multiple diverse native-speaker dialects. Consequently, (1) was built using predominately native-speaker speech, which included earlier recordings used for synthesis development as well as more diverse recordings obtained using the M{\'\i}leGl{\'o}r platform. The pronunciation variation across the dialects is a particular challenge in the development of (2) and is explored by testing both Trans-dialect and Multi-dialect letter-to-sound rules. Two approaches to language modelling (3) are used in the hybrid system, a simple n-gram model and recurrent neural network lattice rescoring, the latter garnering impressive performance improvements. The system is evaluated using a test set that is comprised of both native and non-native speakers, which allows for some inferences to be made on the performance of the system on both cohorts.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="lonergan-etal-2022-automatic">
<titleInfo>
<title>Automatic Speech Recognition for Irish: the ABAIR-ÉIST System</title>
</titleInfo>
<name type="personal">
<namePart type="given">Liam</namePart>
<namePart type="family">Lonergan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mengjie</namePart>
<namePart type="family">Qian</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Harald</namePart>
<namePart type="family">Berthelsen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andy</namePart>
<namePart type="family">Murphy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christoph</namePart>
<namePart type="family">Wendler</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Neasa</namePart>
<namePart type="family">Ní Chiaráin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christer</namePart>
<namePart type="family">Gobl</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ailbhe</namePart>
<namePart type="family">Ní Chasaide</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 4th Celtic Language Technology Workshop within LREC2022</title>
</titleInfo>
<name type="personal">
<namePart type="given">Theodorus</namePart>
<namePart type="family">Fransen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">William</namePart>
<namePart type="family">Lamb</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Delyth</namePart>
<namePart type="family">Prys</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper describes ÉIST, an automatic speech recogniser for Irish, developed as part of the ongoing ABAIR initiative, combining (1) acoustic models, (2) pronunciation lexicons and (3) language models into a hybrid system. A priority for now is a system that can deal with the multiple diverse native-speaker dialects. Consequently, (1) was built using predominately native-speaker speech, which included earlier recordings used for synthesis development as well as more diverse recordings obtained using the MíleGlór platform. The pronunciation variation across the dialects is a particular challenge in the development of (2) and is explored by testing both Trans-dialect and Multi-dialect letter-to-sound rules. Two approaches to language modelling (3) are used in the hybrid system, a simple n-gram model and recurrent neural network lattice rescoring, the latter garnering impressive performance improvements. The system is evaluated using a test set that is comprised of both native and non-native speakers, which allows for some inferences to be made on the performance of the system on both cohorts.</abstract>
<identifier type="citekey">lonergan-etal-2022-automatic</identifier>
<location>
<url>https://aclanthology.org/2022.cltw-1.7</url>
</location>
<part>
<date>2022-06</date>
<extent unit="page">
<start>47</start>
<end>51</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Automatic Speech Recognition for Irish: the ABAIR-ÉIST System
%A Lonergan, Liam
%A Qian, Mengjie
%A Berthelsen, Harald
%A Murphy, Andy
%A Wendler, Christoph
%A Ní Chiaráin, Neasa
%A Gobl, Christer
%A Ní Chasaide, Ailbhe
%Y Fransen, Theodorus
%Y Lamb, William
%Y Prys, Delyth
%S Proceedings of the 4th Celtic Language Technology Workshop within LREC2022
%D 2022
%8 June
%I European Language Resources Association
%C Marseille, France
%F lonergan-etal-2022-automatic
%X This paper describes ÉIST, an automatic speech recogniser for Irish, developed as part of the ongoing ABAIR initiative, combining (1) acoustic models, (2) pronunciation lexicons and (3) language models into a hybrid system. A priority for now is a system that can deal with the multiple diverse native-speaker dialects. Consequently, (1) was built using predominately native-speaker speech, which included earlier recordings used for synthesis development as well as more diverse recordings obtained using the MíleGlór platform. The pronunciation variation across the dialects is a particular challenge in the development of (2) and is explored by testing both Trans-dialect and Multi-dialect letter-to-sound rules. Two approaches to language modelling (3) are used in the hybrid system, a simple n-gram model and recurrent neural network lattice rescoring, the latter garnering impressive performance improvements. The system is evaluated using a test set that is comprised of both native and non-native speakers, which allows for some inferences to be made on the performance of the system on both cohorts.
%U https://aclanthology.org/2022.cltw-1.7
%P 47-51
Markdown (Informal)
[Automatic Speech Recognition for Irish: the ABAIR-ÉIST System](https://aclanthology.org/2022.cltw-1.7) (Lonergan et al., CLTW 2022)
ACL
- Liam Lonergan, Mengjie Qian, Harald Berthelsen, Andy Murphy, Christoph Wendler, Neasa Ní Chiaráin, Christer Gobl, and Ailbhe Ní Chasaide. 2022. Automatic Speech Recognition for Irish: the ABAIR-ÉIST System. In Proceedings of the 4th Celtic Language Technology Workshop within LREC2022, pages 47–51, Marseille, France. European Language Resources Association.