@inproceedings{oneil-etal-2023-comparing,
title = "Comparing methods of orthographic conversion for B{\`a}s{\`a}{\'a}, a language of {C}ameroon",
author = "O{'}neil, Alexandra and
Swanson, Daniel and
Pugh, Robert and
Tyers, Francis and
Ngue Um, Emmanuel",
editor = "Mabuya, Rooweither and
Mthobela, Don and
Setaka, Mmasibidi and
Van Zaanen, Menno",
booktitle = "Proceedings of the Fourth workshop on Resources for African Indigenous Languages (RAIL 2023)",
month = may,
year = "2023",
address = "Dubrovnik, Croatia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.rail-1.11/",
doi = "10.18653/v1/2023.rail-1.11",
pages = "97--105",
abstract = "Orthographical standardization is a milestone in a language's documentation and the development of its resources. However, texts written in former orthographies remain relevant to the language's history and development and therefore must be converted to the standardized orthography. Ensuring a language has access to the orthographically standardized version of all of its recorded texts is important in the development of resources as it provides additional textual resources for training, supports contribution of authors using former writing systems, and provides information about the development of the language. This paper evaluates the performance of natural language processing methods, specifically Finite State Transducers and Long Short-term Memory networks, for the orthographical conversion of B{\`a}s{\`a}{\'a} texts from the Protestant missionary orthography to the now-standard AGLC orthography, with the conclusion that LSTMs are somewhat more effective in the absence of explicit lexical information."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="oneil-etal-2023-comparing">
<titleInfo>
<title>Comparing methods of orthographic conversion for Bàsàá, a language of Cameroon</title>
</titleInfo>
<name type="personal">
<namePart type="given">Alexandra</namePart>
<namePart type="family">O’neil</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Swanson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Robert</namePart>
<namePart type="family">Pugh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Francis</namePart>
<namePart type="family">Tyers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Emmanuel</namePart>
<namePart type="family">Ngue Um</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fourth workshop on Resources for African Indigenous Languages (RAIL 2023)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Rooweither</namePart>
<namePart type="family">Mabuya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Don</namePart>
<namePart type="family">Mthobela</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mmasibidi</namePart>
<namePart type="family">Setaka</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Menno</namePart>
<namePart type="family">Van Zaanen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dubrovnik, Croatia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Orthographical standardization is a milestone in a language’s documentation and the development of its resources. However, texts written in former orthographies remain relevant to the language’s history and development and therefore must be converted to the standardized orthography. Ensuring a language has access to the orthographically standardized version of all of its recorded texts is important in the development of resources as it provides additional textual resources for training, supports contribution of authors using former writing systems, and provides information about the development of the language. This paper evaluates the performance of natural language processing methods, specifically Finite State Transducers and Long Short-term Memory networks, for the orthographical conversion of Bàsàá texts from the Protestant missionary orthography to the now-standard AGLC orthography, with the conclusion that LSTMs are somewhat more effective in the absence of explicit lexical information.</abstract>
<identifier type="citekey">oneil-etal-2023-comparing</identifier>
<identifier type="doi">10.18653/v1/2023.rail-1.11</identifier>
<location>
<url>https://aclanthology.org/2023.rail-1.11/</url>
</location>
<part>
<date>2023-05</date>
<extent unit="page">
<start>97</start>
<end>105</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Comparing methods of orthographic conversion for Bàsàá, a language of Cameroon
%A O’neil, Alexandra
%A Swanson, Daniel
%A Pugh, Robert
%A Tyers, Francis
%A Ngue Um, Emmanuel
%Y Mabuya, Rooweither
%Y Mthobela, Don
%Y Setaka, Mmasibidi
%Y Van Zaanen, Menno
%S Proceedings of the Fourth workshop on Resources for African Indigenous Languages (RAIL 2023)
%D 2023
%8 May
%I Association for Computational Linguistics
%C Dubrovnik, Croatia
%F oneil-etal-2023-comparing
%X Orthographical standardization is a milestone in a language’s documentation and the development of its resources. However, texts written in former orthographies remain relevant to the language’s history and development and therefore must be converted to the standardized orthography. Ensuring a language has access to the orthographically standardized version of all of its recorded texts is important in the development of resources as it provides additional textual resources for training, supports contribution of authors using former writing systems, and provides information about the development of the language. This paper evaluates the performance of natural language processing methods, specifically Finite State Transducers and Long Short-term Memory networks, for the orthographical conversion of Bàsàá texts from the Protestant missionary orthography to the now-standard AGLC orthography, with the conclusion that LSTMs are somewhat more effective in the absence of explicit lexical information.
%R 10.18653/v1/2023.rail-1.11
%U https://aclanthology.org/2023.rail-1.11/
%U https://doi.org/10.18653/v1/2023.rail-1.11
%P 97-105
Markdown (Informal)
[Comparing methods of orthographic conversion for Bàsàá, a language of Cameroon](https://aclanthology.org/2023.rail-1.11/) (O’neil et al., RAIL 2023)
ACL