@inproceedings{ortega-church-2023-research,
title = "A Research-Based Guide for the Creation and Deployment of a Low-Resource Machine Translation System",
author = "Ortega, John E. and
Church, Kenneth",
editor = "Mitkov, Ruslan and
Angelova, Galia",
booktitle = "Proceedings of the 14th International Conference on Recent Advances in Natural Language Processing",
month = sep,
year = "2023",
address = "Varna, Bulgaria",
publisher = "INCOMA Ltd., Shoumen, Bulgaria",
url = "https://aclanthology.org/2023.ranlp-1.88",
pages = "813--823",
abstract = "The machine translation (MT) field seems to focus heavily on English and other high-resource languages. Though, low-resource MT (LRMT) is receiving more attention than in the past. Successful LRMT systems (LRMTS) should make a compelling business case in terms of demand, cost and quality in order to be viable for end users. When used by communities where low-resource languages are spoken, LRMT quality should not only be determined by the use of traditional metrics like BLEU, but it should also take into account other factors in order to be inclusive and not risk overall rejection by the community. MT systems based on neural methods tend to perform better with high volumes of training data, but they may be unrealistic and even harmful for LRMT. It is obvious that for research purposes, the development and creation of LRMTS is necessary. However, in this article, we argue that two main workarounds could be considered by companies that are considering deployment of LRMTS in the wild: human-in-the-loop and sub-domains.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ortega-church-2023-research">
<titleInfo>
<title>A Research-Based Guide for the Creation and Deployment of a Low-Resource Machine Translation System</title>
</titleInfo>
<name type="personal">
<namePart type="given">John</namePart>
<namePart type="given">E</namePart>
<namePart type="family">Ortega</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kenneth</namePart>
<namePart type="family">Church</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 14th International Conference on Recent Advances in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Galia</namePart>
<namePart type="family">Angelova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
<place>
<placeTerm type="text">Varna, Bulgaria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The machine translation (MT) field seems to focus heavily on English and other high-resource languages. Though, low-resource MT (LRMT) is receiving more attention than in the past. Successful LRMT systems (LRMTS) should make a compelling business case in terms of demand, cost and quality in order to be viable for end users. When used by communities where low-resource languages are spoken, LRMT quality should not only be determined by the use of traditional metrics like BLEU, but it should also take into account other factors in order to be inclusive and not risk overall rejection by the community. MT systems based on neural methods tend to perform better with high volumes of training data, but they may be unrealistic and even harmful for LRMT. It is obvious that for research purposes, the development and creation of LRMTS is necessary. However, in this article, we argue that two main workarounds could be considered by companies that are considering deployment of LRMTS in the wild: human-in-the-loop and sub-domains.</abstract>
<identifier type="citekey">ortega-church-2023-research</identifier>
<location>
<url>https://aclanthology.org/2023.ranlp-1.88</url>
</location>
<part>
<date>2023-09</date>
<extent unit="page">
<start>813</start>
<end>823</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Research-Based Guide for the Creation and Deployment of a Low-Resource Machine Translation System
%A Ortega, John E.
%A Church, Kenneth
%Y Mitkov, Ruslan
%Y Angelova, Galia
%S Proceedings of the 14th International Conference on Recent Advances in Natural Language Processing
%D 2023
%8 September
%I INCOMA Ltd., Shoumen, Bulgaria
%C Varna, Bulgaria
%F ortega-church-2023-research
%X The machine translation (MT) field seems to focus heavily on English and other high-resource languages. Though, low-resource MT (LRMT) is receiving more attention than in the past. Successful LRMT systems (LRMTS) should make a compelling business case in terms of demand, cost and quality in order to be viable for end users. When used by communities where low-resource languages are spoken, LRMT quality should not only be determined by the use of traditional metrics like BLEU, but it should also take into account other factors in order to be inclusive and not risk overall rejection by the community. MT systems based on neural methods tend to perform better with high volumes of training data, but they may be unrealistic and even harmful for LRMT. It is obvious that for research purposes, the development and creation of LRMTS is necessary. However, in this article, we argue that two main workarounds could be considered by companies that are considering deployment of LRMTS in the wild: human-in-the-loop and sub-domains.
%U https://aclanthology.org/2023.ranlp-1.88
%P 813-823
Markdown (Informal)
[A Research-Based Guide for the Creation and Deployment of a Low-Resource Machine Translation System](https://aclanthology.org/2023.ranlp-1.88) (Ortega & Church, RANLP 2023)
ACL