@inproceedings{diddee-etal-2024-inmt,
title = "{INMT}-Lite: Accelerating Low-Resource Language Data Collection via Offline Interactive Neural Machine Translation",
author = "Diddee, Harshita and
Shukla, Anurag and
Ganu, Tanuja and
Seshadri, Vivek and
Dandapat, Sandipan and
Choudhury, Monojit and
Bali, Kalika",
editor = "Calzolari, Nicoletta and
Kan, Min-Yen and
Hoste, Veronique and
Lenci, Alessandro and
Sakti, Sakriani and
Xue, Nianwen",
booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.lrec-main.797/",
pages = "9097--9109",
abstract = "A steady increase in the performance of Massively Multilingual Models (MMLMs) has contributed to their rapidly increasing use in data collection pipelines. Interactive Neural Machine Translation (INMT) systems are one class of tools that can utilize MMLMs to promote such data collection in several under-resourced languages. However, these tools are often not adapted to the deployment constraints that native language speakers operate in, as bloated, online inference-oriented MMLMs trained for data-rich languages, drive them. INMT-Lite addresses these challenges through its support of (1) three different modes of Internet-independent deployment and (2) a suite of four assistive interfaces suitable for (3) data-sparse languages. We perform an extensive user study for INMT-Lite with an under-resourced language community, Gondi, to find that INMT-Lite improves the data generation experience of community members along multiple axes, such as cognitive load, task productivity, and interface interaction time and effort, without compromising on the quality of the generated translations.INMT-Lite`s code is open-sourced to further research in this domain."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="diddee-etal-2024-inmt">
<titleInfo>
<title>INMT-Lite: Accelerating Low-Resource Language Data Collection via Offline Interactive Neural Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Harshita</namePart>
<namePart type="family">Diddee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anurag</namePart>
<namePart type="family">Shukla</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tanuja</namePart>
<namePart type="family">Ganu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vivek</namePart>
<namePart type="family">Seshadri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sandipan</namePart>
<namePart type="family">Dandapat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Monojit</namePart>
<namePart type="family">Choudhury</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kalika</namePart>
<namePart type="family">Bali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>A steady increase in the performance of Massively Multilingual Models (MMLMs) has contributed to their rapidly increasing use in data collection pipelines. Interactive Neural Machine Translation (INMT) systems are one class of tools that can utilize MMLMs to promote such data collection in several under-resourced languages. However, these tools are often not adapted to the deployment constraints that native language speakers operate in, as bloated, online inference-oriented MMLMs trained for data-rich languages, drive them. INMT-Lite addresses these challenges through its support of (1) three different modes of Internet-independent deployment and (2) a suite of four assistive interfaces suitable for (3) data-sparse languages. We perform an extensive user study for INMT-Lite with an under-resourced language community, Gondi, to find that INMT-Lite improves the data generation experience of community members along multiple axes, such as cognitive load, task productivity, and interface interaction time and effort, without compromising on the quality of the generated translations.INMT-Lite‘s code is open-sourced to further research in this domain.</abstract>
<identifier type="citekey">diddee-etal-2024-inmt</identifier>
<location>
<url>https://aclanthology.org/2024.lrec-main.797/</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>9097</start>
<end>9109</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T INMT-Lite: Accelerating Low-Resource Language Data Collection via Offline Interactive Neural Machine Translation
%A Diddee, Harshita
%A Shukla, Anurag
%A Ganu, Tanuja
%A Seshadri, Vivek
%A Dandapat, Sandipan
%A Choudhury, Monojit
%A Bali, Kalika
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F diddee-etal-2024-inmt
%X A steady increase in the performance of Massively Multilingual Models (MMLMs) has contributed to their rapidly increasing use in data collection pipelines. Interactive Neural Machine Translation (INMT) systems are one class of tools that can utilize MMLMs to promote such data collection in several under-resourced languages. However, these tools are often not adapted to the deployment constraints that native language speakers operate in, as bloated, online inference-oriented MMLMs trained for data-rich languages, drive them. INMT-Lite addresses these challenges through its support of (1) three different modes of Internet-independent deployment and (2) a suite of four assistive interfaces suitable for (3) data-sparse languages. We perform an extensive user study for INMT-Lite with an under-resourced language community, Gondi, to find that INMT-Lite improves the data generation experience of community members along multiple axes, such as cognitive load, task productivity, and interface interaction time and effort, without compromising on the quality of the generated translations.INMT-Lite‘s code is open-sourced to further research in this domain.
%U https://aclanthology.org/2024.lrec-main.797/
%P 9097-9109
Markdown (Informal)
[INMT-Lite: Accelerating Low-Resource Language Data Collection via Offline Interactive Neural Machine Translation](https://aclanthology.org/2024.lrec-main.797/) (Diddee et al., LREC-COLING 2024)
ACL
- Harshita Diddee, Anurag Shukla, Tanuja Ganu, Vivek Seshadri, Sandipan Dandapat, Monojit Choudhury, and Kalika Bali. 2024. INMT-Lite: Accelerating Low-Resource Language Data Collection via Offline Interactive Neural Machine Translation. In Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024), pages 9097–9109, Torino, Italia. ELRA and ICCL.