@inproceedings{gomez-etal-2023-low,
  title     = {A Low-Resource Approach to the Grammatical Error Correction of {Ukrainian}},
  author    = {Palma Gomez, Frank and
               Rozovskaya, Alla and
               Roth, Dan},
  editor    = {Romanyshyn, Mariana},
  booktitle = {Proceedings of the Second Ukrainian Natural Language Processing Workshop (UNLP)},
  month     = may,
  year      = {2023},
  address   = {Dubrovnik, Croatia},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2023.unlp-1.14},
  doi       = {10.18653/v1/2023.unlp-1.14},
  pages     = {114--120},
  abstract  = {We present our system that participated in the shared task on the grammatical error correction of Ukrainian. We have implemented two approaches that make use of large pre-trained language models and synthetic data, that have been used for error correction of English as well as low-resource languages. The first approach is based on fine-tuning a large multilingual language model (mT5) in two stages: first, on synthetic data, and then on gold data. The second approach trains a (smaller) seq2seq Transformer model pre-trained on synthetic data and fine-tuned on gold data. Our mT5-based model scored first in {``}GEC only{''} track, and a very close second in the {``}GEC+Fluency{''} track. Our two key innovations are (1) finetuning in stages, first on synthetic, and then on gold data; and (2) a high-quality corruption method based on roundtrip machine translation to complement existing noisification approaches.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="gomez-etal-2023-low">
<titleInfo>
<title>A Low-Resource Approach to the Grammatical Error Correction of Ukrainian</title>
</titleInfo>
<name type="personal">
<namePart type="given">Frank</namePart>
<namePart type="family">Palma Gomez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alla</namePart>
<namePart type="family">Rozovskaya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dan</namePart>
<namePart type="family">Roth</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second Ukrainian Natural Language Processing Workshop (UNLP)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mariana</namePart>
<namePart type="family">Romanyshyn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dubrovnik, Croatia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present our system that participated in the shared task on the grammatical error correction of Ukrainian. We have implemented two approaches that make use of large pre-trained language models and synthetic data, that have been used for error correction of English as well as low-resource languages. The first approach is based on fine-tuning a large multilingual language model (mT5) in two stages: first, on synthetic data, and then on gold data. The second approach trains a (smaller) seq2seq Transformer model pre-trained on synthetic data and fine-tuned on gold data. Our mT5-based model scored first in “GEC only” track, and a very close second in the “GEC+Fluency” track. Our two key innovations are (1) finetuning in stages, first on synthetic, and then on gold data; and (2) a high-quality corruption method based on roundtrip machine translation to complement existing noisification approaches.</abstract>
<identifier type="citekey">gomez-etal-2023-low</identifier>
<identifier type="doi">10.18653/v1/2023.unlp-1.14</identifier>
<location>
<url>https://aclanthology.org/2023.unlp-1.14</url>
</location>
<part>
<date>2023-05</date>
<extent unit="page">
<start>114</start>
<end>120</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Low-Resource Approach to the Grammatical Error Correction of Ukrainian
%A Palma Gomez, Frank
%A Rozovskaya, Alla
%A Roth, Dan
%Y Romanyshyn, Mariana
%S Proceedings of the Second Ukrainian Natural Language Processing Workshop (UNLP)
%D 2023
%8 May
%I Association for Computational Linguistics
%C Dubrovnik, Croatia
%F gomez-etal-2023-low
%X We present our system that participated in the shared task on the grammatical error correction of Ukrainian. We have implemented two approaches that make use of large pre-trained language models and synthetic data, that have been used for error correction of English as well as low-resource languages. The first approach is based on fine-tuning a large multilingual language model (mT5) in two stages: first, on synthetic data, and then on gold data. The second approach trains a (smaller) seq2seq Transformer model pre-trained on synthetic data and fine-tuned on gold data. Our mT5-based model scored first in “GEC only” track, and a very close second in the “GEC+Fluency” track. Our two key innovations are (1) finetuning in stages, first on synthetic, and then on gold data; and (2) a high-quality corruption method based on roundtrip machine translation to complement existing noisification approaches.
%R 10.18653/v1/2023.unlp-1.14
%U https://aclanthology.org/2023.unlp-1.14
%U https://doi.org/10.18653/v1/2023.unlp-1.14
%P 114-120
Markdown (Informal)
[A Low-Resource Approach to the Grammatical Error Correction of Ukrainian](https://aclanthology.org/2023.unlp-1.14) (Palma Gomez et al., UNLP 2023)
ACL