@inproceedings{palma-gomez-rozovskaya-2024-multi,
title = "Multi-Reference Benchmarks for {R}ussian Grammatical Error Correction",
author = "Palma Gomez, Frank and
Rozovskaya, Alla",
editor = "Graham, Yvette and
Purver, Matthew",
booktitle = "Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = mar,
year = "2024",
address = "St. Julian{'}s, Malta",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.eacl-long.76",
pages = "1253--1270",
abstract = "This paper presents multi-reference benchmarks for the Grammatical Error Correction (GEC) of Russian, based on two existing single-reference datasets, for a total of 7,444 learner sentences from a variety of first language backgrounds. Each sentence is corrected independently by two new raters, and their corrections are reviewed by a senior annotator, resulting in a total of three references per sentence. Analysis of the annotations reveals that the new raters tend to make more changes, compared to the original raters, especially at the lexical level. We conduct experiments with two popular GEC approaches and show competitive performance on the original datasets and the new benchmarks. We also compare system scores as evaluated against individual annotators and discuss the effect of using multiple references overall and on specific error types. We find that using the union of the references increases system scores by more than 10 points and decreases the gap between system and human performance, thereby providing a more realistic evaluation of GEC system performance, although the effect is not the same across the error types. The annotations are available for research.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="palma-gomez-rozovskaya-2024-multi">
<titleInfo>
<title>Multi-Reference Benchmarks for Russian Grammatical Error Correction</title>
</titleInfo>
<name type="personal">
<namePart type="given">Frank</namePart>
<namePart type="family">Palma Gomez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alla</namePart>
<namePart type="family">Rozovskaya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yvette</namePart>
<namePart type="family">Graham</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matthew</namePart>
<namePart type="family">Purver</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">St. Julian’s, Malta</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper presents multi-reference benchmarks for the Grammatical Error Correction (GEC) of Russian, based on two existing single-reference datasets, for a total of 7,444 learner sentences from a variety of first language backgrounds. Each sentence is corrected independently by two new raters, and their corrections are reviewed by a senior annotator, resulting in a total of three references per sentence. Analysis of the annotations reveals that the new raters tend to make more changes, compared to the original raters, especially at the lexical level. We conduct experiments with two popular GEC approaches and show competitive performance on the original datasets and the new benchmarks. We also compare system scores as evaluated against individual annotators and discuss the effect of using multiple references overall and on specific error types. We find that using the union of the references increases system scores by more than 10 points and decreases the gap between system and human performance, thereby providing a more realistic evaluation of GEC system performance, although the effect is not the same across the error types. The annotations are available for research.</abstract>
<identifier type="citekey">palma-gomez-rozovskaya-2024-multi</identifier>
<location>
<url>https://aclanthology.org/2024.eacl-long.76</url>
</location>
<part>
<date>2024-03</date>
<extent unit="page">
<start>1253</start>
<end>1270</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Multi-Reference Benchmarks for Russian Grammatical Error Correction
%A Palma Gomez, Frank
%A Rozovskaya, Alla
%Y Graham, Yvette
%Y Purver, Matthew
%S Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2024
%8 March
%I Association for Computational Linguistics
%C St. Julian’s, Malta
%F palma-gomez-rozovskaya-2024-multi
%X This paper presents multi-reference benchmarks for the Grammatical Error Correction (GEC) of Russian, based on two existing single-reference datasets, for a total of 7,444 learner sentences from a variety of first language backgrounds. Each sentence is corrected independently by two new raters, and their corrections are reviewed by a senior annotator, resulting in a total of three references per sentence. Analysis of the annotations reveals that the new raters tend to make more changes, compared to the original raters, especially at the lexical level. We conduct experiments with two popular GEC approaches and show competitive performance on the original datasets and the new benchmarks. We also compare system scores as evaluated against individual annotators and discuss the effect of using multiple references overall and on specific error types. We find that using the union of the references increases system scores by more than 10 points and decreases the gap between system and human performance, thereby providing a more realistic evaluation of GEC system performance, although the effect is not the same across the error types. The annotations are available for research.
%U https://aclanthology.org/2024.eacl-long.76
%P 1253-1270
Markdown (Informal)
[Multi-Reference Benchmarks for Russian Grammatical Error Correction](https://aclanthology.org/2024.eacl-long.76) (Palma Gomez & Rozovskaya, EACL 2024)
ACL