@inproceedings{barbu-etal-2023-designing,
title = "Designing the {LECOR} Learner Corpus for {R}omanian",
author = "Barbu, Ana Maria and
Irimia, Elena and
Vasile, Carmen M{\^\i}rzea and
P{\u{a}}i{\textcommabelow{s}}, Vasile",
editor = "Mitkov, Ruslan and
Angelova, Galia",
booktitle = "Proceedings of the 14th International Conference on Recent Advances in Natural Language Processing",
month = sep,
year = "2023",
address = "Varna, Bulgaria",
publisher = "INCOMA Ltd., Shoumen, Bulgaria",
url = "https://aclanthology.org/2023.ranlp-1.16",
pages = "143--152",
abstract = "This article presents a work-in-progress project, which aims to build and utilize a corpus of Romanian texts written or spoken by non-native students of different nationalities, who learn Romanian as a foreign language in the one-year, intensive academic program organized by the University of Bucharest. This corpus, called LECOR {--} Learner Corpus for Romanian {--} is made up of pairs of texts: a version of the student and a corrected one of the teacher. Each version is automatically annotated with lemma and POS-tag, and the two versions are then compared, and the differences are marked as errors at this stage. The corpus also contains metadata file sets about students and their samples. In this article, the conceptual framework for building and utilization of the corpus is presented, including the acquisition and organization phases of the primary material, the annotation process, and the first attempts to adapt the NoSketch Engine query interface to the project{'}s objectives. The article concludes by outlining the next steps in the development of the corpus aimed at quantitative accumulation and the development of the error correction process and the complex error annotation.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="barbu-etal-2023-designing">
<titleInfo>
<title>Designing the LECOR Learner Corpus for Romanian</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ana</namePart>
<namePart type="given">Maria</namePart>
<namePart type="family">Barbu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elena</namePart>
<namePart type="family">Irimia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carmen</namePart>
<namePart type="given">Mîrzea</namePart>
<namePart type="family">Vasile</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vasile</namePart>
<namePart type="family">Păi\textcommabelows</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 14th International Conference on Recent Advances in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Galia</namePart>
<namePart type="family">Angelova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
<place>
<placeTerm type="text">Varna, Bulgaria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This article presents a work-in-progress project, which aims to build and utilize a corpus of Romanian texts written or spoken by non-native students of different nationalities, who learn Romanian as a foreign language in the one-year, intensive academic program organized by the University of Bucharest. This corpus, called LECOR – Learner Corpus for Romanian – is made up of pairs of texts: a version of the student and a corrected one of the teacher. Each version is automatically annotated with lemma and POS-tag, and the two versions are then compared, and the differences are marked as errors at this stage. The corpus also contains metadata file sets about students and their samples. In this article, the conceptual framework for building and utilization of the corpus is presented, including the acquisition and organization phases of the primary material, the annotation process, and the first attempts to adapt the NoSketch Engine query interface to the project’s objectives. The article concludes by outlining the next steps in the development of the corpus aimed at quantitative accumulation and the development of the error correction process and the complex error annotation.</abstract>
<identifier type="citekey">barbu-etal-2023-designing</identifier>
<location>
<url>https://aclanthology.org/2023.ranlp-1.16</url>
</location>
<part>
<date>2023-09</date>
<extent unit="page">
<start>143</start>
<end>152</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Designing the LECOR Learner Corpus for Romanian
%A Barbu, Ana Maria
%A Irimia, Elena
%A Vasile, Carmen Mîrzea
%A Păi\textcommabelows, Vasile
%Y Mitkov, Ruslan
%Y Angelova, Galia
%S Proceedings of the 14th International Conference on Recent Advances in Natural Language Processing
%D 2023
%8 September
%I INCOMA Ltd., Shoumen, Bulgaria
%C Varna, Bulgaria
%F barbu-etal-2023-designing
%X This article presents a work-in-progress project, which aims to build and utilize a corpus of Romanian texts written or spoken by non-native students of different nationalities, who learn Romanian as a foreign language in the one-year, intensive academic program organized by the University of Bucharest. This corpus, called LECOR – Learner Corpus for Romanian – is made up of pairs of texts: a version of the student and a corrected one of the teacher. Each version is automatically annotated with lemma and POS-tag, and the two versions are then compared, and the differences are marked as errors at this stage. The corpus also contains metadata file sets about students and their samples. In this article, the conceptual framework for building and utilization of the corpus is presented, including the acquisition and organization phases of the primary material, the annotation process, and the first attempts to adapt the NoSketch Engine query interface to the project’s objectives. The article concludes by outlining the next steps in the development of the corpus aimed at quantitative accumulation and the development of the error correction process and the complex error annotation.
%U https://aclanthology.org/2023.ranlp-1.16
%P 143-152
Markdown (Informal)
[Designing the LECOR Learner Corpus for Romanian](https://aclanthology.org/2023.ranlp-1.16) (Barbu et al., RANLP 2023)
ACL
- Ana Maria Barbu, Elena Irimia, Carmen Mîrzea Vasile, and Vasile Păiș. 2023. Designing the LECOR Learner Corpus for Romanian. In Proceedings of the 14th International Conference on Recent Advances in Natural Language Processing, pages 143–152, Varna, Bulgaria. INCOMA Ltd., Shoumen, Bulgaria.