@inproceedings{sung-shin-2023-towards,
title = "Towards {L}2-friendly pipelines for learner corpora: A case of written production by {L}2-{K}orean learners",
author = "Sung, Hakyung and
Shin, Gyu-Ho",
editor = {Kochmar, Ekaterina and
Burstein, Jill and
Horbach, Andrea and
Laarmann-Quante, Ronja and
Madnani, Nitin and
Tack, Ana{\"i}s and
Yaneva, Victoria and
Yuan, Zheng and
Zesch, Torsten},
booktitle = "Proceedings of the 18th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2023)",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.bea-1.6/",
doi = "10.18653/v1/2023.bea-1.6",
pages = "72--82",
abstract = "We introduce the Korean-Learner-Morpheme (KLM) corpus, a manually annotated dataset consisting of 129,784 morphemes from second language (L2) learners of Korean, featuring morpheme tokenization and part-of-speech (POS) tagging. We evaluate the performance of four Korean morphological analyzers in tokenization and POS tagging on the L2- Korean corpus. Results highlight the analyzers' reduced performance on L2 data, indicating the limitation of advanced deep-learning models when dealing with L2-Korean corpora. We further show that fine-tuning one of the models with the KLM corpus improves its accuracy of tokenization and POS tagging on L2-Korean dataset."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sung-shin-2023-towards">
<titleInfo>
<title>Towards L2-friendly pipelines for learner corpora: A case of written production by L2-Korean learners</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hakyung</namePart>
<namePart type="family">Sung</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gyu-Ho</namePart>
<namePart type="family">Shin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 18th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2023)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Kochmar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jill</namePart>
<namePart type="family">Burstein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andrea</namePart>
<namePart type="family">Horbach</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ronja</namePart>
<namePart type="family">Laarmann-Quante</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nitin</namePart>
<namePart type="family">Madnani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anaïs</namePart>
<namePart type="family">Tack</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Victoria</namePart>
<namePart type="family">Yaneva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zheng</namePart>
<namePart type="family">Yuan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Torsten</namePart>
<namePart type="family">Zesch</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We introduce the Korean-Learner-Morpheme (KLM) corpus, a manually annotated dataset consisting of 129,784 morphemes from second language (L2) learners of Korean, featuring morpheme tokenization and part-of-speech (POS) tagging. We evaluate the performance of four Korean morphological analyzers in tokenization and POS tagging on the L2- Korean corpus. Results highlight the analyzers’ reduced performance on L2 data, indicating the limitation of advanced deep-learning models when dealing with L2-Korean corpora. We further show that fine-tuning one of the models with the KLM corpus improves its accuracy of tokenization and POS tagging on L2-Korean dataset.</abstract>
<identifier type="citekey">sung-shin-2023-towards</identifier>
<identifier type="doi">10.18653/v1/2023.bea-1.6</identifier>
<location>
<url>https://aclanthology.org/2023.bea-1.6/</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>72</start>
<end>82</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Towards L2-friendly pipelines for learner corpora: A case of written production by L2-Korean learners
%A Sung, Hakyung
%A Shin, Gyu-Ho
%Y Kochmar, Ekaterina
%Y Burstein, Jill
%Y Horbach, Andrea
%Y Laarmann-Quante, Ronja
%Y Madnani, Nitin
%Y Tack, Anaïs
%Y Yaneva, Victoria
%Y Yuan, Zheng
%Y Zesch, Torsten
%S Proceedings of the 18th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2023)
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F sung-shin-2023-towards
%X We introduce the Korean-Learner-Morpheme (KLM) corpus, a manually annotated dataset consisting of 129,784 morphemes from second language (L2) learners of Korean, featuring morpheme tokenization and part-of-speech (POS) tagging. We evaluate the performance of four Korean morphological analyzers in tokenization and POS tagging on the L2- Korean corpus. Results highlight the analyzers’ reduced performance on L2 data, indicating the limitation of advanced deep-learning models when dealing with L2-Korean corpora. We further show that fine-tuning one of the models with the KLM corpus improves its accuracy of tokenization and POS tagging on L2-Korean dataset.
%R 10.18653/v1/2023.bea-1.6
%U https://aclanthology.org/2023.bea-1.6/
%U https://doi.org/10.18653/v1/2023.bea-1.6
%P 72-82
Markdown (Informal)
[Towards L2-friendly pipelines for learner corpora: A case of written production by L2-Korean learners](https://aclanthology.org/2023.bea-1.6/) (Sung & Shin, BEA 2023)
ACL