@inproceedings{tang-etal-2021-ji-yu,
title = "基于字词粒度噪声数据增强的中文语法纠错({C}hinese Grammatical Error Correction enhanced by Data Augmentation from Word and Character Levels)",
author = "Tang, Zecheng and
Ji, Yixin and
Zhao, Yibo and
Li, Junhui",
editor = "Li, Sheng and
Sun, Maosong and
Liu, Yang and
Wu, Hua and
Liu, Kang and
Che, Wanxiang and
He, Shizhu and
Rao, Gaoqi",
booktitle = "Proceedings of the 20th Chinese National Conference on Computational Linguistics",
month = aug,
year = "2021",
address = "Huhhot, China",
publisher = "Chinese Information Processing Society of China",
url = "https://aclanthology.org/2021.ccl-1.73",
pages = "813--824",
abstract = "语法纠错是自然语言处理领域的热门任务之一,其目的是将错误的句子修改为正确的句子。为了缓解中文训练语料不足的问题,本文从数据增强的角度出发,提出一种新颖的扩充和增强数据的方法。具体地,为了使模型能更好地获取不同类型和不同粒度的错误,本文首先对语法纠错中出现的错误进行了字和词粒度的分类,在此基础上提出了融合字词粒度噪声的数据增强方法,以此获得大规模且质量较高的错误数据集。基于NLPCC2018共享任务的实验结果表明,本文提出的融合字词粒度加噪方法能够显著提升模型的性能,在该数据集上达到了最优的性能。最后,本文分析了错误类型和数据规模对中文语法纠错模型性能的影响。",
language = "Chinese",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="tang-etal-2021-ji-yu">
<titleInfo>
<title>基于字词粒度噪声数据增强的中文语法纠错(Chinese Grammatical Error Correction enhanced by Data Augmentation from Word and Character Levels)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Zecheng</namePart>
<namePart type="family">Tang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yixin</namePart>
<namePart type="family">Ji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yibo</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junhui</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">Chinese</languageTerm>
<languageTerm type="code" authority="iso639-2b">chi</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 20th Chinese National Conference on Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sheng</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maosong</namePart>
<namePart type="family">Sun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hua</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kang</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shizhu</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gaoqi</namePart>
<namePart type="family">Rao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Chinese Information Processing Society of China</publisher>
<place>
<placeTerm type="text">Huhhot, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>语法纠错是自然语言处理领域的热门任务之一,其目的是将错误的句子修改为正确的句子。为了缓解中文训练语料不足的问题,本文从数据增强的角度出发,提出一种新颖的扩充和增强数据的方法。具体地,为了使模型能更好地获取不同类型和不同粒度的错误,本文首先对语法纠错中出现的错误进行了字和词粒度的分类,在此基础上提出了融合字词粒度噪声的数据增强方法,以此获得大规模且质量较高的错误数据集。基于NLPCC2018共享任务的实验结果表明,本文提出的融合字词粒度加噪方法能够显著提升模型的性能,在该数据集上达到了最优的性能。最后,本文分析了错误类型和数据规模对中文语法纠错模型性能的影响。</abstract>
<identifier type="citekey">tang-etal-2021-ji-yu</identifier>
<location>
<url>https://aclanthology.org/2021.ccl-1.73</url>
</location>
<part>
<date>2021-08</date>
<extent unit="page">
<start>813</start>
<end>824</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T 基于字词粒度噪声数据增强的中文语法纠错(Chinese Grammatical Error Correction enhanced by Data Augmentation from Word and Character Levels)
%A Tang, Zecheng
%A Ji, Yixin
%A Zhao, Yibo
%A Li, Junhui
%Y Li, Sheng
%Y Sun, Maosong
%Y Liu, Yang
%Y Wu, Hua
%Y Liu, Kang
%Y Che, Wanxiang
%Y He, Shizhu
%Y Rao, Gaoqi
%S Proceedings of the 20th Chinese National Conference on Computational Linguistics
%D 2021
%8 August
%I Chinese Information Processing Society of China
%C Huhhot, China
%G Chinese
%F tang-etal-2021-ji-yu
%X 语法纠错是自然语言处理领域的热门任务之一,其目的是将错误的句子修改为正确的句子。为了缓解中文训练语料不足的问题,本文从数据增强的角度出发,提出一种新颖的扩充和增强数据的方法。具体地,为了使模型能更好地获取不同类型和不同粒度的错误,本文首先对语法纠错中出现的错误进行了字和词粒度的分类,在此基础上提出了融合字词粒度噪声的数据增强方法,以此获得大规模且质量较高的错误数据集。基于NLPCC2018共享任务的实验结果表明,本文提出的融合字词粒度加噪方法能够显著提升模型的性能,在该数据集上达到了最优的性能。最后,本文分析了错误类型和数据规模对中文语法纠错模型性能的影响。
%U https://aclanthology.org/2021.ccl-1.73
%P 813-824
Markdown (Informal)
[基于字词粒度噪声数据增强的中文语法纠错(Chinese Grammatical Error Correction enhanced by Data Augmentation from Word and Character Levels)](https://aclanthology.org/2021.ccl-1.73) (Tang et al., CCL 2021)
ACL