@inproceedings{liu-etal-2022-craspell,
title = "{CRAS}pell: A Contextual Typo Robust Approach to Improve {C}hinese Spelling Correction",
author = "Liu, Shulin and
Song, Shengkang and
Yue, Tianchi and
Yang, Tao and
Cai, Huihui and
Yu, TingHao and
Sun, Shengli",
editor = "Muresan, Smaranda and
Nakov, Preslav and
Villavicencio, Aline",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2022",
month = may,
year = "2022",
address = "Dublin, Ireland",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.findings-acl.237/",
doi = "10.18653/v1/2022.findings-acl.237",
pages = "3008--3018",
abstract = "Recently, Bert-based models have dominated the research of Chinese spelling correction (CSC). These methods have two limitations: (1) they have poor performance on multi-typo texts. In such texts, the context of each typo contains at least one misspelled character, which brings noise information. Such noisy context leads to the declining performance on multi-typo texts. (2) they tend to overcorrect valid expressions to more frequent expressions due to the masked token recovering task of Bert. We attempt to address these limitations in this paper. To make our model robust to contextual noise brought by typos, our approach first constructs a noisy context for each training sample. Then the correction model is forced to yield similar outputs based on the noisy and original contexts. Moreover, to address the overcorrection problem, copy mechanism is incorporated to encourage our model to prefer to choose the input character when the miscorrected and input character are both valid according to the given context. Experiments are conducted on widely used benchmarks. Our model achieves superior performance against state-of-the-art methods by a remarkable gain."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="liu-etal-2022-craspell">
<titleInfo>
<title>CRASpell: A Contextual Typo Robust Approach to Improve Chinese Spelling Correction</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shulin</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shengkang</namePart>
<namePart type="family">Song</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tianchi</namePart>
<namePart type="family">Yue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tao</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Huihui</namePart>
<namePart type="family">Cai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">TingHao</namePart>
<namePart type="family">Yu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shengli</namePart>
<namePart type="family">Sun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2022</title>
</titleInfo>
<name type="personal">
<namePart type="given">Smaranda</namePart>
<namePart type="family">Muresan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Preslav</namePart>
<namePart type="family">Nakov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aline</namePart>
<namePart type="family">Villavicencio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dublin, Ireland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Recently, Bert-based models have dominated the research of Chinese spelling correction (CSC). These methods have two limitations: (1) they have poor performance on multi-typo texts. In such texts, the context of each typo contains at least one misspelled character, which brings noise information. Such noisy context leads to the declining performance on multi-typo texts. (2) they tend to overcorrect valid expressions to more frequent expressions due to the masked token recovering task of Bert. We attempt to address these limitations in this paper. To make our model robust to contextual noise brought by typos, our approach first constructs a noisy context for each training sample. Then the correction model is forced to yield similar outputs based on the noisy and original contexts. Moreover, to address the overcorrection problem, copy mechanism is incorporated to encourage our model to prefer to choose the input character when the miscorrected and input character are both valid according to the given context. Experiments are conducted on widely used benchmarks. Our model achieves superior performance against state-of-the-art methods by a remarkable gain.</abstract>
<identifier type="citekey">liu-etal-2022-craspell</identifier>
<identifier type="doi">10.18653/v1/2022.findings-acl.237</identifier>
<location>
<url>https://aclanthology.org/2022.findings-acl.237/</url>
</location>
<part>
<date>2022-05</date>
<extent unit="page">
<start>3008</start>
<end>3018</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T CRASpell: A Contextual Typo Robust Approach to Improve Chinese Spelling Correction
%A Liu, Shulin
%A Song, Shengkang
%A Yue, Tianchi
%A Yang, Tao
%A Cai, Huihui
%A Yu, TingHao
%A Sun, Shengli
%Y Muresan, Smaranda
%Y Nakov, Preslav
%Y Villavicencio, Aline
%S Findings of the Association for Computational Linguistics: ACL 2022
%D 2022
%8 May
%I Association for Computational Linguistics
%C Dublin, Ireland
%F liu-etal-2022-craspell
%X Recently, Bert-based models have dominated the research of Chinese spelling correction (CSC). These methods have two limitations: (1) they have poor performance on multi-typo texts. In such texts, the context of each typo contains at least one misspelled character, which brings noise information. Such noisy context leads to the declining performance on multi-typo texts. (2) they tend to overcorrect valid expressions to more frequent expressions due to the masked token recovering task of Bert. We attempt to address these limitations in this paper. To make our model robust to contextual noise brought by typos, our approach first constructs a noisy context for each training sample. Then the correction model is forced to yield similar outputs based on the noisy and original contexts. Moreover, to address the overcorrection problem, copy mechanism is incorporated to encourage our model to prefer to choose the input character when the miscorrected and input character are both valid according to the given context. Experiments are conducted on widely used benchmarks. Our model achieves superior performance against state-of-the-art methods by a remarkable gain.
%R 10.18653/v1/2022.findings-acl.237
%U https://aclanthology.org/2022.findings-acl.237/
%U https://doi.org/10.18653/v1/2022.findings-acl.237
%P 3008-3018
Markdown (Informal)
[CRASpell: A Contextual Typo Robust Approach to Improve Chinese Spelling Correction](https://aclanthology.org/2022.findings-acl.237/) (Liu et al., Findings 2022)
ACL