@inproceedings{menard-mougeot-2019-turning,
title = "Turning silver into gold: error-focused corpus reannotation with active learning",
author = "M{\'e}nard, Pierre Andr{\'e} and
Mougeot, Antoine",
editor = "Mitkov, Ruslan and
Angelova, Galia",
booktitle = "Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2019)",
month = sep,
year = "2019",
address = "Varna, Bulgaria",
publisher = "INCOMA Ltd.",
url = "https://aclanthology.org/R19-1088",
doi = "10.26615/978-954-452-056-4_088",
pages = "758--767",
abstract = "While high quality gold standard annotated corpora are crucial for most tasks in natural language processing, many annotated corpora published in recent years, created by annotators or tools, contains noisy annotations. These corpora can be viewed as more silver than gold standards, even if they are used in evaluation campaigns or to compare systems{'} performances. As upgrading a silver corpus to gold level is still a challenge, we explore the application of active learning techniques to detect errors using four datasets designed for document classification and part-of-speech tagging. Our results show that the proposed method for the seeding step improves the chance of finding incorrect annotations by a factor of 2.73 when compared to random selection, a 14.71{\%} increase from the baseline methods. Our query method provides an increase in the error detection precision on average by a factor of 1.78 against random selection, an increase of 61.82{\%} compared to other query approaches.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="menard-mougeot-2019-turning">
<titleInfo>
<title>Turning silver into gold: error-focused corpus reannotation with active learning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Pierre</namePart>
<namePart type="given">André</namePart>
<namePart type="family">Ménard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Antoine</namePart>
<namePart type="family">Mougeot</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2019)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Galia</namePart>
<namePart type="family">Angelova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd.</publisher>
<place>
<placeTerm type="text">Varna, Bulgaria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>While high quality gold standard annotated corpora are crucial for most tasks in natural language processing, many annotated corpora published in recent years, created by annotators or tools, contains noisy annotations. These corpora can be viewed as more silver than gold standards, even if they are used in evaluation campaigns or to compare systems’ performances. As upgrading a silver corpus to gold level is still a challenge, we explore the application of active learning techniques to detect errors using four datasets designed for document classification and part-of-speech tagging. Our results show that the proposed method for the seeding step improves the chance of finding incorrect annotations by a factor of 2.73 when compared to random selection, a 14.71% increase from the baseline methods. Our query method provides an increase in the error detection precision on average by a factor of 1.78 against random selection, an increase of 61.82% compared to other query approaches.</abstract>
<identifier type="citekey">menard-mougeot-2019-turning</identifier>
<identifier type="doi">10.26615/978-954-452-056-4_088</identifier>
<location>
<url>https://aclanthology.org/R19-1088</url>
</location>
<part>
<date>2019-09</date>
<extent unit="page">
<start>758</start>
<end>767</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Turning silver into gold: error-focused corpus reannotation with active learning
%A Ménard, Pierre André
%A Mougeot, Antoine
%Y Mitkov, Ruslan
%Y Angelova, Galia
%S Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2019)
%D 2019
%8 September
%I INCOMA Ltd.
%C Varna, Bulgaria
%F menard-mougeot-2019-turning
%X While high quality gold standard annotated corpora are crucial for most tasks in natural language processing, many annotated corpora published in recent years, created by annotators or tools, contains noisy annotations. These corpora can be viewed as more silver than gold standards, even if they are used in evaluation campaigns or to compare systems’ performances. As upgrading a silver corpus to gold level is still a challenge, we explore the application of active learning techniques to detect errors using four datasets designed for document classification and part-of-speech tagging. Our results show that the proposed method for the seeding step improves the chance of finding incorrect annotations by a factor of 2.73 when compared to random selection, a 14.71% increase from the baseline methods. Our query method provides an increase in the error detection precision on average by a factor of 1.78 against random selection, an increase of 61.82% compared to other query approaches.
%R 10.26615/978-954-452-056-4_088
%U https://aclanthology.org/R19-1088
%U https://doi.org/10.26615/978-954-452-056-4_088
%P 758-767
Markdown (Informal)
[Turning silver into gold: error-focused corpus reannotation with active learning](https://aclanthology.org/R19-1088) (Ménard & Mougeot, RANLP 2019)
ACL