@inproceedings{lertvittayakumjorn-etal-2020-find,
    title = "{FIND}: {H}uman-in-the-{L}oop {D}ebugging {D}eep {T}ext {C}lassifiers",
    author = "Lertvittayakumjorn, Piyawat and
      Specia, Lucia and
      Toni, Francesca",
    editor = "Webber, Bonnie and
      Cohn, Trevor and
      He, Yulan and
      Liu, Yang",
    booktitle = "Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)",
    month = nov,
    year = "2020",
    address = "Online",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2020.emnlp-main.24/",
    doi = "10.18653/v1/2020.emnlp-main.24",
    pages = "332--348",
    abstract = "Since obtaining a perfect training dataset (i.e., a dataset which is considerably large, unbiased, and well-representative of unseen cases) is hardly possible, many real-world text classifiers are trained on the available, yet imperfect, datasets. These classifiers are thus likely to have undesirable properties. For instance, they may have biases against some sub-populations or may not work effectively in the wild due to overfitting. In this paper, we propose FIND {--} a framework which enables humans to debug deep learning text classifiers by disabling irrelevant hidden features. Experiments show that by using FIND, humans can improve CNN text classifiers which were trained under different types of imperfect datasets (including datasets with biases and datasets with dissimilar train-test distributions)."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="lertvittayakumjorn-etal-2020-find">
    <titleInfo>
      <title>FIND: Human-in-the-Loop Debugging Deep Text Classifiers</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Piyawat</namePart>
      <namePart type="family">Lertvittayakumjorn</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Lucia</namePart>
      <namePart type="family">Specia</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Francesca</namePart>
      <namePart type="family">Toni</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2020-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Bonnie</namePart>
        <namePart type="family">Webber</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Trevor</namePart>
        <namePart type="family">Cohn</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yulan</namePart>
        <namePart type="family">He</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yang</namePart>
        <namePart type="family">Liu</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Online</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Since obtaining a perfect training dataset (i.e., a dataset which is considerably large, unbiased, and well-representative of unseen cases) is hardly possible, many real-world text classifiers are trained on the available, yet imperfect, datasets. These classifiers are thus likely to have undesirable properties. For instance, they may have biases against some sub-populations or may not work effectively in the wild due to overfitting. In this paper, we propose FIND – a framework which enables humans to debug deep learning text classifiers by disabling irrelevant hidden features. Experiments show that by using FIND, humans can improve CNN text classifiers which were trained under different types of imperfect datasets (including datasets with biases and datasets with dissimilar train-test distributions).</abstract>
    <identifier type="citekey">lertvittayakumjorn-etal-2020-find</identifier>
    <identifier type="doi">10.18653/v1/2020.emnlp-main.24</identifier>
    <location>
      <url>https://aclanthology.org/2020.emnlp-main.24/</url>
    </location>
    <part>
      <date>2020-11</date>
      <extent unit="page">
        <start>332</start>
        <end>348</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T FIND: Human-in-the-Loop Debugging Deep Text Classifiers
%A Lertvittayakumjorn, Piyawat
%A Specia, Lucia
%A Toni, Francesca
%Y Webber, Bonnie
%Y Cohn, Trevor
%Y He, Yulan
%Y Liu, Yang
%S Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)
%D 2020
%8 November
%I Association for Computational Linguistics
%C Online
%F lertvittayakumjorn-etal-2020-find
%X Since obtaining a perfect training dataset (i.e., a dataset which is considerably large, unbiased, and well-representative of unseen cases) is hardly possible, many real-world text classifiers are trained on the available, yet imperfect, datasets. These classifiers are thus likely to have undesirable properties. For instance, they may have biases against some sub-populations or may not work effectively in the wild due to overfitting. In this paper, we propose FIND – a framework which enables humans to debug deep learning text classifiers by disabling irrelevant hidden features. Experiments show that by using FIND, humans can improve CNN text classifiers which were trained under different types of imperfect datasets (including datasets with biases and datasets with dissimilar train-test distributions).
%R 10.18653/v1/2020.emnlp-main.24
%U https://aclanthology.org/2020.emnlp-main.24/
%U https://doi.org/10.18653/v1/2020.emnlp-main.24
%P 332-348
Markdown (Informal)
[FIND: Human-in-the-Loop Debugging Deep Text Classifiers](https://aclanthology.org/2020.emnlp-main.24/) (Lertvittayakumjorn et al., EMNLP 2020)
ACL
Piyawat Lertvittayakumjorn, Lucia Specia, and Francesca Toni. 2020. FIND: Human-in-the-Loop Debugging Deep Text Classifiers. In Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP), pages 332–348, Online. Association for Computational Linguistics.
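
The abstract's central idea, letting a human disable irrelevant hidden features of a trained CNN text classifier, can be illustrated with a minimal sketch. The code below is a hypothetical PyTorch illustration, not the authors' released implementation: pooled filter activations are multiplied by a human-set binary mask before the output layer, so features judged irrelevant stop contributing to predictions. The class and method names (MaskedCNNTextClassifier, disable_features) are invented for this example.

```python
import torch
import torch.nn as nn


class MaskedCNNTextClassifier(nn.Module):
    """Minimal sketch (not the paper's code): a 1D-CNN text classifier whose
    pooled filter activations can be switched off with a fixed binary mask,
    mirroring the idea of disabling irrelevant hidden features."""

    def __init__(self, vocab_size, emb_dim=100, num_filters=30,
                 kernel_size=3, num_classes=2):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, emb_dim)
        self.conv = nn.Conv1d(emb_dim, num_filters, kernel_size)
        # One mask entry per hidden feature: 1 keeps a filter, 0 disables it.
        # Registered as a buffer so the optimizer never updates it.
        self.register_buffer("feature_mask", torch.ones(num_filters))
        self.fc = nn.Linear(num_filters, num_classes)

    def forward(self, token_ids):                        # token_ids: (batch, seq_len)
        x = self.embedding(token_ids).transpose(1, 2)    # (batch, emb_dim, seq_len)
        h = torch.relu(self.conv(x)).max(dim=2).values   # max-pool over time
        h = h * self.feature_mask                        # zero out disabled features
        return self.fc(h)

    def disable_features(self, indices):
        """Mark the given hidden features as irrelevant, e.g. after human inspection."""
        self.feature_mask[list(indices)] = 0.0


# Example: a human decides filters 3 and 7 latch onto spurious cues and disables
# them; the final layer could then be fine-tuned with the mask held fixed.
model = MaskedCNNTextClassifier(vocab_size=5000)
model.disable_features([3, 7])
logits = model(torch.randint(0, 5000, (4, 50)))
```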