@inproceedings{bian-etal-2023-contrastive,
title = "Contrastive Distant Supervision for Debiased and Denoised Machine Reading Comprehension",
author = "Bian, Ning and
Lin, Hongyu and
Han, Xianpei and
He, Ben and
Sun, Le",
editor = "Bouamor, Houda and
Pino, Juan and
Bali, Kalika",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2023",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-emnlp.457/",
doi = "10.18653/v1/2023.findings-emnlp.457",
pages = "6852--6863",
abstract = "Distant Supervision (DS) is a promising learning approach for MRC by leveraging easily-obtained question-answer pairs. Unfortunately, the heuristically annotated dataset will inevitably lead to mislabeled instances, resulting in answer bias and context noise problems. To learn debiased and denoised MRC models, this paper proposes the Contrastive Distant Supervision algorithm {--} CDS, which can learn to distinguish confusing and noisy instances via confidence-aware contrastive learning. Specifically, to eliminate answer bias, CDS samples counterfactual negative instances, which ensures that MRC models must take both answer information and question-context interaction into consideration. To denoise distantly annotated contexts, CDS samples confusing negative instances to increase the margin between correct and mislabeled instances. We further propose a confidence-aware contrastive loss to model and leverage the uncertainty of all DS instances during learning. Experimental results show that CDS is effective and can even outperform supervised MRC models without manual annotations."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bian-etal-2023-contrastive">
<titleInfo>
<title>Contrastive Distant Supervision for Debiased and Denoised Machine Reading Comprehension</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ning</namePart>
<namePart type="family">Bian</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hongyu</namePart>
<namePart type="family">Lin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xianpei</namePart>
<namePart type="family">Han</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ben</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Le</namePart>
<namePart type="family">Sun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Houda</namePart>
<namePart type="family">Bouamor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juan</namePart>
<namePart type="family">Pino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kalika</namePart>
<namePart type="family">Bali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Distant Supervision (DS) is a promising learning approach for MRC by leveraging easily-obtained question-answer pairs. Unfortunately, the heuristically annotated dataset will inevitably lead to mislabeled instances, resulting in answer bias and context noise problems. To learn debiased and denoised MRC models, this paper proposes the Contrastive Distant Supervision algorithm – CDS, which can learn to distinguish confusing and noisy instances via confidence-aware contrastive learning. Specifically, to eliminate answer bias, CDS samples counterfactual negative instances, which ensures that MRC models must take both answer information and question-context interaction into consideration. To denoise distantly annotated contexts, CDS samples confusing negative instances to increase the margin between correct and mislabeled instances. We further propose a confidence-aware contrastive loss to model and leverage the uncertainty of all DS instances during learning. Experimental results show that CDS is effective and can even outperform supervised MRC models without manual annotations.</abstract>
<identifier type="citekey">bian-etal-2023-contrastive</identifier>
<identifier type="doi">10.18653/v1/2023.findings-emnlp.457</identifier>
<location>
<url>https://aclanthology.org/2023.findings-emnlp.457/</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>6852</start>
<end>6863</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Contrastive Distant Supervision for Debiased and Denoised Machine Reading Comprehension
%A Bian, Ning
%A Lin, Hongyu
%A Han, Xianpei
%A He, Ben
%A Sun, Le
%Y Bouamor, Houda
%Y Pino, Juan
%Y Bali, Kalika
%S Findings of the Association for Computational Linguistics: EMNLP 2023
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F bian-etal-2023-contrastive
%X Distant Supervision (DS) is a promising learning approach for MRC by leveraging easily-obtained question-answer pairs. Unfortunately, the heuristically annotated dataset will inevitably lead to mislabeled instances, resulting in answer bias and context noise problems. To learn debiased and denoised MRC models, this paper proposes the Contrastive Distant Supervision algorithm – CDS, which can learn to distinguish confusing and noisy instances via confidence-aware contrastive learning. Specifically, to eliminate answer bias, CDS samples counterfactual negative instances, which ensures that MRC models must take both answer information and question-context interaction into consideration. To denoise distantly annotated contexts, CDS samples confusing negative instances to increase the margin between correct and mislabeled instances. We further propose a confidence-aware contrastive loss to model and leverage the uncertainty of all DS instances during learning. Experimental results show that CDS is effective and can even outperform supervised MRC models without manual annotations.
%R 10.18653/v1/2023.findings-emnlp.457
%U https://aclanthology.org/2023.findings-emnlp.457/
%U https://doi.org/10.18653/v1/2023.findings-emnlp.457
%P 6852-6863
Markdown (Informal)
[Contrastive Distant Supervision for Debiased and Denoised Machine Reading Comprehension](https://aclanthology.org/2023.findings-emnlp.457/) (Bian et al., Findings 2023)
ACL