@inproceedings{dhuliawala-etal-2022-calibration,
    title = "Calibration of Machine Reading Systems at Scale",
    author = "Dhuliawala, Shehzaad and
      Adolphs, Leonard and
      Das, Rajarshi and
      Sachan, Mrinmaya",
    editor = "Muresan, Smaranda and
      Nakov, Preslav and
      Villavicencio, Aline",
    booktitle = "Findings of the Association for Computational Linguistics: ACL 2022",
    month = may,
    year = "2022",
    address = "Dublin, Ireland",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2022.findings-acl.133/",
    doi = "10.18653/v1/2022.findings-acl.133",
    pages = "1682--1693",
    abstract = "In typical machine learning systems, an estimate of the probability of the prediction is used to assess the system's confidence in the prediction. This confidence measure is usually uncalibrated; i.e., the system's confidence in the prediction does not match the true probability of the predicted output. In this paper, we present an investigation into calibrating open-setting machine reading systems, such as open-domain question answering and claim verification systems. We show that calibrating such complex systems, which contain discrete retrieval and deep reading components, is challenging and that current calibration techniques fail to scale to these settings. We propose simple extensions to existing calibration approaches that allow us to adapt them to these settings. Our experimental results reveal that the approach works well and can be useful to selectively predict answers when question answering systems are posed with unanswerable or out-of-the-training-distribution questions."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="dhuliawala-etal-2022-calibration">
    <titleInfo>
        <title>Calibration of Machine Reading Systems at Scale</title>
    </titleInfo>
    <name type="personal">
        <namePart type="given">Shehzaad</namePart>
        <namePart type="family">Dhuliawala</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Leonard</namePart>
        <namePart type="family">Adolphs</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Rajarshi</namePart>
        <namePart type="family">Das</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Mrinmaya</namePart>
        <namePart type="family">Sachan</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <originInfo>
        <dateIssued>2022-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
        <titleInfo>
            <title>Findings of the Association for Computational Linguistics: ACL 2022</title>
        </titleInfo>
        <name type="personal">
            <namePart type="given">Smaranda</namePart>
            <namePart type="family">Muresan</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Preslav</namePart>
            <namePart type="family">Nakov</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Aline</namePart>
            <namePart type="family">Villavicencio</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <originInfo>
            <publisher>Association for Computational Linguistics</publisher>
            <place>
                <placeTerm type="text">Dublin, Ireland</placeTerm>
            </place>
        </originInfo>
        <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>In typical machine learning systems, an estimate of the probability of the prediction is used to assess the system's confidence in the prediction. This confidence measure is usually uncalibrated; i.e., the system's confidence in the prediction does not match the true probability of the predicted output. In this paper, we present an investigation into calibrating open-setting machine reading systems, such as open-domain question answering and claim verification systems. We show that calibrating such complex systems, which contain discrete retrieval and deep reading components, is challenging and that current calibration techniques fail to scale to these settings. We propose simple extensions to existing calibration approaches that allow us to adapt them to these settings. Our experimental results reveal that the approach works well and can be useful to selectively predict answers when question answering systems are posed with unanswerable or out-of-the-training-distribution questions.</abstract>
<identifier type="citekey">dhuliawala-etal-2022-calibration</identifier>
<identifier type="doi">10.18653/v1/2022.findings-acl.133</identifier>
<location>
<url>https://aclanthology.org/2022.findings-acl.133/</url>
</location>
<part>
<date>2022-05</date>
<extent unit="page">
<start>1682</start>
<end>1693</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Calibration of Machine Reading Systems at Scale
%A Dhuliawala, Shehzaad
%A Adolphs, Leonard
%A Das, Rajarshi
%A Sachan, Mrinmaya
%Y Muresan, Smaranda
%Y Nakov, Preslav
%Y Villavicencio, Aline
%S Findings of the Association for Computational Linguistics: ACL 2022
%D 2022
%8 May
%I Association for Computational Linguistics
%C Dublin, Ireland
%F dhuliawala-etal-2022-calibration
%X In typical machine learning systems, an estimate of the probability of the prediction is used to assess the system's confidence in the prediction. This confidence measure is usually uncalibrated; i.e., the system's confidence in the prediction does not match the true probability of the predicted output. In this paper, we present an investigation into calibrating open-setting machine reading systems, such as open-domain question answering and claim verification systems. We show that calibrating such complex systems, which contain discrete retrieval and deep reading components, is challenging and that current calibration techniques fail to scale to these settings. We propose simple extensions to existing calibration approaches that allow us to adapt them to these settings. Our experimental results reveal that the approach works well and can be useful to selectively predict answers when question answering systems are posed with unanswerable or out-of-the-training-distribution questions.
%R 10.18653/v1/2022.findings-acl.133
%U https://aclanthology.org/2022.findings-acl.133/
%U https://doi.org/10.18653/v1/2022.findings-acl.133
%P 1682-1693
Markdown (Informal)

[Calibration of Machine Reading Systems at Scale](https://aclanthology.org/2022.findings-acl.133/) (Dhuliawala et al., Findings 2022)

ACL

Shehzaad Dhuliawala, Leonard Adolphs, Rajarshi Das, and Mrinmaya Sachan. 2022. Calibration of Machine Reading Systems at Scale. In Findings of the Association for Computational Linguistics: ACL 2022, pages 1682–1693, Dublin, Ireland. Association for Computational Linguistics.