@inproceedings{nighojkar-etal-2023-strong,
title = "No Strong Feelings One Way or Another: Re-operationalizing Neutrality in Natural Language Inference",
author = "Nighojkar, Animesh and
author = "Nighojkar, Animesh and
Laverghetta, Jr., Antonio and
Licato, John",
editor = "Prange, Jakob and
Friedrich, Annemarie",
booktitle = "Proceedings of the 17th Linguistic Annotation Workshop (LAW-XVII)",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.law-1.20",
doi = "10.18653/v1/2023.law-1.20",
pages = "199--210",
abstract = "Natural Language Inference (NLI) has been a cornerstone task in evaluating language models{'} inferential reasoning capabilities. However, the standard three-way classification scheme used in NLI has well-known shortcomings in evaluating models{'} ability to capture the nuances of natural human reasoning. In this paper, we argue that the operationalization of the neutral label in current NLI datasets has low validity, is interpreted inconsistently, and that at least one important sense of neutrality is often ignored. We uncover the detrimental impact of these shortcomings, which in some cases leads to annotation datasets that actually decrease performance on downstream tasks. We compare approaches of handling annotator disagreement and identify flaws in a recent NLI dataset that designs an annotator study based on a problematic operationalization. Our findings highlight the need for a more refined evaluation framework for NLI, and we hope to spark further discussion and action in the NLP community.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="nighojkar-etal-2023-strong">
<titleInfo>
<title>No Strong Feelings One Way or Another: Re-operationalizing Neutrality in Natural Language Inference</title>
</titleInfo>
<name type="personal">
<namePart type="given">Animesh</namePart>
<namePart type="family">Nighojkar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Antonio</namePart>
<namePart type="family">Laverghetta</namePart>
<namePart type="termsOfAddress">Jr.</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">John</namePart>
<namePart type="family">Licato</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 17th Linguistic Annotation Workshop (LAW-XVII)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jakob</namePart>
<namePart type="family">Prange</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Annemarie</namePart>
<namePart type="family">Friedrich</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Natural Language Inference (NLI) has been a cornerstone task in evaluating language models’ inferential reasoning capabilities. However, the standard three-way classification scheme used in NLI has well-known shortcomings in evaluating models’ ability to capture the nuances of natural human reasoning. In this paper, we argue that the operationalization of the neutral label in current NLI datasets has low validity, is interpreted inconsistently, and that at least one important sense of neutrality is often ignored. We uncover the detrimental impact of these shortcomings, which in some cases leads to annotation datasets that actually decrease performance on downstream tasks. We compare approaches of handling annotator disagreement and identify flaws in a recent NLI dataset that designs an annotator study based on a problematic operationalization. Our findings highlight the need for a more refined evaluation framework for NLI, and we hope to spark further discussion and action in the NLP community.</abstract>
<identifier type="citekey">nighojkar-etal-2023-strong</identifier>
<identifier type="doi">10.18653/v1/2023.law-1.20</identifier>
<location>
<url>https://aclanthology.org/2023.law-1.20</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>199</start>
<end>210</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T No Strong Feelings One Way or Another: Re-operationalizing Neutrality in Natural Language Inference
%A Nighojkar, Animesh
%A Laverghetta Jr., Antonio
%A Licato, John
%Y Prange, Jakob
%Y Friedrich, Annemarie
%S Proceedings of the 17th Linguistic Annotation Workshop (LAW-XVII)
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F nighojkar-etal-2023-strong
%X Natural Language Inference (NLI) has been a cornerstone task in evaluating language models’ inferential reasoning capabilities. However, the standard three-way classification scheme used in NLI has well-known shortcomings in evaluating models’ ability to capture the nuances of natural human reasoning. In this paper, we argue that the operationalization of the neutral label in current NLI datasets has low validity, is interpreted inconsistently, and that at least one important sense of neutrality is often ignored. We uncover the detrimental impact of these shortcomings, which in some cases leads to annotation datasets that actually decrease performance on downstream tasks. We compare approaches of handling annotator disagreement and identify flaws in a recent NLI dataset that designs an annotator study based on a problematic operationalization. Our findings highlight the need for a more refined evaluation framework for NLI, and we hope to spark further discussion and action in the NLP community.
%R 10.18653/v1/2023.law-1.20
%U https://aclanthology.org/2023.law-1.20
%U https://doi.org/10.18653/v1/2023.law-1.20
%P 199-210
Markdown (Informal)
[No Strong Feelings One Way or Another: Re-operationalizing Neutrality in Natural Language Inference](https://aclanthology.org/2023.law-1.20) (Nighojkar et al., LAW 2023)
ACL