@inproceedings{bastan-etal-2022-bionli,
title = "{B}io{NLI}: Generating a Biomedical {NLI} Dataset Using Lexico-semantic Constraints for Adversarial Examples",
author = "Bastan, Mohaddeseh and
Surdeanu, Mihai and
Balasubramanian, Niranjan",
editor = "Goldberg, Yoav and
Kozareva, Zornitsa and
Zhang, Yue",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2022",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.findings-emnlp.374/",
doi = "10.18653/v1/2022.findings-emnlp.374",
pages = "5093--5104",
abstract = "Natural language inference (NLI) is critical in many domains requiring complex decision-making, such as the biomedical domain. We introduce a novel semi-supervised procedure that bootstraps biomedical NLI datasets from positive entailment examples present in abstracts of biomedical publications. We focus on challenging texts where the hypothesis includes mechanistic information such as biochemical interactions between two entities. A key contribution of this work is automating the creation of negative examples that are informative without being simplistic. We generate a range of negative examples using nine strategies that manipulate the structure of the underlying mechanisms both with rules, e.g., flip the roles of the entities in the interaction, and, more importantly, by imposing the perturbed conditions as logical constraints in a neuro-logical decoding system (CITATION).We use this procedure to create a novel dataset for NLI in the biomedical domain, called . The accuracy of neural classifiers on this dataset is in the mid 70s F1, which indicates that this NLI task remains to be solved. Critically, we observe that the performance on the different classes of negative examples varies widely, from 97{\%} F1 on the simple negative examples that change the role of the entities in the hypothesis, to barely better than chance on the negative examples generated using neuro-logic decoding."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bastan-etal-2022-bionli">
<titleInfo>
<title>BioNLI: Generating a Biomedical NLI Dataset Using Lexico-semantic Constraints for Adversarial Examples</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mohaddeseh</namePart>
<namePart type="family">Bastan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mihai</namePart>
<namePart type="family">Surdeanu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Niranjan</namePart>
<namePart type="family">Balasubramanian</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2022</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yoav</namePart>
<namePart type="family">Goldberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zornitsa</namePart>
<namePart type="family">Kozareva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yue</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, United Arab Emirates</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Natural language inference (NLI) is critical in many domains requiring complex decision-making, such as the biomedical domain. We introduce a novel semi-supervised procedure that bootstraps biomedical NLI datasets from positive entailment examples present in abstracts of biomedical publications. We focus on challenging texts where the hypothesis includes mechanistic information such as biochemical interactions between two entities. A key contribution of this work is automating the creation of negative examples that are informative without being simplistic. We generate a range of negative examples using nine strategies that manipulate the structure of the underlying mechanisms both with rules, e.g., flip the roles of the entities in the interaction, and, more importantly, by imposing the perturbed conditions as logical constraints in a neuro-logical decoding system (CITATION).We use this procedure to create a novel dataset for NLI in the biomedical domain, called . The accuracy of neural classifiers on this dataset is in the mid 70s F1, which indicates that this NLI task remains to be solved. Critically, we observe that the performance on the different classes of negative examples varies widely, from 97% F1 on the simple negative examples that change the role of the entities in the hypothesis, to barely better than chance on the negative examples generated using neuro-logic decoding.</abstract>
<identifier type="citekey">bastan-etal-2022-bionli</identifier>
<identifier type="doi">10.18653/v1/2022.findings-emnlp.374</identifier>
<location>
<url>https://aclanthology.org/2022.findings-emnlp.374/</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>5093</start>
<end>5104</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T BioNLI: Generating a Biomedical NLI Dataset Using Lexico-semantic Constraints for Adversarial Examples
%A Bastan, Mohaddeseh
%A Surdeanu, Mihai
%A Balasubramanian, Niranjan
%Y Goldberg, Yoav
%Y Kozareva, Zornitsa
%Y Zhang, Yue
%S Findings of the Association for Computational Linguistics: EMNLP 2022
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates
%F bastan-etal-2022-bionli
%X Natural language inference (NLI) is critical in many domains requiring complex decision-making, such as the biomedical domain. We introduce a novel semi-supervised procedure that bootstraps biomedical NLI datasets from positive entailment examples present in abstracts of biomedical publications. We focus on challenging texts where the hypothesis includes mechanistic information such as biochemical interactions between two entities. A key contribution of this work is automating the creation of negative examples that are informative without being simplistic. We generate a range of negative examples using nine strategies that manipulate the structure of the underlying mechanisms both with rules, e.g., flip the roles of the entities in the interaction, and, more importantly, by imposing the perturbed conditions as logical constraints in a neuro-logical decoding system (CITATION).We use this procedure to create a novel dataset for NLI in the biomedical domain, called . The accuracy of neural classifiers on this dataset is in the mid 70s F1, which indicates that this NLI task remains to be solved. Critically, we observe that the performance on the different classes of negative examples varies widely, from 97% F1 on the simple negative examples that change the role of the entities in the hypothesis, to barely better than chance on the negative examples generated using neuro-logic decoding.
%R 10.18653/v1/2022.findings-emnlp.374
%U https://aclanthology.org/2022.findings-emnlp.374/
%U https://doi.org/10.18653/v1/2022.findings-emnlp.374
%P 5093-5104
Markdown (Informal)
[BioNLI: Generating a Biomedical NLI Dataset Using Lexico-semantic Constraints for Adversarial Examples](https://aclanthology.org/2022.findings-emnlp.374/) (Bastan et al., Findings 2022)
ACL