@inproceedings{kim-etal-2024-discovering,
    title = "Discovering Biases in Information Retrieval Models Using Relevance Thesaurus as Global Explanation",
    author = "Kim, Youngwoo and
      Rahimi, Razieh and
      Allan, James",
    editor = "Al-Onaizan, Yaser and
      Bansal, Mohit and
      Chen, Yun-Nung",
    booktitle = "Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing",
    month = nov,
    year = "2024",
    address = "Miami, Florida, USA",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.emnlp-main.1089/",
    doi = "10.18653/v1/2024.emnlp-main.1089",
    pages = "19530--19547",
    abstract = "Most of the efforts in interpreting neural relevance models have been on local explanations, which explain the relevance of a document to a query. However, local explanations are not effective in predicting the model's behavior on unseen texts. We aim to explain a neural relevance model by providing lexical explanations that can be globally generalized. Specifically, we construct a relevance thesaurus containing semantically relevant query term and document term pairs, which can augment BM25 scoring functions to better approximate the neural model's predictions. We propose a novel method for relevance thesaurus construction. Our method involves training a neural relevance model that can score the relevance of partial segments of queries and documents. The trained model is used to identify relevant terms over the vocabulary space. The resulting thesaurus explanation is evaluated based on ranking effectiveness and fidelity to the targeted neural ranking model. Finally, our thesaurus reveals the existence of brand name bias in ranking models, which further supports the utility of our explanation method."
}
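The abstract describes augmenting the BM25 scoring function with a relevance thesaurus of query-term/document-term pairs so that the lexical scorer better approximates a neural ranker. The sketch below illustrates one plausible form of that augmentation. It is not the authors' implementation: the function names, the example pair weight, and the rule of crediting thesaurus matches as fractional term occurrences are all assumptions made for illustration.

```python
# Minimal sketch (not the paper's code) of BM25 augmented with a
# relevance thesaurus: a query term is credited not only for exact
# matches but also, with a weight, for related document terms.
import math
from collections import Counter

K1, B = 1.5, 0.75  # standard BM25 hyperparameters

def bm25_term(tf, df, n_docs, doc_len, avg_len):
    """Standard BM25 contribution for a single query term."""
    idf = math.log((n_docs - df + 0.5) / (df + 0.5) + 1.0)
    return idf * tf * (K1 + 1) / (tf + K1 * (1 - B + B * doc_len / avg_len))

def thesaurus_bm25(query_terms, doc_terms, df, n_docs, avg_len, thesaurus):
    """BM25 where a thesaurus entry (query term -> {doc term: weight})
    lets related document terms count as fractional matches."""
    tf = Counter(doc_terms)
    score = 0.0
    for q in query_terms:
        eff_tf = tf[q]  # exact matches
        for d, w in thesaurus.get(q, {}).items():
            eff_tf += w * tf[d]  # soft matches via the thesaurus
        if eff_tf > 0:
            score += bm25_term(eff_tf, df.get(q, 1), n_docs,
                               len(doc_terms), avg_len)
    return score

# Toy usage: the document never contains "sneakers", but the hypothetical
# thesaurus pair (sneakers -> shoes, weight 0.6) still yields a partial match.
thesaurus = {"sneakers": {"shoes": 0.6}}
doc = "running shoes with cushioned soles".split()
print(thesaurus_bm25(["sneakers"], doc, df={"sneakers": 20},
                     n_docs=1000, avg_len=6.0, thesaurus=thesaurus))
```

In the paper's pipeline, the pair weights themselves come from a trained neural relevance model scored over partial query and document segments; here they are hard-coded only to keep the example self-contained.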
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="kim-etal-2024-discovering">
    <titleInfo>
      <title>Discovering Biases in Information Retrieval Models Using Relevance Thesaurus as Global Explanation</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Youngwoo</namePart>
      <namePart type="family">Kim</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Razieh</namePart>
      <namePart type="family">Rahimi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">James</namePart>
      <namePart type="family">Allan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Yaser</namePart>
        <namePart type="family">Al-Onaizan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mohit</namePart>
        <namePart type="family">Bansal</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yun-Nung</namePart>
        <namePart type="family">Chen</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Miami, Florida, USA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Most of the efforts in interpreting neural relevance models have been on local explanations, which explain the relevance of a document to a query. However, local explanations are not effective in predicting the model's behavior on unseen texts. We aim to explain a neural relevance model by providing lexical explanations that can be globally generalized. Specifically, we construct a relevance thesaurus containing semantically relevant query term and document term pairs, which can augment BM25 scoring functions to better approximate the neural model's predictions. We propose a novel method for relevance thesaurus construction. Our method involves training a neural relevance model that can score the relevance of partial segments of queries and documents. The trained model is used to identify relevant terms over the vocabulary space. The resulting thesaurus explanation is evaluated based on ranking effectiveness and fidelity to the targeted neural ranking model. Finally, our thesaurus reveals the existence of brand name bias in ranking models, which further supports the utility of our explanation method.</abstract>
<identifier type="citekey">kim-etal-2024-discovering</identifier>
<identifier type="doi">10.18653/v1/2024.emnlp-main.1089</identifier>
<location>
<url>https://aclanthology.org/2024.emnlp-main.1089/</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>19530</start>
<end>19547</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Discovering Biases in Information Retrieval Models Using Relevance Thesaurus as Global Explanation
%A Kim, Youngwoo
%A Rahimi, Razieh
%A Allan, James
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F kim-etal-2024-discovering
%X Most of the efforts in interpreting neural relevance models have been on local explanations, which explain the relevance of a document to a query. However, local explanations are not effective in predicting the model's behavior on unseen texts. We aim to explain a neural relevance model by providing lexical explanations that can be globally generalized. Specifically, we construct a relevance thesaurus containing semantically relevant query term and document term pairs, which can augment BM25 scoring functions to better approximate the neural model's predictions. We propose a novel method for relevance thesaurus construction. Our method involves training a neural relevance model that can score the relevance of partial segments of queries and documents. The trained model is used to identify relevant terms over the vocabulary space. The resulting thesaurus explanation is evaluated based on ranking effectiveness and fidelity to the targeted neural ranking model. Finally, our thesaurus reveals the existence of brand name bias in ranking models, which further supports the utility of our explanation method.
%R 10.18653/v1/2024.emnlp-main.1089
%U https://aclanthology.org/2024.emnlp-main.1089/
%U https://doi.org/10.18653/v1/2024.emnlp-main.1089
%P 19530-19547
Markdown (Informal)
[Discovering Biases in Information Retrieval Models Using Relevance Thesaurus as Global Explanation](https://aclanthology.org/2024.emnlp-main.1089/) (Kim et al., EMNLP 2024)
ACL
Youngwoo Kim, Razieh Rahimi, and James Allan. 2024. Discovering Biases in Information Retrieval Models Using Relevance Thesaurus as Global Explanation. In Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing, pages 19530–19547, Miami, Florida, USA. Association for Computational Linguistics.