@inproceedings{chen-etal-2023-breaking,
title = "Breaking Boundaries in Retrieval Systems: Unsupervised Domain Adaptation with Denoise-Finetuning",
author = "Chen, Che and
Yang, Ching-Wen and
Lin, Chun-Yi and
Kao, Hung-Yu",
editor = "Bouamor, Houda and
Pino, Juan and
Bali, Kalika",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2023",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-emnlp.110",
doi = "10.18653/v1/2023.findings-emnlp.110",
pages = "1630--1642",
abstract = "Dense retrieval models have exhibited remarkable effectiveness, but they rely on abundant labeled data and face challenges when applied to different domains. Previous domain adaptation methods have employed generative models to generate pseudo queries, creating pseudo datasets to enhance the performance of dense retrieval models. However, these approaches typically use unadapted rerank models, leading to potentially imprecise labels. In this paper, we demonstrate the significance of adapting the rerank model to the target domain prior to utilizing it for label generation. This adaptation process enables us to obtain more accurate labels, thereby improving the overall performance of the dense retrieval model. Additionally, by combining the adapted retrieval model with the adapted rerank model, we achieve significantly better domain adaptation results across three retrieval datasets. We release our code for future research.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="chen-etal-2023-breaking">
<titleInfo>
<title>Breaking Boundaries in Retrieval Systems: Unsupervised Domain Adaptation with Denoise-Finetuning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Che</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ching-Wen</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chun-Yi</namePart>
<namePart type="family">Lin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hung-Yu</namePart>
<namePart type="family">Kao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Houda</namePart>
<namePart type="family">Bouamor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juan</namePart>
<namePart type="family">Pino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kalika</namePart>
<namePart type="family">Bali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Dense retrieval models have exhibited remarkable effectiveness, but they rely on abundant labeled data and face challenges when applied to different domains. Previous domain adaptation methods have employed generative models to generate pseudo queries, creating pseudo datasets to enhance the performance of dense retrieval models. However, these approaches typically use unadapted rerank models, leading to potentially imprecise labels. In this paper, we demonstrate the significance of adapting the rerank model to the target domain prior to utilizing it for label generation. This adaptation process enables us to obtain more accurate labels, thereby improving the overall performance of the dense retrieval model. Additionally, by combining the adapted retrieval model with the adapted rerank model, we achieve significantly better domain adaptation results across three retrieval datasets. We release our code for future research.</abstract>
<identifier type="citekey">chen-etal-2023-breaking</identifier>
<identifier type="doi">10.18653/v1/2023.findings-emnlp.110</identifier>
<location>
<url>https://aclanthology.org/2023.findings-emnlp.110</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>1630</start>
<end>1642</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Breaking Boundaries in Retrieval Systems: Unsupervised Domain Adaptation with Denoise-Finetuning
%A Chen, Che
%A Yang, Ching-Wen
%A Lin, Chun-Yi
%A Kao, Hung-Yu
%Y Bouamor, Houda
%Y Pino, Juan
%Y Bali, Kalika
%S Findings of the Association for Computational Linguistics: EMNLP 2023
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F chen-etal-2023-breaking
%X Dense retrieval models have exhibited remarkable effectiveness, but they rely on abundant labeled data and face challenges when applied to different domains. Previous domain adaptation methods have employed generative models to generate pseudo queries, creating pseudo datasets to enhance the performance of dense retrieval models. However, these approaches typically use unadapted rerank models, leading to potentially imprecise labels. In this paper, we demonstrate the significance of adapting the rerank model to the target domain prior to utilizing it for label generation. This adaptation process enables us to obtain more accurate labels, thereby improving the overall performance of the dense retrieval model. Additionally, by combining the adapted retrieval model with the adapted rerank model, we achieve significantly better domain adaptation results across three retrieval datasets. We release our code for future research.
%R 10.18653/v1/2023.findings-emnlp.110
%U https://aclanthology.org/2023.findings-emnlp.110
%U https://doi.org/10.18653/v1/2023.findings-emnlp.110
%P 1630-1642
Markdown (Informal)
[Breaking Boundaries in Retrieval Systems: Unsupervised Domain Adaptation with Denoise-Finetuning](https://aclanthology.org/2023.findings-emnlp.110) (Chen et al., Findings 2023)
ACL