@inproceedings{xiong-etal-2023-eara,
title = "{EARA}: Improving Biomedical Semantic Textual Similarity with Entity-Aligned Attention and Retrieval Augmentation",
author = "Xiong, Ying and
Yang, Xin and
Liu, Linjing and
Wong, Ka-Chun and
Chen, Qingcai and
Xiang, Yang and
Tang, Buzhou",
editor = "Bouamor, Houda and
Pino, Juan and
Bali, Kalika",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2023",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-emnlp.586",
doi = "10.18653/v1/2023.findings-emnlp.586",
pages = "8760--8771",
abstract = "Measuring Semantic Textual Similarity (STS) is a fundamental task in biomedical text processing, which aims at quantifying the similarity between two input biomedical sentences. Unfortunately, the STS datasets in the biomedical domain are relatively smaller but more complex in semantics than common domain, often leading to overfitting issues and insufficient text representation even based on Pre-trained Language Models (PLMs) due to too many biomedical entities. In this paper, we propose EARA, an entity-aligned, attention-based and retrieval-augmented PLMs. Our proposed EARA first aligns the same type of fine-grained entity information in each sentence pair with an entity alignment matrix. Then, EARA regularizes the attention mechanism with an entity alignment matrix with an auxiliary loss. Finally, we add a retrieval module that retrieves similar instances to expand the scope of entity pairs and improve the model{'}s generalization. The comprehensive experiments reflect that EARA can achieve state-of-the-art performance on both in-domain and out-of-domain datasets. Source code is available.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="xiong-etal-2023-eara">
<titleInfo>
<title>EARA: Improving Biomedical Semantic Textual Similarity with Entity-Aligned Attention and Retrieval Augmentation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ying</namePart>
<namePart type="family">Xiong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xin</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Linjing</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ka-Chun</namePart>
<namePart type="family">Wong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Qingcai</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Xiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Buzhou</namePart>
<namePart type="family">Tang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Houda</namePart>
<namePart type="family">Bouamor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juan</namePart>
<namePart type="family">Pino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kalika</namePart>
<namePart type="family">Bali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Measuring Semantic Textual Similarity (STS) is a fundamental task in biomedical text processing, which aims at quantifying the similarity between two input biomedical sentences. Unfortunately, the STS datasets in the biomedical domain are relatively smaller but more complex in semantics than common domain, often leading to overfitting issues and insufficient text representation even based on Pre-trained Language Models (PLMs) due to too many biomedical entities. In this paper, we propose EARA, an entity-aligned, attention-based and retrieval-augmented PLMs. Our proposed EARA first aligns the same type of fine-grained entity information in each sentence pair with an entity alignment matrix. Then, EARA regularizes the attention mechanism with an entity alignment matrix with an auxiliary loss. Finally, we add a retrieval module that retrieves similar instances to expand the scope of entity pairs and improve the model’s generalization. The comprehensive experiments reflect that EARA can achieve state-of-the-art performance on both in-domain and out-of-domain datasets. Source code is available.</abstract>
<identifier type="citekey">xiong-etal-2023-eara</identifier>
<identifier type="doi">10.18653/v1/2023.findings-emnlp.586</identifier>
<location>
<url>https://aclanthology.org/2023.findings-emnlp.586</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>8760</start>
<end>8771</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T EARA: Improving Biomedical Semantic Textual Similarity with Entity-Aligned Attention and Retrieval Augmentation
%A Xiong, Ying
%A Yang, Xin
%A Liu, Linjing
%A Wong, Ka-Chun
%A Chen, Qingcai
%A Xiang, Yang
%A Tang, Buzhou
%Y Bouamor, Houda
%Y Pino, Juan
%Y Bali, Kalika
%S Findings of the Association for Computational Linguistics: EMNLP 2023
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F xiong-etal-2023-eara
%X Measuring Semantic Textual Similarity (STS) is a fundamental task in biomedical text processing, which aims at quantifying the similarity between two input biomedical sentences. Unfortunately, the STS datasets in the biomedical domain are relatively smaller but more complex in semantics than common domain, often leading to overfitting issues and insufficient text representation even based on Pre-trained Language Models (PLMs) due to too many biomedical entities. In this paper, we propose EARA, an entity-aligned, attention-based and retrieval-augmented PLMs. Our proposed EARA first aligns the same type of fine-grained entity information in each sentence pair with an entity alignment matrix. Then, EARA regularizes the attention mechanism with an entity alignment matrix with an auxiliary loss. Finally, we add a retrieval module that retrieves similar instances to expand the scope of entity pairs and improve the model’s generalization. The comprehensive experiments reflect that EARA can achieve state-of-the-art performance on both in-domain and out-of-domain datasets. Source code is available.
%R 10.18653/v1/2023.findings-emnlp.586
%U https://aclanthology.org/2023.findings-emnlp.586
%U https://doi.org/10.18653/v1/2023.findings-emnlp.586
%P 8760-8771
Markdown (Informal)
[EARA: Improving Biomedical Semantic Textual Similarity with Entity-Aligned Attention and Retrieval Augmentation](https://aclanthology.org/2023.findings-emnlp.586) (Xiong et al., Findings 2023)
ACL