@inproceedings{mustafa-etal-2024-leveraging,
title = "Leveraging {W}ikidata for Biomedical Entity Linking in a Low-Resource Setting: A Case Study for {G}erman",
author = "Mustafa, Faizan E and
Dima, Corina and
Ochoa, Juan and
Staab, Steffen",
editor = "Naumann, Tristan and
Ben Abacha, Asma and
Bethard, Steven and
Roberts, Kirk and
Bitterman, Danielle",
booktitle = "Proceedings of the 6th Clinical Natural Language Processing Workshop",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.clinicalnlp-1.17",
doi = "10.18653/v1/2024.clinicalnlp-1.17",
pages = "202--207",
abstract = "Biomedical Entity Linking (BEL) is a challenging task for low-resource languages, due to the lack of appropriate resources: datasets, knowledge bases (KBs), and pre-trained models. In this paper, we propose an approach to create a biomedical knowledge base for German BEL using UMLS information from Wikidata, that provides good coverage and can be easily extended to further languages. As a further contribution, we adapt several existing approaches for use in the German BEL setup, and report on their results. The chosen methods include a sparse model using character n-grams, a multilingual biomedical entity linker, and two general-purpose text retrieval models. Our results show that a language-specific KB that provides good coverage leads to most improvement in entity linking performance, irrespective of the used model. The finetuned German BEL model, newly created UMLS$_{Wikidata}$ KB as well as the code to reproduce our results are publicly available.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mustafa-etal-2024-leveraging">
<titleInfo>
<title>Leveraging Wikidata for Biomedical Entity Linking in a Low-Resource Setting: A Case Study for German</title>
</titleInfo>
<name type="personal">
<namePart type="given">Faizan</namePart>
<namePart type="given">E</namePart>
<namePart type="family">Mustafa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Corina</namePart>
<namePart type="family">Dima</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juan</namePart>
<namePart type="family">Ochoa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steffen</namePart>
<namePart type="family">Staab</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 6th Clinical Natural Language Processing Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tristan</namePart>
<namePart type="family">Naumann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asma</namePart>
<namePart type="family">Ben Abacha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Bethard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kirk</namePart>
<namePart type="family">Roberts</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Danielle</namePart>
<namePart type="family">Bitterman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mexico City, Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Biomedical Entity Linking (BEL) is a challenging task for low-resource languages, due to the lack of appropriate resources: datasets, knowledge bases (KBs), and pre-trained models. In this paper, we propose an approach to create a biomedical knowledge base for German BEL using UMLS information from Wikidata, that provides good coverage and can be easily extended to further languages. As a further contribution, we adapt several existing approaches for use in the German BEL setup, and report on their results. The chosen methods include a sparse model using character n-grams, a multilingual biomedical entity linker, and two general-purpose text retrieval models. Our results show that a language-specific KB that provides good coverage leads to most improvement in entity linking performance, irrespective of the used model. The finetuned German BEL model, newly created UMLS_Wikidata KB as well as the code to reproduce our results are publicly available.</abstract>
<identifier type="citekey">mustafa-etal-2024-leveraging</identifier>
<identifier type="doi">10.18653/v1/2024.clinicalnlp-1.17</identifier>
<location>
<url>https://aclanthology.org/2024.clinicalnlp-1.17</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>202</start>
<end>207</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Leveraging Wikidata for Biomedical Entity Linking in a Low-Resource Setting: A Case Study for German
%A Mustafa, Faizan E.
%A Dima, Corina
%A Ochoa, Juan
%A Staab, Steffen
%Y Naumann, Tristan
%Y Ben Abacha, Asma
%Y Bethard, Steven
%Y Roberts, Kirk
%Y Bitterman, Danielle
%S Proceedings of the 6th Clinical Natural Language Processing Workshop
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F mustafa-etal-2024-leveraging
%X Biomedical Entity Linking (BEL) is a challenging task for low-resource languages, due to the lack of appropriate resources: datasets, knowledge bases (KBs), and pre-trained models. In this paper, we propose an approach to create a biomedical knowledge base for German BEL using UMLS information from Wikidata, that provides good coverage and can be easily extended to further languages. As a further contribution, we adapt several existing approaches for use in the German BEL setup, and report on their results. The chosen methods include a sparse model using character n-grams, a multilingual biomedical entity linker, and two general-purpose text retrieval models. Our results show that a language-specific KB that provides good coverage leads to most improvement in entity linking performance, irrespective of the used model. The finetuned German BEL model, newly created UMLS_Wikidata KB as well as the code to reproduce our results are publicly available.
%R 10.18653/v1/2024.clinicalnlp-1.17
%U https://aclanthology.org/2024.clinicalnlp-1.17
%U https://doi.org/10.18653/v1/2024.clinicalnlp-1.17
%P 202-207
Markdown (Informal)
[Leveraging Wikidata for Biomedical Entity Linking in a Low-Resource Setting: A Case Study for German](https://aclanthology.org/2024.clinicalnlp-1.17) (Mustafa et al., ClinicalNLP-WS 2024)
ACL