@inproceedings{vakili-etal-2024-name,
title = "When Is a Name Sensitive? Eponyms in Clinical Text and Implications for De-Identification",
author = "Vakili, Thomas and
Hullmann, Tyr and
Henriksson, Aron and
Dalianis, Hercules",
editor = {Volodina, Elena and
Alfter, David and
Dobnik, Simon and
Lindstr{\"o}m Tiedemann, Therese and
Mu{\~n}oz S{\'a}nchez, Ricardo and
Szawerna, Maria Irena and
Vu, Xuan-Son},
booktitle = "Proceedings of the Workshop on Computational Approaches to Language Data Pseudonymization (CALD-pseudo 2024)",
month = mar,
year = "2024",
address = "St. Julian{'}s, Malta",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.caldpseudo-1.9",
pages = "76--80",
abstract = "Clinical data, in the form of electronic health records, are rich resources that can be tapped using natural language processing. At the same time, they contain very sensitive information that must be protected. One strategy is to remove or obscure data using automatic de-identification. However, the detection of sensitive data can yield false positives. This is especially true for tokens that are similar in form to sensitive entities, such as eponyms. These names tend to refer to medical procedures or diagnoses rather than specific persons. Previous research has shown that automatic de-identification systems often misclassify eponyms as names, leading to a loss of valuable medical information. In this study, we estimate the prevalence of eponyms in a real Swedish clinical corpus. Furthermore, we demonstrate that modern transformer-based de-identification systems are more accurate in distinguishing between names and eponyms than previous approaches.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="vakili-etal-2024-name">
<titleInfo>
<title>When Is a Name Sensitive? Eponyms in Clinical Text and Implications for De-Identification</title>
</titleInfo>
<name type="personal">
<namePart type="given">Thomas</namePart>
<namePart type="family">Vakili</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tyr</namePart>
<namePart type="family">Hullmann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aron</namePart>
<namePart type="family">Henriksson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hercules</namePart>
<namePart type="family">Dalianis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Workshop on Computational Approaches to Language Data Pseudonymization (CALD-pseudo 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Elena</namePart>
<namePart type="family">Volodina</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Alfter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Simon</namePart>
<namePart type="family">Dobnik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Therese</namePart>
<namePart type="family">Lindström Tiedemann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ricardo</namePart>
<namePart type="family">Muñoz Sánchez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="given">Irena</namePart>
<namePart type="family">Szawerna</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xuan-Son</namePart>
<namePart type="family">Vu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">St. Julian’s, Malta</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Clinical data, in the form of electronic health records, are rich resources that can be tapped using natural language processing. At the same time, they contain very sensitive information that must be protected. One strategy is to remove or obscure data using automatic de-identification. However, the detection of sensitive data can yield false positives. This is especially true for tokens that are similar in form to sensitive entities, such as eponyms. These names tend to refer to medical procedures or diagnoses rather than specific persons. Previous research has shown that automatic de-identification systems often misclassify eponyms as names, leading to a loss of valuable medical information. In this study, we estimate the prevalence of eponyms in a real Swedish clinical corpus. Furthermore, we demonstrate that modern transformer-based de-identification systems are more accurate in distinguishing between names and eponyms than previous approaches.</abstract>
<identifier type="citekey">vakili-etal-2024-name</identifier>
<location>
<url>https://aclanthology.org/2024.caldpseudo-1.9</url>
</location>
<part>
<date>2024-03</date>
<extent unit="page">
<start>76</start>
<end>80</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T When Is a Name Sensitive? Eponyms in Clinical Text and Implications for De-Identification
%A Vakili, Thomas
%A Hullmann, Tyr
%A Henriksson, Aron
%A Dalianis, Hercules
%Y Volodina, Elena
%Y Alfter, David
%Y Dobnik, Simon
%Y Lindström Tiedemann, Therese
%Y Muñoz Sánchez, Ricardo
%Y Szawerna, Maria Irena
%Y Vu, Xuan-Son
%S Proceedings of the Workshop on Computational Approaches to Language Data Pseudonymization (CALD-pseudo 2024)
%D 2024
%8 March
%I Association for Computational Linguistics
%C St. Julian’s, Malta
%F vakili-etal-2024-name
%X Clinical data, in the form of electronic health records, are rich resources that can be tapped using natural language processing. At the same time, they contain very sensitive information that must be protected. One strategy is to remove or obscure data using automatic de-identification. However, the detection of sensitive data can yield false positives. This is especially true for tokens that are similar in form to sensitive entities, such as eponyms. These names tend to refer to medical procedures or diagnoses rather than specific persons. Previous research has shown that automatic de-identification systems often misclassify eponyms as names, leading to a loss of valuable medical information. In this study, we estimate the prevalence of eponyms in a real Swedish clinical corpus. Furthermore, we demonstrate that modern transformer-based de-identification systems are more accurate in distinguishing between names and eponyms than previous approaches.
%U https://aclanthology.org/2024.caldpseudo-1.9
%P 76-80
Markdown (Informal)
[When Is a Name Sensitive? Eponyms in Clinical Text and Implications for De-Identification](https://aclanthology.org/2024.caldpseudo-1.9) (Vakili et al., CALD-pseudo-WS 2024)
ACL