@inproceedings{grancharova-dalianis-2021-applying,
title = "Applying and Sharing pre-trained {BERT}-models for Named Entity Recognition and Classification in {S}wedish Electronic Patient Records",
author = "Grancharova, Mila and
Dalianis, Hercules",
editor = "Dobnik, Simon and
{\O}vrelid, Lilja",
booktitle = "Proceedings of the 23rd Nordic Conference on Computational Linguistics (NoDaLiDa)",
month = may # " 31--2 " # jun,
year = "2021",
address = "Reykjavik, Iceland (Online)",
publisher = {Link{\"o}ping University Electronic Press, Sweden},
url = "https://aclanthology.org/2021.nodalida-main.23",
pages = "231--239",
abstract = "To be able to share the valuable information in electronic patient records (EPR) they first need to be de-identified in order to protect the privacy of their subjects. Named entity recognition and classification (NERC) is an important part of this process. In recent years, general-purpose language models pre-trained on large amounts of data, in particular BERT, have achieved state of the art results in NERC, among other NLP tasks. So far, however, no attempts have been made at applying BERT for NERC on Swedish EPR data. This study attempts to fine-tune one Swedish BERT-model and one multilingual BERT-model for NERC on a Swedish EPR corpus. The aim is to assess the applicability of BERT-models for this task as well as to compare the two models in a domain-specific Swedish language task. With the Swedish model, recall of 0.9220 and precision of 0.9226 is achieved. This is an improvement to previous results on the same corpus since the high recall does not sacrifice precision. As the models also perform relatively well when fine-tuned with pseudonymised data, it is concluded that there is good potential in using this method in a shareable de-identification system for Swedish clinical text.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="grancharova-dalianis-2021-applying">
    <titleInfo>
      <title>Applying and Sharing pre-trained BERT-models for Named Entity Recognition and Classification in Swedish Electronic Patient Records</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Mila</namePart>
      <namePart type="family">Grancharova</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Hercules</namePart>
      <namePart type="family">Dalianis</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>May 31 – June 2, 2021</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 23rd Nordic Conference on Computational Linguistics (NoDaLiDa)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Simon</namePart>
        <namePart type="family">Dobnik</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Lilja</namePart>
        <namePart type="family">Øvrelid</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Linköping University Electronic Press, Sweden</publisher>
        <place>
          <placeTerm type="text">Reykjavik, Iceland (Online)</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>To be able to share the valuable information in electronic patient records (EPR) they first need to be de-identified in order to protect the privacy of their subjects. Named entity recognition and classification (NERC) is an important part of this process. In recent years, general-purpose language models pre-trained on large amounts of data, in particular BERT, have achieved state of the art results in NERC, among other NLP tasks. So far, however, no attempts have been made at applying BERT for NERC on Swedish EPR data. This study attempts to fine-tune one Swedish BERT-model and one multilingual BERT-model for NERC on a Swedish EPR corpus. The aim is to assess the applicability of BERT-models for this task as well as to compare the two models in a domain-specific Swedish language task. With the Swedish model, recall of 0.9220 and precision of 0.9226 is achieved. This is an improvement to previous results on the same corpus since the high recall does not sacrifice precision. As the models also perform relatively well when fine-tuned with pseudonymised data, it is concluded that there is good potential in using this method in a shareable de-identification system for Swedish clinical text.</abstract>
    <identifier type="citekey">grancharova-dalianis-2021-applying</identifier>
    <location>
      <url>https://aclanthology.org/2021.nodalida-main.23</url>
    </location>
    <part>
      <date>May 31 – June 2, 2021</date>
      <extent unit="page">
        <start>231</start>
        <end>239</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Applying and Sharing pre-trained BERT-models for Named Entity Recognition and Classification in Swedish Electronic Patient Records
%A Grancharova, Mila
%A Dalianis, Hercules
%Y Dobnik, Simon
%Y Øvrelid, Lilja
%S Proceedings of the 23rd Nordic Conference on Computational Linguistics (NoDaLiDa)
%D 2021
%8 May 31 – June 2
%I Linköping University Electronic Press, Sweden
%C Reykjavik, Iceland (Online)
%F grancharova-dalianis-2021-applying
%X To be able to share the valuable information in electronic patient records (EPR) they first need to be de-identified in order to protect the privacy of their subjects. Named entity recognition and classification (NERC) is an important part of this process. In recent years, general-purpose language models pre-trained on large amounts of data, in particular BERT, have achieved state of the art results in NERC, among other NLP tasks. So far, however, no attempts have been made at applying BERT for NERC on Swedish EPR data. This study attempts to fine-tune one Swedish BERT-model and one multilingual BERT-model for NERC on a Swedish EPR corpus. The aim is to assess the applicability of BERT-models for this task as well as to compare the two models in a domain-specific Swedish language task. With the Swedish model, recall of 0.9220 and precision of 0.9226 is achieved. This is an improvement to previous results on the same corpus since the high recall does not sacrifice precision. As the models also perform relatively well when fine-tuned with pseudonymised data, it is concluded that there is good potential in using this method in a shareable de-identification system for Swedish clinical text.
%U https://aclanthology.org/2021.nodalida-main.23
%P 231-239
Markdown (Informal)
[Applying and Sharing pre-trained BERT-models for Named Entity Recognition and Classification in Swedish Electronic Patient Records](https://aclanthology.org/2021.nodalida-main.23) (Grancharova & Dalianis, NoDaLiDa 2021)
ACL
Mila Grancharova and Hercules Dalianis. 2021. Applying and Sharing pre-trained BERT-models for Named Entity Recognition and Classification in Swedish Electronic Patient Records. In Proceedings of the 23rd Nordic Conference on Computational Linguistics (NoDaLiDa), pages 231–239, Reykjavik, Iceland (Online). Linköping University Electronic Press, Sweden.
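
The cited paper fine-tunes pre-trained BERT models for NERC, i.e. token classification over clinical text. As a rough illustration of that recipe, here is a minimal sketch using the Hugging Face transformers API. The checkpoint name (KB/bert-base-swedish-cased, a publicly available Swedish BERT), the PHI-style label set, and the toy sentence are our assumptions for the example, not the authors' actual models, labels, or data.

```python
# Minimal sketch: fine-tuning a pre-trained BERT model for NERC
# (token classification). Checkpoint, labels, and data are illustrative
# assumptions, not the setup from Grancharova & Dalianis (2021).
import torch
from transformers import AutoTokenizer, AutoModelForTokenClassification

# Assumed checkpoint: a public Swedish BERT; the paper compares a
# Swedish and a multilingual BERT model.
MODEL_NAME = "KB/bert-base-swedish-cased"

# Hypothetical de-identification label set in BIO format.
labels = ["O", "B-Person", "I-Person", "B-Location", "I-Location"]
label2id = {l: i for i, l in enumerate(labels)}

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForTokenClassification.from_pretrained(
    MODEL_NAME, num_labels=len(labels)
)

# Toy example: one pre-tokenised sentence with word-level tags.
words = ["Patienten", "träffade", "dr", "Svensson", "i", "Uppsala"]
word_tags = ["O", "O", "O", "B-Person", "O", "B-Location"]

enc = tokenizer(words, is_split_into_words=True, return_tensors="pt")

# Align word-level labels to subword tokens: special tokens and
# continuation subwords get -100 so the loss ignores them.
word_ids = enc.word_ids(batch_index=0)
aligned, prev = [], None
for wid in word_ids:
    if wid is None or wid == prev:
        aligned.append(-100)
    else:
        aligned.append(label2id[word_tags[wid]])
    prev = wid
labels_t = torch.tensor([aligned])

# One gradient step; a real run would iterate over a labelled EPR
# corpus (or its pseudonymised counterpart, as the paper also tests).
optim = torch.optim.AdamW(model.parameters(), lr=5e-5)
out = model(**enc, labels=labels_t)
out.loss.backward()
optim.step()
print(f"loss: {out.loss.item():.4f}")
```

The -100 alignment step is the standard trick for word-level NER labels on subword tokenizers: only the first subword of each word carries a label, so precision and recall are computed per entity word rather than per word piece.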