@inproceedings{bannour-etal-2024-benchmark,
title = "A Benchmark Evaluation of Clinical Named Entity Recognition in {F}rench",
author = "Bannour, Nesrine and
Servan, Christophe and
N{\'e}v{\'e}ol, Aur{\'e}lie and
Tannier, Xavier",
editor = "Calzolari, Nicoletta and
Kan, Min-Yen and
Hoste, Veronique and
Lenci, Alessandro and
Sakti, Sakriani and
Xue, Nianwen",
booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.lrec-main.2",
pages = "14--21",
abstract = "Background: Transformer-based language models have shown strong performance on many Natural Language Processing (NLP) tasks. Masked Language Models (MLMs) attract sustained interest because they can be adapted to different languages and sub-domains through training or fine-tuning on specific corpora while remaining lighter than modern Large Language Models (MLMs). Recently, several MLMs have been released for the biomedical domain in French, and experiments suggest that they outperform standard French counterparts. However, no systematic evaluation comparing all models on the same corpora is available. Objective: This paper presents an evaluation of masked language models for biomedical French on the task of clinical named entity recognition. Material and methods: We evaluate biomedical models CamemBERT-bio and DrBERT and compare them to standard French models CamemBERT, FlauBERT and FrAlBERT as well as multilingual mBERT using three publically available corpora for clinical named entity recognition in French. The evaluation set-up relies on gold-standard corpora as released by the corpus developers. Results: Results suggest that CamemBERT-bio outperforms DrBERT consistently while FlauBERT offers competitive performance and FrAlBERT achieves the lowest carbon footprint. Conclusion: This is the first benchmark evaluation of biomedical masked language models for French clinical entity recognition that compares model performance consistently on nested entity recognition using metrics covering performance and environmental impact.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bannour-etal-2024-benchmark">
<titleInfo>
<title>A Benchmark Evaluation of Clinical Named Entity Recognition in French</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nesrine</namePart>
<namePart type="family">Bannour</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christophe</namePart>
<namePart type="family">Servan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aurélie</namePart>
<namePart type="family">Névéol</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xavier</namePart>
<namePart type="family">Tannier</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Background: Transformer-based language models have shown strong performance on many Natural Language Processing (NLP) tasks. Masked Language Models (MLMs) attract sustained interest because they can be adapted to different languages and sub-domains through training or fine-tuning on specific corpora while remaining lighter than modern Large Language Models (MLMs). Recently, several MLMs have been released for the biomedical domain in French, and experiments suggest that they outperform standard French counterparts. However, no systematic evaluation comparing all models on the same corpora is available. Objective: This paper presents an evaluation of masked language models for biomedical French on the task of clinical named entity recognition. Material and methods: We evaluate biomedical models CamemBERT-bio and DrBERT and compare them to standard French models CamemBERT, FlauBERT and FrAlBERT as well as multilingual mBERT using three publically available corpora for clinical named entity recognition in French. The evaluation set-up relies on gold-standard corpora as released by the corpus developers. Results: Results suggest that CamemBERT-bio outperforms DrBERT consistently while FlauBERT offers competitive performance and FrAlBERT achieves the lowest carbon footprint. Conclusion: This is the first benchmark evaluation of biomedical masked language models for French clinical entity recognition that compares model performance consistently on nested entity recognition using metrics covering performance and environmental impact.</abstract>
<identifier type="citekey">bannour-etal-2024-benchmark</identifier>
<location>
<url>https://aclanthology.org/2024.lrec-main.2</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>14</start>
<end>21</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Benchmark Evaluation of Clinical Named Entity Recognition in French
%A Bannour, Nesrine
%A Servan, Christophe
%A Névéol, Aurélie
%A Tannier, Xavier
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F bannour-etal-2024-benchmark
%X Background: Transformer-based language models have shown strong performance on many Natural Language Processing (NLP) tasks. Masked Language Models (MLMs) attract sustained interest because they can be adapted to different languages and sub-domains through training or fine-tuning on specific corpora while remaining lighter than modern Large Language Models (MLMs). Recently, several MLMs have been released for the biomedical domain in French, and experiments suggest that they outperform standard French counterparts. However, no systematic evaluation comparing all models on the same corpora is available. Objective: This paper presents an evaluation of masked language models for biomedical French on the task of clinical named entity recognition. Material and methods: We evaluate biomedical models CamemBERT-bio and DrBERT and compare them to standard French models CamemBERT, FlauBERT and FrAlBERT as well as multilingual mBERT using three publically available corpora for clinical named entity recognition in French. The evaluation set-up relies on gold-standard corpora as released by the corpus developers. Results: Results suggest that CamemBERT-bio outperforms DrBERT consistently while FlauBERT offers competitive performance and FrAlBERT achieves the lowest carbon footprint. Conclusion: This is the first benchmark evaluation of biomedical masked language models for French clinical entity recognition that compares model performance consistently on nested entity recognition using metrics covering performance and environmental impact.
%U https://aclanthology.org/2024.lrec-main.2
%P 14-21
Markdown (Informal)
[A Benchmark Evaluation of Clinical Named Entity Recognition in French](https://aclanthology.org/2024.lrec-main.2) (Bannour et al., LREC-COLING 2024)
ACL