@inproceedings{doneva-etal-2024-neurotrialner,
title = "{N}euro{T}rial{NER}: An Annotated Corpus for Neurological Diseases and Therapies in Clinical Trial Registries",
author = "Doneva, Simona Emilova and
Ellendorff, Tilia and
Sick, Beate and
Goldman, Jean-Philippe and
Cannon, Amelia Elaine and
Schneider, Gerold and
Ineichen, Benjamin Victor",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.emnlp-main.1050/",
doi = "10.18653/v1/2024.emnlp-main.1050",
pages = "18868--18890",
abstract = "Extracting and aggregating information from clinical trial registries could provide invaluable insights into the drug development landscape and advance the treatment of neurologic diseases. However, achieving this at scale is hampered by the volume of available data and the lack of an annotated corpus to assist in the development of automation tools. Thus, we introduce NeuroTrialNER, a new and fully open corpus for named entity recognition (NER). It comprises 1093 clinical trial summaries sourced from ClinicalTrials.gov, annotated for neurological diseases, therapeutic interventions, and control treatments. We describe our data collection process and the corpus in detail. We demonstrate its utility for NER using large language models and achieve a close-to-human performance. By bridging the gap in data resources, we hope to foster the development of text processing tools that help researchers navigate clinical trials data more easily."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="doneva-etal-2024-neurotrialner">
<titleInfo>
<title>NeuroTrialNER: An Annotated Corpus for Neurological Diseases and Therapies in Clinical Trial Registries</title>
</titleInfo>
<name type="personal">
<namePart type="given">Simona</namePart>
<namePart type="given">Emilova</namePart>
<namePart type="family">Doneva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tilia</namePart>
<namePart type="family">Ellendorff</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Beate</namePart>
<namePart type="family">Sick</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jean-Philippe</namePart>
<namePart type="family">Goldman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amelia</namePart>
<namePart type="given">Elaine</namePart>
<namePart type="family">Cannon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gerold</namePart>
<namePart type="family">Schneider</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Benjamin</namePart>
<namePart type="given">Victor</namePart>
<namePart type="family">Ineichen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Extracting and aggregating information from clinical trial registries could provide invaluable insights into the drug development landscape and advance the treatment of neurologic diseases. However, achieving this at scale is hampered by the volume of available data and the lack of an annotated corpus to assist in the development of automation tools. Thus, we introduce NeuroTrialNER, a new and fully open corpus for named entity recognition (NER). It comprises 1093 clinical trial summaries sourced from ClinicalTrials.gov, annotated for neurological diseases, therapeutic interventions, and control treatments. We describe our data collection process and the corpus in detail. We demonstrate its utility for NER using large language models and achieve a close-to-human performance. By bridging the gap in data resources, we hope to foster the development of text processing tools that help researchers navigate clinical trials data more easily.</abstract>
<identifier type="citekey">doneva-etal-2024-neurotrialner</identifier>
<identifier type="doi">10.18653/v1/2024.emnlp-main.1050</identifier>
<location>
<url>https://aclanthology.org/2024.emnlp-main.1050/</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>18868</start>
<end>18890</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T NeuroTrialNER: An Annotated Corpus for Neurological Diseases and Therapies in Clinical Trial Registries
%A Doneva, Simona Emilova
%A Ellendorff, Tilia
%A Sick, Beate
%A Goldman, Jean-Philippe
%A Cannon, Amelia Elaine
%A Schneider, Gerold
%A Ineichen, Benjamin Victor
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F doneva-etal-2024-neurotrialner
%X Extracting and aggregating information from clinical trial registries could provide invaluable insights into the drug development landscape and advance the treatment of neurologic diseases. However, achieving this at scale is hampered by the volume of available data and the lack of an annotated corpus to assist in the development of automation tools. Thus, we introduce NeuroTrialNER, a new and fully open corpus for named entity recognition (NER). It comprises 1093 clinical trial summaries sourced from ClinicalTrials.gov, annotated for neurological diseases, therapeutic interventions, and control treatments. We describe our data collection process and the corpus in detail. We demonstrate its utility for NER using large language models and achieve a close-to-human performance. By bridging the gap in data resources, we hope to foster the development of text processing tools that help researchers navigate clinical trials data more easily.
%R 10.18653/v1/2024.emnlp-main.1050
%U https://aclanthology.org/2024.emnlp-main.1050/
%U https://doi.org/10.18653/v1/2024.emnlp-main.1050
%P 18868-18890
Markdown (Informal)
[NeuroTrialNER: An Annotated Corpus for Neurological Diseases and Therapies in Clinical Trial Registries](https://aclanthology.org/2024.emnlp-main.1050/) (Doneva et al., EMNLP 2024)
ACL
- Simona Emilova Doneva, Tilia Ellendorff, Beate Sick, Jean-Philippe Goldman, Amelia Elaine Cannon, Gerold Schneider, and Benjamin Victor Ineichen. 2024. NeuroTrialNER: An Annotated Corpus for Neurological Diseases and Therapies in Clinical Trial Registries. In Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing, pages 18868–18890, Miami, Florida, USA. Association for Computational Linguistics.