@inproceedings{schafer-etal-2022-cross,
title = "Cross-Language Transfer of High-Quality Annotations: Combining Neural Machine Translation with Cross-Linguistic Span Alignment to Apply {NER} to Clinical Texts in a Low-Resource Language",
author = {Sch{\"a}fer, Henning and
Idrissi-Yaghir, Ahmad and
Horn, Peter and
Friedrich, Christoph},
editor = "Naumann, Tristan and
Bethard, Steven and
Roberts, Kirk and
Rumshisky, Anna",
booktitle = "Proceedings of the 4th Clinical Natural Language Processing Workshop",
month = jul,
year = "2022",
address = "Seattle, WA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.clinicalnlp-1.6/",
doi = "10.18653/v1/2022.clinicalnlp-1.6",
pages = "53--62",
abstract = {In this work, cross-linguistic span prediction based on contextualized word embedding models is used together with neural machine translation (NMT) to transfer and apply the state-of-the-art models in natural language processing (NLP) to a low-resource language clinical corpus. Two directions are evaluated: (a) English models can be applied to translated texts to subsequently transfer the predicted annotations to the source language and (b) existing high-quality annotations can be transferred beyond translation and then used to train NLP models in the target language. Effectiveness and loss of transmission is evaluated using the German Berlin-T{\"u}bingen-Oncology Corpus (BRONCO) dataset with transferred external data from NCBI disease, SemEval-2013 drug-drug interaction (DDI) and i2b2/VA 2010 data. The use of English models for translated clinical texts has always involved attempts to take full advantage of the benefits associated with them (large pre-trained biomedical word embeddings). To improve advances in this area, we provide a general-purpose pipeline to transfer any annotated BRAT or CoNLL format to various target languages. For the entity class medication, good results were obtained with 0.806 $F1$-score after re-alignment. Limited success occurred in the diagnosis and treatment class with results just below 0.5 $F1$-score due to differences in annotation guidelines.}
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="schafer-etal-2022-cross">
<titleInfo>
<title>Cross-Language Transfer of High-Quality Annotations: Combining Neural Machine Translation with Cross-Linguistic Span Alignment to Apply NER to Clinical Texts in a Low-Resource Language</title>
</titleInfo>
<name type="personal">
<namePart type="given">Henning</namePart>
<namePart type="family">Schäfer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ahmad</namePart>
<namePart type="family">Idrissi-Yaghir</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Peter</namePart>
<namePart type="family">Horn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christoph</namePart>
<namePart type="family">Friedrich</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 4th Clinical Natural Language Processing Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tristan</namePart>
<namePart type="family">Naumann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Bethard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kirk</namePart>
<namePart type="family">Roberts</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Rumshisky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Seattle, WA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this work, cross-linguistic span prediction based on contextualized word embedding models is used together with neural machine translation (NMT) to transfer and apply the state-of-the-art models in natural language processing (NLP) to a low-resource language clinical corpus. Two directions are evaluated: (a) English models can be applied to translated texts to subsequently transfer the predicted annotations to the source language and (b) existing high-quality annotations can be transferred beyond translation and then used to train NLP models in the target language. Effectiveness and loss of transmission is evaluated using the German Berlin-Tübingen-Oncology Corpus (BRONCO) dataset with transferred external data from NCBI disease, SemEval-2013 drug-drug interaction (DDI) and i2b2/VA 2010 data. The use of English models for translated clinical texts has always involved attempts to take full advantage of the benefits associated with them (large pre-trained biomedical word embeddings). To improve advances in this area, we provide a general-purpose pipeline to transfer any annotated BRAT or CoNLL format to various target languages. For the entity class medication, good results were obtained with 0.806 F1-score after re-alignment. Limited success occurred in the diagnosis and treatment class with results just below 0.5 F1-score due to differences in annotation guidelines.</abstract>
<identifier type="citekey">schafer-etal-2022-cross</identifier>
<identifier type="doi">10.18653/v1/2022.clinicalnlp-1.6</identifier>
<location>
<url>https://aclanthology.org/2022.clinicalnlp-1.6/</url>
</location>
<part>
<date>2022-07</date>
<extent unit="page">
<start>53</start>
<end>62</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Cross-Language Transfer of High-Quality Annotations: Combining Neural Machine Translation with Cross-Linguistic Span Alignment to Apply NER to Clinical Texts in a Low-Resource Language
%A Schäfer, Henning
%A Idrissi-Yaghir, Ahmad
%A Horn, Peter
%A Friedrich, Christoph
%Y Naumann, Tristan
%Y Bethard, Steven
%Y Roberts, Kirk
%Y Rumshisky, Anna
%S Proceedings of the 4th Clinical Natural Language Processing Workshop
%D 2022
%8 July
%I Association for Computational Linguistics
%C Seattle, WA
%F schafer-etal-2022-cross
%X In this work, cross-linguistic span prediction based on contextualized word embedding models is used together with neural machine translation (NMT) to transfer and apply the state-of-the-art models in natural language processing (NLP) to a low-resource language clinical corpus. Two directions are evaluated: (a) English models can be applied to translated texts to subsequently transfer the predicted annotations to the source language and (b) existing high-quality annotations can be transferred beyond translation and then used to train NLP models in the target language. Effectiveness and loss of transmission is evaluated using the German Berlin-Tübingen-Oncology Corpus (BRONCO) dataset with transferred external data from NCBI disease, SemEval-2013 drug-drug interaction (DDI) and i2b2/VA 2010 data. The use of English models for translated clinical texts has always involved attempts to take full advantage of the benefits associated with them (large pre-trained biomedical word embeddings). To improve advances in this area, we provide a general-purpose pipeline to transfer any annotated BRAT or CoNLL format to various target languages. For the entity class medication, good results were obtained with 0.806 F1-score after re-alignment. Limited success occurred in the diagnosis and treatment class with results just below 0.5 F1-score due to differences in annotation guidelines.
%R 10.18653/v1/2022.clinicalnlp-1.6
%U https://aclanthology.org/2022.clinicalnlp-1.6/
%U https://doi.org/10.18653/v1/2022.clinicalnlp-1.6
%P 53-62
Markdown (Informal)
[Cross-Language Transfer of High-Quality Annotations: Combining Neural Machine Translation with Cross-Linguistic Span Alignment to Apply NER to Clinical Texts in a Low-Resource Language](https://aclanthology.org/2022.clinicalnlp-1.6/) (Schäfer et al., ClinicalNLP 2022)
ACL