@inproceedings{kalyan-sangeetha-2020-target,
title = "Target Concept Guided Medical Concept Normalization in Noisy User-Generated Texts",
author = "Kalyan, Katikapalli Subramanyam and
Sangeetha, Sivanesan",
editor = "Agirre, Eneko and
Apidianaki, Marianna and
Vuli{\'c}, Ivan",
booktitle = "Proceedings of Deep Learning Inside Out (DeeLIO): The First Workshop on Knowledge Extraction and Integration for Deep Learning Architectures",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.deelio-1.8/",
doi = "10.18653/v1/2020.deelio-1.8",
pages = "64--73",
abstract = "Medical concept normalization (MCN) i.e., mapping of colloquial medical phrases to standard concepts is an essential step in analysis of medical social media text. The main drawback in existing state-of-the-art approach (Kalyan and Sangeetha, 2020b) is learning target concept vector representations from scratch which requires more number of training instances. Our model is based on RoBERTa and target concept embeddings. In our model, we integrate a) target concept information in the form of target concept vectors generated by encoding target concept descriptions using SRoBERTa, state-of-the-art RoBERTa based sentence embedding model and b) domain lexicon knowledge by enriching target concept vectors with synonym relationship knowledge using retrofitting algorithm. It is the first attempt in MCN to exploit both target concept information as well as domain lexicon knowledge in the form of retrofitted target concept vectors. Our model outperforms all the existing models with an accuracy improvement up to 1.36{\%} on three standard datasets. Further, our model when trained only on mapping lexicon synonyms achieves up to 4.87{\%} improvement in accuracy."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kalyan-sangeetha-2020-target">
<titleInfo>
<title>Target Concept Guided Medical Concept Normalization in Noisy User-Generated Texts</title>
</titleInfo>
<name type="personal">
<namePart type="given">Katikapalli</namePart>
<namePart type="given">Subramanyam</namePart>
<namePart type="family">Kalyan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sivanesan</namePart>
<namePart type="family">Sangeetha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of Deep Learning Inside Out (DeeLIO): The First Workshop on Knowledge Extraction and Integration for Deep Learning Architectures</title>
</titleInfo>
<name type="personal">
<namePart type="given">Eneko</namePart>
<namePart type="family">Agirre</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marianna</namePart>
<namePart type="family">Apidianaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ivan</namePart>
<namePart type="family">Vulić</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Medical concept normalization (MCN) i.e., mapping of colloquial medical phrases to standard concepts is an essential step in analysis of medical social media text. The main drawback in existing state-of-the-art approach (Kalyan and Sangeetha, 2020b) is learning target concept vector representations from scratch which requires more number of training instances. Our model is based on RoBERTa and target concept embeddings. In our model, we integrate a) target concept information in the form of target concept vectors generated by encoding target concept descriptions using SRoBERTa, state-of-the-art RoBERTa based sentence embedding model and b) domain lexicon knowledge by enriching target concept vectors with synonym relationship knowledge using retrofitting algorithm. It is the first attempt in MCN to exploit both target concept information as well as domain lexicon knowledge in the form of retrofitted target concept vectors. Our model outperforms all the existing models with an accuracy improvement up to 1.36% on three standard datasets. Further, our model when trained only on mapping lexicon synonyms achieves up to 4.87% improvement in accuracy.</abstract>
<identifier type="citekey">kalyan-sangeetha-2020-target</identifier>
<identifier type="doi">10.18653/v1/2020.deelio-1.8</identifier>
<location>
<url>https://aclanthology.org/2020.deelio-1.8/</url>
</location>
<part>
<date>2020-11</date>
<extent unit="page">
<start>64</start>
<end>73</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Target Concept Guided Medical Concept Normalization in Noisy User-Generated Texts
%A Kalyan, Katikapalli Subramanyam
%A Sangeetha, Sivanesan
%Y Agirre, Eneko
%Y Apidianaki, Marianna
%Y Vulić, Ivan
%S Proceedings of Deep Learning Inside Out (DeeLIO): The First Workshop on Knowledge Extraction and Integration for Deep Learning Architectures
%D 2020
%8 November
%I Association for Computational Linguistics
%C Online
%F kalyan-sangeetha-2020-target
%X Medical concept normalization (MCN) i.e., mapping of colloquial medical phrases to standard concepts is an essential step in analysis of medical social media text. The main drawback in existing state-of-the-art approach (Kalyan and Sangeetha, 2020b) is learning target concept vector representations from scratch which requires more number of training instances. Our model is based on RoBERTa and target concept embeddings. In our model, we integrate a) target concept information in the form of target concept vectors generated by encoding target concept descriptions using SRoBERTa, state-of-the-art RoBERTa based sentence embedding model and b) domain lexicon knowledge by enriching target concept vectors with synonym relationship knowledge using retrofitting algorithm. It is the first attempt in MCN to exploit both target concept information as well as domain lexicon knowledge in the form of retrofitted target concept vectors. Our model outperforms all the existing models with an accuracy improvement up to 1.36% on three standard datasets. Further, our model when trained only on mapping lexicon synonyms achieves up to 4.87% improvement in accuracy.
%R 10.18653/v1/2020.deelio-1.8
%U https://aclanthology.org/2020.deelio-1.8/
%U https://doi.org/10.18653/v1/2020.deelio-1.8
%P 64-73
Markdown (Informal)
[Target Concept Guided Medical Concept Normalization in Noisy User-Generated Texts](https://aclanthology.org/2020.deelio-1.8/) (Kalyan & Sangeetha, DeeLIO 2020)
ACL