@inproceedings{he-etal-2022-infrrd,
title = "Infrrd.ai at {S}em{E}val-2022 Task 11: A system for named entity recognition using data augmentation, transformer-based sequence labeling model, and {E}nsemble{CRF}",
author = "He, Jianglong and
Uppal, Akshay and
N, Mamatha and
Vignesh, Shiv and
Kumar, Deepak and
Kumar Sarda, Aditya",
editor = "Emerson, Guy and
Schluter, Natalie and
Stanovsky, Gabriel and
Kumar, Ritesh and
Palmer, Alexis and
Schneider, Nathan and
Singh, Siddharth and
Ratan, Shyam",
booktitle = "Proceedings of the 16th International Workshop on Semantic Evaluation (SemEval-2022)",
month = jul,
year = "2022",
address = "Seattle, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.semeval-1.206/",
doi = "10.18653/v1/2022.semeval-1.206",
pages = "1501--1510",
abstract = "In low-resource languages, the amount of training data is limited. Hence, the model has to perform well in unseen sentences and syntax on which the model has not trained. We propose a method that addresses the problem through an encoder and an ensemble of language models. A language-specific language model performed poorly when compared to a multilingual language model. So, the multilingual language model checkpoint is fine-tuned to a specific language. A novel approach of one hot encoder is introduced between the model outputs and the CRF to combine the results in an ensemble format. Our team, \textbf{Infrrd.ai}, competed in the MultiCoNER competition. The results are encouraging where the team is positioned within the top 10 positions. There is less than a 4{\%} percent difference from the third position in most of the tracks that we participated in. The proposed method shows that the ensemble of models with a multilingual language model as the base with the help of an encoder performs better than a single language-specific model."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="he-etal-2022-infrrd">
<titleInfo>
<title>Infrrd.ai at SemEval-2022 Task 11: A system for named entity recognition using data augmentation, transformer-based sequence labeling model, and EnsembleCRF</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jianglong</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Akshay</namePart>
<namePart type="family">Uppal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mamatha</namePart>
<namePart type="family">N</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shiv</namePart>
<namePart type="family">Vignesh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Deepak</namePart>
<namePart type="family">Kumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aditya</namePart>
<namePart type="family">Kumar Sarda</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 16th International Workshop on Semantic Evaluation (SemEval-2022)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Guy</namePart>
<namePart type="family">Emerson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Natalie</namePart>
<namePart type="family">Schluter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gabriel</namePart>
<namePart type="family">Stanovsky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ritesh</namePart>
<namePart type="family">Kumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexis</namePart>
<namePart type="family">Palmer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nathan</namePart>
<namePart type="family">Schneider</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Siddharth</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shyam</namePart>
<namePart type="family">Ratan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Seattle, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In low-resource languages, the amount of training data is limited. Hence, the model has to perform well in unseen sentences and syntax on which the model has not trained. We propose a method that addresses the problem through an encoder and an ensemble of language models. A language-specific language model performed poorly when compared to a multilingual language model. So, the multilingual language model checkpoint is fine-tuned to a specific language. A novel approach of one hot encoder is introduced between the model outputs and the CRF to combine the results in an ensemble format. Our team, Infrrd.ai, competed in the MultiCoNER competition. The results are encouraging where the team is positioned within the top 10 positions. There is less than a 4% percent difference from the third position in most of the tracks that we participated in. The proposed method shows that the ensemble of models with a multilingual language model as the base with the help of an encoder performs better than a single language-specific model.</abstract>
<identifier type="citekey">he-etal-2022-infrrd</identifier>
<identifier type="doi">10.18653/v1/2022.semeval-1.206</identifier>
<location>
<url>https://aclanthology.org/2022.semeval-1.206/</url>
</location>
<part>
<date>2022-07</date>
<extent unit="page">
<start>1501</start>
<end>1510</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Infrrd.ai at SemEval-2022 Task 11: A system for named entity recognition using data augmentation, transformer-based sequence labeling model, and EnsembleCRF
%A He, Jianglong
%A Uppal, Akshay
%A N, Mamatha
%A Vignesh, Shiv
%A Kumar, Deepak
%A Kumar Sarda, Aditya
%Y Emerson, Guy
%Y Schluter, Natalie
%Y Stanovsky, Gabriel
%Y Kumar, Ritesh
%Y Palmer, Alexis
%Y Schneider, Nathan
%Y Singh, Siddharth
%Y Ratan, Shyam
%S Proceedings of the 16th International Workshop on Semantic Evaluation (SemEval-2022)
%D 2022
%8 July
%I Association for Computational Linguistics
%C Seattle, United States
%F he-etal-2022-infrrd
%X In low-resource languages, the amount of training data is limited. Hence, the model has to perform well in unseen sentences and syntax on which the model has not trained. We propose a method that addresses the problem through an encoder and an ensemble of language models. A language-specific language model performed poorly when compared to a multilingual language model. So, the multilingual language model checkpoint is fine-tuned to a specific language. A novel approach of one hot encoder is introduced between the model outputs and the CRF to combine the results in an ensemble format. Our team, Infrrd.ai, competed in the MultiCoNER competition. The results are encouraging where the team is positioned within the top 10 positions. There is less than a 4% percent difference from the third position in most of the tracks that we participated in. The proposed method shows that the ensemble of models with a multilingual language model as the base with the help of an encoder performs better than a single language-specific model.
%R 10.18653/v1/2022.semeval-1.206
%U https://aclanthology.org/2022.semeval-1.206/
%U https://doi.org/10.18653/v1/2022.semeval-1.206
%P 1501-1510
Markdown (Informal)
[Infrrd.ai at SemEval-2022 Task 11: A system for named entity recognition using data augmentation, transformer-based sequence labeling model, and EnsembleCRF](https://aclanthology.org/2022.semeval-1.206/) (He et al., SemEval 2022)
ACL
- Jianglong He, Akshay Uppal, Mamatha N, Shiv Vignesh, Deepak Kumar, and Aditya Kumar Sarda. 2022. Infrrd.ai at SemEval-2022 Task 11: A system for named entity recognition using data augmentation, transformer-based sequence labeling model, and EnsembleCRF. In Proceedings of the 16th International Workshop on Semantic Evaluation (SemEval-2022), pages 1501–1510, Seattle, United States. Association for Computational Linguistics.