@inproceedings{sandrih-todorovic-etal-2021-serbian,
title = "{S}erbian {NER}{\&}Beyond: The Archaic and the Modern Intertwinned",
author = "{\v{S}}andrih Todorovi{\'c}, Branislava and
Krstev, Cvetana and
Stankovi{\'c}, Ranka and
Ikoni{\'c} Ne{\v{s}}i{\'c}, Milica",
editor = "Mitkov, Ruslan and
Angelova, Galia",
booktitle = "Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)",
month = sep,
year = "2021",
address = "Held Online",
publisher = "INCOMA Ltd.",
url = "https://aclanthology.org/2021.ranlp-1.141",
pages = "1252--1260",
abstract = "In this work, we present a Serbian literary corpus that is being developed under the umbrella of the {``}Distant Reading for European Literary History{''} COST Action CA16204. Using this corpus of novels written more than a century ago, we have developed and made publicly available a Named Entity Recognizer (NER) trained to recognize 7 different named entity types, with a Convolutional Neural Network (CNN) architecture, having F1 score of {\mbox{$\approx$}}91{\%} on the test dataset. This model has been further assessed on a separate evaluation dataset. We wrap up with comparison of the developed model with the existing one, followed by a discussion of pros and cons of the both models.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sandrih-todorovic-etal-2021-serbian">
<titleInfo>
<title>Serbian NER&amp;Beyond: The Archaic and the Modern Intertwinned</title>
</titleInfo>
<name type="personal">
<namePart type="given">Branislava</namePart>
<namePart type="family">Šandrih Todorović</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cvetana</namePart>
<namePart type="family">Krstev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ranka</namePart>
<namePart type="family">Stanković</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Milica</namePart>
<namePart type="family">Ikonić Nešić</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Galia</namePart>
<namePart type="family">Angelova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd.</publisher>
<place>
<placeTerm type="text">Held Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this work, we present a Serbian literary corpus that is being developed under the umbrella of the “Distant Reading for European Literary History” COST Action CA16204. Using this corpus of novels written more than a century ago, we have developed and made publicly available a Named Entity Recognizer (NER) trained to recognize 7 different named entity types, with a Convolutional Neural Network (CNN) architecture, having F1 score of ≈91% on the test dataset. This model has been further assessed on a separate evaluation dataset. We wrap up with comparison of the developed model with the existing one, followed by a discussion of pros and cons of the both models.</abstract>
<identifier type="citekey">sandrih-todorovic-etal-2021-serbian</identifier>
<location>
<url>https://aclanthology.org/2021.ranlp-1.141</url>
</location>
<part>
<date>2021-09</date>
<extent unit="page">
<start>1252</start>
<end>1260</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Serbian NER&Beyond: The Archaic and the Modern Intertwinned
%A Šandrih Todorović, Branislava
%A Krstev, Cvetana
%A Stanković, Ranka
%A Ikonić Nešić, Milica
%Y Mitkov, Ruslan
%Y Angelova, Galia
%S Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)
%D 2021
%8 September
%I INCOMA Ltd.
%C Held Online
%F sandrih-todorovic-etal-2021-serbian
%X In this work, we present a Serbian literary corpus that is being developed under the umbrella of the “Distant Reading for European Literary History” COST Action CA16204. Using this corpus of novels written more than a century ago, we have developed and made publicly available a Named Entity Recognizer (NER) trained to recognize 7 different named entity types, with a Convolutional Neural Network (CNN) architecture, having F1 score of ≈91% on the test dataset. This model has been further assessed on a separate evaluation dataset. We wrap up with comparison of the developed model with the existing one, followed by a discussion of pros and cons of the both models.
%U https://aclanthology.org/2021.ranlp-1.141
%P 1252-1260
Markdown (Informal)
[Serbian NER&Beyond: The Archaic and the Modern Intertwinned](https://aclanthology.org/2021.ranlp-1.141) (Šandrih Todorović et al., RANLP 2021)
ACL
- Branislava Šandrih Todorović, Cvetana Krstev, Ranka Stanković, and Milica Ikonić Nešić. 2021. Serbian NER&Beyond: The Archaic and the Modern Intertwinned. In Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021), pages 1252–1260, Held Online. INCOMA Ltd.