@inproceedings{hafsteinsson-ingason-2020-developing,
title = "Developing a {F}aroese {P}o{S}-tagging solution using {I}celandic methods",
author = "Hafsteinsson, Hinrik and
Ingason, Anton Karl",
editor = "Bhattacharyya, Pushpak and
Sharma, Dipti Misra and
Sangal, Rajeev",
booktitle = "Proceedings of the 17th International Conference on Natural Language Processing (ICON)",
month = dec,
year = "2020",
address = "Indian Institute of Technology Patna, Patna, India",
publisher = "NLP Association of India (NLPAI)",
url = "https://aclanthology.org/2020.icon-main.65",
pages = "481--490",
abstract = "We describe the development of a dedicated, high-accuracy part-of-speech (PoS) tagging solution for Faroese, a North Germanic language with about 50,000 speakers. To achieve this, a state-of-the-art neural PoS tagger for Icelandic, ABLTagger, was trained on a 100,000 word PoS-tagged corpus for Faroese, standardised with methods previously applied to Icelandic corpora. This tagger was supplemented with a novel Experimental Database of Faroese Inflection (EDFM), which contains morphological information on 67,488 Faroese words with about one million inflectional forms. This approach produced a PoS-tagging model for Faroese which achieves a 91.40{\%} overall accuracy when evaluated with 10-fold cross validation, which is currently the highest reported accuracy for a dedicated Faroese PoS-tagger. The tagging model, morphological database, proposed revised PoS tagset for Faroese as well as a revised and standardised PoS tagged corpus are all presented as products of this project and are made available for use in further research in Faroese language technology.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hafsteinsson-ingason-2020-developing">
<titleInfo>
<title>Developing a Faroese PoS-tagging solution using Icelandic methods</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hinrik</namePart>
<namePart type="family">Hafsteinsson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anton</namePart>
<namePart type="given">Karl</namePart>
<namePart type="family">Ingason</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 17th International Conference on Natural Language Processing (ICON)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Pushpak</namePart>
<namePart type="family">Bhattacharyya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dipti</namePart>
<namePart type="given">Misra</namePart>
<namePart type="family">Sharma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rajeev</namePart>
<namePart type="family">Sangal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>NLP Association of India (NLPAI)</publisher>
<place>
<placeTerm type="text">Indian Institute of Technology Patna, Patna, India</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We describe the development of a dedicated, high-accuracy part-of-speech (PoS) tagging solution for Faroese, a North Germanic language with about 50,000 speakers. To achieve this, a state-of-the-art neural PoS tagger for Icelandic, ABLTagger, was trained on a 100,000 word PoS-tagged corpus for Faroese, standardised with methods previously applied to Icelandic corpora. This tagger was supplemented with a novel Experimental Database of Faroese Inflection (EDFM), which contains morphological information on 67,488 Faroese words with about one million inflectional forms. This approach produced a PoS-tagging model for Faroese which achieves a 91.40% overall accuracy when evaluated with 10-fold cross validation, which is currently the highest reported accuracy for a dedicated Faroese PoS-tagger. The tagging model, morphological database, proposed revised PoS tagset for Faroese as well as a revised and standardised PoS tagged corpus are all presented as products of this project and are made available for use in further research in Faroese language technology.</abstract>
<identifier type="citekey">hafsteinsson-ingason-2020-developing</identifier>
<location>
<url>https://aclanthology.org/2020.icon-main.65</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>481</start>
<end>490</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Developing a Faroese PoS-tagging solution using Icelandic methods
%A Hafsteinsson, Hinrik
%A Ingason, Anton Karl
%Y Bhattacharyya, Pushpak
%Y Sharma, Dipti Misra
%Y Sangal, Rajeev
%S Proceedings of the 17th International Conference on Natural Language Processing (ICON)
%D 2020
%8 December
%I NLP Association of India (NLPAI)
%C Indian Institute of Technology Patna, Patna, India
%F hafsteinsson-ingason-2020-developing
%X We describe the development of a dedicated, high-accuracy part-of-speech (PoS) tagging solution for Faroese, a North Germanic language with about 50,000 speakers. To achieve this, a state-of-the-art neural PoS tagger for Icelandic, ABLTagger, was trained on a 100,000 word PoS-tagged corpus for Faroese, standardised with methods previously applied to Icelandic corpora. This tagger was supplemented with a novel Experimental Database of Faroese Inflection (EDFM), which contains morphological information on 67,488 Faroese words with about one million inflectional forms. This approach produced a PoS-tagging model for Faroese which achieves a 91.40% overall accuracy when evaluated with 10-fold cross validation, which is currently the highest reported accuracy for a dedicated Faroese PoS-tagger. The tagging model, morphological database, proposed revised PoS tagset for Faroese as well as a revised and standardised PoS tagged corpus are all presented as products of this project and are made available for use in further research in Faroese language technology.
%U https://aclanthology.org/2020.icon-main.65
%P 481-490
Markdown (Informal)
[Developing a Faroese PoS-tagging solution using Icelandic methods](https://aclanthology.org/2020.icon-main.65) (Hafsteinsson & Ingason, ICON 2020)
ACL