% Conference paper from the CLIB 2020 proceedings (ACL Anthology record 2020.clib-1.6).
% Braced words in title/booktitle are proper nouns and acronyms that must keep
% their capitalisation under styles that sentence-case titles.
@inproceedings{obreshkov-etal-2020-categorisation,
  title     = {Categorisation of {Bulgarian} Legislative Documents},
  author    = {Obreshkov, Nikola and
               Yalamov, Martin and
               Koeva, Svetla},
  booktitle = {Proceedings of the 4th International Conference on Computational Linguistics in {Bulgaria} ({CLIB} 2020)},
  month     = sep,
  year      = {2020},
  address   = {Sofia, Bulgaria},
  publisher = {Department of Computational Linguistics, IBL -- BAS},
  url       = {https://aclanthology.org/2020.clib-1.6},
  pages     = {53--62},
  abstract  = {The paper presents the categorisation of Bulgarian MARCELL corpus in toplevel EuroVoc domains. The Bulgarian MARCELL corpus is part of a recently developed multilingual corpus representing the national legislation in seven European countries. We performed several experiments with JEX Indexer, with neural networks and with a basic method measuring the domain-specific terms in documents annotated in advance with IATE terms and EuroVoc descriptors (combined with grouping of a primary document and its satellites, term extraction and parsing of the titles of the documents). The evaluation shows slight overweight of the basic method, which makes it appropriate as the categorisation should be a module of a NLP Pipeline for Bulgarian that is continuously feeding and annotating the Bulgarian MARCELL corpus with newly issued legislative documents.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the citekey obreshkov-etal-2020-categorisation. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="obreshkov-etal-2020-categorisation">
    <!-- Title of the paper itself -->
    <titleInfo>
      <title>Categorisation of Bulgarian Legislative Documents</title>
    </titleInfo>
    <!-- One <name> element per author, each with given/family parts and an author role -->
    <name type="personal">
      <namePart type="given">Nikola</namePart>
      <namePart type="family">Obreshkov</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Martin</namePart>
      <namePart type="family">Yalamov</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Svetla</namePart>
      <namePart type="family">Koeva</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2020-09</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its publisher and place -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 4th International Conference on Computational Linguistics in Bulgaria (CLIB 2020)</title>
      </titleInfo>
      <originInfo>
        <publisher>Department of Computational Linguistics, IBL – BAS</publisher>
        <place>
          <placeTerm type="text">Sofia, Bulgaria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>The paper presents the categorisation of Bulgarian MARCELL corpus in toplevel EuroVoc domains. The Bulgarian MARCELL corpus is part of a recently developed multilingual corpus representing the national legislation in seven European countries. We performed several experiments with JEX Indexer, with neural networks and with a basic method measuring the domain-specific terms in documents annotated in advance with IATE terms and EuroVoc descriptors (combined with grouping of a primary document and its satellites, term extraction and parsing of the titles of the documents). The evaluation shows slight overweight of the basic method, which makes it appropriate as the categorisation should be a module of a NLP Pipeline for Bulgarian that is continuously feeding and annotating the Bulgarian MARCELL corpus with newly issued legislative documents.</abstract>
    <identifier type="citekey">obreshkov-etal-2020-categorisation</identifier>
    <location>
      <url>https://aclanthology.org/2020.clib-1.6</url>
    </location>
    <!-- Date and page extent of this paper -->
    <part>
      <date>2020-09</date>
      <extent unit="page">
        <start>53</start>
        <end>62</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Categorisation of Bulgarian Legislative Documents
%A Obreshkov, Nikola
%A Yalamov, Martin
%A Koeva, Svetla
%S Proceedings of the 4th International Conference on Computational Linguistics in Bulgaria (CLIB 2020)
%D 2020
%8 September
%I Department of Computational Linguistics, IBL – BAS
%C Sofia, Bulgaria
%F obreshkov-etal-2020-categorisation
%X The paper presents the categorisation of Bulgarian MARCELL corpus in toplevel EuroVoc domains. The Bulgarian MARCELL corpus is part of a recently developed multilingual corpus representing the national legislation in seven European countries. We performed several experiments with JEX Indexer, with neural networks and with a basic method measuring the domain-specific terms in documents annotated in advance with IATE terms and EuroVoc descriptors (combined with grouping of a primary document and its satellites, term extraction and parsing of the titles of the documents). The evaluation shows slight overweight of the basic method, which makes it appropriate as the categorisation should be a module of a NLP Pipeline for Bulgarian that is continuously feeding and annotating the Bulgarian MARCELL corpus with newly issued legislative documents.
%U https://aclanthology.org/2020.clib-1.6
%P 53-62
Markdown (Informal)
[Categorisation of Bulgarian Legislative Documents](https://aclanthology.org/2020.clib-1.6) (Obreshkov et al., CLIB 2020)
ACL
- Nikola Obreshkov, Martin Yalamov, and Svetla Koeva. 2020. Categorisation of Bulgarian Legislative Documents. In Proceedings of the 4th International Conference on Computational Linguistics in Bulgaria (CLIB 2020), pages 53–62, Sofia, Bulgaria. Department of Computational Linguistics, IBL – BAS.