@inproceedings{indig-etal-2019-one,
title = "One format to rule them all {--} The emtsv pipeline for {H}ungarian",
author = "Indig, Bal{\'a}zs and
Sass, B{\'a}lint and
Simon, Eszter and
Mittelholcz, Iv{\'a}n and
Vad{\'a}sz, No{\'e}mi and
Makrai, M{\'a}rton",
editor = "Friedrich, Annemarie and
Zeyrek, Deniz and
Hoek, Jet",
booktitle = "Proceedings of the 13th Linguistic Annotation Workshop",
month = aug,
year = "2019",
address = "Florence, Italy",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W19-4018",
doi = "10.18653/v1/W19-4018",
pages = "155--165",
abstract = "We present a more efficient version of the e-magyar NLP pipeline for Hungarian called emtsv. It integrates Hungarian NLP tools in a framework whose individual modules can be developed or replaced independently and allows new ones to be added. The design also allows convenient investigation and manual correction of the data flow from one module to another. The improvements we publish include effective communication between the modules and support of the use of individual modules both in the chain and standing alone. Our goals are accomplished using extended tsv (tab separated values) files, a simple, uniform, generic and self-documenting input/output format. Our vision is maintaining the system for a long time and making it easier for external developers to fit their own modules into the system, thus sharing existing competencies in the field of processing Hungarian, a mid-resourced language. The source code is available under LGPL 3.0 license at \url{https://github.com/dlt-rilmta/emtsv} .",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="indig-etal-2019-one">
<titleInfo>
<title>One format to rule them all – The emtsv pipeline for Hungarian</title>
</titleInfo>
<name type="personal">
<namePart type="given">Balázs</namePart>
<namePart type="family">Indig</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bálint</namePart>
<namePart type="family">Sass</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eszter</namePart>
<namePart type="family">Simon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Iván</namePart>
<namePart type="family">Mittelholcz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Noémi</namePart>
<namePart type="family">Vadász</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Márton</namePart>
<namePart type="family">Makrai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 13th Linguistic Annotation Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">Annemarie</namePart>
<namePart type="family">Friedrich</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Deniz</namePart>
<namePart type="family">Zeyrek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jet</namePart>
<namePart type="family">Hoek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Florence, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present a more efficient version of the e-magyar NLP pipeline for Hungarian called emtsv. It integrates Hungarian NLP tools in a framework whose individual modules can be developed or replaced independently and allows new ones to be added. The design also allows convenient investigation and manual correction of the data flow from one module to another. The improvements we publish include effective communication between the modules and support of the use of individual modules both in the chain and standing alone. Our goals are accomplished using extended tsv (tab separated values) files, a simple, uniform, generic and self-documenting input/output format. Our vision is maintaining the system for a long time and making it easier for external developers to fit their own modules into the system, thus sharing existing competencies in the field of processing Hungarian, a mid-resourced language. The source code is available under LGPL 3.0 license at https://github.com/dlt-rilmta/emtsv .</abstract>
<identifier type="citekey">indig-etal-2019-one</identifier>
<identifier type="doi">10.18653/v1/W19-4018</identifier>
<location>
<url>https://aclanthology.org/W19-4018</url>
</location>
<part>
<date>2019-08</date>
<extent unit="page">
<start>155</start>
<end>165</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T One format to rule them all – The emtsv pipeline for Hungarian
%A Indig, Balázs
%A Sass, Bálint
%A Simon, Eszter
%A Mittelholcz, Iván
%A Vadász, Noémi
%A Makrai, Márton
%Y Friedrich, Annemarie
%Y Zeyrek, Deniz
%Y Hoek, Jet
%S Proceedings of the 13th Linguistic Annotation Workshop
%D 2019
%8 August
%I Association for Computational Linguistics
%C Florence, Italy
%F indig-etal-2019-one
%X We present a more efficient version of the e-magyar NLP pipeline for Hungarian called emtsv. It integrates Hungarian NLP tools in a framework whose individual modules can be developed or replaced independently and allows new ones to be added. The design also allows convenient investigation and manual correction of the data flow from one module to another. The improvements we publish include effective communication between the modules and support of the use of individual modules both in the chain and standing alone. Our goals are accomplished using extended tsv (tab separated values) files, a simple, uniform, generic and self-documenting input/output format. Our vision is maintaining the system for a long time and making it easier for external developers to fit their own modules into the system, thus sharing existing competencies in the field of processing Hungarian, a mid-resourced language. The source code is available under LGPL 3.0 license at https://github.com/dlt-rilmta/emtsv .
%R 10.18653/v1/W19-4018
%U https://aclanthology.org/W19-4018
%U https://doi.org/10.18653/v1/W19-4018
%P 155-165
Markdown (Informal)
[One format to rule them all – The emtsv pipeline for Hungarian](https://aclanthology.org/W19-4018) (Indig et al., LAW 2019)
ACL