@inproceedings{siewert-etal-2020-lsdc,
title = "{LSDC} - A comprehensive dataset for {L}ow {S}axon Dialect Classification",
author = {Siewert, Janine and
Scherrer, Yves and
Wieling, Martijn and
Tiedemann, J{\"o}rg},
editor = {Zampieri, Marcos and
Nakov, Preslav and
Ljube{\v{s}}i{\'c}, Nikola and
Tiedemann, J{\"o}rg and
Scherrer, Yves},
booktitle = "Proceedings of the 7th Workshop on NLP for Similar Languages, Varieties and Dialects",
month = dec,
year = "2020",
address = "Barcelona, Spain (Online)",
publisher = "International Committee on Computational Linguistics (ICCL)",
url = "https://aclanthology.org/2020.vardial-1.3/",
pages = "25--35",
abstract = "We present a new comprehensive dataset for the unstandardised West-Germanic language Low Saxon covering the last two centuries, the majority of modern dialects and various genres, which will be made openly available in connection with the final version of this paper. Since so far no such comprehensive dataset of contemporary Low Saxon exists, this provides a great contribution to NLP research on this language. We also test the use of this dataset for dialect classification by training a few baseline models comparing statistical and neural approaches. The performance of these models shows that in spite of an imbalance in the amount of data per dialect, enough features can be learned for a relatively high classification accuracy."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="siewert-etal-2020-lsdc">
<titleInfo>
<title>LSDC - A comprehensive dataset for Low Saxon Dialect Classification</title>
</titleInfo>
<name type="personal">
<namePart type="given">Janine</namePart>
<namePart type="family">Siewert</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yves</namePart>
<namePart type="family">Scherrer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Martijn</namePart>
<namePart type="family">Wieling</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jörg</namePart>
<namePart type="family">Tiedemann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 7th Workshop on NLP for Similar Languages, Varieties and Dialects</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marcos</namePart>
<namePart type="family">Zampieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Preslav</namePart>
<namePart type="family">Nakov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nikola</namePart>
<namePart type="family">Ljubešić</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jörg</namePart>
<namePart type="family">Tiedemann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yves</namePart>
<namePart type="family">Scherrer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Committee on Computational Linguistics (ICCL)</publisher>
<place>
<placeTerm type="text">Barcelona, Spain (Online)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present a new comprehensive dataset for the unstandardised West-Germanic language Low Saxon covering the last two centuries, the majority of modern dialects and various genres, which will be made openly available in connection with the final version of this paper. Since so far no such comprehensive dataset of contemporary Low Saxon exists, this provides a great contribution to NLP research on this language. We also test the use of this dataset for dialect classification by training a few baseline models comparing statistical and neural approaches. The performance of these models shows that in spite of an imbalance in the amount of data per dialect, enough features can be learned for a relatively high classification accuracy.</abstract>
<identifier type="citekey">siewert-etal-2020-lsdc</identifier>
<location>
<url>https://aclanthology.org/2020.vardial-1.3/</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>25</start>
<end>35</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T LSDC - A comprehensive dataset for Low Saxon Dialect Classification
%A Siewert, Janine
%A Scherrer, Yves
%A Wieling, Martijn
%A Tiedemann, Jörg
%Y Zampieri, Marcos
%Y Nakov, Preslav
%Y Ljubešić, Nikola
%Y Tiedemann, Jörg
%Y Scherrer, Yves
%S Proceedings of the 7th Workshop on NLP for Similar Languages, Varieties and Dialects
%D 2020
%8 December
%I International Committee on Computational Linguistics (ICCL)
%C Barcelona, Spain (Online)
%F siewert-etal-2020-lsdc
%X We present a new comprehensive dataset for the unstandardised West-Germanic language Low Saxon covering the last two centuries, the majority of modern dialects and various genres, which will be made openly available in connection with the final version of this paper. Since so far no such comprehensive dataset of contemporary Low Saxon exists, this provides a great contribution to NLP research on this language. We also test the use of this dataset for dialect classification by training a few baseline models comparing statistical and neural approaches. The performance of these models shows that in spite of an imbalance in the amount of data per dialect, enough features can be learned for a relatively high classification accuracy.
%U https://aclanthology.org/2020.vardial-1.3/
%P 25-35
Markdown (Informal)
[LSDC - A comprehensive dataset for Low Saxon Dialect Classification](https://aclanthology.org/2020.vardial-1.3/) (Siewert et al., VarDial 2020)
ACL