% Conference paper: "The Danish Gigaword Corpus" (NoDaLiDa 2021).
% The month field holds the full event date range; it is built from the
% predefined month macros so that styles can still localise the month names.
% The address field holds the conference location, as given by the publisher's export.
@inproceedings{stromberg-derczynski-etal-2021-danish,
  title     = {The {Danish} {Gigaword} Corpus},
  author    = {Str{\o}mberg-Derczynski, Leon and
               Ciosici, Manuel and
               Baglini, Rebekah and
               Christiansen, Morten H. and
               Dalsgaard, Jacob Aarup and
               Fusaroli, Riccardo and
               Henrichsen, Peter Juel and
               Hvingelby, Rasmus and
               Kirkedal, Andreas and
               Kjeldsen, Alex Speed and
               Ladefoged, Claus and
               Nielsen, Finn {\AA}rup and
               Madsen, Jens and
               Petersen, Malte Lau and
               Rystr{\o}m, Jonathan Hvithamar and
               Varab, Daniel},
  editor    = {Dobnik, Simon and
               {\O}vrelid, Lilja},
  booktitle = {Proceedings of the 23rd Nordic Conference on Computational Linguistics ({NoDaLiDa})},
  month     = may # " 31 -- " # jun # " 2",
  year      = {2021},
  address   = {Reykjavik, Iceland (Online)},
  publisher = {Link{\"o}ping University Electronic Press, Sweden},
  url       = {https://aclanthology.org/2021.nodalida-main.46},
  pages     = {413--421},
  abstract  = {Danish language technology has been hindered by a lack of broad-coverage corpora at the scale modern NLP prefers. This paper describes the Danish Gigaword Corpus, the result of a focused effort to provide a diverse and freely-available one billion word corpus of Danish text. The Danish Gigaword corpus covers a wide array of time periods, domains, speakers{'} socio-economic status, and Danish dialects.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="stromberg-derczynski-etal-2021-danish">
<titleInfo>
<title>The Danish Gigaword Corpus</title>
</titleInfo>
<name type="personal">
<namePart type="given">Leon</namePart>
<namePart type="family">Strømberg-Derczynski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manuel</namePart>
<namePart type="family">Ciosici</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rebekah</namePart>
<namePart type="family">Baglini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Morten</namePart>
<namePart type="given">H</namePart>
<namePart type="family">Christiansen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jacob</namePart>
<namePart type="given">Aarup</namePart>
<namePart type="family">Dalsgaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Riccardo</namePart>
<namePart type="family">Fusaroli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Peter</namePart>
<namePart type="given">Juel</namePart>
<namePart type="family">Henrichsen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rasmus</namePart>
<namePart type="family">Hvingelby</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andreas</namePart>
<namePart type="family">Kirkedal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alex</namePart>
<namePart type="given">Speed</namePart>
<namePart type="family">Kjeldsen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Claus</namePart>
<namePart type="family">Ladefoged</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Finn</namePart>
<namePart type="given">Årup</namePart>
<namePart type="family">Nielsen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jens</namePart>
<namePart type="family">Madsen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Malte</namePart>
<namePart type="given">Lau</namePart>
<namePart type="family">Petersen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jonathan</namePart>
<namePart type="given">Hvithamar</namePart>
<namePart type="family">Rystrøm</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Varab</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-05-31/2021-06-02</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 23rd Nordic Conference on Computational Linguistics (NoDaLiDa)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Simon</namePart>
<namePart type="family">Dobnik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lilja</namePart>
<namePart type="family">Øvrelid</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Linköping University Electronic Press, Sweden</publisher>
<place>
<placeTerm type="text">Reykjavik, Iceland (Online)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Danish language technology has been hindered by a lack of broad-coverage corpora at the scale modern NLP prefers. This paper describes the Danish Gigaword Corpus, the result of a focused effort to provide a diverse and freely-available one billion word corpus of Danish text. The Danish Gigaword corpus covers a wide array of time periods, domains, speakers’ socio-economic status, and Danish dialects.</abstract>
<identifier type="citekey">stromberg-derczynski-etal-2021-danish</identifier>
<location>
<url>https://aclanthology.org/2021.nodalida-main.46</url>
</location>
<part>
<date>2021-05-31/2021-06-02</date>
<extent unit="page">
<start>413</start>
<end>421</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T The Danish Gigaword Corpus
%A Strømberg-Derczynski, Leon
%A Ciosici, Manuel
%A Baglini, Rebekah
%A Christiansen, Morten H.
%A Dalsgaard, Jacob Aarup
%A Fusaroli, Riccardo
%A Henrichsen, Peter Juel
%A Hvingelby, Rasmus
%A Kirkedal, Andreas
%A Kjeldsen, Alex Speed
%A Ladefoged, Claus
%A Nielsen, Finn Årup
%A Madsen, Jens
%A Petersen, Malte Lau
%A Rystrøm, Jonathan Hvithamar
%A Varab, Daniel
%Y Dobnik, Simon
%Y Øvrelid, Lilja
%S Proceedings of the 23rd Nordic Conference on Computational Linguistics (NoDaLiDa)
%D 2021
%8 May 31–June 2
%I Linköping University Electronic Press, Sweden
%C Reykjavik, Iceland (Online)
%F stromberg-derczynski-etal-2021-danish
%X Danish language technology has been hindered by a lack of broad-coverage corpora at the scale modern NLP prefers. This paper describes the Danish Gigaword Corpus, the result of a focused effort to provide a diverse and freely-available one billion word corpus of Danish text. The Danish Gigaword corpus covers a wide array of time periods, domains, speakers’ socio-economic status, and Danish dialects.
%U https://aclanthology.org/2021.nodalida-main.46
%P 413-421
Markdown (Informal)
[The Danish Gigaword Corpus](https://aclanthology.org/2021.nodalida-main.46) (Strømberg-Derczynski et al., NoDaLiDa 2021)
ACL
- Leon Strømberg-Derczynski, Manuel Ciosici, Rebekah Baglini, Morten H. Christiansen, Jacob Aarup Dalsgaard, Riccardo Fusaroli, Peter Juel Henrichsen, Rasmus Hvingelby, Andreas Kirkedal, Alex Speed Kjeldsen, Claus Ladefoged, Finn Årup Nielsen, Jens Madsen, Malte Lau Petersen, Jonathan Hvithamar Rystrøm, and Daniel Varab. 2021. The Danish Gigaword Corpus. In Proceedings of the 23rd Nordic Conference on Computational Linguistics (NoDaLiDa), pages 413–421, Reykjavik, Iceland (Online). Linköping University Electronic Press, Sweden.