@inproceedings{bentum-etal-2024-corpus,
title = "Corpus Creation and Automatic Alignment of Historical {D}utch Dialect Speech",
author = "Bentum, Martijn and
Sanders, Eric and
van den Bosch, Antal P.J. and
Zeldenrust, Douwe and
van den Heuvel, Henk",
editor = "Calzolari, Nicoletta and
Kan, Min-Yen and
Hoste, Veronique and
Lenci, Alessandro and
Sakti, Sakriani and
Xue, Nianwen",
booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.lrec-main.357",
pages = "4021--4029",
abstract = "The Dutch Dialect Database (also known as the {`}Nederlandse Dialectenbank{'}) contains dialectal variations of Dutch that were recorded all over the Netherlands in the second half of the twentieth century. A subset of these recordings of about 300 hours were enriched with manual orthographic transcriptions, using non-standard approximations of dialectal speech. In this paper we describe the creation of a corpus containing both the audio recordings and their corresponding transcriptions and focus on our method for aligning the recordings with the transcriptions and the metadata.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bentum-etal-2024-corpus">
<titleInfo>
<title>Corpus Creation and Automatic Alignment of Historical Dutch Dialect Speech</title>
</titleInfo>
<name type="personal">
<namePart type="given">Martijn</namePart>
<namePart type="family">Bentum</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eric</namePart>
<namePart type="family">Sanders</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Antal</namePart>
<namePart type="given">P.J.</namePart>
<namePart type="family">van den Bosch</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Douwe</namePart>
<namePart type="family">Zeldenrust</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Henk</namePart>
<namePart type="family">van den Heuvel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The Dutch Dialect Database (also known as the ‘Nederlandse Dialectenbank’) contains dialectal variations of Dutch that were recorded all over the Netherlands in the second half of the twentieth century. A subset of these recordings of about 300 hours were enriched with manual orthographic transcriptions, using non-standard approximations of dialectal speech. In this paper we describe the creation of a corpus containing both the audio recordings and their corresponding transcriptions and focus on our method for aligning the recordings with the transcriptions and the metadata.</abstract>
<identifier type="citekey">bentum-etal-2024-corpus</identifier>
<location>
<url>https://aclanthology.org/2024.lrec-main.357</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>4021</start>
<end>4029</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Corpus Creation and Automatic Alignment of Historical Dutch Dialect Speech
%A Bentum, Martijn
%A Sanders, Eric
%A van den Bosch, Antal P.J.
%A Zeldenrust, Douwe
%A van den Heuvel, Henk
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F bentum-etal-2024-corpus
%X The Dutch Dialect Database (also known as the ‘Nederlandse Dialectenbank’) contains dialectal variations of Dutch that were recorded all over the Netherlands in the second half of the twentieth century. A subset of these recordings of about 300 hours were enriched with manual orthographic transcriptions, using non-standard approximations of dialectal speech. In this paper we describe the creation of a corpus containing both the audio recordings and their corresponding transcriptions and focus on our method for aligning the recordings with the transcriptions and the metadata.
%U https://aclanthology.org/2024.lrec-main.357
%P 4021-4029
Markdown (Informal)
[Corpus Creation and Automatic Alignment of Historical Dutch Dialect Speech](https://aclanthology.org/2024.lrec-main.357) (Bentum et al., LREC-COLING 2024)
ACL
- Martijn Bentum, Eric Sanders, Antal P.J. van den Bosch, Douwe Zeldenrust, and Henk van den Heuvel. 2024. Corpus Creation and Automatic Alignment of Historical Dutch Dialect Speech. In Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024), pages 4021–4029, Torino, Italia. ELRA and ICCL.