% ACL Anthology record 2023.nodalida-1.73: a paper in the NoDaLiDa 2023 proceedings.
% NOTE(review): "address" is kept exactly as exported; it looks like the conference
% venue rather than the publisher's city -- confirm before relying on it.
@inproceedings{ratsep-fishel-2023-neural,
  title     = {Neural Text-to-Speech Synthesis for {V{\~o}ro}},
  author    = {R{\"a}tsep, Liisa and Fishel, Mark},
  editor    = {Alum{\"a}e, Tanel and Fishel, Mark},
  booktitle = {Proceedings of the 24th Nordic Conference on Computational Linguistics ({NoDaLiDa})},
  month     = may,
  year      = {2023},
  address   = {T{\'o}rshavn, Faroe Islands},
  publisher = {University of Tartu Library},
  url       = {https://aclanthology.org/2023.nodalida-1.73/},
  pages     = {723--727},
  abstract  = {This paper presents the first high-quality neural text-to-speech (TTS) system for V{\~o}ro, a minority language spoken in Southern Estonia. By leveraging existing Estonian TTS models and datasets, we analyze whether common low-resource NLP techniques, such as cross-lingual transfer learning from related languages or multi-task learning, can benefit our low-resource use case. Our results show that we can achieve high-quality V{\~o}ro TTS without transfer learning and that using more diverse training data can even decrease synthesis quality. While these techniques may still be useful in some cases, our work highlights the need for caution when applied in specific low-resource scenarios, and it can provide valuable insights for future low-resource research and efforts in preserving minority languages.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- One MODS record for the paper; the relatedItem type="host" element describes the proceedings volume that contains it. -->
  <mods ID="ratsep-fishel-2023-neural">
    <titleInfo>
      <title>Neural Text-to-Speech Synthesis for Võro</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Liisa</namePart>
      <namePart type="family">Rätsep</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Mark</namePart>
      <namePart type="family">Fishel</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2023-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 24th Nordic Conference on Computational Linguistics (NoDaLiDa)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Tanel</namePart>
        <namePart type="family">Alumäe</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mark</namePart>
        <namePart type="family">Fishel</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>University of Tartu Library</publisher>
        <place>
          <placeTerm type="text">Tórshavn, Faroe Islands</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>This paper presents the first high-quality neural text-to-speech (TTS) system for Võro, a minority language spoken in Southern Estonia. By leveraging existing Estonian TTS models and datasets, we analyze whether common low-resource NLP techniques, such as cross-lingual transfer learning from related languages or multi-task learning, can benefit our low-resource use case. Our results show that we can achieve high-quality Võro TTS without transfer learning and that using more diverse training data can even decrease synthesis quality. While these techniques may still be useful in some cases, our work highlights the need for caution when applied in specific low-resource scenarios, and it can provide valuable insights for future low-resource research and efforts in preserving minority languages.</abstract>
    <identifier type="citekey">ratsep-fishel-2023-neural</identifier>
    <location>
      <url>https://aclanthology.org/2023.nodalida-1.73/</url>
    </location>
    <part>
      <date>2023-05</date>
      <extent unit="page">
        <start>723</start>
        <end>727</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Neural Text-to-Speech Synthesis for Võro
%A Rätsep, Liisa
%A Fishel, Mark
%Y Alumäe, Tanel
%Y Fishel, Mark
%S Proceedings of the 24th Nordic Conference on Computational Linguistics (NoDaLiDa)
%D 2023
%8 May
%I University of Tartu Library
%C Tórshavn, Faroe Islands
%F ratsep-fishel-2023-neural
%X This paper presents the first high-quality neural text-to-speech (TTS) system for Võro, a minority language spoken in Southern Estonia. By leveraging existing Estonian TTS models and datasets, we analyze whether common low-resource NLP techniques, such as cross-lingual transfer learning from related languages or multi-task learning, can benefit our low-resource use case. Our results show that we can achieve high-quality Võro TTS without transfer learning and that using more diverse training data can even decrease synthesis quality. While these techniques may still be useful in some cases, our work highlights the need for caution when applied in specific low-resource scenarios, and it can provide valuable insights for future low-resource research and efforts in preserving minority languages.
%U https://aclanthology.org/2023.nodalida-1.73/
%P 723-727
Markdown (Informal)
[Neural Text-to-Speech Synthesis for Võro](https://aclanthology.org/2023.nodalida-1.73/) (Rätsep & Fishel, NoDaLiDa 2023)
ACL
- Liisa Rätsep and Mark Fishel. 2023. Neural Text-to-Speech Synthesis for Võro. In Proceedings of the 24th Nordic Conference on Computational Linguistics (NoDaLiDa), pages 723–727, Tórshavn, Faroe Islands. University of Tartu Library.