% ACL Anthology record for an IWSLT 2023 shared-task system paper.
% Values are brace-delimited; the acronym in booktitle is brace-protected so
% that styles which recase this field cannot lowercase it.
@inproceedings{vishnu-kudlu-shanbhogue-etal-2023-improving,
  title     = {Improving Low Resource Speech Translation with Data Augmentation and Ensemble Strategies},
  author    = {Shanbhogue, Akshaya Vishnu Kudlu and
               Xue, Ran and
               Saha, Soumya and
               Zhang, Daniel and
               Ganesan, Ashwinkumar},
  editor    = {Salesky, Elizabeth and
               Federico, Marcello and
               Carpuat, Marine},
  booktitle = {Proceedings of the 20th International Conference on Spoken Language Translation ({IWSLT} 2023)},
  month     = jul,
  year      = {2023},
  address   = {Toronto, Canada (in-person and online)},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2023.iwslt-1.21/},
  doi       = {10.18653/v1/2023.iwslt-1.21},
  pages     = {241--250},
  abstract  = {This paper describes the speech translation system submitted as part of the IWSLT 2023 shared task on low resource speech translation. The low resource task aids in building models for language pairs where the training corpus is limited. In this paper, we focus on two language pairs, namely, Tamasheq-French (Tmh{\textrightarrow}Fra) and Marathi-Hindi (Mr{\textrightarrow}Hi) and implement a speech translation system that is unconstrained. We evaluate three strategies in our system: (a) Data augmentation where we perform different operations on audio as well as text samples, (b) an ensemble model that integrates a set of models trained using a combination of augmentation strategies, and (c) post-processing techniques where we explore the use of large language models (LLMs) to improve the quality of sentences that are generated. Experiments show how data augmentation can relatively improve the BLEU score by 5.2{\%} over the baseline system for Tmh{\textrightarrow}Fra while an ensemble model further improves performance by 17{\%} for Tmh{\textrightarrow}Fra and 23{\%} for Mr{\textrightarrow}Hi task.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record; the record's ID attribute
     and its citekey identifier carry the same value. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="vishnu-kudlu-shanbhogue-etal-2023-improving">
    <titleInfo>
      <title>Improving Low Resource Speech Translation with Data Augmentation and Ensemble Strategies</title>
    </titleInfo>
    <!-- Authors, in listed order -->
    <name type="personal">
      <namePart type="given">Akshaya</namePart>
      <namePart type="given">Vishnu</namePart>
      <namePart type="given">Kudlu</namePart>
      <namePart type="family">Shanbhogue</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ran</namePart>
      <namePart type="family">Xue</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Soumya</namePart>
      <namePart type="family">Saha</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Daniel</namePart>
      <namePart type="family">Zhang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ashwinkumar</namePart>
      <namePart type="family">Ganesan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2023-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher and place -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 20th International Conference on Spoken Language Translation (IWSLT 2023)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Elizabeth</namePart>
        <namePart type="family">Salesky</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Marcello</namePart>
        <namePart type="family">Federico</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Marine</namePart>
        <namePart type="family">Carpuat</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Toronto, Canada (in-person and online)</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>This paper describes the speech translation system submitted as part of the IWSLT 2023 shared task on low resource speech translation. The low resource task aids in building models for language pairs where the training corpus is limited. In this paper, we focus on two language pairs, namely, Tamasheq-French (Tmh→Fra) and Marathi-Hindi (Mr→Hi) and implement a speech translation system that is unconstrained. We evaluate three strategies in our system: (a) Data augmentation where we perform different operations on audio as well as text samples, (b) an ensemble model that integrates a set of models trained using a combination of augmentation strategies, and (c) post-processing techniques where we explore the use of large language models (LLMs) to improve the quality of sentences that are generated. Experiments show how data augmentation can relatively improve the BLEU score by 5.2% over the baseline system for Tmh→Fra while an ensemble model further improves performance by 17% for Tmh→Fra and 23% for Mr→Hi task.</abstract>
    <!-- Identifiers and landing page -->
    <identifier type="citekey">vishnu-kudlu-shanbhogue-etal-2023-improving</identifier>
    <identifier type="doi">10.18653/v1/2023.iwslt-1.21</identifier>
    <location>
      <url>https://aclanthology.org/2023.iwslt-1.21/</url>
    </location>
    <!-- Date and page extent within the host volume -->
    <part>
      <date>2023-07</date>
      <extent unit="page">
        <start>241</start>
        <end>250</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Improving Low Resource Speech Translation with Data Augmentation and Ensemble Strategies
%A Shanbhogue, Akshaya Vishnu Kudlu
%A Xue, Ran
%A Saha, Soumya
%A Zhang, Daniel
%A Ganesan, Ashwinkumar
%Y Salesky, Elizabeth
%Y Federico, Marcello
%Y Carpuat, Marine
%S Proceedings of the 20th International Conference on Spoken Language Translation (IWSLT 2023)
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada (in-person and online)
%F vishnu-kudlu-shanbhogue-etal-2023-improving
%X This paper describes the speech translation system submitted as part of the IWSLT 2023 shared task on low resource speech translation. The low resource task aids in building models for language pairs where the training corpus is limited. In this paper, we focus on two language pairs, namely, Tamasheq-French (Tmh→Fra) and Marathi-Hindi (Mr→Hi) and implement a speech translation system that is unconstrained. We evaluate three strategies in our system: (a) Data augmentation where we perform different operations on audio as well as text samples, (b) an ensemble model that integrates a set of models trained using a combination of augmentation strategies, and (c) post-processing techniques where we explore the use of large language models (LLMs) to improve the quality of sentences that are generated. Experiments show how data augmentation can relatively improve the BLEU score by 5.2% over the baseline system for Tmh→Fra while an ensemble model further improves performance by 17% for Tmh→Fra and 23% for Mr→Hi task.
%R 10.18653/v1/2023.iwslt-1.21
%U https://aclanthology.org/2023.iwslt-1.21/
%U https://doi.org/10.18653/v1/2023.iwslt-1.21
%P 241-250
Markdown (Informal)
[Improving Low Resource Speech Translation with Data Augmentation and Ensemble Strategies](https://aclanthology.org/2023.iwslt-1.21/) (Shanbhogue et al., IWSLT 2023)
ACL