@inproceedings{chingacham-etal-2024-human,
title = "Human Speech Perception in Noise: Can Large Language Models Paraphrase to Improve It?",
author = "Chingacham, Anupama and
Zhang, Miaoran and
Demberg, Vera and
Klakow, Dietrich",
editor = "Soni, Nikita and
Flek, Lucie and
Sharma, Ashish and
Yang, Diyi and
Hooker, Sara and
Schwartz, H. Andrew",
booktitle = "Proceedings of the 1st Human-Centered Large Language Modeling Workshop",
month = aug,
year = "2024",
    address = "Bangkok, Thailand",
publisher = "ACL",
url = "https://aclanthology.org/2024.hucllm-1.1",
doi = "10.18653/v1/2024.hucllm-1.1",
pages = "1--15",
    abstract = "Large Language Models (LLMs) can generate text by transferring style attributes like formality, resulting in formal or informal text. However, instructing LLMs to generate text that, when spoken, is more intelligible in an acoustically difficult environment, is an under-explored topic. We conduct the first study to evaluate LLMs on a novel task of generating acoustically intelligible paraphrases for better human speech perception in noise. Our experiments in English demonstrated that with standard prompting, LLMs struggle to control the non-textual attribute, i.e., acoustic intelligibility, while efficiently capturing the desired textual attributes like semantic equivalence. To remedy this issue, we propose a simple prompting approach, prompt-and-select, which generates paraphrases by decoupling the desired textual and non-textual attributes in the text generation pipeline. Our approach resulted in a 40{\%} relative improvement in human speech perception, by paraphrasing utterances that are highly distorted in a listening condition with babble noise at signal-to-noise ratio (SNR) -5 dB. This study reveals the limitation of LLMs in capturing non-textual attributes, and our proposed method showcases the potential of using LLMs for better human speech perception in noise.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="chingacham-etal-2024-human">
<titleInfo>
<title>Human Speech Perception in Noise: Can Large Language Models Paraphrase to Improve It?</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anupama</namePart>
<namePart type="family">Chingacham</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Miaoran</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vera</namePart>
<namePart type="family">Demberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dietrich</namePart>
<namePart type="family">Klakow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st Human-Centered Large Language Modeling Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nikita</namePart>
<namePart type="family">Soni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lucie</namePart>
<namePart type="family">Flek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ashish</namePart>
<namePart type="family">Sharma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Diyi</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Hooker</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">H</namePart>
<namePart type="given">Andrew</namePart>
<namePart type="family">Schwartz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ACL</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Large Language Models (LLMs) can generate text by transferring style attributes like formality, resulting in formal or informal text. However, instructing LLMs to generate text that, when spoken, is more intelligible in an acoustically difficult environment, is an under-explored topic. We conduct the first study to evaluate LLMs on a novel task of generating acoustically intelligible paraphrases for better human speech perception in noise. Our experiments in English demonstrated that with standard prompting, LLMs struggle to control the non-textual attribute, i.e., acoustic intelligibility, while efficiently capturing the desired textual attributes like semantic equivalence. To remedy this issue, we propose a simple prompting approach, prompt-and-select, which generates paraphrases by decoupling the desired textual and non-textual attributes in the text generation pipeline. Our approach resulted in a 40% relative improvement in human speech perception, by paraphrasing utterances that are highly distorted in a listening condition with babble noise at signal-to-noise ratio (SNR) -5 dB. This study reveals the limitation of LLMs in capturing non-textual attributes, and our proposed method showcases the potential of using LLMs for better human speech perception in noise.</abstract>
<identifier type="citekey">chingacham-etal-2024-human</identifier>
<identifier type="doi">10.18653/v1/2024.hucllm-1.1</identifier>
<location>
<url>https://aclanthology.org/2024.hucllm-1.1</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>1</start>
<end>15</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Human Speech Perception in Noise: Can Large Language Models Paraphrase to Improve It?
%A Chingacham, Anupama
%A Zhang, Miaoran
%A Demberg, Vera
%A Klakow, Dietrich
%Y Soni, Nikita
%Y Flek, Lucie
%Y Sharma, Ashish
%Y Yang, Diyi
%Y Hooker, Sara
%Y Schwartz, H. Andrew
%S Proceedings of the 1st Human-Centered Large Language Modeling Workshop
%D 2024
%8 August
%I ACL
%C Bangkok, Thailand
%F chingacham-etal-2024-human
%X Large Language Models (LLMs) can generate text by transferring style attributes like formality, resulting in formal or informal text. However, instructing LLMs to generate text that, when spoken, is more intelligible in an acoustically difficult environment, is an under-explored topic. We conduct the first study to evaluate LLMs on a novel task of generating acoustically intelligible paraphrases for better human speech perception in noise. Our experiments in English demonstrated that with standard prompting, LLMs struggle to control the non-textual attribute, i.e., acoustic intelligibility, while efficiently capturing the desired textual attributes like semantic equivalence. To remedy this issue, we propose a simple prompting approach, prompt-and-select, which generates paraphrases by decoupling the desired textual and non-textual attributes in the text generation pipeline. Our approach resulted in a 40% relative improvement in human speech perception, by paraphrasing utterances that are highly distorted in a listening condition with babble noise at signal-to-noise ratio (SNR) -5 dB. This study reveals the limitation of LLMs in capturing non-textual attributes, and our proposed method showcases the potential of using LLMs for better human speech perception in noise.
%R 10.18653/v1/2024.hucllm-1.1
%U https://aclanthology.org/2024.hucllm-1.1
%U https://doi.org/10.18653/v1/2024.hucllm-1.1
%P 1-15
Markdown (Informal)
[Human Speech Perception in Noise: Can Large Language Models Paraphrase to Improve It?](https://aclanthology.org/2024.hucllm-1.1) (Chingacham et al., HuCLLM-WS 2024)
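To make the abstract's prompt-and-select idea concrete, here is a minimal sketch of the two decoupled stages: an LLM is prompted only for semantically equivalent paraphrases (the textual attribute), and a separate scorer then picks the candidate estimated to be most intelligible in noise (the non-textual attribute). The prompt wording, the `llm` callable, and the `intelligibility_in_noise` scorer are hypothetical placeholders for illustration, not the authors' released implementation.

```python
# Hypothetical "prompt-and-select" sketch: the LLM only paraphrases; a separate
# scorer ranks candidates by estimated intelligibility in noise.
from typing import Callable, List


def prompt_and_select(
    utterance: str,
    llm: Callable[[str], List[str]],
    intelligibility_in_noise: Callable[[str], float],
    n_candidates: int = 10,
) -> str:
    """Generate paraphrase candidates with an LLM, then select the one that a
    separate model predicts is easiest to perceive in noisy listening conditions."""
    prompt = (
        f"Paraphrase the sentence below in {n_candidates} different ways, "
        f"keeping the meaning unchanged:\n{utterance}"
    )
    candidates = llm(prompt) or [utterance]
    # Decoupling step: intelligibility is scored outside the LLM, e.g. by
    # synthesizing each candidate with TTS, mixing it with babble noise at
    # SNR -5 dB, and estimating how much listeners would recover.
    return max(candidates, key=intelligibility_in_noise)


if __name__ == "__main__":
    # Toy stand-ins so the sketch runs end to end without any external service.
    toy_llm = lambda prompt: ["the cat sat on the mat", "a cat was sitting on the mat"]
    toy_score = lambda text: -len(text.split())  # pretend shorter is clearer
    print(prompt_and_select("the cat sat on the mat", toy_llm, toy_score))
```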