@inproceedings{wang-etal-2023-listen,
title = "Listen, Decipher and Sign: Toward Unsupervised Speech-to-Sign Language Recognition",
author = "Wang, Liming and
Ni, Junrui and
Gao, Heting and
Li, Jialu and
Chang, Kai Chieh and
Fan, Xulin and
Wu, Junkai and
Hasegawa-Johnson, Mark and
Yoo, Chang",
editor = "Rogers, Anna and
Boyd-Graber, Jordan and
Okazaki, Naoaki",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2023",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-acl.424",
doi = "10.18653/v1/2023.findings-acl.424",
pages = "6785--6800",
abstract = "Existing supervised sign language recognition systems rely on an abundance of well-annotated data. Instead, an unsupervised speech-to-sign language recognition (SSR-U) system learns to translate between spoken and sign languages by observing only non-parallel speech and sign-language corpora. We propose speech2sign-U, a neural network-based approach capable of both character-level and word-level SSR-U. Our approach significantly outperforms baselines directly adapted from unsupervised speech recognition (ASR-U) models by as much as 50{\%} recall@10 on several challenging American sign language corpora with various levels of sample sizes, vocabulary sizes, and audio and visual variability. The code is available at \url{https://github.com/cactuswiththoughts/UnsupSpeech2Sign.gitcactuswiththoughts/UnsupSpeech2Sign.git}.",
}

<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wang-etal-2023-listen">
<titleInfo>
<title>Listen, Decipher and Sign: Toward Unsupervised Speech-to-Sign Language Recognition</title>
</titleInfo>
<name type="personal">
<namePart type="given">Liming</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junrui</namePart>
<namePart type="family">Ni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Heting</namePart>
<namePart type="family">Gao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jialu</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kai</namePart>
<namePart type="given">Chieh</namePart>
<namePart type="family">Chang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xulin</namePart>
<namePart type="family">Fan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junkai</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mark</namePart>
<namePart type="family">Hasegawa-Johnson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chang</namePart>
<namePart type="family">Yoo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Rogers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jordan</namePart>
<namePart type="family">Boyd-Graber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Naoaki</namePart>
<namePart type="family">Okazaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Existing supervised sign language recognition systems rely on an abundance of well-annotated data. Instead, an unsupervised speech-to-sign language recognition (SSR-U) system learns to translate between spoken and sign languages by observing only non-parallel speech and sign-language corpora. We propose speech2sign-U, a neural network-based approach capable of both character-level and word-level SSR-U. Our approach significantly outperforms baselines directly adapted from unsupervised speech recognition (ASR-U) models by as much as 50% recall@10 on several challenging American Sign Language corpora with various sample sizes, vocabulary sizes, and levels of audio and visual variability. The code is available at https://github.com/cactuswiththoughts/UnsupSpeech2Sign.git.</abstract>
<identifier type="citekey">wang-etal-2023-listen</identifier>
<identifier type="doi">10.18653/v1/2023.findings-acl.424</identifier>
<location>
<url>https://aclanthology.org/2023.findings-acl.424</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>6785</start>
<end>6800</end>
</extent>
</part>
</mods>
</modsCollection>

%0 Conference Proceedings
%T Listen, Decipher and Sign: Toward Unsupervised Speech-to-Sign Language Recognition
%A Wang, Liming
%A Ni, Junrui
%A Gao, Heting
%A Li, Jialu
%A Chang, Kai Chieh
%A Fan, Xulin
%A Wu, Junkai
%A Hasegawa-Johnson, Mark
%A Yoo, Chang
%Y Rogers, Anna
%Y Boyd-Graber, Jordan
%Y Okazaki, Naoaki
%S Findings of the Association for Computational Linguistics: ACL 2023
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F wang-etal-2023-listen
%X Existing supervised sign language recognition systems rely on an abundance of well-annotated data. Instead, an unsupervised speech-to-sign language recognition (SSR-U) system learns to translate between spoken and sign languages by observing only non-parallel speech and sign-language corpora. We propose speech2sign-U, a neural network-based approach capable of both character-level and word-level SSR-U. Our approach significantly outperforms baselines directly adapted from unsupervised speech recognition (ASR-U) models by as much as 50% recall@10 on several challenging American Sign Language corpora with various sample sizes, vocabulary sizes, and levels of audio and visual variability. The code is available at https://github.com/cactuswiththoughts/UnsupSpeech2Sign.git.
%R 10.18653/v1/2023.findings-acl.424
%U https://aclanthology.org/2023.findings-acl.424
%U https://doi.org/10.18653/v1/2023.findings-acl.424
%P 6785-6800

[Listen, Decipher and Sign: Toward Unsupervised Speech-to-Sign Language Recognition](https://aclanthology.org/2023.findings-acl.424) (Wang et al., Findings 2023)
Liming Wang, Junrui Ni, Heting Gao, Jialu Li, Kai Chieh Chang, Xulin Fan, Junkai Wu, Mark Hasegawa-Johnson, and Chang Yoo. 2023. Listen, Decipher and Sign: Toward Unsupervised Speech-to-Sign Language Recognition. In Findings of the Association for Computational Linguistics: ACL 2023, pages 6785–6800, Toronto, Canada. Association for Computational Linguistics.