@inproceedings{moryossef-etal-2021-data,
title = "Data Augmentation for Sign Language Gloss Translation",
author = "Moryossef, Amit and
Yin, Kayo and
Neubig, Graham and
Goldberg, Yoav",
editor = "Shterionov, Dimitar",
booktitle = "Proceedings of the 1st International Workshop on Automatic Translation for Signed and Spoken Languages (AT4SSL)",
month = aug,
year = "2021",
address = "Virtual",
publisher = "Association for Machine Translation in the Americas",
url = "https://aclanthology.org/2021.mtsummit-at4ssl.1",
pages = "1--11",
abstract = "Sign language translation (SLT) is often decomposed into video-to-gloss recognition and gloss to-text translation, where a gloss is a sequence of transcribed spoken-language words in the order in which they are signed. We focus here on gloss-to-text translation, which we treat as a low-resource neural machine translation (NMT) problem. However, unlike traditional low resource NMT, gloss-to-text translation differs because gloss-text pairs often have a higher lexical overlap and lower syntactic overlap than pairs of spoken languages. We exploit this lexical overlap and handle syntactic divergence by proposing two rule-based heuristics that generate pseudo-parallel gloss-text pairs from monolingual spoken language text. By pre-training on this synthetic data, we improve translation from American Sign Language (ASL) to English and German Sign Language (DGS) to German by up to 3.14 and 2.20 BLEU, respectively.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="moryossef-etal-2021-data">
<titleInfo>
<title>Data Augmentation for Sign Language Gloss Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Amit</namePart>
<namePart type="family">Moryossef</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kayo</namePart>
<namePart type="family">Yin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Graham</namePart>
<namePart type="family">Neubig</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yoav</namePart>
<namePart type="family">Goldberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st International Workshop on Automatic Translation for Signed and Spoken Languages (AT4SSL)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dimitar</namePart>
<namePart type="family">Shterionov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Machine Translation in the Americas</publisher>
<place>
<placeTerm type="text">Virtual</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Sign language translation (SLT) is often decomposed into video-to-gloss recognition and gloss to-text translation, where a gloss is a sequence of transcribed spoken-language words in the order in which they are signed. We focus here on gloss-to-text translation, which we treat as a low-resource neural machine translation (NMT) problem. However, unlike traditional low resource NMT, gloss-to-text translation differs because gloss-text pairs often have a higher lexical overlap and lower syntactic overlap than pairs of spoken languages. We exploit this lexical overlap and handle syntactic divergence by proposing two rule-based heuristics that generate pseudo-parallel gloss-text pairs from monolingual spoken language text. By pre-training on this synthetic data, we improve translation from American Sign Language (ASL) to English and German Sign Language (DGS) to German by up to 3.14 and 2.20 BLEU, respectively.</abstract>
<identifier type="citekey">moryossef-etal-2021-data</identifier>
<location>
<url>https://aclanthology.org/2021.mtsummit-at4ssl.1</url>
</location>
<part>
<date>2021-08</date>
<extent unit="page">
<start>1</start>
<end>11</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Data Augmentation for Sign Language Gloss Translation
%A Moryossef, Amit
%A Yin, Kayo
%A Neubig, Graham
%A Goldberg, Yoav
%Y Shterionov, Dimitar
%S Proceedings of the 1st International Workshop on Automatic Translation for Signed and Spoken Languages (AT4SSL)
%D 2021
%8 August
%I Association for Machine Translation in the Americas
%C Virtual
%F moryossef-etal-2021-data
%X Sign language translation (SLT) is often decomposed into video-to-gloss recognition and gloss to-text translation, where a gloss is a sequence of transcribed spoken-language words in the order in which they are signed. We focus here on gloss-to-text translation, which we treat as a low-resource neural machine translation (NMT) problem. However, unlike traditional low resource NMT, gloss-to-text translation differs because gloss-text pairs often have a higher lexical overlap and lower syntactic overlap than pairs of spoken languages. We exploit this lexical overlap and handle syntactic divergence by proposing two rule-based heuristics that generate pseudo-parallel gloss-text pairs from monolingual spoken language text. By pre-training on this synthetic data, we improve translation from American Sign Language (ASL) to English and German Sign Language (DGS) to German by up to 3.14 and 2.20 BLEU, respectively.
%U https://aclanthology.org/2021.mtsummit-at4ssl.1
%P 1-11
Markdown (Informal)
[Data Augmentation for Sign Language Gloss Translation](https://aclanthology.org/2021.mtsummit-at4ssl.1) (Moryossef et al., MTSummit 2021)
ACL
- Amit Moryossef, Kayo Yin, Graham Neubig, and Yoav Goldberg. 2021. Data Augmentation for Sign Language Gloss Translation. In Proceedings of the 1st International Workshop on Automatic Translation for Signed and Spoken Languages (AT4SSL), pages 1–11, Virtual. Association for Machine Translation in the Americas.