@inproceedings{beloucif-etal-2016-improving,
title = "Improving word alignment for low resource languages using {E}nglish monolingual {SRL}",
author = "Beloucif, Meriem and
Saers, Markus and
Wu, Dekai",
editor = "Lambert, Patrik and
Babych, Bogdan and
Eberle, Kurt and
Banchs, Rafael E. and
Rapp, Reinhard and
Costa-juss{\`a}, Marta R.",
booktitle = "Proceedings of the Sixth Workshop on Hybrid Approaches to Translation ({H}y{T}ra6)",
month = dec,
year = "2016",
address = "Osaka, Japan",
publisher = "The COLING 2016 Organizing Committee",
url = "https://aclanthology.org/W16-4507",
pages = "51--60",
abstract = "We introduce a new statistical machine translation approach specifically geared to learning translation from low resource languages, that exploits monolingual English semantic parsing to bias inversion transduction grammar (ITG) induction. We show that in contrast to conventional statistical machine translation (SMT) training methods, which rely heavily on phrase memorization, our approach focuses on learning bilingual correlations that help translating low resource languages, by using the output language semantic structure to further narrow down ITG constraints. This approach is motivated by previous research which has shown that injecting a semantic frame based objective function while training SMT models improves the translation quality. We show that including a monolingual semantic objective function during the learning of the translation model leads towards a semantically driven alignment which is more efficient than simply tuning loglinear mixture weights against a semantic frame based evaluation metric in the final stage of statistical machine translation training. We test our approach with three different language pairs and demonstrate that our model biases the learning towards more semantically correct alignments. Both GIZA++ and ITG based techniques fail to capture meaningful bilingual constituents, which is required when trying to learn translation models for low resource languages. In contrast, our proposed model not only improve translation by injecting a monolingual objective function to learn bilingual correlations during early training of the translation model, but also helps to learn more meaningful correlations with a relatively small data set, leading to a better alignment compared to either conventional ITG or traditional GIZA++ based approaches.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="beloucif-etal-2016-improving">
<titleInfo>
<title>Improving word alignment for low resource languages using English monolingual SRL</title>
</titleInfo>
<name type="personal">
<namePart type="given">Meriem</namePart>
<namePart type="family">Beloucif</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Markus</namePart>
<namePart type="family">Saers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dekai</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2016-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Sixth Workshop on Hybrid Approaches to Translation (HyTra6)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Patrik</namePart>
<namePart type="family">Lambert</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bogdan</namePart>
<namePart type="family">Babych</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kurt</namePart>
<namePart type="family">Eberle</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rafael</namePart>
<namePart type="given">E</namePart>
<namePart type="family">Banchs</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Reinhard</namePart>
<namePart type="family">Rapp</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marta</namePart>
<namePart type="given">R</namePart>
<namePart type="family">Costa-jussà</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>The COLING 2016 Organizing Committee</publisher>
<place>
<placeTerm type="text">Osaka, Japan</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We introduce a new statistical machine translation approach specifically geared to learning translation from low resource languages, that exploits monolingual English semantic parsing to bias inversion transduction grammar (ITG) induction. We show that in contrast to conventional statistical machine translation (SMT) training methods, which rely heavily on phrase memorization, our approach focuses on learning bilingual correlations that help translating low resource languages, by using the output language semantic structure to further narrow down ITG constraints. This approach is motivated by previous research which has shown that injecting a semantic frame based objective function while training SMT models improves the translation quality. We show that including a monolingual semantic objective function during the learning of the translation model leads towards a semantically driven alignment which is more efficient than simply tuning loglinear mixture weights against a semantic frame based evaluation metric in the final stage of statistical machine translation training. We test our approach with three different language pairs and demonstrate that our model biases the learning towards more semantically correct alignments. Both GIZA++ and ITG based techniques fail to capture meaningful bilingual constituents, which is required when trying to learn translation models for low resource languages. In contrast, our proposed model not only improve translation by injecting a monolingual objective function to learn bilingual correlations during early training of the translation model, but also helps to learn more meaningful correlations with a relatively small data set, leading to a better alignment compared to either conventional ITG or traditional GIZA++ based approaches.</abstract>
<identifier type="citekey">beloucif-etal-2016-improving</identifier>
<location>
<url>https://aclanthology.org/W16-4507</url>
</location>
<part>
<date>2016-12</date>
<extent unit="page">
<start>51</start>
<end>60</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Improving word alignment for low resource languages using English monolingual SRL
%A Beloucif, Meriem
%A Saers, Markus
%A Wu, Dekai
%Y Lambert, Patrik
%Y Babych, Bogdan
%Y Eberle, Kurt
%Y Banchs, Rafael E.
%Y Rapp, Reinhard
%Y Costa-jussà, Marta R.
%S Proceedings of the Sixth Workshop on Hybrid Approaches to Translation (HyTra6)
%D 2016
%8 December
%I The COLING 2016 Organizing Committee
%C Osaka, Japan
%F beloucif-etal-2016-improving
%X We introduce a new statistical machine translation approach specifically geared to learning translation from low resource languages, that exploits monolingual English semantic parsing to bias inversion transduction grammar (ITG) induction. We show that in contrast to conventional statistical machine translation (SMT) training methods, which rely heavily on phrase memorization, our approach focuses on learning bilingual correlations that help translating low resource languages, by using the output language semantic structure to further narrow down ITG constraints. This approach is motivated by previous research which has shown that injecting a semantic frame based objective function while training SMT models improves the translation quality. We show that including a monolingual semantic objective function during the learning of the translation model leads towards a semantically driven alignment which is more efficient than simply tuning loglinear mixture weights against a semantic frame based evaluation metric in the final stage of statistical machine translation training. We test our approach with three different language pairs and demonstrate that our model biases the learning towards more semantically correct alignments. Both GIZA++ and ITG based techniques fail to capture meaningful bilingual constituents, which is required when trying to learn translation models for low resource languages. In contrast, our proposed model not only improve translation by injecting a monolingual objective function to learn bilingual correlations during early training of the translation model, but also helps to learn more meaningful correlations with a relatively small data set, leading to a better alignment compared to either conventional ITG or traditional GIZA++ based approaches.
%U https://aclanthology.org/W16-4507
%P 51-60
Markdown (Informal)
[Improving word alignment for low resource languages using English monolingual SRL](https://aclanthology.org/W16-4507) (Beloucif et al., HyTra 2016)
ACL