@inproceedings{bisazza-federico-2009-morphological,
title = "Morphological pre-processing for {T}urkish to {E}nglish statistical machine translation",
author = "Bisazza, Arianna and
Federico, Marcello",
booktitle = "Proceedings of the 6th International Workshop on Spoken Language Translation: Papers",
month = dec # " 1-2",
year = "2009",
address = "Tokyo, Japan",
url = "https://aclanthology.org/2009.iwslt-papers.1/",
pages = "129--135",
abstract = "We tried to cope with the complex morphology of Turkish by applying different schemes of morphological word segmentation to the training and test data of a phrase-based statistical machine translation system. These techniques allow for a considerable reduction of the training dictionary, and lower the out-of-vocabulary rate of the test set. By minimizing differences between lexical granularities of Turkish and English we can produce more refined alignments and a better modeling of the translation task. Morphological segmentation is highly language dependent and requires a fair amount of linguistic knowledge in its development phase. Yet it is fast and light-weight {--} does not involve syntax {--} and appears to benefit our IWSLT09 system: our best segmentation scheme associated to a simple lexical approximation technique achieved a 50{\%} reduction of out-of-vocabulary rate and over 5 point BLEU improvement above the baseline."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bisazza-federico-2009-morphological">
<titleInfo>
<title>Morphological pre-processing for Turkish to English statistical machine translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Arianna</namePart>
<namePart type="family">Bisazza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcello</namePart>
<namePart type="family">Federico</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued encoding="w3cdtf" point="start">2009-12-01</dateIssued>
<dateIssued encoding="w3cdtf" point="end">2009-12-02</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 6th International Workshop on Spoken Language Translation: Papers</title>
</titleInfo>
<originInfo>
<place>
<placeTerm type="text">Tokyo, Japan</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We tried to cope with the complex morphology of Turkish by applying different schemes of morphological word segmentation to the training and test data of a phrase-based statistical machine translation system. These techniques allow for a considerable reduction of the training dictionary, and lower the out-of-vocabulary rate of the test set. By minimizing differences between lexical granularities of Turkish and English we can produce more refined alignments and a better modeling of the translation task. Morphological segmentation is highly language dependent and requires a fair amount of linguistic knowledge in its development phase. Yet it is fast and light-weight – does not involve syntax – and appears to benefit our IWSLT09 system: our best segmentation scheme associated to a simple lexical approximation technique achieved a 50% reduction of out-of-vocabulary rate and over 5 point BLEU improvement above the baseline.</abstract>
<identifier type="citekey">bisazza-federico-2009-morphological</identifier>
<location>
<url>https://aclanthology.org/2009.iwslt-papers.1/</url>
</location>
<part>
<date encoding="w3cdtf" point="start">2009-12-01</date>
<date encoding="w3cdtf" point="end">2009-12-02</date>
<extent unit="page">
<start>129</start>
<end>135</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Morphological pre-processing for Turkish to English statistical machine translation
%A Bisazza, Arianna
%A Federico, Marcello
%S Proceedings of the 6th International Workshop on Spoken Language Translation: Papers
%D 2009
%8 December 1-2
%C Tokyo, Japan
%F bisazza-federico-2009-morphological
%X We tried to cope with the complex morphology of Turkish by applying different schemes of morphological word segmentation to the training and test data of a phrase-based statistical machine translation system. These techniques allow for a considerable reduction of the training dictionary, and lower the out-of-vocabulary rate of the test set. By minimizing differences between lexical granularities of Turkish and English we can produce more refined alignments and a better modeling of the translation task. Morphological segmentation is highly language dependent and requires a fair amount of linguistic knowledge in its development phase. Yet it is fast and light-weight – does not involve syntax – and appears to benefit our IWSLT09 system: our best segmentation scheme associated to a simple lexical approximation technique achieved a 50% reduction of out-of-vocabulary rate and over 5 point BLEU improvement above the baseline.
%U https://aclanthology.org/2009.iwslt-papers.1/
%P 129-135
Markdown (Informal)
[Morphological pre-processing for Turkish to English statistical machine translation](https://aclanthology.org/2009.iwslt-papers.1/) (Bisazza & Federico, IWSLT 2009)
ACL