@inproceedings{bisazza-etal-2011-fill,
title = "Fill-up versus interpolation methods for phrase-based {SMT} adaptation",
author = "Bisazza, Arianna and
Ruiz, Nick and
Federico, Marcello",
editor = {Federico, Marcello and
Hwang, Mei-Yuh and
R{\"o}dder, Margit and
St{\"u}ker, Sebastian},
booktitle = "Proceedings of the 8th International Workshop on Spoken Language Translation: Evaluation Campaign",
month = dec # " 8-9",
year = "2011",
address = "San Francisco, California",
url = "https://aclanthology.org/2011.iwslt-evaluation.18/",
pages = "136--143",
abstract = "This paper compares techniques to combine diverse parallel corpora for domain-specific phrase-based SMT system training. We address a common scenario where little in-domain data is available for the task, but where large background models exist for the same language pair. In particular, we focus on phrase table fill-up: a method that effectively exploits background knowledge to improve model coverage, while preserving the more reliable information coming from the in-domain corpus. We present experiments on an emerging transcribed speech translation task {--} the TED talks. While performing similarly in terms of BLEU and NIST scores to the popular log-linear and linear interpolation techniques, filled-up translation models are more compact and easy to tune by minimum error training."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bisazza-etal-2011-fill">
<titleInfo>
<title>Fill-up versus interpolation methods for phrase-based SMT adaptation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Arianna</namePart>
<namePart type="family">Bisazza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nick</namePart>
<namePart type="family">Ruiz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcello</namePart>
<namePart type="family">Federico</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>December 8-9, 2011</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 8th International Workshop on Spoken Language Translation: Evaluation Campaign</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marcello</namePart>
<namePart type="family">Federico</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mei-Yuh</namePart>
<namePart type="family">Hwang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Margit</namePart>
<namePart type="family">Rödder</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Stüker</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<place>
<placeTerm type="text">San Francisco, California</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper compares techniques to combine diverse parallel corpora for domain-specific phrase-based SMT system training. We address a common scenario where little in-domain data is available for the task, but where large background models exist for the same language pair. In particular, we focus on phrase table fill-up: a method that effectively exploits background knowledge to improve model coverage, while preserving the more reliable information coming from the in-domain corpus. We present experiments on an emerging transcribed speech translation task – the TED talks. While performing similarly in terms of BLEU and NIST scores to the popular log-linear and linear interpolation techniques, filled-up translation models are more compact and easy to tune by minimum error training.</abstract>
<identifier type="citekey">bisazza-etal-2011-fill</identifier>
<location>
<url>https://aclanthology.org/2011.iwslt-evaluation.18/</url>
</location>
<part>
<date>December 8-9, 2011</date>
<extent unit="page">
<start>136</start>
<end>143</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Fill-up versus interpolation methods for phrase-based SMT adaptation
%A Bisazza, Arianna
%A Ruiz, Nick
%A Federico, Marcello
%Y Federico, Marcello
%Y Hwang, Mei-Yuh
%Y Rödder, Margit
%Y Stüker, Sebastian
%S Proceedings of the 8th International Workshop on Spoken Language Translation: Evaluation Campaign
%D 2011
%8 dec 8-9
%C San Francisco, California
%F bisazza-etal-2011-fill
%X This paper compares techniques to combine diverse parallel corpora for domain-specific phrase-based SMT system training. We address a common scenario where little in-domain data is available for the task, but where large background models exist for the same language pair. In particular, we focus on phrase table fill-up: a method that effectively exploits background knowledge to improve model coverage, while preserving the more reliable information coming from the in-domain corpus. We present experiments on an emerging transcribed speech translation task – the TED talks. While performing similarly in terms of BLEU and NIST scores to the popular log-linear and linear interpolation techniques, filled-up translation models are more compact and easy to tune by minimum error training.
%U https://aclanthology.org/2011.iwslt-evaluation.18/
%P 136-143
Markdown (Informal)
[Fill-up versus interpolation methods for phrase-based SMT adaptation](https://aclanthology.org/2011.iwslt-evaluation.18/) (Bisazza et al., IWSLT 2011)
ACL
Arianna Bisazza, Nick Ruiz, and Marcello Federico. 2011. Fill-up versus interpolation methods for phrase-based SMT adaptation. In Proceedings of the 8th International Workshop on Spoken Language Translation: Evaluation Campaign, pages 136–143, San Francisco, California.
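
As a reading aid, the sketch below illustrates the phrase table fill-up idea as the abstract describes it: every entry from the more reliable in-domain table is kept, background-table entries are added only for source phrases the in-domain table does not cover, and a provenance feature marks filled-up entries so that minimum error training can weight them. The table layout, function names, and the use of exp(0)/exp(1) as provenance feature values are assumptions for illustration, not code from the paper.

import math
from typing import Dict, List, Tuple

# source phrase -> list of (target phrase, translation feature scores)
PhraseTable = Dict[str, List[Tuple[str, List[float]]]]

def fill_up(in_domain: PhraseTable, background: PhraseTable) -> PhraseTable:
    """Merge two phrase tables, always preferring the in-domain one."""
    merged: PhraseTable = {}
    # Keep every in-domain entry; provenance feature exp(0) = 1.0.
    for src, options in in_domain.items():
        merged[src] = [(tgt, scores + [math.exp(0)]) for tgt, scores in options]
    # Fill gaps from the background table; provenance feature exp(1)
    # flags these entries as coming from out-of-domain data.
    for src, options in background.items():
        if src not in merged:
            merged[src] = [(tgt, scores + [math.exp(1)]) for tgt, scores in options]
    return merged

if __name__ == "__main__":
    in_domain = {"ted talk": [("conférence ted", [0.7, 0.6])]}
    background = {
        "ted talk": [("exposé ted", [0.4, 0.3])],   # ignored: already covered
        "subtitle": [("sous-titre", [0.8, 0.7])],   # filled up from background
    }
    for src, options in fill_up(in_domain, background).items():
        print(src, "->", options)

Because the merged table contains a single entry set per source phrase plus one extra feature, it stays compact and can be tuned directly by standard minimum error rate training, which is the practical advantage the abstract highlights over log-linear and linear interpolation.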