@inproceedings{eck-etal-2005-low-cost,
title = "Low Cost Portability for Statistical Machine Translation based on N-gram Coverage",
author = "Eck, Matthias and
Vogel, Stephan and
Waibel, Alex",
booktitle = "Proceedings of Machine Translation Summit X: Papers",
month = sep # " 13-15",
year = "2005",
address = "Phuket, Thailand",
url = "https://aclanthology.org/2005.mtsummit-papers.30",
pages = "227--234",
abstract = "Statistical machine translation relies heavily on the available training data. However, in some cases, it is necessary to limit the amount of training data that can be created for or actually used by the systems. To solve that problem, we introduce a weighting scheme that tries to select more informative sentences first. This selection is based on the previously unseen n-grams the sentences contain, and it allows us to sort the sentences according to their estimated importance. After sorting, we can construct smaller training corpora, and we are able to demonstrate that systems trained on much less training data show a very competitive performance compared to baseline systems using all available training data.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="eck-etal-2005-low-cost">
<titleInfo>
<title>Low Cost Portability for Statistical Machine Translation based on N-gram Coverage</title>
</titleInfo>
<name type="personal">
<namePart type="given">Matthias</namePart>
<namePart type="family">Eck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stephan</namePart>
<namePart type="family">Vogel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alex</namePart>
<namePart type="family">Waibel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2005-sep 13-15</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of Machine Translation Summit X: Papers</title>
</titleInfo>
<originInfo>
<place>
<placeTerm type="text">Phuket, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Statistical machine translation relies heavily on the available training data. However, in some cases, it is necessary to limit the amount of training data that can be created for or actually used by the systems. To solve that problem, we introduce a weighting scheme that tries to select more informative sentences first. This selection is based on the previously unseen n-grams the sentences contain, and it allows us to sort the sentences according to their estimated importance. After sorting, we can construct smaller training corpora, and we are able to demonstrate that systems trained on much less training data show a very competitive performance compared to baseline systems using all available training data.</abstract>
<identifier type="citekey">eck-etal-2005-low-cost</identifier>
<location>
<url>https://aclanthology.org/2005.mtsummit-papers.30</url>
</location>
<part>
<date>2005-sep 13-15</date>
<extent unit="page">
<start>227</start>
<end>234</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Low Cost Portability for Statistical Machine Translation based on N-gram Coverage
%A Eck, Matthias
%A Vogel, Stephan
%A Waibel, Alex
%S Proceedings of Machine Translation Summit X: Papers
%D 2005
%8 sep 13-15
%C Phuket, Thailand
%F eck-etal-2005-low-cost
%X Statistical machine translation relies heavily on the available training data. However, in some cases, it is necessary to limit the amount of training data that can be created for or actually used by the systems. To solve that problem, we introduce a weighting scheme that tries to select more informative sentences first. This selection is based on the previously unseen n-grams the sentences contain, and it allows us to sort the sentences according to their estimated importance. After sorting, we can construct smaller training corpora, and we are able to demonstrate that systems trained on much less training data show a very competitive performance compared to baseline systems using all available training data.
%U https://aclanthology.org/2005.mtsummit-papers.30
%P 227-234
Markdown (Informal)
[Low Cost Portability for Statistical Machine Translation based on N-gram Coverage](https://aclanthology.org/2005.mtsummit-papers.30) (Eck et al., MTSummit 2005)
ACL