@inproceedings{venugopal-etal-2008-wider,
title = "Wider Pipelines: N-Best Alignments and Parses in {MT} Training",
author = "Venugopal, Ashish and
Zollmann, Andreas and
Smith, Noah A. and
Vogel, Stephan",
booktitle = "Proceedings of the 8th Conference of the Association for Machine Translation in the Americas: Research Papers",
month = oct # " 21-25",
year = "2008",
address = "Waikiki, USA",
publisher = "Association for Machine Translation in the Americas",
url = "https://aclanthology.org/2008.amta-papers.18/",
pages = "192--201",
abstract = "State-of-the-art statistical machine translation systems use hypotheses from several maximum a posteriori inference steps, including word alignments and parse trees, to identify translational structure and estimate the parameters of translation models. While this approach leads to a modular pipeline of independently developed components, errors made in these {\textquotedblleft}single-best{\textquotedblright} hypotheses can propagate to downstream estimation steps that treat these inputs as clean, trustworthy training data. In this work we integrate N-best alignments and parses by using a probability distribution over these alternatives to generate posterior fractional counts for use in downstream estimation. Using these fractional counts in a DOP-inspired syntax-based translation system, we show significant improvements in translation quality over a single-best trained baseline."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="venugopal-etal-2008-wider">
<titleInfo>
<title>Wider Pipelines: N-Best Alignments and Parses in MT Training</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ashish</namePart>
<namePart type="family">Venugopal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andreas</namePart>
<namePart type="family">Zollmann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Noah</namePart>
<namePart type="given">A</namePart>
<namePart type="family">Smith</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stephan</namePart>
<namePart type="family">Vogel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>October 21-25, 2008</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 8th Conference of the Association for Machine Translation in the Americas: Research Papers</title>
</titleInfo>
<originInfo>
<publisher>Association for Machine Translation in the Americas</publisher>
<place>
<placeTerm type="text">Waikiki, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>State-of-the-art statistical machine translation systems use hypotheses from several maximum a posteriori inference steps, including word alignments and parse trees, to identify translational structure and estimate the parameters of translation models. While this approach leads to a modular pipeline of independently developed components, errors made in these “single-best” hypotheses can propagate to downstream estimation steps that treat these inputs as clean, trustworthy training data. In this work we integrate N-best alignments and parses by using a probability distribution over these alternatives to generate posterior fractional counts for use in downstream estimation. Using these fractional counts in a DOP-inspired syntax-based translation system, we show significant improvements in translation quality over a single-best trained baseline.</abstract>
<identifier type="citekey">venugopal-etal-2008-wider</identifier>
<location>
<url>https://aclanthology.org/2008.amta-papers.18/</url>
</location>
<part>
<date>October 21-25, 2008</date>
<extent unit="page">
<start>192</start>
<end>201</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Wider Pipelines: N-Best Alignments and Parses in MT Training
%A Venugopal, Ashish
%A Zollmann, Andreas
%A Smith, Noah A.
%A Vogel, Stephan
%S Proceedings of the 8th Conference of the Association for Machine Translation in the Americas: Research Papers
%D 2008
%8 October 21-25
%I Association for Machine Translation in the Americas
%C Waikiki, USA
%F venugopal-etal-2008-wider
%X State-of-the-art statistical machine translation systems use hypotheses from several maximum a posteriori inference steps, including word alignments and parse trees, to identify translational structure and estimate the parameters of translation models. While this approach leads to a modular pipeline of independently developed components, errors made in these “single-best” hypotheses can propagate to downstream estimation steps that treat these inputs as clean, trustworthy training data. In this work we integrate N-best alignments and parses by using a probability distribution over these alternatives to generate posterior fractional counts for use in downstream estimation. Using these fractional counts in a DOP-inspired syntax-based translation system, we show significant improvements in translation quality over a single-best trained baseline.
%U https://aclanthology.org/2008.amta-papers.18/
%P 192-201
Markdown (Informal)
[Wider Pipelines: N-Best Alignments and Parses in MT Training](https://aclanthology.org/2008.amta-papers.18/) (Venugopal et al., AMTA 2008)
ACL
- Ashish Venugopal, Andreas Zollmann, Noah A. Smith, and Stephan Vogel. 2008. Wider Pipelines: N-Best Alignments and Parses in MT Training. In Proceedings of the 8th Conference of the Association for Machine Translation in the Americas: Research Papers, pages 192–201, Waikiki, USA. Association for Machine Translation in the Americas.