@inproceedings{tomeh-etal-2011-good,
title = "How good are your phrases? Assessing phrase quality with single class classification",
author = "Tomeh, Nadi and
Turchi, Marco and
Wisinewski, Guillaume and
Allauzen, Alexandre and
Yvon, Fran{\c{c}}ois",
editor = {Federico, Marcello and
Hwang, Mei-Yuh and
R{\"o}dder, Margit and
St{\"u}ker, Sebastian},
booktitle = "Proceedings of the 8th International Workshop on Spoken Language Translation: Papers",
month = dec # " 8-9",
year = "2011",
address = "San Francisco, California",
url = "https://aclanthology.org/2011.iwslt-papers.10/",
pages = "261--268",
abstract = "We present a novel translation quality informed procedure for both extraction and scoring of phrase pairs in PBSMT systems. We reformulate the extraction problem in the supervised learning framework. Our goal is twofold. First, We attempt to take the translation quality into account; and second we incorporating arbitrary features in order to circumvent alignment errors. One-Class SVMs and the Mapping Convergence algorithm permit training a single-class classifier to discriminate between useful and useless phrase pairs. Such classifier can be learned from a training corpus that comprises only useful instances. The confidence score, produced by the classifier for each phrase pairs, is employed as a selection criteria. The smoothness of these scores allow a fine control over the size of the resulting translation model. Finally, confidence scores provide a new accuracy-based feature to score phrase pairs. Experimental evaluation of the method shows accurate assessments of phrase pairs quality even for regions in the space of possible phrase pairs that are ignored by other approaches. This enhanced evaluation of phrase pairs leads to improvements in the translation performance as measured by BLEU."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="tomeh-etal-2011-good">
<titleInfo>
<title>How good are your phrases? Assessing phrase quality with single class classification</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nadi</namePart>
<namePart type="family">Tomeh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marco</namePart>
<namePart type="family">Turchi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Guillaume</namePart>
<namePart type="family">Wisinewski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexandre</namePart>
<namePart type="family">Allauzen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">François</namePart>
<namePart type="family">Yvon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2011-dec 8-9</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 8th International Workshop on Spoken Language Translation: Papers</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marcello</namePart>
<namePart type="family">Federico</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mei-Yuh</namePart>
<namePart type="family">Hwang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Margit</namePart>
<namePart type="family">Rödder</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Stüker</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<place>
<placeTerm type="text">San Francisco, California</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present a novel translation quality informed procedure for both extraction and scoring of phrase pairs in PBSMT systems. We reformulate the extraction problem in the supervised learning framework. Our goal is twofold. First, We attempt to take the translation quality into account; and second we incorporating arbitrary features in order to circumvent alignment errors. One-Class SVMs and the Mapping Convergence algorithm permit training a single-class classifier to discriminate between useful and useless phrase pairs. Such classifier can be learned from a training corpus that comprises only useful instances. The confidence score, produced by the classifier for each phrase pairs, is employed as a selection criteria. The smoothness of these scores allow a fine control over the size of the resulting translation model. Finally, confidence scores provide a new accuracy-based feature to score phrase pairs. Experimental evaluation of the method shows accurate assessments of phrase pairs quality even for regions in the space of possible phrase pairs that are ignored by other approaches. This enhanced evaluation of phrase pairs leads to improvements in the translation performance as measured by BLEU.</abstract>
<identifier type="citekey">tomeh-etal-2011-good</identifier>
<location>
<url>https://aclanthology.org/2011.iwslt-papers.10/</url>
</location>
<part>
<date>2011-dec 8-9</date>
<extent unit="page">
<start>261</start>
<end>268</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T How good are your phrases? Assessing phrase quality with single class classification
%A Tomeh, Nadi
%A Turchi, Marco
%A Wisinewski, Guillaume
%A Allauzen, Alexandre
%A Yvon, François
%Y Federico, Marcello
%Y Hwang, Mei-Yuh
%Y Rödder, Margit
%Y Stüker, Sebastian
%S Proceedings of the 8th International Workshop on Spoken Language Translation: Papers
%D 2011
%8 dec 8 9
%C San Francisco, California
%F tomeh-etal-2011-good
%X We present a novel translation quality informed procedure for both extraction and scoring of phrase pairs in PBSMT systems. We reformulate the extraction problem in the supervised learning framework. Our goal is twofold. First, We attempt to take the translation quality into account; and second we incorporating arbitrary features in order to circumvent alignment errors. One-Class SVMs and the Mapping Convergence algorithm permit training a single-class classifier to discriminate between useful and useless phrase pairs. Such classifier can be learned from a training corpus that comprises only useful instances. The confidence score, produced by the classifier for each phrase pairs, is employed as a selection criteria. The smoothness of these scores allow a fine control over the size of the resulting translation model. Finally, confidence scores provide a new accuracy-based feature to score phrase pairs. Experimental evaluation of the method shows accurate assessments of phrase pairs quality even for regions in the space of possible phrase pairs that are ignored by other approaches. This enhanced evaluation of phrase pairs leads to improvements in the translation performance as measured by BLEU.
%U https://aclanthology.org/2011.iwslt-papers.10/
%P 261-268
Markdown (Informal)
[How good are your phrases? Assessing phrase quality with single class classification](https://aclanthology.org/2011.iwslt-papers.10/) (Tomeh et al., IWSLT 2011)
ACL