@inproceedings{arcan-etal-2014-enhancing,
title = "Enhancing statistical machine translation with bilingual terminology in a {CAT} environment",
author = "Arcan, Mihael and
Turchi, Marco and
Topelli, Sara and
Buitelaar, Paul",
editor = "Al-Onaizan, Yaser and
Simard, Michel",
booktitle = "Proceedings of the 11th Conference of the Association for Machine Translation in the Americas: MT Researchers Track",
month = oct # " 22-26",
year = "2014",
address = "Vancouver, Canada",
publisher = "Association for Machine Translation in the Americas",
url = "https://aclanthology.org/2014.amta-researchers.5",
pages = "54--68",
abstract = "In this paper, we address the problem of extracting and integrating bilingual terminology into a Statistical Machine Translation (SMT) system for a Computer Aided Translation (CAT) tool scenario. We develop a framework that, taking as input a small amount of parallel in-domain data, gathers domain-specific bilingual terms and injects them in an SMT system to enhance the translation productivity. Therefore, we investigate several strategies to extract and align bilingual terminology, and to embed it into the SMT. We compare two embedding methods that can be easily used at run-time without altering the normal activity of an SMT system: XML markup and the cache-based model. We tested our framework on two different domains showing improvements up to 15{\%} BLEU score points.",
}
<?xml version="1.0" encoding="UTF-8"?>
<!--
  MODS v3 record for the paper with citekey arcan-etal-2014-enhancing.
  Layout: the top-level <mods> element describes the paper itself (title,
  four authors, abstract, URL, page extent); the <relatedItem type="host">
  element describes the proceedings volume it appears in (title, two
  editors, publisher, place).
  Dates: the 22 to 26 October 2014 range is written as a W3CDTF start/end
  pair so that consumers can parse it without guessing at a free-text string.
-->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="arcan-etal-2014-enhancing">
    <titleInfo>
      <title>Enhancing statistical machine translation with bilingual terminology in a CAT environment</title>
    </titleInfo>
    <!-- Authors, in byline order. -->
    <name type="personal">
      <namePart type="given">Mihael</namePart>
      <namePart type="family">Arcan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Marco</namePart>
      <namePart type="family">Turchi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sara</namePart>
      <namePart type="family">Topelli</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Paul</namePart>
      <namePart type="family">Buitelaar</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <!-- Date range encoded as a start/end pair instead of the free-text "2014-oct 22-26". -->
      <dateIssued encoding="w3cdtf" point="start">2014-10-22</dateIssued>
      <dateIssued encoding="w3cdtf" point="end">2014-10-26</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume that contains this paper. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 11th Conference of the Association for Machine Translation in the Americas: MT Researchers Track</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Yaser</namePart>
        <namePart type="family">Al-Onaizan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Michel</namePart>
        <namePart type="family">Simard</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Machine Translation in the Americas</publisher>
        <place>
          <placeTerm type="text">Vancouver, Canada</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <!-- Text content below is kept on a single line so no indentation whitespace leaks into the data. -->
    <abstract>In this paper, we address the problem of extracting and integrating bilingual terminology into a Statistical Machine Translation (SMT) system for a Computer Aided Translation (CAT) tool scenario. We develop a framework that, taking as input a small amount of parallel in-domain data, gathers domain-specific bilingual terms and injects them in an SMT system to enhance the translation productivity. Therefore, we investigate several strategies to extract and align bilingual terminology, and to embed it into the SMT. We compare two embedding methods that can be easily used at run-time without altering the normal activity of an SMT system: XML markup and the cache-based model. We tested our framework on two different domains showing improvements up to 15% BLEU score points.</abstract>
    <identifier type="citekey">arcan-etal-2014-enhancing</identifier>
    <location>
      <url>https://aclanthology.org/2014.amta-researchers.5</url>
    </location>
    <part>
      <!-- Same date range as originInfo/dateIssued, in the same encoded form. -->
      <date encoding="w3cdtf" point="start">2014-10-22</date>
      <date encoding="w3cdtf" point="end">2014-10-26</date>
      <extent unit="page">
        <start>54</start>
        <end>68</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Enhancing statistical machine translation with bilingual terminology in a CAT environment
%A Arcan, Mihael
%A Turchi, Marco
%A Topelli, Sara
%A Buitelaar, Paul
%Y Al-Onaizan, Yaser
%Y Simard, Michel
%S Proceedings of the 11th Conference of the Association for Machine Translation in the Americas: MT Researchers Track
%D 2014
%8 oct 22 26
%I Association for Machine Translation in the Americas
%C Vancouver, Canada
%F arcan-etal-2014-enhancing
%X In this paper, we address the problem of extracting and integrating bilingual terminology into a Statistical Machine Translation (SMT) system for a Computer Aided Translation (CAT) tool scenario. We develop a framework that, taking as input a small amount of parallel in-domain data, gathers domain-specific bilingual terms and injects them in an SMT system to enhance the translation productivity. Therefore, we investigate several strategies to extract and align bilingual terminology, and to embed it into the SMT. We compare two embedding methods that can be easily used at run-time without altering the normal activity of an SMT system: XML markup and the cache-based model. We tested our framework on two different domains showing improvements up to 15% BLEU score points.
%U https://aclanthology.org/2014.amta-researchers.5
%P 54-68
Markdown (Informal)
[Enhancing statistical machine translation with bilingual terminology in a CAT environment](https://aclanthology.org/2014.amta-researchers.5) (Arcan et al., AMTA 2014)
ACL