% Conference paper from the Proceedings of Machine Translation Summit VIII (2001).
% The title keeps its less-than / greater-than pair as braced math so it survives style recasing.
@inproceedings{darwin-2001-trial,
    title = "Trial and error: an evaluation project on {J}apanese {\ensuremath{<}}{\ensuremath{>}} {E}nglish {MT} output quality",
    author = "Darwin, Maki",
    editor = "Maegaard, Bente",
    booktitle = "Proceedings of Machine Translation Summit VIII",
    month = sep # " 18--22",
    year = "2001",
    address = "Santiago de Compostela, Spain",
    url = "https://aclanthology.org/2001.mtsummit-papers.15/",
    abstract = "This paper describes a small-scale but organized attempt to evaluate output quality of several Japanese MT systems. The project also served as the first experiment of the implementation of the in-house MT evaluation guidelines created in 2000. Since time was limited and the budget was not infinite, it was launched with the following compact components: Five people; 300 source sentences per language pair; and 160 hours per evaluator. The quantitative results showed noteworthy phenomena. Although the test materials had been presented in a way that evaluators could not identify the performance of any particular system, the results were quite consistent. The scoring ratio that the two E-to-J evaluators employed was almost identical, while that of the J-to-E evaluators was similar. This indicates that high-quality output has universal appeal. Additionally, the evaluators noted that stronger systems, regardless of language pair, tended to be superior in source sentence analysis, target sentence arrangement, word choice, and lexicon entries whereas weaker systems tended to be inferior in these areas. As for language-pair comparison, the results indicate that English-to-Japanese systems may require more improvement than their counterparts, judging from the scores given and the number of unfound words recorded."
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS record for the paper with citekey darwin-2001-trial. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="darwin-2001-trial">
    <titleInfo>
      <!-- The angle brackets in the title are character data and must be escaped as entities. -->
      <title>Trial and error: an evaluation project on Japanese &lt;&gt; English MT output quality</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Maki</namePart>
      <namePart type="family">Darwin</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <!-- Date range 18 to 22 September 2001, expressed as a W3CDTF start/end pair. -->
      <dateIssued encoding="w3cdtf" point="start">2001-09-18</dateIssued>
      <dateIssued encoding="w3cdtf" point="end">2001-09-22</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume in which the paper appears. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of Machine Translation Summit VIII</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Bente</namePart>
        <namePart type="family">Maegaard</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <place>
          <placeTerm type="text">Santiago de Compostela, Spain</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>This paper describes a small-scale but organized attempt to evaluate output quality of several Japanese MT systems. The project also served as the first experiment of the implementation of the in-house MT evaluation guidelines created in 2000. Since time was limited and the budget was not infinite, it was launched with the following compact components: Five people; 300 source sentences per language pair; and 160 hours per evaluator. The quantitative results showed noteworthy phenomena. Although the test materials had been presented in a way that evaluators could not identify the performance of any particular system, the results were quite consistent. The scoring ratio that the two E-to-J evaluators employed was almost identical, while that of the J-to-E evaluators was similar. This indicates that high-quality output has universal appeal. Additionally, the evaluators noted that stronger systems, regardless of language pair, tended to be superior in source sentence analysis, target sentence arrangement, word choice, and lexicon entries whereas weaker systems tended to be inferior in these areas. As for language-pair comparison, the results indicate that English-to-Japanese systems may require more improvement than their counterparts, judging from the scores given and the number of unfound words recorded.</abstract>
    <identifier type="citekey">darwin-2001-trial</identifier>
    <location>
      <url>https://aclanthology.org/2001.mtsummit-papers.15/</url>
    </location>
    <part>
      <!-- Same date range as originInfo/dateIssued above. -->
      <date encoding="w3cdtf" point="start">2001-09-18</date>
      <date encoding="w3cdtf" point="end">2001-09-22</date>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Trial and error: an evaluation project on Japanese <> English MT output quality
%A Darwin, Maki
%Y Maegaard, Bente
%S Proceedings of Machine Translation Summit VIII
%D 2001
%8 September 18-22
%C Santiago de Compostela, Spain
%F darwin-2001-trial
%X This paper describes a small-scale but organized attempt to evaluate output quality of several Japanese MT systems. The project also served as the first experiment of the implementation of the in-house MT evaluation guidelines created in 2000. Since time was limited and the budget was not infinite, it was launched with the following compact components: Five people; 300 source sentences per language pair; and 160 hours per evaluator. The quantitative results showed noteworthy phenomena. Although the test materials had been presented in a way that evaluators could not identify the performance of any particular system, the results were quite consistent. The scoring ratio that the two E-to-J evaluators employed was almost identical, while that of the J-to-E evaluators was similar. This indicates that high-quality output has universal appeal. Additionally, the evaluators noted that stronger systems, regardless of language pair, tended to be superior in source sentence analysis, target sentence arrangement, word choice, and lexicon entries whereas weaker systems tended to be inferior in these areas. As for language-pair comparison, the results indicate that English-to-Japanese systems may require more improvement than their counterparts, judging from the scores given and the number of unfound words recorded.
%U https://aclanthology.org/2001.mtsummit-papers.15/
Markdown (Informal)
[Trial and error: an evaluation project on Japanese <> English MT output quality](https://aclanthology.org/2001.mtsummit-papers.15/) (Darwin, MTSummit 2001)
ACL