@inproceedings{paik-etal-2004-comparison,
    title = "A Comparison of Two Variant Corpora: The Same Content with Different Source",
    author = "Paik, Kyonghee and
      Ohtake, Kiyonori and
      Yamamoto, Kazuhide",
    editor = "Lino, Maria Teresa and
      Xavier, Maria Francisca and
      Ferreira, F{\'a}tima and
      Costa, Rute and
      Silva, Raquel",
    booktitle = "Proceedings of the Fourth International Conference on Language Resources and Evaluation ({LREC}{'}04)",
    month = may,
    year = "2004",
    address = "Lisbon, Portugal",
    publisher = "European Language Resources Association (ELRA)",
    url = "http://www.lrec-conf.org/proceedings/lrec2004/pdf/424.pdf",
    abstract = "In order to investigate the effect of source language on translations, we investigate two variants of a Korean translation corpus. The first variant consists of Korean translations of 162,308 Japanese sentences from the ATR BTEC (Basic Expression Text Corpus). The second variant was made by translating the English translations of the Japanese sentences into Korean. We show that the source language text has a large influence on the target text. Even after normalizing orthographic differences, fewer than 8.3{\%} of the sentences in the two variants were identical. We describe in general which phenomena differ and then discuss how our analysis can be used in natural language processing.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="paik-etal-2004-comparison">
<titleInfo>
<title>A Comparison of Two Variant Corpora: The Same Content with Different Source</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kyonghee</namePart>
<namePart type="family">Paik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kiyonori</namePart>
<namePart type="family">Ohtake</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kazuhide</namePart>
<namePart type="family">Yamamoto</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2004-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fourth International Conference on Language Resources and Evaluation (LREC’04)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="given">Teresa</namePart>
<namePart type="family">Lino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="given">Francisca</namePart>
<namePart type="family">Xavier</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fátima</namePart>
<namePart type="family">Ferreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rute</namePart>
<namePart type="family">Costa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Raquel</namePart>
<namePart type="family">Silva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Lisbon, Portugal</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In order to investigate the effect of source language on translations, we investigate two variants of a Korean translation corpus. The first variant consists of Korean translations of 162,308 Japanese sentences from the ATR BTEC (Basic Expression Text Corpus). The second variant was made by translating the English translations of the Japanese sentences into Korean. We show that the source language text has a large influence on the target text. Even after normalizing orthographic differences, fewer than 8.3% of the sentences in the two variants were identical. We describe in general which phenomena differ and then discuss how our analysis can be used in natural language processing.</abstract>
<identifier type="citekey">paik-etal-2004-comparison</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/424.pdf</url>
</location>
<part>
<date>2004-05</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Comparison of Two Variant Corpora: The Same Content with Different Source
%A Paik, Kyonghee
%A Ohtake, Kiyonori
%A Yamamoto, Kazuhide
%Y Lino, Maria Teresa
%Y Xavier, Maria Francisca
%Y Ferreira, Fátima
%Y Costa, Rute
%Y Silva, Raquel
%S Proceedings of the Fourth International Conference on Language Resources and Evaluation (LREC’04)
%D 2004
%8 May
%I European Language Resources Association (ELRA)
%C Lisbon, Portugal
%F paik-etal-2004-comparison
%X In order to investigate the effect of source language on translations, we investigate two variants of a Korean translation corpus. The first variant consists of Korean translations of 162,308 Japanese sentences from the ATR BTEC (Basic Expression Text Corpus). The second variant was made by translating the English translations of the Japanese sentences into Korean. We show that the source language text has a large influence on the target text. Even after normalizing orthographic differences, fewer than 8.3% of the sentences in the two variants were identical. We describe in general which phenomena differ and then discuss how our analysis can be used in natural language processing.
%U http://www.lrec-conf.org/proceedings/lrec2004/pdf/424.pdf
Markdown (Informal)
[A Comparison of Two Variant Corpora: The Same Content with Different Source](http://www.lrec-conf.org/proceedings/lrec2004/pdf/424.pdf) (Paik et al., LREC 2004)
ACL