@inproceedings{kudo-etal-2023-skim,
    title = "{SKIM} at {WMT} 2023 General Translation Task",
    author = "Kudo, Keito and
      Ito, Takumi and
      Morishita, Makoto and
      Suzuki, Jun",
    editor = "Koehn, Philipp and
      Haddow, Barry and
      Kocmi, Tom and
      Monz, Christof",
    booktitle = "Proceedings of the Eighth Conference on Machine Translation",
    month = dec,
    year = "2023",
    address = "Singapore",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2023.wmt-1.9",
    doi = "10.18653/v1/2023.wmt-1.9",
    pages = "128--136",
    abstract = "The SKIM team{'}s submission used a standard procedure to build ensemble Transformer models, including base-model training, back-translation of base models for data augmentation, and retraining of several final models using back-translated training data. Each final model had its own architecture and configuration, including up to 10.5B parameters, and substituted self- and cross-sublayers in the decoder with a cross+self-attention sub-layer. We selected the best candidate from a large candidate pool, namely 70 translations generated from 13 distinct models for each sentence, using an MBR reranking method using COMET and COMET-QE. We also applied data augmentation and selection techniques to the training data of the Transformer models.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kudo-etal-2023-skim">
<titleInfo>
<title>SKIM at WMT 2023 General Translation Task</title>
</titleInfo>
<name type="personal">
<namePart type="given">Keito</namePart>
<namePart type="family">Kudo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Takumi</namePart>
<namePart type="family">Ito</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Makoto</namePart>
<namePart type="family">Morishita</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jun</namePart>
<namePart type="family">Suzuki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Eighth Conference on Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Philipp</namePart>
<namePart type="family">Koehn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Barry</namePart>
<namePart type="family">Haddow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tom</namePart>
<namePart type="family">Kocmi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christof</namePart>
<namePart type="family">Monz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The SKIM team’s submission used a standard procedure to build ensemble Transformer models, including base-model training, back-translation of base models for data augmentation, and retraining of several final models using back-translated training data. Each final model had its own architecture and configuration, including up to 10.5B parameters, and substituted self- and cross-sublayers in the decoder with a cross+self-attention sub-layer. We selected the best candidate from a large candidate pool, namely 70 translations generated from 13 distinct models for each sentence, using an MBR reranking method using COMET and COMET-QE. We also applied data augmentation and selection techniques to the training data of the Transformer models.</abstract>
<identifier type="citekey">kudo-etal-2023-skim</identifier>
<identifier type="doi">10.18653/v1/2023.wmt-1.9</identifier>
<location>
<url>https://aclanthology.org/2023.wmt-1.9</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>128</start>
<end>136</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T SKIM at WMT 2023 General Translation Task
%A Kudo, Keito
%A Ito, Takumi
%A Morishita, Makoto
%A Suzuki, Jun
%Y Koehn, Philipp
%Y Haddow, Barry
%Y Kocmi, Tom
%Y Monz, Christof
%S Proceedings of the Eighth Conference on Machine Translation
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F kudo-etal-2023-skim
%X The SKIM team’s submission used a standard procedure to build ensemble Transformer models, including base-model training, back-translation of base models for data augmentation, and retraining of several final models using back-translated training data. Each final model had its own architecture and configuration, including up to 10.5B parameters, and substituted self- and cross-sublayers in the decoder with a cross+self-attention sub-layer. We selected the best candidate from a large candidate pool, namely 70 translations generated from 13 distinct models for each sentence, using an MBR reranking method using COMET and COMET-QE. We also applied data augmentation and selection techniques to the training data of the Transformer models.
%R 10.18653/v1/2023.wmt-1.9
%U https://aclanthology.org/2023.wmt-1.9
%U https://doi.org/10.18653/v1/2023.wmt-1.9
%P 128-136
Markdown (Informal):
[SKIM at WMT 2023 General Translation Task](https://aclanthology.org/2023.wmt-1.9) (Kudo et al., WMT 2023)

ACL:
Keito Kudo, Takumi Ito, Makoto Morishita, and Jun Suzuki. 2023. SKIM at WMT 2023 General Translation Task. In Proceedings of the Eighth Conference on Machine Translation, pages 128–136, Singapore. Association for Computational Linguistics.
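
The abstract describes selecting the best translation from a 70-candidate pool (13 models per sentence) with MBR reranking over COMET and COMET-QE. As a rough illustration only (not the authors' released code), here is a minimal Python sketch of that selection step: the COMET and COMET-QE scorers are stubbed out as placeholder callables, and the equal weighting of the MBR term and the QE term is an assumption, not the paper's exact recipe.

```python
# Minimal MBR-reranking sketch: pick the candidate with the highest combined
# expected-utility (MBR) and quality-estimation score. The `utility` callable
# stands in for a reference-based metric such as COMET; `qe_score` stands in
# for a reference-free metric such as COMET-QE. Weighting is assumed, not
# taken from the paper.
from typing import Callable, Sequence


def mbr_rerank(
    source: str,
    candidates: Sequence[str],
    utility: Callable[[str, str, str], float],  # (source, candidate, pseudo_ref) -> score
    qe_score: Callable[[str, str], float],      # (source, candidate) -> score
    qe_weight: float = 0.5,                     # assumed equal weighting of MBR and QE terms
) -> str:
    """Return the candidate with the highest combined MBR + QE score."""
    best, best_score = candidates[0], float("-inf")
    for i, cand in enumerate(candidates):
        # MBR term: average utility of this candidate against all other
        # candidates treated as pseudo-references.
        others = list(candidates[:i]) + list(candidates[i + 1:])
        mbr = sum(utility(source, cand, ref) for ref in others) / max(len(others), 1)
        score = (1.0 - qe_weight) * mbr + qe_weight * qe_score(source, cand)
        if score > best_score:
            best, best_score = cand, score
    return best


if __name__ == "__main__":
    # Toy usage with dummy length-based scorers standing in for COMET / COMET-QE.
    dummy_utility = lambda src, cand, ref: -abs(len(cand) - len(ref))
    dummy_qe = lambda src, cand: -abs(len(cand) - len(src))
    pool = ["ein Beispiel", "ein kurzes Beispiel", "Beispiel"]
    print(mbr_rerank("a short example", pool, dummy_utility, dummy_qe))
```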