@inproceedings{bogoychev-etal-2020-edinburghs,
title = "{E}dinburgh`s Submissions to the 2020 Machine Translation Efficiency Task",
author = "Bogoychev, Nikolay and
Grundkiewicz, Roman and
Aji, Alham Fikri and
Behnke, Maximiliana and
Heafield, Kenneth and
Kashyap, Sidharth and
Farsarakis, Emmanouil-Ioannis and
Chudyk, Mateusz",
editor = "Birch, Alexandra and
Finch, Andrew and
Hayashi, Hiroaki and
Heafield, Kenneth and
Junczys-Dowmunt, Marcin and
Konstas, Ioannis and
Li, Xian and
Neubig, Graham and
Oda, Yusuke",
booktitle = "Proceedings of the Fourth Workshop on Neural Generation and Translation",
month = jul,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.ngt-1.26/",
doi = "10.18653/v1/2020.ngt-1.26",
pages = "218--224",
abstract = "We participated in all tracks of the Workshop on Neural Generation and Translation 2020 Efficiency Shared Task: single-core CPU, multi-core CPU, and GPU. At the model level, we use teacher-student training with a variety of student sizes, tie embeddings and sometimes layers, use the Simpler Simple Recurrent Unit, and introduce head pruning. On GPUs, we used 16-bit floating-point tensor cores. On CPUs, we customized 8-bit quantization and multiple processes with affinity for the multi-core setting. To reduce model size, we experimented with 4-bit log quantization but use floats at runtime. In the shared task, most of our submissions were Pareto optimal with respect the trade-off between time and quality."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bogoychev-etal-2020-edinburghs">
<titleInfo>
<title>Edinburgh‘s Submissions to the 2020 Machine Translation Efficiency Task</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nikolay</namePart>
<namePart type="family">Bogoychev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Roman</namePart>
<namePart type="family">Grundkiewicz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alham</namePart>
<namePart type="given">Fikri</namePart>
<namePart type="family">Aji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maximiliana</namePart>
<namePart type="family">Behnke</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kenneth</namePart>
<namePart type="family">Heafield</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sidharth</namePart>
<namePart type="family">Kashyap</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Emmanouil-Ioannis</namePart>
<namePart type="family">Farsarakis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mateusz</namePart>
<namePart type="family">Chudyk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fourth Workshop on Neural Generation and Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Alexandra</namePart>
<namePart type="family">Birch</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andrew</namePart>
<namePart type="family">Finch</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hiroaki</namePart>
<namePart type="family">Hayashi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kenneth</namePart>
<namePart type="family">Heafield</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcin</namePart>
<namePart type="family">Junczys-Dowmunt</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ioannis</namePart>
<namePart type="family">Konstas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xian</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Graham</namePart>
<namePart type="family">Neubig</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yusuke</namePart>
<namePart type="family">Oda</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We participated in all tracks of the Workshop on Neural Generation and Translation 2020 Efficiency Shared Task: single-core CPU, multi-core CPU, and GPU. At the model level, we use teacher-student training with a variety of student sizes, tie embeddings and sometimes layers, use the Simpler Simple Recurrent Unit, and introduce head pruning. On GPUs, we used 16-bit floating-point tensor cores. On CPUs, we customized 8-bit quantization and multiple processes with affinity for the multi-core setting. To reduce model size, we experimented with 4-bit log quantization but use floats at runtime. In the shared task, most of our submissions were Pareto optimal with respect the trade-off between time and quality.</abstract>
<identifier type="citekey">bogoychev-etal-2020-edinburghs</identifier>
<identifier type="doi">10.18653/v1/2020.ngt-1.26</identifier>
<location>
<url>https://aclanthology.org/2020.ngt-1.26/</url>
</location>
<part>
<date>2020-07</date>
<extent unit="page">
<start>218</start>
<end>224</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Edinburgh‘s Submissions to the 2020 Machine Translation Efficiency Task
%A Bogoychev, Nikolay
%A Grundkiewicz, Roman
%A Aji, Alham Fikri
%A Behnke, Maximiliana
%A Heafield, Kenneth
%A Kashyap, Sidharth
%A Farsarakis, Emmanouil-Ioannis
%A Chudyk, Mateusz
%Y Birch, Alexandra
%Y Finch, Andrew
%Y Hayashi, Hiroaki
%Y Heafield, Kenneth
%Y Junczys-Dowmunt, Marcin
%Y Konstas, Ioannis
%Y Li, Xian
%Y Neubig, Graham
%Y Oda, Yusuke
%S Proceedings of the Fourth Workshop on Neural Generation and Translation
%D 2020
%8 July
%I Association for Computational Linguistics
%C Online
%F bogoychev-etal-2020-edinburghs
%X We participated in all tracks of the Workshop on Neural Generation and Translation 2020 Efficiency Shared Task: single-core CPU, multi-core CPU, and GPU. At the model level, we use teacher-student training with a variety of student sizes, tie embeddings and sometimes layers, use the Simpler Simple Recurrent Unit, and introduce head pruning. On GPUs, we used 16-bit floating-point tensor cores. On CPUs, we customized 8-bit quantization and multiple processes with affinity for the multi-core setting. To reduce model size, we experimented with 4-bit log quantization but use floats at runtime. In the shared task, most of our submissions were Pareto optimal with respect the trade-off between time and quality.
%R 10.18653/v1/2020.ngt-1.26
%U https://aclanthology.org/2020.ngt-1.26/
%U https://doi.org/10.18653/v1/2020.ngt-1.26
%P 218-224
Markdown (Informal)
[Edinburgh’s Submissions to the 2020 Machine Translation Efficiency Task](https://aclanthology.org/2020.ngt-1.26/) (Bogoychev et al., NGT 2020)
ACL
- Nikolay Bogoychev, Roman Grundkiewicz, Alham Fikri Aji, Maximiliana Behnke, Kenneth Heafield, Sidharth Kashyap, Emmanouil-Ioannis Farsarakis, and Mateusz Chudyk. 2020. Edinburgh’s Submissions to the 2020 Machine Translation Efficiency Task. In Proceedings of the Fourth Workshop on Neural Generation and Translation, pages 218–224, Online. Association for Computational Linguistics.