@inproceedings{di-liello-etal-2022-effective,
title = "Effective Pretraining Objectives for Transformer-based Autoencoders",
author = "Di Liello, Luca and
Gabburo, Matteo and
Moschitti, Alessandro",
editor = "Goldberg, Yoav and
Kozareva, Zornitsa and
Zhang, Yue",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2022",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.findings-emnlp.405/",
doi = "10.18653/v1/2022.findings-emnlp.405",
pages = "5533--5547",
abstract = "In this paper, we study trade-offs between efficiency, cost and accuracy when pre-training Transformer encoders with different pre-training objectives. For this purpose, we analyze features of common objectives and combine them to create new effective pre-training approaches. Specifically, we designed light token generators based on a straightforward statistical approach, which can replace ELECTRA computationally heavy generators, thus highly reducing cost. Our experiments also show that (i) there are more efficient alternatives to BERT`s MLM, and (ii) it is possible to efficiently pre-train Transformer-based models using lighter generators without a significant drop in performance."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="di-liello-etal-2022-effective">
    <titleInfo>
      <title>Effective Pretraining Objectives for Transformer-based Autoencoders</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Luca</namePart>
      <namePart type="family">Di Liello</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Matteo</namePart>
      <namePart type="family">Gabburo</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Alessandro</namePart>
      <namePart type="family">Moschitti</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2022-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: EMNLP 2022</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Yoav</namePart>
        <namePart type="family">Goldberg</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Zornitsa</namePart>
        <namePart type="family">Kozareva</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yue</namePart>
        <namePart type="family">Zhang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Abu Dhabi, United Arab Emirates</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>In this paper, we study trade-offs between efficiency, cost and accuracy when pre-training Transformer encoders with different pre-training objectives. For this purpose, we analyze features of common objectives and combine them to create new effective pre-training approaches. Specifically, we designed light token generators based on a straightforward statistical approach, which can replace ELECTRA's computationally heavy generators, thus greatly reducing cost. Our experiments also show that (i) there are more efficient alternatives to BERT's MLM, and (ii) it is possible to efficiently pre-train Transformer-based models using lighter generators without a significant drop in performance.</abstract>
<identifier type="citekey">di-liello-etal-2022-effective</identifier>
<identifier type="doi">10.18653/v1/2022.findings-emnlp.405</identifier>
<location>
<url>https://aclanthology.org/2022.findings-emnlp.405/</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>5533</start>
<end>5547</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Effective Pretraining Objectives for Transformer-based Autoencoders
%A Di Liello, Luca
%A Gabburo, Matteo
%A Moschitti, Alessandro
%Y Goldberg, Yoav
%Y Kozareva, Zornitsa
%Y Zhang, Yue
%S Findings of the Association for Computational Linguistics: EMNLP 2022
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates
%F di-liello-etal-2022-effective
%X In this paper, we study trade-offs between efficiency, cost and accuracy when pre-training Transformer encoders with different pre-training objectives. For this purpose, we analyze features of common objectives and combine them to create new effective pre-training approaches. Specifically, we designed light token generators based on a straightforward statistical approach, which can replace ELECTRA's computationally heavy generators, thus greatly reducing cost. Our experiments also show that (i) there are more efficient alternatives to BERT's MLM, and (ii) it is possible to efficiently pre-train Transformer-based models using lighter generators without a significant drop in performance.
%R 10.18653/v1/2022.findings-emnlp.405
%U https://aclanthology.org/2022.findings-emnlp.405/
%U https://doi.org/10.18653/v1/2022.findings-emnlp.405
%P 5533-5547
Markdown (Informal)
[Effective Pretraining Objectives for Transformer-based Autoencoders](https://aclanthology.org/2022.findings-emnlp.405/) (Di Liello et al., Findings 2022)
ACL
Luca Di Liello, Matteo Gabburo, and Alessandro Moschitti. 2022. Effective Pretraining Objectives for Transformer-based Autoencoders. In Findings of the Association for Computational Linguistics: EMNLP 2022, pages 5533–5547, Abu Dhabi, United Arab Emirates. Association for Computational Linguistics.