@inproceedings{fathullah-etal-2024-needs,
title = "Who Needs Decoders? Efficient Estimation of Sequence-Level Attributes with Proxies",
author = "Fathullah, Yassir and
Radmard, Puria and
Liusie, Adian and
Gales, Mark",
editor = "Graham, Yvette and
Purver, Matthew",
booktitle = "Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = mar,
year = "2024",
address = "St. Julian{'}s, Malta",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.eacl-long.89/",
pages = "1478--1496",
abstract = "Sequence-to-sequence models often require an expensive autoregressive decoding process. However, for some downstream tasks such as out-of-distribution (OOD) detection and resource allocation, the actual decoding output is not needed, just a scalar attribute of this sequence. In such scenarios, where knowing the quality of a system{'}s output to predict poor performance prevails over knowing the output itself, is it possible to bypass the autoregressive decoding? We propose Non-Autoregressive Proxy (NAP) models that can efficiently predict scalar-valued sequence-level attributes. Importantly, NAPs predict these metrics directly from the encodings, avoiding the expensive decoding stage. We consider two sequence tasks: Machine Translation (MT) and Automatic Speech Recognition (ASR). In OOD for MT, NAPs outperform ensembles while being significantly faster. NAPs are also proven capable of predicting metrics such as BERTScore (MT) or word error rate (ASR). For downstream tasks, such as data filtering and resource optimization, NAPs generate performance predictions that outperform predictive uncertainty while being highly inference efficient."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="fathullah-etal-2024-needs">
<titleInfo>
<title>Who Needs Decoders? Efficient Estimation of Sequence-Level Attributes with Proxies</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yassir</namePart>
<namePart type="family">Fathullah</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Puria</namePart>
<namePart type="family">Radmard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Adian</namePart>
<namePart type="family">Liusie</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mark</namePart>
<namePart type="family">Gales</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yvette</namePart>
<namePart type="family">Graham</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matthew</namePart>
<namePart type="family">Purver</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">St. Julian’s, Malta</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Sequence-to-sequence models often require an expensive autoregressive decoding process. However, for some downstream tasks such as out-of-distribution (OOD) detection and resource allocation, the actual decoding output is not needed, just a scalar attribute of this sequence. In such scenarios, where knowing the quality of a system’s output to predict poor performance prevails over knowing the output itself, is it possible to bypass the autoregressive decoding? We propose Non-Autoregressive Proxy (NAP) models that can efficiently predict scalar-valued sequence-level attributes. Importantly, NAPs predict these metrics directly from the encodings, avoiding the expensive decoding stage. We consider two sequence tasks: Machine Translation (MT) and Automatic Speech Recognition (ASR). In OOD for MT, NAPs outperform ensembles while being significantly faster. NAPs are also proven capable of predicting metrics such as BERTScore (MT) or word error rate (ASR). For downstream tasks, such as data filtering and resource optimization, NAPs generate performance predictions that outperform predictive uncertainty while being highly inference efficient.</abstract>
<identifier type="citekey">fathullah-etal-2024-needs</identifier>
<location>
<url>https://aclanthology.org/2024.eacl-long.89/</url>
</location>
<part>
<date>2024-03</date>
<extent unit="page">
<start>1478</start>
<end>1496</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Who Needs Decoders? Efficient Estimation of Sequence-Level Attributes with Proxies
%A Fathullah, Yassir
%A Radmard, Puria
%A Liusie, Adian
%A Gales, Mark
%Y Graham, Yvette
%Y Purver, Matthew
%S Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2024
%8 March
%I Association for Computational Linguistics
%C St. Julian’s, Malta
%F fathullah-etal-2024-needs
%X Sequence-to-sequence models often require an expensive autoregressive decoding process. However, for some downstream tasks such as out-of-distribution (OOD) detection and resource allocation, the actual decoding output is not needed, just a scalar attribute of this sequence. In such scenarios, where knowing the quality of a system’s output to predict poor performance prevails over knowing the output itself, is it possible to bypass the autoregressive decoding? We propose Non-Autoregressive Proxy (NAP) models that can efficiently predict scalar-valued sequence-level attributes. Importantly, NAPs predict these metrics directly from the encodings, avoiding the expensive decoding stage. We consider two sequence tasks: Machine Translation (MT) and Automatic Speech Recognition (ASR). In OOD for MT, NAPs outperform ensembles while being significantly faster. NAPs are also proven capable of predicting metrics such as BERTScore (MT) or word error rate (ASR). For downstream tasks, such as data filtering and resource optimization, NAPs generate performance predictions that outperform predictive uncertainty while being highly inference efficient.
%U https://aclanthology.org/2024.eacl-long.89/
%P 1478-1496
Markdown (Informal)
[Who Needs Decoders? Efficient Estimation of Sequence-Level Attributes with Proxies](https://aclanthology.org/2024.eacl-long.89/) (Fathullah et al., EACL 2024)
ACL