@inproceedings{lu-etal-2024-strings,
title = "Strings from the Library of {B}abel: Random Sampling as a Strong Baseline for Prompt Optimisation",
author = "Lu, Yao and
Wang, Jiayi and
Tang, Raphael and
Riedel, Sebastian and
Stenetorp, Pontus",
editor = "Duh, Kevin and
Gomez, Helena and
Bethard, Steven",
booktitle = "Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.naacl-long.122/",
doi = "10.18653/v1/2024.naacl-long.122",
pages = "2221--2231",
abstract = "Recent prompt optimisation approaches use the generative nature of language models to produce prompts {--} even rivaling the performance of human-curated prompts. In this paper, we demonstrate that randomly sampling tokens from the model vocabulary as {\textquotedblleft}separators{\textquotedblright} can be as effective as language models for prompt-style text classification. Our experiments show that random separators are competitive baselines, having less than a 1{\%} difference compared to previous self-optimisation methods and showing a 12{\%} average relative improvement over strong human baselines across nine text classification tasks and eight language models. We further analyse this phenomenon in detail using three different random generation strategies, establishing that the language space is rich with potentially good separators, with a greater than 40{\%} average chance that a randomly drawn separator performs better than human-curated separators. These observations challenge the common assumption that an effective prompt should be human readable or task relevant and establish a strong baseline for prompt optimisation research."
}