@inproceedings{ye-etal-2024-prompt,
title = "Prompt Engineering a Prompt Engineer",
author = "Ye, Qinyuan and
Ahmed, Mohamed and
Pryzant, Reid and
Khani, Fereshte",
editor = "Ku, Lun-Wei and
Martins, Andre and
Srikumar, Vivek",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2024",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-acl.21/",
doi = "10.18653/v1/2024.findings-acl.21",
pages = "355--385",
abstract = "Prompt engineering is a challenging yet crucial task for optimizing the performance of large language models on customized tasks. It requires complex reasoning to examine the model`s errors, hypothesize what is missing or misleading in the current prompt, and communicate the task with clarity. While recent works indicate that large language models can be meta-prompted to perform automatic prompt engineering, we argue that their potential is limited due to insufficient guidance for complex reasoning in the meta-prompt. We fill this gap by infusing into the meta-prompt three key components: detailed descriptions, context specification, and a step-by-step reasoning template. The resulting method, named PE2, showcases remarkable versatility across diverse language tasks. It finds prompts that outperform {\textquotedblleft}let`s think step by step{\textquotedblright} by 6.3{\%} on MultiArith and 3.1{\%} on GSM8K, and outperforms competitive baselines on counterfactual tasks by 6.9{\%}. Further, we show that PE2 can make targeted prompt edits, rectify erroneous prompts, and induce multi-step plans for complex tasks."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ye-etal-2024-prompt">
<titleInfo>
<title>Prompt Engineering a Prompt Engineer</title>
</titleInfo>
<name type="personal">
<namePart type="given">Qinyuan</namePart>
<namePart type="family">Ye</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohamed</namePart>
<namePart type="family">Ahmed</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Reid</namePart>
<namePart type="family">Pryzant</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fereshte</namePart>
<namePart type="family">Khani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lun-Wei</namePart>
<namePart type="family">Ku</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andre</namePart>
<namePart type="family">Martins</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vivek</namePart>
<namePart type="family">Srikumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Prompt engineering is a challenging yet crucial task for optimizing the performance of large language models on customized tasks. It requires complex reasoning to examine the model's errors, hypothesize what is missing or misleading in the current prompt, and communicate the task with clarity. While recent works indicate that large language models can be meta-prompted to perform automatic prompt engineering, we argue that their potential is limited due to insufficient guidance for complex reasoning in the meta-prompt. We fill this gap by infusing into the meta-prompt three key components: detailed descriptions, context specification, and a step-by-step reasoning template. The resulting method, named PE2, showcases remarkable versatility across diverse language tasks. It finds prompts that outperform “let's think step by step” by 6.3% on MultiArith and 3.1% on GSM8K, and outperforms competitive baselines on counterfactual tasks by 6.9%. Further, we show that PE2 can make targeted prompt edits, rectify erroneous prompts, and induce multi-step plans for complex tasks.</abstract>
<identifier type="citekey">ye-etal-2024-prompt</identifier>
<identifier type="doi">10.18653/v1/2024.findings-acl.21</identifier>
<location>
<url>https://aclanthology.org/2024.findings-acl.21/</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>355</start>
<end>385</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Prompt Engineering a Prompt Engineer
%A Ye, Qinyuan
%A Ahmed, Mohamed
%A Pryzant, Reid
%A Khani, Fereshte
%Y Ku, Lun-Wei
%Y Martins, Andre
%Y Srikumar, Vivek
%S Findings of the Association for Computational Linguistics: ACL 2024
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F ye-etal-2024-prompt
%X Prompt engineering is a challenging yet crucial task for optimizing the performance of large language models on customized tasks. It requires complex reasoning to examine the model's errors, hypothesize what is missing or misleading in the current prompt, and communicate the task with clarity. While recent works indicate that large language models can be meta-prompted to perform automatic prompt engineering, we argue that their potential is limited due to insufficient guidance for complex reasoning in the meta-prompt. We fill this gap by infusing into the meta-prompt three key components: detailed descriptions, context specification, and a step-by-step reasoning template. The resulting method, named PE2, showcases remarkable versatility across diverse language tasks. It finds prompts that outperform “let's think step by step” by 6.3% on MultiArith and 3.1% on GSM8K, and outperforms competitive baselines on counterfactual tasks by 6.9%. Further, we show that PE2 can make targeted prompt edits, rectify erroneous prompts, and induce multi-step plans for complex tasks.
%R 10.18653/v1/2024.findings-acl.21
%U https://aclanthology.org/2024.findings-acl.21/
%U https://doi.org/10.18653/v1/2024.findings-acl.21
%P 355-385
Markdown (Informal)
[Prompt Engineering a Prompt Engineer](https://aclanthology.org/2024.findings-acl.21/) (Ye et al., Findings 2024)

ACL
Qinyuan Ye, Mohamed Ahmed, Reid Pryzant, and Fereshte Khani. 2024. Prompt Engineering a Prompt Engineer. In Findings of the Association for Computational Linguistics: ACL 2024, pages 355–385, Bangkok, Thailand. Association for Computational Linguistics.