@inproceedings{yang-etal-2024-weak,
title = "Weak-to-Strong Reasoning",
author = "Yang, Yuqing and
Ma, Yan and
Liu, Pengfei",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2024",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-emnlp.490/",
doi = "10.18653/v1/2024.findings-emnlp.490",
pages = "8350--8367",
abstract = "When large language models (LLMs) surpass human capabilities, supervising them effectively becomes difficult. \textit{Weak-to-strong learning}, where a less capable model enhances a stronger one, proves valuable in this context. Yet, the efficacy of this paradigm for complex reasoning tasks is still unexplored. In this paper, we introduce a progressive weak-to-strong reasoning framework that enables the strong model to autonomously refine its training data, maximizing the use of weak signals and unlocking its latent abilities. This framework begins with supervised fine-tuning on a selective small but high-quality dataset, followed by preference optimization on contrastive samples identified by the strong model itself. Experiments on the GSM8K and MATH datasets verify that our method can effectively improve the reasoning capabilities of Llama2-70b using three separate weak models. This work paves the way for a more scalable and sophisticated strategy to enhance AI reasoning powers. All relevant code and resources are available in \url{https://github.com/GAIR-NLP/weak-to-strong-reasoning}."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="yang-etal-2024-weak">
    <titleInfo>
      <title>Weak-to-Strong Reasoning</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Yuqing</namePart>
      <namePart type="family">Yang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yan</namePart>
      <namePart type="family">Ma</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Pengfei</namePart>
      <namePart type="family">Liu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: EMNLP 2024</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Yaser</namePart>
        <namePart type="family">Al-Onaizan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mohit</namePart>
        <namePart type="family">Bansal</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yun-Nung</namePart>
        <namePart type="family">Chen</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Miami, Florida, USA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>When large language models (LLMs) surpass human capabilities, supervising them effectively becomes difficult. Weak-to-strong learning, where a less capable model enhances a stronger one, proves valuable in this context. Yet, the efficacy of this paradigm for complex reasoning tasks is still unexplored. In this paper, we introduce a progressive weak-to-strong reasoning framework that enables the strong model to autonomously refine its training data, maximizing the use of weak signals and unlocking its latent abilities. This framework begins with supervised fine-tuning on a selective small but high-quality dataset, followed by preference optimization on contrastive samples identified by the strong model itself. Experiments on the GSM8K and MATH datasets verify that our method can effectively improve the reasoning capabilities of Llama2-70b using three separate weak models. This work paves the way for a more scalable and sophisticated strategy to enhance AI reasoning powers. All relevant code and resources are available in https://github.com/GAIR-NLP/weak-to-strong-reasoning.</abstract>
    <identifier type="citekey">yang-etal-2024-weak</identifier>
    <identifier type="doi">10.18653/v1/2024.findings-emnlp.490</identifier>
    <location>
      <url>https://aclanthology.org/2024.findings-emnlp.490/</url>
    </location>
    <part>
      <date>2024-11</date>
      <extent unit="page">
        <start>8350</start>
        <end>8367</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Weak-to-Strong Reasoning
%A Yang, Yuqing
%A Ma, Yan
%A Liu, Pengfei
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Findings of the Association for Computational Linguistics: EMNLP 2024
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F yang-etal-2024-weak
%X When large language models (LLMs) surpass human capabilities, supervising them effectively becomes difficult. Weak-to-strong learning, where a less capable model enhances a stronger one, proves valuable in this context. Yet, the efficacy of this paradigm for complex reasoning tasks is still unexplored. In this paper, we introduce a progressive weak-to-strong reasoning framework that enables the strong model to autonomously refine its training data, maximizing the use of weak signals and unlocking its latent abilities. This framework begins with supervised fine-tuning on a selective small but high-quality dataset, followed by preference optimization on contrastive samples identified by the strong model itself. Experiments on the GSM8K and MATH datasets verify that our method can effectively improve the reasoning capabilities of Llama2-70b using three separate weak models. This work paves the way for a more scalable and sophisticated strategy to enhance AI reasoning powers. All relevant code and resources are available in https://github.com/GAIR-NLP/weak-to-strong-reasoning.
%R 10.18653/v1/2024.findings-emnlp.490
%U https://aclanthology.org/2024.findings-emnlp.490/
%U https://doi.org/10.18653/v1/2024.findings-emnlp.490
%P 8350-8367
Markdown (Informal)
[Weak-to-Strong Reasoning](https://aclanthology.org/2024.findings-emnlp.490/) (Yang et al., Findings 2024)
ACL
- Yuqing Yang, Yan Ma, and Pengfei Liu. 2024. Weak-to-Strong Reasoning. In Findings of the Association for Computational Linguistics: EMNLP 2024, pages 8350–8367, Miami, Florida, USA. Association for Computational Linguistics.
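
The abstract above summarizes a two-stage recipe: supervised fine-tuning on a small, selectively curated set built from weak supervision, followed by preference optimization on contrastive samples the strong model identifies for itself. The sketch below is only a minimal illustration of that idea, assuming an agreement-based filter for the first stage and a DPO-style objective for the second; the model and helper names (`weak_model`, `strong_model`, `generate`, `final_answer`) are hypothetical placeholders, and the authors' actual implementation is in the linked GAIR-NLP/weak-to-strong-reasoning repository.

```python
# Minimal sketch (not the authors' implementation) of the two-stage
# weak-to-strong recipe summarized in the abstract. All names below
# (weak_model, strong_model, generate, final_answer) are hypothetical
# placeholders supplied by the caller.

import torch
import torch.nn.functional as F


def build_sft_subset(problems, weak_model, strong_model, generate, final_answer):
    """Stage 1 (assumed agreement filter): keep a problem only when the weak
    model's solution and the strong model's own in-context attempt agree on
    the final answer, yielding a small but higher-quality SFT set."""
    sft_data = []
    for problem in problems:
        weak_solution = generate(weak_model, problem)
        strong_solution = generate(strong_model, problem)
        if final_answer(weak_solution) == final_answer(strong_solution):
            sft_data.append((problem, weak_solution))
    return sft_data


def dpo_loss(policy_chosen_logps, policy_rejected_logps,
             ref_chosen_logps, ref_rejected_logps, beta=0.1):
    """Stage 2 (DPO-style preference optimization): push the fine-tuned strong
    model toward the 'chosen' solution and away from the 'rejected' one in
    each contrastive pair, relative to a frozen reference model."""
    chosen_margin = policy_chosen_logps - ref_chosen_logps
    rejected_margin = policy_rejected_logps - ref_rejected_logps
    return -F.logsigmoid(beta * (chosen_margin - rejected_margin)).mean()


if __name__ == "__main__":
    # Dummy sequence log-probabilities just to show the loss is computable;
    # scoring chosen solutions above rejected ones yields a lower loss.
    logps = torch.randn(4)
    print(dpo_loss(logps + 1.0, logps - 1.0, logps, logps).item())
```

In practice, `build_sft_subset` would feed a standard supervised fine-tuning loop for the strong model, and `dpo_loss` would be applied over the strong model's own contrastive pairs in a second training pass.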