@inproceedings{shashidhar-etal-2023-democratizing,
title = "Democratizing {LLM}s: An Exploration of Cost-Performance Trade-offs in Self-Refined Open-Source Models",
author = "Shashidhar, Sumuk and
Chinta, Abhinav and
Sahai, Vaibhav and
Wang, Zhenhailong and
Ji, Heng",
editor = "Bouamor, Houda and
Pino, Juan and
Bali, Kalika",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2023",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-emnlp.608/",
doi = "10.18653/v1/2023.findings-emnlp.608",
pages = "9070--9084",
abstract = "The dominance of proprietary LLMs has led to restricted access and raised information privacy concerns. The SoTA open-source alternatives are crucial for information-sensitive and high-volume applications but often lag behind in performance. To address this gap, we propose (1) A generalized variant of iterative self-critique and self-refinement devoid of external influence. (2) A novel ranking metric - Performance, Refinement, and Inference Cost Score (PeRFICS) - to find the optimal model for a given task considering refined performance and cost. Our experiments show that SoTA open source models of varying sizes from 7B - 65B, on average, improve 8.2{\%} from their baseline performance. Strikingly, even models with extremely small memory footprints, such as Vicuna-7B, show a 11.74{\%} improvement overall and up to a 25.39{\%} improvement in high-creativity, open ended tasks on the Vicuna benchmark. Vicuna-13B takes it a step further and outperforms ChatGPT post-refinement. This work has profound implications for resource-constrained and information-sensitive environments seeking to leverage LLMs without incurring prohibitive costs, compromising on performance and privacy. The domain-agnostic self-refinement process coupled with our novel ranking metric facilitates informed decision-making in model selection, thereby reducing costs and democratizing access to high-performing language models, as evidenced by three case studies on personal computing, gaming and enterprise solutions."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="shashidhar-etal-2023-democratizing">
<titleInfo>
<title>Democratizing LLMs: An Exploration of Cost-Performance Trade-offs in Self-Refined Open-Source Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sumuk</namePart>
<namePart type="family">Shashidhar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abhinav</namePart>
<namePart type="family">Chinta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vaibhav</namePart>
<namePart type="family">Sahai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhenhailong</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Heng</namePart>
<namePart type="family">Ji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Houda</namePart>
<namePart type="family">Bouamor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juan</namePart>
<namePart type="family">Pino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kalika</namePart>
<namePart type="family">Bali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The dominance of proprietary LLMs has led to restricted access and raised information privacy concerns. The SoTA open-source alternatives are crucial for information-sensitive and high-volume applications but often lag behind in performance. To address this gap, we propose (1) A generalized variant of iterative self-critique and self-refinement devoid of external influence. (2) A novel ranking metric - Performance, Refinement, and Inference Cost Score (PeRFICS) - to find the optimal model for a given task considering refined performance and cost. Our experiments show that SoTA open source models of varying sizes from 7B - 65B, on average, improve 8.2% from their baseline performance. Strikingly, even models with extremely small memory footprints, such as Vicuna-7B, show a 11.74% improvement overall and up to a 25.39% improvement in high-creativity, open ended tasks on the Vicuna benchmark. Vicuna-13B takes it a step further and outperforms ChatGPT post-refinement. This work has profound implications for resource-constrained and information-sensitive environments seeking to leverage LLMs without incurring prohibitive costs, compromising on performance and privacy. The domain-agnostic self-refinement process coupled with our novel ranking metric facilitates informed decision-making in model selection, thereby reducing costs and democratizing access to high-performing language models, as evidenced by three case studies on personal computing, gaming and enterprise solutions.</abstract>
<identifier type="citekey">shashidhar-etal-2023-democratizing</identifier>
<identifier type="doi">10.18653/v1/2023.findings-emnlp.608</identifier>
<location>
<url>https://aclanthology.org/2023.findings-emnlp.608/</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>9070</start>
<end>9084</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Democratizing LLMs: An Exploration of Cost-Performance Trade-offs in Self-Refined Open-Source Models
%A Shashidhar, Sumuk
%A Chinta, Abhinav
%A Sahai, Vaibhav
%A Wang, Zhenhailong
%A Ji, Heng
%Y Bouamor, Houda
%Y Pino, Juan
%Y Bali, Kalika
%S Findings of the Association for Computational Linguistics: EMNLP 2023
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F shashidhar-etal-2023-democratizing
%X The dominance of proprietary LLMs has led to restricted access and raised information privacy concerns. The SoTA open-source alternatives are crucial for information-sensitive and high-volume applications but often lag behind in performance. To address this gap, we propose (1) A generalized variant of iterative self-critique and self-refinement devoid of external influence. (2) A novel ranking metric - Performance, Refinement, and Inference Cost Score (PeRFICS) - to find the optimal model for a given task considering refined performance and cost. Our experiments show that SoTA open source models of varying sizes from 7B - 65B, on average, improve 8.2% from their baseline performance. Strikingly, even models with extremely small memory footprints, such as Vicuna-7B, show a 11.74% improvement overall and up to a 25.39% improvement in high-creativity, open ended tasks on the Vicuna benchmark. Vicuna-13B takes it a step further and outperforms ChatGPT post-refinement. This work has profound implications for resource-constrained and information-sensitive environments seeking to leverage LLMs without incurring prohibitive costs, compromising on performance and privacy. The domain-agnostic self-refinement process coupled with our novel ranking metric facilitates informed decision-making in model selection, thereby reducing costs and democratizing access to high-performing language models, as evidenced by three case studies on personal computing, gaming and enterprise solutions.
%R 10.18653/v1/2023.findings-emnlp.608
%U https://aclanthology.org/2023.findings-emnlp.608/
%U https://doi.org/10.18653/v1/2023.findings-emnlp.608
%P 9070-9084
Markdown (Informal)
[Democratizing LLMs: An Exploration of Cost-Performance Trade-offs in Self-Refined Open-Source Models](https://aclanthology.org/2023.findings-emnlp.608/) (Shashidhar et al., Findings 2023)
ACL