BibTeX
@inproceedings{qu-etal-2024-unsupervised,
title = "Unsupervised Distractor Generation via Large Language Model Distilling and Counterfactual Contrastive Decoding",
author = "Qu, Fanyi and
Sun, Hao and
Wu, Yunfang",
editor = "Ku, Lun-Wei and
Martins, Andre and
Srikumar, Vivek",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2024",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-acl.47/",
doi = "10.18653/v1/2024.findings-acl.47",
pages = "827--838",
abstract = "Within the context of reading comprehension, the task of Distractor Generation (DG) aims to generate several incorrect options to confuse readers. In recent years, the emergence of Large Language Models (LLMs) provides a potential for unsupervised DG without expensive human-annotated distractor labels. In this paper, we leverage LLMs as a cost-effective annotator to enhance the DG capability of smaller student models. To perform knowledge distilling, we propose a dual task training framework that integrates pseudo distractors from LLMs and answer information as the objective target with a two-stage training process. Moreover, we devise a counterfactual contrastive decoding mechanism for increasing the distracting capability of the DG model. Experiments show that our unsupervised generation method with Bart-base greatly surpasses GPT-3.5-turbo zero-shot performance with only 200$\times$ fewer model parameters. Our proposed unsupervised DG method offers a cost-effective framework for practical reading comprehension applications, without the need of laborious distractor annotation and costly large-size models."
}
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="qu-etal-2024-unsupervised">
    <titleInfo>
      <title>Unsupervised Distractor Generation via Large Language Model Distilling and Counterfactual Contrastive Decoding</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Fanyi</namePart>
      <namePart type="family">Qu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Hao</namePart>
      <namePart type="family">Sun</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yunfang</namePart>
      <namePart type="family">Wu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-08</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: ACL 2024</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Lun-Wei</namePart>
        <namePart type="family">Ku</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Andre</namePart>
        <namePart type="family">Martins</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Vivek</namePart>
        <namePart type="family">Srikumar</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Bangkok, Thailand</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Within the context of reading comprehension, the task of Distractor Generation (DG) aims to generate several incorrect options to confuse readers. In recent years, the emergence of Large Language Models (LLMs) provides a potential for unsupervised DG without expensive human-annotated distractor labels. In this paper, we leverage LLMs as a cost-effective annotator to enhance the DG capability of smaller student models. To perform knowledge distilling, we propose a dual task training framework that integrates pseudo distractors from LLMs and answer information as the objective target with a two-stage training process. Moreover, we devise a counterfactual contrastive decoding mechanism for increasing the distracting capability of the DG model. Experiments show that our unsupervised generation method with Bart-base greatly surpasses GPT-3.5-turbo zero-shot performance with only 200× fewer model parameters. Our proposed unsupervised DG method offers a cost-effective framework for practical reading comprehension applications, without the need of laborious distractor annotation and costly large-size models.</abstract>
    <identifier type="citekey">qu-etal-2024-unsupervised</identifier>
    <identifier type="doi">10.18653/v1/2024.findings-acl.47</identifier>
    <location>
      <url>https://aclanthology.org/2024.findings-acl.47/</url>
    </location>
    <part>
      <date>2024-08</date>
      <extent unit="page">
        <start>827</start>
        <end>838</end>
      </extent>
    </part>
  </mods>
</modsCollection>
Endnote
%0 Conference Proceedings
%T Unsupervised Distractor Generation via Large Language Model Distilling and Counterfactual Contrastive Decoding
%A Qu, Fanyi
%A Sun, Hao
%A Wu, Yunfang
%Y Ku, Lun-Wei
%Y Martins, Andre
%Y Srikumar, Vivek
%S Findings of the Association for Computational Linguistics: ACL 2024
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F qu-etal-2024-unsupervised
%X Within the context of reading comprehension, the task of Distractor Generation (DG) aims to generate several incorrect options to confuse readers. In recent years, the emergence of Large Language Models (LLMs) provides a potential for unsupervised DG without expensive human-annotated distractor labels. In this paper, we leverage LLMs as a cost-effective annotator to enhance the DG capability of smaller student models. To perform knowledge distilling, we propose a dual task training framework that integrates pseudo distractors from LLMs and answer information as the objective target with a two-stage training process. Moreover, we devise a counterfactual contrastive decoding mechanism for increasing the distracting capability of the DG model. Experiments show that our unsupervised generation method with Bart-base greatly surpasses GPT-3.5-turbo zero-shot performance with only 200× fewer model parameters. Our proposed unsupervised DG method offers a cost-effective framework for practical reading comprehension applications, without the need of laborious distractor annotation and costly large-size models.
%R 10.18653/v1/2024.findings-acl.47
%U https://aclanthology.org/2024.findings-acl.47/
%U https://doi.org/10.18653/v1/2024.findings-acl.47
%P 827-838
Markdown (Informal)
[Unsupervised Distractor Generation via Large Language Model Distilling and Counterfactual Contrastive Decoding](https://aclanthology.org/2024.findings-acl.47/) (Qu et al., Findings 2024)
ACL
Fanyi Qu, Hao Sun, and Yunfang Wu. 2024. Unsupervised Distractor Generation via Large Language Model Distilling and Counterfactual Contrastive Decoding. In Findings of the Association for Computational Linguistics: ACL 2024, pages 827–838, Bangkok, Thailand. Association for Computational Linguistics.
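For readers who want a concrete picture of the "counterfactual contrastive decoding" idea named in the abstract, the sketch below shows a generic contrastive decoding step in PyTorch. It is only an illustrative approximation under stated assumptions: the function name, the `alpha` and `plausibility` defaults, and the way the counterfactual context is formed are all hypothetical, and this is not claimed to be the exact mechanism used in the paper.

```python
# Hypothetical sketch of a generic contrastive decoding step.
# Assumes two next-token logit vectors are already available:
#   - logits_factual: from the model conditioned on the full context
#     (e.g., passage + question + answer)
#   - logits_counterfactual: from the same model on an altered context
#     (e.g., with the answer masked or replaced)
import torch


def contrastive_decoding_step(
    logits_factual: torch.Tensor,        # shape (vocab_size,)
    logits_counterfactual: torch.Tensor, # shape (vocab_size,)
    alpha: float = 0.5,                  # contrast strength (assumed default)
    plausibility: float = 0.1,           # adaptive plausibility cutoff (assumed)
) -> int:
    """Pick the next token by rewarding tokens the factual context prefers
    over the counterfactual one, restricted to plausible candidates."""
    log_p_fact = torch.log_softmax(logits_factual, dim=-1)
    log_p_cf = torch.log_softmax(logits_counterfactual, dim=-1)

    # Adaptive plausibility mask: keep tokens whose factual probability is at
    # least `plausibility` times that of the most likely token.
    cutoff = log_p_fact.max() + torch.log(torch.tensor(plausibility))
    mask = log_p_fact >= cutoff

    # Contrastive score: amplify what the factual context predicts relative
    # to the counterfactual context, then drop implausible tokens.
    score = log_p_fact - alpha * log_p_cf
    score = score.masked_fill(~mask, float("-inf"))
    return int(torch.argmax(score).item())
```

In a distractor-generation setting, `logits_factual` would typically come from a small seq2seq model such as BART-base conditioned on the passage, question, and answer, while `logits_counterfactual` would come from the same model with the answer perturbed; the plausibility mask follows the adaptive-plausibility idea common in the contrastive decoding literature.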