@inproceedings{forristal-etal-2023-block,
title = "A Block Metropolis-Hastings Sampler for Controllable Energy-based Text Generation",
author = "Forristal, Jarad and
Mireshghallah, Fatemehsadat and
Durrett, Greg and
Berg-Kirkpatrick, Taylor",
editor = "Jiang, Jing and
Reitter, David and
Deng, Shumin",
booktitle = "Proceedings of the 27th Conference on Computational Natural Language Learning (CoNLL)",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.conll-1.26",
doi = "10.18653/v1/2023.conll-1.26",
pages = "403--413",
abstract = "Recent work has shown that energy-based language modeling is an effective framework for controllable text generation because it enables flexible integration of arbitrary discriminators. However, because energy-based LMs are globally normalized, approximate techniques like Metropolis-Hastings (MH) are required for inference. Past work has largely explored simple proposal distributions that modify a single token at a time, like in Gibbs sampling. In this paper, we develop a novel MH sampler that, in contrast, proposes re-writes of the entire sequence in each step via iterative prompting of a large language model. Our new sampler (a) allows for more efficient and accurate sampling from a target distribution and (b) allows generation length to be determined through the sampling procedure rather than fixed in advance, as past work has required. We perform experiments on two controlled generation tasks, showing both downstream performance gains and more accurate target distribution sampling in comparison with single-token proposal techniques.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="forristal-etal-2023-block">
<titleInfo>
<title>A Block Metropolis-Hastings Sampler for Controllable Energy-based Text Generation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jarad</namePart>
<namePart type="family">Forristal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fatemehsadat</namePart>
<namePart type="family">Mireshghallah</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Greg</namePart>
<namePart type="family">Durrett</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Taylor</namePart>
<namePart type="family">Berg-Kirkpatrick</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 27th Conference on Computational Natural Language Learning (CoNLL)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jing</namePart>
<namePart type="family">Jiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Reitter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shumin</namePart>
<namePart type="family">Deng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Recent work has shown that energy-based language modeling is an effective framework for controllable text generation because it enables flexible integration of arbitrary discriminators. However, because energy-based LMs are globally normalized, approximate techniques like Metropolis-Hastings (MH) are required for inference. Past work has largely explored simple proposal distributions that modify a single token at a time, like in Gibbs sampling. In this paper, we develop a novel MH sampler that, in contrast, proposes re-writes of the entire sequence in each step via iterative prompting of a large language model. Our new sampler (a) allows for more efficient and accurate sampling from a target distribution and (b) allows generation length to be determined through the sampling procedure rather than fixed in advance, as past work has required. We perform experiments on two controlled generation tasks, showing both downstream performance gains and more accurate target distribution sampling in comparison with single-token proposal techniques.</abstract>
<identifier type="citekey">forristal-etal-2023-block</identifier>
<identifier type="doi">10.18653/v1/2023.conll-1.26</identifier>
<location>
<url>https://aclanthology.org/2023.conll-1.26</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>403</start>
<end>413</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Block Metropolis-Hastings Sampler for Controllable Energy-based Text Generation
%A Forristal, Jarad
%A Mireshghallah, Fatemehsadat
%A Durrett, Greg
%A Berg-Kirkpatrick, Taylor
%Y Jiang, Jing
%Y Reitter, David
%Y Deng, Shumin
%S Proceedings of the 27th Conference on Computational Natural Language Learning (CoNLL)
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F forristal-etal-2023-block
%X Recent work has shown that energy-based language modeling is an effective framework for controllable text generation because it enables flexible integration of arbitrary discriminators. However, because energy-based LMs are globally normalized, approximate techniques like Metropolis-Hastings (MH) are required for inference. Past work has largely explored simple proposal distributions that modify a single token at a time, like in Gibbs sampling. In this paper, we develop a novel MH sampler that, in contrast, proposes re-writes of the entire sequence in each step via iterative prompting of a large language model. Our new sampler (a) allows for more efficient and accurate sampling from a target distribution and (b) allows generation length to be determined through the sampling procedure rather than fixed in advance, as past work has required. We perform experiments on two controlled generation tasks, showing both downstream performance gains and more accurate target distribution sampling in comparison with single-token proposal techniques.
%R 10.18653/v1/2023.conll-1.26
%U https://aclanthology.org/2023.conll-1.26
%U https://doi.org/10.18653/v1/2023.conll-1.26
%P 403-413
Markdown (Informal)
[A Block Metropolis-Hastings Sampler for Controllable Energy-based Text Generation](https://aclanthology.org/2023.conll-1.26) (Forristal et al., CoNLL 2023)
ACL
Jarad Forristal, Fatemehsadat Mireshghallah, Greg Durrett, and Taylor Berg-Kirkpatrick. 2023. A Block Metropolis-Hastings Sampler for Controllable Energy-based Text Generation. In Proceedings of the 27th Conference on Computational Natural Language Learning (CoNLL), pages 403–413, Singapore. Association for Computational Linguistics.
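
For readers skimming the abstract above: the sampler it describes is a standard Metropolis-Hastings loop whose proposal rewrites the entire sequence in one step. Below is a minimal sketch of a single accept/reject step of that general technique, assuming hypothetical callables `energy` (the energy-based model, with target p(x) ∝ exp(−E(x))), `propose` (e.g., an LLM prompted to rewrite x), and `proposal_logprob` (the proposal's log-likelihood); this is an illustration, not the authors' implementation.

```python
import math
import random

def block_mh_step(x, energy, propose, proposal_logprob):
    """One Metropolis-Hastings step with a whole-sequence ("block") proposal.

    Hypothetical callables (not from the paper's codebase):
      energy(x)                  -> E(x), where the target is p(x) ∝ exp(-E(x))
      propose(x)                 -> a full rewrite x' of x (e.g., LLM prompted with x)
      proposal_logprob(src, dst) -> log q(dst | src) under the proposal model
    """
    x_prime = propose(x)
    # Log acceptance ratio: [E(x) - E(x')] + [log q(x | x') - log q(x' | x)].
    # The forward/reverse proposal terms correct for the proposal's bias,
    # which is what makes the chain target p(x) rather than the LLM itself.
    log_alpha = (energy(x) - energy(x_prime)
                 + proposal_logprob(x_prime, x)
                 - proposal_logprob(x, x_prime))
    # Accept with probability min(1, exp(log_alpha)).
    if random.random() < math.exp(min(0.0, log_alpha)):
        return x_prime  # accept: adopt the rewritten sequence
    return x            # reject: keep the current sequence
```

Because the proposal rewrites the whole sequence, an accepted state may differ in length from the current one, which is how the sampling procedure itself can determine generation length rather than fixing it in advance, as the abstract notes.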