@inproceedings{parnell-etal-2021-rewardsofsum,
title = "{R}ewards{O}f{S}um: Exploring Reinforcement Learning Rewards for Summarisation",
author = "Parnell, Jacob and
Jauregi Unanue, Inigo and
Piccardi, Massimo",
editor = "Kozareva, Zornitsa and
Ravi, Sujith and
Vlachos, Andreas and
Agrawal, Priyanka and
Martins, Andr{\'e}",
booktitle = "Proceedings of the 5th Workshop on Structured Prediction for NLP (SPNLP 2021)",
month = aug,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.spnlp-1.1/",
doi = "10.18653/v1/2021.spnlp-1.1",
pages = "1--11",
abstract = "To date, most abstractive summarisation models have relied on variants of the negative log-likelihood (NLL) as their training objective. In some cases, reinforcement learning has been added to train the models with an objective that is closer to their evaluation measures (e.g. ROUGE). However, the reward function to be used within the reinforcement learning approach can play a key role for performance and is still partially unexplored. For this reason, in this paper, we propose two reward functions for the task of abstractive summarisation: the first function, referred to as RwB-Hinge, dynamically selects the samples for the gradient update. The second function, nicknamed RISK, leverages a small pool of strong candidates to inform the reward. In the experiments, we probe the proposed approach by fine-tuning an NLL pre-trained model over nine summarisation datasets of diverse size and nature. The experimental results show a consistent improvement over the negative log-likelihood baselines."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="parnell-etal-2021-rewardsofsum">
<titleInfo>
<title>RewardsOfSum: Exploring Reinforcement Learning Rewards for Summarisation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jacob</namePart>
<namePart type="family">Parnell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Inigo</namePart>
<namePart type="family">Jauregi Unanue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Massimo</namePart>
<namePart type="family">Piccardi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 5th Workshop on Structured Prediction for NLP (SPNLP 2021)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Zornitsa</namePart>
<namePart type="family">Kozareva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sujith</namePart>
<namePart type="family">Ravi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andreas</namePart>
<namePart type="family">Vlachos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Priyanka</namePart>
<namePart type="family">Agrawal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">André</namePart>
<namePart type="family">Martins</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>To date, most abstractive summarisation models have relied on variants of the negative log-likelihood (NLL) as their training objective. In some cases, reinforcement learning has been added to train the models with an objective that is closer to their evaluation measures (e.g. ROUGE). However, the reward function to be used within the reinforcement learning approach can play a key role for performance and is still partially unexplored. For this reason, in this paper, we propose two reward functions for the task of abstractive summarisation: the first function, referred to as RwB-Hinge, dynamically selects the samples for the gradient update. The second function, nicknamed RISK, leverages a small pool of strong candidates to inform the reward. In the experiments, we probe the proposed approach by fine-tuning an NLL pre-trained model over nine summarisation datasets of diverse size and nature. The experimental results show a consistent improvement over the negative log-likelihood baselines.</abstract>
<identifier type="citekey">parnell-etal-2021-rewardsofsum</identifier>
<identifier type="doi">10.18653/v1/2021.spnlp-1.1</identifier>
<location>
<url>https://aclanthology.org/2021.spnlp-1.1/</url>
</location>
<part>
<date>2021-08</date>
<extent unit="page">
<start>1</start>
<end>11</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T RewardsOfSum: Exploring Reinforcement Learning Rewards for Summarisation
%A Parnell, Jacob
%A Jauregi Unanue, Inigo
%A Piccardi, Massimo
%Y Kozareva, Zornitsa
%Y Ravi, Sujith
%Y Vlachos, Andreas
%Y Agrawal, Priyanka
%Y Martins, André
%S Proceedings of the 5th Workshop on Structured Prediction for NLP (SPNLP 2021)
%D 2021
%8 August
%I Association for Computational Linguistics
%C Online
%F parnell-etal-2021-rewardsofsum
%X To date, most abstractive summarisation models have relied on variants of the negative log-likelihood (NLL) as their training objective. In some cases, reinforcement learning has been added to train the models with an objective that is closer to their evaluation measures (e.g. ROUGE). However, the reward function to be used within the reinforcement learning approach can play a key role for performance and is still partially unexplored. For this reason, in this paper, we propose two reward functions for the task of abstractive summarisation: the first function, referred to as RwB-Hinge, dynamically selects the samples for the gradient update. The second function, nicknamed RISK, leverages a small pool of strong candidates to inform the reward. In the experiments, we probe the proposed approach by fine-tuning an NLL pre-trained model over nine summarisation datasets of diverse size and nature. The experimental results show a consistent improvement over the negative log-likelihood baselines.
%R 10.18653/v1/2021.spnlp-1.1
%U https://aclanthology.org/2021.spnlp-1.1/
%U https://doi.org/10.18653/v1/2021.spnlp-1.1
%P 1-11
Markdown (Informal)
[RewardsOfSum: Exploring Reinforcement Learning Rewards for Summarisation](https://aclanthology.org/2021.spnlp-1.1/) (Parnell et al., spnlp 2021)
ACL
Jacob Parnell, Inigo Jauregi Unanue, and Massimo Piccardi. 2021. RewardsOfSum: Exploring Reinforcement Learning Rewards for Summarisation. In Proceedings of the 5th Workshop on Structured Prediction for NLP (SPNLP 2021), pages 1–11, Online. Association for Computational Linguistics.
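
For readers who want a concrete picture of the two rewards described in the abstract, the sketch below shows one plausible PyTorch rendering: a REINFORCE-with-baseline loss where a hinge-style mask keeps only samples whose reward beats the greedy baseline (RwB-Hinge), and an expected-risk loss over a small candidate pool (RISK). The function names, tensor shapes, and the use of 1 − reward as the cost are illustrative assumptions based only on the abstract's description, not the authors' exact formulation.

```python
import torch

def rwb_hinge_loss(sample_logprobs, sample_rewards, greedy_rewards):
    """Hypothetical RwB-Hinge loss: REINFORCE with a greedy baseline, where a
    hinge-style mask drops samples whose reward does not beat the baseline,
    so only the remaining samples contribute to the gradient update."""
    advantage = sample_rewards - greedy_rewards          # (batch,)
    mask = (advantage > 0).float()                       # dynamic sample selection
    return -(mask * advantage * sample_logprobs).mean()

def risk_loss(candidate_logprobs, candidate_rewards):
    """Hypothetical RISK loss: expected risk over a small pool of candidate
    summaries, with probabilities renormalised over the pool and the cost
    taken as 1 - reward (e.g. 1 - ROUGE)."""
    probs = torch.softmax(candidate_logprobs, dim=-1)    # (batch, n_candidates)
    costs = 1.0 - candidate_rewards
    return (probs * costs).sum(dim=-1).mean()

# Toy usage with dummy numbers standing in for sequence log-probabilities
# and ROUGE scores that a real training loop would compute elsewhere.
sample_logprobs = torch.tensor([-12.3, -10.1], requires_grad=True)
print(rwb_hinge_loss(sample_logprobs,
                     torch.tensor([0.42, 0.35]),    # sampled-summary rewards
                     torch.tensor([0.40, 0.38])))   # greedy-baseline rewards

cand_logprobs = torch.randn(2, 4, requires_grad=True)
print(risk_loss(cand_logprobs, torch.rand(2, 4)))
```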