@inproceedings{yadav-etal-2021-subsume,
title = "{SUBSUME}: A Dataset for Subjective Summary Extraction from {W}ikipedia Documents",
author = "Yadav, Nishant and
Brucato, Matteo and
Fariha, Anna and
Youngquist, Oscar and
Killingback, Julian and
Meliou, Alexandra and
Haas, Peter",
editor = "Carenini, Giuseppe and
Cheung, Jackie Chi Kit and
Dong, Yue and
Liu, Fei and
Wang, Lu",
booktitle = "Proceedings of the Third Workshop on New Frontiers in Summarization",
month = nov,
year = "2021",
address = "Online and in Dominican Republic",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.newsum-1.14",
doi = "10.18653/v1/2021.newsum-1.14",
pages = "131--141",
abstract = "Many applications require generation of summaries tailored to the user{'}s information needs, i.e., their intent. Methods that express intent via explicit user queries fall short when query interpretation is subjective. Several datasets exist for summarization with objective intents where, for each document and intent (e.g., {``}weather{''}), a single summary suffices for all users. No datasets exist, however, for subjective intents (e.g., {``}interesting places{''}) where different users will provide different summaries. We present SUBSUME, the first dataset for evaluation of SUBjective SUMmary Extraction systems. SUBSUME contains 2,200 (document, intent, summary) triplets over 48 Wikipedia pages, with ten intents of varying subjectivity, provided by 103 individuals over Mechanical Turk. We demonstrate statistically that the intents in SUBSUME vary systematically in subjectivity. To indicate SUBSUME{'}s usefulness, we explore a collection of baseline algorithms for subjective extractive summarization and show that (i) as expected, example-based approaches better capture subjective intents than query-based ones, and (ii) there is ample scope for improving upon the baseline algorithms, thereby motivating further research on this challenging problem.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="yadav-etal-2021-subsume">
<titleInfo>
<title>SUBSUME: A Dataset for Subjective Summary Extraction from Wikipedia Documents</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nishant</namePart>
<namePart type="family">Yadav</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matteo</namePart>
<namePart type="family">Brucato</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Fariha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Oscar</namePart>
<namePart type="family">Youngquist</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Julian</namePart>
<namePart type="family">Killingback</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexandra</namePart>
<namePart type="family">Meliou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Peter</namePart>
<namePart type="family">Haas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Third Workshop on New Frontiers in Summarization</title>
</titleInfo>
<name type="personal">
<namePart type="given">Giuseppe</namePart>
<namePart type="family">Carenini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jackie</namePart>
<namePart type="given">Chi</namePart>
<namePart type="given">Kit</namePart>
<namePart type="family">Cheung</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yue</namePart>
<namePart type="family">Dong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fei</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lu</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online and in Dominican Republic</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Many applications require generation of summaries tailored to the user’s information needs, i.e., their intent. Methods that express intent via explicit user queries fall short when query interpretation is subjective. Several datasets exist for summarization with objective intents where, for each document and intent (e.g., “weather”), a single summary suffices for all users. No datasets exist, however, for subjective intents (e.g., “interesting places”) where different users will provide different summaries. We present SUBSUME, the first dataset for evaluation of SUBjective SUMmary Extraction systems. SUBSUME contains 2,200 (document, intent, summary) triplets over 48 Wikipedia pages, with ten intents of varying subjectivity, provided by 103 individuals over Mechanical Turk. We demonstrate statistically that the intents in SUBSUME vary systematically in subjectivity. To indicate SUBSUME’s usefulness, we explore a collection of baseline algorithms for subjective extractive summarization and show that (i) as expected, example-based approaches better capture subjective intents than query-based ones, and (ii) there is ample scope for improving upon the baseline algorithms, thereby motivating further research on this challenging problem.</abstract>
<identifier type="citekey">yadav-etal-2021-subsume</identifier>
<identifier type="doi">10.18653/v1/2021.newsum-1.14</identifier>
<location>
<url>https://aclanthology.org/2021.newsum-1.14</url>
</location>
<part>
<date>2021-11</date>
<extent unit="page">
<start>131</start>
<end>141</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T SUBSUME: A Dataset for Subjective Summary Extraction from Wikipedia Documents
%A Yadav, Nishant
%A Brucato, Matteo
%A Fariha, Anna
%A Youngquist, Oscar
%A Killingback, Julian
%A Meliou, Alexandra
%A Haas, Peter
%Y Carenini, Giuseppe
%Y Cheung, Jackie Chi Kit
%Y Dong, Yue
%Y Liu, Fei
%Y Wang, Lu
%S Proceedings of the Third Workshop on New Frontiers in Summarization
%D 2021
%8 November
%I Association for Computational Linguistics
%C Online and in Dominican Republic
%F yadav-etal-2021-subsume
%X Many applications require generation of summaries tailored to the user’s information needs, i.e., their intent. Methods that express intent via explicit user queries fall short when query interpretation is subjective. Several datasets exist for summarization with objective intents where, for each document and intent (e.g., “weather”), a single summary suffices for all users. No datasets exist, however, for subjective intents (e.g., “interesting places”) where different users will provide different summaries. We present SUBSUME, the first dataset for evaluation of SUBjective SUMmary Extraction systems. SUBSUME contains 2,200 (document, intent, summary) triplets over 48 Wikipedia pages, with ten intents of varying subjectivity, provided by 103 individuals over Mechanical Turk. We demonstrate statistically that the intents in SUBSUME vary systematically in subjectivity. To indicate SUBSUME’s usefulness, we explore a collection of baseline algorithms for subjective extractive summarization and show that (i) as expected, example-based approaches better capture subjective intents than query-based ones, and (ii) there is ample scope for improving upon the baseline algorithms, thereby motivating further research on this challenging problem.
%R 10.18653/v1/2021.newsum-1.14
%U https://aclanthology.org/2021.newsum-1.14
%U https://doi.org/10.18653/v1/2021.newsum-1.14
%P 131-141
Markdown (Informal)
[SUBSUME: A Dataset for Subjective Summary Extraction from Wikipedia Documents](https://aclanthology.org/2021.newsum-1.14) (Yadav et al., NewSum 2021)
ACL