@inproceedings{gholipour-ghalandari-2017-revisiting,
title = "Revisiting the Centroid-based Method: A Strong Baseline for Multi-Document Summarization",
author = "Gholipour Ghalandari, Demian",
editor = "Wang, Lu and
Cheung, Jackie Chi Kit and
Carenini, Giuseppe and
Liu, Fei",
booktitle = "Proceedings of the Workshop on New Frontiers in Summarization",
month = sep,
year = "2017",
address = "Copenhagen, Denmark",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W17-4511",
doi = "10.18653/v1/W17-4511",
pages = "85--90",
abstract = "The centroid-based model for extractive document summarization is a simple and fast baseline that ranks sentences based on their similarity to a centroid vector. In this paper, we apply this ranking to possible summaries instead of sentences and use a simple greedy algorithm to find the best summary. Furthermore, we show possibilities to scale up to larger input document collections by selecting a small number of sentences from each document prior to constructing the summary. Experiments were done on the DUC2004 dataset for multi-document summarization. We observe a higher performance over the original model, on par with more complex state-of-the-art methods.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="gholipour-ghalandari-2017-revisiting">
<titleInfo>
<title>Revisiting the Centroid-based Method: A Strong Baseline for Multi-Document Summarization</title>
</titleInfo>
<name type="personal">
<namePart type="given">Demian</namePart>
<namePart type="family">Gholipour Ghalandari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Workshop on New Frontiers in Summarization</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lu</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jackie</namePart>
<namePart type="given">Chi</namePart>
<namePart type="given">Kit</namePart>
<namePart type="family">Cheung</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Giuseppe</namePart>
<namePart type="family">Carenini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fei</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Copenhagen, Denmark</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The centroid-based model for extractive document summarization is a simple and fast baseline that ranks sentences based on their similarity to a centroid vector. In this paper, we apply this ranking to possible summaries instead of sentences and use a simple greedy algorithm to find the best summary. Furthermore, we show possibilities to scale up to larger input document collections by selecting a small number of sentences from each document prior to constructing the summary. Experiments were done on the DUC2004 dataset for multi-document summarization. We observe a higher performance over the original model, on par with more complex state-of-the-art methods.</abstract>
<identifier type="citekey">gholipour-ghalandari-2017-revisiting</identifier>
<identifier type="doi">10.18653/v1/W17-4511</identifier>
<location>
<url>https://aclanthology.org/W17-4511</url>
</location>
<part>
<date>2017-09</date>
<extent unit="page">
<start>85</start>
<end>90</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Revisiting the Centroid-based Method: A Strong Baseline for Multi-Document Summarization
%A Gholipour Ghalandari, Demian
%Y Wang, Lu
%Y Cheung, Jackie Chi Kit
%Y Carenini, Giuseppe
%Y Liu, Fei
%S Proceedings of the Workshop on New Frontiers in Summarization
%D 2017
%8 September
%I Association for Computational Linguistics
%C Copenhagen, Denmark
%F gholipour-ghalandari-2017-revisiting
%X The centroid-based model for extractive document summarization is a simple and fast baseline that ranks sentences based on their similarity to a centroid vector. In this paper, we apply this ranking to possible summaries instead of sentences and use a simple greedy algorithm to find the best summary. Furthermore, we show possibilities to scale up to larger input document collections by selecting a small number of sentences from each document prior to constructing the summary. Experiments were done on the DUC2004 dataset for multi-document summarization. We observe a higher performance over the original model, on par with more complex state-of-the-art methods.
%R 10.18653/v1/W17-4511
%U https://aclanthology.org/W17-4511
%U https://doi.org/10.18653/v1/W17-4511
%P 85-90
Markdown (Informal)
[Revisiting the Centroid-based Method: A Strong Baseline for Multi-Document Summarization](https://aclanthology.org/W17-4511) (Gholipour Ghalandari, 2017)
ACL