@inproceedings{zmiycharov-etal-2021-comparative,
title = "A Comparative Study on Abstractive and Extractive Approaches in Summarization of {E}uropean Legislation Documents",
author = "Zmiycharov, Valentin and
Chechev, Milen and
Lazarova, Gergana and
Tsonkov, Todor and
Koychev, Ivan",
editor = "Mitkov, Ruslan and
Angelova, Galia",
booktitle = "Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)",
month = sep,
year = "2021",
address = "Held Online",
publisher = "INCOMA Ltd.",
url = "https://aclanthology.org/2021.ranlp-1.184",
pages = "1645--1651",
abstract = "Extracting the most important part of legislation documents has great business value because the texts are usually very long and hard to understand. The aim of this article is to evaluate different algorithms for text summarization on EU legislation documents. The content contains domain-specific words. We collected a text summarization dataset of EU legal documents consisting of 1563 documents, in which the mean length of summaries is 424 words. Experiments were conducted with different algorithms using the new dataset. A simple extractive algorithm was selected as a baseline. Advanced extractive algorithms, which use encoders show better results than baseline. The best result measured by ROUGE scores was achieved by a fine-tuned abstractive T5 model, which was adapted to work with long texts.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zmiycharov-etal-2021-comparative">
<titleInfo>
<title>A Comparative Study on Abstractive and Extractive Approaches in Summarization of European Legislation Documents</title>
</titleInfo>
<name type="personal">
<namePart type="given">Valentin</namePart>
<namePart type="family">Zmiycharov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Milen</namePart>
<namePart type="family">Chechev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gergana</namePart>
<namePart type="family">Lazarova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Todor</namePart>
<namePart type="family">Tsonkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ivan</namePart>
<namePart type="family">Koychev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Galia</namePart>
<namePart type="family">Angelova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd.</publisher>
<place>
<placeTerm type="text">Held Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Extracting the most important part of legislation documents has great business value because the texts are usually very long and hard to understand. The aim of this article is to evaluate different algorithms for text summarization on EU legislation documents. The content contains domain-specific words. We collected a text summarization dataset of EU legal documents consisting of 1563 documents, in which the mean length of summaries is 424 words. Experiments were conducted with different algorithms using the new dataset. A simple extractive algorithm was selected as a baseline. Advanced extractive algorithms, which use encoders show better results than baseline. The best result measured by ROUGE scores was achieved by a fine-tuned abstractive T5 model, which was adapted to work with long texts.</abstract>
<identifier type="citekey">zmiycharov-etal-2021-comparative</identifier>
<location>
<url>https://aclanthology.org/2021.ranlp-1.184</url>
</location>
<part>
<date>2021-09</date>
<extent unit="page">
<start>1645</start>
<end>1651</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Comparative Study on Abstractive and Extractive Approaches in Summarization of European Legislation Documents
%A Zmiycharov, Valentin
%A Chechev, Milen
%A Lazarova, Gergana
%A Tsonkov, Todor
%A Koychev, Ivan
%Y Mitkov, Ruslan
%Y Angelova, Galia
%S Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)
%D 2021
%8 September
%I INCOMA Ltd.
%C Held Online
%F zmiycharov-etal-2021-comparative
%X Extracting the most important part of legislation documents has great business value because the texts are usually very long and hard to understand. The aim of this article is to evaluate different algorithms for text summarization on EU legislation documents. The content contains domain-specific words. We collected a text summarization dataset of EU legal documents consisting of 1563 documents, in which the mean length of summaries is 424 words. Experiments were conducted with different algorithms using the new dataset. A simple extractive algorithm was selected as a baseline. Advanced extractive algorithms, which use encoders show better results than baseline. The best result measured by ROUGE scores was achieved by a fine-tuned abstractive T5 model, which was adapted to work with long texts.
%U https://aclanthology.org/2021.ranlp-1.184
%P 1645-1651
Markdown (Informal)
[A Comparative Study on Abstractive and Extractive Approaches in Summarization of European Legislation Documents](https://aclanthology.org/2021.ranlp-1.184) (Zmiycharov et al., RANLP 2021)
ACL