@inproceedings{chen-etal-2022-dialogsum,
title = "{D}ialog{S}um Challenge: Results of the Dialogue Summarization Shared Task",
author = "Chen, Yulong and
Deng, Naihao and
Liu, Yang and
Zhang, Yue",
editor = "Shaikh, Samira and
Ferreira, Thiago and
Stent, Amanda",
booktitle = "Proceedings of the 15th International Conference on Natural Language Generation: Generation Challenges",
month = jul,
year = "2022",
address = "Waterville, Maine, USA and virtual meeting",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.inlg-genchal.14/",
pages = "94--103",
abstract = "We report the results of DialogSum Challenge, the shared task on summarizing real-life sce- nario dialogues at INLG 2022. Four teams participate in this shared task and three submit their system reports, exploring different meth- ods to improve the performance of dialogue summarization. Although there is a great im- provement over the baseline models regarding automatic evaluation metrics, such as ROUGE scores, we find that there is a salient gap be- tween model generated outputs and human an- notated summaries by human evaluation from multiple aspects. These findings demonstrate the difficulty of dialogue summarization and suggest that more fine-grained evaluatuion met- rics are in need."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="chen-etal-2022-dialogsum">
<titleInfo>
<title>DialogSum Challenge: Results of the Dialogue Summarization Shared Task</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yulong</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Naihao</namePart>
<namePart type="family">Deng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yue</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 15th International Conference on Natural Language Generation: Generation Challenges</title>
</titleInfo>
<name type="personal">
<namePart type="given">Samira</namePart>
<namePart type="family">Shaikh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thiago</namePart>
<namePart type="family">Ferreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amanda</namePart>
<namePart type="family">Stent</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Waterville, Maine, USA and virtual meeting</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We report the results of DialogSum Challenge, the shared task on summarizing real-life sce- nario dialogues at INLG 2022. Four teams participate in this shared task and three submit their system reports, exploring different meth- ods to improve the performance of dialogue summarization. Although there is a great im- provement over the baseline models regarding automatic evaluation metrics, such as ROUGE scores, we find that there is a salient gap be- tween model generated outputs and human an- notated summaries by human evaluation from multiple aspects. These findings demonstrate the difficulty of dialogue summarization and suggest that more fine-grained evaluatuion met- rics are in need.</abstract>
<identifier type="citekey">chen-etal-2022-dialogsum</identifier>
<location>
<url>https://aclanthology.org/2022.inlg-genchal.14/</url>
</location>
<part>
<date>2022-07</date>
<extent unit="page">
<start>94</start>
<end>103</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T DialogSum Challenge: Results of the Dialogue Summarization Shared Task
%A Chen, Yulong
%A Deng, Naihao
%A Liu, Yang
%A Zhang, Yue
%Y Shaikh, Samira
%Y Ferreira, Thiago
%Y Stent, Amanda
%S Proceedings of the 15th International Conference on Natural Language Generation: Generation Challenges
%D 2022
%8 July
%I Association for Computational Linguistics
%C Waterville, Maine, USA and virtual meeting
%F chen-etal-2022-dialogsum
%X We report the results of DialogSum Challenge, the shared task on summarizing real-life sce- nario dialogues at INLG 2022. Four teams participate in this shared task and three submit their system reports, exploring different meth- ods to improve the performance of dialogue summarization. Although there is a great im- provement over the baseline models regarding automatic evaluation metrics, such as ROUGE scores, we find that there is a salient gap be- tween model generated outputs and human an- notated summaries by human evaluation from multiple aspects. These findings demonstrate the difficulty of dialogue summarization and suggest that more fine-grained evaluatuion met- rics are in need.
%U https://aclanthology.org/2022.inlg-genchal.14/
%P 94-103
Markdown (Informal)
[DialogSum Challenge: Results of the Dialogue Summarization Shared Task](https://aclanthology.org/2022.inlg-genchal.14/) (Chen et al., INLG 2022)
ACL