@inproceedings{goschenhofer-etal-2022-cc,
title = "{CC}-Top: Constrained Clustering for Dynamic Topic Discovery",
author = "Goschenhofer, Jann and
Ragupathy, Pranav and
Heumann, Christian and
Bischl, Bernd and
A{\ss}enmacher, Matthias",
editor = "Barbieri, Francesco and
Camacho-Collados, Jose and
Dhingra, Bhuwan and
Espinosa-Anke, Luis and
Gribovskaya, Elena and
Lazaridou, Angeliki and
Loureiro, Daniel and
Neves, Leonardo",
booktitle = "Proceedings of the First Workshop on Ever Evolving NLP (EvoNLP)",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates (Hybrid)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.evonlp-1.5/",
doi = "10.18653/v1/2022.evonlp-1.5",
pages = "26--34",
abstract = "Research on multi-class text classification of short texts mainly focuses on supervised (transfer) learning approaches, requiring a finite set of pre-defined classes which is constant over time. This work explores deep constrained clustering (CC) as an alternative to supervised learning approaches in a setting with a dynamically changing number of classes, a task we introduce as dynamic topic discovery (DTD). We do so by using pairwise similarity constraints instead of instance-level class labels which allow for a flexible number of classes while exhibiting a competitive performance compared to supervised approaches. First, we substantiate this through a series of experiments and show that CC algorithms exhibit a predictive performance similar to state-of-the-art supervised learning algorithms while requiring less annotation effort. Second, we demonstrate the overclustering capabilities of deep CC for detecting topics in short text data sets in the absence of the ground truth class cardinality during model training. Third, we showcase that these capabilities can be leveraged for the DTD setting as a step towards dynamic learning over time and finally, we release our codebase to nurture further research in this area."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="goschenhofer-etal-2022-cc">
<titleInfo>
<title>CC-Top: Constrained Clustering for Dynamic Topic Discovery</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jann</namePart>
<namePart type="family">Goschenhofer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pranav</namePart>
<namePart type="family">Ragupathy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christian</namePart>
<namePart type="family">Heumann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bernd</namePart>
<namePart type="family">Bischl</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matthias</namePart>
<namePart type="family">Aßenmacher</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Ever Evolving NLP (EvoNLP)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Francesco</namePart>
<namePart type="family">Barbieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jose</namePart>
<namePart type="family">Camacho-Collados</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bhuwan</namePart>
<namePart type="family">Dhingra</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luis</namePart>
<namePart type="family">Espinosa-Anke</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elena</namePart>
<namePart type="family">Gribovskaya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Angeliki</namePart>
<namePart type="family">Lazaridou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Loureiro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leonardo</namePart>
<namePart type="family">Neves</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, United Arab Emirates (Hybrid)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Research on multi-class text classification of short texts mainly focuses on supervised (transfer) learning approaches, requiring a finite set of pre-defined classes which is constant over time. This work explores deep constrained clustering (CC) as an alternative to supervised learning approaches in a setting with a dynamically changing number of classes, a task we introduce as dynamic topic discovery (DTD). We do so by using pairwise similarity constraints instead of instance-level class labels which allow for a flexible number of classes while exhibiting a competitive performance compared to supervised approaches. First, we substantiate this through a series of experiments and show that CC algorithms exhibit a predictive performance similar to state-of-the-art supervised learning algorithms while requiring less annotation effort. Second, we demonstrate the overclustering capabilities of deep CC for detecting topics in short text data sets in the absence of the ground truth class cardinality during model training. Third, we showcase that these capabilities can be leveraged for the DTD setting as a step towards dynamic learning over time and finally, we release our codebase to nurture further research in this area.</abstract>
<identifier type="citekey">goschenhofer-etal-2022-cc</identifier>
<identifier type="doi">10.18653/v1/2022.evonlp-1.5</identifier>
<location>
<url>https://aclanthology.org/2022.evonlp-1.5/</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>26</start>
<end>34</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T CC-Top: Constrained Clustering for Dynamic Topic Discovery
%A Goschenhofer, Jann
%A Ragupathy, Pranav
%A Heumann, Christian
%A Bischl, Bernd
%A Aßenmacher, Matthias
%Y Barbieri, Francesco
%Y Camacho-Collados, Jose
%Y Dhingra, Bhuwan
%Y Espinosa-Anke, Luis
%Y Gribovskaya, Elena
%Y Lazaridou, Angeliki
%Y Loureiro, Daniel
%Y Neves, Leonardo
%S Proceedings of the First Workshop on Ever Evolving NLP (EvoNLP)
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates (Hybrid)
%F goschenhofer-etal-2022-cc
%X Research on multi-class text classification of short texts mainly focuses on supervised (transfer) learning approaches, requiring a finite set of pre-defined classes which is constant over time. This work explores deep constrained clustering (CC) as an alternative to supervised learning approaches in a setting with a dynamically changing number of classes, a task we introduce as dynamic topic discovery (DTD). We do so by using pairwise similarity constraints instead of instance-level class labels which allow for a flexible number of classes while exhibiting a competitive performance compared to supervised approaches. First, we substantiate this through a series of experiments and show that CC algorithms exhibit a predictive performance similar to state-of-the-art supervised learning algorithms while requiring less annotation effort. Second, we demonstrate the overclustering capabilities of deep CC for detecting topics in short text data sets in the absence of the ground truth class cardinality during model training. Third, we showcase that these capabilities can be leveraged for the DTD setting as a step towards dynamic learning over time and finally, we release our codebase to nurture further research in this area.
%R 10.18653/v1/2022.evonlp-1.5
%U https://aclanthology.org/2022.evonlp-1.5/
%U https://doi.org/10.18653/v1/2022.evonlp-1.5
%P 26-34
Markdown (Informal)
[CC-Top: Constrained Clustering for Dynamic Topic Discovery](https://aclanthology.org/2022.evonlp-1.5/) (Goschenhofer et al., EvoNLP 2022)
ACL
- Jann Goschenhofer, Pranav Ragupathy, Christian Heumann, Bernd Bischl, and Matthias Aßenmacher. 2022. CC-Top: Constrained Clustering for Dynamic Topic Discovery. In Proceedings of the First Workshop on Ever Evolving NLP (EvoNLP), pages 26–34, Abu Dhabi, United Arab Emirates (Hybrid). Association for Computational Linguistics.