@inproceedings{kumar-etal-2020-online,
title = "An Online Semantic-enhanced {D}irichlet Model for Short Text Stream Clustering",
author = "Kumar, Jay and
Shao, Junming and
Uddin, Salah and
Ali, Wazir",
editor = "Jurafsky, Dan and
Chai, Joyce and
Schluter, Natalie and
Tetreault, Joel",
booktitle = "Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics",
month = jul,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.acl-main.70",
doi = "10.18653/v1/2020.acl-main.70",
pages = "766--776",
abstract = "Clustering short text streams is a challenging task due to its unique properties: infinite length, sparse data representation and cluster evolution. Existing approaches often exploit short text streams in a batch way. However, determine the optimal batch size is usually a difficult task since we have no priori knowledge when the topics evolve. In addition, traditional independent word representation in graphical model tends to cause {``}term ambiguity{''} problem in short text clustering. Therefore, in this paper, we propose an Online Semantic-enhanced Dirichlet Model for short sext stream clustering, called OSDM, which integrates the word-occurance semantic information (i.e., context) into a new graphical model and clusters each arriving short text automatically in an online way. Extensive results have demonstrated that OSDM has better performance compared to many state-of-the-art algorithms on both synthetic and real-world data sets.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kumar-etal-2020-online">
<titleInfo>
<title>An Online Semantic-enhanced Dirichlet Model for Short Text Stream Clustering</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jay</namePart>
<namePart type="family">Kumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junming</namePart>
<namePart type="family">Shao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Salah</namePart>
<namePart type="family">Uddin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wazir</namePart>
<namePart type="family">Ali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dan</namePart>
<namePart type="family">Jurafsky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Chai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Natalie</namePart>
<namePart type="family">Schluter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joel</namePart>
<namePart type="family">Tetreault</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Clustering short text streams is a challenging task due to its unique properties: infinite length, sparse data representation and cluster evolution. Existing approaches often exploit short text streams in a batch way. However, determine the optimal batch size is usually a difficult task since we have no priori knowledge when the topics evolve. In addition, traditional independent word representation in graphical model tends to cause “term ambiguity” problem in short text clustering. Therefore, in this paper, we propose an Online Semantic-enhanced Dirichlet Model for short sext stream clustering, called OSDM, which integrates the word-occurance semantic information (i.e., context) into a new graphical model and clusters each arriving short text automatically in an online way. Extensive results have demonstrated that OSDM has better performance compared to many state-of-the-art algorithms on both synthetic and real-world data sets.</abstract>
<identifier type="citekey">kumar-etal-2020-online</identifier>
<identifier type="doi">10.18653/v1/2020.acl-main.70</identifier>
<location>
<url>https://aclanthology.org/2020.acl-main.70</url>
</location>
<part>
<date>2020-07</date>
<extent unit="page">
<start>766</start>
<end>776</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T An Online Semantic-enhanced Dirichlet Model for Short Text Stream Clustering
%A Kumar, Jay
%A Shao, Junming
%A Uddin, Salah
%A Ali, Wazir
%Y Jurafsky, Dan
%Y Chai, Joyce
%Y Schluter, Natalie
%Y Tetreault, Joel
%S Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics
%D 2020
%8 July
%I Association for Computational Linguistics
%C Online
%F kumar-etal-2020-online
%X Clustering short text streams is a challenging task due to its unique properties: infinite length, sparse data representation and cluster evolution. Existing approaches often exploit short text streams in a batch way. However, determine the optimal batch size is usually a difficult task since we have no priori knowledge when the topics evolve. In addition, traditional independent word representation in graphical model tends to cause “term ambiguity” problem in short text clustering. Therefore, in this paper, we propose an Online Semantic-enhanced Dirichlet Model for short sext stream clustering, called OSDM, which integrates the word-occurance semantic information (i.e., context) into a new graphical model and clusters each arriving short text automatically in an online way. Extensive results have demonstrated that OSDM has better performance compared to many state-of-the-art algorithms on both synthetic and real-world data sets.
%R 10.18653/v1/2020.acl-main.70
%U https://aclanthology.org/2020.acl-main.70
%U https://doi.org/10.18653/v1/2020.acl-main.70
%P 766-776
Markdown (Informal)
[An Online Semantic-enhanced Dirichlet Model for Short Text Stream Clustering](https://aclanthology.org/2020.acl-main.70) (Kumar et al., ACL 2020)
ACL