@inproceedings{veselova-vorontsov-2020-topic,
title = "Topic Balancing with Additive Regularization of Topic Models",
author = "Veselova, Eugeniia and
Vorontsov, Konstantin",
editor = "Rijhwani, Shruti and
Liu, Jiangming and
Wang, Yizhong and
Dror, Rotem",
booktitle = "Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics: Student Research Workshop",
month = jul,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.acl-srw.9",
doi = "10.18653/v1/2020.acl-srw.9",
pages = "59--65",
abstract = "This article proposes a new approach for building topic models on unbalanced collections in topic modelling, based on the existing methods and our experiments with such methods. Real-world data collections contain topics in various proportions, and often documents of the relatively small theme become distributed all over the larger topics instead of being grouped into one topic. To address this issue, we design a new regularizer for Theta and Phi matrices in probabilistic Latent Semantic Analysis (pLSA) model. We make sure this regularizer increases the quality of topic models, trained on unbalanced collections. Besides, we conceptually support this regularizer by our experiments.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="veselova-vorontsov-2020-topic">
<titleInfo>
<title>Topic Balancing with Additive Regularization of Topic Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Eugeniia</namePart>
<namePart type="family">Veselova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Konstantin</namePart>
<namePart type="family">Vorontsov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics: Student Research Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shruti</namePart>
<namePart type="family">Rijhwani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiangming</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yizhong</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rotem</namePart>
<namePart type="family">Dror</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This article proposes a new approach for building topic models on unbalanced collections in topic modelling, based on the existing methods and our experiments with such methods. Real-world data collections contain topics in various proportions, and often documents of the relatively small theme become distributed all over the larger topics instead of being grouped into one topic. To address this issue, we design a new regularizer for Theta and Phi matrices in probabilistic Latent Semantic Analysis (pLSA) model. We make sure this regularizer increases the quality of topic models, trained on unbalanced collections. Besides, we conceptually support this regularizer by our experiments.</abstract>
<identifier type="citekey">veselova-vorontsov-2020-topic</identifier>
<identifier type="doi">10.18653/v1/2020.acl-srw.9</identifier>
<location>
<url>https://aclanthology.org/2020.acl-srw.9</url>
</location>
<part>
<date>2020-07</date>
<extent unit="page">
<start>59</start>
<end>65</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Topic Balancing with Additive Regularization of Topic Models
%A Veselova, Eugeniia
%A Vorontsov, Konstantin
%Y Rijhwani, Shruti
%Y Liu, Jiangming
%Y Wang, Yizhong
%Y Dror, Rotem
%S Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics: Student Research Workshop
%D 2020
%8 July
%I Association for Computational Linguistics
%C Online
%F veselova-vorontsov-2020-topic
%X This article proposes a new approach for building topic models on unbalanced collections in topic modelling, based on the existing methods and our experiments with such methods. Real-world data collections contain topics in various proportions, and often documents of the relatively small theme become distributed all over the larger topics instead of being grouped into one topic. To address this issue, we design a new regularizer for Theta and Phi matrices in probabilistic Latent Semantic Analysis (pLSA) model. We make sure this regularizer increases the quality of topic models, trained on unbalanced collections. Besides, we conceptually support this regularizer by our experiments.
%R 10.18653/v1/2020.acl-srw.9
%U https://aclanthology.org/2020.acl-srw.9
%U https://doi.org/10.18653/v1/2020.acl-srw.9
%P 59-65
Markdown (Informal)
[Topic Balancing with Additive Regularization of Topic Models](https://aclanthology.org/2020.acl-srw.9) (Veselova & Vorontsov, ACL 2020)
ACL