@inproceedings{saha-etal-2022-seeded,
title = "Seeded Hierarchical Clustering for Expert-Crafted Taxonomies",
author = "Saha, Anish and
Ananthram, Amith and
Allaway, Emily and
Ji, Heng and
McKeown, Kathleen",
editor = "Goldberg, Yoav and
Kozareva, Zornitsa and
Zhang, Yue",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2022",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.findings-emnlp.115/",
doi = "10.18653/v1/2022.findings-emnlp.115",
pages = "1595--1609",
abstract = "Practitioners from many disciplines (e.g., political science) use expert-crafted taxonomies to make sense of large, unlabeled corpora. In this work, we study Seeded Hierarchical Clustering (SHC): the task of automatically fitting unlabeled data to such taxonomies using a small set of labeled examples. We propose HierSeed, a novel weakly supervised algorithm for this task that uses only a small set of labeled seed examples in a computation and data efficient manner. HierSeed assigns documents to topics by weighing document density against topic hierarchical structure. It outperforms unsupervised and supervised baselines for the SHC task on three real-world datasets."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="saha-etal-2022-seeded">
<titleInfo>
<title>Seeded Hierarchical Clustering for Expert-Crafted Taxonomies</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anish</namePart>
<namePart type="family">Saha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amith</namePart>
<namePart type="family">Ananthram</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Emily</namePart>
<namePart type="family">Allaway</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Heng</namePart>
<namePart type="family">Ji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kathleen</namePart>
<namePart type="family">McKeown</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2022</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yoav</namePart>
<namePart type="family">Goldberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zornitsa</namePart>
<namePart type="family">Kozareva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yue</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, United Arab Emirates</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Practitioners from many disciplines (e.g., political science) use expert-crafted taxonomies to make sense of large, unlabeled corpora. In this work, we study Seeded Hierarchical Clustering (SHC): the task of automatically fitting unlabeled data to such taxonomies using a small set of labeled examples. We propose HierSeed, a novel weakly supervised algorithm for this task that uses only a small set of labeled seed examples in a computation and data efficient manner. HierSeed assigns documents to topics by weighing document density against topic hierarchical structure. It outperforms unsupervised and supervised baselines for the SHC task on three real-world datasets.</abstract>
<identifier type="citekey">saha-etal-2022-seeded</identifier>
<identifier type="doi">10.18653/v1/2022.findings-emnlp.115</identifier>
<location>
<url>https://aclanthology.org/2022.findings-emnlp.115/</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>1595</start>
<end>1609</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Seeded Hierarchical Clustering for Expert-Crafted Taxonomies
%A Saha, Anish
%A Ananthram, Amith
%A Allaway, Emily
%A Ji, Heng
%A McKeown, Kathleen
%Y Goldberg, Yoav
%Y Kozareva, Zornitsa
%Y Zhang, Yue
%S Findings of the Association for Computational Linguistics: EMNLP 2022
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates
%F saha-etal-2022-seeded
%X Practitioners from many disciplines (e.g., political science) use expert-crafted taxonomies to make sense of large, unlabeled corpora. In this work, we study Seeded Hierarchical Clustering (SHC): the task of automatically fitting unlabeled data to such taxonomies using a small set of labeled examples. We propose HierSeed, a novel weakly supervised algorithm for this task that uses only a small set of labeled seed examples in a computation and data efficient manner. HierSeed assigns documents to topics by weighing document density against topic hierarchical structure. It outperforms unsupervised and supervised baselines for the SHC task on three real-world datasets.
%R 10.18653/v1/2022.findings-emnlp.115
%U https://aclanthology.org/2022.findings-emnlp.115/
%U https://doi.org/10.18653/v1/2022.findings-emnlp.115
%P 1595-1609
Markdown (Informal)
[Seeded Hierarchical Clustering for Expert-Crafted Taxonomies](https://aclanthology.org/2022.findings-emnlp.115/) (Saha et al., Findings 2022)
ACL
- Anish Saha, Amith Ananthram, Emily Allaway, Heng Ji, and Kathleen McKeown. 2022. Seeded Hierarchical Clustering for Expert-Crafted Taxonomies. In Findings of the Association for Computational Linguistics: EMNLP 2022, pages 1595–1609, Abu Dhabi, United Arab Emirates. Association for Computational Linguistics.