@inproceedings{li-etal-2023-enhancing-extreme,
title = "Enhancing Extreme Multi-Label Text Classification: Addressing Challenges in Model, Data, and Evaluation",
author = "Li, Dan and
Zhu, Zi Long and
van de Loo, Janneke and
Masip Gomez, Agnes and
Yadav, Vikrant and
Tsatsaronis, Georgios and
Afzal, Zubair",
editor = "Wang, Mingxuan and
Zitouni, Imed",
booktitle = "Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing: Industry Track",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.emnlp-industry.30",
doi = "10.18653/v1/2023.emnlp-industry.30",
pages = "313--321",
abstract = "Extreme multi-label text classification is a prevalent task in industry, but it frequently encounters challenges in terms of machine learning perspectives, including model limitations, data scarcity, and time-consuming evaluation. This paper aims to mitigate these issues by introducing novel approaches. Firstly, we propose a label ranking model as an alternative to the conventional SciBERT-based classification model, enabling efficient handling of large-scale labels and accommodating new labels. Secondly, we present an active learning-based pipeline that addresses the data scarcity of new labels during the update of a classification system. Finally, we introduce ChatGPT to assist with model evaluation. Our experiments demonstrate the effectiveness of these techniques in enhancing the extreme multi-label text classification task.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="li-etal-2023-enhancing-extreme">
<titleInfo>
<title>Enhancing Extreme Multi-Label Text Classification: Addressing Challenges in Model, Data, and Evaluation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dan</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zi</namePart>
<namePart type="given">Long</namePart>
<namePart type="family">Zhu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Janneke</namePart>
<namePart type="family">van de Loo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Agnes</namePart>
<namePart type="family">Masip Gomez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vikrant</namePart>
<namePart type="family">Yadav</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Georgios</namePart>
<namePart type="family">Tsatsaronis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zubair</namePart>
<namePart type="family">Afzal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing: Industry Track</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mingxuan</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Imed</namePart>
<namePart type="family">Zitouni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Extreme multi-label text classification is a prevalent task in industry, but it frequently encounters challenges in terms of machine learning perspectives, including model limitations, data scarcity, and time-consuming evaluation. This paper aims to mitigate these issues by introducing novel approaches. Firstly, we propose a label ranking model as an alternative to the conventional SciBERT-based classification model, enabling efficient handling of large-scale labels and accommodating new labels. Secondly, we present an active learning-based pipeline that addresses the data scarcity of new labels during the update of a classification system. Finally, we introduce ChatGPT to assist with model evaluation. Our experiments demonstrate the effectiveness of these techniques in enhancing the extreme multi-label text classification task.</abstract>
<identifier type="citekey">li-etal-2023-enhancing-extreme</identifier>
<identifier type="doi">10.18653/v1/2023.emnlp-industry.30</identifier>
<location>
<url>https://aclanthology.org/2023.emnlp-industry.30</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>313</start>
<end>321</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Enhancing Extreme Multi-Label Text Classification: Addressing Challenges in Model, Data, and Evaluation
%A Li, Dan
%A Zhu, Zi Long
%A van de Loo, Janneke
%A Masip Gomez, Agnes
%A Yadav, Vikrant
%A Tsatsaronis, Georgios
%A Afzal, Zubair
%Y Wang, Mingxuan
%Y Zitouni, Imed
%S Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing: Industry Track
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F li-etal-2023-enhancing-extreme
%X Extreme multi-label text classification is a prevalent task in industry, but it frequently encounters challenges in terms of machine learning perspectives, including model limitations, data scarcity, and time-consuming evaluation. This paper aims to mitigate these issues by introducing novel approaches. Firstly, we propose a label ranking model as an alternative to the conventional SciBERT-based classification model, enabling efficient handling of large-scale labels and accommodating new labels. Secondly, we present an active learning-based pipeline that addresses the data scarcity of new labels during the update of a classification system. Finally, we introduce ChatGPT to assist with model evaluation. Our experiments demonstrate the effectiveness of these techniques in enhancing the extreme multi-label text classification task.
%R 10.18653/v1/2023.emnlp-industry.30
%U https://aclanthology.org/2023.emnlp-industry.30
%U https://doi.org/10.18653/v1/2023.emnlp-industry.30
%P 313-321
Markdown (Informal)
[Enhancing Extreme Multi-Label Text Classification: Addressing Challenges in Model, Data, and Evaluation](https://aclanthology.org/2023.emnlp-industry.30) (Li et al., EMNLP 2023)
ACL
- Dan Li, Zi Long Zhu, Janneke van de Loo, Agnes Masip Gomez, Vikrant Yadav, Georgios Tsatsaronis, and Zubair Afzal. 2023. Enhancing Extreme Multi-Label Text Classification: Addressing Challenges in Model, Data, and Evaluation. In Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing: Industry Track, pages 313–321, Singapore. Association for Computational Linguistics.