@inproceedings{chen-etal-2022-extreme,
title = "Extreme Multi-Label Classification with Label Masking for Product Attribute Value Extraction",
author = "Chen, Wei-Te and
Xia, Yandi and
Shinzato, Keiji",
editor = "Malmasi, Shervin and
Rokhlenko, Oleg and
Ueffing, Nicola and
Guy, Ido and
Agichtein, Eugene and
Kallumadi, Surya",
booktitle = "Proceedings of the Fifth Workshop on e-Commerce and NLP (ECNLP 5)",
month = may,
year = "2022",
address = "Dublin, Ireland",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.ecnlp-1.16",
doi = "10.18653/v1/2022.ecnlp-1.16",
pages = "134--140",
abstract = "Although most studies have treated attribute value extraction (AVE) as named entity recognition, these approaches are not practical in real-world e-commerce platforms because they perform poorly and require canonicalization of extracted values. Furthermore, since values needed for actual services are static in many attributes, extraction of new values is not always necessary. Given the above, we formalize AVE as extreme multi-label classification (XMC). A major problem in solving AVE as XMC is that the distribution between positive and negative labels for products is heavily imbalanced. To mitigate the negative impact derived from such biased distribution, we propose label masking, a simple and effective method to reduce the number of negative labels in training. We exploit attribute taxonomy designed for e-commerce platforms to determine which labels are negative for products. Experimental results using a dataset collected from a Japanese e-commerce platform demonstrate that the label masking improves micro and macro F$_1$ scores by 3.38 and 23.20 points, respectively.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="chen-etal-2022-extreme">
<titleInfo>
<title>Extreme Multi-Label Classification with Label Masking for Product Attribute Value Extraction</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wei-Te</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yandi</namePart>
<namePart type="family">Xia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Keiji</namePart>
<namePart type="family">Shinzato</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth Workshop on e-Commerce and NLP (ECNLP 5)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shervin</namePart>
<namePart type="family">Malmasi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Oleg</namePart>
<namePart type="family">Rokhlenko</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nicola</namePart>
<namePart type="family">Ueffing</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ido</namePart>
<namePart type="family">Guy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eugene</namePart>
<namePart type="family">Agichtein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Surya</namePart>
<namePart type="family">Kallumadi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dublin, Ireland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Although most studies have treated attribute value extraction (AVE) as named entity recognition, these approaches are not practical in real-world e-commerce platforms because they perform poorly and require canonicalization of extracted values. Furthermore, since values needed for actual services are static in many attributes, extraction of new values is not always necessary. Given the above, we formalize AVE as extreme multi-label classification (XMC). A major problem in solving AVE as XMC is that the distribution between positive and negative labels for products is heavily imbalanced. To mitigate the negative impact derived from such biased distribution, we propose label masking, a simple and effective method to reduce the number of negative labels in training. We exploit attribute taxonomy designed for e-commerce platforms to determine which labels are negative for products. Experimental results using a dataset collected from a Japanese e-commerce platform demonstrate that the label masking improves micro and macro F₁ scores by 3.38 and 23.20 points, respectively.</abstract>
<identifier type="citekey">chen-etal-2022-extreme</identifier>
<identifier type="doi">10.18653/v1/2022.ecnlp-1.16</identifier>
<location>
<url>https://aclanthology.org/2022.ecnlp-1.16</url>
</location>
<part>
<date>2022-05</date>
<extent unit="page">
<start>134</start>
<end>140</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Extreme Multi-Label Classification with Label Masking for Product Attribute Value Extraction
%A Chen, Wei-Te
%A Xia, Yandi
%A Shinzato, Keiji
%Y Malmasi, Shervin
%Y Rokhlenko, Oleg
%Y Ueffing, Nicola
%Y Guy, Ido
%Y Agichtein, Eugene
%Y Kallumadi, Surya
%S Proceedings of the Fifth Workshop on e-Commerce and NLP (ECNLP 5)
%D 2022
%8 May
%I Association for Computational Linguistics
%C Dublin, Ireland
%F chen-etal-2022-extreme
%X Although most studies have treated attribute value extraction (AVE) as named entity recognition, these approaches are not practical in real-world e-commerce platforms because they perform poorly and require canonicalization of extracted values. Furthermore, since values needed for actual services are static in many attributes, extraction of new values is not always necessary. Given the above, we formalize AVE as extreme multi-label classification (XMC). A major problem in solving AVE as XMC is that the distribution between positive and negative labels for products is heavily imbalanced. To mitigate the negative impact derived from such biased distribution, we propose label masking, a simple and effective method to reduce the number of negative labels in training. We exploit attribute taxonomy designed for e-commerce platforms to determine which labels are negative for products. Experimental results using a dataset collected from a Japanese e-commerce platform demonstrate that the label masking improves micro and macro F₁ scores by 3.38 and 23.20 points, respectively.
%R 10.18653/v1/2022.ecnlp-1.16
%U https://aclanthology.org/2022.ecnlp-1.16
%U https://doi.org/10.18653/v1/2022.ecnlp-1.16
%P 134-140
Markdown (Informal)
[Extreme Multi-Label Classification with Label Masking for Product Attribute Value Extraction](https://aclanthology.org/2022.ecnlp-1.16) (Chen et al., ECNLP 2022)
ACL