@inproceedings{tracz-etal-2020-bert,
title = "{BERT}-based similarity learning for product matching",
author = "Tracz, Janusz and
W{\'o}jcik, Piotr Iwo and
Jasinska-Kobus, Kalina and
Belluzzo, Riccardo and
Mroczkowski, Robert and
Gawlik, Ireneusz",
editor = "Zhao, Huasha and
Sondhi, Parikshit and
Bach, Nguyen and
Hewavitharana, Sanjika and
He, Yifan and
Si, Luo and
Ji, Heng",
booktitle = "Proceedings of Workshop on Natural Language Processing in E-Commerce",
month = dec,
year = "2020",
address = "Barcelona, Spain",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.ecomnlp-1.7/",
pages = "66--75",
abstract = "Product matching, i.e., being able to infer the product being sold for a merchant-created offer, is crucial for any e-commerce marketplace, enabling product-based navigation, price comparisons, product reviews, etc. This problem proves a challenging task, mostly due to the extent of product catalog, data heterogeneity, missing product representants, and varying levels of data quality. Moreover, new products are being introduced every day, making it difficult to cast the problem as a classification task. In this work, we apply BERT-based models in a similarity learning setup to solve the product matching problem. We provide a thorough ablation study, showing the impact of architecture and training objective choices. Application of transformer-based architectures and proper sampling techniques significantly boosts performance for a range of e-commerce domains, allowing for production deployment."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="tracz-etal-2020-bert">
<titleInfo>
<title>BERT-based similarity learning for product matching</title>
</titleInfo>
<name type="personal">
<namePart type="given">Janusz</namePart>
<namePart type="family">Tracz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Piotr</namePart>
<namePart type="given">Iwo</namePart>
<namePart type="family">Wójcik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kalina</namePart>
<namePart type="family">Jasinska-Kobus</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Riccardo</namePart>
<namePart type="family">Belluzzo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Robert</namePart>
<namePart type="family">Mroczkowski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ireneusz</namePart>
<namePart type="family">Gawlik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of Workshop on Natural Language Processing in E-Commerce</title>
</titleInfo>
<name type="personal">
<namePart type="given">Huasha</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Parikshit</namePart>
<namePart type="family">Sondhi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nguyen</namePart>
<namePart type="family">Bach</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sanjika</namePart>
<namePart type="family">Hewavitharana</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yifan</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luo</namePart>
<namePart type="family">Si</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Heng</namePart>
<namePart type="family">Ji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Barcelona, Spain</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Product matching, i.e., being able to infer the product being sold for a merchant-created offer, is crucial for any e-commerce marketplace, enabling product-based navigation, price comparisons, product reviews, etc. This problem proves a challenging task, mostly due to the extent of product catalog, data heterogeneity, missing product representants, and varying levels of data quality. Moreover, new products are being introduced every day, making it difficult to cast the problem as a classification task. In this work, we apply BERT-based models in a similarity learning setup to solve the product matching problem. We provide a thorough ablation study, showing the impact of architecture and training objective choices. Application of transformer-based architectures and proper sampling techniques significantly boosts performance for a range of e-commerce domains, allowing for production deployment.</abstract>
<identifier type="citekey">tracz-etal-2020-bert</identifier>
<location>
<url>https://aclanthology.org/2020.ecomnlp-1.7/</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>66</start>
<end>75</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T BERT-based similarity learning for product matching
%A Tracz, Janusz
%A Wójcik, Piotr Iwo
%A Jasinska-Kobus, Kalina
%A Belluzzo, Riccardo
%A Mroczkowski, Robert
%A Gawlik, Ireneusz
%Y Zhao, Huasha
%Y Sondhi, Parikshit
%Y Bach, Nguyen
%Y Hewavitharana, Sanjika
%Y He, Yifan
%Y Si, Luo
%Y Ji, Heng
%S Proceedings of Workshop on Natural Language Processing in E-Commerce
%D 2020
%8 December
%I Association for Computational Linguistics
%C Barcelona, Spain
%F tracz-etal-2020-bert
%X Product matching, i.e., being able to infer the product being sold for a merchant-created offer, is crucial for any e-commerce marketplace, enabling product-based navigation, price comparisons, product reviews, etc. This problem proves a challenging task, mostly due to the extent of product catalog, data heterogeneity, missing product representants, and varying levels of data quality. Moreover, new products are being introduced every day, making it difficult to cast the problem as a classification task. In this work, we apply BERT-based models in a similarity learning setup to solve the product matching problem. We provide a thorough ablation study, showing the impact of architecture and training objective choices. Application of transformer-based architectures and proper sampling techniques significantly boosts performance for a range of e-commerce domains, allowing for production deployment.
%U https://aclanthology.org/2020.ecomnlp-1.7/
%P 66-75
Markdown (Informal)
[BERT-based similarity learning for product matching](https://aclanthology.org/2020.ecomnlp-1.7/) (Tracz et al., EcomNLP 2020)
ACL
- Janusz Tracz, Piotr Iwo Wójcik, Kalina Jasinska-Kobus, Riccardo Belluzzo, Robert Mroczkowski, and Ireneusz Gawlik. 2020. BERT-based similarity learning for product matching. In Proceedings of Workshop on Natural Language Processing in E-Commerce, pages 66–75, Barcelona, Spain. Association for Computational Linguistics.