@inproceedings{dasgupta-etal-2022-word2box,
title = "{W}ord2{B}ox: Capturing Set-Theoretic Semantics of Words using Box Embeddings",
author = "Dasgupta, Shib and
Boratko, Michael and
Mishra, Siddhartha and
Atmakuri, Shriya and
Patel, Dhruvesh and
Li, Xiang and
McCallum, Andrew",
editor = "Muresan, Smaranda and
Nakov, Preslav and
Villavicencio, Aline",
booktitle = "Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = may,
year = "2022",
address = "Dublin, Ireland",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.acl-long.161/",
doi = "10.18653/v1/2022.acl-long.161",
pages = "2263--2276",
abstract = "Learning representations of words in a continuous space is perhaps the most fundamental task in NLP, however words interact in ways much richer than vector dot product similarity can provide. Many relationships between words can be expressed set-theoretically, for example, adjective-noun compounds (eg. {\textquotedblleft}red cars{\textquotedblright}{\ensuremath{\subseteq}}{\textquotedblleft}cars{\textquotedblright}) and homographs (eg. {\textquotedblleft}tongue{\textquotedblright}{\ensuremath{\cap}}{\textquotedblleft}body{\textquotedblright} should be similar to {\textquotedblleft}mouth{\textquotedblright}, while {\textquotedblleft}tongue{\textquotedblright}{\ensuremath{\cap}}{\textquotedblleft}language{\textquotedblright} should be similar to {\textquotedblleft}dialect{\textquotedblright}) have natural set-theoretic interpretations. Box embeddings are a novel region-based representation which provide the capability to perform these set-theoretic operations. In this work, we provide a fuzzy-set interpretation of box embeddings, and learn box representations of words using a set-theoretic training objective. We demonstrate improved performance on various word similarity tasks, particularly on less common words, and perform a quantitative and qualitative analysis exploring the additional unique expressivity provided by Word2Box."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="dasgupta-etal-2022-word2box">
<titleInfo>
<title>Word2Box: Capturing Set-Theoretic Semantics of Words using Box Embeddings</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shib</namePart>
<namePart type="family">Dasgupta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="family">Boratko</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Siddhartha</namePart>
<namePart type="family">Mishra</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shriya</namePart>
<namePart type="family">Atmakuri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dhruvesh</namePart>
<namePart type="family">Patel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiang</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andrew</namePart>
<namePart type="family">McCallum</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Smaranda</namePart>
<namePart type="family">Muresan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Preslav</namePart>
<namePart type="family">Nakov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aline</namePart>
<namePart type="family">Villavicencio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dublin, Ireland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Learning representations of words in a continuous space is perhaps the most fundamental task in NLP, however words interact in ways much richer than vector dot product similarity can provide. Many relationships between words can be expressed set-theoretically, for example, adjective-noun compounds (eg. “red cars”⊆“cars”) and homographs (eg. “tongue”∩“body” should be similar to “mouth”, while “tongue”∩“language” should be similar to “dialect”) have natural set-theoretic interpretations. Box embeddings are a novel region-based representation which provide the capability to perform these set-theoretic operations. In this work, we provide a fuzzy-set interpretation of box embeddings, and learn box representations of words using a set-theoretic training objective. We demonstrate improved performance on various word similarity tasks, particularly on less common words, and perform a quantitative and qualitative analysis exploring the additional unique expressivity provided by Word2Box.</abstract>
<identifier type="citekey">dasgupta-etal-2022-word2box</identifier>
<identifier type="doi">10.18653/v1/2022.acl-long.161</identifier>
<location>
<url>https://aclanthology.org/2022.acl-long.161/</url>
</location>
<part>
<date>2022-05</date>
<extent unit="page">
<start>2263</start>
<end>2276</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Word2Box: Capturing Set-Theoretic Semantics of Words using Box Embeddings
%A Dasgupta, Shib
%A Boratko, Michael
%A Mishra, Siddhartha
%A Atmakuri, Shriya
%A Patel, Dhruvesh
%A Li, Xiang
%A McCallum, Andrew
%Y Muresan, Smaranda
%Y Nakov, Preslav
%Y Villavicencio, Aline
%S Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2022
%8 May
%I Association for Computational Linguistics
%C Dublin, Ireland
%F dasgupta-etal-2022-word2box
%X Learning representations of words in a continuous space is perhaps the most fundamental task in NLP, however words interact in ways much richer than vector dot product similarity can provide. Many relationships between words can be expressed set-theoretically, for example, adjective-noun compounds (eg. “red cars”⊆“cars”) and homographs (eg. “tongue”∩“body” should be similar to “mouth”, while “tongue”∩“language” should be similar to “dialect”) have natural set-theoretic interpretations. Box embeddings are a novel region-based representation which provide the capability to perform these set-theoretic operations. In this work, we provide a fuzzy-set interpretation of box embeddings, and learn box representations of words using a set-theoretic training objective. We demonstrate improved performance on various word similarity tasks, particularly on less common words, and perform a quantitative and qualitative analysis exploring the additional unique expressivity provided by Word2Box.
%R 10.18653/v1/2022.acl-long.161
%U https://aclanthology.org/2022.acl-long.161/
%U https://doi.org/10.18653/v1/2022.acl-long.161
%P 2263-2276
Markdown (Informal)
[Word2Box: Capturing Set-Theoretic Semantics of Words using Box Embeddings](https://aclanthology.org/2022.acl-long.161/) (Dasgupta et al., ACL 2022)
ACL
- Shib Dasgupta, Michael Boratko, Siddhartha Mishra, Shriya Atmakuri, Dhruvesh Patel, Xiang Li, and Andrew McCallum. 2022. Word2Box: Capturing Set-Theoretic Semantics of Words using Box Embeddings. In Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 2263–2276, Dublin, Ireland. Association for Computational Linguistics.