@inproceedings{fournier-etal-2020-analogies,
title = "Analogies minus analogy test: measuring regularities in word embeddings",
author = "Fournier, Louis and
Dupoux, Emmanuel and
Dunbar, Ewan",
editor = "Fern{\'a}ndez, Raquel and
Linzen, Tal",
booktitle = "Proceedings of the 24th Conference on Computational Natural Language Learning",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.conll-1.29/",
doi = "10.18653/v1/2020.conll-1.29",
pages = "365--375",
abstract = "Vector space models of words have long been claimed to capture linguistic regularities as simple vector translations, but problems have been raised with this claim. We decompose and empirically analyze the classic arithmetic word analogy test, to motivate two new metrics that address the issues with the standard test, and which distinguish between class-wise offset concentration (similar directions between pairs of words drawn from different broad classes, such as France-London, China-Ottawa,...) and pairing consistency (the existence of a regular transformation between correctly-matched pairs such as France:Paris::China:Beijing). We show that, while the standard analogy test is flawed, several popular word embeddings do nevertheless encode linguistic regularities."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="fournier-etal-2020-analogies">
<titleInfo>
<title>Analogies minus analogy test: measuring regularities in word embeddings</title>
</titleInfo>
<name type="personal">
<namePart type="given">Louis</namePart>
<namePart type="family">Fournier</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Emmanuel</namePart>
<namePart type="family">Dupoux</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ewan</namePart>
<namePart type="family">Dunbar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 24th Conference on Computational Natural Language Learning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Raquel</namePart>
<namePart type="family">Fernández</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tal</namePart>
<namePart type="family">Linzen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Vector space models of words have long been claimed to capture linguistic regularities as simple vector translations, but problems have been raised with this claim. We decompose and empirically analyze the classic arithmetic word analogy test, to motivate two new metrics that address the issues with the standard test, and which distinguish between class-wise offset concentration (similar directions between pairs of words drawn from different broad classes, such as France-London, China-Ottawa,...) and pairing consistency (the existence of a regular transformation between correctly-matched pairs such as France:Paris::China:Beijing). We show that, while the standard analogy test is flawed, several popular word embeddings do nevertheless encode linguistic regularities.</abstract>
<identifier type="citekey">fournier-etal-2020-analogies</identifier>
<identifier type="doi">10.18653/v1/2020.conll-1.29</identifier>
<location>
<url>https://aclanthology.org/2020.conll-1.29/</url>
</location>
<part>
<date>2020-11</date>
<extent unit="page">
<start>365</start>
<end>375</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Analogies minus analogy test: measuring regularities in word embeddings
%A Fournier, Louis
%A Dupoux, Emmanuel
%A Dunbar, Ewan
%Y Fernández, Raquel
%Y Linzen, Tal
%S Proceedings of the 24th Conference on Computational Natural Language Learning
%D 2020
%8 November
%I Association for Computational Linguistics
%C Online
%F fournier-etal-2020-analogies
%X Vector space models of words have long been claimed to capture linguistic regularities as simple vector translations, but problems have been raised with this claim. We decompose and empirically analyze the classic arithmetic word analogy test, to motivate two new metrics that address the issues with the standard test, and which distinguish between class-wise offset concentration (similar directions between pairs of words drawn from different broad classes, such as France-London, China-Ottawa,...) and pairing consistency (the existence of a regular transformation between correctly-matched pairs such as France:Paris::China:Beijing). We show that, while the standard analogy test is flawed, several popular word embeddings do nevertheless encode linguistic regularities.
%R 10.18653/v1/2020.conll-1.29
%U https://aclanthology.org/2020.conll-1.29/
%U https://doi.org/10.18653/v1/2020.conll-1.29
%P 365-375
Markdown (Informal)
[Analogies minus analogy test: measuring regularities in word embeddings](https://aclanthology.org/2020.conll-1.29/) (Fournier et al., CoNLL 2020)
ACL