% SuperSim: Swedish word similarity/relatedness test set, NoDaLiDa 2021 paper
% (record URL points at the ACL Anthology).
% Notes for maintainers:
%   - Case-sensitive words in title/booktitle are protected with whole-word
%     braces so sentence-casing styles do not lowercase them.
%   - month carries the conference day range; with the standard month macros
%     it renders as "May 31--June 2".
%   - address appears to hold the conference venue (marked "Online"), not the
%     publisher's city.
@inproceedings{hengchen-tahmasebi-2021-supersim,
  title     = {{SuperSim}: a test set for word similarity and relatedness in {Swedish}},
  author    = {Hengchen, Simon and Tahmasebi, Nina},
  editor    = {Dobnik, Simon and {\O}vrelid, Lilja},
  booktitle = {Proceedings of the 23rd {Nordic} Conference on Computational Linguistics ({NoDaLiDa})},
  month     = may # "~31--" # jun # "~2",
  year      = {2021},
  address   = {Reykjavik, Iceland (Online)},
  publisher = {Link{\"o}ping University Electronic Press, Sweden},
  url       = {https://aclanthology.org/2021.nodalida-main.27},
  pages     = {268--275},
  abstract  = {Language models are notoriously difficult to evaluate. We release SuperSim, a large-scale similarity and relatedness test set for Swedish built with expert human judgements. The test set is composed of 1,360 word-pairs independently judged for both relatedness and similarity by five annotators. We evaluate three different models (Word2Vec, fastText, and GloVe) trained on two separate Swedish datasets, namely the Swedish Gigaword corpus and a Swedish Wikipedia dump, to provide a baseline for future comparison. We will release the fully annotated test set, code, models, and data.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the conference paper with citekey
     hengchen-tahmasebi-2021-supersim. The paper itself is described at the
     top level; the proceedings volume is the relatedItem of type "host". -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="hengchen-tahmasebi-2021-supersim">
    <titleInfo>
      <title>SuperSim: a test set for word similarity and relatedness in Swedish</title>
    </titleInfo>
    <!-- Paper authors, in byline order. -->
    <name type="personal">
      <namePart type="given">Simon</namePart>
      <namePart type="family">Hengchen</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Nina</namePart>
      <namePart type="family">Tahmasebi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <!-- Date range 31 May to 2 June 2021, given as a machine-readable
           W3CDTF start/end pair rather than the free-text string
           "2021-may 31–2 jun". -->
      <dateIssued encoding="w3cdtf" point="start">2021-05-31</dateIssued>
      <dateIssued encoding="w3cdtf" point="end">2021-06-02</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors and its publisher. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 23rd Nordic Conference on Computational Linguistics (NoDaLiDa)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Simon</namePart>
        <namePart type="family">Dobnik</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Lilja</namePart>
        <namePart type="family">Øvrelid</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Linköping University Electronic Press, Sweden</publisher>
        <place>
          <placeTerm type="text">Reykjavik, Iceland (Online)</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Language models are notoriously difficult to evaluate. We release SuperSim, a large-scale similarity and relatedness test set for Swedish built with expert human judgements. The test set is composed of 1,360 word-pairs independently judged for both relatedness and similarity by five annotators. We evaluate three different models (Word2Vec, fastText, and GloVe) trained on two separate Swedish datasets, namely the Swedish Gigaword corpus and a Swedish Wikipedia dump, to provide a baseline for future comparison. We will release the fully annotated test set, code, models, and data.</abstract>
    <identifier type="citekey">hengchen-tahmasebi-2021-supersim</identifier>
    <location>
      <url>https://aclanthology.org/2021.nodalida-main.27</url>
    </location>
    <!-- Position of the paper inside the host volume; same date range as
         dateIssued, again as a W3CDTF start/end pair. -->
    <part>
      <date encoding="w3cdtf" point="start">2021-05-31</date>
      <date encoding="w3cdtf" point="end">2021-06-02</date>
      <extent unit="page">
        <start>268</start>
        <end>275</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T SuperSim: a test set for word similarity and relatedness in Swedish
%A Hengchen, Simon
%A Tahmasebi, Nina
%Y Dobnik, Simon
%Y Øvrelid, Lilja
%S Proceedings of the 23rd Nordic Conference on Computational Linguistics (NoDaLiDa)
%D 2021
%8 May 31–June 2
%I Linköping University Electronic Press, Sweden
%C Reykjavik, Iceland (Online)
%F hengchen-tahmasebi-2021-supersim
%X Language models are notoriously difficult to evaluate. We release SuperSim, a large-scale similarity and relatedness test set for Swedish built with expert human judgements. The test set is composed of 1,360 word-pairs independently judged for both relatedness and similarity by five annotators. We evaluate three different models (Word2Vec, fastText, and GloVe) trained on two separate Swedish datasets, namely the Swedish Gigaword corpus and a Swedish Wikipedia dump, to provide a baseline for future comparison. We will release the fully annotated test set, code, models, and data.
%U https://aclanthology.org/2021.nodalida-main.27
%P 268-275
Markdown (Informal)
[SuperSim: a test set for word similarity and relatedness in Swedish](https://aclanthology.org/2021.nodalida-main.27) (Hengchen & Tahmasebi, NoDaLiDa 2021)
ACL