@inproceedings{gromann-etal-2024-multilexbats,
title = "{M}ulti{L}ex{BATS}: Multilingual Dataset of Lexical Semantic Relations",
author = "Gromann, Dagmar and
Goncalo Oliveira, Hugo and
Pitarch, Lucia and
Apostol, Elena-Simona and
Bernad, Jordi and
Byty{\c{c}}i, Eliot and
Cantone, Chiara and
Carvalho, Sara and
Frontini, Francesca and
Garabik, Radovan and
Gracia, Jorge and
Granata, Letizia and
Khan, Fahad and
Knez, Timotej and
Labropoulou, Penny and
Liebeskind, Chaya and
Di Buono, Maria Pia and
Ostro{\v{s}}ki Ani{\'c}, Ana and
Rackevi{\v{c}}ien{\.{e}}, Sigita and
Rodrigues, Ricardo and
S{\'e}rasset, Gilles and
Selmistraitis, Linas and
Sidib{\'e}, Mahammadou and
Silvano, Purifica{\c{c}}{\~a}o and
Spahiu, Blerina and
Sogutlu, Enriketa and
Stankovi{\'c}, Ranka and
Truic{\u{a}}, Ciprian-Octavian and
Valunaite Oleskeviciene, Giedre and
Zitnik, Slavko and
Zdravkova, Katerina",
editor = "Calzolari, Nicoletta and
Kan, Min-Yen and
Hoste, Veronique and
Lenci, Alessandro and
Sakti, Sakriani and
Xue, Nianwen",
booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.lrec-main.1029/",
pages = "11783--11793",
abstract = "Understanding the relation between the meanings of words is an important part of comprehending natural language. Prior work has either focused on analysing lexical semantic relations in word embeddings or probing pretrained language models (PLMs), with some exceptions. Given the rarity of highly multilingual benchmarks, it is unclear to what extent PLMs capture relational knowledge and are able to transfer it across languages. To start addressing this question, we propose MultiLexBATS, a multilingual parallel dataset of lexical semantic relations adapted from BATS in 15 languages including low-resource languages, such as Bambara, Lithuanian, and Albanian. As experiment on cross-lingual transfer of relational knowledge, we test the PLMs' ability to (1) capture analogies across languages, and (2) predict translation targets. We find considerable differences across relation types and languages with a clear preference for hypernymy and antonymy as well as romance languages."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="gromann-etal-2024-multilexbats">
<titleInfo>
<title>MultiLexBATS: Multilingual Dataset of Lexical Semantic Relations</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dagmar</namePart>
<namePart type="family">Gromann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hugo</namePart>
<namePart type="family">Goncalo Oliveira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lucia</namePart>
<namePart type="family">Pitarch</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elena-Simona</namePart>
<namePart type="family">Apostol</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jordi</namePart>
<namePart type="family">Bernad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eliot</namePart>
<namePart type="family">Bytyçi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chiara</namePart>
<namePart type="family">Cantone</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Carvalho</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Francesca</namePart>
<namePart type="family">Frontini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Radovan</namePart>
<namePart type="family">Garabik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jorge</namePart>
<namePart type="family">Gracia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Letizia</namePart>
<namePart type="family">Granata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fahad</namePart>
<namePart type="family">Khan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Timotej</namePart>
<namePart type="family">Knez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Penny</namePart>
<namePart type="family">Labropoulou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chaya</namePart>
<namePart type="family">Liebeskind</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="given">Pia</namePart>
<namePart type="family">Di Buono</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ana</namePart>
<namePart type="family">Ostroški Anić</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sigita</namePart>
<namePart type="family">Rackevičienė</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ricardo</namePart>
<namePart type="family">Rodrigues</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gilles</namePart>
<namePart type="family">Sérasset</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Linas</namePart>
<namePart type="family">Selmistraitis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mahammadou</namePart>
<namePart type="family">Sidibé</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Purificação</namePart>
<namePart type="family">Silvano</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Blerina</namePart>
<namePart type="family">Spahiu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Enriketa</namePart>
<namePart type="family">Sogutlu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ranka</namePart>
<namePart type="family">Stanković</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ciprian-Octavian</namePart>
<namePart type="family">Truică</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Giedre</namePart>
<namePart type="family">Valunaite Oleskeviciene</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Slavko</namePart>
<namePart type="family">Zitnik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Katerina</namePart>
<namePart type="family">Zdravkova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Understanding the relation between the meanings of words is an important part of comprehending natural language. Prior work has either focused on analysing lexical semantic relations in word embeddings or probing pretrained language models (PLMs), with some exceptions. Given the rarity of highly multilingual benchmarks, it is unclear to what extent PLMs capture relational knowledge and are able to transfer it across languages. To start addressing this question, we propose MultiLexBATS, a multilingual parallel dataset of lexical semantic relations adapted from BATS in 15 languages including low-resource languages, such as Bambara, Lithuanian, and Albanian. As experiment on cross-lingual transfer of relational knowledge, we test the PLMs’ ability to (1) capture analogies across languages, and (2) predict translation targets. We find considerable differences across relation types and languages with a clear preference for hypernymy and antonymy as well as romance languages.</abstract>
<identifier type="citekey">gromann-etal-2024-multilexbats</identifier>
<location>
<url>https://aclanthology.org/2024.lrec-main.1029/</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>11783</start>
<end>11793</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T MultiLexBATS: Multilingual Dataset of Lexical Semantic Relations
%A Gromann, Dagmar
%A Goncalo Oliveira, Hugo
%A Pitarch, Lucia
%A Apostol, Elena-Simona
%A Bernad, Jordi
%A Bytyçi, Eliot
%A Cantone, Chiara
%A Carvalho, Sara
%A Frontini, Francesca
%A Garabik, Radovan
%A Gracia, Jorge
%A Granata, Letizia
%A Khan, Fahad
%A Knez, Timotej
%A Labropoulou, Penny
%A Liebeskind, Chaya
%A Di Buono, Maria Pia
%A Ostroški Anić, Ana
%A Rackevičienė, Sigita
%A Rodrigues, Ricardo
%A Sérasset, Gilles
%A Selmistraitis, Linas
%A Sidibé, Mahammadou
%A Silvano, Purificação
%A Spahiu, Blerina
%A Sogutlu, Enriketa
%A Stanković, Ranka
%A Truică, Ciprian-Octavian
%A Valunaite Oleskeviciene, Giedre
%A Zitnik, Slavko
%A Zdravkova, Katerina
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F gromann-etal-2024-multilexbats
%X Understanding the relation between the meanings of words is an important part of comprehending natural language. Prior work has either focused on analysing lexical semantic relations in word embeddings or probing pretrained language models (PLMs), with some exceptions. Given the rarity of highly multilingual benchmarks, it is unclear to what extent PLMs capture relational knowledge and are able to transfer it across languages. To start addressing this question, we propose MultiLexBATS, a multilingual parallel dataset of lexical semantic relations adapted from BATS in 15 languages including low-resource languages, such as Bambara, Lithuanian, and Albanian. As experiment on cross-lingual transfer of relational knowledge, we test the PLMs’ ability to (1) capture analogies across languages, and (2) predict translation targets. We find considerable differences across relation types and languages with a clear preference for hypernymy and antonymy as well as romance languages.
%U https://aclanthology.org/2024.lrec-main.1029/
%P 11783-11793
Markdown (Informal)
[MultiLexBATS: Multilingual Dataset of Lexical Semantic Relations](https://aclanthology.org/2024.lrec-main.1029/) (Gromann et al., LREC-COLING 2024)
ACL
- Dagmar Gromann, Hugo Goncalo Oliveira, Lucia Pitarch, Elena-Simona Apostol, Jordi Bernad, Eliot Bytyçi, Chiara Cantone, Sara Carvalho, Francesca Frontini, Radovan Garabik, Jorge Gracia, Letizia Granata, Fahad Khan, Timotej Knez, Penny Labropoulou, Chaya Liebeskind, Maria Pia Di Buono, Ana Ostroški Anić, Sigita Rackevičienė, Ricardo Rodrigues, Gilles Sérasset, Linas Selmistraitis, Mahammadou Sidibé, Purificação Silvano, Blerina Spahiu, Enriketa Sogutlu, Ranka Stanković, Ciprian-Octavian Truică, Giedre Valunaite Oleskeviciene, Slavko Zitnik, and Katerina Zdravkova. 2024. MultiLexBATS: Multilingual Dataset of Lexical Semantic Relations. In Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024), pages 11783–11793, Torino, Italia. ELRA and ICCL.