@inproceedings{gombert-bartsch-2020-multivitaminbooster,
title = "{M}ulti{V}itamin{B}ooster at {PARSEME} Shared Task 2020: Combining Window- and Dependency-Based Features with Multilingual Contextualised Word Embeddings for {VMWE} Detection",
author = "Gombert, Sebastian and
Bartsch, Sabine",
editor = "Markantonatou, Stella and
McCrae, John and
Mitrovi{\'c}, Jelena and
Tiberius, Carole and
Ramisch, Carlos and
Vaidya, Ashwini and
Osenova, Petya and
Savary, Agata",
booktitle = "Proceedings of the Joint Workshop on Multiword Expressions and Electronic Lexicons",
month = dec,
year = "2020",
address = "online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.mwe-1.20",
pages = "149--155",
abstract = "In this paper, we present MultiVitaminBooster, a system implemented for the PARSEME shared task on semi-supervised identification of verbal multiword expressions - edition 1.2. For our approach, we interpret detecting verbal multiword expressions as a token classification task aiming to decide whether a token is part of a verbal multiword expression or not. For this purpose, we train gradient boosting-based models. We encode tokens as feature vectors combining multilingual contextualized word embeddings provided by the XLM-RoBERTa language model with a more traditional linguistic feature set relying on context windows and dependency relations. Our system was ranked 7th in the official open track ranking of the shared task evaluations with an encoding-related bug distorting the results. For this reason we carry out further unofficial evaluations. Unofficial versions of our systems would have achieved higher ranks.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="gombert-bartsch-2020-multivitaminbooster">
<titleInfo>
<title>MultiVitaminBooster at PARSEME Shared Task 2020: Combining Window- and Dependency-Based Features with Multilingual Contextualised Word Embeddings for VMWE Detection</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Gombert</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sabine</namePart>
<namePart type="family">Bartsch</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Joint Workshop on Multiword Expressions and Electronic Lexicons</title>
</titleInfo>
<name type="personal">
<namePart type="given">Stella</namePart>
<namePart type="family">Markantonatou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">John</namePart>
<namePart type="family">McCrae</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jelena</namePart>
<namePart type="family">Mitrović</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carole</namePart>
<namePart type="family">Tiberius</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carlos</namePart>
<namePart type="family">Ramisch</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ashwini</namePart>
<namePart type="family">Vaidya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Petya</namePart>
<namePart type="family">Osenova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Agata</namePart>
<namePart type="family">Savary</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we present MultiVitaminBooster, a system implemented for the PARSEME shared task on semi-supervised identification of verbal multiword expressions - edition 1.2. For our approach, we interpret detecting verbal multiword expressions as a token classification task aiming to decide whether a token is part of a verbal multiword expression or not. For this purpose, we train gradient boosting-based models. We encode tokens as feature vectors combining multilingual contextualized word embeddings provided by the XLM-RoBERTa language model with a more traditional linguistic feature set relying on context windows and dependency relations. Our system was ranked 7th in the official open track ranking of the shared task evaluations with an encoding-related bug distorting the results. For this reason we carry out further unofficial evaluations. Unofficial versions of our systems would have achieved higher ranks.</abstract>
<identifier type="citekey">gombert-bartsch-2020-multivitaminbooster</identifier>
<location>
<url>https://aclanthology.org/2020.mwe-1.20</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>149</start>
<end>155</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T MultiVitaminBooster at PARSEME Shared Task 2020: Combining Window- and Dependency-Based Features with Multilingual Contextualised Word Embeddings for VMWE Detection
%A Gombert, Sebastian
%A Bartsch, Sabine
%Y Markantonatou, Stella
%Y McCrae, John
%Y Mitrović, Jelena
%Y Tiberius, Carole
%Y Ramisch, Carlos
%Y Vaidya, Ashwini
%Y Osenova, Petya
%Y Savary, Agata
%S Proceedings of the Joint Workshop on Multiword Expressions and Electronic Lexicons
%D 2020
%8 December
%I Association for Computational Linguistics
%C online
%F gombert-bartsch-2020-multivitaminbooster
%X In this paper, we present MultiVitaminBooster, a system implemented for the PARSEME shared task on semi-supervised identification of verbal multiword expressions - edition 1.2. For our approach, we interpret detecting verbal multiword expressions as a token classification task aiming to decide whether a token is part of a verbal multiword expression or not. For this purpose, we train gradient boosting-based models. We encode tokens as feature vectors combining multilingual contextualized word embeddings provided by the XLM-RoBERTa language model with a more traditional linguistic feature set relying on context windows and dependency relations. Our system was ranked 7th in the official open track ranking of the shared task evaluations with an encoding-related bug distorting the results. For this reason we carry out further unofficial evaluations. Unofficial versions of our systems would have achieved higher ranks.
%U https://aclanthology.org/2020.mwe-1.20
%P 149-155
Markdown (Informal)
[MultiVitaminBooster at PARSEME Shared Task 2020: Combining Window- and Dependency-Based Features with Multilingual Contextualised Word Embeddings for VMWE Detection](https://aclanthology.org/2020.mwe-1.20) (Gombert & Bartsch, MWE 2020)
ACL