@inproceedings{corazza-etal-2020-hybrid,
title = "Hybrid Emoji-Based Masked Language Models for Zero-Shot Abusive Language Detection",
author = "Corazza, Michele and
Menini, Stefano and
Cabrio, Elena and
Tonelli, Sara and
Villata, Serena",
editor = "Cohn, Trevor and
He, Yulan and
Liu, Yang",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2020",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.findings-emnlp.84/",
doi = "10.18653/v1/2020.findings-emnlp.84",
pages = "943--949",
abstract = "Recent studies have demonstrated the effectiveness of cross-lingual language model pre-training on different NLP tasks, such as natural language inference and machine translation. In our work, we test this approach on social media data, which are particularly challenging to process within this framework, since the limited length of the textual messages and the irregularity of the language make it harder to learn meaningful encodings. More specifically, we propose a hybrid emoji-based Masked Language Model (MLM) to leverage the common information conveyed by emojis across different languages and improve the learned cross-lingual representation of short text messages, with the goal to perform zero- shot abusive language detection. We compare the results obtained with the original MLM to the ones obtained by our method, showing improved performance on German, Italian and Spanish."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="corazza-etal-2020-hybrid">
<titleInfo>
<title>Hybrid Emoji-Based Masked Language Models for Zero-Shot Abusive Language Detection</title>
</titleInfo>
<name type="personal">
<namePart type="given">Michele</namePart>
<namePart type="family">Corazza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stefano</namePart>
<namePart type="family">Menini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elena</namePart>
<namePart type="family">Cabrio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Tonelli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Serena</namePart>
<namePart type="family">Villata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2020</title>
</titleInfo>
<name type="personal">
<namePart type="given">Trevor</namePart>
<namePart type="family">Cohn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yulan</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Recent studies have demonstrated the effectiveness of cross-lingual language model pre-training on different NLP tasks, such as natural language inference and machine translation. In our work, we test this approach on social media data, which are particularly challenging to process within this framework, since the limited length of the textual messages and the irregularity of the language make it harder to learn meaningful encodings. More specifically, we propose a hybrid emoji-based Masked Language Model (MLM) to leverage the common information conveyed by emojis across different languages and improve the learned cross-lingual representation of short text messages, with the goal to perform zero- shot abusive language detection. We compare the results obtained with the original MLM to the ones obtained by our method, showing improved performance on German, Italian and Spanish.</abstract>
<identifier type="citekey">corazza-etal-2020-hybrid</identifier>
<identifier type="doi">10.18653/v1/2020.findings-emnlp.84</identifier>
<location>
<url>https://aclanthology.org/2020.findings-emnlp.84/</url>
</location>
<part>
<date>2020-11</date>
<extent unit="page">
<start>943</start>
<end>949</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Hybrid Emoji-Based Masked Language Models for Zero-Shot Abusive Language Detection
%A Corazza, Michele
%A Menini, Stefano
%A Cabrio, Elena
%A Tonelli, Sara
%A Villata, Serena
%Y Cohn, Trevor
%Y He, Yulan
%Y Liu, Yang
%S Findings of the Association for Computational Linguistics: EMNLP 2020
%D 2020
%8 November
%I Association for Computational Linguistics
%C Online
%F corazza-etal-2020-hybrid
%X Recent studies have demonstrated the effectiveness of cross-lingual language model pre-training on different NLP tasks, such as natural language inference and machine translation. In our work, we test this approach on social media data, which are particularly challenging to process within this framework, since the limited length of the textual messages and the irregularity of the language make it harder to learn meaningful encodings. More specifically, we propose a hybrid emoji-based Masked Language Model (MLM) to leverage the common information conveyed by emojis across different languages and improve the learned cross-lingual representation of short text messages, with the goal to perform zero- shot abusive language detection. We compare the results obtained with the original MLM to the ones obtained by our method, showing improved performance on German, Italian and Spanish.
%R 10.18653/v1/2020.findings-emnlp.84
%U https://aclanthology.org/2020.findings-emnlp.84/
%U https://doi.org/10.18653/v1/2020.findings-emnlp.84
%P 943-949
Markdown (Informal)
[Hybrid Emoji-Based Masked Language Models for Zero-Shot Abusive Language Detection](https://aclanthology.org/2020.findings-emnlp.84/) (Corazza et al., Findings 2020)
ACL