@inproceedings{bigoulaeva-etal-2021-cross,
title = "Cross-Lingual Transfer Learning for Hate Speech Detection",
author = "Bigoulaeva, Irina and
Hangya, Viktor and
Fraser, Alexander",
editor = "Chakravarthi, Bharathi Raja and
McCrae, John P. and
Zarrouk, Manel and
Bali, Kalika and
Buitelaar, Paul",
booktitle = "Proceedings of the First Workshop on Language Technology for Equality, Diversity and Inclusion",
month = apr,
year = "2021",
address = "Kyiv",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.ltedi-1.3/",
pages = "15--25",
abstract = "We address the task of automatic hate speech detection for low-resource languages. Rather than collecting and annotating new hate speech data, we show how to use cross-lingual transfer learning to leverage already existing data from higher-resource languages. Using bilingual word embeddings based classifiers we achieve good performance on the target language by training only on the source dataset. Using our transferred system we bootstrap on unlabeled target language data, improving the performance of standard cross-lingual transfer approaches. We use English as a high resource language and German as the target language for which only a small amount of annotated corpora are available. Our results indicate that cross-lingual transfer learning together with our approach to leverage additional unlabeled data is an effective way of achieving good performance on low-resource target languages without the need for any target-language annotations."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bigoulaeva-etal-2021-cross">
<titleInfo>
<title>Cross-Lingual Transfer Learning for Hate Speech Detection</title>
</titleInfo>
<name type="personal">
<namePart type="given">Irina</namePart>
<namePart type="family">Bigoulaeva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Viktor</namePart>
<namePart type="family">Hangya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexander</namePart>
<namePart type="family">Fraser</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Language Technology for Equality, Diversity and Inclusion</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bharathi</namePart>
<namePart type="given">Raja</namePart>
<namePart type="family">Chakravarthi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">John</namePart>
<namePart type="given">P</namePart>
<namePart type="family">McCrae</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manel</namePart>
<namePart type="family">Zarrouk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kalika</namePart>
<namePart type="family">Bali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Paul</namePart>
<namePart type="family">Buitelaar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Kyiv</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We address the task of automatic hate speech detection for low-resource languages. Rather than collecting and annotating new hate speech data, we show how to use cross-lingual transfer learning to leverage already existing data from higher-resource languages. Using bilingual word embeddings based classifiers we achieve good performance on the target language by training only on the source dataset. Using our transferred system we bootstrap on unlabeled target language data, improving the performance of standard cross-lingual transfer approaches. We use English as a high resource language and German as the target language for which only a small amount of annotated corpora are available. Our results indicate that cross-lingual transfer learning together with our approach to leverage additional unlabeled data is an effective way of achieving good performance on low-resource target languages without the need for any target-language annotations.</abstract>
<identifier type="citekey">bigoulaeva-etal-2021-cross</identifier>
<location>
<url>https://aclanthology.org/2021.ltedi-1.3/</url>
</location>
<part>
<date>2021-04</date>
<extent unit="page">
<start>15</start>
<end>25</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Cross-Lingual Transfer Learning for Hate Speech Detection
%A Bigoulaeva, Irina
%A Hangya, Viktor
%A Fraser, Alexander
%Y Chakravarthi, Bharathi Raja
%Y McCrae, John P.
%Y Zarrouk, Manel
%Y Bali, Kalika
%Y Buitelaar, Paul
%S Proceedings of the First Workshop on Language Technology for Equality, Diversity and Inclusion
%D 2021
%8 April
%I Association for Computational Linguistics
%C Kyiv
%F bigoulaeva-etal-2021-cross
%X We address the task of automatic hate speech detection for low-resource languages. Rather than collecting and annotating new hate speech data, we show how to use cross-lingual transfer learning to leverage already existing data from higher-resource languages. Using bilingual word embeddings based classifiers we achieve good performance on the target language by training only on the source dataset. Using our transferred system we bootstrap on unlabeled target language data, improving the performance of standard cross-lingual transfer approaches. We use English as a high resource language and German as the target language for which only a small amount of annotated corpora are available. Our results indicate that cross-lingual transfer learning together with our approach to leverage additional unlabeled data is an effective way of achieving good performance on low-resource target languages without the need for any target-language annotations.
%U https://aclanthology.org/2021.ltedi-1.3/
%P 15-25
Markdown (Informal)
[Cross-Lingual Transfer Learning for Hate Speech Detection](https://aclanthology.org/2021.ltedi-1.3/) (Bigoulaeva et al., LTEDI 2021)
ACL