@inproceedings{caselli-etal-2021-dalc,
title = "{DALC}: the {D}utch Abusive Language Corpus",
author = "Caselli, Tommaso and
Schelhaas, Arjan and
Weultjes, Marieke and
Leistra, Folkert and
van der Veen, Hylke and
Timmerman, Gerben and
Nissim, Malvina",
editor = "Mostafazadeh Davani, Aida and
Kiela, Douwe and
Lambert, Mathias and
Vidgen, Bertie and
Prabhakaran, Vinodkumar and
Waseem, Zeerak",
booktitle = "Proceedings of the 5th Workshop on Online Abuse and Harms (WOAH 2021)",
month = aug,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.woah-1.6/",
doi = "10.18653/v1/2021.woah-1.6",
pages = "54--66",
abstract = "As socially unacceptable language become pervasive in social media platforms, the need for automatic content moderation become more pressing. This contribution introduces the Dutch Abusive Language Corpus (DALC v1.0), a new dataset with tweets manually annotated for abusive language. The resource address a gap in language resources for Dutch and adopts a multi-layer annotation scheme modeling the explicitness and the target of the abusive messages. Baselines experiments on all annotation layers have been conducted, achieving a macro F1 score of 0.748 for binary classification of the explicitness layer and .489 for target classification."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="caselli-etal-2021-dalc">
<titleInfo>
<title>DALC: the Dutch Abusive Language Corpus</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tommaso</namePart>
<namePart type="family">Caselli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arjan</namePart>
<namePart type="family">Schelhaas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marieke</namePart>
<namePart type="family">Weultjes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Folkert</namePart>
<namePart type="family">Leistra</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hylke</namePart>
<namePart type="family">van der Veen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gerben</namePart>
<namePart type="family">Timmerman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Malvina</namePart>
<namePart type="family">Nissim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 5th Workshop on Online Abuse and Harms (WOAH 2021)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Aida</namePart>
<namePart type="family">Mostafazadeh Davani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Douwe</namePart>
<namePart type="family">Kiela</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mathias</namePart>
<namePart type="family">Lambert</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bertie</namePart>
<namePart type="family">Vidgen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vinodkumar</namePart>
<namePart type="family">Prabhakaran</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zeerak</namePart>
<namePart type="family">Waseem</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>As socially unacceptable language become pervasive in social media platforms, the need for automatic content moderation become more pressing. This contribution introduces the Dutch Abusive Language Corpus (DALC v1.0), a new dataset with tweets manually annotated for abusive language. The resource address a gap in language resources for Dutch and adopts a multi-layer annotation scheme modeling the explicitness and the target of the abusive messages. Baselines experiments on all annotation layers have been conducted, achieving a macro F1 score of 0.748 for binary classification of the explicitness layer and .489 for target classification.</abstract>
<identifier type="citekey">caselli-etal-2021-dalc</identifier>
<identifier type="doi">10.18653/v1/2021.woah-1.6</identifier>
<location>
<url>https://aclanthology.org/2021.woah-1.6/</url>
</location>
<part>
<date>2021-08</date>
<extent unit="page">
<start>54</start>
<end>66</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T DALC: the Dutch Abusive Language Corpus
%A Caselli, Tommaso
%A Schelhaas, Arjan
%A Weultjes, Marieke
%A Leistra, Folkert
%A van der Veen, Hylke
%A Timmerman, Gerben
%A Nissim, Malvina
%Y Mostafazadeh Davani, Aida
%Y Kiela, Douwe
%Y Lambert, Mathias
%Y Vidgen, Bertie
%Y Prabhakaran, Vinodkumar
%Y Waseem, Zeerak
%S Proceedings of the 5th Workshop on Online Abuse and Harms (WOAH 2021)
%D 2021
%8 August
%I Association for Computational Linguistics
%C Online
%F caselli-etal-2021-dalc
%X As socially unacceptable language become pervasive in social media platforms, the need for automatic content moderation become more pressing. This contribution introduces the Dutch Abusive Language Corpus (DALC v1.0), a new dataset with tweets manually annotated for abusive language. The resource address a gap in language resources for Dutch and adopts a multi-layer annotation scheme modeling the explicitness and the target of the abusive messages. Baselines experiments on all annotation layers have been conducted, achieving a macro F1 score of 0.748 for binary classification of the explicitness layer and .489 for target classification.
%R 10.18653/v1/2021.woah-1.6
%U https://aclanthology.org/2021.woah-1.6/
%U https://doi.org/10.18653/v1/2021.woah-1.6
%P 54-66
Markdown (Informal)
[DALC: the Dutch Abusive Language Corpus](https://aclanthology.org/2021.woah-1.6/) (Caselli et al., WOAH 2021)
ACL
- Tommaso Caselli, Arjan Schelhaas, Marieke Weultjes, Folkert Leistra, Hylke van der Veen, Gerben Timmerman, and Malvina Nissim. 2021. DALC: the Dutch Abusive Language Corpus. In Proceedings of the 5th Workshop on Online Abuse and Harms (WOAH 2021), pages 54–66, Online. Association for Computational Linguistics.