% System description paper for SemEval-2022 task 5 (MAMI).
% Proper nouns and acronyms in title/booktitle are protected with whole-word
% braces so that styles applying sentence casing keep their capitalisation.
@inproceedings{kalkenings-mandl-2022-university,
  author    = {Kalkenings, Milan and
               Mandl, Thomas},
  title     = {{University} of {Hildesheim} at {SemEval-2022} task 5: Combining Deep Text and Image Models for Multimedia Misogyny Detection},
  editor    = {Emerson, Guy and
               Schluter, Natalie and
               Stanovsky, Gabriel and
               Kumar, Ritesh and
               Palmer, Alexis and
               Schneider, Nathan and
               Singh, Siddharth and
               Ratan, Shyam},
  booktitle = {Proceedings of the 16th International Workshop on Semantic Evaluation ({SemEval-2022})},
  month     = jul,
  year      = {2022},
  address   = {Seattle, United States},
  publisher = {Association for Computational Linguistics},
  pages     = {718--723},
  doi       = {10.18653/v1/2022.semeval-1.98},
  url       = {https://aclanthology.org/2022.semeval-1.98/},
  abstract  = {This paper describes the participation of the University of Hildesheim at the SemEval task 5. The task deals with Multimedia Automatic Misogyny Identification (MAMI). Hateful memes need to be detected within a data collection. For this task, we implemented six models for text and image analysis and tested the effectiveness of their combinations. A fusion system implements a multi-modal transformer to integrate the embeddings of these models. The best performing models included BERT for the text of the meme, manually derived associations for words in the memes and a Faster R-CNN network for the image. We evaluated the performance of our approach also with the data of the Facebook Hateful Memes challenge in order to analyze the generalisation capabilities of the approach.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record (ID kalkenings-mandl-2022-university). -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="kalkenings-mandl-2022-university">
    <titleInfo>
      <title>University of Hildesheim at SemEval-2022 task 5: Combining Deep Text and Image Models for Multimedia Misogyny Detection</title>
    </titleInfo>
    <!-- Authors of the paper, in order. -->
    <name type="personal">
      <namePart type="given">Milan</namePart>
      <namePart type="family">Kalkenings</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Thomas</namePart>
      <namePart type="family">Mandl</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2022-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 16th International Workshop on Semantic Evaluation (SemEval-2022)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Guy</namePart>
        <namePart type="family">Emerson</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Natalie</namePart>
        <namePart type="family">Schluter</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Gabriel</namePart>
        <namePart type="family">Stanovsky</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ritesh</namePart>
        <namePart type="family">Kumar</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Alexis</namePart>
        <namePart type="family">Palmer</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Nathan</namePart>
        <namePart type="family">Schneider</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Siddharth</namePart>
        <namePart type="family">Singh</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Shyam</namePart>
        <namePart type="family">Ratan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Seattle, United States</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>This paper describes the participation of the University of Hildesheim at the SemEval task 5. The task deals with Multimedia Automatic Misogyny Identification (MAMI). Hateful memes need to be detected within a data collection. For this task, we implemented six models for text and image analysis and tested the effectiveness of their combinations. A fusion system implements a multi-modal transformer to integrate the embeddings of these models. The best performing models included BERT for the text of the meme, manually derived associations for words in the memes and a Faster R-CNN network for the image. We evaluated the performance of our approach also with the data of the Facebook Hateful Memes challenge in order to analyze the generalisation capabilities of the approach.</abstract>
    <!-- Identifiers and canonical landing page. -->
    <identifier type="citekey">kalkenings-mandl-2022-university</identifier>
    <identifier type="doi">10.18653/v1/2022.semeval-1.98</identifier>
    <location>
      <url>https://aclanthology.org/2022.semeval-1.98/</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2022-07</date>
      <extent unit="page">
        <start>718</start>
        <end>723</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T University of Hildesheim at SemEval-2022 task 5: Combining Deep Text and Image Models for Multimedia Misogyny Detection
%A Kalkenings, Milan
%A Mandl, Thomas
%Y Emerson, Guy
%Y Schluter, Natalie
%Y Stanovsky, Gabriel
%Y Kumar, Ritesh
%Y Palmer, Alexis
%Y Schneider, Nathan
%Y Singh, Siddharth
%Y Ratan, Shyam
%S Proceedings of the 16th International Workshop on Semantic Evaluation (SemEval-2022)
%D 2022
%8 July
%I Association for Computational Linguistics
%C Seattle, United States
%F kalkenings-mandl-2022-university
%X This paper describes the participation of the University of Hildesheim at the SemEval task 5. The task deals with Multimedia Automatic Misogyny Identification (MAMI). Hateful memes need to be detected within a data collection. For this task, we implemented six models for text and image analysis and tested the effectiveness of their combinations. A fusion system implements a multi-modal transformer to integrate the embeddings of these models. The best performing models included BERT for the text of the meme, manually derived associations for words in the memes and a Faster R-CNN network for the image. We evaluated the performance of our approach also with the data of the Facebook Hateful Memes challenge in order to analyze the generalisation capabilities of the approach.
%R 10.18653/v1/2022.semeval-1.98
%U https://aclanthology.org/2022.semeval-1.98/
%U https://doi.org/10.18653/v1/2022.semeval-1.98
%P 718-723
Markdown (Informal)
[University of Hildesheim at SemEval-2022 task 5: Combining Deep Text and Image Models for Multimedia Misogyny Detection](https://aclanthology.org/2022.semeval-1.98/) (Kalkenings & Mandl, SemEval 2022)
ACL