% ACL Anthology record: system paper for the ImageArg shared task, published in the ArgMining 2023 workshop proceedings.
@inproceedings{soltani-romberg-2023-general,
    title     = {A General Framework for Multimodal Argument Persuasiveness Classification of Tweets},
    author    = {Soltani, Mohammad and Romberg, Julia},
    editor    = {Alshomary, Milad and Chen, Chung-Chi and Muresan, Smaranda and Park, Joonsuk and Romberg, Julia},
    booktitle = {Proceedings of the 10th Workshop on Argument Mining},
    month     = dec,
    year      = {2023},
    address   = {Singapore},
    publisher = {Association for Computational Linguistics},
    pages     = {148--156},
    doi       = {10.18653/v1/2023.argmining-1.15},
    url       = {https://aclanthology.org/2023.argmining-1.15/},
    abstract  = {An important property of argumentation concerns the degree of its persuasiveness, which can be influenced by various modalities. On social media platforms, individuals usually have the option of supporting their textual statements with images. The goals of the ImageArg shared task, held with ArgMining 2023, were therefore (A) to classify tweet stances considering both modalities and (B) to predict the influence of an image on the persuasiveness of a tweet text. In this paper, we present our proposed methodology that shows strong performance on both tasks, placing 3rd team on the leaderboard in each case with F1 scores of 0.8273 (A) and 0.5281 (B). The framework relies on pre-trained models to extract text and image features, which are then fed into a task-specific classification model. Our experiments highlighted that the multimodal vision and language model CLIP holds a specific importance in the extraction of features, in particular for task (A).},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the paper; the host relatedItem describes the proceedings volume and its editors. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="soltani-romberg-2023-general">
    <titleInfo>
      <title>A General Framework for Multimodal Argument Persuasiveness Classification of Tweets</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Mohammad</namePart>
      <namePart type="family">Soltani</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Julia</namePart>
      <namePart type="family">Romberg</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2023-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 10th Workshop on Argument Mining</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Milad</namePart>
        <namePart type="family">Alshomary</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Chung-Chi</namePart>
        <namePart type="family">Chen</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Smaranda</namePart>
        <namePart type="family">Muresan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Joonsuk</namePart>
        <namePart type="family">Park</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Julia</namePart>
        <namePart type="family">Romberg</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Singapore</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>An important property of argumentation concerns the degree of its persuasiveness, which can be influenced by various modalities. On social media platforms, individuals usually have the option of supporting their textual statements with images. The goals of the ImageArg shared task, held with ArgMining 2023, were therefore (A) to classify tweet stances considering both modalities and (B) to predict the influence of an image on the persuasiveness of a tweet text. In this paper, we present our proposed methodology that shows strong performance on both tasks, placing 3rd team on the leaderboard in each case with F1 scores of 0.8273 (A) and 0.5281 (B). The framework relies on pre-trained models to extract text and image features, which are then fed into a task-specific classification model. Our experiments highlighted that the multimodal vision and language model CLIP holds a specific importance in the extraction of features, in particular for task (A).</abstract>
    <identifier type="citekey">soltani-romberg-2023-general</identifier>
    <identifier type="doi">10.18653/v1/2023.argmining-1.15</identifier>
    <location>
      <url>https://aclanthology.org/2023.argmining-1.15/</url>
    </location>
    <part>
      <date>2023-12</date>
      <extent unit="page">
        <start>148</start>
        <end>156</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T A General Framework for Multimodal Argument Persuasiveness Classification of Tweets
%A Soltani, Mohammad
%A Romberg, Julia
%Y Alshomary, Milad
%Y Chen, Chung-Chi
%Y Muresan, Smaranda
%Y Park, Joonsuk
%Y Romberg, Julia
%S Proceedings of the 10th Workshop on Argument Mining
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F soltani-romberg-2023-general
%X An important property of argumentation concerns the degree of its persuasiveness, which can be influenced by various modalities. On social media platforms, individuals usually have the option of supporting their textual statements with images. The goals of the ImageArg shared task, held with ArgMining 2023, were therefore (A) to classify tweet stances considering both modalities and (B) to predict the influence of an image on the persuasiveness of a tweet text. In this paper, we present our proposed methodology that shows strong performance on both tasks, placing 3rd team on the leaderboard in each case with F1 scores of 0.8273 (A) and 0.5281 (B). The framework relies on pre-trained models to extract text and image features, which are then fed into a task-specific classification model. Our experiments highlighted that the multimodal vision and language model CLIP holds a specific importance in the extraction of features, in particular for task (A).
%R 10.18653/v1/2023.argmining-1.15
%U https://aclanthology.org/2023.argmining-1.15/
%U https://doi.org/10.18653/v1/2023.argmining-1.15
%P 148-156
Markdown (Informal)
[A General Framework for Multimodal Argument Persuasiveness Classification of Tweets](https://aclanthology.org/2023.argmining-1.15/) (Soltani & Romberg, ArgMining 2023)
ACL