% Workshop paper record (Proceedings of the 10th Workshop on Argument Mining, 2023).
@inproceedings{sharma-etal-2023-argumentative,
  author    = {Sharma, Arushi and
               Gupta, Abhibha and
               Bilalpur, Maneesh},
  title     = {Argumentative Stance Prediction: An Exploratory Study on Multimodality and Few-Shot Learning},
  editor    = {Alshomary, Milad and
               Chen, Chung-Chi and
               Muresan, Smaranda and
               Park, Joonsuk and
               Romberg, Julia},
  booktitle = {Proceedings of the 10th Workshop on Argument Mining},
  month     = dec,
  year      = {2023},
  address   = {Singapore},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2023.argmining-1.18/},
  doi       = {10.18653/v1/2023.argmining-1.18},
  pages     = {167--174},
  abstract  = {To advance argumentative stance prediction as a multimodal problem, the First Shared Task in Multimodal Argument Mining hosted stance prediction in crucial social topics of gun control and abortion. Our exploratory study attempts to evaluate the necessity of images for stance prediction in tweets and compare out-of-the-box text-based large-language models (LLM) in few-shot settings against fine-tuned unimodal and multimodal models. Our work suggests an ensemble of fine-tuned text-based language models (0.817 F1-score) outperforms both the multimodal (0.677 F1-score) and text-based few-shot prediction using a recent state-of-the-art LLM (0.550 F1-score). In addition to the differences in performance, our findings suggest that the multimodal models tend to perform better when image content is summarized as natural language over their native pixel structure and, using in-context examples improves few-shot learning of LLMs performance.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="sharma-etal-2023-argumentative">
    <!-- Title of the paper -->
    <titleInfo>
      <title>Argumentative Stance Prediction: An Exploratory Study on Multimodality and Few-Shot Learning</title>
    </titleInfo>
    <!-- Authors, in listed order -->
    <name type="personal">
      <namePart type="given">Arushi</namePart>
      <namePart type="family">Sharma</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Abhibha</namePart>
      <namePart type="family">Gupta</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Maneesh</namePart>
      <namePart type="family">Bilalpur</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2023-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host publication: the proceedings volume, its editors and publisher -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 10th Workshop on Argument Mining</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Milad</namePart>
        <namePart type="family">Alshomary</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Chung-Chi</namePart>
        <namePart type="family">Chen</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Smaranda</namePart>
        <namePart type="family">Muresan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Joonsuk</namePart>
        <namePart type="family">Park</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Julia</namePart>
        <namePart type="family">Romberg</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Singapore</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>To advance argumentative stance prediction as a multimodal problem, the First Shared Task in Multimodal Argument Mining hosted stance prediction in crucial social topics of gun control and abortion. Our exploratory study attempts to evaluate the necessity of images for stance prediction in tweets and compare out-of-the-box text-based large-language models (LLM) in few-shot settings against fine-tuned unimodal and multimodal models. Our work suggests an ensemble of fine-tuned text-based language models (0.817 F1-score) outperforms both the multimodal (0.677 F1-score) and text-based few-shot prediction using a recent state-of-the-art LLM (0.550 F1-score). In addition to the differences in performance, our findings suggest that the multimodal models tend to perform better when image content is summarized as natural language over their native pixel structure and, using in-context examples improves few-shot learning of LLMs performance.</abstract>
    <!-- Identifiers and online location -->
    <identifier type="citekey">sharma-etal-2023-argumentative</identifier>
    <identifier type="doi">10.18653/v1/2023.argmining-1.18</identifier>
    <location>
      <url>https://aclanthology.org/2023.argmining-1.18/</url>
    </location>
    <!-- Issue date and page range within the host volume -->
    <part>
      <date>2023-12</date>
      <extent unit="page">
        <start>167</start>
        <end>174</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Argumentative Stance Prediction: An Exploratory Study on Multimodality and Few-Shot Learning
%A Sharma, Arushi
%A Gupta, Abhibha
%A Bilalpur, Maneesh
%Y Alshomary, Milad
%Y Chen, Chung-Chi
%Y Muresan, Smaranda
%Y Park, Joonsuk
%Y Romberg, Julia
%S Proceedings of the 10th Workshop on Argument Mining
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F sharma-etal-2023-argumentative
%X To advance argumentative stance prediction as a multimodal problem, the First Shared Task in Multimodal Argument Mining hosted stance prediction in crucial social topics of gun control and abortion. Our exploratory study attempts to evaluate the necessity of images for stance prediction in tweets and compare out-of-the-box text-based large-language models (LLM) in few-shot settings against fine-tuned unimodal and multimodal models. Our work suggests an ensemble of fine-tuned text-based language models (0.817 F1-score) outperforms both the multimodal (0.677 F1-score) and text-based few-shot prediction using a recent state-of-the-art LLM (0.550 F1-score). In addition to the differences in performance, our findings suggest that the multimodal models tend to perform better when image content is summarized as natural language over their native pixel structure and, using in-context examples improves few-shot learning of LLMs performance.
%R 10.18653/v1/2023.argmining-1.18
%U https://aclanthology.org/2023.argmining-1.18/
%U https://doi.org/10.18653/v1/2023.argmining-1.18
%P 167-174
Markdown (Informal)
[Argumentative Stance Prediction: An Exploratory Study on Multimodality and Few-Shot Learning](https://aclanthology.org/2023.argmining-1.18/) (Sharma et al., ArgMining 2023)
ACL