@inproceedings{charfi-etal-2024-marasta,
title = "{MARASTA}: A Multi-dialectal {A}rabic Cross-domain Stance Corpus",
author = "Charfi, Anis and
Ben-Sghaier, Mabrouka and
Atalla, Andria Samy Raouf and
Akasheh, Raghda and
Al-Emadi, Sara and
Zaghouani, Wajdi",
editor = "Calzolari, Nicoletta and
Kan, Min-Yen and
Hoste, Veronique and
Lenci, Alessandro and
Sakti, Sakriani and
Xue, Nianwen",
booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.lrec-main.964/",
pages = "11060--11069",
abstract = "This paper introduces a cross-domain and multi-dialectal stance corpus for Arabic that includes four regions in the Arab World and covers the main Arabic dialect groups. Our corpus consists of 4657 sentences manually annotated with each sentence's stance towards a specific topic. For each region, we collected sentences related to two controversial topics. We annotated each sentence by at least two annotators to indicate if its stance favors the topic, is against it, or is neutral. Our corpus is well-balanced concerning dialect and stance. Approximately half of the sentences are in Modern Standard Arabic (MSA) for each region, and the other half is in the region's respective dialect. We conducted several machine-learning experiments for stance detection using our new corpus. Our most successful model is the Multi-Layer Perceptron (MLP), using Unigram or TF-IDF extracted features, which yielded an F1-score of 0.66 and an accuracy score of 0.66. Compared with the most similar state-of-the-art dataset, our dataset outperformed in specific stance classes, particularly {\textquotedblleft}neutral{\textquotedblright} and {\textquotedblleft}against{\textquotedblright}."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="charfi-etal-2024-marasta">
<titleInfo>
<title>MARASTA: A Multi-dialectal Arabic Cross-domain Stance Corpus</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anis</namePart>
<namePart type="family">Charfi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mabrouka</namePart>
<namePart type="family">Ben-Sghaier</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andria</namePart>
<namePart type="given">Samy</namePart>
<namePart type="given">Raouf</namePart>
<namePart type="family">Atalla</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Raghda</namePart>
<namePart type="family">Akasheh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Al-Emadi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wajdi</namePart>
<namePart type="family">Zaghouani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper introduces a cross-domain and multi-dialectal stance corpus for Arabic that includes four regions in the Arab World and covers the main Arabic dialect groups. Our corpus consists of 4657 sentences manually annotated with each sentence’s stance towards a specific topic. For each region, we collected sentences related to two controversial topics. We annotated each sentence by at least two annotators to indicate if its stance favors the topic, is against it, or is neutral. Our corpus is well-balanced concerning dialect and stance. Approximately half of the sentences are in Modern Standard Arabic (MSA) for each region, and the other half is in the region’s respective dialect. We conducted several machine-learning experiments for stance detection using our new corpus. Our most successful model is the Multi-Layer Perceptron (MLP), using Unigram or TF-IDF extracted features, which yielded an F1-score of 0.66 and an accuracy score of 0.66. Compared with the most similar state-of-the-art dataset, our dataset outperformed in specific stance classes, particularly “neutral” and “against”.</abstract>
<identifier type="citekey">charfi-etal-2024-marasta</identifier>
<location>
<url>https://aclanthology.org/2024.lrec-main.964/</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>11060</start>
<end>11069</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T MARASTA: A Multi-dialectal Arabic Cross-domain Stance Corpus
%A Charfi, Anis
%A Ben-Sghaier, Mabrouka
%A Atalla, Andria Samy Raouf
%A Akasheh, Raghda
%A Al-Emadi, Sara
%A Zaghouani, Wajdi
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F charfi-etal-2024-marasta
%X This paper introduces a cross-domain and multi-dialectal stance corpus for Arabic that includes four regions in the Arab World and covers the main Arabic dialect groups. Our corpus consists of 4657 sentences manually annotated with each sentence’s stance towards a specific topic. For each region, we collected sentences related to two controversial topics. We annotated each sentence by at least two annotators to indicate if its stance favors the topic, is against it, or is neutral. Our corpus is well-balanced concerning dialect and stance. Approximately half of the sentences are in Modern Standard Arabic (MSA) for each region, and the other half is in the region’s respective dialect. We conducted several machine-learning experiments for stance detection using our new corpus. Our most successful model is the Multi-Layer Perceptron (MLP), using Unigram or TF-IDF extracted features, which yielded an F1-score of 0.66 and an accuracy score of 0.66. Compared with the most similar state-of-the-art dataset, our dataset outperformed in specific stance classes, particularly “neutral” and “against”.
%U https://aclanthology.org/2024.lrec-main.964/
%P 11060-11069
Markdown (Informal)
[MARASTA: A Multi-dialectal Arabic Cross-domain Stance Corpus](https://aclanthology.org/2024.lrec-main.964/) (Charfi et al., LREC-COLING 2024)
ACL
- Anis Charfi, Mabrouka Ben-Sghaier, Andria Samy Raouf Atalla, Raghda Akasheh, Sara Al-Emadi, and Wajdi Zaghouani. 2024. MARASTA: A Multi-dialectal Arabic Cross-domain Stance Corpus. In Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024), pages 11060–11069, Torino, Italia. ELRA and ICCL.