@inproceedings{tahssin-etal-2020-identifying,
title = "Identifying Nuanced Dialect for {A}rabic Tweets with Deep Learning and Reverse Translation Corpus Extension System",
author = "Tahssin, Rawan and
Kishk, Youssef and
Torki, Marwan",
editor = "Zitouni, Imed and
Abdul-Mageed, Muhammad and
Bouamor, Houda and
Bougares, Fethi and
El-Haj, Mahmoud and
Tomeh, Nadi and
Zaghouani, Wajdi",
booktitle = "Proceedings of the Fifth Arabic Natural Language Processing Workshop",
month = dec,
year = "2020",
address = "Barcelona, Spain (Online)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.wanlp-1.30",
pages = "288--294",
abstract = "In this paper, we present our work for the NADI Shared Task (Abdul-Mageed and Habash, 2020): Nuanced Arabic Dialect Identification for Subtask-1: country-level dialect identification. We introduce a Reverse Translation Corpus Extension System (RTCES) to handle data imbalance along with reported results on several experimented approaches of word and document representations and different model architectures. The top scoring model was based on AraBERT (Antoun et al., 2020), with our modified extended corpus based on reverse translation of the given Arabic tweets. The selected system achieved a macro average F1 score of 20.34{\%} on the test set, which places us as the 7th out of 18 teams in the final ranking Leaderboard.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="tahssin-etal-2020-identifying">
<titleInfo>
<title>Identifying Nuanced Dialect for Arabic Tweets with Deep Learning and Reverse Translation Corpus Extension System</title>
</titleInfo>
<name type="personal">
<namePart type="given">Rawan</namePart>
<namePart type="family">Tahssin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Youssef</namePart>
<namePart type="family">Kishk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marwan</namePart>
<namePart type="family">Torki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth Arabic Natural Language Processing Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">Imed</namePart>
<namePart type="family">Zitouni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Muhammad</namePart>
<namePart type="family">Abdul-Mageed</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Houda</namePart>
<namePart type="family">Bouamor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fethi</namePart>
<namePart type="family">Bougares</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mahmoud</namePart>
<namePart type="family">El-Haj</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nadi</namePart>
<namePart type="family">Tomeh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wajdi</namePart>
<namePart type="family">Zaghouani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Barcelona, Spain (Online)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we present our work for the NADI Shared Task (Abdul-Mageed and Habash, 2020): Nuanced Arabic Dialect Identification for Subtask-1: country-level dialect identification. We introduce a Reverse Translation Corpus Extension System (RTCES) to handle data imbalance along with reported results on several experimented approaches of word and document representations and different model architectures. The top scoring model was based on AraBERT (Antoun et al., 2020), with our modified extended corpus based on reverse translation of the given Arabic tweets. The selected system achieved a macro average F1 score of 20.34% on the test set, which places us as the 7th out of 18 teams in the final ranking Leaderboard.</abstract>
<identifier type="citekey">tahssin-etal-2020-identifying</identifier>
<location>
<url>https://aclanthology.org/2020.wanlp-1.30</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>288</start>
<end>294</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Identifying Nuanced Dialect for Arabic Tweets with Deep Learning and Reverse Translation Corpus Extension System
%A Tahssin, Rawan
%A Kishk, Youssef
%A Torki, Marwan
%Y Zitouni, Imed
%Y Abdul-Mageed, Muhammad
%Y Bouamor, Houda
%Y Bougares, Fethi
%Y El-Haj, Mahmoud
%Y Tomeh, Nadi
%Y Zaghouani, Wajdi
%S Proceedings of the Fifth Arabic Natural Language Processing Workshop
%D 2020
%8 December
%I Association for Computational Linguistics
%C Barcelona, Spain (Online)
%F tahssin-etal-2020-identifying
%X In this paper, we present our work for the NADI Shared Task (Abdul-Mageed and Habash, 2020): Nuanced Arabic Dialect Identification for Subtask-1: country-level dialect identification. We introduce a Reverse Translation Corpus Extension System (RTCES) to handle data imbalance along with reported results on several experimented approaches of word and document representations and different model architectures. The top scoring model was based on AraBERT (Antoun et al., 2020), with our modified extended corpus based on reverse translation of the given Arabic tweets. The selected system achieved a macro average F1 score of 20.34% on the test set, which places us as the 7th out of 18 teams in the final ranking Leaderboard.
%U https://aclanthology.org/2020.wanlp-1.30
%P 288-294
Markdown (Informal)
[Identifying Nuanced Dialect for Arabic Tweets with Deep Learning and Reverse Translation Corpus Extension System](https://aclanthology.org/2020.wanlp-1.30) (Tahssin et al., WANLP 2020)
ACL