@inproceedings{destaw-etal-2022-question,
title = "Question Answering Classification for {A}mharic Social Media Community Based Questions",
author = "Belay, Tadesse Destaw and
Yimam, Seid Muhie and
Ayele, Abinew and
Biemann, Chris",
editor = "Melero, Maite and
Sakti, Sakriani and
Soria, Claudia",
booktitle = "Proceedings of the 1st Annual Meeting of the ELRA/ISCA Special Interest Group on Under-Resourced Languages",
month = jun,
year = "2022",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2022.sigul-1.18/",
pages = "137--145",
abstract = "In this work, we build a Question Answering (QA) classification dataset from a social media platform, namely the Telegram public channel called @AskAnythingEthiopia. The channel has more than 78k subscribers and has existed since May 31, 2019. The platform allows asking questions that belong to various domains, like politics, economics, health, education, and so on. Since the questions are posed in a mixed-code, we apply different strategies to pre-process the dataset. Questions are posted in Amharic, English, or Amharic but in a Latin script. As part of the pre-processing tools, we build a Latin to Ethiopic Script transliteration tool. We collect 8k Amharic and 24K transliterated questions and develop deep learning-based questions answering classifiers that attain as high as an F-score of 57.29 in 20 different question classes or categories. The datasets and pre-processing scripts are open-sourced to facilitate further research on the Amharic community-based question answering."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="destaw-etal-2022-question">
<titleInfo>
<title>Question Answering Classification for Amharic Social Media Community Based Questions</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tadesse</namePart>
<namePart type="given">Destaw</namePart>
<namePart type="family">Belay</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Seid</namePart>
<namePart type="given">Muhie</namePart>
<namePart type="family">Yimam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abinew</namePart>
<namePart type="family">Ayele</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chris</namePart>
<namePart type="family">Biemann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st Annual Meeting of the ELRA/ISCA Special Interest Group on Under-Resourced Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maite</namePart>
<namePart type="family">Melero</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Claudia</namePart>
<namePart type="family">Soria</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this work, we build a Question Answering (QA) classification dataset from a social media platform, namely the Telegram public channel called @AskAnythingEthiopia. The channel has more than 78k subscribers and has existed since May 31, 2019. The platform allows asking questions that belong to various domains, like politics, economics, health, education, and so on. Since the questions are posed in a mixed-code, we apply different strategies to pre-process the dataset. Questions are posted in Amharic, English, or Amharic but in a Latin script. As part of the pre-processing tools, we build a Latin to Ethiopic Script transliteration tool. We collect 8k Amharic and 24K transliterated questions and develop deep learning-based questions answering classifiers that attain as high as an F-score of 57.29 in 20 different question classes or categories. The datasets and pre-processing scripts are open-sourced to facilitate further research on the Amharic community-based question answering.</abstract>
<identifier type="citekey">destaw-etal-2022-question</identifier>
<location>
<url>https://aclanthology.org/2022.sigul-1.18/</url>
</location>
<part>
<date>2022-06</date>
<extent unit="page">
<start>137</start>
<end>145</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Question Answering Classification for Amharic Social Media Community Based Questions
%A Belay, Tadesse Destaw
%A Yimam, Seid Muhie
%A Ayele, Abinew
%A Biemann, Chris
%Y Melero, Maite
%Y Sakti, Sakriani
%Y Soria, Claudia
%S Proceedings of the 1st Annual Meeting of the ELRA/ISCA Special Interest Group on Under-Resourced Languages
%D 2022
%8 June
%I European Language Resources Association
%C Marseille, France
%F destaw-etal-2022-question
%X In this work, we build a Question Answering (QA) classification dataset from a social media platform, namely the Telegram public channel called @AskAnythingEthiopia. The channel has more than 78k subscribers and has existed since May 31, 2019. The platform allows asking questions that belong to various domains, like politics, economics, health, education, and so on. Since the questions are posed in a mixed-code, we apply different strategies to pre-process the dataset. Questions are posted in Amharic, English, or Amharic but in a Latin script. As part of the pre-processing tools, we build a Latin to Ethiopic Script transliteration tool. We collect 8k Amharic and 24K transliterated questions and develop deep learning-based questions answering classifiers that attain as high as an F-score of 57.29 in 20 different question classes or categories. The datasets and pre-processing scripts are open-sourced to facilitate further research on the Amharic community-based question answering.
%U https://aclanthology.org/2022.sigul-1.18/
%P 137-145
Markdown (Informal)
[Question Answering Classification for Amharic Social Media Community Based Questions](https://aclanthology.org/2022.sigul-1.18/) (Belay et al., SIGUL 2022)
ACL