@inproceedings{alnefaie-etal-2023-haqa,
title = "{HAQA} and {QUQA}: Constructing Two {A}rabic Question-Answering Corpora for the {Q}uran and {H}adith",
author = "Alnefaie, Sarah and
Atwell, Eric and
Alsalka, Mohammad Ammar",
editor = "Mitkov, Ruslan and
Angelova, Galia",
booktitle = "Proceedings of the 14th International Conference on Recent Advances in Natural Language Processing",
month = sep,
year = "2023",
address = "Varna, Bulgaria",
publisher = "INCOMA Ltd., Shoumen, Bulgaria",
url = "https://aclanthology.org/2023.ranlp-1.10",
pages = "90--97",
abstract = "It is neither possible nor fair to compare the performance of question-answering systems for the Holy Quran and Hadith Sharif in Arabic due to both the absence of a golden test dataset on the Hadith Sharif and the small size and easy questions of the newly created golden test dataset on the Holy Quran. This article presents two question{--}answer datasets: Hadith Question{--}Answer pairs (HAQA) and Quran Question{--}Answer pairs (QUQA). HAQA is the first Arabic Hadith question{--}answer dataset available to the research community, while the QUQA dataset is regarded as the more challenging and the most extensive collection of Arabic question{--}answer pairs on the Quran. HAQA was designed and its data collected from several expert sources, while QUQA went through several steps in the construction phase; that is, it was designed and then integrated with existing datasets in different formats, after which the datasets were enlarged with the addition of new data from books by experts. The HAQA corpus consists of 1598 question{--}answer pairs, and that of QUQA contains 3382. They may be useful as gold{--}standard datasets for the evaluation process, as training datasets for language models with question-answering tasks and for other uses in artificial intelligence.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="alnefaie-etal-2023-haqa">
<titleInfo>
<title>HAQA and QUQA: Constructing Two Arabic Question-Answering Corpora for the Quran and Hadith</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sarah</namePart>
<namePart type="family">Alnefaie</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eric</namePart>
<namePart type="family">Atwell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Ammar</namePart>
<namePart type="family">Alsalka</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 14th International Conference on Recent Advances in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Galia</namePart>
<namePart type="family">Angelova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
<place>
<placeTerm type="text">Varna, Bulgaria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>It is neither possible nor fair to compare the performance of question-answering systems for the Holy Quran and Hadith Sharif in Arabic due to both the absence of a golden test dataset on the Hadith Sharif and the small size and easy questions of the newly created golden test dataset on the Holy Quran. This article presents two question–answer datasets: Hadith Question–Answer pairs (HAQA) and Quran Question–Answer pairs (QUQA). HAQA is the first Arabic Hadith question–answer dataset available to the research community, while the QUQA dataset is regarded as the more challenging and the most extensive collection of Arabic question–answer pairs on the Quran. HAQA was designed and its data collected from several expert sources, while QUQA went through several steps in the construction phase; that is, it was designed and then integrated with existing datasets in different formats, after which the datasets were enlarged with the addition of new data from books by experts. The HAQA corpus consists of 1598 question–answer pairs, and that of QUQA contains 3382. They may be useful as gold–standard datasets for the evaluation process, as training datasets for language models with question-answering tasks and for other uses in artificial intelligence.</abstract>
<identifier type="citekey">alnefaie-etal-2023-haqa</identifier>
<location>
<url>https://aclanthology.org/2023.ranlp-1.10</url>
</location>
<part>
<date>2023-09</date>
<extent unit="page">
<start>90</start>
<end>97</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T HAQA and QUQA: Constructing Two Arabic Question-Answering Corpora for the Quran and Hadith
%A Alnefaie, Sarah
%A Atwell, Eric
%A Alsalka, Mohammad Ammar
%Y Mitkov, Ruslan
%Y Angelova, Galia
%S Proceedings of the 14th International Conference on Recent Advances in Natural Language Processing
%D 2023
%8 September
%I INCOMA Ltd., Shoumen, Bulgaria
%C Varna, Bulgaria
%F alnefaie-etal-2023-haqa
%X It is neither possible nor fair to compare the performance of question-answering systems for the Holy Quran and Hadith Sharif in Arabic due to both the absence of a golden test dataset on the Hadith Sharif and the small size and easy questions of the newly created golden test dataset on the Holy Quran. This article presents two question–answer datasets: Hadith Question–Answer pairs (HAQA) and Quran Question–Answer pairs (QUQA). HAQA is the first Arabic Hadith question–answer dataset available to the research community, while the QUQA dataset is regarded as the more challenging and the most extensive collection of Arabic question–answer pairs on the Quran. HAQA was designed and its data collected from several expert sources, while QUQA went through several steps in the construction phase; that is, it was designed and then integrated with existing datasets in different formats, after which the datasets were enlarged with the addition of new data from books by experts. The HAQA corpus consists of 1598 question–answer pairs, and that of QUQA contains 3382. They may be useful as gold–standard datasets for the evaluation process, as training datasets for language models with question-answering tasks and for other uses in artificial intelligence.
%U https://aclanthology.org/2023.ranlp-1.10
%P 90-97
Markdown (Informal)
[HAQA and QUQA: Constructing Two Arabic Question-Answering Corpora for the Quran and Hadith](https://aclanthology.org/2023.ranlp-1.10) (Alnefaie et al., RANLP 2023)
ACL