@inproceedings{zeldes-etal-2020-exhaustive,
title = "Exhaustive Entity Recognition for {C}optic: Challenges and Solutions",
author = "Zeldes, Amir and
Martin, Lance and
Tu, Sichang",
editor = "DeGaetano, Stefania and
Kazantseva, Anna and
Reiter, Nils and
Szpakowicz, Stan",
booktitle = "Proceedings of the 4th Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature",
month = dec,
year = "2020",
address = "Online",
publisher = "International Committee on Computational Linguistics",
url = "https://aclanthology.org/2020.latechclfl-1.3/",
pages = "19--28",
abstract = "Entity recognition provides semantic access to ancient materials in the Digital Humanities: it exposes people and places of interest in texts that cannot be read exhaustively, facilitates linking resources and can provide a window into text contents, even for texts with no translations. In this paper we present entity recognition for Coptic, the language of Hellenistic era Egypt. We evaluate NLP approaches to the task and lay out difficulties in applying them to a low-resource, morphologically complex language. We present solutions for named and non-named nested entity recognition and semi-automatic entity linking to Wikipedia, relying on robust dependency parsing, feature-based CRF models, and hand-crafted knowledge base resources, enabling high accuracy NER with orders of magnitude less data than those used for high resource languages. The results suggest avenues for research on other languages in similar settings."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zeldes-etal-2020-exhaustive">
<titleInfo>
<title>Exhaustive Entity Recognition for Coptic: Challenges and Solutions</title>
</titleInfo>
<name type="personal">
<namePart type="given">Amir</namePart>
<namePart type="family">Zeldes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lance</namePart>
<namePart type="family">Martin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sichang</namePart>
<namePart type="family">Tu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 4th Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature</title>
</titleInfo>
<name type="personal">
<namePart type="given">Stefania</namePart>
<namePart type="family">DeGaetano</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Kazantseva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nils</namePart>
<namePart type="family">Reiter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stan</namePart>
<namePart type="family">Szpakowicz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Committee on Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Entity recognition provides semantic access to ancient materials in the Digital Humanities: it exposes people and places of interest in texts that cannot be read exhaustively, facilitates linking resources and can provide a window into text contents, even for texts with no translations. In this paper we present entity recognition for Coptic, the language of Hellenistic era Egypt. We evaluate NLP approaches to the task and lay out difficulties in applying them to a low-resource, morphologically complex language. We present solutions for named and non-named nested entity recognition and semi-automatic entity linking to Wikipedia, relying on robust dependency parsing, feature-based CRF models, and hand-crafted knowledge base resources, enabling high accuracy NER with orders of magnitude less data than those used for high resource languages. The results suggest avenues for research on other languages in similar settings.</abstract>
<identifier type="citekey">zeldes-etal-2020-exhaustive</identifier>
<location>
<url>https://aclanthology.org/2020.latechclfl-1.3/</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>19</start>
<end>28</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Exhaustive Entity Recognition for Coptic: Challenges and Solutions
%A Zeldes, Amir
%A Martin, Lance
%A Tu, Sichang
%Y DeGaetano, Stefania
%Y Kazantseva, Anna
%Y Reiter, Nils
%Y Szpakowicz, Stan
%S Proceedings of the 4th Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature
%D 2020
%8 December
%I International Committee on Computational Linguistics
%C Online
%F zeldes-etal-2020-exhaustive
%X Entity recognition provides semantic access to ancient materials in the Digital Humanities: it exposes people and places of interest in texts that cannot be read exhaustively, facilitates linking resources and can provide a window into text contents, even for texts with no translations. In this paper we present entity recognition for Coptic, the language of Hellenistic era Egypt. We evaluate NLP approaches to the task and lay out difficulties in applying them to a low-resource, morphologically complex language. We present solutions for named and non-named nested entity recognition and semi-automatic entity linking to Wikipedia, relying on robust dependency parsing, feature-based CRF models, and hand-crafted knowledge base resources, enabling high accuracy NER with orders of magnitude less data than those used for high resource languages. The results suggest avenues for research on other languages in similar settings.
%U https://aclanthology.org/2020.latechclfl-1.3/
%P 19-28
Markdown (Informal)
[Exhaustive Entity Recognition for Coptic: Challenges and Solutions](https://aclanthology.org/2020.latechclfl-1.3/) (Zeldes et al., LaTeCHCLfL 2020)
ACL
- Amir Zeldes, Lance Martin, and Sichang Tu. 2020. Exhaustive Entity Recognition for Coptic: Challenges and Solutions. In Proceedings of the 4th Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature, pages 19–28, Online. International Committee on Computational Linguistics.