% Conference paper published in the Proceedings of the Ancient Language
% Processing Workshop (2023). Values are brace-delimited; the acronym in the
% title is brace-protected so that sentence-casing styles keep it uppercase.
@inproceedings{sahala-linden-2023-neural,
  author    = {Sahala, Aleksi and
               Lind{\'e}n, Krister},
  title     = {A Neural Pipeline for {POS}-tagging and Lemmatizing Cuneiform Languages},
  editor    = {Anderson, Adam and
               Gordin, Shai and
               Li, Bin and
               Liu, Yudong and
               Passarotti, Marco C.},
  booktitle = {Proceedings of the Ancient Language Processing Workshop},
  year      = {2023},
  month     = sep,
  pages     = {203--212},
  publisher = {INCOMA Ltd., Shoumen, Bulgaria},
  address   = {Varna, Bulgaria},
  url       = {https://aclanthology.org/2023.alp-1.23},
  abstract  = {We presented a pipeline for POS-tagging and lemmatizing cuneiform languages and evaluated its performance on Sumerian, first millennium Babylonian, Neo-Assyrian and Urartian texts extracted from Oracc. The system achieves a POS-tagging accuracy between 95-98{\%} and a lemmatization accuracy of 94-96{\%} depending on the language or dialect. For OOV words only, the current version can predict correct POS-tags for 83-91{\%}, and lemmata for 68-84{\%} of the input words. Compared with the earlier version, the current one has about 10{\%} higher accuracy in OOV lemmatization and POS-tagging due to better neural network performance. We also tested the system for lemmatizing and POS-tagging the PROIEL Ancient Greek and Latin treebanks, achieving results similar to those with the cuneiform languages.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record (citekey sahala-linden-2023-neural). -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="sahala-linden-2023-neural">
    <!-- The paper itself: title, authors, issue date. -->
    <titleInfo>
      <title>A Neural Pipeline for POS-tagging and Lemmatizing Cuneiform Languages</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Aleksi</namePart>
      <namePart type="family">Sahala</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Krister</namePart>
      <namePart type="family">Lindén</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2023-09</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Ancient Language Processing Workshop</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Adam</namePart>
        <namePart type="family">Anderson</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Shai</namePart>
        <namePart type="family">Gordin</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Bin</namePart>
        <namePart type="family">Li</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yudong</namePart>
        <namePart type="family">Liu</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Marco</namePart>
        <namePart type="given">C</namePart>
        <namePart type="family">Passarotti</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
        <place>
          <placeTerm type="text">Varna, Bulgaria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>We presented a pipeline for POS-tagging and lemmatizing cuneiform languages and evaluated its performance on Sumerian, first millennium Babylonian, Neo-Assyrian and Urartian texts extracted from Oracc. The system achieves a POS-tagging accuracy between 95-98% and a lemmatization accuracy of 94-96% depending on the language or dialect. For OOV words only, the current version can predict correct POS-tags for 83-91%, and lemmata for 68-84% of the input words. Compared with the earlier version, the current one has about 10% higher accuracy in OOV lemmatization and POS-tagging due to better neural network performance. We also tested the system for lemmatizing and POS-tagging the PROIEL Ancient Greek and Latin treebanks, achieving results similar to those with the cuneiform languages.</abstract>
    <identifier type="citekey">sahala-linden-2023-neural</identifier>
    <location>
      <url>https://aclanthology.org/2023.alp-1.23</url>
    </location>
    <!-- Position of the paper within the host volume (page range). -->
    <part>
      <date>2023-09</date>
      <extent unit="page">
        <start>203</start>
        <end>212</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T A Neural Pipeline for POS-tagging and Lemmatizing Cuneiform Languages
%A Sahala, Aleksi
%A Lindén, Krister
%Y Anderson, Adam
%Y Gordin, Shai
%Y Li, Bin
%Y Liu, Yudong
%Y Passarotti, Marco C.
%S Proceedings of the Ancient Language Processing Workshop
%D 2023
%8 September
%I INCOMA Ltd., Shoumen, Bulgaria
%C Varna, Bulgaria
%F sahala-linden-2023-neural
%X We presented a pipeline for POS-tagging and lemmatizing cuneiform languages and evaluated its performance on Sumerian, first millennium Babylonian, Neo-Assyrian and Urartian texts extracted from Oracc. The system achieves a POS-tagging accuracy between 95-98% and a lemmatization accuracy of 94-96% depending on the language or dialect. For OOV words only, the current version can predict correct POS-tags for 83-91%, and lemmata for 68-84% of the input words. Compared with the earlier version, the current one has about 10% higher accuracy in OOV lemmatization and POS-tagging due to better neural network performance. We also tested the system for lemmatizing and POS-tagging the PROIEL Ancient Greek and Latin treebanks, achieving results similar to those with the cuneiform languages.
%U https://aclanthology.org/2023.alp-1.23
%P 203-212
Markdown (Informal)
[A Neural Pipeline for POS-tagging and Lemmatizing Cuneiform Languages](https://aclanthology.org/2023.alp-1.23) (Sahala & Lindén, ALP-WS 2023)
ACL