@inproceedings{schlippe-etal-2008-diacritization,
title = "Diacritization as a Machine Translation and as a Sequence Labeling Problem",
author = "Schlippe, Tim and
Nguyen, ThuyLinh and
Vogel, Stephan",
booktitle = "Proceedings of the 8th Conference of the Association for Machine Translation in the Americas: Student Research Workshop",
month = oct # " 21-25",
year = "2008",
address = "Waikiki, USA",
publisher = "Association for Machine Translation in the Americas",
url = "https://aclanthology.org/2008.amta-srw.5/",
pages = "270--278",
abstract = "In this paper we describe and compare two techniques for the automatic diacritization of Arabic text: First, we treat diacritization as a monotone machine translation problem, proposing and evaluating several translation and language models, including word and character-based models separately and combined as well as a model which uses statistical machine translation (SMT) to post-edit a rule-based diacritization system. Then we explore a more traditional view of diacritization as a sequence labeling problem, and propose a solution using conditional random fields (Lafferty et al., 2001). All these techniques are compared through word error rate and diacritization error rate both in terms of full diacritization and ignoring vowel endings. The empirical experiments showed that the machine translation approaches perform better than the sequence labeling approaches concerning the error rates."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="schlippe-etal-2008-diacritization">
<titleInfo>
<title>Diacritization as a Machine Translation and as a Sequence Labeling Problem</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tim</namePart>
<namePart type="family">Schlippe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">ThuyLinh</namePart>
<namePart type="family">Nguyen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stephan</namePart>
<namePart type="family">Vogel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued encoding="w3cdtf" point="start">2008-10-21</dateIssued>
<dateIssued encoding="w3cdtf" point="end">2008-10-25</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 8th Conference of the Association for Machine Translation in the Americas: Student Research Workshop</title>
</titleInfo>
<originInfo>
<publisher>Association for Machine Translation in the Americas</publisher>
<place>
<placeTerm type="text">Waikiki, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper we describe and compare two techniques for the automatic diacritization of Arabic text: First, we treat diacritization as a monotone machine translation problem, proposing and evaluating several translation and language models, including word and character-based models separately and combined as well as a model which uses statistical machine translation (SMT) to post-edit a rule-based diacritization system. Then we explore a more traditional view of diacritization as a sequence labeling problem, and propose a solution using conditional random fields (Lafferty et al., 2001). All these techniques are compared through word error rate and diacritization error rate both in terms of full diacritization and ignoring vowel endings. The empirical experiments showed that the machine translation approaches perform better than the sequence labeling approaches concerning the error rates.</abstract>
<identifier type="citekey">schlippe-etal-2008-diacritization</identifier>
<location>
<url>https://aclanthology.org/2008.amta-srw.5/</url>
</location>
<part>
<date encoding="w3cdtf" point="start">2008-10-21</date>
<date encoding="w3cdtf" point="end">2008-10-25</date>
<extent unit="page">
<start>270</start>
<end>278</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Diacritization as a Machine Translation and as a Sequence Labeling Problem
%A Schlippe, Tim
%A Nguyen, ThuyLinh
%A Vogel, Stephan
%S Proceedings of the 8th Conference of the Association for Machine Translation in the Americas: Student Research Workshop
%D 2008
%8 oct 21-25
%I Association for Machine Translation in the Americas
%C Waikiki, USA
%F schlippe-etal-2008-diacritization
%X In this paper we describe and compare two techniques for the automatic diacritization of Arabic text: First, we treat diacritization as a monotone machine translation problem, proposing and evaluating several translation and language models, including word and character-based models separately and combined as well as a model which uses statistical machine translation (SMT) to post-edit a rule-based diacritization system. Then we explore a more traditional view of diacritization as a sequence labeling problem, and propose a solution using conditional random fields (Lafferty et al., 2001). All these techniques are compared through word error rate and diacritization error rate both in terms of full diacritization and ignoring vowel endings. The empirical experiments showed that the machine translation approaches perform better than the sequence labeling approaches concerning the error rates.
%U https://aclanthology.org/2008.amta-srw.5/
%P 270-278
Markdown (Informal)
[Diacritization as a Machine Translation and as a Sequence Labeling Problem](https://aclanthology.org/2008.amta-srw.5/) (Schlippe et al., AMTA 2008)
ACL