@inproceedings{seker-tsarfaty-2020-pointer,
title = "A Pointer Network Architecture for Joint Morphological Segmentation and Tagging",
author = "Seker, Amit and
Tsarfaty, Reut",
editor = "Cohn, Trevor and
He, Yulan and
Liu, Yang",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2020",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.findings-emnlp.391/",
doi = "10.18653/v1/2020.findings-emnlp.391",
pages = "4368--4378",
abstract = "Morphologically Rich Languages (MRLs) such as Arabic, Hebrew and Turkish often require Morphological Disambiguation (MD), i.e., the prediction of morphological decomposition of tokens into morphemes, early in the pipeline. Neural MD may be addressed as a simple pipeline, where segmentation is followed by sequence tagging, or as an end-to-end model, predicting morphemes from raw tokens. Both approaches are sub-optimal; the former is heavily prone to error propagation, and the latter does not enjoy explicit access to the basic processing units called morphemes. This paper offers MD architecture that combines the symbolic knowledge of morphemes with the learning capacity of neural end-to-end modeling. We propose a new, general and easy-to-implement Pointer Network model where the input is a morphological lattice and the output is a sequence of indices pointing at a single disambiguated path of morphemes. We demonstrate the efficacy of the model on segmentation and tagging, for Hebrew and Turkish texts, based on their respective Universal Dependencies (UD) treebanks. Our experiments show that with complete lattices, our model outperforms all shared-task results on segmenting and tagging these languages. On the SPMRL treebank, our model outperforms all previously reported results for Hebrew MD in realistic scenarios."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="seker-tsarfaty-2020-pointer">
<titleInfo>
<title>A Pointer Network Architecture for Joint Morphological Segmentation and Tagging</title>
</titleInfo>
<name type="personal">
<namePart type="given">Amit</namePart>
<namePart type="family">Seker</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Reut</namePart>
<namePart type="family">Tsarfaty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2020</title>
</titleInfo>
<name type="personal">
<namePart type="given">Trevor</namePart>
<namePart type="family">Cohn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yulan</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Morphologically Rich Languages (MRLs) such as Arabic, Hebrew and Turkish often require Morphological Disambiguation (MD), i.e., the prediction of morphological decomposition of tokens into morphemes, early in the pipeline. Neural MD may be addressed as a simple pipeline, where segmentation is followed by sequence tagging, or as an end-to-end model, predicting morphemes from raw tokens. Both approaches are sub-optimal; the former is heavily prone to error propagation, and the latter does not enjoy explicit access to the basic processing units called morphemes. This paper offers MD architecture that combines the symbolic knowledge of morphemes with the learning capacity of neural end-to-end modeling. We propose a new, general and easy-to-implement Pointer Network model where the input is a morphological lattice and the output is a sequence of indices pointing at a single disambiguated path of morphemes. We demonstrate the efficacy of the model on segmentation and tagging, for Hebrew and Turkish texts, based on their respective Universal Dependencies (UD) treebanks. Our experiments show that with complete lattices, our model outperforms all shared-task results on segmenting and tagging these languages. On the SPMRL treebank, our model outperforms all previously reported results for Hebrew MD in realistic scenarios.</abstract>
<identifier type="citekey">seker-tsarfaty-2020-pointer</identifier>
<identifier type="doi">10.18653/v1/2020.findings-emnlp.391</identifier>
<location>
<url>https://aclanthology.org/2020.findings-emnlp.391/</url>
</location>
<part>
<date>2020-11</date>
<extent unit="page">
<start>4368</start>
<end>4378</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Pointer Network Architecture for Joint Morphological Segmentation and Tagging
%A Seker, Amit
%A Tsarfaty, Reut
%Y Cohn, Trevor
%Y He, Yulan
%Y Liu, Yang
%S Findings of the Association for Computational Linguistics: EMNLP 2020
%D 2020
%8 November
%I Association for Computational Linguistics
%C Online
%F seker-tsarfaty-2020-pointer
%X Morphologically Rich Languages (MRLs) such as Arabic, Hebrew and Turkish often require Morphological Disambiguation (MD), i.e., the prediction of morphological decomposition of tokens into morphemes, early in the pipeline. Neural MD may be addressed as a simple pipeline, where segmentation is followed by sequence tagging, or as an end-to-end model, predicting morphemes from raw tokens. Both approaches are sub-optimal; the former is heavily prone to error propagation, and the latter does not enjoy explicit access to the basic processing units called morphemes. This paper offers MD architecture that combines the symbolic knowledge of morphemes with the learning capacity of neural end-to-end modeling. We propose a new, general and easy-to-implement Pointer Network model where the input is a morphological lattice and the output is a sequence of indices pointing at a single disambiguated path of morphemes. We demonstrate the efficacy of the model on segmentation and tagging, for Hebrew and Turkish texts, based on their respective Universal Dependencies (UD) treebanks. Our experiments show that with complete lattices, our model outperforms all shared-task results on segmenting and tagging these languages. On the SPMRL treebank, our model outperforms all previously reported results for Hebrew MD in realistic scenarios.
%R 10.18653/v1/2020.findings-emnlp.391
%U https://aclanthology.org/2020.findings-emnlp.391/
%U https://doi.org/10.18653/v1/2020.findings-emnlp.391
%P 4368-4378
Markdown (Informal)
[A Pointer Network Architecture for Joint Morphological Segmentation and Tagging](https://aclanthology.org/2020.findings-emnlp.391/) (Seker & Tsarfaty, Findings 2020)
ACL