@inproceedings{ek-bernardy-2020-composing,
title = "Composing Byte-Pair Encodings for Morphological Sequence Classification",
author = "Ek, Adam and
Bernardy, Jean-Philippe",
editor = "de Marneffe, Marie-Catherine and
de Lhoneux, Miryam and
Nivre, Joakim and
Schuster, Sebastian",
booktitle = "Proceedings of the Fourth Workshop on Universal Dependencies (UDW 2020)",
month = dec,
year = "2020",
address = "Barcelona, Spain (Online)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.udw-1.9/",
pages = "76--86",
abstract = "Byte-pair encodings is a method for splitting a word into sub-word tokens, a language model then assigns contextual representations separately to each of these tokens. In this paper, we evaluate four different methods of composing such sub-word representations into word representations. We evaluate the methods on morphological sequence classification, the task of predicting grammatical features of a word. Our experiments reveal that using an RNN to compute word representations is consistently more effective than the other methods tested across a sample of eight languages with different typology and varying numbers of byte-pair tokens per word."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ek-bernardy-2020-composing">
<titleInfo>
<title>Composing Byte-Pair Encodings for Morphological Sequence Classification</title>
</titleInfo>
<name type="personal">
<namePart type="given">Adam</namePart>
<namePart type="family">Ek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jean-Philippe</namePart>
<namePart type="family">Bernardy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fourth Workshop on Universal Dependencies (UDW 2020)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marie-Catherine</namePart>
<namePart type="family">de Marneffe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Miryam</namePart>
<namePart type="family">de Lhoneux</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joakim</namePart>
<namePart type="family">Nivre</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Schuster</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Barcelona, Spain (Online)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Byte-pair encodings is a method for splitting a word into sub-word tokens, a language model then assigns contextual representations separately to each of these tokens. In this paper, we evaluate four different methods of composing such sub-word representations into word representations. We evaluate the methods on morphological sequence classification, the task of predicting grammatical features of a word. Our experiments reveal that using an RNN to compute word representations is consistently more effective than the other methods tested across a sample of eight languages with different typology and varying numbers of byte-pair tokens per word.</abstract>
<identifier type="citekey">ek-bernardy-2020-composing</identifier>
<location>
<url>https://aclanthology.org/2020.udw-1.9/</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>76</start>
<end>86</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Composing Byte-Pair Encodings for Morphological Sequence Classification
%A Ek, Adam
%A Bernardy, Jean-Philippe
%Y de Marneffe, Marie-Catherine
%Y de Lhoneux, Miryam
%Y Nivre, Joakim
%Y Schuster, Sebastian
%S Proceedings of the Fourth Workshop on Universal Dependencies (UDW 2020)
%D 2020
%8 December
%I Association for Computational Linguistics
%C Barcelona, Spain (Online)
%F ek-bernardy-2020-composing
%X Byte-pair encodings is a method for splitting a word into sub-word tokens, a language model then assigns contextual representations separately to each of these tokens. In this paper, we evaluate four different methods of composing such sub-word representations into word representations. We evaluate the methods on morphological sequence classification, the task of predicting grammatical features of a word. Our experiments reveal that using an RNN to compute word representations is consistently more effective than the other methods tested across a sample of eight languages with different typology and varying numbers of byte-pair tokens per word.
%U https://aclanthology.org/2020.udw-1.9/
%P 76-86
Markdown (Informal)
[Composing Byte-Pair Encodings for Morphological Sequence Classification](https://aclanthology.org/2020.udw-1.9/) (Ek & Bernardy, UDW 2020)
ACL