@article{pimentel-etal-2020-phonotactic,
title = "Phonotactic Complexity and Its Trade-offs",
author = "Pimentel, Tiago and
Roark, Brian and
Cotterell, Ryan",
editor = "Johnson, Mark and
Roark, Brian and
Nenkova, Ani",
journal = "Transactions of the Association for Computational Linguistics",
volume = "8",
year = "2020",
address = "Cambridge, MA",
publisher = "MIT Press",
url = "https://aclanthology.org/2020.tacl-1.1/",
doi = "10.1162/tacl_a_00296",
pages = "1--18",
abstract = "We present methods for calculating a measure of phonotactic complexity{---}bits per phoneme{---}that permits a straightforward cross-linguistic comparison. When given a word, represented as a sequence of phonemic segments such as symbols in the international phonetic alphabet, and a statistical model trained on a sample of word types from the language, we can approximately measure bits per phoneme using the negative log-probability of that word under the model. This simple measure allows us to compare the entropy across languages, giving insight into how complex a language's phonotactics is. Using a collection of 1016 basic concept words across 106 languages, we demonstrate a very strong negative correlation of {\ensuremath{-}}0.74 between bits per phoneme and the average length of words."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="pimentel-etal-2020-phonotactic">
<titleInfo>
<title>Phonotactic Complexity and Its Trade-offs</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tiago</namePart>
<namePart type="family">Pimentel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Brian</namePart>
<namePart type="family">Roark</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ryan</namePart>
<namePart type="family">Cotterell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre authority="bibutilsgt">journal article</genre>
<relatedItem type="host">
<titleInfo>
<title>Transactions of the Association for Computational Linguistics</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
<publisher>MIT Press</publisher>
<place>
<placeTerm type="text">Cambridge, MA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre authority="bibutilsgt">academic journal</genre>
</relatedItem>
<abstract>We present methods for calculating a measure of phonotactic complexity—bits per phoneme—that permits a straightforward cross-linguistic comparison. When given a word, represented as a sequence of phonemic segments such as symbols in the international phonetic alphabet, and a statistical model trained on a sample of word types from the language, we can approximately measure bits per phoneme using the negative log-probability of that word under the model. This simple measure allows us to compare the entropy across languages, giving insight into how complex a language’s phonotactics is. Using a collection of 1016 basic concept words across 106 languages, we demonstrate a very strong negative correlation of −0.74 between bits per phoneme and the average length of words.</abstract>
<identifier type="citekey">pimentel-etal-2020-phonotactic</identifier>
<identifier type="doi">10.1162/tacl_a_00296</identifier>
<location>
<url>https://aclanthology.org/2020.tacl-1.1/</url>
</location>
<part>
<date>2020</date>
<detail type="volume"><number>8</number></detail>
<extent unit="page">
<start>1</start>
<end>18</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Journal Article
%T Phonotactic Complexity and Its Trade-offs
%A Pimentel, Tiago
%A Roark, Brian
%A Cotterell, Ryan
%J Transactions of the Association for Computational Linguistics
%D 2020
%V 8
%I MIT Press
%C Cambridge, MA
%F pimentel-etal-2020-phonotactic
%X We present methods for calculating a measure of phonotactic complexity—bits per phoneme—that permits a straightforward cross-linguistic comparison. When given a word, represented as a sequence of phonemic segments such as symbols in the international phonetic alphabet, and a statistical model trained on a sample of word types from the language, we can approximately measure bits per phoneme using the negative log-probability of that word under the model. This simple measure allows us to compare the entropy across languages, giving insight into how complex a language’s phonotactics is. Using a collection of 1016 basic concept words across 106 languages, we demonstrate a very strong negative correlation of −0.74 between bits per phoneme and the average length of words.
%R 10.1162/tacl_a_00296
%U https://aclanthology.org/2020.tacl-1.1/
%U https://doi.org/10.1162/tacl_a_00296
%P 1-18
Markdown (Informal)
[Phonotactic Complexity and Its Trade-offs](https://aclanthology.org/2020.tacl-1.1/) (Pimentel et al., TACL 2020)
ACL