@inproceedings{yang-etal-2020-less,
title = "Less is Better: A cognitively inspired unsupervised model for language segmentation",
author = "Yang, Jinbiao and
Frank, Stefan L. and
van den Bosch, Antal",
editor = "Zock, Michael and
Chersoni, Emmanuele and
Lenci, Alessandro and
Santus, Enrico",
booktitle = "Proceedings of the Workshop on the Cognitive Aspects of the Lexicon",
month = dec,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.cogalex-1.4",
pages = "33--45",
abstract = "Language users process utterances by segmenting them into many cognitive units, which vary in their sizes and linguistic levels. Although we can do such unitization/segmentation easily, its cognitive mechanism is still not clear. This paper proposes an unsupervised model, Less-is-Better (LiB), to simulate the human cognitive process with respect to language unitization/segmentation. LiB follows the principle of least effort and aims to build a lexicon which minimizes the number of unit tokens (alleviating the effort of analysis) and number of unit types (alleviating the effort of storage) at the same time on any given corpus. LiB{'}s workflow is inspired by empirical cognitive phenomena. The design makes the mechanism of LiB cognitively plausible and the computational requirement light-weight. The lexicon generated by LiB performs the best among different types of lexicons (e.g. ground-truth words) both from an information-theoretical view and a cognitive view, which suggests that the LiB lexicon may be a plausible proxy of the mental lexicon.",
}