@inproceedings{tuckute-etal-2022-sentspace,
title = "{S}ent{S}pace: Large-Scale Benchmarking and Evaluation of Text using Cognitively Motivated Lexical, Syntactic, and Semantic Features",
author = "Tuckute, Greta and
Sathe, Aalok and
Wang, Mingye and
Yoder, Harley and
Shain, Cory and
Fedorenko, Evelina",
editor = "Hajishirzi, Hannaneh and
Ning, Qiang and
Sil, Avi",
booktitle = "Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: System Demonstrations",
month = jul,
year = "2022",
address = "Hybrid: Seattle, Washington + Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.naacl-demo.11/",
doi = "10.18653/v1/2022.naacl-demo.11",
pages = "99--113",
abstract = "SentSpace is a modular framework for streamlined evaluation of text. SentSpacecharacterizes textual input using diverse lexical, syntactic, and semantic features derivedfrom corpora and psycholinguistic experiments. Core sentence features fall into three primaryfeature spaces: 1) Lexical, 2) Contextual, and 3) Embeddings. To aid in the analysis of computed features, SentSpace provides a web interface for interactive visualization and comparison with text from large corpora. The modular design of SentSpace allows researchersto easily integrate their own feature computation into the pipeline while benefiting from acommon framework for evaluation and visualization. In this manuscript we will describe thedesign of SentSpace, its core feature spaces, and demonstrate an example use case by comparing human-written and machine-generated (GPT2-XL) sentences to each other. We findthat while GPT2-XL-generated text appears fluent at the surface level, psycholinguistic normsand measures of syntactic processing reveal key differences between text produced by humansand machines. Thus, SentSpace provides a broad set of cognitively motivated linguisticfeatures for evaluation of text within natural language processing, cognitive science, as wellas the social sciences."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="tuckute-etal-2022-sentspace">
<titleInfo>
<title>SentSpace: Large-Scale Benchmarking and Evaluation of Text using Cognitively Motivated Lexical, Syntactic, and Semantic Features</title>
</titleInfo>
<name type="personal">
<namePart type="given">Greta</namePart>
<namePart type="family">Tuckute</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aalok</namePart>
<namePart type="family">Sathe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mingye</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Harley</namePart>
<namePart type="family">Yoder</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cory</namePart>
<namePart type="family">Shain</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Evelina</namePart>
<namePart type="family">Fedorenko</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: System Demonstrations</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hannaneh</namePart>
<namePart type="family">Hajishirzi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Qiang</namePart>
<namePart type="family">Ning</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Avi</namePart>
<namePart type="family">Sil</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Hybrid: Seattle, Washington + Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>SentSpace is a modular framework for streamlined evaluation of text. SentSpacecharacterizes textual input using diverse lexical, syntactic, and semantic features derivedfrom corpora and psycholinguistic experiments. Core sentence features fall into three primaryfeature spaces: 1) Lexical, 2) Contextual, and 3) Embeddings. To aid in the analysis of computed features, SentSpace provides a web interface for interactive visualization and comparison with text from large corpora. The modular design of SentSpace allows researchersto easily integrate their own feature computation into the pipeline while benefiting from acommon framework for evaluation and visualization. In this manuscript we will describe thedesign of SentSpace, its core feature spaces, and demonstrate an example use case by comparing human-written and machine-generated (GPT2-XL) sentences to each other. We findthat while GPT2-XL-generated text appears fluent at the surface level, psycholinguistic normsand measures of syntactic processing reveal key differences between text produced by humansand machines. Thus, SentSpace provides a broad set of cognitively motivated linguisticfeatures for evaluation of text within natural language processing, cognitive science, as wellas the social sciences.</abstract>
<identifier type="citekey">tuckute-etal-2022-sentspace</identifier>
<identifier type="doi">10.18653/v1/2022.naacl-demo.11</identifier>
<location>
<url>https://aclanthology.org/2022.naacl-demo.11/</url>
</location>
<part>
<date>2022-07</date>
<extent unit="page">
<start>99</start>
<end>113</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T SentSpace: Large-Scale Benchmarking and Evaluation of Text using Cognitively Motivated Lexical, Syntactic, and Semantic Features
%A Tuckute, Greta
%A Sathe, Aalok
%A Wang, Mingye
%A Yoder, Harley
%A Shain, Cory
%A Fedorenko, Evelina
%Y Hajishirzi, Hannaneh
%Y Ning, Qiang
%Y Sil, Avi
%S Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: System Demonstrations
%D 2022
%8 July
%I Association for Computational Linguistics
%C Hybrid: Seattle, Washington + Online
%F tuckute-etal-2022-sentspace
%X SentSpace is a modular framework for streamlined evaluation of text. SentSpacecharacterizes textual input using diverse lexical, syntactic, and semantic features derivedfrom corpora and psycholinguistic experiments. Core sentence features fall into three primaryfeature spaces: 1) Lexical, 2) Contextual, and 3) Embeddings. To aid in the analysis of computed features, SentSpace provides a web interface for interactive visualization and comparison with text from large corpora. The modular design of SentSpace allows researchersto easily integrate their own feature computation into the pipeline while benefiting from acommon framework for evaluation and visualization. In this manuscript we will describe thedesign of SentSpace, its core feature spaces, and demonstrate an example use case by comparing human-written and machine-generated (GPT2-XL) sentences to each other. We findthat while GPT2-XL-generated text appears fluent at the surface level, psycholinguistic normsand measures of syntactic processing reveal key differences between text produced by humansand machines. Thus, SentSpace provides a broad set of cognitively motivated linguisticfeatures for evaluation of text within natural language processing, cognitive science, as wellas the social sciences.
%R 10.18653/v1/2022.naacl-demo.11
%U https://aclanthology.org/2022.naacl-demo.11/
%U https://doi.org/10.18653/v1/2022.naacl-demo.11
%P 99-113
Markdown (Informal)
[SentSpace: Large-Scale Benchmarking and Evaluation of Text using Cognitively Motivated Lexical, Syntactic, and Semantic Features](https://aclanthology.org/2022.naacl-demo.11/) (Tuckute et al., NAACL 2022)
ACL
- Greta Tuckute, Aalok Sathe, Mingye Wang, Harley Yoder, Cory Shain, and Evelina Fedorenko. 2022. SentSpace: Large-Scale Benchmarking and Evaluation of Text using Cognitively Motivated Lexical, Syntactic, and Semantic Features. In Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: System Demonstrations, pages 99–113, Hybrid: Seattle, Washington + Online. Association for Computational Linguistics.