@inproceedings{liesenfeld-etal-2021-scikit,
title = "{S}cikit-talk: A toolkit for processing real-world conversational speech data",
author = "Liesenfeld, Andreas and
Parti, Gabor and
Huang, Chu-Ren",
editor = "Li, Haizhou and
Levow, Gina-Anne and
Yu, Zhou and
Gupta, Chitralekha and
Sisman, Berrak and
Cai, Siqi and
Vandyke, David and
Dethlefs, Nina and
Wu, Yan and
Li, Junyi Jessy",
booktitle = "Proceedings of the 22nd Annual Meeting of the Special Interest Group on Discourse and Dialogue",
month = jul,
year = "2021",
address = "Singapore and Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.sigdial-1.26",
doi = "10.18653/v1/2021.sigdial-1.26",
pages = "252--256",
abstract = "We present Scikit-talk, an open-source toolkit for processing collections of real-world conversational speech in Python. First of its kind, the toolkit equips those interested in studying or modeling conversations with an easy-to-use interface to build and explore large collections of transcriptions and annotations of talk-in-interaction. Designed for applications in speech processing and Conversational AI, Scikit-talk provides tools to custom-build datasets for tasks such as intent prototyping, dialog flow testing, and conversation design. Its \textit{preprocessor} module comes with several pre-built interfaces for common transcription formats, which aim to make working across multiple data sources more accessible. The \textit{explorer} module provides a collection of tools to explore and analyse this data type via string matching and unsupervised machine learning techniques. Scikit-talk serves as a platform to collect and connect different transcription formats and representations of talk, enabling the user to quickly build multilingual datasets of varying detail and granularity. Thus, the toolkit aims to make working with authentic conversational speech data in Python more accessible and to provide the user with comprehensive options to work with representations of talk in appropriate detail for any downstream task. For the latest updates and information on currently supported languages and language resources, please refer to: \url{https://pypi.org/project/scikit-talk/}",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="liesenfeld-etal-2021-scikit">
<titleInfo>
<title>Scikit-talk: A toolkit for processing real-world conversational speech data</title>
</titleInfo>
<name type="personal">
<namePart type="given">Andreas</namePart>
<namePart type="family">Liesenfeld</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gabor</namePart>
<namePart type="family">Parti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chu-Ren</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 22nd Annual Meeting of the Special Interest Group on Discourse and Dialogue</title>
</titleInfo>
<name type="personal">
<namePart type="given">Haizhou</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gina-Anne</namePart>
<namePart type="family">Levow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhou</namePart>
<namePart type="family">Yu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chitralekha</namePart>
<namePart type="family">Gupta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Berrak</namePart>
<namePart type="family">Sisman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Siqi</namePart>
<namePart type="family">Cai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Vandyke</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nina</namePart>
<namePart type="family">Dethlefs</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yan</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junyi</namePart>
<namePart type="given">Jessy</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore and Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present Scikit-talk, an open-source toolkit for processing collections of real-world conversational speech in Python. First of its kind, the toolkit equips those interested in studying or modeling conversations with an easy-to-use interface to build and explore large collections of transcriptions and annotations of talk-in-interaction. Designed for applications in speech processing and Conversational AI, Scikit-talk provides tools to custom-build datasets for tasks such as intent prototyping, dialog flow testing, and conversation design. Its preprocessor module comes with several pre-built interfaces for common transcription formats, which aim to make working across multiple data sources more accessible. The explorer module provides a collection of tools to explore and analyse this data type via string matching and unsupervised machine learning techniques. Scikit-talk serves as a platform to collect and connect different transcription formats and representations of talk, enabling the user to quickly build multilingual datasets of varying detail and granularity. Thus, the toolkit aims to make working with authentic conversational speech data in Python more accessible and to provide the user with comprehensive options to work with representations of talk in appropriate detail for any downstream task. For the latest updates and information on currently supported languages and language resources, please refer to: https://pypi.org/project/scikit-talk/</abstract>
<identifier type="citekey">liesenfeld-etal-2021-scikit</identifier>
<identifier type="doi">10.18653/v1/2021.sigdial-1.26</identifier>
<location>
<url>https://aclanthology.org/2021.sigdial-1.26</url>
</location>
<part>
<date>2021-07</date>
<extent unit="page">
<start>252</start>
<end>256</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Scikit-talk: A toolkit for processing real-world conversational speech data
%A Liesenfeld, Andreas
%A Parti, Gabor
%A Huang, Chu-Ren
%Y Li, Haizhou
%Y Levow, Gina-Anne
%Y Yu, Zhou
%Y Gupta, Chitralekha
%Y Sisman, Berrak
%Y Cai, Siqi
%Y Vandyke, David
%Y Dethlefs, Nina
%Y Wu, Yan
%Y Li, Junyi Jessy
%S Proceedings of the 22nd Annual Meeting of the Special Interest Group on Discourse and Dialogue
%D 2021
%8 July
%I Association for Computational Linguistics
%C Singapore and Online
%F liesenfeld-etal-2021-scikit
%X We present Scikit-talk, an open-source toolkit for processing collections of real-world conversational speech in Python. First of its kind, the toolkit equips those interested in studying or modeling conversations with an easy-to-use interface to build and explore large collections of transcriptions and annotations of talk-in-interaction. Designed for applications in speech processing and Conversational AI, Scikit-talk provides tools to custom-build datasets for tasks such as intent prototyping, dialog flow testing, and conversation design. Its preprocessor module comes with several pre-built interfaces for common transcription formats, which aim to make working across multiple data sources more accessible. The explorer module provides a collection of tools to explore and analyse this data type via string matching and unsupervised machine learning techniques. Scikit-talk serves as a platform to collect and connect different transcription formats and representations of talk, enabling the user to quickly build multilingual datasets of varying detail and granularity. Thus, the toolkit aims to make working with authentic conversational speech data in Python more accessible and to provide the user with comprehensive options to work with representations of talk in appropriate detail for any downstream task. For the latest updates and information on currently supported languages and language resources, please refer to: https://pypi.org/project/scikit-talk/
%R 10.18653/v1/2021.sigdial-1.26
%U https://aclanthology.org/2021.sigdial-1.26
%U https://doi.org/10.18653/v1/2021.sigdial-1.26
%P 252-256
Markdown (Informal)
[Scikit-talk: A toolkit for processing real-world conversational speech data](https://aclanthology.org/2021.sigdial-1.26) (Liesenfeld et al., SIGDIAL 2021)
ACL