@inproceedings{abdibayev-etal-2021-automating,
title = "Automating the Detection of Poetic Features: The Limerick as Model Organism",
author = "Abdibayev, Almas and
Igarashi, Yohei and
Riddell, Allen and
Rockmore, Daniel",
editor = "Degaetano-Ortlieb, Stefania and
Kazantseva, Anna and
Reiter, Nils and
Szpakowicz, Stan",
booktitle = "Proceedings of the 5th Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature",
month = nov,
year = "2021",
address = "Punta Cana, Dominican Republic (online)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.latechclfl-1.9",
doi = "10.18653/v1/2021.latechclfl-1.9",
pages = "80--90",
abstract = "In this paper we take up the problem of {``}limerick detection{''} and describe a system to identify five-line poems as limericks or not. This turns out to be a surprisingly difficult challenge with many subtleties. More precisely, we produce an algorithm which focuses on the structural aspects of the limerick {--} rhyme scheme and rhythm (i.e., stress patterns) {--} and when tested on a a culled data set of 98,454 publicly available limericks, our {``}limerick filter{''} accepts 67{\%} as limericks. The primary failure of our filter is on the detection of {``}non-standard{''} rhymes, which we highlight as an outstanding challenge in computational poetics. Our accent detection algorithm proves to be very robust. Our main contributions are (1) a novel rhyme detection algorithm that works on English words including rare proper nouns and made-up words (and thus, words not in the widely used CMUDict database); (2) a novel rhythm-identifying heuristic that is robust to language noise at moderate levels and comparable in accuracy to state-of-the-art scansion algorithms. As a third significant contribution (3) we make publicly available a large corpus of limericks that includes tags of {``}limerick{''} or {``}not-limerick{''} as determined by our identification software, thereby providing a benchmark for the community. The poetic tasks that we have identified as challenges for machines suggest that the limerick is a useful {``}model organism{''} for the study of machine capabilities in poetry and more broadly literature and language. We include a list of open challenges as well. Generally, we anticipate that this work will provide useful material and benchmarks for future explorations in the field.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="abdibayev-etal-2021-automating">
<titleInfo>
<title>Automating the Detection of Poetic Features: The Limerick as Model Organism</title>
</titleInfo>
<name type="personal">
<namePart type="given">Almas</namePart>
<namePart type="family">Abdibayev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yohei</namePart>
<namePart type="family">Igarashi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Allen</namePart>
<namePart type="family">Riddell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Rockmore</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 5th Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature</title>
</titleInfo>
<name type="personal">
<namePart type="given">Stefania</namePart>
<namePart type="family">Degaetano-Ortlieb</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Kazantseva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nils</namePart>
<namePart type="family">Reiter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stan</namePart>
<namePart type="family">Szpakowicz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Punta Cana, Dominican Republic (online)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper we take up the problem of “limerick detection” and describe a system to identify five-line poems as limericks or not. This turns out to be a surprisingly difficult challenge with many subtleties. More precisely, we produce an algorithm which focuses on the structural aspects of the limerick – rhyme scheme and rhythm (i.e., stress patterns) – and when tested on a a culled data set of 98,454 publicly available limericks, our “limerick filter” accepts 67% as limericks. The primary failure of our filter is on the detection of “non-standard” rhymes, which we highlight as an outstanding challenge in computational poetics. Our accent detection algorithm proves to be very robust. Our main contributions are (1) a novel rhyme detection algorithm that works on English words including rare proper nouns and made-up words (and thus, words not in the widely used CMUDict database); (2) a novel rhythm-identifying heuristic that is robust to language noise at moderate levels and comparable in accuracy to state-of-the-art scansion algorithms. As a third significant contribution (3) we make publicly available a large corpus of limericks that includes tags of “limerick” or “not-limerick” as determined by our identification software, thereby providing a benchmark for the community. The poetic tasks that we have identified as challenges for machines suggest that the limerick is a useful “model organism” for the study of machine capabilities in poetry and more broadly literature and language. We include a list of open challenges as well. Generally, we anticipate that this work will provide useful material and benchmarks for future explorations in the field.</abstract>
<identifier type="citekey">abdibayev-etal-2021-automating</identifier>
<identifier type="doi">10.18653/v1/2021.latechclfl-1.9</identifier>
<location>
<url>https://aclanthology.org/2021.latechclfl-1.9</url>
</location>
<part>
<date>2021-11</date>
<extent unit="page">
<start>80</start>
<end>90</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Automating the Detection of Poetic Features: The Limerick as Model Organism
%A Abdibayev, Almas
%A Igarashi, Yohei
%A Riddell, Allen
%A Rockmore, Daniel
%Y Degaetano-Ortlieb, Stefania
%Y Kazantseva, Anna
%Y Reiter, Nils
%Y Szpakowicz, Stan
%S Proceedings of the 5th Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature
%D 2021
%8 November
%I Association for Computational Linguistics
%C Punta Cana, Dominican Republic (online)
%F abdibayev-etal-2021-automating
%X In this paper we take up the problem of “limerick detection” and describe a system to identify five-line poems as limericks or not. This turns out to be a surprisingly difficult challenge with many subtleties. More precisely, we produce an algorithm which focuses on the structural aspects of the limerick – rhyme scheme and rhythm (i.e., stress patterns) – and when tested on a a culled data set of 98,454 publicly available limericks, our “limerick filter” accepts 67% as limericks. The primary failure of our filter is on the detection of “non-standard” rhymes, which we highlight as an outstanding challenge in computational poetics. Our accent detection algorithm proves to be very robust. Our main contributions are (1) a novel rhyme detection algorithm that works on English words including rare proper nouns and made-up words (and thus, words not in the widely used CMUDict database); (2) a novel rhythm-identifying heuristic that is robust to language noise at moderate levels and comparable in accuracy to state-of-the-art scansion algorithms. As a third significant contribution (3) we make publicly available a large corpus of limericks that includes tags of “limerick” or “not-limerick” as determined by our identification software, thereby providing a benchmark for the community. The poetic tasks that we have identified as challenges for machines suggest that the limerick is a useful “model organism” for the study of machine capabilities in poetry and more broadly literature and language. We include a list of open challenges as well. Generally, we anticipate that this work will provide useful material and benchmarks for future explorations in the field.
%R 10.18653/v1/2021.latechclfl-1.9
%U https://aclanthology.org/2021.latechclfl-1.9
%U https://doi.org/10.18653/v1/2021.latechclfl-1.9
%P 80-90
Markdown (Informal)
[Automating the Detection of Poetic Features: The Limerick as Model Organism](https://aclanthology.org/2021.latechclfl-1.9) (Abdibayev et al., LaTeCHCLfL 2021)
ACL
- Almas Abdibayev, Yohei Igarashi, Allen Riddell, and Daniel Rockmore. 2021. Automating the Detection of Poetic Features: The Limerick as Model Organism. In Proceedings of the 5th Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature, pages 80–90, Punta Cana, Dominican Republic (online). Association for Computational Linguistics.