@inproceedings{opedal-etal-2023-efficient,
title = "Efficient Semiring-Weighted {E}arley Parsing",
author = "Opedal, Andreas and
Zmigrod, Ran and
Vieira, Tim and
Cotterell, Ryan and
Eisner, Jason",
editor = "Rogers, Anna and
Boyd-Graber, Jordan and
Okazaki, Naoaki",
booktitle = "Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.acl-long.204",
doi = "10.18653/v1/2023.acl-long.204",
pages = "3687--3713",
abstract = "We present Earley{'}s (1970) context-free parsing algorithm as a deduction system, incorporating various known and new speed-ups. In particular, our presentation supports a known worst-case runtime improvement from Earley{'}s (1970) $O(N^3|G||R|)$, which is unworkable for the large grammars that arise in natural language processing, to $O(N^3|G|)$, which matches the complexity of CKY on a binarized version of the grammar G. Here N is the length of the sentence, |R| is the number of productions in G, and |G| is the total length of those productions. We also provide a version that achieves runtime of $O(N^3|M|)$ with $|M| \leq |G|$ when the grammar is represented compactly as a single finite-state automaton M (this is partly novel). We carefully treat the generalization to semiring-weighted deduction, preprocessing the grammar like Stolcke (1995) to eliminate the possibility of deduction cycles, and further generalize Stolcke{'}s method to compute the weights of sentence prefixes. We also provide implementation details for efficient execution, ensuring that on a preprocessed grammar, the semiring-weighted versions of our methods have the same asymptotic runtime and space requirements as the unweighted methods, including sub-cubic runtime on some grammars.",
}