% Conference paper from the RANLP 2021 proceedings (ACL Anthology record 2021.ranlp-1.23).
% Braces in title/booktitle protect acronyms and proper nouns from style recasing.
@inproceedings{brandt-skelbye-dannells-2021-ocr,
    title     = {{OCR} Processing of {Swedish} Historical Newspapers Using Deep Hybrid {CNN--LSTM} Networks},
    author    = {Brandt Skelbye, Molly and
                 Dann{\'e}lls, Dana},
    editor    = {Mitkov, Ruslan and
                 Angelova, Galia},
    booktitle = {Proceedings of the International Conference on Recent Advances in Natural Language Processing ({RANLP} 2021)},
    month     = sep,
    year      = {2021},
    address   = {Held Online},
    publisher = {INCOMA Ltd.},
    url       = {https://aclanthology.org/2021.ranlp-1.23},
    pages     = {190--198},
    abstract  = {Deep CNN--LSTM hybrid neural networks have proven to improve the accuracy of Optical Character Recognition (OCR) models for different languages. In this paper we examine to what extent these networks improve the OCR accuracy rates on Swedish historical newspapers. By experimenting with the open source OCR engine Calamari, we are able to show that mixed deep CNN--LSTM hybrid models outperform previous models on the task of character recognition of Swedish historical newspapers spanning 1818--1848. We achieved an average character accuracy rate (CAR) of 97.43\% which is a new state--of--the--art result on 19th century Swedish newspaper text. Our data, code and models are released under CC-BY licence.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record, keyed by the same ID as the citekey identifier below. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="brandt-skelbye-dannells-2021-ocr">
    <!-- Title of the paper itself. -->
    <titleInfo>
      <title>OCR Processing of Swedish Historical Newspapers Using Deep Hybrid CNN–LSTM Networks</title>
    </titleInfo>
    <!-- Authors, in listed order. -->
    <name type="personal">
      <namePart type="given">Molly</namePart>
      <namePart type="family">Brandt Skelbye</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Dana</namePart>
      <namePart type="family">Dannélls</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2021-09</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors and publisher details. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Ruslan</namePart>
        <namePart type="family">Mitkov</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Galia</namePart>
        <namePart type="family">Angelova</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>INCOMA Ltd.</publisher>
        <place>
          <placeTerm type="text">Held Online</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Deep CNN–LSTM hybrid neural networks have proven to improve the accuracy of Optical Character Recognition (OCR) models for different languages. In this paper we examine to what extent these networks improve the OCR accuracy rates on Swedish historical newspapers. By experimenting with the open source OCR engine Calamari, we are able to show that mixed deep CNN–LSTM hybrid models outperform previous models on the task of character recognition of Swedish historical newspapers spanning 1818–1848. We achieved an average character accuracy rate (CAR) of 97.43% which is a new state–of–the–art result on 19th century Swedish newspaper text. Our data, code and models are released under CC-BY licence.</abstract>
    <identifier type="citekey">brandt-skelbye-dannells-2021-ocr</identifier>
    <location>
      <url>https://aclanthology.org/2021.ranlp-1.23</url>
    </location>
    <!-- Position of the paper within the host volume: issue date and page range. -->
    <part>
      <date>2021-09</date>
      <extent unit="page">
        <start>190</start>
        <end>198</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T OCR Processing of Swedish Historical Newspapers Using Deep Hybrid CNN–LSTM Networks
%A Brandt Skelbye, Molly
%A Dannélls, Dana
%Y Mitkov, Ruslan
%Y Angelova, Galia
%S Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)
%D 2021
%8 September
%I INCOMA Ltd.
%C Held Online
%F brandt-skelbye-dannells-2021-ocr
%X Deep CNN–LSTM hybrid neural networks have proven to improve the accuracy of Optical Character Recognition (OCR) models for different languages. In this paper we examine to what extent these networks improve the OCR accuracy rates on Swedish historical newspapers. By experimenting with the open source OCR engine Calamari, we are able to show that mixed deep CNN–LSTM hybrid models outperform previous models on the task of character recognition of Swedish historical newspapers spanning 1818–1848. We achieved an average character accuracy rate (CAR) of 97.43% which is a new state–of–the–art result on 19th century Swedish newspaper text. Our data, code and models are released under CC-BY licence.
%U https://aclanthology.org/2021.ranlp-1.23
%P 190-198
Markdown (Informal)
[OCR Processing of Swedish Historical Newspapers Using Deep Hybrid CNN–LSTM Networks](https://aclanthology.org/2021.ranlp-1.23) (Brandt Skelbye & Dannélls, RANLP 2021)
ACL