@inproceedings{jakubicek-etal-2020-current,
title = "Current Challenges in Web Corpus Building",
author = "Jakub{\'\i}{\v{c}}ek, Milo{\v{s}} and
Kov{\'a}{\v{r}}, Vojt{\v{e}}ch and
Rychl{\'y}, Pavel and
Suchomel, Vit",
editor = {Barbaresi, Adrien and
Bildhauer, Felix and
Sch{\"a}fer, Roland and
Stemle, Egon},
booktitle = "Proceedings of the 12th Web as Corpus Workshop",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2020.wac-1.1",
pages = "1--4",
abstract = "In this paper we discuss some of the current challenges in web corpus building that we faced in the recent years when expanding the corpora in Sketch Engine. The purpose of the paper is to provide an overview and raise discussion on possible solutions, rather than bringing ready solutions to the readers. For every issue we try to assess its severity and briefly discuss possible mitigation options.",
language = "English",
ISBN = "979-10-95546-68-9",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="jakubicek-etal-2020-current">
<titleInfo>
<title>Current Challenges in Web Corpus Building</title>
</titleInfo>
<name type="personal">
<namePart type="given">Miloš</namePart>
<namePart type="family">Jakubíček</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vojtěch</namePart>
<namePart type="family">Kovář</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pavel</namePart>
<namePart type="family">Rychlý</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vit</namePart>
<namePart type="family">Suchomel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 12th Web as Corpus Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">Adrien</namePart>
<namePart type="family">Barbaresi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Felix</namePart>
<namePart type="family">Bildhauer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Roland</namePart>
<namePart type="family">Schäfer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Egon</namePart>
<namePart type="family">Stemle</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-68-9</identifier>
</relatedItem>
<abstract>In this paper we discuss some of the current challenges in web corpus building that we faced in the recent years when expanding the corpora in Sketch Engine. The purpose of the paper is to provide an overview and raise discussion on possible solutions, rather than bringing ready solutions to the readers. For every issue we try to assess its severity and briefly discuss possible mitigation options.</abstract>
<identifier type="citekey">jakubicek-etal-2020-current</identifier>
<location>
<url>https://aclanthology.org/2020.wac-1.1</url>
</location>
<part>
<date>2020-05</date>
<extent unit="page">
<start>1</start>
<end>4</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Current Challenges in Web Corpus Building
%A Jakubíček, Miloš
%A Kovář, Vojtěch
%A Rychlý, Pavel
%A Suchomel, Vit
%Y Barbaresi, Adrien
%Y Bildhauer, Felix
%Y Schäfer, Roland
%Y Stemle, Egon
%S Proceedings of the 12th Web as Corpus Workshop
%D 2020
%8 May
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-68-9
%G English
%F jakubicek-etal-2020-current
%X In this paper we discuss some of the current challenges in web corpus building that we faced in the recent years when expanding the corpora in Sketch Engine. The purpose of the paper is to provide an overview and raise discussion on possible solutions, rather than bringing ready solutions to the readers. For every issue we try to assess its severity and briefly discuss possible mitigation options.
%U https://aclanthology.org/2020.wac-1.1
%P 1-4
Markdown (Informal)
[Current Challenges in Web Corpus Building](https://aclanthology.org/2020.wac-1.1) (Jakubíček et al., WAC 2020)
ACL
- Miloš Jakubíček, Vojtěch Kovář, Pavel Rychlý, and Vit Suchomel. 2020. Current Challenges in Web Corpus Building. In Proceedings of the 12th Web as Corpus Workshop, pages 1–4, Marseille, France. European Language Resources Association.