@inproceedings{guo-etal-2020-smartcitecon,
title = "{S}mart{C}ite{C}on: Implicit Citation Context Extraction from Academic Literature Using Supervised Learning",
author = "Guo, Chenrui and
Cui, Haoran and
Zhang, Li and
Wang, Jiamin and
Lu, Wei and
Wu, Jian",
editor = "Knoth, Petr and
Stahl, Christopher and
Gyawali, Bikash and
Pride, David and
Kunnath, Suchetha N. and
Herrmannova, Drahomira",
booktitle = "Proceedings of the 8th International Workshop on Mining Scientific Publications",
month = "05 " # aug,
year = "2020",
address = "Wuhan, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.wosp-1.3/",
pages = "21--26",
abstract = "We introduce SmartCiteCon (SCC), a Java API for extracting both explicit and implicit citation context from academic literature in English. The tool is built on a Support Vector Machine (SVM) model trained on a set of 7,058 manually annotated citation context sentences, curated from 34,000 papers from the ACL Anthology. The model with 19 features achieves F1=85.6{\%}. SCC supports PDF, XML, and JSON files out-of-box, provided that they are conformed to certain schemas. The API supports single document processing and batch processing in parallel. It takes about 12{--}45 seconds on average depending on the format to process a document on a dedicated server with 6 multithreaded cores. Using SCC, we extracted 11.8 million citation context sentences from {\textasciitilde}33.3k PMC papers in the CORD-19 dataset, released on June 13, 2020. We will provide continuous supplementary data contribution to the CORD-19 and other datasets. The source code is released at \url{https://gitee.com/irlab/SmartCiteCon}."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="guo-etal-2020-smartcitecon">
<titleInfo>
<title>SmartCiteCon: Implicit Citation Context Extraction from Academic Literature Using Supervised Learning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Chenrui</namePart>
<namePart type="family">Guo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Haoran</namePart>
<namePart type="family">Cui</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Li</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiamin</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wei</namePart>
<namePart type="family">Lu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jian</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-08-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 8th International Workshop on Mining Scientific Publications</title>
</titleInfo>
<name type="personal">
<namePart type="given">Petr</namePart>
<namePart type="family">Knoth</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Stahl</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bikash</namePart>
<namePart type="family">Gyawali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Pride</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Suchetha</namePart>
<namePart type="given">N</namePart>
<namePart type="family">Kunnath</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Drahomira</namePart>
<namePart type="family">Herrmannova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Wuhan, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We introduce SmartCiteCon (SCC), a Java API for extracting both explicit and implicit citation context from academic literature in English. The tool is built on a Support Vector Machine (SVM) model trained on a set of 7,058 manually annotated citation context sentences, curated from 34,000 papers from the ACL Anthology. The model with 19 features achieves F1=85.6%. SCC supports PDF, XML, and JSON files out-of-box, provided that they are conformed to certain schemas. The API supports single document processing and batch processing in parallel. It takes about 12–45 seconds on average depending on the format to process a document on a dedicated server with 6 multithreaded cores. Using SCC, we extracted 11.8 million citation context sentences from ~33.3k PMC papers in the CORD-19 dataset, released on June 13, 2020. We will provide continuous supplementary data contribution to the CORD-19 and other datasets. The source code is released at https://gitee.com/irlab/SmartCiteCon.</abstract>
<identifier type="citekey">guo-etal-2020-smartcitecon</identifier>
<location>
<url>https://aclanthology.org/2020.wosp-1.3/</url>
</location>
<part>
<date>2020-08-05</date>
<extent unit="page">
<start>21</start>
<end>26</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T SmartCiteCon: Implicit Citation Context Extraction from Academic Literature Using Supervised Learning
%A Guo, Chenrui
%A Cui, Haoran
%A Zhang, Li
%A Wang, Jiamin
%A Lu, Wei
%A Wu, Jian
%Y Knoth, Petr
%Y Stahl, Christopher
%Y Gyawali, Bikash
%Y Pride, David
%Y Kunnath, Suchetha N.
%Y Herrmannova, Drahomira
%S Proceedings of the 8th International Workshop on Mining Scientific Publications
%D 2020
%8 05 August
%I Association for Computational Linguistics
%C Wuhan, China
%F guo-etal-2020-smartcitecon
%X We introduce SmartCiteCon (SCC), a Java API for extracting both explicit and implicit citation context from academic literature in English. The tool is built on a Support Vector Machine (SVM) model trained on a set of 7,058 manually annotated citation context sentences, curated from 34,000 papers from the ACL Anthology. The model with 19 features achieves F1=85.6%. SCC supports PDF, XML, and JSON files out-of-box, provided that they are conformed to certain schemas. The API supports single document processing and batch processing in parallel. It takes about 12–45 seconds on average depending on the format to process a document on a dedicated server with 6 multithreaded cores. Using SCC, we extracted 11.8 million citation context sentences from ~33.3k PMC papers in the CORD-19 dataset, released on June 13, 2020. We will provide continuous supplementary data contribution to the CORD-19 and other datasets. The source code is released at https://gitee.com/irlab/SmartCiteCon.
%U https://aclanthology.org/2020.wosp-1.3/
%P 21-26
Markdown (Informal)
[SmartCiteCon: Implicit Citation Context Extraction from Academic Literature Using Supervised Learning](https://aclanthology.org/2020.wosp-1.3/) (Guo et al., WOSP 2020)
ACL