% Conference paper in the TextGraphs-17 proceedings; the url field points at the ACL Anthology record.
@inproceedings{chepurova-etal-2024-prompt,
  author    = {Chepurova, Alla and
               Kuratov, Yuri and
               Bulatov, Aydar and
               Burtsev, Mikhail},
  title     = {Prompt Me One More Time: A Two-Step Knowledge Extraction Pipeline with Ontology-Based Verification},
  editor    = {Ustalov, Dmitry and
               Gao, Yanjun and
               Panchenko, Alexander and
               Tutubalina, Elena and
               Nikishina, Irina and
               Ramesh, Arti and
               Sakhovskiy, Andrey and
               Usbeck, Ricardo and
               Penn, Gerald and
               Valentino, Marco},
  booktitle = {Proceedings of TextGraphs-17: Graph-based Methods for Natural Language Processing},
  month     = aug,
  year      = {2024},
  address   = {Bangkok, Thailand},
  publisher = {Association for Computational Linguistics},
  pages     = {61--77},
  url       = {https://aclanthology.org/2024.textgraphs-1.5},
  abstract  = {This study explores a method for extending real-world knowledge graphs (specifically, Wikidata) by extracting triplets from texts with the aid of Large Language Models (LLMs). We propose a two-step pipeline that includes the initial extraction of entity candidates, followed by their refinement and linkage to the canonical entities and relations of the knowledge graph. Finally, we utilize Wikidata relation constraints to select only verified triplets. We compare our approach to a model that was fine-tuned on a machine-generated dataset and demonstrate that it performs better on natural data. Our results suggest that LLM-based triplet extraction from texts, with subsequent verification, is a viable method for real-world applications.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding one record, ID chepurova-etal-2024-prompt. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="chepurova-etal-2024-prompt">
    <!-- Title of the paper. -->
    <titleInfo>
      <title>Prompt Me One More Time: A Two-Step Knowledge Extraction Pipeline with Ontology-Based Verification</title>
    </titleInfo>
    <!-- Paper authors (role term: author). -->
    <name type="personal">
      <namePart type="given">Alla</namePart>
      <namePart type="family">Chepurova</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yuri</namePart>
      <namePart type="family">Kuratov</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Aydar</namePart>
      <namePart type="family">Bulatov</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Mikhail</namePart>
      <namePart type="family">Burtsev</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <!-- Issue date of the paper (year and month). -->
    <originInfo>
      <dateIssued>2024-08</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume containing the paper, with its editors and publisher. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of TextGraphs-17: Graph-based Methods for Natural Language Processing</title>
      </titleInfo>
      <!-- Proceedings editors (role term: editor). -->
      <name type="personal">
        <namePart type="given">Dmitry</namePart>
        <namePart type="family">Ustalov</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yanjun</namePart>
        <namePart type="family">Gao</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Alexander</namePart>
        <namePart type="family">Panchenko</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Elena</namePart>
        <namePart type="family">Tutubalina</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Irina</namePart>
        <namePart type="family">Nikishina</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Arti</namePart>
        <namePart type="family">Ramesh</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Andrey</namePart>
        <namePart type="family">Sakhovskiy</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ricardo</namePart>
        <namePart type="family">Usbeck</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Gerald</namePart>
        <namePart type="family">Penn</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Marco</namePart>
        <namePart type="family">Valentino</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <!-- Publisher and place recorded for the proceedings. -->
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Bangkok, Thailand</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>This study explores a method for extending real-world knowledge graphs (specifically, Wikidata) by extracting triplets from texts with the aid of Large Language Models (LLMs). We propose a two-step pipeline that includes the initial extraction of entity candidates, followed by their refinement and linkage to the canonical entities and relations of the knowledge graph. Finally, we utilize Wikidata relation constraints to select only verified triplets. We compare our approach to a model that was fine-tuned on a machine-generated dataset and demonstrate that it performs better on natural data. Our results suggest that LLM-based triplet extraction from texts, with subsequent verification, is a viable method for real-world applications.</abstract>
    <!-- Citation key; same value as the mods ID attribute. -->
    <identifier type="citekey">chepurova-etal-2024-prompt</identifier>
    <location>
      <url>https://aclanthology.org/2024.textgraphs-1.5</url>
    </location>
    <!-- Position of the paper within the host: date and page extent. -->
    <part>
      <date>2024-08</date>
      <extent unit="page">
        <start>61</start>
        <end>77</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Prompt Me One More Time: A Two-Step Knowledge Extraction Pipeline with Ontology-Based Verification
%A Chepurova, Alla
%A Kuratov, Yuri
%A Bulatov, Aydar
%A Burtsev, Mikhail
%Y Ustalov, Dmitry
%Y Gao, Yanjun
%Y Panchenko, Alexander
%Y Tutubalina, Elena
%Y Nikishina, Irina
%Y Ramesh, Arti
%Y Sakhovskiy, Andrey
%Y Usbeck, Ricardo
%Y Penn, Gerald
%Y Valentino, Marco
%S Proceedings of TextGraphs-17: Graph-based Methods for Natural Language Processing
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F chepurova-etal-2024-prompt
%X This study explores a method for extending real-world knowledge graphs (specifically, Wikidata) by extracting triplets from texts with the aid of Large Language Models (LLMs). We propose a two-step pipeline that includes the initial extraction of entity candidates, followed by their refinement and linkage to the canonical entities and relations of the knowledge graph. Finally, we utilize Wikidata relation constraints to select only verified triplets. We compare our approach to a model that was fine-tuned on a machine-generated dataset and demonstrate that it performs better on natural data. Our results suggest that LLM-based triplet extraction from texts, with subsequent verification, is a viable method for real-world applications.
%U https://aclanthology.org/2024.textgraphs-1.5
%P 61-77
Markdown (Informal)
[Prompt Me One More Time: A Two-Step Knowledge Extraction Pipeline with Ontology-Based Verification](https://aclanthology.org/2024.textgraphs-1.5) (Chepurova et al., TextGraphs-WS 2024)
ACL