@inproceedings{choi-etal-2020-toward,
title = "Toward General Scene Graph: Integration of Visual Semantic Knowledge with Entity Synset Alignment",
author = "Choi, Woo Suk and
On, Kyoung-Woon and
Heo, Yu-Jung and
Zhang, Byoung-Tak",
editor = "Wang, Xin and
Thomason, Jesse and
Hu, Ronghang and
Chen, Xinlei and
Anderson, Peter and
Wu, Qi and
Celikyilmaz, Asli and
Baldridge, Jason and
Wang, William Yang",
booktitle = "Proceedings of the First Workshop on Advances in Language and Vision Research",
month = jul,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.alvr-1.2/",
doi = "10.18653/v1/2020.alvr-1.2",
pages = "7--11",
abstract = "A scene graph is a graph representation that explicitly represents high-level semantic knowledge of an image such as objects, attributes of objects and relationships between objects. Various tasks have been proposed for the scene graph, but the problem is that they have a limited vocabulary and biased information due to their own hypothesis. Therefore, results of each task are not generalizable and are difficult to apply to other down-stream tasks. In this paper, we propose Entity Synset Alignment (ESA), which is a method to create a general scene graph by aligning various semantic knowledge efficiently to solve this bias problem. The ESA uses a large-scale lexical database, WordNet, and Intersection over Union (IoU) to align the object labels in multiple scene graphs/semantic knowledge. In experiments, the integrated scene graph is applied to the image-caption retrieval task as a down-stream task. We confirm that integrating multiple scene graphs helps to get better representations of images."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="choi-etal-2020-toward">
<titleInfo>
<title>Toward General Scene Graph: Integration of Visual Semantic Knowledge with Entity Synset Alignment</title>
</titleInfo>
<name type="personal">
<namePart type="given">Woo</namePart>
<namePart type="given">Suk</namePart>
<namePart type="family">Choi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kyoung-Woon</namePart>
<namePart type="family">On</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yu-Jung</namePart>
<namePart type="family">Heo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Byoung-Tak</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Advances in Language and Vision Research</title>
</titleInfo>
<name type="personal">
<namePart type="given">Xin</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jesse</namePart>
<namePart type="family">Thomason</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ronghang</namePart>
<namePart type="family">Hu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xinlei</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Peter</namePart>
<namePart type="family">Anderson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Qi</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asli</namePart>
<namePart type="family">Celikyilmaz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jason</namePart>
<namePart type="family">Baldridge</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">William</namePart>
<namePart type="given">Yang</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>A scene graph is a graph representation that explicitly represents high-level semantic knowledge of an image such as objects, attributes of objects and relationships between objects. Various tasks have been proposed for the scene graph, but the problem is that they have a limited vocabulary and biased information due to their own hypothesis. Therefore, results of each task are not generalizable and are difficult to apply to other down-stream tasks. In this paper, we propose Entity Synset Alignment (ESA), which is a method to create a general scene graph by aligning various semantic knowledge efficiently to solve this bias problem. The ESA uses a large-scale lexical database, WordNet, and Intersection over Union (IoU) to align the object labels in multiple scene graphs/semantic knowledge. In experiments, the integrated scene graph is applied to the image-caption retrieval task as a down-stream task. We confirm that integrating multiple scene graphs helps to get better representations of images.</abstract>
<identifier type="citekey">choi-etal-2020-toward</identifier>
<identifier type="doi">10.18653/v1/2020.alvr-1.2</identifier>
<location>
<url>https://aclanthology.org/2020.alvr-1.2/</url>
</location>
<part>
<date>2020-07</date>
<extent unit="page">
<start>7</start>
<end>11</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Toward General Scene Graph: Integration of Visual Semantic Knowledge with Entity Synset Alignment
%A Choi, Woo Suk
%A On, Kyoung-Woon
%A Heo, Yu-Jung
%A Zhang, Byoung-Tak
%Y Wang, Xin
%Y Thomason, Jesse
%Y Hu, Ronghang
%Y Chen, Xinlei
%Y Anderson, Peter
%Y Wu, Qi
%Y Celikyilmaz, Asli
%Y Baldridge, Jason
%Y Wang, William Yang
%S Proceedings of the First Workshop on Advances in Language and Vision Research
%D 2020
%8 July
%I Association for Computational Linguistics
%C Online
%F choi-etal-2020-toward
%X A scene graph is a graph representation that explicitly represents high-level semantic knowledge of an image such as objects, attributes of objects and relationships between objects. Various tasks have been proposed for the scene graph, but the problem is that they have a limited vocabulary and biased information due to their own hypothesis. Therefore, results of each task are not generalizable and are difficult to apply to other down-stream tasks. In this paper, we propose Entity Synset Alignment (ESA), which is a method to create a general scene graph by aligning various semantic knowledge efficiently to solve this bias problem. The ESA uses a large-scale lexical database, WordNet, and Intersection over Union (IoU) to align the object labels in multiple scene graphs/semantic knowledge. In experiments, the integrated scene graph is applied to the image-caption retrieval task as a down-stream task. We confirm that integrating multiple scene graphs helps to get better representations of images.
%R 10.18653/v1/2020.alvr-1.2
%U https://aclanthology.org/2020.alvr-1.2/
%U https://doi.org/10.18653/v1/2020.alvr-1.2
%P 7-11
Markdown (Informal)
[Toward General Scene Graph: Integration of Visual Semantic Knowledge with Entity Synset Alignment](https://aclanthology.org/2020.alvr-1.2/) (Choi et al., ALVR 2020)
ACL