@inproceedings{belcavello-etal-2020-frame,
title = "Frame-Based Annotation of Multimodal Corpora: Tracking (A)Synchronies in Meaning Construction",
author = "Belcavello, Frederico and
Viridiano, Marcelo and
Diniz da Costa, Alexandre and
Matos, Ely Edison da Silva and
Torrent, Tiago Timponi",
editor = "Torrent, Tiago T. and
Baker, Collin F. and
Czulo, Oliver and
Ohara, Kyoko and
Petruck, Miriam R. L.",
booktitle = "Proceedings of the International FrameNet Workshop 2020: Towards a Global, Multilingual FrameNet",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2020.framenet-1.4",
pages = "23--30",
abstract = "Multimodal aspects of human communication are key in several applications of Natural Language Processing, such as Machine Translation and Natural Language Generation. Despite recent advances in integrating multimodality into Computational Linguistics, the merge between NLP and Computer Vision techniques is still timid, especially when it comes to providing fine-grained accounts for meaning construction. This paper reports on research aiming to determine appropriate methodology and develop a computational tool to annotate multimodal corpora according to a principled structured semantic representation of events, relations and entities: FrameNet. Taking a Brazilian television travel show as corpus, a pilot study was conducted to annotate the frames that are evoked by the audio and the ones that are evoked by visual elements. We also implemented a Multimodal Annotation tool which allows annotators to choose frames and locate frame elements both in the text and in the images, while keeping track of the time span in which those elements are active in each modality. Results suggest that adding a multimodal domain to the linguistic layer of annotation and analysis contributes both to enrich the kind of information that can be tagged in a corpus, and to enhance FrameNet as a model of linguistic cognition.",
language = "English",
ISBN = "979-10-95546-58-0",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="belcavello-etal-2020-frame">
<titleInfo>
<title>Frame-Based Annotation of Multimodal Corpora: Tracking (A)Synchronies in Meaning Construction</title>
</titleInfo>
<name type="personal">
<namePart type="given">Frederico</namePart>
<namePart type="family">Belcavello</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcelo</namePart>
<namePart type="family">Viridiano</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexandre</namePart>
<namePart type="family">Diniz da Costa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ely</namePart>
<namePart type="given">Edison</namePart>
<namePart type="given">da</namePart>
<namePart type="given">Silva</namePart>
<namePart type="family">Matos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tiago</namePart>
<namePart type="given">Timponi</namePart>
<namePart type="family">Torrent</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the International FrameNet Workshop 2020: Towards a Global, Multilingual FrameNet</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tiago</namePart>
<namePart type="given">T</namePart>
<namePart type="family">Torrent</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Collin</namePart>
<namePart type="given">F</namePart>
<namePart type="family">Baker</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Oliver</namePart>
<namePart type="family">Czulo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kyoko</namePart>
<namePart type="family">Ohara</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Miriam</namePart>
<namePart type="given">R</namePart>
<namePart type="given">L</namePart>
<namePart type="family">Petruck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-58-0</identifier>
</relatedItem>
<abstract>Multimodal aspects of human communication are key in several applications of Natural Language Processing, such as Machine Translation and Natural Language Generation. Despite recent advances in integrating multimodality into Computational Linguistics, the merge between NLP and Computer Vision techniques is still timid, especially when it comes to providing fine-grained accounts for meaning construction. This paper reports on research aiming to determine appropriate methodology and develop a computational tool to annotate multimodal corpora according to a principled structured semantic representation of events, relations and entities: FrameNet. Taking a Brazilian television travel show as corpus, a pilot study was conducted to annotate the frames that are evoked by the audio and the ones that are evoked by visual elements. We also implemented a Multimodal Annotation tool which allows annotators to choose frames and locate frame elements both in the text and in the images, while keeping track of the time span in which those elements are active in each modality. Results suggest that adding a multimodal domain to the linguistic layer of annotation and analysis contributes both to enrich the kind of information that can be tagged in a corpus, and to enhance FrameNet as a model of linguistic cognition.</abstract>
<identifier type="citekey">belcavello-etal-2020-frame</identifier>
<location>
<url>https://aclanthology.org/2020.framenet-1.4</url>
</location>
<part>
<date>2020-05</date>
<extent unit="page">
<start>23</start>
<end>30</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Frame-Based Annotation of Multimodal Corpora: Tracking (A)Synchronies in Meaning Construction
%A Belcavello, Frederico
%A Viridiano, Marcelo
%A Diniz da Costa, Alexandre
%A Matos, Ely Edison da Silva
%A Torrent, Tiago Timponi
%Y Torrent, Tiago T.
%Y Baker, Collin F.
%Y Czulo, Oliver
%Y Ohara, Kyoko
%Y Petruck, Miriam R. L.
%S Proceedings of the International FrameNet Workshop 2020: Towards a Global, Multilingual FrameNet
%D 2020
%8 May
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-58-0
%G English
%F belcavello-etal-2020-frame
%X Multimodal aspects of human communication are key in several applications of Natural Language Processing, such as Machine Translation and Natural Language Generation. Despite recent advances in integrating multimodality into Computational Linguistics, the merge between NLP and Computer Vision techniques is still timid, especially when it comes to providing fine-grained accounts for meaning construction. This paper reports on research aiming to determine appropriate methodology and develop a computational tool to annotate multimodal corpora according to a principled structured semantic representation of events, relations and entities: FrameNet. Taking a Brazilian television travel show as corpus, a pilot study was conducted to annotate the frames that are evoked by the audio and the ones that are evoked by visual elements. We also implemented a Multimodal Annotation tool which allows annotators to choose frames and locate frame elements both in the text and in the images, while keeping track of the time span in which those elements are active in each modality. Results suggest that adding a multimodal domain to the linguistic layer of annotation and analysis contributes both to enrich the kind of information that can be tagged in a corpus, and to enhance FrameNet as a model of linguistic cognition.
%U https://aclanthology.org/2020.framenet-1.4
%P 23-30
Markdown (Informal)
[Frame-Based Annotation of Multimodal Corpora: Tracking (A)Synchronies in Meaning Construction](https://aclanthology.org/2020.framenet-1.4) (Belcavello et al., FrameNet 2020)
ACL