@inproceedings{hemanthage-etal-2024-recantformer,
title = "{RECANTF}ormer: Referring Expression Comprehension with Varying Numbers of Targets",
author = "Hemanthage, Bhathiya and
Bilen, Hakan and
Bartie, Phil and
Dondrup, Christian and
Lemon, Oliver",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.emnlp-main.1214/",
doi = "10.18653/v1/2024.emnlp-main.1214",
pages = "21784--21798",
abstract = "The Generalized Referring Expression Comprehension (GREC) task extends classic REC by generating image bounding boxes for objects referred to in natural language expressions, which may indicate zero, one, or multiple targets. This generalization enhances the practicality of REC models for diverse real-world applications. However, the presence of varying numbers of targets in samples makes GREC a more complex task, both in terms of training supervision and final prediction selection strategy. Addressing these challenges, we introduce RECANTFormer, a one-stage method for GREC that combines a decoder-free (encoder-only) transformer architecture with DETR-like Hungarian matching. Our approach consistently outperforms baselines by significant margins in three GREC datasets."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hemanthage-etal-2024-recantformer">
<titleInfo>
<title>RECANTFormer: Referring Expression Comprehension with Varying Numbers of Targets</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bhathiya</namePart>
<namePart type="family">Hemanthage</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hakan</namePart>
<namePart type="family">Bilen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Phil</namePart>
<namePart type="family">Bartie</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christian</namePart>
<namePart type="family">Dondrup</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Oliver</namePart>
<namePart type="family">Lemon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The Generalized Referring Expression Comprehension (GREC) task extends classic REC by generating image bounding boxes for objects referred to in natural language expressions, which may indicate zero, one, or multiple targets. This generalization enhances the practicality of REC models for diverse real-world applications. However, the presence of varying numbers of targets in samples makes GREC a more complex task, both in terms of training supervision and final prediction selection strategy. Addressing these challenges, we introduce RECANTFormer, a one-stage method for GREC that combines a decoder-free (encoder-only) transformer architecture with DETR-like Hungarian matching. Our approach consistently outperforms baselines by significant margins in three GREC datasets.</abstract>
<identifier type="citekey">hemanthage-etal-2024-recantformer</identifier>
<identifier type="doi">10.18653/v1/2024.emnlp-main.1214</identifier>
<location>
<url>https://aclanthology.org/2024.emnlp-main.1214/</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>21784</start>
<end>21798</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T RECANTFormer: Referring Expression Comprehension with Varying Numbers of Targets
%A Hemanthage, Bhathiya
%A Bilen, Hakan
%A Bartie, Phil
%A Dondrup, Christian
%A Lemon, Oliver
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F hemanthage-etal-2024-recantformer
%X The Generalized Referring Expression Comprehension (GREC) task extends classic REC by generating image bounding boxes for objects referred to in natural language expressions, which may indicate zero, one, or multiple targets. This generalization enhances the practicality of REC models for diverse real-world applications. However, the presence of varying numbers of targets in samples makes GREC a more complex task, both in terms of training supervision and final prediction selection strategy. Addressing these challenges, we introduce RECANTFormer, a one-stage method for GREC that combines a decoder-free (encoder-only) transformer architecture with DETR-like Hungarian matching. Our approach consistently outperforms baselines by significant margins in three GREC datasets.
%R 10.18653/v1/2024.emnlp-main.1214
%U https://aclanthology.org/2024.emnlp-main.1214/
%U https://doi.org/10.18653/v1/2024.emnlp-main.1214
%P 21784-21798
Markdown (Informal)
[RECANTFormer: Referring Expression Comprehension with Varying Numbers of Targets](https://aclanthology.org/2024.emnlp-main.1214/) (Hemanthage et al., EMNLP 2024)
ACL