% Conference paper in the ACL 2022 proceedings (Volume 1: Long Papers); the url field points to the ACL Anthology record.
@inproceedings{li-etal-2022-end,
  author    = {Li, Mengze and Wang, Tianbao and Zhang, Haoyu and Zhang, Shengyu and Zhao, Zhou and Miao, Jiaxu and Zhang, Wenqiao and Tan, Wenming and Wang, Jin and Wang, Peng and Pu, Shiliang and Wu, Fei},
  title     = {End-to-End Modeling via Information Tree for One-Shot Natural Language Spatial Video Grounding},
  editor    = {Muresan, Smaranda and Nakov, Preslav and Villavicencio, Aline},
  booktitle = {Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = may,
  year      = {2022},
  address   = {Dublin, Ireland},
  publisher = {Association for Computational Linguistics},
  pages     = {8707--8717},
  doi       = {10.18653/v1/2022.acl-long.596},
  url       = {https://aclanthology.org/2022.acl-long.596/},
}