% Conference paper in the EMNLP 2023 proceedings (ACL Anthology record 2023.emnlp-main.608).
% Field values are stored verbatim; casing and abbreviation are left to the bibliography style.
@inproceedings{bhattacharyya-etal-2023-video,
  author    = {Bhattacharyya, Aanisha and Singla, Yaman K and Krishnamurthy, Balaji and Shah, Rajiv Ratn and Chen, Changyou},
  title     = {A Video Is Worth 4096 Tokens: Verbalize Videos To Understand Them In Zero Shot},
  editor    = {Bouamor, Houda and Pino, Juan and Bali, Kalika},
  booktitle = {Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing},
  publisher = {Association for Computational Linguistics},
  address   = {Singapore},
  month     = dec,
  year      = {2023},
  pages     = {9822--9839},
  doi       = {10.18653/v1/2023.emnlp-main.608},
  url       = {https://aclanthology.org/2023.emnlp-main.608/},
}