@inproceedings{wu-etal-2024-deferred,
  author    = {Wu, Zelin and Song, Gan and Li, Christopher and Rondon, Pat and Meng, Zhong and Velez, Xavier and Wang, Weiran and Caseiro, Diamantino and Pundak, Golan and Munkhdalai, Tsendsuren and Chandorkar, Angad and Prabhavalkar, Rohit},
  title     = {Deferred {NAM}: Low-latency Top-K Context Injection via Deferred Context Encoding for Non-Streaming {ASR}},
  editor    = {Yang, Yi and Davani, Aida and Sil, Avi and Kumar, Anoop},
  booktitle = {Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 6: Industry Track)},
  pages     = {315--323},
  publisher = {Association for Computational Linguistics},
  address   = {Mexico City, Mexico},
  month     = jun,
  year      = {2024},
  doi       = {10.18653/v1/2024.naacl-industry.26},
  url       = {https://aclanthology.org/2024.naacl-industry.26/},
}