% QuickLLaMA (Li et al., 2025), paper in the Proceedings of the 31st
% International Conference on Computational Linguistics; record follows the
% ACL Anthology export linked in the url field.
% Notes: the system name is brace-protected as a whole word so sentence-casing
% styles keep its capitalisation; the compound surname "Di Eugenio" is given in
% "Last, First" form so it is not split into first-name material.
@inproceedings{li-etal-2025-quickllama,
  author    = {Li, Jingyao and Shi, Han and Wu, Sitong and Zheng, Chuanyang and Li, Zhenguo and Jiang, Xin and Xu, Hong and Jia, Jiaya},
  title     = {{QuickLLaMA}: Query-aware Inference Acceleration for Large Language Models},
  editor    = {Rambow, Owen and Wanner, Leo and Apidianaki, Marianna and Al-Khalifa, Hend and Di Eugenio, Barbara and Schockaert, Steven},
  booktitle = {Proceedings of the 31st International Conference on Computational Linguistics},
  month     = jan,
  year      = {2025},
  address   = {Abu Dhabi, UAE},
  publisher = {Association for Computational Linguistics},
  pages     = {508--528},
  url       = {https://aclanthology.org/2025.coling-main.34/},
}