@inproceedings{li-etal-2022-mplug, title = "m{PLUG}: Effective and Efficient Vision-Language Learning by Cross-modal Skip-connections", author = "Li, Chenliang and Xu, Haiyang and Tian, Junfeng and Wang, Wei and Yan, Ming and Bi, Bin and Ye, Jiabo and Chen, He and Xu, Guohai and Cao, Zheng and Zhang, Ji and Huang, Songfang and Huang, Fei and Zhou, Jingren and Si, Luo", editor = "Goldberg, Yoav and Kozareva, Zornitsa and Zhang, Yue", booktitle = "Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing", month = dec, year = "2022", address = "Abu Dhabi, United Arab Emirates", publisher = "Association for Computational Linguistics", url = "https://aclanthology.org/2022.emnlp-main.488/", doi = "10.18653/v1/2022.emnlp-main.488", pages = "7241--7259" }