% Conference paper entry; the acronym in booktitle is brace-protected so that
% styles which re-case venue names keep it in upper case.
@inproceedings{zhang2025beingvl05,
  author    = {Zhang, Wanpeng and Feng, Yicheng and Luo, Hao and Li, Yijiang and Yue, Zihao and Zheng, Sipeng and Lu, Zongqing},
  title     = {Unified Multimodal Understanding via Byte-Pair Visual Encoding},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision},
  year      = {2025},
}