% arXiv preprint (2024). Mixed-case name and acronym in the title are braced so
% sentence-casing styles keep them. The journal string is kept verbatim for
% styles that ignore eprint fields; eprint/archiveprefix carry the same id.
@article{liu2024minicache,
  author        = {Liu, Akide and Liu, Jing and Pan, Zizheng and He, Yefei and Haffari, Gholamreza and Zhuang, Bohan},
  title         = {{MiniCache}: {KV} Cache Compression in Depth Dimension for Large Language Models},
  journal       = {arXiv preprint arXiv:2405.14366},
  year          = {2024},
  eprint        = {2405.14366},
  archiveprefix = {arXiv},
}
arXiv
ZipCache: Accurate and Efficient KV Cache Quantization with Salient Token Identification
Yefei He, Luoming Zhang, Weijia Wu, Jing Liu, Hong Zhou, and Bohan Zhuang
% arXiv preprint (2024). Mixed-case name and acronym in the title are braced so
% sentence-casing styles keep them. The journal string is kept verbatim for
% styles that ignore eprint fields; eprint/archiveprefix carry the same id.
@article{he2024zipcache,
  author        = {He, Yefei and Zhang, Luoming and Wu, Weijia and Liu, Jing and Zhou, Hong and Zhuang, Bohan},
  title         = {{ZipCache}: Accurate and Efficient {KV} Cache Quantization with Salient Token Identification},
  journal       = {arXiv preprint arXiv:2405.14256},
  year          = {2024},
  eprint        = {2405.14256},
  archiveprefix = {arXiv},
}
% Conference paper, ECCV 2024. The citation key is left unchanged so existing
% citations keep resolving. ViTs is braced to survive sentence-casing styles.
@inproceedings{pan2023snnetv2,
  author    = {Pan, Zizheng and Liu, Jing and He, Haoyu and Cai, Jianfei and Zhuang, Bohan},
  title     = {Stitched {ViTs} are Flexible Vision Backbones},
  booktitle = {European Conference on Computer Vision (ECCV)},
  year      = {2024},
}
% Conference paper, CVPR 2024.
@inproceedings{he2023efficient,
  author    = {He, Haoyu and Pan, Zizheng and Liu, Jing and Cai, Jianfei and Zhuang, Bohan},
  title     = {Efficient Stitchable Task Adaptation},
  booktitle = {Conference on Computer Vision and Pattern Recognition (CVPR)},
  year      = {2024},
}
% Conference paper, CVPR 2024. Title restored to title case with the acronym
% braced. price and long_price are non-standard fields, presumably read by a
% site template (confirm); long_price now uses the same award name as price.
@inproceedings{huang2023tfmq,
  author     = {Huang, Yushi and Gong, Ruihao and Liu, Jing and Chen, Tianlong and Liu, Xianglong},
  title      = {{TFMQ-DM}: Temporal Feature Maintenance Quantization for Diffusion Models},
  booktitle  = {Conference on Computer Vision and Pattern Recognition (CVPR)},
  year       = {2024},
  price      = {Highlight},
  long_price = {Highlight (top 11% of the accepted papers)},
}
% Conference paper, ICLR 2024.
@inproceedings{liu2024qllm,
  author    = {Liu, Jing and Gong, Ruihao and Wei, Xiuying and Dong, Zhiwei and Cai, Jianfei and Zhuang, Bohan},
  title     = {{QLLM}: Accurate and Efficient Low-Bitwidth Quantization for Large Language Models},
  booktitle = {International Conference on Learning Representations (ICLR)},
  year      = {2024},
}
% Conference paper, ICLR 2024. The whole word EfficientDM is braced rather than
% only its suffix, since mid-word braces can disturb kerning and hyphenation.
% price and long_price are non-standard fields kept verbatim.
@inproceedings{he2024efficientdm,
  author     = {He, Yefei and Liu, Jing and Wu, Weijia and Zhou, Hong and Zhuang, Bohan},
  title      = {{EfficientDM}: Efficient Quantization-Aware Fine-Tuning of Low-Bit Diffusion Models},
  booktitle  = {International Conference on Learning Representations (ICLR)},
  year       = {2024},
  price      = {Spotlight},
  long_price = {Spotlight (top 5% of the accepted papers)},
}
% Journal article, TPAMI 2023. Title stored in title case so styles that do not
% recase render it like the other entries; sentence-casing styles can still
% downcase it. The citation key is left unchanged.
@article{he2021pruning,
  author  = {He, Haoyu and Liu, Jing and Pan, Zizheng and Cai, Jianfei and Zhang, Jing and Tao, Dacheng and Zhuang, Bohan},
  title   = {Pruning Self-attentions into Convolutional Layers in Single Path},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI)},
  year    = {2023},
}
% Conference paper, NeurIPS 2023, with its OpenReview URL.
@inproceedings{he2023ptqd,
  author    = {He, Yefei and Liu, Luping and Liu, Jing and Wu, Weijia and Zhou, Hong and Zhuang, Bohan},
  title     = {{PTQD}: Accurate Post-Training Quantization for Diffusion Models},
  booktitle = {Conference on Neural Information Processing Systems (NeurIPS)},
  year      = {2023},
  url       = {https://openreview.net/forum?id=Y3g1PV5R9l},
}
% Conference paper, ICCV 2023. BiViT is braced so sentence-casing styles keep
% its mixed capitalisation.
@inproceedings{he2023bivit,
  author    = {He, Yefei and Lou, Zhenyu and Zhang, Luoming and Liu, Jing and Wu, Weijia and Zhou, Hong and Zhuang, Bohan},
  title     = {{BiViT}: Extremely Compressed Binary Vision Transformers},
  booktitle = {International Conference on Computer Vision (ICCV)},
  pages     = {5651--5663},
  year      = {2023},
}
% Journal article, TPAMI 2023. Title stored in title case. The journal name is
% spelled the same way as the other TPAMI entries in this file, so one venue
% does not render under two names.
@article{liu2023single,
  author  = {Liu, Jing and Zhuang, Bohan and Chen, Peng and Shen, Chunhua and Cai, Jianfei and Tan, Mingkui},
  title   = {Single-Path Bit Sharing for Automatic Loss-Aware Model Compression},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI)},
  year    = {2023},
  doi     = {10.1109/TPAMI.2023.3275159},
}
% Conference paper, IJCAI 2023; the note field records the Survey Track.
@inproceedings{ijcai2023p764,
  author    = {Zhuang, Bohan and Liu, Jing and Pan, Zizheng and He, Haoyu and Weng, Yuetian and Shen, Chunhua},
  title     = {A Survey on Efficient Training of Transformers},
  booktitle = {International Joint Conference on Artificial Intelligence (IJCAI)},
  pages     = {6823--6831},
  year      = {2023},
  note      = {Survey Track},
  doi       = {10.24963/ijcai.2023/764},
}
% Conference paper, CVPR 2023. The page range uses the BibTeX double hyphen so
% it typesets as an en dash.
@inproceedings{He_2023_CVPR,
  author    = {He, Haoyu and Cai, Jianfei and Pan, Zizheng and Liu, Jing and Zhang, Jing and Tao, Dacheng and Zhuang, Bohan},
  title     = {Dynamic Focus-Aware Positional Queries for Semantic Segmentation},
  booktitle = {Conference on Computer Vision and Pattern Recognition (CVPR)},
  pages     = {11299--11308},
  year      = {2023},
}
% Conference paper, NeurIPS 2022. EcoFormer is braced so sentence-casing styles
% keep its mixed capitalisation. price and long_price are non-standard fields
% kept verbatim.
@inproceedings{liu2022ecoformer,
  author     = {Liu, Jing and Pan, Zizheng and He, Haoyu and Cai, Jianfei and Zhuang, Bohan},
  title      = {{EcoFormer}: Energy-Saving Attention with Linear Complexity},
  booktitle  = {Conference on Neural Information Processing Systems (NeurIPS)},
  volume     = {35},
  pages      = {10295--10308},
  year       = {2022},
  price      = {Spotlight},
  long_price = {Spotlight (top 5% of the accepted papers)},
}
% Conference paper, AAAI 2022. Title stored in title case so styles that do not
% recase render it like the other entries in this file.
@inproceedings{pan2022less,
  author    = {Pan, Zizheng and Zhuang, Bohan and He, Haoyu and Liu, Jing and Cai, Jianfei},
  title     = {Less is More: Pay Less Attention in Vision Transformers},
  booktitle = {AAAI Conference on Artificial Intelligence (AAAI)},
  volume    = {36},
  number    = {2},
  pages     = {2035--2043},
  year      = {2022},
}
% Conference paper, ICCV 2021. The page range uses the BibTeX double hyphen so
% it typesets as an en dash.
@inproceedings{Pan_2021_ICCV,
  author    = {Pan, Zizheng and Zhuang, Bohan and Liu, Jing and He, Haoyu and Cai, Jianfei},
  title     = {Scalable Vision Transformers With Hierarchical Pooling},
  booktitle = {International Conference on Computer Vision (ICCV)},
  pages     = {377--386},
  year      = {2021},
}
% Journal article, TPAMI, volume 44 number 8. Title stored in title case so
% styles that do not recase render it like the other entries in this file.
@article{liu2021discrimination,
  author  = {Liu, Jing and Zhuang, Bohan and Zhuang, Zhuangwei and Guo, Yong and Huang, Junzhou and Zhu, Jinhui and Tan, Mingkui},
  title   = {Discrimination-Aware Network Pruning for Deep Model Compression},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI)},
  volume  = {44},
  number  = {8},
  pages   = {4035--4051},
  year    = {2021},
}
% Journal article, TPAMI, volume 44 number 10. Title stored in title case.
% NOTE(review): the asterisks on surnames presumably mark equal contribution and
% are kept verbatim; confirm the consuming style or template expects them.
@article{zhuang2021effective,
  author  = {Zhuang*, Bohan and Tan*, Mingkui and Liu*, Jing and Liu, Lingqiao and Reid, Ian and Shen, Chunhua},
  title   = {Effective Training of Convolutional Neural Networks with Low-Bitwidth Weights and Activations},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI)},
  volume  = {44},
  number  = {10},
  pages   = {6140--6152},
  year    = {2021},
}
% Conference paper, CVPR 2021. The page range uses the BibTeX double hyphen and
% the acronym AQD is braced against sentence-casing styles.
% NOTE(review): the asterisks on surnames presumably mark equal contribution and
% are kept verbatim. price and long_price are non-standard fields kept verbatim.
@inproceedings{Chen_2021_CVPR,
  author     = {Chen*, Peng and Liu*, Jing and Zhuang, Bohan and Tan, Mingkui and Shen, Chunhua},
  title      = {{AQD}: Towards Accurate Quantized Object Detection},
  booktitle  = {Conference on Computer Vision and Pattern Recognition (CVPR)},
  pages      = {104--113},
  year       = {2021},
  price      = {Oral},
  long_price = {Oral Presentation (top 4% of the accepted papers)},
}
% Conference paper, ECCV 2020. Title stored in title case so styles that do not
% recase render it like the other entries in this file.
% NOTE(review): the asterisks on surnames presumably mark equal contribution and
% are kept verbatim.
@inproceedings{xie2020deep,
  author    = {Xie*, Zheng and Wen*, Zhiquan and Liu*, Jing and Liu, Zhiqiang and Wu, Xixian and Tan, Mingkui},
  title     = {Deep Transferring Quantization},
  booktitle = {European Conference on Computer Vision (ECCV)},
  pages     = {625--642},
  year      = {2020},
}
% Conference paper, ECCV 2020. Title stored in title case so styles that do not
% recase render it like the other entries in this file.
% NOTE(review): the asterisks on surnames presumably mark equal contribution and
% are kept verbatim.
@inproceedings{xu2020generative,
  author    = {Xu*, Shoukai and Li*, Haokun and Zhuang*, Bohan and Liu, Jing and Cao, Jiezhang and Liang, Chuangrun and Tan, Mingkui},
  title     = {Generative Low-Bitwidth Data Free Quantization},
  booktitle = {European Conference on Computer Vision (ECCV)},
  pages     = {1--17},
  year      = {2020},
}
% Conference paper, NeurIPS 2018. Typed as inproceedings like the other NeurIPS
% entries in this file; incollection would additionally require a publisher.
% NOTE(review): the asterisks on surnames presumably mark equal contribution and
% are kept verbatim.
@inproceedings{zhuang2018discrimination,
  author    = {Zhuang*, Zhuangwei and Tan*, Mingkui and Zhuang*, Bohan and Liu*, Jing and Guo, Yong and Wu, Qingyao and Huang, Junzhou and Zhu, Jinhui},
  title     = {Discrimination-aware Channel Pruning for Deep Neural Networks},
  booktitle = {Conference on Neural Information Processing Systems (NeurIPS)},
  pages     = {881--892},
  year      = {2018},
}