@inproceedings{zhang2024channel,
  author    = {Zhang, Mingyang and Liu, Jing and Ding, Ganggui and Yu, Xinyi and Ou, Linlin and Zhuang, Bohan},
  title     = {Channel Merging: Preserving Specialization for Merged Experts},
  booktitle = {AAAI Conference on Artificial Intelligence (AAAI)},
  year      = {2024},
}
Numerical Pruning for Efficient Autoregressive Models
Xuan Shen, Zhao Song, Yufa Zhou, Bo Chen, Jing Liu, Ruiyi Zhang, Ryan A Rossi, Hao Tan, Tong Yu, Xiang Chen, Yufan Zhou, Tong Sun, Pu Zhao, Yanzhi Wang, and Jiuxiang Gu
In AAAI Conference on Artificial Intelligence (AAAI), 2024
@inproceedings{shen2024numerical,
  author    = {Shen, Xuan and Song, Zhao and Zhou, Yufa and Chen, Bo and Liu, Jing and Zhang, Ruiyi and Rossi, Ryan A and Tan, Hao and Yu, Tong and Chen, Xiang and Zhou, Yufan and Sun, Tong and Zhao, Pu and Wang, Yanzhi and Gu, Jiuxiang},
  title     = {Numerical Pruning for Efficient Autoregressive Models},
  booktitle = {AAAI Conference on Artificial Intelligence (AAAI)},
  year      = {2024},
}
% Braces around MiniCache and KV keep their capitalisation under styles that sentence-case titles.
@inproceedings{liu2024minicache,
  author    = {Liu, Akide and Liu, Jing and Pan, Zizheng and He, Yefei and Haffari, Gholamreza and Zhuang, Bohan},
  title     = {{MiniCache}: {KV} Cache Compression in Depth Dimension for Large Language Models},
  booktitle = {Conference on Neural Information Processing Systems (NeurIPS)},
  year      = {2024},
}
% Braces around ZipCache and KV keep their capitalisation under styles that sentence-case titles.
@inproceedings{he2024zipcache,
  author    = {He, Yefei and Zhang, Luoming and Wu, Weijia and Liu, Jing and Zhou, Hong and Zhuang, Bohan},
  title     = {{ZipCache}: Accurate and Efficient {KV} Cache Quantization with Salient Token Identification},
  booktitle = {Conference on Neural Information Processing Systems (NeurIPS)},
  year      = {2024},
}
% The key keeps its 2023 prefix although the year field is 2024; the key is left
% unchanged so existing citations keep resolving.
% Braces around ViTs keep its capitalisation under styles that sentence-case titles.
@inproceedings{pan2023snnetv2,
  author    = {Pan, Zizheng and Liu, Jing and He, Haoyu and Cai, Jianfei and Zhuang, Bohan},
  title     = {Stitched {ViTs} are Flexible Vision Backbones},
  booktitle = {European Conference on Computer Vision (ECCV)},
  year      = {2024},
}
% The key keeps its 2023 prefix although the year field is 2024.
@inproceedings{he2023efficient,
  author    = {He, Haoyu and Pan, Zizheng and Liu, Jing and Cai, Jianfei and Zhuang, Bohan},
  title     = {Efficient Stitchable Task Adaptation},
  booktitle = {Conference on Computer Vision and Pattern Recognition (CVPR)},
  year      = {2024},
}
% price and long_price are non-standard fields; standard BibTeX styles ignore unknown fields.
% long_price now uses the same award name (Highlight) as price.
% NOTE(review): the asterisks on author surnames look like a site-specific marker
% (presumably equal contribution) -- confirm; they would print literally in LaTeX.
@inproceedings{huang2023tfmq,
  author     = {Huang*, Yushi and Gong*, Ruihao and Liu, Jing and Chen, Tianlong and Liu, Xianglong},
  title      = {{TFMQ-DM}: Temporal Feature Maintenance Quantization for Diffusion Models},
  booktitle  = {Conference on Computer Vision and Pattern Recognition (CVPR)},
  year       = {2024},
  price      = {Highlight},
  long_price = {Highlight (top 11% of the accepted papers)},
}
@inproceedings{liu2024qllm,
  author    = {Liu, Jing and Gong, Ruihao and Wei, Xiuying and Dong, Zhiwei and Cai, Jianfei and Zhuang, Bohan},
  title     = {{QLLM}: Accurate and Efficient Low-Bitwidth Quantization for Large Language Models},
  booktitle = {International Conference on Learning Representations (ICLR)},
  year      = {2024},
}
% price and long_price are non-standard fields; standard BibTeX styles ignore unknown fields.
% The whole word EfficientDM is braced, rather than only the DM suffix, to avoid a mid-word brace group.
@inproceedings{he2024efficientdm,
  author     = {He, Yefei and Liu, Jing and Wu, Weijia and Zhou, Hong and Zhuang, Bohan},
  title      = {{EfficientDM}: Efficient Quantization-Aware Fine-Tuning of Low-Bit Diffusion Models},
  booktitle  = {International Conference on Learning Representations (ICLR)},
  year       = {2024},
  price      = {Spotlight},
  long_price = {Spotlight (top 5% of the accepted papers)},
}
% The key keeps its 2021 prefix although the year field is 2023.
@article{he2021pruning,
  author  = {He, Haoyu and Liu, Jing and Pan, Zizheng and Cai, Jianfei and Zhang, Jing and Tao, Dacheng and Zhuang, Bohan},
  title   = {Pruning self-attentions into convolutional layers in single path},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI)},
  year    = {2023},
}
@inproceedings{he2023ptqd,
  author    = {He, Yefei and Liu, Luping and Liu, Jing and Wu, Weijia and Zhou, Hong and Zhuang, Bohan},
  title     = {{PTQD}: Accurate Post-Training Quantization for Diffusion Models},
  booktitle = {Conference on Neural Information Processing Systems (NeurIPS)},
  year      = {2023},
  url       = {https://openreview.net/forum?id=Y3g1PV5R9l},
}
% Braces around BiViT keep its capitalisation under styles that sentence-case titles.
@inproceedings{he2023bivit,
  author    = {He, Yefei and Lou, Zhenyu and Zhang, Luoming and Liu, Jing and Wu, Weijia and Zhou, Hong and Zhuang, Bohan},
  title     = {{BiViT}: Extremely Compressed Binary Vision Transformers},
  booktitle = {International Conference on Computer Vision (ICCV)},
  pages     = {5651--5663},
  year      = {2023},
}
% The journal name carries the same (TPAMI) suffix as the other TPAMI entries in this file.
@article{liu2023single,
  author  = {Liu, Jing and Zhuang, Bohan and Chen, Peng and Shen, Chunhua and Cai, Jianfei and Tan, Mingkui},
  title   = {Single-path bit sharing for automatic loss-aware model compression},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI)},
  year    = {2023},
  doi     = {10.1109/TPAMI.2023.3275159},
}
@inproceedings{ijcai2023p764,
  author    = {Zhuang, Bohan and Liu, Jing and Pan, Zizheng and He, Haoyu and Weng, Yuetian and Shen, Chunhua},
  title     = {A Survey on Efficient Training of Transformers},
  booktitle = {International Joint Conference on Artificial Intelligence (IJCAI)},
  pages     = {6823--6831},
  year      = {2023},
  note      = {Survey Track},
  doi       = {10.24963/ijcai.2023/764},
}
% The page range uses a double hyphen (en dash), matching the other ranges in this file.
@inproceedings{He_2023_CVPR,
  author    = {He, Haoyu and Cai, Jianfei and Pan, Zizheng and Liu, Jing and Zhang, Jing and Tao, Dacheng and Zhuang, Bohan},
  title     = {Dynamic Focus-Aware Positional Queries for Semantic Segmentation},
  booktitle = {Conference on Computer Vision and Pattern Recognition (CVPR)},
  pages     = {11299--11308},
  year      = {2023},
}
% price and long_price are non-standard fields; standard BibTeX styles ignore unknown fields.
% Braces around EcoFormer keep its capitalisation under styles that sentence-case titles.
% NOTE(review): the asterisks on author surnames look like a site-specific marker
% (presumably equal contribution) -- confirm; they would print literally in LaTeX.
@inproceedings{liu2022ecoformer,
  author     = {Liu*, Jing and Pan*, Zizheng and He, Haoyu and Cai, Jianfei and Zhuang, Bohan},
  title      = {{EcoFormer}: Energy-Saving Attention with Linear Complexity},
  booktitle  = {Conference on Neural Information Processing Systems (NeurIPS)},
  volume     = {35},
  pages      = {10295--10308},
  year       = {2022},
  price      = {Spotlight},
  long_price = {Spotlight (top 5% of the accepted papers)},
}
@inproceedings{pan2022less,
  author    = {Pan, Zizheng and Zhuang, Bohan and He, Haoyu and Liu, Jing and Cai, Jianfei},
  title     = {Less is more: Pay less attention in vision transformers},
  booktitle = {AAAI Conference on Artificial Intelligence (AAAI)},
  volume    = {36},
  number    = {2},
  pages     = {2035--2043},
  year      = {2022},
}
% The page range uses a double hyphen (en dash), matching the other ranges in this file.
@inproceedings{Pan_2021_ICCV,
  author    = {Pan, Zizheng and Zhuang, Bohan and Liu, Jing and He, Haoyu and Cai, Jianfei},
  title     = {Scalable Vision Transformers With Hierarchical Pooling},
  booktitle = {International Conference on Computer Vision (ICCV)},
  pages     = {377--386},
  year      = {2021},
}
arXiv
Sharpness-aware quantization for deep neural networks
% arXiv preprint stored as an article; the venue string lives in the journal field only
% (booktitle is not an article field and was dropped).
@article{liu2021sharpness,
  author  = {Liu, Jing and Cai, Jianfei and Zhuang, Bohan},
  title   = {Sharpness-aware quantization for deep neural networks},
  journal = {arXiv preprint arXiv:2111.12273},
  year    = {2021},
}
arXiv
Mesa: A memory-saving training framework for transformers
Zizheng Pan, Peng Chen, Haoyu He, Jing Liu, Jianfei Cai, and Bohan Zhuang
@article{pan2021mesa,
  author  = {Pan, Zizheng and Chen, Peng and He, Haoyu and Liu, Jing and Cai, Jianfei and Zhuang, Bohan},
  title   = {Mesa: A memory-saving training framework for transformers},
  journal = {arXiv preprint arXiv:2111.11124},
  year    = {2021},
}
% NOTE(review): the asterisks on author surnames look like a site-specific marker -- confirm
% their meaning; they would print literally in LaTeX.
@article{liu2021discrimination,
  author  = {Liu*, Jing and Zhuang*, Bohan and Zhuang*, Zhuangwei and Guo, Yong and Huang, Junzhou and Zhu, Jinhui and Tan*, Mingkui},
  title   = {Discrimination-aware network pruning for deep model compression},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI)},
  volume  = {44},
  number  = {8},
  pages   = {4035--4051},
  year    = {2021},
}
% NOTE(review): the asterisks on author surnames look like a site-specific marker -- confirm
% their meaning; they would print literally in LaTeX.
@article{zhuang2021effective,
  author  = {Zhuang*, Bohan and Tan*, Mingkui and Liu*, Jing and Liu, Lingqiao and Reid, Ian and Shen, Chunhua},
  title   = {Effective training of convolutional neural networks with low-bitwidth weights and activations},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI)},
  volume  = {44},
  number  = {10},
  pages   = {6140--6152},
  year    = {2021},
}
% price and long_price are non-standard fields; standard BibTeX styles ignore unknown fields.
% The page range uses a double hyphen; braces around AQD keep the acronym upper-case.
% NOTE(review): the asterisks on author surnames look like a site-specific marker
% (presumably equal contribution) -- confirm; they would print literally in LaTeX.
@inproceedings{Chen_2021_CVPR,
  author     = {Chen*, Peng and Liu*, Jing and Zhuang, Bohan and Tan, Mingkui and Shen, Chunhua},
  title      = {{AQD}: Towards Accurate Quantized Object Detection},
  booktitle  = {Conference on Computer Vision and Pattern Recognition (CVPR)},
  pages      = {104--113},
  year       = {2021},
  price      = {Oral},
  long_price = {Oral Presentation (top 4% of the accepted papers)},
}
% NOTE(review): the asterisks on author surnames look like a site-specific marker -- confirm
% their meaning; they would print literally in LaTeX.
@inproceedings{xie2020deep,
  author    = {Xie*, Zheng and Wen*, Zhiquan and Liu*, Jing and Liu, Zhiqiang and Wu, Xixian and Tan, Mingkui},
  title     = {Deep transferring quantization},
  booktitle = {European Conference on Computer Vision (ECCV)},
  pages     = {625--642},
  year      = {2020},
}
% NOTE(review): the asterisks on author surnames look like a site-specific marker -- confirm
% their meaning; they would print literally in LaTeX.
@inproceedings{xu2020generative,
  author    = {Xu*, Shoukai and Li*, Haokun and Zhuang*, Bohan and Liu, Jing and Cao, Jiezhang and Liang, Chuangrun and Tan, Mingkui},
  title     = {Generative low-bitwidth data free quantization},
  booktitle = {European Conference on Computer Vision (ECCV)},
  pages     = {1--17},
  year      = {2020},
}
% Conference paper, so it is typed as inproceedings like the other NeurIPS entries in this file;
% the citation key is unchanged.
% NOTE(review): the asterisks on author surnames look like a site-specific marker -- confirm
% their meaning; they would print literally in LaTeX.
@inproceedings{zhuang2018discrimination,
  author    = {Zhuang*, Zhuangwei and Tan*, Mingkui and Zhuang*, Bohan and Liu*, Jing and Guo, Yong and Wu, Qingyao and Huang, Junzhou and Zhu, Jinhui},
  title     = {Discrimination-aware Channel Pruning for Deep Neural Networks},
  booktitle = {Conference on Neural Information Processing Systems (NeurIPS)},
  pages     = {881--892},
  year      = {2018},
}