ChunkKV: Semantic-Preserving KV Cache Compression for Efficient Long-Context LLM Inference
BibTeX
Copy
% arXiv preprint 2502.00299 (primary class cs.CL).
% The tokens ChunkKV, KV and LLM are braced in the title so that styles which
% apply sentence casing keep their capitalization.
@misc{li2025chunkkvsemanticpreservingkv,
  title         = {{ChunkKV}: Semantic-Preserving {KV} Cache Compression for Efficient Long-Context {LLM} Inference},
  author        = {Li, Bo and Liu, Xiang and Li, Zeyu and Tang, Zhenheng and Chu, Xiaowen and Hu, Xuming and Dong, Peijie},
  year          = {2025},
  eprint        = {2502.00299},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2502.00299},
}