TPLA: Tensor Parallel Latent Attention for Efficient Disaggregated Prefill and Decode Inference
BibTeX
Copy
% arXiv preprint 2508.15881 (primary class cs.LG), 2025.
% The acronym in the title is brace-protected so sentence-casing styles keep it uppercase.
@misc{meng2025tplatensorparallel,
  title         = {{TPLA}: Tensor Parallel Latent Attention for Efficient Disaggregated Prefill \& Decode Inference},
  author        = {Meng, Fanxu and Zhang, Muhan and Wang, Yuxuan and Yin, Di and Sun, Xing and Tang, Xiaojuan and Tang, Pingzhi},
  year          = {2025},
  eprint        = {2508.15881},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG},
  url           = {https://arxiv.org/abs/2508.15881},
}