On-the-Fly Adaptive Distillation of Transformer to Dual-State Linear Attention
BibTeX
Copy
@comment{
  arXiv preprint, identifier 2506.09316, primary class cs.LG.
  Author names are stored in the unambiguous "Last, First" form;
  separate names are joined with the literal word "and".
}
@misc{wang2025ontheflyadaptivedistillation,
  author        = {Wang, Zhangyang and
                   Zhang, Zhenyu and
                   Kundu, Souvik and
                   Akella, Aditya and
                   Ro, Yeonju},
  title         = {On-the-Fly Adaptive Distillation of Transformer to Dual-State Linear Attention},
  year          = {2025},
  eprint        = {2506.09316},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
  url           = {https://arxiv.org/abs/2506.09316},
}