LaSeR: Reinforcement Learning with Last-Token Self-Rewarding
BibTeX
Copy
% arXiv preprint 2510.14943 (primary class cs.AI).
% Key scheme: first-author surname + year + leading title words.
% The year and month come from the timestamp that the export had
% spliced into the key and the year field (Thu Oct 16 2025, UTC).
@misc{lin2025laserreinforcementlearning,
  title         = {{LaSeR}: Reinforcement Learning with Last-Token Self-Rewarding},
  author        = {Lin, Yankai and Yang, Wenkai and Liu, Weijie and Xie, Ruobing and Guo, Yiju and Wu, Lulu and Yang, Saiyong},
  year          = {2025},
  month         = oct,
  eprint        = {2510.14943},
  archivePrefix = {arXiv},
  primaryClass  = {cs.AI},
  url           = {https://arxiv.org/abs/2510.14943},
}