Ignore the KL Penalty! Boosting Exploration on Critical Tokens to Enhance RL Fine-Tuning
BibTeX
Copy
% arXiv preprint (no published venue recorded here). Braced words in the title keep their capitalisation under sentence-casing styles.
@misc{vassoyan2025ignoreklpenalty,
  author        = {Vassoyan, Jean and Plaud, Roman and Beau, Nathana{\"e}l},
  title         = {Ignore the {KL} Penalty! {Boosting} Exploration on Critical Tokens to Enhance {RL} Fine-Tuning},
  year          = {2025},
  eprint        = {2502.06533},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2502.06533},
}