FlowRL: Matching Reward Distributions for LLM Reasoning
BibTeX
Copy
% arXiv preprint. The braces around FlowRL and LLM in the title keep their
% capitalisation under bibliography styles that sentence-case titles.
@misc{zhu2025flowrlmatchingreward,
  title         = {{FlowRL}: Matching Reward Distributions for {LLM} Reasoning},
  author        = {Zhu, Xuekai
                   and Cheng, Daixuan
                   and Zhang, Dinghuai
                   and Li, Hengli
                   and Zhang, Kaiyan
                   and Jiang, Che
                   and Sun, Youbang
                   and Hua, Ermo
                   and Zuo, Yuxin
                   and Lv, Xingtai
                   and Zhang, Qizheng
                   and Chen, Lin
                   and Shao, Fanghao
                   and Xue, Bo
                   and Song, Yunchong
                   and Yang, Zhenjie
                   and Cui, Ganqu
                   and Ding, Ning
                   and Gao, Jianfeng
                   and Liu, Xiaodong
                   and Zhou, Bowen
                   and Mei, Hongyuan
                   and Lin, Zhouhan},
  year          = {2025},
  eprint        = {2509.15207},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
  url           = {https://arxiv.org/abs/2509.15207},
}