Ring-lite: Scalable Reasoning via C3PO-Stabilized Reinforcement Learning for LLMs
BibTeX
Copy
% arXiv preprint 2506.14731 (primary class cs.CL).
% "Ring Team" is a collective author: it carries its own brace pair so BibTeX
% treats it as one indivisible name instead of given name "Ring" / surname "Team".
% Acronyms in the title are braced so sentence-casing styles keep their capitals.
@misc{zhou2025ringlitescalablereasoning,
  title         = {Ring-lite: Scalable Reasoning via {C3PO}-Stabilized Reinforcement Learning for {LLMs}},
  author        = {Jun Zhou and Zihao Wang and Jia Guo and Yang Li and Liang Jiang and Ding Liu and Junwu Xiong and Xinyu Kong and Zujie Wen and Xin Zhao and Qing Cui and Bin Hu and Lei Liang and Zhiqiang Zhang and Jiaming Liu and Longfei Zheng and Feng Zhu and Xiaopei Wan and Junbo Zhao and Yongkang Liu and Qiang Gao and Zhenyu Huang and Kaihong Zhang and Cai Chen and Zhenduo Zhang and Jiewei Wu and Quan Wan and Xinxing Yang and Hao Dai and Deng Zhao and Dingnan Jin and Zhenglei Zhou and Kuan Xu and Shuaicheng Li and Xiaodong Yan and Wang Ren and Jun Mei and Hongzhi Luan and Yingting Wu and Xiaoyun Feng and Shaomian Zheng and Tongkai Yang and Zhankai Xu and {Ring Team} and Liangcheng Fu and Xuemin Yang},
  year          = {2025},
  eprint        = {2506.14731},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2506.14731},
}