SPG: Sandwiched Policy Gradient for Masked Diffusion Language Models
BibTeX
Copy
% arXiv preprint 2510.09541 (cs.AI), submitted October 2025.
% Key scheme: first-author surname + year + leading title words.
@misc{wang2025spgsandwichedpolicy,
  title         = {{SPG}: Sandwiched Policy Gradient for Masked Diffusion Language Models},
  author        = {Chengyu Wang and Paria Rashidinejad and DiJia Su and Song Jiang and Sid Wang and Siyan Zhao and Cai Zhou and Shannon Zejiang Shen and Feiyu Chen and Tommi Jaakkola and Yuandong Tian and Bo Liu},
  year          = {2025},
  month         = oct,
  eprint        = {2510.09541},
  archivePrefix = {arXiv},
  primaryClass  = {cs.AI},
  url           = {https://arxiv.org/abs/2510.09541},
}