% Direct Preference Optimization (DPO), NeurIPS 2023 conference paper.
% The arXiv identifier is kept in eprint/archiveprefix rather than in
% journal/volume, so styles print the conference venue and can still link
% to the preprint.
@InProceedings{Rafailov2023DirectPO,
  author        = {Rafailov, Rafael and Sharma, Archit and Mitchell, Eric and Ermon, Stefano and Manning, Christopher D. and Finn, Chelsea},
  title         = {Direct Preference Optimization: Your Language Model is Secretly a Reward Model},
  booktitle     = {Advances in Neural Information Processing Systems},
  year          = {2023},
  eprint        = {2305.18290},
  archiveprefix = {arXiv},
}