RL4F: Generating Natural Language Feedback with Reinforcement Learning for Repairing Model Outputs
BibTeX
Copy
@comment{arXiv preprint entry for the RL4F paper. NOTE(review): confirm the
  author order against the arXiv listing for 2305.08844 -- the citation key is
  derived from the first listed author, so a wrong order also means a
  misleading key. Key kept unchanged so existing \cite commands still resolve.}
@misc{madaan2023rl4fgeneratingnatural,
  author        = {Madaan, Aman and
                   Kalyan, Ashwin and
                   Clark, Peter and
                   Tandon, Niket and
                   Aky{\"u}rek, Ekin and
                   Aky{\"u}rek, Afra Feyza and
                   Wijaya, Derry},
  title         = {{RL4F}: Generating Natural Language Feedback with Reinforcement Learning for Repairing Model Outputs},
  year          = {2023},
  eprint        = {2305.08844},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2305.08844},
}