% arXiv preprint "Eliciting Secret Knowledge from Language Models" (arXiv:2510.01070, cs.LG).
% Key follows the lastnameYYYYkeyword scheme in plain ASCII; year is a bare four-digit value.
@misc{cywinski2025elicitingsecretknowledge,
  title         = {Eliciting Secret Knowledge from Language Models},
  author        = {Cywi{\'n}ski, Bartosz and Ryd, Emil and Wang, Rowan and Rajamanoharan, Senthooran and Nanda, Neel and Conmy, Arthur and Marks, Samuel},
  year          = {2025},
  eprint        = {2510.01070},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
  url           = {https://arxiv.org/abs/2510.01070},
}