VideoEspresso: A Large-Scale Chain-of-Thought Dataset for Fine-Grained Video Reasoning via Core Frame Selection
BibTeX
Copy
% arXiv preprint entry for the VideoEspresso dataset paper.
% The camelCase name in the title is brace-protected so that styles which
% sentence-case titles do not lowercase it.
% NOTE(review): author order is kept exactly as it was exported; verify it
% against the arXiv listing before relying on the first-author label.
@misc{huang2024videoespressolargescalechainofthought,
  title         = {{VideoEspresso}: A Large-Scale Chain-of-Thought Dataset for Fine-Grained Video Reasoning via Core Frame Selection},
  author        = {Huang, Wei and Liu, Si and Qi, Xiaojuan and Zhuo, Le and Zhou, Xu and Liao, Yue and Su, Xiu and Zhang, Shifeng and Han, Songhao and Shi, Hairong},
  year          = {2024},
  eprint        = {2411.14794},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
  url           = {https://arxiv.org/abs/2411.14794},
}