VideoITG: Multimodal Video Understanding with Instructed Temporal Grounding
BibTeX
Copy
% VideoITG arXiv preprint, eprint 2507.13353, primary class cs.CV.
% The model name in the title is brace-protected so sentence-casing styles keep its capitalisation.
% Authors are stored in the unambiguous Last, First form.
@misc{zhang2025videoitgmultimodalvideo,
  title         = {{VideoITG}: Multimodal Video Understanding with Instructed Temporal Grounding},
  author        = {Zhang, Lei and Alvarez, Jose M. and Li, Zhiqi and Chen, Guo and Yu, Zhiding and Wang, Shihao and Li, Minghan and Li, Guilin and Huang, De-an},
  year          = {2025},
  eprint        = {2507.13353},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
  url           = {https://arxiv.org/abs/2507.13353},
}