VLM4D: Towards Spatiotemporal Awareness in Vision Language Models
BibTeX
Copy
% arXiv preprint entry. The acronym in the title is brace-protected so that
% sentence-casing bibliography styles keep its capitals; author names use the
% unambiguous "Last, First" form.
% NOTE(review): author order is kept exactly as exported -- confirm it against
% the arXiv listing before publication.
@misc{chen2025vlm4dtowardsspatiotemporal,
  author        = {Chen, Dongdong and
                   He, Xuehai and
                   Wang, Xin Eric and
                   Wan, Ziyu and
                   Vilesov, Alexander and
                   Kadambi, Achuta and
                   Zhou, Shijie and
                   Chang, Di and
                   Zhang, Shuwang and
                   Nagachandra, Aditya},
  title         = {{VLM4D}: Towards Spatiotemporal Awareness in Vision Language Models},
  year          = {2025},
  eprint        = {2508.02095},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
  url           = {https://arxiv.org/abs/2508.02095},
}