BabyBabelLM: A Multilingual Benchmark of Developmentally Plausible Training Data
BibTeX
Copy
% arXiv preprint: BabyBabelLM (eprint 2510.10159, primary class cs.CL).
% Key follows the lastnameYYYYkeyword scheme. The exported key and year
% both contained a full timestamp string ("Sat Oct 11 2025 ... GMT+0000 ..."),
% which is not a valid citation key and not a four-digit year; both are
% normalised to 2025 here.
% Authors are stored in "Last, First" form; accented letters use brace-wrapped
% LaTeX escapes so they sort correctly under classic 8-bit BibTeX as well as Biber.
@misc{jumelet2025babybabellmmultilingualbenchmark,
  title         = {{BabyBabelLM}: A Multilingual Benchmark of Developmentally Plausible Training Data},
  author        = {Jumelet, Jaap and
                   Fourtassi, Abdellah and
                   Haga, Akari and
                   Bunzeck, Bastian and
                   Shandilya, Bhargav and
                   Galvan-Sosa, Diana and
                   Haznitrama, Faiz Ghifari and
                   Padovani, Francesca and
                   Meyer, Francois and
                   Hu, Hai and
                   Etxaniz, Julen and
                   Pr{\'e}vot, Laurent and
                   He, Linyang and
                   Grandury, Mar{\'\i}a and
                   Marcheva, Mila and
                   Foroutan, Negar and
                   Theodoropoulos, Nikitas and
                   Sadeghi, Pouya and
                   Song, Siyuan and
                   Salhan, Suchir and
                   Zhou, Susana and
                   Paniv, Yurii and
                   Zhang, Ziyin and
                   Bisazza, Arianna and
                   Warstadt, Alex and
                   Choshen, Leshem},
  year          = {2025},
  eprint        = {2510.10159},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2510.10159},
}