Early Weight Averaging meets High Learning Rates for LLM Pre-training
BibTeX
Copy
@comment{
  Sanyal et al. (2023), Early Weight Averaging meets High Learning Rates
  for LLM Pre-training.
  NOTE(review): inproceedings entries require a booktitle field, which
  this auto-exported record does not provide; BibTeX will warn about the
  empty booktitle until the venue is confirmed and added.
  NOTE(review): the second author's given name is stored only as the
  initial "A."; replace with the full given name once verified.
  The acronym in the title is brace-protected so sentence-casing styles
  do not lowercase it.
}
@inproceedings{Sanyal2023EarlyWA,
  author = {Sanyal, Sunny and Neerkaje, A. and Kaddour, Jean and Kumar, Abhishek and Sanghavi, Sujay},
  title  = {Early Weight Averaging meets High Learning Rates for {LLM} Pre-training},
  year   = {2023},
}