From 517763346d945e2a9d4f24fee75e3406c743e495 Mon Sep 17 00:00:00 2001
From: Andrej Karpathy
Date: Thu, 27 Jul 2023 22:20:07 +0000
Subject: [PATCH] HF checkpoints i removed the optimizer to save space, init Adam without the first/second moments is ok

---
 train.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/train.py b/train.py
index 34248b8..79b2c8e 100644
--- a/train.py
+++ b/train.py
@@ -179,7 +179,7 @@ scaler = torch.cuda.amp.GradScaler(enabled=(dtype == "float16"))
 
 # optimizer
 optimizer = model.configure_optimizers(weight_decay, learning_rate, (beta1, beta2), device_type)
-if init_from == "resume":
+if init_from == "resume" and "optimizer" in checkpoint:
     optimizer.load_state_dict(checkpoint["optimizer"])
 checkpoint = None  # free up memory
 