Fix bug: the loss must be read from raw_model, not model (use raw_model.last_loss instead of model.last_loss)

2023-08-06 07:55:46 +00:00
parent 65b0846637
commit 623894f5da
1 changed file with 2 additions and 2 deletions
@@ -212,7 +212,7 @@ def estimate_loss():
            X, Y = next(batch_iter)
            with ctx:
                logits = model(X, Y)
-                loss = model.last_loss
+                loss = raw_model.last_loss
            losses[k] = loss.item()
        out[split] = losses.mean()
    model.train()
@@ -296,7 +296,7 @@ while True:
            model.require_backward_grad_sync = micro_step == gradient_accumulation_steps - 1
        with ctx:
            logits = model(X, Y)
-            loss = model.last_loss
+            loss = raw_model.last_loss
            loss = loss / gradient_accumulation_steps
        # immediately async prefetch next batch while model is doing the forward pass on the GPU
        X, Y = next(train_batch_iter)