fix bug, have to use raw_model not model to access the loss
This commit is contained in:
@@ -212,7 +212,7 @@ def estimate_loss():
|
||||
X, Y = next(batch_iter)
|
||||
with ctx:
|
||||
logits = model(X, Y)
|
||||
loss = model.last_loss
|
||||
loss = raw_model.last_loss
|
||||
losses[k] = loss.item()
|
||||
out[split] = losses.mean()
|
||||
model.train()
|
||||
@@ -296,7 +296,7 @@ while True:
|
||||
model.require_backward_grad_sync = micro_step == gradient_accumulation_steps - 1
|
||||
with ctx:
|
||||
logits = model(X, Y)
|
||||
loss = model.last_loss
|
||||
loss = raw_model.last_loss
|
||||
loss = loss / gradient_accumulation_steps
|
||||
# immediately async prefetch next batch while model is doing the forward pass on the GPU
|
||||
X, Y = next(train_batch_iter)
|
||||
|
||||
Reference in New Issue
Block a user