tweaks and add a simple test

This commit is contained in:
Andrej Karpathy
2023-07-23 14:52:08 +00:00
parent f499d9d2b5
commit 9414e7a45e
7 changed files with 73 additions and 48 deletions
+1 -1
View File
@@ -55,7 +55,7 @@ dropout = 0.0
# adamw optimizer
gradient_accumulation_steps = 4 # used to simulate larger batch sizes
learning_rate = 5e-4 # max learning rate
-max_iters = 300000 # total number of training iterations
+max_iters = 100000 # total number of training iterations
weight_decay = 1e-1
beta1 = 0.9
beta2 = 0.95