tweaks and add a simple test
This commit is contained in:
@@ -55,7 +55,7 @@ dropout = 0.0
|
||||
# adamw optimizer
|
||||
gradient_accumulation_steps = 4 # used to simulate larger batch sizes
|
||||
learning_rate = 5e-4 # max learning rate
|
||||
-max_iters = 300000 # total number of training iterations
|
||||
+max_iters = 100000 # total number of training iterations
|
||||
weight_decay = 1e-1
|
||||
beta1 = 0.9
|
||||
beta2 = 0.95
|
||||
|
||||
Reference in New Issue
Block a user