delete debug line
This commit is contained in:
@@ -7,7 +7,6 @@ $ python -m train.py --compile=False --eval_iters=10 --batch_size=8
|
|||||||
|
|
||||||
To run with DDP on 4 gpus on 1 node, example:
|
To run with DDP on 4 gpus on 1 node, example:
|
||||||
$ torchrun --standalone --nproc_per_node=4 train.py
|
$ torchrun --standalone --nproc_per_node=4 train.py
|
||||||
PYTHONPATH=/home/ubuntu/miniconda3/envs/pytorch2/lib/python3.10/site-packages torchrun --standalone --nproc_per_node=4 train.py --compile=False --wandb_log=True
|
|
||||||
|
|
||||||
To run with DDP on 4 gpus across 2 nodes, example:
|
To run with DDP on 4 gpus across 2 nodes, example:
|
||||||
- Run on the first (master) node with example IP 123.456.123.456:
|
- Run on the first (master) node with example IP 123.456.123.456:
|
||||||
@@ -56,7 +55,7 @@ dropout = 0.0
|
|||||||
# adamw optimizer
|
# adamw optimizer
|
||||||
gradient_accumulation_steps = 4 # used to simulate larger batch sizes
|
gradient_accumulation_steps = 4 # used to simulate larger batch sizes
|
||||||
learning_rate = 5e-4 # max learning rate
|
learning_rate = 5e-4 # max learning rate
|
||||||
max_iters = 100000 # total number of training iterations
|
max_iters = 300000 # total number of training iterations
|
||||||
weight_decay = 1e-1
|
weight_decay = 1e-1
|
||||||
beta1 = 0.9
|
beta1 = 0.9
|
||||||
beta2 = 0.95
|
beta2 = 0.95
|
||||||
|
|||||||
Reference in New Issue
Block a user