From f499d9d2b56c061e5eee338c8e728e8b28fe841c Mon Sep 17 00:00:00 2001 From: Andrej Karpathy Date: Sun, 23 Jul 2023 05:37:44 +0000 Subject: [PATCH] delete debug line --- train.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/train.py b/train.py index 2488bb3..9d9a098 100644 --- a/train.py +++ b/train.py @@ -7,7 +7,6 @@ $ python -m train.py --compile=False --eval_iters=10 --batch_size=8 To run with DDP on 4 gpus on 1 node, example: $ torchrun --standalone --nproc_per_node=4 train.py -PYTHONPATH=/home/ubuntu/miniconda3/envs/pytorch2/lib/python3.10/site-packages torchrun --standalone --nproc_per_node=4 train.py --compile=False --wandb_log=True To run with DDP on 4 gpus across 2 nodes, example: - Run on the first (master) node with example IP 123.456.123.456: @@ -56,7 +55,7 @@ dropout = 0.0 # adamw optimizer gradient_accumulation_steps = 4 # used to simulate larger batch sizes learning_rate = 5e-4 # max learning rate -max_iters = 100000 # total number of training iterations +max_iters = 300000 # total number of training iterations weight_decay = 1e-1 beta1 = 0.9 beta2 = 0.95