make default be the llama2 tokenizer

This commit is contained in:
Andrej Karpathy
2023-08-13 03:08:07 +00:00
parent 00a61dc7f9
commit 9c3cfb46a3
+2 -2
View File
@@ -46,8 +46,8 @@ wandb_run_name = "run" + datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
# data # data
batch_size = 128 # if gradient_accumulation_steps > 1, this is the micro-batch size batch_size = 128 # if gradient_accumulation_steps > 1, this is the micro-batch size
max_seq_len = 256 max_seq_len = 256
vocab_source = "custom" # llama2|custom; use Llama 2 vocab from Meta, or custom trained vocab_source = "llama2" # llama2|custom; use Llama 2 vocab from Meta, or custom trained
vocab_size = 512 vocab_size = 32000 # the Llama 2 tokenizer has 32K tokens
# model # model
dim = 288 dim = 288
n_layers = 6 n_layers = 6