make default be the llama2 tokenizer
This commit is contained in:
@@ -46,8 +46,8 @@ wandb_run_name = "run" + datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
|
|||||||
# data
|
# data
|
||||||
batch_size = 128 # if gradient_accumulation_steps > 1, this is the micro-batch size
|
batch_size = 128 # if gradient_accumulation_steps > 1, this is the micro-batch size
|
||||||
max_seq_len = 256
|
max_seq_len = 256
|
||||||
vocab_source = "custom" # llama2|custom; use Llama 2 vocab from Meta, or custom trained
|
vocab_source = "llama2" # llama2|custom; use Llama 2 vocab from Meta, or custom trained
|
||||||
vocab_size = 512
|
vocab_size = 32000 # the Llama 2 tokenizer has 32K tokens
|
||||||
# model
|
# model
|
||||||
dim = 288
|
dim = 288
|
||||||
n_layers = 6
|
n_layers = 6
|
||||||
|
|||||||
Reference in New Issue
Block a user