ok i can train and sample a model with a custom tokenizer

This commit is contained in:
Andrej Karpathy
2023-08-11 16:47:29 +00:00
parent 4c6f0af9ff
commit b0cfa2458d
4 changed files with 48 additions and 14 deletions
+3 -2
View File
@@ -11,12 +11,13 @@ from torch import nn
@dataclass
class ModelArgs:
# default hyperparameters for the Llama 7B model
dim: int = 4096
n_layers: int = 32
n_heads: int = 32
n_kv_heads: Optional[int] = None
vocab_size: int = -1 # defined later by tokenizer
multiple_of: int = 256 # make SwiGLU hidden layer size multiple of large power of 2
vocab_size: int = 32000
multiple_of: int = 256 # MLP hidden layer size will be a multiple of this value
norm_eps: float = 1e-5
max_seq_len: int = 2048
dropout: float = 0.0