diff --git a/tinystories.py b/tinystories.py index 278c817..690cb02 100644 --- a/tinystories.py +++ b/tinystories.py @@ -120,7 +120,7 @@ def train_vocab(vocab_size): def process_shard(args, vocab_size): shard_id, shard = args - tokenizer_model = get_tokenizer_model_path() + tokenizer_model = get_tokenizer_model_path(vocab_size) enc = Tokenizer(tokenizer_model) with open(shard, "r") as f: data = json.load(f)