From 096325b66c2ab84095bd407cbab84d731edc65bc Mon Sep 17 00:00:00 2001 From: Andrej Karpathy Date: Thu, 24 Aug 2023 03:09:55 +0000 Subject: [PATCH] bring back num_threads --- tinystories.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tinystories.py b/tinystories.py index 003b1e3..800d73a 100644 --- a/tinystories.py +++ b/tinystories.py @@ -100,7 +100,6 @@ def train_vocab(vocab_size): # 2) train the sentencepiece model print("Will now train the vocab...") - spm.SentencePieceTrainer.train(input=tiny_file, model_prefix=prefix, model_type="bpe", @@ -108,6 +107,7 @@ def train_vocab(vocab_size): self_test_sample_size=0, input_format="text", character_coverage=1.0, + num_threads=os.cpu_count(), split_digits=True, allow_whitespace_only_pieces=True, byte_fallback=True,