Add the ability to export a custom tokenizer to the .bin format used by run.c

This commit is contained in:
Andrej Karpathy
2023-08-13 02:00:19 +00:00
parent b0cfa2458d
commit ea4cedc588
+6 -2
View File
@@ -4,7 +4,7 @@
import os
import struct
from logging import getLogger
import argparse
from typing import List
from sentencepiece import SentencePieceProcessor
@@ -72,5 +72,9 @@ class Tokenizer:
f.write(bytes)
if __name__ == "__main__":
    # Command-line entry point: parse the optional tokenizer-model path,
    # build a Tokenizer from it, and call its export() method.
    parser = argparse.ArgumentParser()
    parser.add_argument("-t", "--tokenizer-model", type=str, help="optional path to custom tokenizer ")
    args = parser.parse_args()

    # Build the tokenizer exactly once, after argument parsing, so that
    # `--help` and argument errors exit before any tokenizer is constructed
    # and a custom path does not trigger a throwaway no-argument Tokenizer().
    # args.tokenizer_model is None when -t/--tokenizer-model is omitted.
    # NOTE(review): presumably Tokenizer(None) falls back to the default
    # model the same way Tokenizer() did — confirm against Tokenizer.__init__.
    t = Tokenizer(args.tokenizer_model)
    t.export()