Add ability to export a custom tokenizer to .bin format for the run.c file
This commit is contained in:
+6
-2
@@ -4,7 +4,7 @@
|
|||||||
|
|
||||||
import os
|
import os
|
||||||
import struct
|
import struct
|
||||||
from logging import getLogger
|
import argparse
|
||||||
from typing import List
|
from typing import List
|
||||||
|
|
||||||
from sentencepiece import SentencePieceProcessor
|
from sentencepiece import SentencePieceProcessor
|
||||||
@@ -72,5 +72,9 @@ class Tokenizer:
|
|||||||
f.write(bytes)
|
f.write(bytes)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
t = Tokenizer()
|
parser = argparse.ArgumentParser()
|
||||||
|
parser.add_argument("-t", "--tokenizer-model", type=str, help="optional path to custom tokenizer ")
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
t = Tokenizer(args.tokenizer_model)
|
||||||
t.export()
|
t.export()
|
||||||
|
|||||||
Reference in New Issue
Block a user