Update run_wrap.py
This commit is contained in:
+5
-4
@@ -17,15 +17,16 @@ enc = Tokenizer()
|
|||||||
|
|
||||||
t0 = time.time()
|
t0 = time.time()
|
||||||
tokens = []
|
tokens = []
|
||||||
|
last = ''
|
||||||
for line in proc.stdout:
|
for line in proc.stdout:
|
||||||
token = int(line.decode('utf-8').strip())
|
token = int(line.decode('utf-8').strip())
|
||||||
dec = enc.decode([token])
|
dec = enc.decode(tokens + [token])
|
||||||
print(dec, end=" ", flush=True)
|
chunk = dec[len(last):]
|
||||||
|
print(chunk, end='',flush=True)
|
||||||
tokens.append(token)
|
tokens.append(token)
|
||||||
|
last = dec
|
||||||
t1 = time.time()
|
t1 = time.time()
|
||||||
|
|
||||||
print('\n---\n')
|
|
||||||
print("Sorry I'm not sure why sentencepiece can't stream tokens properly, I'll solve it later. Here is the whole thing properly:")
|
|
||||||
print('\n---\n')
|
print('\n---\n')
|
||||||
print(enc.decode(tokens))
|
print(enc.decode(tokens))
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user