From 0bddcd94c17f060846bddbd64f61a4b570916aeb Mon Sep 17 00:00:00 2001 From: Artem Yatsenko <42897193+sumo43@users.noreply.github.com> Date: Sun, 23 Jul 2023 09:28:49 -0700 Subject: [PATCH] Update run_wrap.py --- run_wrap.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/run_wrap.py b/run_wrap.py index b609115..a2ef54e 100644 --- a/run_wrap.py +++ b/run_wrap.py @@ -17,15 +17,16 @@ enc = Tokenizer() t0 = time.time() tokens = [] +last = '' for line in proc.stdout: token = int(line.decode('utf-8').strip()) - dec = enc.decode([token]) - print(dec, end=" ", flush=True) + dec = enc.decode(tokens + [token]) + chunk = dec[len(last):] + print(chunk, end='',flush=True) tokens.append(token) + last = dec t1 = time.time() -print('\n---\n') -print("Sorry I'm not sure why sentencepiece can't stream tokens properly, I'll solve it later. Here is the whole thing properly:") print('\n---\n') print(enc.decode(tokens))