From e5752e1fc912bdd9230343c4befa73bc8ac99eb0 Mon Sep 17 00:00:00 2001
From: Andrej Karpathy
Date: Thu, 27 Jul 2023 22:59:19 +0000
Subject: [PATCH] strip leading whitespace

---
 run.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/run.c b/run.c
index 68fabcd..912c3cb 100644
--- a/run.c
+++ b/run.c
@@ -467,7 +467,9 @@ int main(int argc, char *argv[]) {
             // we now want to sample from this distribution to get the next token
             next = sample(state.logits, config.vocab_size);
         }
-        printf("%s", vocab[next]);
+        // following BOS token (1), sentencepiece decoder strips any leading whitespace (see PR #89)
+        char *token_str = (token == 1 && vocab[next][0] == ' ') ? vocab[next]+1 : vocab[next];
+        printf("%s", token_str);
         fflush(stdout);
 
         // advance forward