strip leading whitespace

This commit is contained in:
Andrej Karpathy
2023-07-27 22:59:19 +00:00
parent 25b50ee0e2
commit e5752e1fc9
+3 -1
View File
@@ -467,7 +467,9 @@ int main(int argc, char *argv[]) {
// we now want to sample from this distribution to get the next token
next = sample(state.logits, config.vocab_size);
}
-printf("%s", vocab[next]);
+// following BOS token (1), sentencepiece decoder strips any leading whitespace (see PR #89)
+char *token_str = (token == 1 && vocab[next][0] == ' ') ? vocab[next]+1 : vocab[next];
+printf("%s", token_str);
fflush(stdout);
// advance forward