From 55e60740f5c94ec37f66212864242bb6ee910065 Mon Sep 17 00:00:00 2001 From: rdentato Date: Wed, 16 Aug 2023 07:58:07 +0000 Subject: [PATCH] Added space to str_buffer in case max_token_length is 1. --- run.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/run.c b/run.c index 70951c0..513eda9 100644 --- a/run.c +++ b/run.c @@ -362,7 +362,7 @@ void bpe_encode(char *text, char **vocab, float *vocab_scores, int vocab_size, u qsort(sorted_vocab, vocab_size, sizeof(TokenIndex), compare_tokens); // create a temporary buffer that will store merge candidates of always two consecutive tokens - char* str_buffer = malloc((max_token_length*2+1) * sizeof(char)); // *2 for concat, +1 for null terminator + char* str_buffer = malloc((max_token_length*2 +1 +2) * sizeof(char)); // *2 for concat, +1 for null terminator, +2 for UTF8 (in case max_token_length is 1) size_t str_len = 0; // add_dummy_prefix is true by default