reshuffle blocks of code a bit

This commit is contained in:
Andrej Karpathy
2023-07-28 05:37:43 +00:00
parent d04336cac8
commit fd68dd222f
+40 -40
View File
@@ -337,46 +337,6 @@ void transformer(int token, int pos, Config* p, RunState* s, TransformerWeights*
matmul(s->logits, x, w->wcls, p->dim, p->vocab_size);
}
// https://en.wikipedia.org/wiki/Xorshift#xorshift.2A
unsigned long long rng_seed;
unsigned int random_u32() {
rng_seed ^= rng_seed >> 12;
rng_seed ^= rng_seed << 25;
rng_seed ^= rng_seed >> 27;
return (rng_seed * 0x2545F4914F6CDD1Dull) >> 32;
}
float random_f32() {
return (random_u32() >> 8) / 16777216.0f;
}
// ----------------------------------------------------------------------------
// functions to sample the next token from the transformer's predicted distribution
int sample(float* probabilities, int n) {
// sample index from probabilities, they must sum to 1
float r = random_f32();
float cdf = 0.0f;
for (int i = 0; i < n; i++) {
cdf += probabilities[i];
if (r < cdf) {
return i;
}
}
return n - 1; // in case of rounding errors
}
int argmax(float* v, int n) {
// return argmax of v in elements 0..n
int max_i = 0;
float max_p = v[0];
for (int i = 1; i < n; i++) {
if (v[i] > max_p) {
max_i = i;
max_p = v[i];
}
}
return max_i;
}
// ----------------------------------------------------------------------------
// byte pair encoding (BPE) tokenizer, encodes strings into tokens so we can prompt
@@ -441,11 +401,51 @@ void bpe_encode(char *text, char **vocab, float *vocab_scores, int vocab_size, u
// ----------------------------------------------------------------------------
// utilities
long time_in_ms() {
// return time in milliseconds, for benchmarking the model speed
struct timespec time;
clock_gettime(CLOCK_REALTIME, &time);
return time.tv_sec * 1000 + time.tv_nsec / 1000000;
}
unsigned long long rng_seed;
unsigned int random_u32() {
// xorshift rng: https://en.wikipedia.org/wiki/Xorshift#xorshift.2A
rng_seed ^= rng_seed >> 12;
rng_seed ^= rng_seed << 25;
rng_seed ^= rng_seed >> 27;
return (rng_seed * 0x2545F4914F6CDD1Dull) >> 32;
}
float random_f32() { // random float32 in [0,1)
return (random_u32() >> 8) / 16777216.0f;
}
int sample(float* probabilities, int n) {
// sample index from probabilities, they must sum to 1
float r = random_f32();
float cdf = 0.0f;
for (int i = 0; i < n; i++) {
cdf += probabilities[i];
if (r < cdf) {
return i;
}
}
return n - 1; // in case of rounding errors
}
int argmax(float* v, int n) {
// return argmax of v in elements 0..n
int max_i = 0;
float max_p = v[0];
for (int i = 1; i < n; i++) {
if (v[i] > max_p) {
max_i = i;
max_p = v[i];
}
}
return max_i;
}
// ----------------------------------------------------------------------------
int main(int argc, char *argv[]) {