small stylistic fixes and adjustments, fix bug in Makefile, and change the timing code to skip the first (slow) iteration

This commit is contained in:
Andrej Karpathy
2023-07-27 22:42:08 +00:00
parent 0e1b0d4c93
commit 25b50ee0e2
2 changed files with 14 additions and 18 deletions
+1 -1
View File
@@ -42,7 +42,7 @@ rungnu:
$(CC) -Ofast -std=gnu11 -o run run.c -lm $(CC) -Ofast -std=gnu11 -o run run.c -lm
.PHONY: runompgnu .PHONY: runompgnu
rungnu: runompgnu:
$(CC) -Ofast -fopenmp -std=gnu11 run.c -lm -o run $(CC) -Ofast -fopenmp -std=gnu11 run.c -lm -o run
.PHONY: clean .PHONY: clean
+10 -14
View File
@@ -193,6 +193,7 @@ void softmax(float* x, int size) {
void matmul(float* xout, float* x, float* w, int n, int d) { void matmul(float* xout, float* x, float* w, int n, int d) {
// W (d,n) @ x (n,) -> xout (d,) // W (d,n) @ x (n,) -> xout (d,)
// by far the most time is spent inside this little function
int i; int i;
#pragma omp parallel for private(i) #pragma omp parallel for private(i)
for (i = 0; i < d; i++) { for (i = 0; i < d; i++) {
@@ -398,15 +399,12 @@ int main(int argc, char *argv[]) {
// read in the model.bin file // read in the model.bin file
Config config; Config config;
TransformerWeights weights; TransformerWeights weights;
int fd = 0; int fd = 0; // file descriptor for memory mapping
float* data = NULL; float* data = NULL; // memory mapped data pointer
long file_size; long file_size; // size of the checkpoint file in bytes
{ {
FILE *file = fopen(checkpoint, "rb"); FILE *file = fopen(checkpoint, "rb");
if (!file) { if (!file) { printf("Couldn't open file %s\n", checkpoint); return 1; }
printf("Unable to open the checkpoint file %s!\n", checkpoint);
return 1;
}
// read in the config header // read in the config header
if(fread(&config, sizeof(Config), 1, file) != 1) { return 1; } if(fread(&config, sizeof(Config), 1, file) != 1) { return 1; }
// negative vocab size is hacky way of signaling unshared weights. bit yikes. // negative vocab size is hacky way of signaling unshared weights. bit yikes.
@@ -431,11 +429,7 @@ int main(int argc, char *argv[]) {
char** vocab = (char**)malloc(config.vocab_size * sizeof(char*)); char** vocab = (char**)malloc(config.vocab_size * sizeof(char*));
{ {
FILE *file = fopen("tokenizer.bin", "rb"); FILE *file = fopen("tokenizer.bin", "rb");
if (!file) { if (!file) { printf("Couldn't load tokenizer.bin\n"); return 1; }
printf("Unable to open the tokenizer file tokenizer.bin! Run "
"python tokenizer.py to convert tokenizer.model -> tokenizer.bin\n");
return 1;
}
int len; int len;
for (int i = 0; i < config.vocab_size; i++) { for (int i = 0; i < config.vocab_size; i++) {
if(fread(&len, sizeof(int), 1, file) != 1) { return 1; } if(fread(&len, sizeof(int), 1, file) != 1) { return 1; }
@@ -451,7 +445,7 @@ int main(int argc, char *argv[]) {
malloc_run_state(&state, &config); malloc_run_state(&state, &config);
// the current position we are in // the current position we are in
long start = time_in_ms(); long start = 0; // used to time our code, only initialized after first iteration
int next; int next;
int token = 1; // 1 = BOS token in Llama-2 sentencepiece int token = 1; // 1 = BOS token in Llama-2 sentencepiece
int pos = 0; int pos = 0;
@@ -479,11 +473,13 @@ int main(int argc, char *argv[]) {
// advance forward // advance forward
token = next; token = next;
pos++; pos++;
// init our timer here because the first iteration is slow due to memmap
if (start == 0) { start = time_in_ms(); }
} }
// report achieved tok/s // report achieved tok/s
long end = time_in_ms(); long end = time_in_ms();
printf("\nachieved tok/s: %f\n", steps / (double)(end-start)*1000); printf("\nachieved tok/s: %f\n", (steps-1) / (double)(end-start)*1000);
// memory and file handles cleanup // memory and file handles cleanup
free_run_state(&state); free_run_state(&state);