1 Commits

Author SHA1 Message Date
Andrej Karpathy df80471914 draft of int8 attempt number two 2023-08-26 22:28:08 +00:00
3 changed files with 1105 additions and 0 deletions
+8
View File
@@ -6,11 +6,13 @@ CC = gcc
# Optimized (-O3) build of both executables: run.c -> run, runq.c -> runq.
# The target is phony, so both are recompiled on every invocation. Both
# sources are listed as prerequisites so that a missing file is reported by
# make up front instead of failing halfway through the recipe.
.PHONY: run
run: run.c runq.c
	$(CC) -O3 -o run run.c -lm
	$(CC) -O3 -o runq runq.c -lm
# useful for a debug build, can then e.g. analyze with valgrind, example:
# $ valgrind --leak-check=full ./run out/model.bin -n 3
# rundebug never produces a file called "rundebug", so it is declared phony;
# otherwise a stray file with that name would make the target look up to date.
# runq.c is a prerequisite because the recipe compiles it as well.
.PHONY: rundebug
rundebug: run.c runq.c
	$(CC) -g -o run run.c -lm
	$(CC) -g -o runq runq.c -lm
# https://gcc.gnu.org/onlinedocs/gcc/Optimize-Options.html
# https://simonbyrne.github.io/notes/fastmath/
@@ -24,6 +26,7 @@ rundebug: run.c
# -Ofast build of both executables: run.c -> run, runq.c -> runq.
# Both sources are declared as prerequisites because the recipe compiles
# both of them.
.PHONY: runfast
runfast: run.c runq.c
	$(CC) -Ofast -o run run.c -lm
	$(CC) -Ofast -o runq runq.c -lm
# additionally compiles with OpenMP, allowing multithreaded runs
# make sure to also enable multiple threads when running, e.g.:
@@ -31,19 +34,23 @@ runfast: run.c
# OpenMP build (-fopenmp) tuned for the host CPU (-march=native) of both
# executables: run.c -> run, runq.c -> runq. Both sources are declared as
# prerequisites because the recipe compiles both of them.
.PHONY: runomp
runomp: run.c runq.c
	$(CC) -Ofast -fopenmp -march=native run.c -lm -o run
	$(CC) -Ofast -fopenmp -march=native runq.c -lm -o runq
# Builds run.exe and runq.exe with the x86_64-w64-mingw32-gcc toolchain,
# compiling win.c together with each source file. The compiler name is
# hard-coded here, so $(CC) has no effect on this target.
.PHONY: win64
win64:
x86_64-w64-mingw32-gcc -Ofast -D_WIN32 -o run.exe -I. run.c win.c
x86_64-w64-mingw32-gcc -Ofast -D_WIN32 -o runq.exe -I. runq.c win.c
# compiles with gnu11 standard flags for amazon linux, coreos, etc. compatibility
# builds both executables: run.c -> run, runq.c -> runq
.PHONY: rungnu
rungnu:
$(CC) -Ofast -std=gnu11 -o run run.c -lm
$(CC) -Ofast -std=gnu11 -o runq runq.c -lm
# OpenMP build (-fopenmp) using the gnu11 standard flags; compiles both
# executables: run.c -> run, runq.c -> runq
.PHONY: runompgnu
runompgnu:
$(CC) -Ofast -fopenmp -std=gnu11 run.c -lm -o run
$(CC) -Ofast -fopenmp -std=gnu11 runq.c -lm -o runq
# run all tests
.PHONY: test
@@ -66,3 +73,4 @@ testcc:
# Removes the executables produced by the build targets in this file:
# run and runq from the native builds, run.exe and runq.exe from win64.
# rm -f keeps the target from failing when a binary was never built.
.PHONY: clean
clean:
	rm -f run runq
	rm -f run.exe runq.exe
+6
View File
@@ -406,6 +406,12 @@ def load_hf_model(model_path):
# API entrypoint
def model_export(model, filepath, version):
"""
Versions docs:
v0: legacy llama2.c float format, DEPRECATED
v1: float32 export
v2: int8 quantized Q8_0 export, similar to llama.cpp, in groups
"""
if version == 0:
legacy_export(model, filepath)
elif version == 1:
+1091
View File
File diff suppressed because it is too large Load Diff