draft of int8 attempt number two

2023-08-26 22:28:08 +00:00
parent f4b8a81742
commit df80471914
3 changed files with 1105 additions and 0 deletions
@@ -6,11 +6,13 @@ CC = gcc
 .PHONY: run
 run: run.c
 	$(CC) -O3 -o run run.c -lm
+	$(CC) -O3 -o runq runq.c -lm

 # useful for a debug build, can then e.g. analyze with valgrind, example:
 # $ valgrind --leak-check=full ./run out/model.bin -n 3
 rundebug: run.c
 	$(CC) -g -o run run.c -lm
+	$(CC) -g -o runq runq.c -lm

 # https://gcc.gnu.org/onlinedocs/gcc/Optimize-Options.html
 # https://simonbyrne.github.io/notes/fastmath/
@@ -24,6 +26,7 @@ rundebug: run.c
 .PHONY: runfast
 runfast: run.c
 	$(CC) -Ofast -o run run.c -lm
+	$(CC) -Ofast -o runq runq.c -lm

 # additionally compiles with OpenMP, allowing multithreaded runs
 # make sure to also enable multiple threads when running, e.g.:
@@ -31,19 +34,23 @@ runfast: run.c
 .PHONY: runomp
 runomp: run.c
 	$(CC) -Ofast -fopenmp -march=native run.c  -lm  -o run
+	$(CC) -Ofast -fopenmp -march=native runq.c  -lm  -o runq

 .PHONY: win64
 win64:
 	x86_64-w64-mingw32-gcc -Ofast -D_WIN32 -o run.exe -I. run.c win.c
+	x86_64-w64-mingw32-gcc -Ofast -D_WIN32 -o runq.exe -I. runq.c win.c

 # compiles with the gnu11 standard flag (-std=gnu11) for amazon linux, coreos, etc. compatibility
 .PHONY: rungnu
 rungnu:
 	$(CC) -Ofast -std=gnu11 -o run run.c -lm
+	$(CC) -Ofast -std=gnu11 -o runq runq.c -lm

 .PHONY: runompgnu
 runompgnu:
 	$(CC) -Ofast -fopenmp -std=gnu11 run.c  -lm  -o run
+	$(CC) -Ofast -fopenmp -std=gnu11 runq.c  -lm  -o runq

 # run all tests
 .PHONY: test
@@ -66,3 +73,4 @@ testcc:
 .PHONY: clean
 clean:
 	rm -f run
+	rm -f runq
@@ -406,6 +406,12 @@ def load_hf_model(model_path):
 # API entrypoint

 def model_export(model, filepath, version):
+    """
+    Versions docs:
+    v0: legacy llama2.c float format, DEPRECATED
+    v1: float32 export
+    v2: int8 quantized Q8_0 export, similar to llama.cpp, in groups
+    """
    if version == 0:
        legacy_export(model, filepath)
    elif version == 1: