draft of int8 attempt number two
This commit is contained in:
@@ -6,11 +6,13 @@ CC = gcc
|
||||
# Default optimized (-O3) build. Produces two binaries: `run` from run.c and
# `runq` from runq.c, both linked against libm.
# The target is phony because its name coincides with the `run` binary it
# writes; without .PHONY an up-to-date `run` file would suppress the whole
# recipe, including the runq build.
# Both sources are listed as prerequisites so a missing file is reported by
# make itself before any compiler is started.
.PHONY: run
run: run.c runq.c
	$(CC) -O3 -o run run.c -lm
	$(CC) -O3 -o runq runq.c -lm
|
||||
|
||||
# useful for a debug build, can then e.g. analyze with valgrind, example:
|
||||
# $ valgrind --leak-check=full ./run out/model.bin -n 3
|
||||
# Builds `run` and `runq` with debug symbols (-g) and no optimization flag.
# Declared phony because the recipe never creates a file named `rundebug`;
# without the declaration a stray file of that name would silently disable
# the target.
# Both sources are listed as prerequisites because both are compiled.
.PHONY: rundebug
rundebug: run.c runq.c
	$(CC) -g -o run run.c -lm
	$(CC) -g -o runq runq.c -lm
|
||||
|
||||
# https://gcc.gnu.org/onlinedocs/gcc/Optimize-Options.html
|
||||
# https://simonbyrne.github.io/notes/fastmath/
|
||||
@@ -24,6 +26,7 @@ rundebug: run.c
|
||||
# Aggressively optimized build of `run` and `runq`.
# -Ofast enables -O3 plus optimizations that disregard strict standards
# compliance (such as -ffast-math), so floating-point results may differ
# from the -O3 build.
# Both sources are listed as prerequisites because both are compiled.
.PHONY: runfast
runfast: run.c runq.c
	$(CC) -Ofast -o run run.c -lm
	$(CC) -Ofast -o runq runq.c -lm
|
||||
|
||||
# additionally compiles with OpenMP, allowing multithreaded runs
|
||||
# make sure to also enable multiple threads when running, e.g.:
|
||||
@@ -31,19 +34,23 @@ runfast: run.c
|
||||
# OpenMP build of `run` and `runq`.
#   -fopenmp       honours OpenMP pragmas and links the OpenMP runtime
#   -march=native  generates code for the CPU of the build machine, so the
#                  binaries may not run on a different CPU
# Both sources are listed as prerequisites because both are compiled.
.PHONY: runomp
runomp: run.c runq.c
	$(CC) -Ofast -fopenmp -march=native run.c -lm -o run
	$(CC) -Ofast -fopenmp -march=native runq.c -lm -o runq
|
||||
|
||||
# Cross-compiles Windows executables with the MinGW-w64 gcc toolchain.
# Each program is built together with win.c; -I. adds the current directory
# to the include search path and -D_WIN32 defines the _WIN32 macro.
.PHONY: win64
win64:
	x86_64-w64-mingw32-gcc -Ofast -D_WIN32 -I. run.c win.c -o run.exe
	x86_64-w64-mingw32-gcc -Ofast -D_WIN32 -I. runq.c win.c -o runq.exe
|
||||
|
||||
# compiles with gnu11 standard flags for amazon linux, coreos, etc. compatibility
|
||||
# -Ofast build of `run` and `runq` with the C dialect pinned explicitly via
# -std=gnu11 instead of relying on the compiler's default.
.PHONY: rungnu
rungnu:
	$(CC) -Ofast -std=gnu11 run.c -lm -o run
	$(CC) -Ofast -std=gnu11 runq.c -lm -o runq
|
||||
|
||||
# OpenMP (-fopenmp) build of `run` and `runq` with the C dialect pinned
# explicitly via -std=gnu11.
.PHONY: runompgnu
runompgnu:
	$(CC) -Ofast -fopenmp -std=gnu11 -o run run.c -lm
	$(CC) -Ofast -fopenmp -std=gnu11 -o runq runq.c -lm
|
||||
|
||||
# run all tests
|
||||
.PHONY: test
|
||||
@@ -66,3 +73,4 @@ testcc:
|
||||
# Removes the `run` and `runq` binaries. `rm -f` stays silent and succeeds
# when a file is already absent.
.PHONY: clean
clean:
	rm -f run runq
|
||||
|
||||
@@ -406,6 +406,12 @@ def load_hf_model(model_path):
|
||||
# API entrypoint
|
||||
|
||||
def model_export(model, filepath, version):
|
||||
"""
|
||||
Versions docs:
|
||||
v0: legacy llama2.c float format, DEPRECATED
|
||||
v1: float32 export
|
||||
v2: int8 quantized Q8_0 export, similar to llama.cpp, in groups
|
||||
"""
|
||||
if version == 0:
|
||||
legacy_export(model, filepath)
|
||||
elif version == 1:
|
||||
|
||||
Reference in New Issue
Block a user