Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| df80471914 |
@@ -6,11 +6,13 @@ CC = gcc
|
|||||||
.PHONY: run
|
.PHONY: run
|
||||||
run: run.c
|
run: run.c
|
||||||
$(CC) -O3 -o run run.c -lm
|
$(CC) -O3 -o run run.c -lm
|
||||||
|
$(CC) -O3 -o runq runq.c -lm
|
||||||
|
|
||||||
# useful for a debug build, can then e.g. analyze with valgrind, example:
|
# useful for a debug build, can then e.g. analyze with valgrind, example:
|
||||||
# $ valgrind --leak-check=full ./run out/model.bin -n 3
|
# $ valgrind --leak-check=full ./run out/model.bin -n 3
|
||||||
rundebug: run.c
|
rundebug: run.c
|
||||||
$(CC) -g -o run run.c -lm
|
$(CC) -g -o run run.c -lm
|
||||||
|
$(CC) -g -o runq runq.c -lm
|
||||||
|
|
||||||
# https://gcc.gnu.org/onlinedocs/gcc/Optimize-Options.html
|
# https://gcc.gnu.org/onlinedocs/gcc/Optimize-Options.html
|
||||||
# https://simonbyrne.github.io/notes/fastmath/
|
# https://simonbyrne.github.io/notes/fastmath/
|
||||||
@@ -24,6 +26,7 @@ rundebug: run.c
|
|||||||
.PHONY: runfast
|
.PHONY: runfast
|
||||||
runfast: run.c
|
runfast: run.c
|
||||||
$(CC) -Ofast -o run run.c -lm
|
$(CC) -Ofast -o run run.c -lm
|
||||||
|
$(CC) -Ofast -o runq runq.c -lm
|
||||||
|
|
||||||
# additionally compiles with OpenMP, allowing multithreaded runs
|
# additionally compiles with OpenMP, allowing multithreaded runs
|
||||||
# make sure to also enable multiple threads when running, e.g.:
|
# make sure to also enable multiple threads when running, e.g.:
|
||||||
@@ -31,19 +34,23 @@ runfast: run.c
|
|||||||
.PHONY: runomp
|
.PHONY: runomp
|
||||||
runomp: run.c
|
runomp: run.c
|
||||||
$(CC) -Ofast -fopenmp -march=native run.c -lm -o run
|
$(CC) -Ofast -fopenmp -march=native run.c -lm -o run
|
||||||
|
$(CC) -Ofast -fopenmp -march=native runq.c -lm -o runq
|
||||||
|
|
||||||
.PHONY: win64
|
.PHONY: win64
|
||||||
win64:
|
win64:
|
||||||
x86_64-w64-mingw32-gcc -Ofast -D_WIN32 -o run.exe -I. run.c win.c
|
x86_64-w64-mingw32-gcc -Ofast -D_WIN32 -o run.exe -I. run.c win.c
|
||||||
|
x86_64-w64-mingw32-gcc -Ofast -D_WIN32 -o runq.exe -I. runq.c win.c
|
||||||
|
|
||||||
# compiles with gnu11 standard flags for amazon linux, coreos, etc. compatibility
|
# compiles with gnu11 standard flags for amazon linux, coreos, etc. compatibility
|
||||||
.PHONY: rungnu
|
.PHONY: rungnu
|
||||||
rungnu:
|
rungnu:
|
||||||
$(CC) -Ofast -std=gnu11 -o run run.c -lm
|
$(CC) -Ofast -std=gnu11 -o run run.c -lm
|
||||||
|
$(CC) -Ofast -std=gnu11 -o runq runq.c -lm
|
||||||
|
|
||||||
.PHONY: runompgnu
|
.PHONY: runompgnu
|
||||||
runompgnu:
|
runompgnu:
|
||||||
$(CC) -Ofast -fopenmp -std=gnu11 run.c -lm -o run
|
$(CC) -Ofast -fopenmp -std=gnu11 run.c -lm -o run
|
||||||
|
$(CC) -Ofast -fopenmp -std=gnu11 runq.c -lm -o runq
|
||||||
|
|
||||||
# run all tests
|
# run all tests
|
||||||
.PHONY: test
|
.PHONY: test
|
||||||
@@ -66,3 +73,4 @@ testcc:
|
|||||||
.PHONY: clean
|
.PHONY: clean
|
||||||
clean:
|
clean:
|
||||||
rm -f run
|
rm -f run
|
||||||
|
rm -f runq
|
||||||
|
|||||||
@@ -109,7 +109,6 @@ Chat with Code Llama Instruct:
|
|||||||
python export.py codellama2_7b_instruct.bin --meta-llama /path/to/CodeLlama-7b-Instruct
|
python export.py codellama2_7b_instruct.bin --meta-llama /path/to/CodeLlama-7b-Instruct
|
||||||
python tokenizer.py --tokenizer-model=/path/to/CodeLlama-7b-Instruct/tokenizer.model
|
python tokenizer.py --tokenizer-model=/path/to/CodeLlama-7b-Instruct/tokenizer.model
|
||||||
./run codellama2_7b_instruct.bin -m chat -z /path/to/CodeLlama-7b-Instruct/tokenizer.bin
|
./run codellama2_7b_instruct.bin -m chat -z /path/to/CodeLlama-7b-Instruct/tokenizer.bin
|
||||||
```
|
|
||||||
|
|
||||||
## huggingface models
|
## huggingface models
|
||||||
|
|
||||||
|
|||||||
@@ -406,6 +406,12 @@ def load_hf_model(model_path):
|
|||||||
# API entrypoint
|
# API entrypoint
|
||||||
|
|
||||||
def model_export(model, filepath, version):
|
def model_export(model, filepath, version):
|
||||||
|
"""
|
||||||
|
Versions docs:
|
||||||
|
v0: legacy llama2.c float format, DEPRECATED
|
||||||
|
v1: float32 export
|
||||||
|
v2: int8 quantized Q8_0 export, similar to llama.cpp, in groups
|
||||||
|
"""
|
||||||
if version == 0:
|
if version == 0:
|
||||||
legacy_export(model, filepath)
|
legacy_export(model, filepath)
|
||||||
elif version == 1:
|
elif version == 1:
|
||||||
|
|||||||
Reference in New Issue
Block a user