2 Commits

Author SHA1 Message Date
Andrej 7325bab657 Merge pull request #365 from atamurad/patch-1
Update README.md - unclosed code block quotes
2023-08-26 20:11:04 -07:00
Atamurad Hezretkuliyev 37157bc0a3 Update README.md
Fixed unclosed code block quotes
2023-08-27 02:27:47 +03:00
4 changed files with 1 addition and 1105 deletions
-8
View File
@@ -6,13 +6,11 @@ CC = gcc
.PHONY: run
run: run.c
$(CC) -O3 -o run run.c -lm
$(CC) -O3 -o runq runq.c -lm
# useful for a debug build, can then e.g. analyze with valgrind, example:
# $ valgrind --leak-check=full ./run out/model.bin -n 3
rundebug: run.c
$(CC) -g -o run run.c -lm
$(CC) -g -o runq runq.c -lm
# https://gcc.gnu.org/onlinedocs/gcc/Optimize-Options.html
# https://simonbyrne.github.io/notes/fastmath/
@@ -26,7 +24,6 @@ rundebug: run.c
.PHONY: runfast
runfast: run.c
$(CC) -Ofast -o run run.c -lm
$(CC) -Ofast -o runq runq.c -lm
# additionally compiles with OpenMP, allowing multithreaded runs
# make sure to also enable multiple threads when running, e.g.:
@@ -34,23 +31,19 @@ runfast: run.c
.PHONY: runomp
runomp: run.c
$(CC) -Ofast -fopenmp -march=native run.c -lm -o run
$(CC) -Ofast -fopenmp -march=native runq.c -lm -o runq
.PHONY: win64
win64:
x86_64-w64-mingw32-gcc -Ofast -D_WIN32 -o run.exe -I. run.c win.c
x86_64-w64-mingw32-gcc -Ofast -D_WIN32 -o runq.exe -I. runq.c win.c
# compiles with gnu11 standard flags for amazon linux, coreos, etc. compatibility
.PHONY: rungnu
rungnu:
$(CC) -Ofast -std=gnu11 -o run run.c -lm
$(CC) -Ofast -std=gnu11 -o runq runq.c -lm
.PHONY: runompgnu
runompgnu:
$(CC) -Ofast -fopenmp -std=gnu11 run.c -lm -o run
$(CC) -Ofast -fopenmp -std=gnu11 runq.c -lm -o runq
# run all tests
.PHONY: test
@@ -73,4 +66,3 @@ testcc:
.PHONY: clean
clean:
rm -f run
rm -f runq
+1
View File
@@ -109,6 +109,7 @@ Chat with Code Llama Instruct:
python export.py codellama2_7b_instruct.bin --meta-llama /path/to/CodeLlama-7b-Instruct
python tokenizer.py --tokenizer-model=/path/to/CodeLlama-7b-Instruct/tokenizer.model
./run codellama2_7b_instruct.bin -m chat -z /path/to/CodeLlama-7b-Instruct/tokenizer.bin
```
## huggingface models
-6
View File
@@ -406,12 +406,6 @@ def load_hf_model(model_path):
# API entrypoint
def model_export(model, filepath, version):
"""
Versions docs:
v0: legacy llama2.c float format, DEPRECATED
v1: float32 export
v2: int8 quantized Q8_0 export, similar to llama.cpp, in groups
"""
if version == 0:
legacy_export(model, filepath)
elif version == 1:
-1091
View File
File diff suppressed because it is too large Load Diff