From 24917b23de7404eda597b1566e2b8be669d9ee1f Mon Sep 17 00:00:00 2001 From: Andrej Karpathy Date: Sun, 23 Jul 2023 15:28:24 +0000 Subject: [PATCH] fix run command --- README.md | 8 ++++---- run_wrap.py | 1 - 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 9a3c1b0..431dc3c 100644 --- a/README.md +++ b/README.md @@ -19,14 +19,14 @@ Let's just run a baby Llama 2 model in C. You need a model checkpoint. Download wget https://karpathy.ai/llama2c/model.bin -P out ``` -(if that doesn't work try [google drive](https://drive.google.com/file/d/1aTimLdx3JktDXxcHySNrZJOOk8Vb1qBR/view?usp=share_link)). Compile and run the C code, though it will only emit the raw token ids: +(if that doesn't work try [google drive](https://drive.google.com/file/d/1aTimLdx3JktDXxcHySNrZJOOk8Vb1qBR/view?usp=share_link)). Compile and run the C code: ```bash gcc -o run run.c -lm -./run +./run out/model.bin ``` -So to also translate them into text, we currently run it through a simple wrapper (for now): +You'll notice that this just streams the raw tokens. Unless you can read those directly, you'll want to translate them into text. For now sadly we have to run this C code through a simple wrapper that does the translation (see the file, it's just 30 lines): ```bash pip install sentencepiece @@ -67,7 +67,7 @@ gcc -o run run.c -lm You can now run it simply as ```bash -./run +./run out/model.bin ``` But note that this only emits the SentencePiece tokens. To decode the tokens into text too, run this script through a simple wrapper: diff --git a/run_wrap.py b/run_wrap.py index 00f2eb4..b609115 100644 --- a/run_wrap.py +++ b/run_wrap.py @@ -30,5 +30,4 @@ print('\n---\n') print(enc.decode(tokens)) print(f"achieved tok/s: {len(tokens) / (t1 - t0)}") -# Wait for the process to finish proc.wait()