diff --git a/README.md b/README.md
index 9b054dc..19db674 100644
--- a/README.md
+++ b/README.md
@@ -132,8 +132,7 @@ Watch the tokens stream by, fun! We can also run the PyTorch inference script fo
 
 ```bash
 wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt -P out15M
-mv out15M/stories15M.pt out15M/ckpt.pt # sorry the sample script current assumes this directory structure / filename...
-python sample.py --out_dir=out15M
+python sample.py --checkpoint=out15M/stories15M.pt
 ```
 
 Which gives the same results. More detailed testing will be done in `test_all.py`. Currently you will need two files to test or sample: both the .bin file, and the .ckpt file inside a directory (see `test_all.py` for details). Sorry this is a bit janky right now, I have to think through running the tests without having to download 200MB of data. But run the tests with pytest:
diff --git a/sample.py b/sample.py
index 2f66e7f..64bb177 100644
--- a/sample.py
+++ b/sample.py
@@ -12,12 +12,13 @@ from tokenizer import Tokenizer
 from tinystories import get_tokenizer_model_path
 
 # -----------------------------------------------------------------------------
-out_dir = 'out' # ignored if init_from is not 'resume'
+checkpoint = 'out/ckpt.pt'
 start = "" # or "<|endoftext|>" or etc. Can also specify a file, use as: "FILE:prompt.txt"
 num_samples = 1 # number of samples to draw
 max_new_tokens = 100 # number of tokens generated in each sample
 temperature = 1.0 # 1.0 = no change, < 1.0 = less random, > 1.0 = more random, in predictions
 top_k = 300 # retain only the top_k most likely tokens, clamp others to have 0 probability
+tokenizer = "" # override the tokenizer model path
 seed = 1337
 device = 'cuda' if torch.cuda.is_available() else 'cpu' # examples: 'cpu', 'cuda', 'cuda:0', 'cuda:1', etc.
 #dtype = 'bfloat16' if torch.cuda.is_available() and torch.cuda.is_bf16_supported() else 'float16' # 'float32' or 'bfloat16' or 'float16'
@@ -35,11 +36,10 @@ ptdtype = {'float32': torch.float32, 'bfloat16': torch.bfloat16, 'float16': torc
 ctx = nullcontext() if device_type == 'cpu' else torch.amp.autocast(device_type=device_type, dtype=ptdtype)
 
 # init from a model saved in a specific directory
-ckpt_path = os.path.join(out_dir, 'ckpt.pt')
-checkpoint = torch.load(ckpt_path, map_location=device)
-gptconf = ModelArgs(**checkpoint['model_args'])
+checkpoint_dict = torch.load(checkpoint, map_location=device)
+gptconf = ModelArgs(**checkpoint_dict['model_args'])
 model = Transformer(gptconf)
-state_dict = checkpoint['model']
+state_dict = checkpoint_dict['model']
 unwanted_prefix = '_orig_mod.'
 for k,v in list(state_dict.items()):
     if k.startswith(unwanted_prefix):
@@ -52,8 +52,11 @@ if compile:
     print("Compiling the model...")
     model = torch.compile(model) # requires PyTorch 2.0 (optional)
 
-# load the tokenizer
-tokenizer_model = get_tokenizer_model_path(vocab_size=gptconf.vocab_size)
+# load the tokenizer, either provided, or attempt to find it
+if tokenizer:
+    tokenizer_model = tokenizer
+else:
+    tokenizer_model = get_tokenizer_model_path(vocab_size=gptconf.vocab_size)
 enc = Tokenizer(tokenizer_model=tokenizer_model)
 
 # encode the beginning of the prompt