diff --git a/save_model.py b/save_model.py
index 4aebef0..f3b7539 100644
--- a/save_model.py
+++ b/save_model.py
@@ -5,6 +5,13 @@
 The resulting file can be loaded in C++ code and then used for training or inference with:
     #include <torch/script.h>
     torch::jit::Module module = torch::jit::load("model.pt")
+
+Note that the model includes the initial parameters, and with the default ModelArgs the serialized
+model is 59M and gzips down to 55M. If you want to serialize/distribute the model parameters
+separately and reduce the size of the model file, you can zero out the parameters before saving it
+and it will gzip down to 780K:
+    for p in model.parameters():
+        p.detach().zero_()
 """
 
 import glob