turn topp 0.9 back on by default thanks to recent PR contributions truncating before quicksort
This commit is contained in:
@@ -58,7 +58,7 @@ You can also prompt the model with a prefix or a number of additional command li
|
||||
|
||||
There is also an even better 110M param model available, see [models](#models).
|
||||
|
||||
Quick note on sampling, the recommendation for ~best results is to sample with `-t 1.0 -p 0.9`, i.e. temperature 1.0 (default) but also top-p sampling at 0.9 (not default!). The top-p sampling is turned off by default because it can run quite a bit slower. More generally, to control the diversity of samples use either the temperature (i.e. vary `-t` between 0 and 1 and keep top-p off with `-p 0`) or the top-p value (i.e. vary `-p` between 0 and 1 and keep `-t 1`), but not both. Nice explainers on LLM sampling strategies include [this](https://peterchng.com/blog/2023/05/02/token-selection-strategies-top-k-top-p-and-temperature/), [this](https://docs.cohere.com/docs/controlling-generation-with-top-k-top-p) or [this](https://huggingface.co/blog/how-to-generate).
|
||||
Quick note on sampling, the recommendation for ~best results is to sample with `-t 1.0 -p 0.9`, i.e. temperature 1.0 (default) but also top-p sampling at 0.9 (default). Intuitively, top-p ensures that tokens with tiny probabilities do not get sampled, so we can't get "unlucky" during sampling, and we are less likely to go "off the rails" afterwards. More generally, to control the diversity of samples use either the temperature (i.e. vary `-t` between 0 and 1 and keep top-p off with `-p 0`) or the top-p value (i.e. vary `-p` between 0 and 1 and keep `-t 1`), but not both. Nice explainers on LLM sampling strategies include [this](https://peterchng.com/blog/2023/05/02/token-selection-strategies-top-k-top-p-and-temperature/), [this](https://docs.cohere.com/docs/controlling-generation-with-top-k-top-p) or [this](https://huggingface.co/blog/how-to-generate).
|
||||
|
||||
## Meta's Llama 2 models
|
||||
|
||||
|
||||
@@ -474,8 +474,8 @@ int sample_topp(float* probabilities, int n, float topp, ProbIndex* probindex) {
|
||||
|
||||
int n0 = 0;
|
||||
// quicksort indices in descending order of probabilities
|
||||
// elements smaller than (1 - topp) / (n - 1) cannot be part of the result
|
||||
// and can be filtered out directly
|
||||
// values smaller than (1 - topp) / (n - 1) cannot be part of the result
|
||||
// so for efficiency we crop these out as candidates before sorting
|
||||
const float cutoff = (1.0f - topp) / (n - 1);
|
||||
for (int i = 0; i < n; i++) {
|
||||
if (probabilities[i] >= cutoff) {
|
||||
@@ -518,7 +518,7 @@ void error_usage() {
|
||||
fprintf(stderr, "Example: run model.bin -n 256 -i \"Once upon a time\"\n");
|
||||
fprintf(stderr, "Options:\n");
|
||||
fprintf(stderr, " -t <float> temperature, default 1.0\n");
|
||||
fprintf(stderr, " -p <float> p value in top-p (nucleus) sampling. default 1.0 (=off)\n");
|
||||
fprintf(stderr, " -p <float> p value in top-p (nucleus) sampling. default 0.9\n");
|
||||
fprintf(stderr, " -s <int> random seed, default time(NULL)\n");
|
||||
fprintf(stderr, " -n <int> number of steps to run for, default 256. 0 = max_seq_len\n");
|
||||
fprintf(stderr, " -i <string> input prompt\n");
|
||||
@@ -532,7 +532,7 @@ int main(int argc, char *argv[]) {
|
||||
char *checkpoint = NULL; // e.g. out/model.bin
|
||||
char *tokenizer = "tokenizer.bin";
|
||||
float temperature = 1.0f; // 0.0 = greedy deterministic. 1.0 = original. don't set higher
|
||||
float topp = 1.0f; // top-p in nucleus sampling. 1.0 = off. 0.9 works well, but slower
|
||||
float topp = 0.9f; // top-p in nucleus sampling. 1.0 = off. 0.9 works well, but slower
|
||||
rng_seed = 0; // seed rng with time by default
|
||||
int steps = 256; // number of steps to run for
|
||||
char *prompt = NULL; // prompt string
|
||||
|
||||
Reference in New Issue
Block a user