From c9b1f1012428e0acda0fd8ff25e36eb1502cca6e Mon Sep 17 00:00:00 2001 From: Kris Jusiak Date: Mon, 24 Jul 2023 13:06:27 -0500 Subject: [PATCH] Speed up rmsnorm by using sqrtf/expf Problem: - exp and sqrt perform their operations in double precision, which is not required. Solution: - Use expf and sqrtf instead. Notes: - Although this switches to single precision, it doesn't seem to affect the result. Results: ~10% improvement - before: 940 tok/s - after: 1020 tok/s --- run.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/run.c b/run.c index 15352ae..0d54cfe 100644 --- a/run.c +++ b/run.c @@ -184,7 +184,7 @@ void rmsnorm(float* o, float* x, float* weight, int size) { } ss /= size; ss += 1e-5f; - ss = 1.0f / sqrt(ss); + ss = 1.0f / sqrtf(ss); // normalize and scale for (int j = 0; j < size; j++) { o[j] = weight[j] * (ss * x[j]); @@ -202,7 +202,7 @@ void softmax(float* x, int size) { // exp and sum float sum = 0.0f; for (int i = 0; i < size; i++) { - x[i] = exp(x[i] - max_val); + x[i] = expf(x[i] - max_val); sum += x[i]; } // normalize