From ad7a1ef52547263debd8074ea30a4e3b550abe28 Mon Sep 17 00:00:00 2001
From: Andrej Karpathy
Date: Tue, 22 Aug 2023 02:32:21 +0000
Subject: [PATCH] clean up swiglu a little bit

---
 run.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/run.c b/run.c
index 328519a..dc232e5 100644
--- a/run.c
+++ b/run.c
@@ -339,14 +339,14 @@ float* forward(Transformer* transformer, int token, int pos) {
         matmul(s->hb, s->xb, w->w1 + l*dim*hidden_dim, dim, hidden_dim);
         matmul(s->hb2, s->xb, w->w3 + l*dim*hidden_dim, dim, hidden_dim);
 
-        // F.silu; silu(x)=x*σ(x),where σ(x) is the logistic sigmoid
+        // SwiGLU non-linearity
         for (int i = 0; i < hidden_dim; i++) {
-            s->hb[i] = s->hb[i] * (1.0f / (1.0f + expf(-s->hb[i])));
-        }
-
-        // elementwise multiply with w3(x)
-        for (int i = 0; i < hidden_dim; i++) {
-            s->hb[i] = s->hb[i] * s->hb2[i];
+            float val = s->hb[i];
+            // silu(x)=x*σ(x), where σ(x) is the logistic sigmoid
+            val *= (1.0f / (1.0f + expf(-val)));
+            // elementwise multiply with w3(x)
+            val *= s->hb2[i];
+            s->hb[i] = val;
         }
 
         // final matmul to get the output of the ffn