Small changes to ROPE & comments
This commit is contained in:
@@ -216,7 +216,7 @@ class Transformer(nn.Module):
|
||||
self.tok_embeddings.weight = self.output.weight # https://paperswithcode.com/method/weight-tying
|
||||
|
||||
# some useful precomputation for the RoPE relative positional embeddings. TODO: why `* 2` here? This is confusing.
|
||||
freqs_cos, freqs_sin = precompute_freqs_cis(self.params.dim // self.params.n_heads, self.params.max_seq_len * 2)
|
||||
freqs_cos, freqs_sin = precompute_freqs_cis(self.params.dim // self.params.n_heads, self.params.max_seq_len)
|
||||
self.register_buffer("freqs_cos", freqs_cos, persistent=False)
|
||||
self.register_buffer("freqs_sin", freqs_sin, persistent=False)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user