diff --git a/src/llama-model.cpp b/src/llama-model.cpp
index ab81f2d363..f2c27391ed 100644
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -2434,8 +2434,8 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                     layer.attn_norm      = create_tensor(tn(LLM_TENSOR_ATTN_NORM, "weight", i), { n_embd }, 0);
                     layer.attn_post_norm = create_tensor(tn(LLM_TENSOR_ATTN_POST_NORM, "weight", i), { n_embd }, 0);
 
-                    if ((i + 1) % 4 == 0) { // TODO: magic 4
-                        // Attention layers
+                    if (!hparams.is_recurrent(i)) {
+                        // Attention layers
                         layer.wq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "weight", i), { n_embd, n_embd_head_k * n_head }, 0);
                         layer.wk = create_tensor(tn(LLM_TENSOR_ATTN_K, "weight", i), { n_embd, n_embd_k_gqa }, 0);
                         layer.wv = create_tensor(tn(LLM_TENSOR_ATTN_V, "weight", i), { n_embd, n_embd_v_gqa }, 0);
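
Note: the hunk replaces the hard-coded "(i + 1) % 4 == 0" check (attention on every 4th layer) with a query of the per-layer layout stored in the hyperparameters, so the tensor loader no longer encodes the hybrid pattern itself. A minimal sketch of how that per-layer flag could be populated during hparams loading is below; it assumes llama_hparams exposes the recurrent_layer_arr array backing is_recurrent(il), and the 3-recurrent-to-1-attention pattern is shown only to mirror the old magic check, not as this model's actual metadata.

    // sketch (assumption): mark layers as recurrent except every 4th one,
    // reproducing the layout the old "(i + 1) % 4 == 0" check implied
    for (uint32_t il = 0; il < hparams.n_layer; ++il) {
        hparams.recurrent_layer_arr[il] = ((il + 1) % 4 != 0);
    }

With the pattern held in hparams, load_tensors() and the graph-building code can both ask hparams.is_recurrent(i) instead of duplicating the magic constant.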