fix: remove Q/K permutation (NEOX RoPE doesn't need it)
This commit is contained in:
parent
56571c3238
commit
d9a442f602
|
|
@ -8537,18 +8537,6 @@ class Jais2Model(TextModel):
|
|||
head_dim = hparams.get("head_dim", hparams["hidden_size"] // hparams["num_attention_heads"])
|
||||
self.gguf_writer.add_rope_dimension_count(head_dim)
|
||||
|
||||
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
    """Map a source-model tensor to its GGUF name, unmodified.

    Jais2 uses NEOX-style RoPE (rotation applied to interleaved pairs),
    which llama.cpp handles directly at inference time — so, unlike
    LLaMA-style rotate_half models, the Q/K projection weights must NOT
    be permuted here. The previous LlamaModel.permute calls on
    q_proj/k_proj produced corrupted attention weights and are removed.

    Args:
        data_torch: the tensor loaded from the source checkpoint.
        name: the source tensor name.
        bid: block (layer) index, or None for non-layer tensors; unused.

    Returns:
        A single (gguf_name, tensor) pair with the tensor passed through
        unchanged.
    """
    # bid is part of the ModelBase.modify_tensors contract but is not
    # needed here — the mapping is purely name-based.
    return [(self.map_tensor_name(name), data_torch)]
|
||||
|
||||
|
||||
@ModelBase.register("JAISLMHeadModel")
|
||||
class JaisModel(TextModel):
|
||||
|
|
|
|||
Loading…
Reference in New Issue