Kimi Linear ggml.h

This commit is contained in:
Yee Man Chan 2025-12-02 11:14:34 +08:00
parent 57cca52779
commit 6167f39e08
2 changed files with 24 additions and 0 deletions

View File

@ -539,6 +539,7 @@ extern "C" {
GGML_OP_FLASH_ATTN_BACK,
GGML_OP_SSM_CONV,
GGML_OP_SSM_SCAN,
GGML_OP_KDA_SCAN,
GGML_OP_WIN_PART,
GGML_OP_WIN_UNPART,
GGML_OP_GET_REL_POS,
@ -2337,6 +2338,28 @@ extern "C" {
struct ggml_tensor * C,
struct ggml_tensor * ids);
// KDA (Kimi Delta Attention) scan
//
// Declares the graph-building entry point for the KDA scan operation.
//
// Delta attention recurrence (as stated by the author of this declaration):
//   h[t] = exp(g[t]) * h[t-1] + k[t]^T * (beta[t] * (v[t] - h[t-1] @ k[t]))
//   o[t] = q[t]^T @ h[t]
//
// Parameters (documented shapes; not checked by this declaration itself):
//   ctx:  ggml context
//   h:    hidden state  {head_dim, head_dim, n_head, n_seqs+}
//   q:    query         {head_dim, n_head, n_seq_tokens, n_seqs}
//   k:    key           {head_dim, n_head, n_seq_tokens, n_seqs}
//   v:    value         {head_dim, n_head, n_seq_tokens, n_seqs}
//   g:    gate          {head_dim, n_head, n_seq_tokens, n_seqs}
//   beta: mixing        {n_head, n_seq_tokens, n_seqs}
//   ids:  seq indices   {n_seqs}
//
// NOTE(review): "n_seqs+" presumably means the last dimension of h may be
//               larger than n_seqs, with ids selecting which state slots are
//               used - confirm against the implementation.
// NOTE(review): the expected element types of the inputs (e.g. the integer
//               type of ids) are not stated here - confirm.
//
// Returns:
//   a tensor pointer; NOTE(review): the header does not say whether the
//   result holds only the outputs o[t], or also the updated hidden state,
//   nor its shape - confirm against the implementation and document it here.
GGML_API struct ggml_tensor * ggml_kda_scan(
struct ggml_context * ctx,
struct ggml_tensor * h,
struct ggml_tensor * q,
struct ggml_tensor * k,
struct ggml_tensor * v,
struct ggml_tensor * g,
struct ggml_tensor * beta,
struct ggml_tensor * ids);
// partition into non-overlapping windows with padding if needed
// example:
// a: 768 64 64 1

View File

@ -498,6 +498,7 @@ class TensorNameMap:
"model.layers.{bid}.mlp.shared_experts.gate_proj", # deepseek deepseek2
"model.layers.{bid}.feed_forward.shared_expert.gate_proj", # llama4
"model.layers.{bid}.mlp.shared_mlp.gate_proj", # hunyuan
"model.layers.{bid}.block_sparse_moe.shared_experts.gate_proj", # kimi
),
MODEL_TENSOR.FFN_GATE_CHEXP: (