Add a llama_model member function that builds the MTP graph, to be called from speculative.cpp

2025-08-12 01:03:59 -04:00 · 2025-08-12 01:03:59 -04:00 · 03231da69e
parent 1f477b3755
commit 03231da69e
2 changed files with 18 additions and 0 deletions
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -18673,6 +18673,22 @@ ggml_cgraph * llama_model::build_graph(const llm_graph_params & params) const {
    return llm->res->get_gf();
 }

+ggml_cgraph* llama_model::build_mtp_graph(const llm_graph_params& params, // MTP counterpart of build_graph: constructs the arch-specific MTP graph context and returns its graph
+    ggml_tensor* hidden_state_inp, llama_token last_token_id, int n_past) const { // these three extra arguments are forwarded unchanged to the arch-specific constructor
+    std::unique_ptr<llm_graph_context> llm; // owns the graph context; it is destroyed when this function returns
+
+    switch (arch) { // dispatch on the model architecture
+    case LLM_ARCH_GLM4_MOE: // the only architecture handled here
+    {
+        llm = std::make_unique<llm_build_glm4_moe_mtp>(*this, params, hidden_state_inp, last_token_id, n_past);
+    } break;
+    default: // every architecture other than LLM_ARCH_GLM4_MOE
+        GGML_ABORT("fatal error"); // no MTP graph can be built for this arch
+    }
+
+    return llm->res->get_gf(); // NOTE(review): the graph is fetched through llm->res right before llm is destroyed; presumably res is not owned by the context (same pattern as build_graph) - confirm
+}
+
 //
 // interface implementation
 //
--- a/src/llama-model.h
+++ b/src/llama-model.h
@@ -475,6 +475,8 @@ struct llama_model {

    // TODO: move this to new llm_arch_model_i interface
    ggml_cgraph * build_graph(const llm_graph_params & params) const;
+    ggml_cgraph * build_mtp_graph(const llm_graph_params & params, // MTP variant of build_graph; the definition aborts unless arch is LLM_ARCH_GLM4_MOE
+        ggml_tensor * hidden_state_inp, llama_token last_token_id, int n_past) const; // extra inputs are passed through to the arch-specific MTP graph context

 private:
    struct impl;