From 03231da69eec20677e25e2307d4fe31ac2ede034 Mon Sep 17 00:00:00 2001
From: Aaron Lee
Date: Tue, 12 Aug 2025 01:03:59 -0400
Subject: [PATCH] add model member function to build mtp graph, to be called from speculative.cpp

---
Review notes (not part of the commit message):
 * The received copy had no template arguments on the std::unique_ptr and
   std::make_unique lines, which cannot compile. They are restored below as
   llm_graph_context and llm_build_glm4_moe_mtp. NOTE(review): the builder
   type name is a reconstruction - confirm it against the tree.
 * Line breaks, indentation and blank context lines were reconstructed, and
   the From: line arrived without an e-mail address - verify before applying.
 * A short comment was added above the new function, so the hunk and
   diffstat counts below differ from the original commit.

 src/llama-model.cpp | 19 +++++++++++++++++++
 src/llama-model.h   |  2 ++
 2 files changed, 21 insertions(+)

diff --git a/src/llama-model.cpp b/src/llama-model.cpp
index a9310a6090..667d9e442b 100644
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -18673,6 +18673,25 @@ ggml_cgraph * llama_model::build_graph(const llm_graph_params & params) const {
     return llm->res->get_gf();
 }
 
+// Build the mtp graph for this model; intended to be called from speculative.cpp.
+// All arguments are forwarded unchanged to the architecture-specific graph builder.
+// Only LLM_ARCH_GLM4_MOE is handled; any other architecture aborts.
+ggml_cgraph * llama_model::build_mtp_graph(const llm_graph_params & params,
+        ggml_tensor * hidden_state_inp, llama_token last_token_id, int n_past) const {
+    std::unique_ptr<llm_graph_context> llm;
+
+    switch (arch) {
+        case LLM_ARCH_GLM4_MOE:
+            {
+                llm = std::make_unique<llm_build_glm4_moe_mtp>(*this, params, hidden_state_inp, last_token_id, n_past);
+            } break;
+        default:
+            GGML_ABORT("fatal error");
+    }
+
+    return llm->res->get_gf();
+}
+
 //
 // interface implementation
 //
diff --git a/src/llama-model.h b/src/llama-model.h
index 6fcd74d57f..77a18aca71 100644
--- a/src/llama-model.h
+++ b/src/llama-model.h
@@ -475,6 +475,8 @@ struct llama_model {
 
     // TODO: move this to new llm_arch_model_i interface
     ggml_cgraph * build_graph(const llm_graph_params & params) const;
+    ggml_cgraph * build_mtp_graph(const llm_graph_params & params,
+            ggml_tensor * hidden_state_inp, llama_token last_token_id, int n_past) const;
 
 private:
     struct impl;