Add a llama_model member function to build the MTP graph, to be called from speculative.cpp

This commit is contained in:
Aaron Lee 2025-08-12 01:03:59 -04:00
parent 1f477b3755
commit 03231da69e
2 changed files with 18 additions and 0 deletions

View File

@@ -18673,6 +18673,22 @@ ggml_cgraph * llama_model::build_graph(const llm_graph_params & params) const {
return llm->res->get_gf();
}
// Build the MTP graph for this model and return it.
//
// The arguments are forwarded untouched to the architecture-specific builder;
// their exact meaning is defined there.
// NOTE(review): presumably hidden_state_inp is the hidden state produced by the
// main model and last_token_id / n_past describe the current decoding position
// - confirm against llm_build_glm4_moe_mtp.
//
// Only LLM_ARCH_GLM4_MOE is handled; any other architecture aborts.
ggml_cgraph * llama_model::build_mtp_graph(const llm_graph_params & params,
        ggml_tensor * hidden_state_inp, llama_token last_token_id, int n_past) const {
    // Guard clause: no builder exists here for any other architecture.
    if (arch != LLM_ARCH_GLM4_MOE) {
        GGML_ABORT("fatal error");
    }

    const std::unique_ptr<llm_graph_context> builder =
        std::make_unique<llm_build_glm4_moe_mtp>(*this, params, hidden_state_inp, last_token_id, n_past);

    // The graph is fetched through the builder's result object before the builder goes out of scope.
    return builder->res->get_gf();
}
//
// interface implementation
//

View File

@@ -475,6 +475,8 @@ struct llama_model {
// TODO: move this to new llm_arch_model_i interface
ggml_cgraph * build_graph(const llm_graph_params & params) const;
// Builds the MTP graph from the given graph params, an input hidden-state tensor,
// the last token id and n_past, and returns the resulting ggml graph.
// Currently implemented only for LLM_ARCH_GLM4_MOE; other architectures abort.
// NOTE(review): intended to be called from speculative.cpp (per the commit message) - confirm caller.
ggml_cgraph * build_mtp_graph(const llm_graph_params & params,
ggml_tensor * hidden_state_inp, llama_token last_token_id, int n_past) const;
private:
struct impl;