From 69c28f1547c169902f62ca48bee75fb876c4d8e6 Mon Sep 17 00:00:00 2001
From: Aaron Teo
Date: Tue, 7 Apr 2026 21:39:41 +0800
Subject: [PATCH] llama-server: fix model params not propagated (#21509)

Signed-off-by: Aaron Teo
---
 tools/server/server-context.cpp | 7 +++++--
 tools/server/server-context.h   | 2 +-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp
index c1ccedf107..9d3ac53895 100644
--- a/tools/server/server-context.cpp
+++ b/tools/server/server-context.cpp
@@ -632,7 +632,7 @@ private:
 
     // load the model and initialize llama_context
     // this may also be called to resume from sleeping state
-    bool load_model(const common_params & params) {
+    bool load_model(common_params & params) {
         bool is_resume = sleeping;
 
         SRV_INF("loading model '%s'\n", params.model.path.c_str());
@@ -641,6 +641,9 @@ private:
 
         llama_init = common_init_from_params(params_base);
 
+        // propagate model-metadata sampling defaults back to caller
+        params.sampling = params_base.sampling;
+
         model = llama_init->model();
         ctx = llama_init->context();
 
@@ -2978,7 +2981,7 @@ private:
 server_context::server_context() : impl(new server_context_impl()) {}
 server_context::~server_context() = default;
 
-bool server_context::load_model(const common_params & params) {
+bool server_context::load_model(common_params & params) {
     return impl->load_model(params);
 }
 
diff --git a/tools/server/server-context.h b/tools/server/server-context.h
index a4d2201cbe..d7ce873583 100644
--- a/tools/server/server-context.h
+++ b/tools/server/server-context.h
@@ -56,7 +56,7 @@ struct server_context {
 
     // load the model and initialize llama_context
     // returns true on success
-    bool load_model(const common_params & params);
+    bool load_model(common_params & params);
 
     // this function will block main thread until termination
     void start_loop();