diff --git a/tools/server/public/index.html.gz b/tools/server/public/index.html.gz index 403131d93b..c7d8193f9a 100644 Binary files a/tools/server/public/index.html.gz and b/tools/server/public/index.html.gz differ diff --git a/tools/server/server-task.cpp b/tools/server/server-task.cpp index 6b2eb53e0f..337895a5ef 100644 --- a/tools/server/server-task.cpp +++ b/tools/server/server-task.cpp @@ -208,10 +208,11 @@ task_params server_task::params_from_json_cmpl( params.sampling.seed = json_value(data, "seed", defaults.sampling.seed); params.sampling.n_probs = json_value(data, "n_probs", defaults.sampling.n_probs); params.sampling.min_keep = json_value(data, "min_keep", defaults.sampling.min_keep); + params.sampling.backend_sampling = json_value(data, "backend_sampling", defaults.sampling.backend_sampling); params.post_sampling_probs = json_value(data, "post_sampling_probs", defaults.post_sampling_probs); - const bool request_backend_sampling = json_value(data, "backend_sampling", defaults.sampling.backend_sampling); - params.sampling.backend_sampling = defaults.sampling.backend_sampling && request_backend_sampling; + printf("params.sampling.backend_sampling = %d\n", params.sampling.backend_sampling); + printf("defaults.sampling.backend_sampling = %d\n", defaults.sampling.backend_sampling); params.speculative.n_min = json_value(data, "speculative.n_min", defaults.speculative.n_min); params.speculative.n_max = json_value(data, "speculative.n_max", defaults.speculative.n_max); diff --git a/tools/server/webui/src/lib/stores/chat.svelte.ts b/tools/server/webui/src/lib/stores/chat.svelte.ts index 4f78840a57..3cf3866203 100644 --- a/tools/server/webui/src/lib/stores/chat.svelte.ts +++ b/tools/server/webui/src/lib/stores/chat.svelte.ts @@ -1401,6 +1401,7 @@ class ChatStore { if (hasValue(currentConfig.dry_penalty_last_n)) apiOptions.dry_penalty_last_n = Number(currentConfig.dry_penalty_last_n); if (currentConfig.samplers) apiOptions.samplers = currentConfig.samplers; + if (currentConfig.backend_sampling) apiOptions.backend_sampling = currentConfig.backend_sampling; if (currentConfig.custom) apiOptions.custom = currentConfig.custom; return apiOptions; diff --git a/tools/server/webui/src/lib/types/api.d.ts b/tools/server/webui/src/lib/types/api.d.ts index e1e45b6528..26d2bcc0d8 100644 --- a/tools/server/webui/src/lib/types/api.d.ts +++ b/tools/server/webui/src/lib/types/api.d.ts @@ -149,6 +149,7 @@ export interface ApiLlamaCppServerProps { reasoning_in_content: boolean; thinking_forced_open: boolean; samplers: string[]; + backend_sampling: boolean; 'speculative.n_max': number; 'speculative.n_min': number; 'speculative.p_min': number; @@ -311,6 +312,7 @@ export interface ApiSlotData { reasoning_in_content: boolean; thinking_forced_open: boolean; samplers: string[]; + backend_sampling: boolean; 'speculative.n_max': number; 'speculative.n_min': number; 'speculative.p_min': number;