diff --git a/tools/server/public/index.html.gz b/tools/server/public/index.html.gz
index 403131d93b..c7d8193f9a 100644
Binary files a/tools/server/public/index.html.gz and b/tools/server/public/index.html.gz differ
diff --git a/tools/server/server-task.cpp b/tools/server/server-task.cpp
index 6b2eb53e0f..337895a5ef 100644
--- a/tools/server/server-task.cpp
+++ b/tools/server/server-task.cpp
@@ -208,10 +208,10 @@ task_params server_task::params_from_json_cmpl(
params.sampling.seed = json_value(data, "seed", defaults.sampling.seed);
params.sampling.n_probs = json_value(data, "n_probs", defaults.sampling.n_probs);
params.sampling.min_keep = json_value(data, "min_keep", defaults.sampling.min_keep);
+ // NOTE(review): the value read for "backend_sampling" is now used directly instead of being AND-ed with defaults.sampling.backend_sampling - confirm this is intended.
+ params.sampling.backend_sampling = json_value(data, "backend_sampling", defaults.sampling.backend_sampling);
params.post_sampling_probs = json_value(data, "post_sampling_probs", defaults.post_sampling_probs);
- const bool request_backend_sampling = json_value(data, "backend_sampling", defaults.sampling.backend_sampling);
- params.sampling.backend_sampling = defaults.sampling.backend_sampling && request_backend_sampling;
params.speculative.n_min = json_value(data, "speculative.n_min", defaults.speculative.n_min);
params.speculative.n_max = json_value(data, "speculative.n_max", defaults.speculative.n_max);
diff --git a/tools/server/webui/src/lib/stores/chat.svelte.ts b/tools/server/webui/src/lib/stores/chat.svelte.ts
index 4f78840a57..3cf3866203 100644
--- a/tools/server/webui/src/lib/stores/chat.svelte.ts
+++ b/tools/server/webui/src/lib/stores/chat.svelte.ts
@@ -1401,6 +1401,7 @@ class ChatStore {
if (hasValue(currentConfig.dry_penalty_last_n))
apiOptions.dry_penalty_last_n = Number(currentConfig.dry_penalty_last_n);
if (currentConfig.samplers) apiOptions.samplers = currentConfig.samplers;
+ if (currentConfig.backend_sampling) apiOptions.backend_sampling = currentConfig.backend_sampling;
if (currentConfig.custom) apiOptions.custom = currentConfig.custom;
return apiOptions;
diff --git a/tools/server/webui/src/lib/types/api.d.ts b/tools/server/webui/src/lib/types/api.d.ts
index e1e45b6528..26d2bcc0d8 100644
--- a/tools/server/webui/src/lib/types/api.d.ts
+++ b/tools/server/webui/src/lib/types/api.d.ts
@@ -149,6 +149,7 @@ export interface ApiLlamaCppServerProps {
reasoning_in_content: boolean;
thinking_forced_open: boolean;
samplers: string[];
+ backend_sampling: boolean;
'speculative.n_max': number;
'speculative.n_min': number;
'speculative.p_min': number;
@@ -311,6 +312,7 @@ export interface ApiSlotData {
reasoning_in_content: boolean;
thinking_forced_open: boolean;
samplers: string[];
+ backend_sampling: boolean;
'speculative.n_max': number;
'speculative.n_min': number;
'speculative.p_min': number;