diff --git a/tools/server/public/index.html.gz b/tools/server/public/index.html.gz index 48e341dbd1..f3f73e9069 100644 Binary files a/tools/server/public/index.html.gz and b/tools/server/public/index.html.gz differ diff --git a/tools/server/webui/src/lib/components/app/chat/ChatSettings/ChatSettingsDialog.svelte b/tools/server/webui/src/lib/components/app/chat/ChatSettings/ChatSettingsDialog.svelte index d2a0a739c5..fa22a46ba4 100644 --- a/tools/server/webui/src/lib/components/app/chat/ChatSettings/ChatSettingsDialog.svelte +++ b/tools/server/webui/src/lib/components/app/chat/ChatSettings/ChatSettingsDialog.svelte @@ -152,6 +152,16 @@ key: 'samplers', label: 'Samplers', type: 'input' + }, + { + key: 'backend_sampling', + label: 'Backend sampling', + type: 'checkbox' + }, + { + key: 'backend_dist', + label: 'Backend dist sampling', + type: 'checkbox' } ] }, @@ -277,6 +287,10 @@ function handleConfigChange(key: string, value: string | boolean) { localConfig[key] = value; + + if (key === 'backend_sampling' && value === false) { + localConfig.backend_dist = false; + } } function handleClose() { diff --git a/tools/server/webui/src/lib/components/app/chat/ChatSettings/ChatSettingsFields.svelte b/tools/server/webui/src/lib/components/app/chat/ChatSettings/ChatSettingsFields.svelte index d17f7e4229..64b815c92a 100644 --- a/tools/server/webui/src/lib/components/app/chat/ChatSettings/ChatSettingsFields.svelte +++ b/tools/server/webui/src/lib/components/app/chat/ChatSettings/ChatSettingsFields.svelte @@ -198,7 +198,9 @@

{/if} {:else if field.type === 'checkbox'} - {@const isDisabled = field.key === 'pdfAsImage' && !supportsVision()} + {@const pdfDisabled = field.key === 'pdfAsImage' && !supportsVision()} + {@const backendDistDisabled = field.key === 'backend_dist' && !localConfig.backend_sampling} + {@const isDisabled = pdfDisabled || backendDistDisabled}
{field.help || SETTING_CONFIG_INFO[field.key]}

- {:else if field.key === 'pdfAsImage' && !supportsVision()} + {:else if pdfDisabled}

PDF-to-image processing requires a vision-capable model. PDFs will be processed as text.

+ {:else if backendDistDisabled} +

+ Enable backend sampling to allow backend dist sampling. +

{/if}
diff --git a/tools/server/webui/src/lib/constants/settings-config.ts b/tools/server/webui/src/lib/constants/settings-config.ts index 7547832d95..9dbf70cc69 100644 --- a/tools/server/webui/src/lib/constants/settings-config.ts +++ b/tools/server/webui/src/lib/constants/settings-config.ts @@ -18,6 +18,8 @@ export const SETTING_CONFIG_DEFAULT: Record = modelSelectorEnabled: false, // make sure these default values are in sync with `common.h` samplers: 'top_k;typ_p;top_p;min_p;temperature', + backend_sampling: false, + backend_dist: false, temperature: 0.8, dynatemp_range: 0.0, dynatemp_exponent: 1.0, @@ -50,6 +52,10 @@ export const SETTING_CONFIG_INFO: Record = { 'On pasting long text, it will be converted to a file. You can control the file length by setting the value of this parameter. Value 0 means disable.', samplers: 'The order at which samplers are applied, in simplified way. Default is "top_k;typ_p;top_p;min_p;temperature": top_k->typ_p->top_p->min_p->temperature', + backend_sampling: + 'Enable backend-based samplers. When enabled, supported samplers run on the accelerator backend for faster sampling.', + backend_dist: + 'Perform the final distribution sampling step on the backend. Requires backend sampling to be enabled.', temperature: 'Controls the randomness of the generated text by affecting the probability distribution of the output tokens. 
Higher = more random, lower = more focused.', dynatemp_range: diff --git a/tools/server/webui/src/lib/services/chat.ts b/tools/server/webui/src/lib/services/chat.ts index 1908d83909..980d1137e9 100644 --- a/tools/server/webui/src/lib/services/chat.ts +++ b/tools/server/webui/src/lib/services/chat.ts @@ -98,6 +98,8 @@ export class ChatService { dry_penalty_last_n, // Other parameters samplers, + backend_sampling, + backend_dist, custom, timings_per_token } = options; @@ -182,6 +184,9 @@ export class ChatService { : samplers; } + if (backend_sampling !== undefined) requestBody.backend_sampling = backend_sampling; + if (backend_dist !== undefined) requestBody.backend_dist = backend_dist; + if (timings_per_token !== undefined) requestBody.timings_per_token = timings_per_token; if (custom) { diff --git a/tools/server/webui/src/lib/stores/chat.svelte.ts b/tools/server/webui/src/lib/stores/chat.svelte.ts index 5b5a9d74a5..12d0f7d36b 100644 --- a/tools/server/webui/src/lib/stores/chat.svelte.ts +++ b/tools/server/webui/src/lib/stores/chat.svelte.ts @@ -298,6 +298,12 @@ class ChatStore { if (currentConfig.samplers) { apiOptions.samplers = currentConfig.samplers; } + if (currentConfig.backend_sampling !== undefined) { + apiOptions.backend_sampling = Boolean(currentConfig.backend_sampling); + } + if (currentConfig.backend_dist !== undefined) { + apiOptions.backend_dist = Boolean(currentConfig.backend_dist); + } if (currentConfig.custom) { apiOptions.custom = currentConfig.custom; } diff --git a/tools/server/webui/src/lib/types/api.d.ts b/tools/server/webui/src/lib/types/api.d.ts index 1a8bc64989..149d4fb118 100644 --- a/tools/server/webui/src/lib/types/api.d.ts +++ b/tools/server/webui/src/lib/types/api.d.ts @@ -181,6 +181,8 @@ export interface ApiChatCompletionRequest { dry_penalty_last_n?: number; // Sampler configuration samplers?: string[]; + backend_sampling?: boolean; + backend_dist?: boolean; // Custom parameters (JSON string) custom?: Record; timings_per_token?: 
boolean; diff --git a/tools/server/webui/src/lib/types/settings.d.ts b/tools/server/webui/src/lib/types/settings.d.ts index b85b0597d0..9bba5cfbc2 100644 --- a/tools/server/webui/src/lib/types/settings.d.ts +++ b/tools/server/webui/src/lib/types/settings.d.ts @@ -36,6 +36,8 @@ export interface SettingsChatServiceOptions { dry_penalty_last_n?: number; // Sampler configuration samplers?: string | string[]; + backend_sampling?: boolean; + backend_dist?: boolean; // Custom parameters custom?: string; timings_per_token?: boolean;