diff --git a/tools/server/public/index.html.gz b/tools/server/public/index.html.gz
index 3a6fb1dc80..6c5e898ba6 100644
Binary files a/tools/server/public/index.html.gz and b/tools/server/public/index.html.gz differ
diff --git a/tools/server/webui/src/lib/services/completion.ts b/tools/server/webui/src/lib/services/completion.ts
index 096c9bc838..52ce257afd 100644
--- a/tools/server/webui/src/lib/services/completion.ts
+++ b/tools/server/webui/src/lib/services/completion.ts
@@ -72,8 +72,7 @@ export class CompletionService {
 
 		if (temperature !== undefined) requestBody.temperature = temperature;
 		if (max_tokens !== undefined) {
-			// On the completion endpoint, max_tokens is called n_predict
-			requestBody.n_predict = max_tokens !== null && max_tokens !== 0 ? max_tokens : -1;
+			requestBody.max_tokens = max_tokens !== null && max_tokens !== 0 ? max_tokens : -1;
 		}
 
 		if (dynatemp_range !== undefined) requestBody.dynatemp_range = dynatemp_range;
@@ -83,7 +82,10 @@ export class CompletionService {
 		if (min_p !== undefined) requestBody.min_p = min_p;
 		if (xtc_probability !== undefined) requestBody.xtc_probability = xtc_probability;
 		if (xtc_threshold !== undefined) requestBody.xtc_threshold = xtc_threshold;
-		if (typ_p !== undefined) requestBody.typ_p = typ_p;
+		if (typ_p !== undefined) {
+			// On the completion endpoint, typ_p is called typical_p
+			requestBody.typical_p = typ_p;
+		}
 
 		if (repeat_last_n !== undefined) requestBody.repeat_last_n = repeat_last_n;
 		if (repeat_penalty !== undefined) requestBody.repeat_penalty = repeat_penalty;
diff --git a/tools/server/webui/src/lib/types/api.d.ts b/tools/server/webui/src/lib/types/api.d.ts
index 1a304f2b7c..b8a742f23e 100644
--- a/tools/server/webui/src/lib/types/api.d.ts
+++ b/tools/server/webui/src/lib/types/api.d.ts
@@ -219,23 +219,35 @@ export interface ApiChatCompletionRequest {
 	timings_per_token?: boolean;
 }
 
+// Reference: https://github.com/ggml-org/llama.cpp/tree/master/tools/server#post-completion-given-a-prompt-it-returns-the-predicted-completion
 export interface ApiCompletionRequest {
 	prompt: string;
 	stream?: boolean;
-	cache_prompt?: boolean;
 	model?: string;
+	// Configure return
+	return_progress?: boolean;
+	return_tokens?: boolean;
+	timings_per_token?: boolean;
+	post_sampling_probs?: boolean;
+	response_fields?: string[];
 	// Generation parameters
 	temperature?: number;
-	n_predict?: number;
 	// Sampling parameters
 	dynatemp_range?: number;
 	dynatemp_exponent?: number;
 	top_k?: number;
 	top_p?: number;
 	min_p?: number;
+	// We can use either n_predict or max_tokens
+	max_tokens?: number;
+	n_indent?: number;
+	n_keep?: number;
+	n_cmpl?: number;
+	n_cache_reuse?: number;
+	stop?: string[];
+	typical_p?: number;
 	xtc_probability?: number;
 	xtc_threshold?: number;
-	typ_p?: number;
 	// Penalty parameters
 	repeat_last_n?: number;
 	repeat_penalty?: number;
@@ -245,12 +257,24 @@ export interface ApiCompletionRequest {
 	dry_base?: number;
 	dry_allowed_length?: number;
 	dry_penalty_last_n?: number;
+	dry_sequence_breakers?: string[];
+	mirostat?: number;
+	mirostat_tau?: number;
+	mirostat_eta?: number;
+	grammar?: string;
+	json_schema?: string;
+	seed?: number;
+	ignore_eos?: boolean;
+	n_probs?: number;
+	min_keep?: number;
+	t_max_predict_ms?: number;
+	id_slot?: number;
+	cache_prompt?: boolean;
 	// Sampler configuration
 	samplers?: string[];
 	backend_sampling?: boolean;
 	// Custom parameters (JSON string)
 	custom?: Record;
-	timings_per_token?: boolean;
 }
 
 export interface ApiChatCompletionToolCallFunctionDelta {
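
For context, here is a minimal sketch of how the two renamed fields now land in the `/completion` request body: `max_tokens` is forwarded under its own name instead of `n_predict` (the interface comment above notes the endpoint accepts either), and the UI's `typ_p` setting is sent as `typical_p`. This is not the actual `CompletionService` code; the helper name `buildCompletionBody` and its `settings` shape are made up for illustration.

```ts
// Hypothetical helper illustrating the field mapping after this change.
// ApiCompletionRequest is the interface from tools/server/webui/src/lib/types/api.d.ts above.
function buildCompletionBody(
	prompt: string,
	settings: { temperature?: number; max_tokens?: number | null; typ_p?: number }
): ApiCompletionRequest {
	const body: ApiCompletionRequest = { prompt, stream: true };

	if (settings.temperature !== undefined) body.temperature = settings.temperature;

	// 0 or null means "no limit"; the server expects -1 in that case
	if (settings.max_tokens !== undefined) {
		body.max_tokens =
			settings.max_tokens !== null && settings.max_tokens !== 0 ? settings.max_tokens : -1;
	}

	// The UI setting is named typ_p, but /completion expects typical_p
	if (settings.typ_p !== undefined) body.typical_p = settings.typ_p;

	return body;
}
```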