Fix ApiChatCompletionRequest

This commit is contained in:
Leszek Hanusz 2026-02-10 03:14:14 +01:00
parent 8e125febc9
commit 8a6843aac1
3 changed files with 33 additions and 7 deletions

Binary file not shown.

View File

@@ -72,8 +72,7 @@ export class CompletionService {
if (temperature !== undefined) requestBody.temperature = temperature;
if (max_tokens !== undefined) {
// On the completion endpoint, max_tokens is called n_predict
requestBody.n_predict = max_tokens !== null && max_tokens !== 0 ? max_tokens : -1;
requestBody.max_tokens = max_tokens !== null && max_tokens !== 0 ? max_tokens : -1;
}
if (dynatemp_range !== undefined) requestBody.dynatemp_range = dynatemp_range;
@@ -83,7 +82,10 @@ export class CompletionService {
if (min_p !== undefined) requestBody.min_p = min_p;
if (xtc_probability !== undefined) requestBody.xtc_probability = xtc_probability;
if (xtc_threshold !== undefined) requestBody.xtc_threshold = xtc_threshold;
if (typ_p !== undefined) requestBody.typ_p = typ_p;
if (typ_p !== undefined) {
// On the completion endpoint, typ_p is called typical_p
requestBody.typical_p = typ_p;
}
if (repeat_last_n !== undefined) requestBody.repeat_last_n = repeat_last_n;
if (repeat_penalty !== undefined) requestBody.repeat_penalty = repeat_penalty;

View File

@@ -219,23 +219,35 @@ export interface ApiChatCompletionRequest {
timings_per_token?: boolean;
}
// Reference: https://github.com/ggml-org/llama.cpp/tree/master/tools/server#post-completion-given-a-prompt-it-returns-the-predicted-completion
export interface ApiCompletionRequest {
prompt: string;
stream?: boolean;
cache_prompt?: boolean;
model?: string;
// Configure return
return_progress?: boolean;
return_tokens?: boolean;
timings_per_token?: boolean;
post_sampling_probs?: boolean;
response_fields?: string[];
// Generation parameters
temperature?: number;
n_predict?: number;
// Sampling parameters
dynatemp_range?: number;
dynatemp_exponent?: number;
top_k?: number;
top_p?: number;
min_p?: number;
// We can use either n_predict or max_tokens
max_tokens?: number;
n_indent?: number;
n_keep?: number;
n_cmpl?: number;
n_cache_reuse?: number;
stop?: string[];
typical_p?: number;
xtc_probability?: number;
xtc_threshold?: number;
typ_p?: number;
// Penalty parameters
repeat_last_n?: number;
repeat_penalty?: number;
@@ -245,12 +257,24 @@ export interface ApiCompletionRequest {
dry_base?: number;
dry_allowed_length?: number;
dry_penalty_last_n?: number;
dry_sequence_breakers?: string[];
mirostat?: number;
mirostat_tau?: number;
mirostat_eta?: number;
grammar?: string;
json_schema?: string;
seed?: number;
ignore_eos?: boolean;
n_probs?: number;
min_keep?: number;
t_max_predict_ms?: number;
id_slot?: number;
cache_prompt?: boolean;
// Sampler configuration
samplers?: string[];
backend_sampling?: boolean;
// Custom parameters (JSON string)
custom?: Record<string, unknown>;
timings_per_token?: boolean;
}
export interface ApiChatCompletionToolCallFunctionDelta {