Fix ApiChatCompletionRequest
parent 8e125febc9
commit 8a6843aac1

Binary file not shown.
@@ -72,8 +72,7 @@ export class CompletionService {
     if (temperature !== undefined) requestBody.temperature = temperature;
     if (max_tokens !== undefined) {
       // On the completion endpoint, max_tokens is called n_predict
       requestBody.n_predict = max_tokens !== null && max_tokens !== 0 ? max_tokens : -1;
-      requestBody.max_tokens = max_tokens !== null && max_tokens !== 0 ? max_tokens : -1;
     }

     if (dynatemp_range !== undefined) requestBody.dynatemp_range = dynatemp_range;
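This hunk stops sending max_tokens to the completion endpoint and keeps only its native n_predict. A minimal sketch of the resulting mapping in isolation (the applyMaxTokens helper is hypothetical; only the field names and the -1 sentinel come from the diff):

function applyMaxTokens(
  requestBody: Record<string, unknown>,
  max_tokens?: number | null
): void {
  if (max_tokens === undefined) return;
  // The completion endpoint calls this n_predict; null or 0 is mapped
  // to -1, which requests an unbounded prediction.
  requestBody.n_predict = max_tokens !== null && max_tokens !== 0 ? max_tokens : -1;
}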
@@ -83,7 +82,10 @@ export class CompletionService {
     if (min_p !== undefined) requestBody.min_p = min_p;
     if (xtc_probability !== undefined) requestBody.xtc_probability = xtc_probability;
     if (xtc_threshold !== undefined) requestBody.xtc_threshold = xtc_threshold;
-    if (typ_p !== undefined) requestBody.typ_p = typ_p;
+    if (typ_p !== undefined) {
+      // On the completion endpoint, typ_p is called typical_p
+      requestBody.typical_p = typ_p;
+    }

     if (repeat_last_n !== undefined) requestBody.repeat_last_n = repeat_last_n;
     if (repeat_penalty !== undefined) requestBody.repeat_penalty = repeat_penalty;
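Taken together, the two service hunks amount to a chat-to-completion parameter rename. A hypothetical lookup table summarizing the renames this commit touches (not part of the commit itself):

const COMPLETION_PARAM_RENAMES: Record<string, string> = {
  max_tokens: 'n_predict', // chat name → completion name
  typ_p: 'typical_p',
};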
@@ -219,23 +219,35 @@ export interface ApiChatCompletionRequest {
  timings_per_token?: boolean;
}

// Reference: https://github.com/ggml-org/llama.cpp/tree/master/tools/server#post-completion-given-a-prompt-it-returns-the-predicted-completion
export interface ApiCompletionRequest {
  prompt: string;
  stream?: boolean;
  cache_prompt?: boolean;
  model?: string;
  // Configure return
  return_progress?: boolean;
  return_tokens?: boolean;
  timings_per_token?: boolean;
  post_sampling_probs?: boolean;
  response_fields?: string[];
  // Generation parameters
  temperature?: number;
  n_predict?: number;
  // Sampling parameters
  dynatemp_range?: number;
  dynatemp_exponent?: number;
  top_k?: number;
  top_p?: number;
  min_p?: number;
  // We can use either n_predict or max_tokens
  max_tokens?: number;
  n_indent?: number;
  n_keep?: number;
  n_cmpl?: number;
  n_cache_reuse?: number;
  stop?: string[];
  typical_p?: number;
  xtc_probability?: number;
  xtc_threshold?: number;
  typ_p?: number;
  // Penalty parameters
  repeat_last_n?: number;
  repeat_penalty?: number;
@@ -245,12 +257,24 @@ export interface ApiCompletionRequest {
  dry_base?: number;
  dry_allowed_length?: number;
  dry_penalty_last_n?: number;
  dry_sequence_breakers?: string[];
  mirostat?: number;
  mirostat_tau?: number;
  mirostat_eta?: number;
  grammar?: string;
  json_schema?: string;
  seed?: number;
  ignore_eos?: boolean;
  n_probs?: number;
  min_keep?: number;
  t_max_predict_ms?: number;
  id_slot?: number;
  cache_prompt?: boolean;
  // Sampler configuration
  samplers?: string[];
  backend_sampling?: boolean;
  // Custom parameters (JSON string)
  custom?: Record<string, unknown>;
  timings_per_token?: boolean;
}

export interface ApiChatCompletionToolCallFunctionDelta {
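For context, a sketch of a request body that satisfies the updated ApiCompletionRequest, using the completion-endpoint field names; the concrete values are illustrative only:

const body: ApiCompletionRequest = {
  prompt: 'Once upon a time',
  stream: true,
  cache_prompt: true,
  temperature: 0.8,
  n_predict: 128, // the chat-side max_tokens maps here
  typical_p: 0.95, // the chat-side typ_p maps here
  stop: ['\n\n'],
};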