Put completion API service in a separate file

Leszek Hanusz 2026-02-04 16:29:53 +01:00
parent 251ba9d72a
commit 393faf0166
4 changed files with 359 additions and 336 deletions

Binary file not shown.

View File

@@ -259,176 +259,7 @@ export class ChatService {
}
}
/**
* Sends a completion request to the llama.cpp server.
* Supports both streaming and non-streaming responses.
*
* @param prompt - The text prompt to complete
* @param options - Configuration options for the completion request
* @returns {Promise<string | void>} that resolves to the complete response string (non-streaming) or void (streaming)
* @throws {Error} if the request fails or is aborted
*/
static async sendCompletion(
prompt: string,
options: SettingsChatServiceOptions = {},
signal?: AbortSignal
): Promise<string | void> {
const {
stream,
onChunk,
onComplete,
onError,
onModel,
onTimings,
// Generation parameters
temperature,
max_tokens,
// Sampling parameters
dynatemp_range,
dynatemp_exponent,
top_k,
top_p,
min_p,
xtc_probability,
xtc_threshold,
typ_p,
// Penalty parameters
repeat_last_n,
repeat_penalty,
presence_penalty,
frequency_penalty,
dry_multiplier,
dry_base,
dry_allowed_length,
dry_penalty_last_n,
// Other parameters
samplers,
backend_sampling,
custom,
timings_per_token
} = options;
const requestBody: ApiCompletionRequest = {
prompt,
stream
};
// Include model in request if provided
if (options.model) {
requestBody.model = options.model;
}
if (temperature !== undefined) requestBody.temperature = temperature;
if (max_tokens !== undefined) {
requestBody.max_tokens = max_tokens !== null && max_tokens !== 0 ? max_tokens : -1;
}
if (dynatemp_range !== undefined) requestBody.dynatemp_range = dynatemp_range;
if (dynatemp_exponent !== undefined) requestBody.dynatemp_exponent = dynatemp_exponent;
if (top_k !== undefined) requestBody.top_k = top_k;
if (top_p !== undefined) requestBody.top_p = top_p;
if (min_p !== undefined) requestBody.min_p = min_p;
if (xtc_probability !== undefined) requestBody.xtc_probability = xtc_probability;
if (xtc_threshold !== undefined) requestBody.xtc_threshold = xtc_threshold;
if (typ_p !== undefined) requestBody.typ_p = typ_p;
if (repeat_last_n !== undefined) requestBody.repeat_last_n = repeat_last_n;
if (repeat_penalty !== undefined) requestBody.repeat_penalty = repeat_penalty;
if (presence_penalty !== undefined) requestBody.presence_penalty = presence_penalty;
if (frequency_penalty !== undefined) requestBody.frequency_penalty = frequency_penalty;
if (dry_multiplier !== undefined) requestBody.dry_multiplier = dry_multiplier;
if (dry_base !== undefined) requestBody.dry_base = dry_base;
if (dry_allowed_length !== undefined) requestBody.dry_allowed_length = dry_allowed_length;
if (dry_penalty_last_n !== undefined) requestBody.dry_penalty_last_n = dry_penalty_last_n;
if (samplers !== undefined) {
requestBody.samplers =
typeof samplers === 'string'
? samplers.split(';').filter((s: string) => s.trim())
: samplers;
}
if (backend_sampling !== undefined) requestBody.backend_sampling = backend_sampling;
if (timings_per_token !== undefined) requestBody.timings_per_token = timings_per_token;
if (custom) {
try {
const customParams = typeof custom === 'string' ? JSON.parse(custom) : custom;
Object.assign(requestBody, customParams);
} catch (error) {
console.warn('Failed to parse custom parameters:', error);
}
}
try {
const response = await fetch(`./completion`, {
method: 'POST',
headers: getJsonHeaders(),
body: JSON.stringify(requestBody),
signal
});
if (!response.ok) {
const error = await ChatService.parseErrorResponse(response);
if (onError) {
onError(error);
}
throw error;
}
if (stream) {
await ChatService.handleCompletionStreamResponse(
response,
onChunk,
onComplete,
onError,
onModel,
onTimings,
signal
);
return;
} else {
return ChatService.handleCompletionNonStreamResponse(
response,
onComplete,
onError,
onModel
);
}
} catch (error) {
if (error instanceof Error && error.name === 'AbortError') {
console.log('Completion request was aborted');
return;
}
let userFriendlyError: Error;
if (error instanceof Error) {
if (error.name === 'TypeError' && error.message.includes('fetch')) {
userFriendlyError = new Error(
'Unable to connect to server - please check if the server is running'
);
userFriendlyError.name = 'NetworkError';
} else if (error.message.includes('ECONNREFUSED')) {
userFriendlyError = new Error('Connection refused - server may be offline');
userFriendlyError.name = 'NetworkError';
} else if (error.message.includes('ETIMEDOUT')) {
userFriendlyError = new Error('Request timed out - the server took too long to respond');
userFriendlyError.name = 'TimeoutError';
} else {
userFriendlyError = error;
}
} else {
userFriendlyError = new Error('Unknown error occurred while sending completion');
}
console.error('Error in sendCompletion:', error);
if (onError) {
onError(userFriendlyError);
}
throw userFriendlyError;
}
}
// ─────────────────────────────────────────────────────────────────────────────
// Streaming
@@ -869,7 +700,7 @@ export class ChatService {
* @param response - HTTP response object
* @returns Promise<Error> - Parsed error with context info if available
*/
- private static async parseErrorResponse(
+ public static async parseErrorResponse(
response: Response
): Promise<Error & { contextInfo?: { n_prompt_tokens: number; n_ctx: number } }> {
try {
@@ -912,7 +743,7 @@ export class ChatService {
* @returns Model name string if found, undefined otherwise
* @private
*/
- private static extractModelName(data: unknown): string | undefined {
+ public static extractModelName(data: unknown): string | undefined {
const asRecord = (value: unknown): Record<string, unknown> | undefined => {
return typeof value === 'object' && value !== null
? (value as Record<string, unknown>)
@@ -953,7 +784,7 @@ export class ChatService {
* @param onTimingsCallback - Callback function to invoke with timing data
* @private
*/
- private static notifyTimings(
+ public static notifyTimings(
timings: ChatMessageTimings | undefined,
promptProgress: ChatMessagePromptProgress | undefined,
onTimingsCallback:
@@ -965,168 +796,6 @@ export class ChatService {
onTimingsCallback(timings, promptProgress);
}
/**
* Handles streaming response from the completion API
*/
private static async handleCompletionStreamResponse(
response: Response,
onChunk?: (chunk: string) => void,
onComplete?: (
response: string,
reasoningContent?: string,
timings?: ChatMessageTimings,
toolCalls?: string
) => void,
onError?: (error: Error) => void,
onModel?: (model: string) => void,
onTimings?: (timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void,
abortSignal?: AbortSignal
): Promise<void> {
const reader = response.body?.getReader();
if (!reader) {
throw new Error('No response body');
}
const decoder = new TextDecoder();
let aggregatedContent = '';
let lastTimings: ChatMessageTimings | undefined;
let streamFinished = false;
let modelEmitted = false;
try {
let chunk = '';
while (true) {
if (abortSignal?.aborted) {
break;
}
const { done, value } = await reader.read();
if (done) {
break;
}
if (abortSignal?.aborted) {
break;
}
chunk += decoder.decode(value, { stream: true });
const lines = chunk.split('\n');
chunk = lines.pop() || '';
for (const line of lines) {
if (abortSignal?.aborted) {
break;
}
if (line.startsWith('data: ')) {
const data = line.slice(6);
if (data === '[DONE]') {
streamFinished = true;
continue;
}
try {
const parsed: ApiCompletionStreamChunk = JSON.parse(data);
const content = parsed.content;
const timings = parsed.timings;
const model = parsed.model;
const promptProgress = parsed.prompt_progress;
if (parsed.stop) {
streamFinished = true;
}
if (model && !modelEmitted) {
modelEmitted = true;
onModel?.(model);
}
if (promptProgress) {
ChatService.notifyTimings(undefined, promptProgress, onTimings);
}
if (timings) {
ChatService.notifyTimings(timings, promptProgress, onTimings);
lastTimings = timings;
}
if (content) {
aggregatedContent += content;
if (!abortSignal?.aborted) {
onChunk?.(content);
}
}
} catch (e) {
console.error('Error parsing JSON chunk:', e);
}
}
}
if (streamFinished) {
break;
}
}
if (abortSignal?.aborted) {
return;
}
if (streamFinished) {
onComplete?.(aggregatedContent, undefined, lastTimings, undefined);
}
} catch (error) {
const err = error instanceof Error ? error : new Error('Stream error');
onError?.(err);
throw err;
} finally {
reader.releaseLock();
}
}
/**
* Handles non-streaming response from the completion API
*/
private static async handleCompletionNonStreamResponse(
response: Response,
onComplete?: (
response: string,
reasoningContent?: string,
timings?: ChatMessageTimings,
toolCalls?: string
) => void,
onError?: (error: Error) => void,
onModel?: (model: string) => void
): Promise<string> {
try {
const responseText = await response.text();
if (!responseText.trim()) {
const noResponseError = new Error('No response received from server. Please try again.');
throw noResponseError;
}
const data: ApiCompletionResponse = JSON.parse(responseText);
if (data.model) {
onModel?.(data.model);
}
const content = data.content || '';
if (!content.trim()) {
const noResponseError = new Error('No response received from server. Please try again.');
throw noResponseError;
}
onComplete?.(content, undefined, data.timings, undefined);
return content;
} catch (error) {
const err = error instanceof Error ? error : new Error('Parse error');
onError?.(err);
throw err;
}
}
}

View File

@@ -0,0 +1,354 @@
import { getJsonHeaders } from '$lib/utils';
import { ChatService } from '$lib/services/chat';
import type {
ApiCompletionRequest,
ApiCompletionResponse,
ApiCompletionStreamChunk,
ApiErrorResponse
} from '$lib/types/api';
import type { ChatMessageTimings, ChatMessagePromptProgress } from '$lib/types/chat';
import type { SettingsChatServiceOptions } from '$lib/types/settings';
/**
* CompletionService - Low-level API communication layer for raw text completions.
* Used in the notebook page.
*/
export class CompletionService {
/**
* Sends a completion request to the llama.cpp server.
* Supports both streaming and non-streaming responses.
*
* @param prompt - The text prompt to complete
* @param options - Configuration options for the completion request
* @returns {Promise<string | void>} that resolves to the complete response string (non-streaming) or void (streaming)
* @throws {Error} if the request fails or is aborted
*/
static async sendCompletion(
prompt: string,
options: SettingsChatServiceOptions = {},
signal?: AbortSignal
): Promise<string | void> {
const {
stream,
onChunk,
onComplete,
onError,
onModel,
onTimings,
// Generation parameters
temperature,
max_tokens,
// Sampling parameters
dynatemp_range,
dynatemp_exponent,
top_k,
top_p,
min_p,
xtc_probability,
xtc_threshold,
typ_p,
// Penalty parameters
repeat_last_n,
repeat_penalty,
presence_penalty,
frequency_penalty,
dry_multiplier,
dry_base,
dry_allowed_length,
dry_penalty_last_n,
// Other parameters
samplers,
backend_sampling,
custom,
timings_per_token
} = options;
const requestBody: ApiCompletionRequest = {
prompt,
stream
};
// Include model in request if provided
if (options.model) {
requestBody.model = options.model;
}
if (temperature !== undefined) requestBody.temperature = temperature;
if (max_tokens !== undefined) {
requestBody.max_tokens = max_tokens !== null && max_tokens !== 0 ? max_tokens : -1;
}
if (dynatemp_range !== undefined) requestBody.dynatemp_range = dynatemp_range;
if (dynatemp_exponent !== undefined) requestBody.dynatemp_exponent = dynatemp_exponent;
if (top_k !== undefined) requestBody.top_k = top_k;
if (top_p !== undefined) requestBody.top_p = top_p;
if (min_p !== undefined) requestBody.min_p = min_p;
if (xtc_probability !== undefined) requestBody.xtc_probability = xtc_probability;
if (xtc_threshold !== undefined) requestBody.xtc_threshold = xtc_threshold;
if (typ_p !== undefined) requestBody.typ_p = typ_p;
if (repeat_last_n !== undefined) requestBody.repeat_last_n = repeat_last_n;
if (repeat_penalty !== undefined) requestBody.repeat_penalty = repeat_penalty;
if (presence_penalty !== undefined) requestBody.presence_penalty = presence_penalty;
if (frequency_penalty !== undefined) requestBody.frequency_penalty = frequency_penalty;
if (dry_multiplier !== undefined) requestBody.dry_multiplier = dry_multiplier;
if (dry_base !== undefined) requestBody.dry_base = dry_base;
if (dry_allowed_length !== undefined) requestBody.dry_allowed_length = dry_allowed_length;
if (dry_penalty_last_n !== undefined) requestBody.dry_penalty_last_n = dry_penalty_last_n;
if (samplers !== undefined) {
requestBody.samplers =
typeof samplers === 'string'
? samplers.split(';').filter((s: string) => s.trim())
: samplers;
}
if (backend_sampling !== undefined) requestBody.backend_sampling = backend_sampling;
if (timings_per_token !== undefined) requestBody.timings_per_token = timings_per_token;
if (custom) {
try {
const customParams = typeof custom === 'string' ? JSON.parse(custom) : custom;
Object.assign(requestBody, customParams);
} catch (error) {
console.warn('Failed to parse custom parameters:', error);
}
}
try {
const response = await fetch(`./completion`, {
method: 'POST',
headers: getJsonHeaders(),
body: JSON.stringify(requestBody),
signal
});
if (!response.ok) {
const error = await ChatService.parseErrorResponse(response);
if (onError) {
onError(error);
}
throw error;
}
if (stream) {
await CompletionService.handleCompletionStreamResponse(
response,
onChunk,
onComplete,
onError,
onModel,
onTimings,
signal
);
return;
} else {
return CompletionService.handleCompletionNonStreamResponse(
response,
onComplete,
onError,
onModel
);
}
} catch (error) {
if (error instanceof Error && error.name === 'AbortError') {
console.log('Completion request was aborted');
return;
}
let userFriendlyError: Error;
if (error instanceof Error) {
if (error.name === 'TypeError' && error.message.includes('fetch')) {
userFriendlyError = new Error(
'Unable to connect to server - please check if the server is running'
);
userFriendlyError.name = 'NetworkError';
} else if (error.message.includes('ECONNREFUSED')) {
userFriendlyError = new Error('Connection refused - server may be offline');
userFriendlyError.name = 'NetworkError';
} else if (error.message.includes('ETIMEDOUT')) {
userFriendlyError = new Error('Request timed out - the server took too long to respond');
userFriendlyError.name = 'TimeoutError';
} else {
userFriendlyError = error;
}
} else {
userFriendlyError = new Error('Unknown error occurred while sending completion');
}
console.error('Error in sendCompletion:', error);
if (onError) {
onError(userFriendlyError);
}
throw userFriendlyError;
}
}
/**
* Handles streaming response from the completion API
*/
private static async handleCompletionStreamResponse(
response: Response,
onChunk?: (chunk: string) => void,
onComplete?: (
response: string,
reasoningContent?: string,
timings?: ChatMessageTimings,
toolCalls?: string
) => void,
onError?: (error: Error) => void,
onModel?: (model: string) => void,
onTimings?: (timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void,
abortSignal?: AbortSignal
): Promise<void> {
const reader = response.body?.getReader();
if (!reader) {
throw new Error('No response body');
}
const decoder = new TextDecoder();
let aggregatedContent = '';
let lastTimings: ChatMessageTimings | undefined;
let streamFinished = false;
let modelEmitted = false;
try {
let chunk = '';
while (true) {
if (abortSignal?.aborted) {
break;
}
const { done, value } = await reader.read();
if (done) {
break;
}
if (abortSignal?.aborted) {
break;
}
chunk += decoder.decode(value, { stream: true });
const lines = chunk.split('\n');
chunk = lines.pop() || '';
for (const line of lines) {
if (abortSignal?.aborted) {
break;
}
if (line.startsWith('data: ')) {
const data = line.slice(6);
if (data === '[DONE]') {
streamFinished = true;
continue;
}
try {
const parsed: ApiCompletionStreamChunk = JSON.parse(data);
const content = parsed.content;
const timings = parsed.timings;
const model = parsed.model;
const promptProgress = parsed.prompt_progress;
if (parsed.stop) {
streamFinished = true;
}
if (model && !modelEmitted) {
modelEmitted = true;
onModel?.(model);
}
if (promptProgress) {
ChatService.notifyTimings(undefined, promptProgress, onTimings);
}
if (timings) {
ChatService.notifyTimings(timings, promptProgress, onTimings);
lastTimings = timings;
}
if (content) {
aggregatedContent += content;
if (!abortSignal?.aborted) {
onChunk?.(content);
}
}
} catch (e) {
console.error('Error parsing JSON chunk:', e);
}
}
}
if (streamFinished) {
break;
}
}
if (abortSignal?.aborted) {
return;
}
if (streamFinished) {
onComplete?.(aggregatedContent, undefined, lastTimings, undefined);
}
} catch (error) {
const err = error instanceof Error ? error : new Error('Stream error');
onError?.(err);
throw err;
} finally {
reader.releaseLock();
}
}
/**
* Handles non-streaming response from the completion API
*/
private static async handleCompletionNonStreamResponse(
response: Response,
onComplete?: (
response: string,
reasoningContent?: string,
timings?: ChatMessageTimings,
toolCalls?: string
) => void,
onError?: (error: Error) => void,
onModel?: (model: string) => void
): Promise<string> {
try {
const responseText = await response.text();
if (!responseText.trim()) {
const noResponseError = new Error('No response received from server. Please try again.');
throw noResponseError;
}
const data: ApiCompletionResponse = JSON.parse(responseText);
if (data.model) {
onModel?.(data.model);
}
const content = data.content || '';
if (!content.trim()) {
const noResponseError = new Error('No response received from server. Please try again.');
throw noResponseError;
}
onComplete?.(content, undefined, data.timings, undefined);
return content;
} catch (error) {
const err = error instanceof Error ? error : new Error('Parse error');
onError?.(err);
throw err;
}
}
}
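
For reference, a minimal sketch of how a caller can drive the extracted service. The option and callback shapes follow SettingsChatServiceOptions as destructured in sendCompletion() above; the prompt and handler bodies are illustrative placeholders, not part of this commit:

// Illustrative sketch: streaming a completion through CompletionService.
// Option names mirror the fields destructured in sendCompletion(); the
// prompt and handler bodies below are placeholders.
import { CompletionService } from '$lib/services/completion';

const controller = new AbortController();
let output = '';

await CompletionService.sendCompletion(
	'Once upon a time',
	{
		stream: true,
		temperature: 0.8,
		onChunk: (chunk) => {
			output += chunk; // partial content as SSE chunks arrive
		},
		onComplete: (text) => {
			console.log('completion finished:', text.length, 'chars');
		},
		onError: (err) => {
			console.error('completion failed:', err.message);
		}
	},
	controller.signal // controller.abort() cancels the in-flight request
);

In the non-streaming case (stream omitted or false), the same call resolves with the full response string instead of invoking onChunk.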

View File

@@ -1,4 +1,4 @@
- import { ChatService } from '$lib/services/chat';
+ import { CompletionService } from '$lib/services/completion';
import { config } from '$lib/stores/settings.svelte';
import { tokenize } from '$lib/services/tokenize';
@@ -45,7 +45,7 @@ export class NotebookStore {
try {
const currentConfig = config();
- await ChatService.sendCompletion(
+ await CompletionService.sendCompletion(
this.content,
{
...currentConfig,