diff --git a/tools/server/public/index.html.gz b/tools/server/public/index.html.gz
index e3b06f4901..b7b36176d8 100644
Binary files a/tools/server/public/index.html.gz and b/tools/server/public/index.html.gz differ
diff --git a/tools/server/webui/src/lib/components/app/chat/ChatSidebar/ChatSidebar.svelte b/tools/server/webui/src/lib/components/app/chat/ChatSidebar/ChatSidebar.svelte
index aa0c27f6d3..d748dd868e 100644
--- a/tools/server/webui/src/lib/components/app/chat/ChatSidebar/ChatSidebar.svelte
+++ b/tools/server/webui/src/lib/components/app/chat/ChatSidebar/ChatSidebar.svelte
@@ -118,6 +118,17 @@
+
+
+
+                Notebook
+
+
+
 {#if (filteredConversations.length > 0 && isSearchModeActive) || !isSearchModeActive}
diff --git a/tools/server/webui/src/lib/components/app/index.ts b/tools/server/webui/src/lib/components/app/index.ts
index 8631d4fb3b..1c90d690db 100644
--- a/tools/server/webui/src/lib/components/app/index.ts
+++ b/tools/server/webui/src/lib/components/app/index.ts
@@ -73,3 +73,6 @@ export { default as ModelsSelector } from './models/ModelsSelector.svelte';
 export { default as ServerStatus } from './server/ServerStatus.svelte';
 export { default as ServerErrorSplash } from './server/ServerErrorSplash.svelte';
 export { default as ServerLoadingSplash } from './server/ServerLoadingSplash.svelte';
+
+// Notebook
+export { default as NotebookScreen } from './notebook/NotebookScreen.svelte';
diff --git a/tools/server/webui/src/lib/components/app/notebook/NotebookScreen.svelte b/tools/server/webui/src/lib/components/app/notebook/NotebookScreen.svelte
new file mode 100644
index 0000000000..6871b1da2f
--- /dev/null
+++ b/tools/server/webui/src/lib/components/app/notebook/NotebookScreen.svelte
@@ -0,0 +1,43 @@
+
+
+
+
+
+            Notebook
+
+
+        {#if notebookStore.isGenerating}
+
+        {:else}
+
+        {/if}
+
+
+
+
+
+
+        Model: {config().model || 'Default'} | Temperature: {config().temperature ?? 0.8} | Max Tokens: {config().max_tokens ?? -1}
+
+
diff --git a/tools/server/webui/src/lib/services/chat.ts b/tools/server/webui/src/lib/services/chat.ts
index 02fc6381c0..22960cffdd 100644
--- a/tools/server/webui/src/lib/services/chat.ts
+++ b/tools/server/webui/src/lib/services/chat.ts
@@ -247,6 +247,177 @@ export class ChatService {
         }
     }
 
+    /**
+     * Sends a completion request to the llama.cpp server.
+     * Supports both streaming and non-streaming responses.
+     *
+     * @param prompt - The text prompt to complete
+     * @param options - Configuration options for the completion request
+     * @returns {Promise<string | void>} Resolves to the complete response string (non-streaming) or void (streaming)
+     * @throws {Error} if the request fails or is aborted
+     */
+    static async sendCompletion(
+        prompt: string,
+        options: SettingsChatServiceOptions = {},
+        signal?: AbortSignal
+    ): Promise<string | void> {
+        const {
+            stream,
+            onChunk,
+            onComplete,
+            onError,
+            onModel,
+            onTimings,
+            // Generation parameters
+            temperature,
+            max_tokens,
+            // Sampling parameters
+            dynatemp_range,
+            dynatemp_exponent,
+            top_k,
+            top_p,
+            min_p,
+            xtc_probability,
+            xtc_threshold,
+            typ_p,
+            // Penalty parameters
+            repeat_last_n,
+            repeat_penalty,
+            presence_penalty,
+            frequency_penalty,
+            dry_multiplier,
+            dry_base,
+            dry_allowed_length,
+            dry_penalty_last_n,
+            // Other parameters
+            samplers,
+            backend_sampling,
+            custom,
+            timings_per_token
+        } = options;
+
+        const requestBody: ApiCompletionRequest = {
+            prompt,
+            stream
+        };
+
+        // Include model in request if provided
+        if (options.model) {
+            requestBody.model = options.model;
+        }
+
+        if (temperature !== undefined) requestBody.temperature = temperature;
+        if (max_tokens !== undefined) {
+            requestBody.max_tokens = max_tokens !== null && max_tokens !== 0 ? max_tokens : -1;
+        }
+
+        if (dynatemp_range !== undefined) requestBody.dynatemp_range = dynatemp_range;
+        if (dynatemp_exponent !== undefined) requestBody.dynatemp_exponent = dynatemp_exponent;
+        if (top_k !== undefined) requestBody.top_k = top_k;
+        if (top_p !== undefined) requestBody.top_p = top_p;
+        if (min_p !== undefined) requestBody.min_p = min_p;
+        if (xtc_probability !== undefined) requestBody.xtc_probability = xtc_probability;
+        if (xtc_threshold !== undefined) requestBody.xtc_threshold = xtc_threshold;
+        if (typ_p !== undefined) requestBody.typ_p = typ_p;
+
+        if (repeat_last_n !== undefined) requestBody.repeat_last_n = repeat_last_n;
+        if (repeat_penalty !== undefined) requestBody.repeat_penalty = repeat_penalty;
+        if (presence_penalty !== undefined) requestBody.presence_penalty = presence_penalty;
+        if (frequency_penalty !== undefined) requestBody.frequency_penalty = frequency_penalty;
+        if (dry_multiplier !== undefined) requestBody.dry_multiplier = dry_multiplier;
+        if (dry_base !== undefined) requestBody.dry_base = dry_base;
+        if (dry_allowed_length !== undefined) requestBody.dry_allowed_length = dry_allowed_length;
+        if (dry_penalty_last_n !== undefined) requestBody.dry_penalty_last_n = dry_penalty_last_n;
+
+        if (samplers !== undefined) {
+            requestBody.samplers =
+                typeof samplers === 'string'
+                    ? samplers.split(';').filter((s: string) => s.trim())
+                    : samplers;
+        }
+
+        if (backend_sampling !== undefined) requestBody.backend_sampling = backend_sampling;
+        if (timings_per_token !== undefined) requestBody.timings_per_token = timings_per_token;
+
+        if (custom) {
+            try {
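For review purposes, a minimal sketch of how the new method is meant to be driven in streaming mode. The option and callback names come from this patch; the prompt text and parameter values are illustrative only, and the top-level await assumes an async context:

    import { ChatService } from '$lib/services/chat';

    // Stream a raw completion and accumulate the text as it arrives.
    const controller = new AbortController();
    let text = '';

    await ChatService.sendCompletion(
        'Once upon a time',
        {
            stream: true,
            temperature: 0.8,
            max_tokens: 256,
            onChunk: (chunk) => {
                text += chunk;
            },
            onComplete: () => console.log('completion finished:', text),
            onError: (err) => console.error('completion failed:', err)
        },
        controller.signal
    );

    // Calling controller.abort() cancels the in-flight request.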
+                const customParams = typeof custom === 'string' ? JSON.parse(custom) : custom;
+                Object.assign(requestBody, customParams);
+            } catch (error) {
+                console.warn('Failed to parse custom parameters:', error);
+            }
+        }
+
+        try {
+            const response = await fetch(`./completion`, {
+                method: 'POST',
+                headers: getJsonHeaders(),
+                body: JSON.stringify(requestBody),
+                signal
+            });
+
+            if (!response.ok) {
+                const error = await ChatService.parseErrorResponse(response);
+                if (onError) {
+                    onError(error);
+                }
+                throw error;
+            }
+
+            if (stream) {
+                await ChatService.handleCompletionStreamResponse(
+                    response,
+                    onChunk,
+                    onComplete,
+                    onError,
+                    onModel,
+                    onTimings,
+                    signal
+                );
+                return;
+            } else {
+                return ChatService.handleCompletionNonStreamResponse(
+                    response,
+                    onComplete,
+                    onError,
+                    onModel
+                );
+            }
+        } catch (error) {
+            if (error instanceof Error && error.name === 'AbortError') {
+                console.log('Completion request was aborted');
+                return;
+            }
+
+            let userFriendlyError: Error;
+
+            if (error instanceof Error) {
+                if (error.name === 'TypeError' && error.message.includes('fetch')) {
+                    userFriendlyError = new Error(
+                        'Unable to connect to server - please check if the server is running'
+                    );
+                    userFriendlyError.name = 'NetworkError';
+                } else if (error.message.includes('ECONNREFUSED')) {
+                    userFriendlyError = new Error('Connection refused - server may be offline');
+                    userFriendlyError.name = 'NetworkError';
+                } else if (error.message.includes('ETIMEDOUT')) {
+                    userFriendlyError = new Error('Request timed out - the server took too long to respond');
+                    userFriendlyError.name = 'TimeoutError';
+                } else {
+                    userFriendlyError = error;
+                }
+            } else {
+                userFriendlyError = new Error('Unknown error occurred while sending completion');
+            }
+
+            console.error('Error in sendCompletion:', error);
+            if (onError) {
+                onError(userFriendlyError);
+            }
+            throw userFriendlyError;
+        }
+    }
+
     // ─────────────────────────────────────────────────────────────────────────────
     // Streaming
     // ─────────────────────────────────────────────────────────────────────────────
@@ -781,4 +952,147 @@ export class ChatService {
         onTimingsCallback(timings, promptProgress);
     }
+
+    /**
+     * Handles streaming response from the completion API
+     */
+    private static async handleCompletionStreamResponse(
+        response: Response,
+        onChunk?: (chunk: string) => void,
+        onComplete?: (
+            response: string,
+            reasoningContent?: string,
+            timings?: ChatMessageTimings,
+            toolCalls?: string
+        ) => void,
+        onError?: (error: Error) => void,
+        onModel?: (model: string) => void,
+        onTimings?: (timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void,
+        abortSignal?: AbortSignal
+    ): Promise<void> {
+        const reader = response.body?.getReader();
+
+        if (!reader) {
+            throw new Error('No response body');
+        }
+
+        const decoder = new TextDecoder();
+        let aggregatedContent = '';
+        let lastTimings: ChatMessageTimings | undefined;
+        let streamFinished = false;
+        let modelEmitted = false;
+
+        try {
+            let chunk = '';
+            while (true) {
+                if (abortSignal?.aborted) break;
+
+                const { done, value } = await reader.read();
+                if (done) break;
+
+                if (abortSignal?.aborted) break;
+
+                chunk += decoder.decode(value, { stream: true });
+                const lines = chunk.split('\n');
+                chunk = lines.pop() || '';
+
+                for (const line of lines) {
+                    if (abortSignal?.aborted) break;
+
+                    if (line.startsWith('data: ')) {
+                        const data = line.slice(6);
+                        if (data === '[DONE]') {
+                            streamFinished = true;
+                            continue;
+                        }
+
+                        try {
+                            const parsed: ApiCompletionStreamChunk = JSON.parse(data);
+                            const content = parsed.content;
+                            const timings = parsed.timings;
+                            const model = parsed.model;
+
+                            if (model && !modelEmitted) {
+                                modelEmitted = true;
+                                onModel?.(model);
+                            }
+
+                            if (timings) {
+                                ChatService.notifyTimings(timings, undefined, onTimings);
+                                lastTimings = timings;
+                            }
+
+                            if (content) {
+                                aggregatedContent += content;
+                                if (!abortSignal?.aborted) {
+                                    onChunk?.(content);
+                                }
+                            }
+                        } catch (e) {
+                            console.error('Error parsing JSON chunk:', e);
+                        }
+                    }
+                }
+
+                if (abortSignal?.aborted) break;
+            }
+
+            if (abortSignal?.aborted) return;
+
+            if (streamFinished) {
+                onComplete?.(aggregatedContent, undefined, lastTimings, undefined);
+            }
+        } catch (error) {
+            const err = error instanceof Error ? error : new Error('Stream error');
+            onError?.(err);
+            throw err;
+        } finally {
+            reader.releaseLock();
+        }
+    }
+
+    /**
+     * Handles non-streaming response from the completion API
+     */
+    private static async handleCompletionNonStreamResponse(
+        response: Response,
+        onComplete?: (
+            response: string,
+            reasoningContent?: string,
+            timings?: ChatMessageTimings,
+            toolCalls?: string
+        ) => void,
+        onError?: (error: Error) => void,
+        onModel?: (model: string) => void
+    ): Promise<string> {
+        try {
+            const responseText = await response.text();
+
+            if (!responseText.trim()) {
+                const noResponseError = new Error('No response received from server. Please try again.');
+                throw noResponseError;
+            }
+
+            const data: ApiCompletionResponse = JSON.parse(responseText);
+
+            if (data.model) {
+                onModel?.(data.model);
+            }
+
+            const content = data.content || '';
+
+            if (!content.trim()) {
+                const noResponseError = new Error('No response received from server. Please try again.');
+                throw noResponseError;
+            }
+
+            onComplete?.(content, undefined, data.timings, undefined);
+
+            return content;
+        } catch (error) {
+            const err = error instanceof Error ? error : new Error('Parse error');
+            onError?.(err);
+            throw err;
+        }
+    }
 }
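The non-streaming path resolves with the full completion text, as handled by handleCompletionNonStreamResponse above. A sketch under the same assumptions as before (prompt and settings are illustrative; because the return type is string | void, the result should be checked before use):

    import { ChatService } from '$lib/services/chat';

    // Request the whole completion in one response instead of streaming it.
    const result = await ChatService.sendCompletion('Write a haiku about tensors.', {
        stream: false,
        temperature: 0.7,
        max_tokens: 64
    });

    if (typeof result === 'string') {
        console.log('completion:', result);
    }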
diff --git a/tools/server/webui/src/lib/stores/notebook.svelte.ts b/tools/server/webui/src/lib/stores/notebook.svelte.ts
new file mode 100644
index 0000000000..890e72c1fc
--- /dev/null
+++ b/tools/server/webui/src/lib/stores/notebook.svelte.ts
@@ -0,0 +1,51 @@
+import { ChatService } from '$lib/services/chat';
+import { config } from '$lib/stores/settings.svelte';
+
+export class NotebookStore {
+    content = $state('');
+    isGenerating = $state(false);
+    abortController: AbortController | null = null;
+
+    async generate(model?: string) {
+        if (this.isGenerating) return;
+
+        this.isGenerating = true;
+        this.abortController = new AbortController();
+
+        try {
+            const currentConfig = config();
+            await ChatService.sendCompletion(
+                this.content,
+                {
+                    ...currentConfig,
+                    model,
+                    stream: true,
+                    onChunk: (chunk) => {
+                        this.content += chunk;
+                    },
+                    onComplete: () => {
+                        this.isGenerating = false;
+                    },
+                    onError: (error) => {
+                        console.error('Notebook generation error:', error);
+                        this.isGenerating = false;
+                    }
+                },
+                this.abortController.signal
+            );
+        } catch (error) {
+            console.error('Notebook generation failed:', error);
+            this.isGenerating = false;
+        }
+    }
+
+    stop() {
+        if (this.abortController) {
+            this.abortController.abort();
+            this.abortController = null;
+        }
+        this.isGenerating = false;
+    }
+}
+
+export const notebookStore = new NotebookStore();
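A sketch of how the exported singleton is intended to be used from the UI; the store fields and methods come from this patch, while the calling code is assumed:

    import { notebookStore } from '$lib/stores/notebook.svelte';

    // Seed the buffer, then stream the model's continuation into it.
    notebookStore.content = 'The quick brown fox';
    void notebookStore.generate(); // appends chunks to notebookStore.content as they arrive

    // Later, e.g. from a Stop button handler:
    if (notebookStore.isGenerating) {
        notebookStore.stop(); // aborts the in-flight request and clears the generating flag
    }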
diff --git a/tools/server/webui/src/lib/types/api.d.ts b/tools/server/webui/src/lib/types/api.d.ts
index 714509f024..7e036be156 100644
--- a/tools/server/webui/src/lib/types/api.d.ts
+++ b/tools/server/webui/src/lib/types/api.d.ts
@@ -23,12 +23,12 @@ export interface ApiContextSizeError {
 
 export interface ApiErrorResponse {
     error:
-    | ApiContextSizeError
-    | {
-        code: number;
-        message: string;
-        type?: string;
-    };
+        | ApiContextSizeError
+        | {
+                code: number;
+                message: string;
+                type?: string;
+          };
 }
 
 export interface ApiChatMessageData {
@@ -219,6 +219,39 @@ export interface ApiChatCompletionRequest {
     timings_per_token?: boolean;
 }
 
+export interface ApiCompletionRequest {
+    prompt: string;
+    stream?: boolean;
+    model?: string;
+    // Generation parameters
+    temperature?: number;
+    max_tokens?: number;
+    // Sampling parameters
+    dynatemp_range?: number;
+    dynatemp_exponent?: number;
+    top_k?: number;
+    top_p?: number;
+    min_p?: number;
+    xtc_probability?: number;
+    xtc_threshold?: number;
+    typ_p?: number;
+    // Penalty parameters
+    repeat_last_n?: number;
+    repeat_penalty?: number;
+    presence_penalty?: number;
+    frequency_penalty?: number;
+    dry_multiplier?: number;
+    dry_base?: number;
+    dry_allowed_length?: number;
+    dry_penalty_last_n?: number;
+    // Sampler configuration
+    samplers?: string[];
+    backend_sampling?: boolean;
+    // Custom parameters (JSON string)
+    custom?: Record<string, unknown>;
+    timings_per_token?: boolean;
+}
+
 export interface ApiChatCompletionToolCallFunctionDelta {
     name?: string;
     arguments?: string;
@@ -258,6 +291,32 @@ export interface ApiChatCompletionStreamChunk {
     prompt_progress?: ChatMessagePromptProgress;
 }
 
+export interface ApiCompletionStreamChunk {
+    content: string;
+    stop: boolean;
+    model: string;
+    timings?: {
+        prompt_n?: number;
+        prompt_ms?: number;
+        predicted_n?: number;
+        predicted_ms?: number;
+        cache_n?: number;
+    };
+}
+
+export interface ApiCompletionResponse {
+    content: string;
+    stop: boolean;
+    model: string;
+    timings?: {
+        prompt_n?: number;
+        prompt_ms?: number;
+        predicted_n?: number;
+        predicted_ms?: number;
+        cache_n?: number;
+    };
+}
+
 export interface ApiChatCompletionResponse {
     model?: string;
     choices: Array<{
diff --git a/tools/server/webui/src/routes/notebook/+page.svelte b/tools/server/webui/src/routes/notebook/+page.svelte
new file mode 100644
index 0000000000..d8417c8acc
--- /dev/null
+++ b/tools/server/webui/src/routes/notebook/+page.svelte
@@ -0,0 +1,9 @@
+
+
+
+    Notebook - llama.cpp
+
+
+
+
+
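For reference, a request body that satisfies the new ApiCompletionRequest shape, assuming the ambient types declared in api.d.ts are in scope; the field names come from this patch, while the values are illustrative rather than defaults:

    const body: ApiCompletionRequest = {
        prompt: 'Summarize the llama.cpp server API in one sentence.',
        stream: true,
        temperature: 0.8,
        max_tokens: -1, // -1 means "no limit", matching the normalization in sendCompletion
        top_k: 40,
        top_p: 0.95,
        samplers: ['top_k', 'top_p', 'min_p', 'temperature']
    };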