diff --git a/tools/server/public/index.html.gz b/tools/server/public/index.html.gz
index e3b06f4901..73ed15b55b 100644
Binary files a/tools/server/public/index.html.gz and b/tools/server/public/index.html.gz differ
diff --git a/tools/server/webui/src/lib/components/app/chat/ChatSidebar/ChatSidebarActions.svelte b/tools/server/webui/src/lib/components/app/chat/ChatSidebar/ChatSidebarActions.svelte
index 30d1f9d4b7..6d4978ad85 100644
--- a/tools/server/webui/src/lib/components/app/chat/ChatSidebar/ChatSidebarActions.svelte
+++ b/tools/server/webui/src/lib/components/app/chat/ChatSidebar/ChatSidebarActions.svelte
@@ -1,5 +1,5 @@
+
+
+
+
+
+
+
+ Notebook
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Undo last generation
+
+
+
+
+
+
+
+
+
+ Redo last generation
+
+
+
+
+ {#snippet generateButton()}
+
+ {/snippet}
+
+
+
+ {@render generateButton()}
+
+
+
+ {#if generateTooltip}
+ {generateTooltip}
+ {:else}
+
+
+
+ {/if}
+
+
+
+
+
+
+ {#if showMessageStats}
+
+ {#if notebookStore.totalTokens > 0}
+
+
+
+
+ {notebookStore.totalTokens} tokens
+
+
+
+ Total tokens
+
+
+ {/if}
+
+ {#if notebookStore.promptTokens > 0 || notebookStore.predictedTokens > 0}
+
+ {/if}
+
+ {/if}
+
+
+
+
(settingsOpen = open)} />
+
+
+
diff --git a/tools/server/webui/src/lib/constants/settings-config.ts b/tools/server/webui/src/lib/constants/settings-config.ts
index cac48a557c..c150107933 100644
--- a/tools/server/webui/src/lib/constants/settings-config.ts
+++ b/tools/server/webui/src/lib/constants/settings-config.ts
@@ -95,8 +95,7 @@ export const SETTING_CONFIG_INFO: Record<string, string> = {
disableReasoningFormat:
'Show raw LLM output without backend parsing and frontend Markdown rendering to inspect streaming across different models.',
keepStatsVisible: 'Keep processing statistics visible after generation finishes.',
- showMessageStats:
- 'Display generation statistics (tokens/second, token count, duration) below each assistant message.',
+ showMessageStats: 'Display generation statistics (tokens/second, token count, duration).',
askForTitleConfirmation:
'Ask for confirmation before automatically changing conversation title when editing the first message.',
pdfAsImage:
diff --git a/tools/server/webui/src/lib/services/chat.ts b/tools/server/webui/src/lib/services/chat.ts
index 02fc6381c0..c84310b19a 100644
--- a/tools/server/webui/src/lib/services/chat.ts
+++ b/tools/server/webui/src/lib/services/chat.ts
@@ -1,5 +1,14 @@
import { getJsonHeaders } from '$lib/utils';
import { AttachmentType } from '$lib/enums';
+import type {
+ ApiChatCompletionRequest,
+ ApiChatCompletionResponse,
+ ApiChatCompletionStreamChunk,
+ ApiChatCompletionToolCall,
+ ApiChatCompletionToolCallDelta,
+ ApiChatMessageContentPart,
+ ApiChatMessageData
+} from '$lib/types/api';
/**
* ChatService - Low-level API communication layer for Chat Completions
@@ -686,7 +695,7 @@ export class ChatService {
* @param response - HTTP response object
* @returns Promise - Parsed error with context info if available
*/
- private static async parseErrorResponse(
+ public static async parseErrorResponse(
response: Response
): Promise {
try {
@@ -729,7 +738,7 @@ export class ChatService {
* @returns Model name string if found, undefined otherwise
* @private
*/
- private static extractModelName(data: unknown): string | undefined {
+ public static extractModelName(data: unknown): string | undefined {
const asRecord = (value: unknown): Record<string, unknown> | undefined => {
return typeof value === 'object' && value !== null
? (value as Record<string, unknown>)
@@ -770,7 +779,7 @@ export class ChatService {
* @param onTimingsCallback - Callback function to invoke with timing data
* @private
*/
- private static notifyTimings(
+ public static notifyTimings(
timings: ChatMessageTimings | undefined,
promptProgress: ChatMessagePromptProgress | undefined,
onTimingsCallback:
diff --git a/tools/server/webui/src/lib/services/completion.ts b/tools/server/webui/src/lib/services/completion.ts
new file mode 100644
index 0000000000..016ca4838c
--- /dev/null
+++ b/tools/server/webui/src/lib/services/completion.ts
@@ -0,0 +1,347 @@
+import { getJsonHeaders } from '$lib/utils';
+import { ChatService } from '$lib/services/chat';
+
+import type {
+ ApiCompletionRequest,
+ ApiCompletionResponse,
+ ApiCompletionStreamChunk
+} from '$lib/types/api';
+import type { ChatMessageTimings, ChatMessagePromptProgress } from '$lib/types/chat';
+import type { SettingsChatServiceOptions } from '$lib/types/settings';
+
+/**
+ * CompletionService - Low-level API communication layer for raw text completions.
+ * Used in the notebook page.
+ */
+export class CompletionService {
+ /**
+ * Sends a completion request to the llama.cpp server.
+ * Supports both streaming and non-streaming responses.
+ *
+ * @param prompt - The text prompt to complete
+ * @param options - Configuration options for the completion request
+ * @returns {Promise<string | void>} Promise that resolves to the complete response string (non-streaming) or void (streaming)
+ * @throws {Error} if the request fails or is aborted
+ */
+ static async sendCompletion(
+ prompt: string,
+ options: SettingsChatServiceOptions = {},
+ signal?: AbortSignal
+ ): Promise<string | void> {
+ const {
+ stream,
+ onChunk,
+ onComplete,
+ onError,
+ onModel,
+ onTimings,
+ // Generation parameters
+ temperature,
+ max_tokens,
+ // Sampling parameters
+ dynatemp_range,
+ dynatemp_exponent,
+ top_k,
+ top_p,
+ min_p,
+ xtc_probability,
+ xtc_threshold,
+ typ_p,
+ // Penalty parameters
+ repeat_last_n,
+ repeat_penalty,
+ presence_penalty,
+ frequency_penalty,
+ dry_multiplier,
+ dry_base,
+ dry_allowed_length,
+ dry_penalty_last_n,
+ // Other parameters
+ samplers,
+ backend_sampling,
+ custom,
+ timings_per_token
+ } = options;
+
+ const requestBody: ApiCompletionRequest = {
+ prompt,
+ stream
+ };
+
+ // Include model in request if provided
+ if (options.model) {
+ requestBody.model = options.model;
+ }
+
+ if (temperature !== undefined) requestBody.temperature = temperature;
+ if (max_tokens !== undefined) {
+ requestBody.max_tokens = max_tokens !== null && max_tokens !== 0 ? max_tokens : -1;
+ }
+
+ if (dynatemp_range !== undefined) requestBody.dynatemp_range = dynatemp_range;
+ if (dynatemp_exponent !== undefined) requestBody.dynatemp_exponent = dynatemp_exponent;
+ if (top_k !== undefined) requestBody.top_k = top_k;
+ if (top_p !== undefined) requestBody.top_p = top_p;
+ if (min_p !== undefined) requestBody.min_p = min_p;
+ if (xtc_probability !== undefined) requestBody.xtc_probability = xtc_probability;
+ if (xtc_threshold !== undefined) requestBody.xtc_threshold = xtc_threshold;
+ if (typ_p !== undefined) requestBody.typ_p = typ_p;
+
+ if (repeat_last_n !== undefined) requestBody.repeat_last_n = repeat_last_n;
+ if (repeat_penalty !== undefined) requestBody.repeat_penalty = repeat_penalty;
+ if (presence_penalty !== undefined) requestBody.presence_penalty = presence_penalty;
+ if (frequency_penalty !== undefined) requestBody.frequency_penalty = frequency_penalty;
+ if (dry_multiplier !== undefined) requestBody.dry_multiplier = dry_multiplier;
+ if (dry_base !== undefined) requestBody.dry_base = dry_base;
+ if (dry_allowed_length !== undefined) requestBody.dry_allowed_length = dry_allowed_length;
+ if (dry_penalty_last_n !== undefined) requestBody.dry_penalty_last_n = dry_penalty_last_n;
+
+ if (samplers !== undefined) {
+ requestBody.samplers =
+ typeof samplers === 'string'
+ ? samplers.split(';').filter((s: string) => s.trim())
+ : samplers;
+ }
+
+ if (backend_sampling !== undefined) requestBody.backend_sampling = backend_sampling;
+ if (timings_per_token !== undefined) requestBody.timings_per_token = timings_per_token;
+
+ if (custom) {
+ try {
+ const customParams = typeof custom === 'string' ? JSON.parse(custom) : custom;
+ Object.assign(requestBody, customParams);
+ } catch (error) {
+ console.warn('Failed to parse custom parameters:', error);
+ }
+ }
+
+ try {
+ const response = await fetch(`./completion`, {
+ method: 'POST',
+ headers: getJsonHeaders(),
+ body: JSON.stringify(requestBody),
+ signal
+ });
+
+ if (!response.ok) {
+ const error = await ChatService.parseErrorResponse(response);
+ if (onError) {
+ onError(error);
+ }
+ throw error;
+ }
+
+ if (stream) {
+ await CompletionService.handleCompletionStreamResponse(
+ response,
+ onChunk,
+ onComplete,
+ onError,
+ onModel,
+ onTimings,
+ signal
+ );
+ return;
+ } else {
+ return CompletionService.handleCompletionNonStreamResponse(
+ response,
+ onComplete,
+ onError,
+ onModel
+ );
+ }
+ } catch (error) {
+ if (error instanceof Error && error.name === 'AbortError') {
+ console.log('Completion request was aborted');
+ return;
+ }
+
+ let userFriendlyError: Error;
+
+ if (error instanceof Error) {
+ if (error.name === 'TypeError' && error.message.includes('fetch')) {
+ userFriendlyError = new Error(
+ 'Unable to connect to server - please check if the server is running'
+ );
+ userFriendlyError.name = 'NetworkError';
+ } else if (error.message.includes('ECONNREFUSED')) {
+ userFriendlyError = new Error('Connection refused - server may be offline');
+ userFriendlyError.name = 'NetworkError';
+ } else if (error.message.includes('ETIMEDOUT')) {
+ userFriendlyError = new Error('Request timed out - the server took too long to respond');
+ userFriendlyError.name = 'TimeoutError';
+ } else {
+ userFriendlyError = error;
+ }
+ } else {
+ userFriendlyError = new Error('Unknown error occurred while sending completion');
+ }
+
+ console.error('Error in sendCompletion:', error);
+ if (onError) {
+ onError(userFriendlyError);
+ }
+ throw userFriendlyError;
+ }
+ }
+
+ /**
+ * Handles streaming response from the completion API
+ */
+ private static async handleCompletionStreamResponse(
+ response: Response,
+ onChunk?: (chunk: string) => void,
+ onComplete?: (
+ response: string,
+ reasoningContent?: string,
+ timings?: ChatMessageTimings,
+ toolCalls?: string
+ ) => void,
+ onError?: (error: Error) => void,
+ onModel?: (model: string) => void,
+ onTimings?: (timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void,
+ abortSignal?: AbortSignal
+ ): Promise<void> {
+ const reader = response.body?.getReader();
+
+ if (!reader) {
+ throw new Error('No response body');
+ }
+
+ const decoder = new TextDecoder();
+ let aggregatedContent = '';
+ let lastTimings: ChatMessageTimings | undefined;
+ let streamFinished = false;
+ let modelEmitted = false;
+
+ try {
+ let chunk = '';
+ while (true) {
+ if (abortSignal?.aborted) {
+ break;
+ }
+
+ const { done, value } = await reader.read();
+ if (done) {
+ break;
+ }
+
+ if (abortSignal?.aborted) {
+ break;
+ }
+
+ chunk += decoder.decode(value, { stream: true });
+ const lines = chunk.split('\n');
+ chunk = lines.pop() || '';
+
+ for (const line of lines) {
+ if (abortSignal?.aborted) {
+ break;
+ }
+
+ if (line.startsWith('data: ')) {
+ const data = line.slice(6);
+
+ try {
+ const parsed: ApiCompletionStreamChunk = JSON.parse(data);
+ const content = parsed.content;
+ const timings = parsed.timings;
+ const model = parsed.model;
+ const promptProgress = parsed.prompt_progress;
+
+ if (parsed.stop) {
+ streamFinished = true;
+ }
+
+ if (model && !modelEmitted) {
+ modelEmitted = true;
+ onModel?.(model);
+ }
+
+ if (promptProgress) {
+ ChatService.notifyTimings(undefined, promptProgress, onTimings);
+ }
+
+ if (timings) {
+ ChatService.notifyTimings(timings, promptProgress, onTimings);
+ lastTimings = timings;
+ }
+
+ if (content) {
+ aggregatedContent += content;
+ if (!abortSignal?.aborted) {
+ onChunk?.(content);
+ }
+ }
+ } catch (e) {
+ console.error('Error parsing JSON chunk:', e);
+ }
+ }
+ }
+
+ if (streamFinished) {
+ break;
+ }
+ }
+
+ if (abortSignal?.aborted) {
+ return;
+ }
+
+ if (streamFinished) {
+ onComplete?.(aggregatedContent, undefined, lastTimings, undefined);
+ }
+ } catch (error) {
+ const err = error instanceof Error ? error : new Error('Stream error');
+ onError?.(err);
+ throw err;
+ } finally {
+ reader.releaseLock();
+ }
+ }
+
+ /**
+ * Handles non-streaming response from the completion API
+ */
+ private static async handleCompletionNonStreamResponse(
+ response: Response,
+ onComplete?: (
+ response: string,
+ reasoningContent?: string,
+ timings?: ChatMessageTimings,
+ toolCalls?: string
+ ) => void,
+ onError?: (error: Error) => void,
+ onModel?: (model: string) => void
+ ): Promise<string> {
+ try {
+ const responseText = await response.text();
+
+ if (!responseText.trim()) {
+ const noResponseError = new Error('No response received from server. Please try again.');
+ throw noResponseError;
+ }
+
+ const data: ApiCompletionResponse = JSON.parse(responseText);
+
+ if (data.model) {
+ onModel?.(data.model);
+ }
+
+ const content = data.content || '';
+
+ if (!content.trim()) {
+ const noResponseError = new Error('No response received from server. Please try again.');
+ throw noResponseError;
+ }
+
+ onComplete?.(content, undefined, data.timings, undefined);
+
+ return content;
+ } catch (error) {
+ const err = error instanceof Error ? error : new Error('Parse error');
+ onError?.(err);
+ throw err;
+ }
+ }
+}
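Reviewer note (illustrative, not part of this patch): a minimal sketch of how the new `CompletionService.sendCompletion` API is meant to be driven from UI code, using only the options and callbacks defined above; the prompt and parameter values are placeholders.

```ts
// Illustrative usage only - not included in this diff.
import { CompletionService } from '$lib/services/completion';

let output = '';
const controller = new AbortController();

await CompletionService.sendCompletion(
	'Once upon a time',
	{
		stream: true,
		temperature: 0.8, // any sampling/penalty option listed above can be passed the same way
		onChunk: (chunk) => {
			output += chunk; // streamed text arrives incrementally
		},
		onComplete: (text) => console.log('finished:', text),
		onError: (err) => console.error(err)
	},
	controller.signal // controller.abort() cancels the request
);
```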
diff --git a/tools/server/webui/src/lib/services/tokenize.ts b/tools/server/webui/src/lib/services/tokenize.ts
new file mode 100644
index 0000000000..5de49fba9e
--- /dev/null
+++ b/tools/server/webui/src/lib/services/tokenize.ts
@@ -0,0 +1,39 @@
+import { getJsonHeaders } from '$lib/utils';
+
+/**
+ * Tokenizes the provided text using the server's tokenizer.
+ *
+ * @param content - The text content to tokenize
+ * @param model - Optional model name to use for tokenization (required in router mode)
+ * @param signal - Optional AbortSignal
+ * @returns {Promise<number[]>} Promise that resolves to an array of token IDs
+ */
+export async function tokenize(
+ content: string,
+ model?: string,
+ signal?: AbortSignal
+): Promise<number[]> {
+ try {
+ const body: { content: string; model?: string } = { content };
+ if (model) {
+ body.model = model;
+ }
+
+ const response = await fetch('./tokenize', {
+ method: 'POST',
+ headers: getJsonHeaders(),
+ body: JSON.stringify(body),
+ signal
+ });
+
+ if (!response.ok) {
+ throw new Error(`Tokenize failed: ${response.statusText}`);
+ }
+
+ const data = await response.json();
+ return data.tokens;
+ } catch (error) {
+ console.error('Tokenize error:', error);
+ return [];
+ }
+}
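Reviewer note (illustrative, not part of this patch): the helper resolves to an empty array on any failure, so callers can read `.length` without extra error handling.

```ts
// Illustrative usage only - not included in this diff.
import { tokenize } from '$lib/services/tokenize';

const tokens = await tokenize('The quick brown fox');
console.log(`${tokens.length} tokens`); // 0 if the request failed or was aborted
```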
diff --git a/tools/server/webui/src/lib/stores/notebook.svelte.ts b/tools/server/webui/src/lib/stores/notebook.svelte.ts
new file mode 100644
index 0000000000..794192ea03
--- /dev/null
+++ b/tools/server/webui/src/lib/stores/notebook.svelte.ts
@@ -0,0 +1,162 @@
+import { CompletionService } from '$lib/services/completion';
+import { config } from '$lib/stores/settings.svelte';
+import { tokenize } from '$lib/services/tokenize';
+import type { ChatMessageTimings, ChatMessagePromptProgress } from '$lib/types/chat';
+
+export class NotebookStore {
+ content = $state('');
+ isGenerating = $state(false);
+ abortController: AbortController | null = null;
+
+ // Statistics
+ cacheTokens = $state(0);
+ promptTokens = $state(0);
+ promptMs = $state(0);
+ predictedTokens = $state(0);
+ predictedMs = $state(0);
+ totalTokens = $state(0);
+ generationStartTokens = $state(0);
+ generationEndTokens = $state(0);
+ tokenizeTimeout: ReturnType<typeof setTimeout> | undefined;
+
+ error = $state<{
+ message: string;
+ type: 'timeout' | 'server';
+ contextInfo?: { n_prompt_tokens: number; n_ctx: number };
+ } | null>(null);
+
+ previousContent = $state(null);
+ undoneContent = $state(null);
+
+ async generate(model?: string) {
+ if (this.isGenerating) return;
+
+ this.previousContent = this.content;
+ this.undoneContent = null;
+ this.isGenerating = true;
+ this.abortController = new AbortController();
+ this.error = null;
+
+ // Reset stats
+ this.cacheTokens = 0;
+ this.promptTokens = 0;
+ this.promptMs = 0;
+ this.predictedTokens = 0;
+ this.predictedMs = 0;
+
+ // Save number of tokens before generation
+ this.generationStartTokens = this.totalTokens;
+
+ try {
+ const currentConfig = config();
+ await CompletionService.sendCompletion(
+ this.content,
+ {
+ ...currentConfig,
+ model,
+ stream: true,
+ timings_per_token: true,
+ onChunk: (chunk: string) => {
+ this.content += chunk;
+ },
+ onTimings: (timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => {
+ if (timings) {
+ if (timings.cache_n) this.cacheTokens = timings.cache_n;
+ if (timings.prompt_n) this.promptTokens = timings.prompt_n;
+ if (timings.prompt_ms) this.promptMs = timings.prompt_ms;
+ if (timings.predicted_n) this.predictedTokens = timings.predicted_n;
+ if (timings.predicted_ms) this.predictedMs = timings.predicted_ms;
+ }
+
+ if (promptProgress) {
+ // Update prompt stats from progress
+ const { processed, time_ms } = promptProgress;
+ if (processed > 0) this.promptTokens = processed;
+ if (time_ms > 0) this.promptMs = time_ms;
+ }
+
+ // Update totalTokens live
+ this.totalTokens = this.cacheTokens + this.promptTokens + this.predictedTokens;
+ },
+ onComplete: () => {
+ this.isGenerating = false;
+ },
+ onError: (error: unknown) => {
+ if (error instanceof Error && error.name === 'AbortError') {
+ // aborted by user
+ } else {
+ console.error('Notebook generation error:', error);
+ this.error = {
+ message: error instanceof Error ? error.message : String(error),
+ type: 'server'
+ };
+ }
+ this.isGenerating = false;
+ }
+ },
+ this.abortController.signal
+ );
+ } catch (error) {
+ console.error('Notebook generation failed:', error);
+ this.error = {
+ message: error instanceof Error ? error.message : String(error),
+ type: 'server'
+ };
+ this.isGenerating = false;
+ }
+ // Save number of tokens after generation
+ this.generationEndTokens = this.totalTokens;
+ }
+
+ dismissError() {
+ this.error = null;
+ }
+
+ undo() {
+ if (this.previousContent !== null) {
+ this.undoneContent = this.content;
+ this.content = this.previousContent;
+ this.previousContent = null;
+ this.totalTokens = this.generationStartTokens;
+ }
+ }
+
+ redo() {
+ if (this.undoneContent !== null) {
+ this.previousContent = this.content;
+ this.content = this.undoneContent;
+ this.undoneContent = null;
+ this.totalTokens = this.generationEndTokens;
+ }
+ }
+
+ resetUndoRedo() {
+ this.previousContent = null;
+ this.undoneContent = null;
+ }
+
+ stop() {
+ if (this.abortController) {
+ this.abortController.abort();
+ this.abortController = null;
+ }
+ this.isGenerating = false;
+ }
+
+ updateTokenCount(model?: string) {
+ if (this.tokenizeTimeout) {
+ clearTimeout(this.tokenizeTimeout);
+ }
+
+ this.tokenizeTimeout = setTimeout(async () => {
+ if (this.content.length === 0) {
+ this.totalTokens = 0;
+ return;
+ }
+ const tokens = await tokenize(this.content, model);
+ this.totalTokens = tokens.length;
+ }, 500);
+ }
+}
+
+export const notebookStore = new NotebookStore();
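Reviewer note (illustrative, not part of this patch): a sketch of how a page component is expected to drive the store, using only the fields and methods defined above.

```ts
// Illustrative usage only - not included in this diff.
import { notebookStore } from '$lib/stores/notebook.svelte';

notebookStore.content = 'Write a haiku about autumn:';
notebookStore.updateTokenCount(); // debounced ./tokenize call refreshes totalTokens
await notebookStore.generate();   // streams the completion directly into content
notebookStore.undo();             // restores the pre-generation text
notebookStore.redo();             // re-applies the generated text
```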
diff --git a/tools/server/webui/src/lib/types/api.d.ts b/tools/server/webui/src/lib/types/api.d.ts
index 714509f024..33e4a92c86 100644
--- a/tools/server/webui/src/lib/types/api.d.ts
+++ b/tools/server/webui/src/lib/types/api.d.ts
@@ -219,6 +219,39 @@ export interface ApiChatCompletionRequest {
timings_per_token?: boolean;
}
+export interface ApiCompletionRequest {
+ prompt: string;
+ stream?: boolean;
+ model?: string;
+ // Generation parameters
+ temperature?: number;
+ max_tokens?: number;
+ // Sampling parameters
+ dynatemp_range?: number;
+ dynatemp_exponent?: number;
+ top_k?: number;
+ top_p?: number;
+ min_p?: number;
+ xtc_probability?: number;
+ xtc_threshold?: number;
+ typ_p?: number;
+ // Penalty parameters
+ repeat_last_n?: number;
+ repeat_penalty?: number;
+ presence_penalty?: number;
+ frequency_penalty?: number;
+ dry_multiplier?: number;
+ dry_base?: number;
+ dry_allowed_length?: number;
+ dry_penalty_last_n?: number;
+ // Sampler configuration
+ samplers?: string[];
+ backend_sampling?: boolean;
+ // Custom parameters (JSON string)
+ custom?: Record<string, unknown>;
+ timings_per_token?: boolean;
+}
+
export interface ApiChatCompletionToolCallFunctionDelta {
name?: string;
arguments?: string;
@@ -258,6 +291,33 @@ export interface ApiChatCompletionStreamChunk {
prompt_progress?: ChatMessagePromptProgress;
}
+export interface ApiCompletionStreamChunk {
+ content: string;
+ stop: boolean;
+ model: string;
+ timings?: {
+ prompt_n?: number;
+ prompt_ms?: number;
+ predicted_n?: number;
+ predicted_ms?: number;
+ cache_n?: number;
+ };
+ prompt_progress?: ChatMessagePromptProgress;
+}
+
+export interface ApiCompletionResponse {
+ content: string;
+ stop: boolean;
+ model: string;
+ timings?: {
+ prompt_n?: number;
+ prompt_ms?: number;
+ predicted_n?: number;
+ predicted_ms?: number;
+ cache_n?: number;
+ };
+}
+
export interface ApiChatCompletionResponse {
model?: string;
choices: Array<{
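Reviewer note (illustrative, not part of this patch): a value conforming to the new `ApiCompletionStreamChunk` shape, as parsed per SSE `data:` line by `handleCompletionStreamResponse`; the field values are made up.

```ts
// Illustrative only - example values, not real server output.
import type { ApiCompletionStreamChunk } from '$lib/types/api';

const chunk: ApiCompletionStreamChunk = {
	content: ' world',
	stop: false,
	model: 'example-model', // hypothetical model name
	timings: { prompt_n: 12, prompt_ms: 34.5, predicted_n: 2, predicted_ms: 21 }
};
```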
diff --git a/tools/server/webui/src/routes/+layout.svelte b/tools/server/webui/src/routes/+layout.svelte
index 095827b9ca..691d92516a 100644
--- a/tools/server/webui/src/routes/+layout.svelte
+++ b/tools/server/webui/src/routes/+layout.svelte
@@ -21,6 +21,7 @@
let isChatRoute = $derived(page.route.id === '/chat/[id]');
let isHomeRoute = $derived(page.route.id === '/');
+ let isNotebookRoute = $derived(page.route.id === '/notebook');
let isNewChatMode = $derived(page.url.searchParams.get('new_chat') === 'true');
let showSidebarByDefault = $derived(activeMessages().length > 0 || isLoading());
let alwaysShowSidebarOnDesktop = $derived(config().alwaysShowSidebarOnDesktop);
@@ -93,7 +94,7 @@
} else if (isHomeRoute && isNewChatMode) {
// Keep sidebar open in new chat mode
sidebarOpen = true;
- } else if (isChatRoute) {
+ } else if (isChatRoute || isNotebookRoute) {
// On chat routes, only auto-show sidebar if setting is enabled
if (autoShowSidebarOnNewChat) {
sidebarOpen = true;
diff --git a/tools/server/webui/src/routes/notebook/+page.svelte b/tools/server/webui/src/routes/notebook/+page.svelte
new file mode 100644
index 0000000000..d8417c8acc
--- /dev/null
+++ b/tools/server/webui/src/routes/notebook/+page.svelte
@@ -0,0 +1,9 @@
+
+
+
+ Notebook - llama.cpp
+
+
+