Merge a0c5c26fb9 into 3688c4f504
commit 8d731cad10
Binary file not shown.
@@ -1,5 +1,5 @@
 <script lang="ts">
-  import { Search, SquarePen, X } from '@lucide/svelte';
+  import { NotepadText, Search, SquarePen, X } from '@lucide/svelte';
   import { KeyboardShortcutInfo } from '$lib/components/app';
   import { Button } from '$lib/components/ui/button';
   import { Input } from '$lib/components/ui/input';
@@ -63,6 +63,18 @@
       <KeyboardShortcutInfo keys={['shift', 'cmd', 'o']} />
     </Button>
+
+    <Button
+      class="w-full justify-between hover:[&>kbd]:opacity-100"
+      href="/#/notebook"
+      onclick={handleMobileSidebarItemClick}
+      variant="ghost"
+    >
+      <div class="flex items-center gap-2">
+        <NotepadText class="h-4 w-4" />
+        Notebook
+      </div>
+    </Button>

     <Button
       class="w-full justify-between hover:[&>kbd]:opacity-100"
       onclick={() => {
@@ -73,3 +73,6 @@ export { default as ModelsSelector } from './models/ModelsSelector.svelte';
 export { default as ServerStatus } from './server/ServerStatus.svelte';
 export { default as ServerErrorSplash } from './server/ServerErrorSplash.svelte';
 export { default as ServerLoadingSplash } from './server/ServerLoadingSplash.svelte';
+
+// Notebook
+export { default as NotebookScreen } from './notebook/NotebookScreen.svelte';
@@ -17,9 +17,11 @@
 <kbd class="{baseClasses} {variantClasses} {className}">
   {#each keys as key, index (index)}
     {#if key === 'shift'}
-      <ArrowBigUp class="h-1 w-1 {variant === 'destructive' ? 'text-destructive' : ''} -mr-1" />
+      <ArrowBigUp class="size-4 {variant === 'destructive' ? 'text-destructive' : ''} -mr-1" />
     {:else if key === 'cmd'}
       <span class={variant === 'destructive' ? 'text-destructive' : ''}>⌘</span>
+    {:else if key === 'ctrl'}
+      <span class={variant === 'destructive' ? 'text-destructive' : ''}>Ctrl</span>
     {:else}
       {key.toUpperCase()}
     {/if}
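With the new branch, a shortcut such as <KeyboardShortcutInfo keys={['ctrl', 'z']} /> (used by the notebook screen below) renders a literal "Ctrl" label instead of falling through to the uppercase fallback ("CTRL").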
@@ -0,0 +1,346 @@
<script lang="ts">
  import { notebookStore } from '$lib/stores/notebook.svelte';
  import Button from '$lib/components/ui/button/button.svelte';
  import Textarea from '$lib/components/ui/textarea/textarea.svelte';
  import { Play, Square, Settings, Undo, Redo, RulerDimensionLine } from '@lucide/svelte';
  import { config } from '$lib/stores/settings.svelte';
  import DialogChatSettings from '$lib/components/app/dialogs/DialogChatSettings.svelte';
  import {
    ChatMessageStatistics,
    DialogChatError,
    KeyboardShortcutInfo,
    ModelsSelector
  } from '$lib/components/app';

  import { modelOptions, selectedModelId } from '$lib/stores/models.svelte';
  import { isRouterMode } from '$lib/stores/server.svelte';
  import * as Tooltip from '$lib/components/ui/tooltip';

  let settingsOpen = $state(false);

  import {
    AUTO_SCROLL_AT_BOTTOM_THRESHOLD,
    AUTO_SCROLL_INTERVAL,
    INITIAL_SCROLL_DELAY
  } from '$lib/constants/auto-scroll';
  import { onMount } from 'svelte';

  let disableAutoScroll = $derived(Boolean(config().disableAutoScroll));
  let showMessageStats = $derived(config().showMessageStats);
  let autoScrollEnabled = $state(true);
  let scrollContainer: HTMLTextAreaElement | null = $state(null);
  let lastScrollTop = $state(0);
  let scrollInterval: ReturnType<typeof setInterval> | undefined;
  let scrollTimeout: ReturnType<typeof setTimeout> | undefined;
  let userScrolledUp = $state(false);

  let isRouter = $derived(isRouterMode());

  let errorDialog = $derived(notebookStore.error);
  let canUndo = $derived(notebookStore.previousContent !== null && !notebookStore.isGenerating);
  let canRedo = $derived(notebookStore.undoneContent !== null && !notebookStore.isGenerating);

  function handleInput(e: Event) {
    const target = e.target as HTMLTextAreaElement;
    notebookStore.content = target.value;
    notebookStore.resetUndoRedo();
    if (activeModelId || !isRouter) {
      notebookStore.updateTokenCount(activeModelId);
    }
  }

  function handleErrorDialogOpenChange(open: boolean) {
    if (!open) {
      notebookStore.dismissError();
    }
  }

  async function handleGenerate() {
    if (!disableAutoScroll) {
      userScrolledUp = false;
      autoScrollEnabled = true;
      scrollToBottom();
    }

    if (activeModelId) {
      await notebookStore.generate(activeModelId);
    }
  }

  function handleUndo() {
    notebookStore.undo();
  }

  function handleRedo() {
    notebookStore.redo();
  }

  function handleStop() {
    notebookStore.stop();
  }

  let activeModelId = $derived.by(() => {
    const options = modelOptions();

    if (!isRouter) {
      return options.length > 0 ? options[0].model : null;
    }

    const selectedId = selectedModelId();
    if (selectedId) {
      const model = options.find((m) => m.id === selectedId);
      if (model) return model.model;
    }

    return null;
  });

  let hasModelSelected = $derived(!isRouter || !!selectedModelId());

  let generateTooltip = $derived.by(() => {
    if (!hasModelSelected) {
      return 'Please select a model first';
    }

    if (notebookStore.content.length == 0) {
      return 'Input some text first';
    }

    return '';
  });

  let canGenerate = $derived(notebookStore.content.length > 0 && hasModelSelected);
  let isDisabled = $derived(!canGenerate);

  function handleScroll() {
    if (disableAutoScroll || !scrollContainer) return;

    const { scrollTop, scrollHeight, clientHeight } = scrollContainer;
    const distanceFromBottom = scrollHeight - scrollTop - clientHeight;
    const isAtBottom = distanceFromBottom < AUTO_SCROLL_AT_BOTTOM_THRESHOLD;

    if (scrollTop < lastScrollTop && !isAtBottom) {
      userScrolledUp = true;
      autoScrollEnabled = false;
    } else if (isAtBottom && userScrolledUp) {
      userScrolledUp = false;
      autoScrollEnabled = true;
    }

    if (scrollTimeout) {
      clearTimeout(scrollTimeout);
    }

    scrollTimeout = setTimeout(() => {
      if (isAtBottom) {
        userScrolledUp = false;
        autoScrollEnabled = true;
      }
    }, AUTO_SCROLL_INTERVAL);

    lastScrollTop = scrollTop;
  }

  function scrollToBottom(behavior: ScrollBehavior = 'smooth') {
    if (disableAutoScroll) return;

    scrollContainer?.scrollTo({
      top: scrollContainer?.scrollHeight,
      behavior
    });
  }

  onMount(() => {
    if (!disableAutoScroll) {
      setTimeout(() => scrollToBottom('instant'), INITIAL_SCROLL_DELAY);
    }
  });

  $effect(() => {
    if (disableAutoScroll) {
      autoScrollEnabled = false;
      if (scrollInterval) {
        clearInterval(scrollInterval);
        scrollInterval = undefined;
      }
      return;
    }

    if (notebookStore.isGenerating && autoScrollEnabled) {
      scrollInterval = setInterval(() => scrollToBottom(), AUTO_SCROLL_INTERVAL);
    } else if (scrollInterval) {
      clearInterval(scrollInterval);
      scrollInterval = undefined;
    }
  });

  function handleBeforeUnload(event: BeforeUnloadEvent) {
    // This should prevent the browser from closing the tab if there is content in the notebook
    if (notebookStore.content.length > 0) {
      event.preventDefault();
      event.returnValue = '';
    }
  }

  function handleKeydown(event: KeyboardEvent) {
    const isCtrlOrCmd = event.ctrlKey || event.metaKey;

    if (event.shiftKey && event.key === 'Enter') {
      event.preventDefault();
      if (notebookStore.isGenerating) {
        handleStop();
      } else if (canGenerate) {
        handleGenerate();
      }
    }

    if (isCtrlOrCmd && event.key === 'z') {
      event.preventDefault();
      if (canUndo) {
        handleUndo();
      }
    }

    if (isCtrlOrCmd && event.key === 'y') {
      event.preventDefault();
      if (canRedo) {
        handleRedo();
      }
    }
  }
</script>

<svelte:window onbeforeunload={handleBeforeUnload} onkeydown={handleKeydown} />

<div class="flex h-full flex-col">
  <header
    class="flex items-center justify-between border-b border-border/40 bg-background/95 px-6 py-3 backdrop-blur supports-[backdrop-filter]:bg-background/60"
  >
    <div class="w-10"></div>
    <!-- Spacer for centering -->
    <h1 class="text-lg font-semibold">Notebook</h1>
    <Button variant="ghost" size="icon" onclick={() => (settingsOpen = true)}>
      <Settings class="h-5 w-5" />
    </Button>
  </header>

  <div class="flex-1 overflow-y-auto px-2 pt-2 pb-0 md:px-4 md:pt-4">
    <Textarea
      bind:ref={scrollContainer}
      onscroll={handleScroll}
      value={notebookStore.content}
      oninput={handleInput}
      class="h-full min-h-[100px] w-full resize-none rounded-xl border-none bg-muted p-4 text-base focus-visible:ring-0 md:p-6"
      placeholder="Enter your text here..."
    />
  </div>

  <div class="bg-background p-2 md:p-4">
    <div class="flex flex-col-reverse gap-4 md:flex-row md:items-center md:justify-between">
      <div class="flex items-center gap-2">
        <Tooltip.Root>
          <Tooltip.Trigger>
            <Button variant="ghost" size="icon" disabled={!canUndo} onclick={handleUndo}>
              <Undo class="h-4 w-4" />
            </Button>
          </Tooltip.Trigger>
          <Tooltip.Content>
            <p>Undo last generation</p>
            <KeyboardShortcutInfo keys={['ctrl', 'z']} class="w-full justify-center opacity-100" />
          </Tooltip.Content>
        </Tooltip.Root>

        <Tooltip.Root>
          <Tooltip.Trigger>
            <Button variant="ghost" size="icon" disabled={!canRedo} onclick={handleRedo}>
              <Redo class="h-4 w-4" />
            </Button>
          </Tooltip.Trigger>
          <Tooltip.Content>
            <p>Redo last generation</p>
            <KeyboardShortcutInfo keys={['ctrl', 'y']} class="w-full justify-center opacity-100" />
          </Tooltip.Content>
        </Tooltip.Root>

        {#snippet generateButton()}
          <Button
            disabled={isDisabled}
            onclick={notebookStore.isGenerating ? handleStop : handleGenerate}
            size="sm"
            variant={notebookStore.isGenerating ? 'destructive' : 'default'}
            class="min-w-[120px] gap-2"
          >
            {#if notebookStore.isGenerating}
              <Square class="h-4 w-4 fill-current" />
              Stop
            {:else}
              <Play class="h-4 w-4 fill-current" />
              Generate
            {/if}
          </Button>
        {/snippet}

        <Tooltip.Root>
          <Tooltip.Trigger>
            {@render generateButton()}
          </Tooltip.Trigger>

          <Tooltip.Content>
            {#if generateTooltip}
              <p>{generateTooltip}</p>
            {:else}
              <div class="flex items-center justify-center py-1">
                <KeyboardShortcutInfo keys={['shift', 'enter']} class="opacity-100" />
              </div>
            {/if}
          </Tooltip.Content>
        </Tooltip.Root>

        <ModelsSelector
          forceForegroundText={true}
          useGlobalSelection={true}
          disabled={notebookStore.isGenerating}
        />
      </div>

      {#if showMessageStats}
        <div class="flex min-h-[42px] w-full flex-col items-end justify-center gap-0.5 md:w-auto">
          {#if notebookStore.totalTokens > 0}
            <Tooltip.Root>
              <Tooltip.Trigger>
                <div class="flex items-center gap-1.5 pr-3.5 text-xs text-muted-foreground">
                  <RulerDimensionLine class="h-3.5 w-3.5" />
                  <span>{notebookStore.totalTokens} tokens</span>
                </div>
              </Tooltip.Trigger>
              <Tooltip.Content>
                <p>Total tokens</p>
              </Tooltip.Content>
            </Tooltip.Root>
          {/if}

          {#if notebookStore.promptTokens > 0 || notebookStore.predictedTokens > 0}
            <ChatMessageStatistics
              promptTokens={notebookStore.promptTokens}
              promptMs={notebookStore.promptMs}
              predictedTokens={notebookStore.predictedTokens}
              predictedMs={notebookStore.predictedMs}
              isLive={notebookStore.isGenerating}
              isProcessingPrompt={notebookStore.isGenerating && notebookStore.predictedTokens === 0}
            />
          {/if}
        </div>
      {/if}
    </div>
  </div>

  <DialogChatSettings open={settingsOpen} onOpenChange={(open) => (settingsOpen = open)} />

  <DialogChatError
    message={errorDialog?.message ?? ''}
    contextInfo={errorDialog?.contextInfo}
    onOpenChange={handleErrorDialogOpenChange}
    open={Boolean(errorDialog)}
    type={errorDialog?.type ?? 'server'}
  />
</div>
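The new screen drives everything through notebookStore: the textarea is the document, Shift+Enter toggles generate/stop, Ctrl/Cmd+Z and Ctrl/Cmd+Y map to the store's single-step undo/redo, and the beforeunload handler guards against closing a tab that still has notebook text.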
@@ -95,8 +95,7 @@ export const SETTING_CONFIG_INFO: Record<string, string> = {
   disableReasoningFormat:
     'Show raw LLM output without backend parsing and frontend Markdown rendering to inspect streaming across different models.',
   keepStatsVisible: 'Keep processing statistics visible after generation finishes.',
-  showMessageStats:
-    'Display generation statistics (tokens/second, token count, duration) below each assistant message.',
+  showMessageStats: 'Display generation statistics (tokens/second, token count, duration).',
   askForTitleConfirmation:
     'Ask for confirmation before automatically changing conversation title when editing the first message.',
   pdfAsImage:
@@ -1,5 +1,14 @@
 import { getJsonHeaders } from '$lib/utils';
 import { AttachmentType } from '$lib/enums';
+import type {
+  ApiChatCompletionRequest,
+  ApiChatCompletionResponse,
+  ApiChatCompletionStreamChunk,
+  ApiChatCompletionToolCall,
+  ApiChatCompletionToolCallDelta,
+  ApiChatMessageContentPart,
+  ApiChatMessageData
+} from '$lib/types/api';

 /**
  * ChatService - Low-level API communication layer for Chat Completions
@@ -686,7 +695,7 @@ export class ChatService {
   * @param response - HTTP response object
   * @returns Promise<Error> - Parsed error with context info if available
   */
-  private static async parseErrorResponse(
+  public static async parseErrorResponse(
     response: Response
   ): Promise<Error & { contextInfo?: { n_prompt_tokens: number; n_ctx: number } }> {
     try {

@@ -729,7 +738,7 @@ export class ChatService {
   * @returns Model name string if found, undefined otherwise
   * @private
   */
-  private static extractModelName(data: unknown): string | undefined {
+  public static extractModelName(data: unknown): string | undefined {
     const asRecord = (value: unknown): Record<string, unknown> | undefined => {
       return typeof value === 'object' && value !== null
         ? (value as Record<string, unknown>)

@@ -770,7 +779,7 @@ export class ChatService {
   * @param onTimingsCallback - Callback function to invoke with timing data
   * @private
   */
-  private static notifyTimings(
+  public static notifyTimings(
     timings: ChatMessageTimings | undefined,
     promptProgress: ChatMessagePromptProgress | undefined,
     onTimingsCallback:
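These three helpers are widened from private to public so they can be reused outside ChatService; the new CompletionService below calls parseErrorResponse and notifyTimings directly. Note the `@private` JSDoc tags left on extractModelName and notifyTimings are now stale.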
@@ -0,0 +1,347 @@
import { getJsonHeaders } from '$lib/utils';
import { ChatService } from '$lib/services/chat';

import type {
  ApiCompletionRequest,
  ApiCompletionResponse,
  ApiCompletionStreamChunk
} from '$lib/types/api';
import type { ChatMessageTimings, ChatMessagePromptProgress } from '$lib/types/chat';
import type { SettingsChatServiceOptions } from '$lib/types/settings';

/**
 * CompletionService - Low-level API communication layer for raw text completions.
 * Used in the notebook page.
 */
export class CompletionService {
  /**
   * Sends a completion request to the llama.cpp server.
   * Supports both streaming and non-streaming responses.
   *
   * @param prompt - The text prompt to complete
   * @param options - Configuration options for the completion request
   * @returns {Promise<string | void>} that resolves to the complete response string (non-streaming) or void (streaming)
   * @throws {Error} if the request fails or is aborted
   */
  static async sendCompletion(
    prompt: string,
    options: SettingsChatServiceOptions = {},
    signal?: AbortSignal
  ): Promise<string | void> {
    const {
      stream,
      onChunk,
      onComplete,
      onError,
      onModel,
      onTimings,
      // Generation parameters
      temperature,
      max_tokens,
      // Sampling parameters
      dynatemp_range,
      dynatemp_exponent,
      top_k,
      top_p,
      min_p,
      xtc_probability,
      xtc_threshold,
      typ_p,
      // Penalty parameters
      repeat_last_n,
      repeat_penalty,
      presence_penalty,
      frequency_penalty,
      dry_multiplier,
      dry_base,
      dry_allowed_length,
      dry_penalty_last_n,
      // Other parameters
      samplers,
      backend_sampling,
      custom,
      timings_per_token
    } = options;

    const requestBody: ApiCompletionRequest = {
      prompt,
      stream
    };

    // Include model in request if provided
    if (options.model) {
      requestBody.model = options.model;
    }

    if (temperature !== undefined) requestBody.temperature = temperature;
    if (max_tokens !== undefined) {
      requestBody.max_tokens = max_tokens !== null && max_tokens !== 0 ? max_tokens : -1;
    }

    if (dynatemp_range !== undefined) requestBody.dynatemp_range = dynatemp_range;
    if (dynatemp_exponent !== undefined) requestBody.dynatemp_exponent = dynatemp_exponent;
    if (top_k !== undefined) requestBody.top_k = top_k;
    if (top_p !== undefined) requestBody.top_p = top_p;
    if (min_p !== undefined) requestBody.min_p = min_p;
    if (xtc_probability !== undefined) requestBody.xtc_probability = xtc_probability;
    if (xtc_threshold !== undefined) requestBody.xtc_threshold = xtc_threshold;
    if (typ_p !== undefined) requestBody.typ_p = typ_p;

    if (repeat_last_n !== undefined) requestBody.repeat_last_n = repeat_last_n;
    if (repeat_penalty !== undefined) requestBody.repeat_penalty = repeat_penalty;
    if (presence_penalty !== undefined) requestBody.presence_penalty = presence_penalty;
    if (frequency_penalty !== undefined) requestBody.frequency_penalty = frequency_penalty;
    if (dry_multiplier !== undefined) requestBody.dry_multiplier = dry_multiplier;
    if (dry_base !== undefined) requestBody.dry_base = dry_base;
    if (dry_allowed_length !== undefined) requestBody.dry_allowed_length = dry_allowed_length;
    if (dry_penalty_last_n !== undefined) requestBody.dry_penalty_last_n = dry_penalty_last_n;

    if (samplers !== undefined) {
      requestBody.samplers =
        typeof samplers === 'string'
          ? samplers.split(';').filter((s: string) => s.trim())
          : samplers;
    }

    if (backend_sampling !== undefined) requestBody.backend_sampling = backend_sampling;
    if (timings_per_token !== undefined) requestBody.timings_per_token = timings_per_token;

    if (custom) {
      try {
        const customParams = typeof custom === 'string' ? JSON.parse(custom) : custom;
        Object.assign(requestBody, customParams);
      } catch (error) {
        console.warn('Failed to parse custom parameters:', error);
      }
    }

    try {
      const response = await fetch(`./completion`, {
        method: 'POST',
        headers: getJsonHeaders(),
        body: JSON.stringify(requestBody),
        signal
      });

      if (!response.ok) {
        const error = await ChatService.parseErrorResponse(response);
        if (onError) {
          onError(error);
        }
        throw error;
      }

      if (stream) {
        await CompletionService.handleCompletionStreamResponse(
          response,
          onChunk,
          onComplete,
          onError,
          onModel,
          onTimings,
          signal
        );
        return;
      } else {
        return CompletionService.handleCompletionNonStreamResponse(
          response,
          onComplete,
          onError,
          onModel
        );
      }
    } catch (error) {
      if (error instanceof Error && error.name === 'AbortError') {
        console.log('Completion request was aborted');
        return;
      }

      let userFriendlyError: Error;

      if (error instanceof Error) {
        if (error.name === 'TypeError' && error.message.includes('fetch')) {
          userFriendlyError = new Error(
            'Unable to connect to server - please check if the server is running'
          );
          userFriendlyError.name = 'NetworkError';
        } else if (error.message.includes('ECONNREFUSED')) {
          userFriendlyError = new Error('Connection refused - server may be offline');
          userFriendlyError.name = 'NetworkError';
        } else if (error.message.includes('ETIMEDOUT')) {
          userFriendlyError = new Error('Request timed out - the server took too long to respond');
          userFriendlyError.name = 'TimeoutError';
        } else {
          userFriendlyError = error;
        }
      } else {
        userFriendlyError = new Error('Unknown error occurred while sending completion');
      }

      console.error('Error in sendCompletion:', error);
      if (onError) {
        onError(userFriendlyError);
      }
      throw userFriendlyError;
    }
  }

  /**
   * Handles streaming response from the completion API
   */
  private static async handleCompletionStreamResponse(
    response: Response,
    onChunk?: (chunk: string) => void,
    onComplete?: (
      response: string,
      reasoningContent?: string,
      timings?: ChatMessageTimings,
      toolCalls?: string
    ) => void,
    onError?: (error: Error) => void,
    onModel?: (model: string) => void,
    onTimings?: (timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void,
    abortSignal?: AbortSignal
  ): Promise<void> {
    const reader = response.body?.getReader();

    if (!reader) {
      throw new Error('No response body');
    }

    const decoder = new TextDecoder();
    let aggregatedContent = '';
    let lastTimings: ChatMessageTimings | undefined;
    let streamFinished = false;
    let modelEmitted = false;

    try {
      let chunk = '';
      while (true) {
        if (abortSignal?.aborted) {
          break;
        }

        const { done, value } = await reader.read();
        if (done) {
          break;
        }

        if (abortSignal?.aborted) {
          break;
        }

        chunk += decoder.decode(value, { stream: true });
        const lines = chunk.split('\n');
        chunk = lines.pop() || '';

        for (const line of lines) {
          if (abortSignal?.aborted) {
            break;
          }

          if (line.startsWith('data: ')) {
            const data = line.slice(6);

            try {
              const parsed: ApiCompletionStreamChunk = JSON.parse(data);
              const content = parsed.content;
              const timings = parsed.timings;
              const model = parsed.model;
              const promptProgress = parsed.prompt_progress;

              if (parsed.stop) {
                streamFinished = true;
              }

              if (model && !modelEmitted) {
                modelEmitted = true;
                onModel?.(model);
              }

              if (promptProgress) {
                ChatService.notifyTimings(undefined, promptProgress, onTimings);
              }

              if (timings) {
                ChatService.notifyTimings(timings, promptProgress, onTimings);
                lastTimings = timings;
              }

              if (content) {
                aggregatedContent += content;
                if (!abortSignal?.aborted) {
                  onChunk?.(content);
                }
              }
            } catch (e) {
              console.error('Error parsing JSON chunk:', e);
            }
          }
        }

        if (streamFinished) {
          break;
        }
      }

      if (abortSignal?.aborted) {
        return;
      }

      if (streamFinished) {
        onComplete?.(aggregatedContent, undefined, lastTimings, undefined);
      }
    } catch (error) {
      const err = error instanceof Error ? error : new Error('Stream error');
      onError?.(err);
      throw err;
    } finally {
      reader.releaseLock();
    }
  }

  /**
   * Handles non-streaming response from the completion API
   */
  private static async handleCompletionNonStreamResponse(
    response: Response,
    onComplete?: (
      response: string,
      reasoningContent?: string,
      timings?: ChatMessageTimings,
      toolCalls?: string
    ) => void,
    onError?: (error: Error) => void,
    onModel?: (model: string) => void
  ): Promise<string> {
    try {
      const responseText = await response.text();

      if (!responseText.trim()) {
        const noResponseError = new Error('No response received from server. Please try again.');
        throw noResponseError;
      }

      const data: ApiCompletionResponse = JSON.parse(responseText);

      if (data.model) {
        onModel?.(data.model);
      }

      const content = data.content || '';

      if (!content.trim()) {
        const noResponseError = new Error('No response received from server. Please try again.');
        throw noResponseError;
      }

      onComplete?.(content, undefined, data.timings, undefined);

      return content;
    } catch (error) {
      const err = error instanceof Error ? error : new Error('Parse error');
      onError?.(err);
      throw err;
    }
  }
}
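For review context, a minimal sketch of how the streaming path is meant to be called. The option names match what sendCompletion destructures above; the prompt text and logging are illustrative only:

// Hypothetical caller, not part of this commit:
let out = '';
const controller = new AbortController();
await CompletionService.sendCompletion(
  'The quick brown fox',
  {
    stream: true,
    timings_per_token: true,
    onChunk: (chunk) => (out += chunk),             // accumulate streamed tokens
    onComplete: (text) => console.log(text.length), // full aggregated response
    onError: (err) => console.error(err.message)
  },
  controller.signal // controller.abort() cancels the request mid-stream
);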
@@ -0,0 +1,39 @@
import { getJsonHeaders } from '$lib/utils';

/**
 * Tokenizes the provided text using the server's tokenizer.
 *
 * @param content - The text content to tokenize
 * @param model - Optional model name to use for tokenization (required in router mode)
 * @param signal - Optional AbortSignal
 * @returns {Promise<number[]>} Promise that resolves to an array of token IDs
 */
export async function tokenize(
  content: string,
  model?: string,
  signal?: AbortSignal
): Promise<number[]> {
  try {
    const body: { content: string; model?: string } = { content };
    if (model) {
      body.model = model;
    }

    const response = await fetch('./tokenize', {
      method: 'POST',
      headers: getJsonHeaders(),
      body: JSON.stringify(body),
      signal
    });

    if (!response.ok) {
      throw new Error(`Tokenize failed: ${response.statusText}`);
    }

    const data = await response.json();
    return data.tokens;
  } catch (error) {
    console.error('Tokenize error:', error);
    return [];
  }
}
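A sketch of the intended call pattern (the endpoint is expected to return `{ tokens: number[] }`; the model id is a placeholder):

// Hypothetical caller, for illustration only:
const tokens = await tokenize('Hello world', 'gpt-oss');
console.log(tokens.length);
// Note: failures are swallowed and yield [], so callers cannot
// distinguish an error from genuinely empty input.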
@@ -0,0 +1,161 @@
import { CompletionService } from '$lib/services/completion';
import { config } from '$lib/stores/settings.svelte';
import { tokenize } from '$lib/services/tokenize';
import type { ChatMessageTimings, ChatMessagePromptProgress } from '$lib/types/chat';

export class NotebookStore {
  content = $state('');
  isGenerating = $state(false);
  abortController: AbortController | null = null;

  // Statistics
  cacheTokens = $state(0);
  promptTokens = $state(0);
  promptMs = $state(0);
  predictedTokens = $state(0);
  predictedMs = $state(0);
  totalTokens = $state(0);
  generationStartTokens = $state(0);
  generationEndTokens = $state(0);
  tokenizeTimeout: ReturnType<typeof setTimeout> | undefined;

  error = $state<{
    message: string;
    type: 'timeout' | 'server';
    contextInfo?: { n_prompt_tokens: number; n_ctx: number };
  } | null>(null);

  previousContent = $state<string | null>(null);
  undoneContent = $state<string | null>(null);

  async generate(model?: string) {
    if (this.isGenerating) return;

    this.previousContent = this.content;
    this.undoneContent = null;
    this.isGenerating = true;
    this.abortController = new AbortController();
    this.error = null;

    // Reset stats
    this.cacheTokens = 0;
    this.promptTokens = 0;
    this.promptMs = 0;
    this.predictedTokens = 0;
    this.predictedMs = 0;

    // Save number of tokens before generation
    this.generationStartTokens = this.totalTokens;

    try {
      const currentConfig = config();
      await CompletionService.sendCompletion(
        this.content,
        {
          ...currentConfig,
          model,
          stream: true,
          timings_per_token: true,
          onChunk: (chunk: string) => {
            this.content += chunk;
          },
          onTimings: (timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => {
            if (timings) {
              if (timings.cache_n) this.cacheTokens = timings.cache_n;
              if (timings.prompt_n) this.promptTokens = timings.prompt_n;
              if (timings.prompt_ms) this.promptMs = timings.prompt_ms;
              if (timings.predicted_n) this.predictedTokens = timings.predicted_n;
              if (timings.predicted_ms) this.predictedMs = timings.predicted_ms;
            }

            if (promptProgress) {
              // Update prompt stats from progress
              const { processed, time_ms } = promptProgress;
              if (processed > 0) this.promptTokens = processed;
              if (time_ms > 0) this.promptMs = time_ms;
            }

            // Update totalTokens live
            this.totalTokens = this.cacheTokens + this.promptTokens + this.predictedTokens;
          },
          onComplete: () => {
            this.isGenerating = false;
          },
          onError: (error: unknown) => {
            if (error instanceof Error && error.name === 'AbortError') {
              // aborted by user
            } else {
              console.error('Notebook generation error:', error);
              this.error = {
                message: error instanceof Error ? error.message : String(error),
                type: 'server'
              };
            }
            this.isGenerating = false;
          }
        },
        this.abortController.signal
      );
    } catch (error) {
      console.error('Notebook generation failed:', error);
      this.error = {
        message: error instanceof Error ? error.message : String(error),
        type: 'server'
      };
      this.isGenerating = false;
    }
    // Save number of tokens after generation
    this.generationEndTokens = this.totalTokens;
  }

  dismissError() {
    this.error = null;
  }

  undo() {
    if (this.previousContent !== null) {
      this.undoneContent = this.content;
      this.content = this.previousContent;
      this.previousContent = null;
      this.totalTokens = this.generationStartTokens;
    }
  }

  redo() {
    if (this.undoneContent !== null) {
      this.previousContent = this.content;
      this.content = this.undoneContent;
      this.undoneContent = null;
      this.totalTokens = this.generationEndTokens;
    }
  }

  resetUndoRedo() {
    this.previousContent = null;
    this.undoneContent = null;
  }

  stop() {
    if (this.abortController) {
      this.abortController.abort();
      this.abortController = null;
    }
    this.isGenerating = false;
  }

  updateTokenCount(model?: string) {
    if (this.tokenizeTimeout) {
      clearTimeout(this.tokenizeTimeout);
    }

    this.tokenizeTimeout = setTimeout(async () => {
      if (this.content.length === 0) {
        this.totalTokens = 0;
        return;
      }
      const tokens = await tokenize(this.content, model);
      this.totalTokens = tokens.length;
    }, 500);
  }
}

export const notebookStore = new NotebookStore();
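A usage sketch of the store's public surface. The undo/redo is a single slot (one generation deep), not a history stack; 'gpt-oss' is a placeholder model id:

// Hypothetical driver code, not part of this commit:
notebookStore.content = 'Once upon a time';
await notebookStore.generate('gpt-oss'); // streams the completion into content
notebookStore.undo();                    // restore the pre-generation text
notebookStore.redo();                    // re-apply the generated text
notebookStore.stop();                    // abort an in-flight generation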
@@ -219,6 +219,39 @@ export interface ApiChatCompletionRequest {
   timings_per_token?: boolean;
 }

+export interface ApiCompletionRequest {
+  prompt: string;
+  stream?: boolean;
+  model?: string;
+  // Generation parameters
+  temperature?: number;
+  max_tokens?: number;
+  // Sampling parameters
+  dynatemp_range?: number;
+  dynatemp_exponent?: number;
+  top_k?: number;
+  top_p?: number;
+  min_p?: number;
+  xtc_probability?: number;
+  xtc_threshold?: number;
+  typ_p?: number;
+  // Penalty parameters
+  repeat_last_n?: number;
+  repeat_penalty?: number;
+  presence_penalty?: number;
+  frequency_penalty?: number;
+  dry_multiplier?: number;
+  dry_base?: number;
+  dry_allowed_length?: number;
+  dry_penalty_last_n?: number;
+  // Sampler configuration
+  samplers?: string[];
+  backend_sampling?: boolean;
+  // Custom parameters (JSON string)
+  custom?: Record<string, unknown>;
+  timings_per_token?: boolean;
+}
+
 export interface ApiChatCompletionToolCallFunctionDelta {
   name?: string;
   arguments?: string;
@@ -258,6 +291,33 @@ export interface ApiChatCompletionStreamChunk {
   prompt_progress?: ChatMessagePromptProgress;
 }

+export interface ApiCompletionStreamChunk {
+  content: string;
+  stop: boolean;
+  model: string;
+  timings?: {
+    prompt_n?: number;
+    prompt_ms?: number;
+    predicted_n?: number;
+    predicted_ms?: number;
+    cache_n?: number;
+  };
+  prompt_progress?: ChatMessagePromptProgress;
+}
+
+export interface ApiCompletionResponse {
+  content: string;
+  stop: boolean;
+  model: string;
+  timings?: {
+    prompt_n?: number;
+    prompt_ms?: number;
+    predicted_n?: number;
+    predicted_ms?: number;
+    cache_n?: number;
+  };
+}
+
 export interface ApiChatCompletionResponse {
   model?: string;
   choices: Array<{
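To make the new wire types concrete, a hand-written example of a request body and one parsed stream chunk that satisfy the interfaces above (all values illustrative, 'gpt-oss' a placeholder id):

// Request body sent to ./completion:
const req: ApiCompletionRequest = {
  prompt: 'Once upon a time',
  stream: true,
  model: 'gpt-oss',
  temperature: 0.8,
  max_tokens: 128,
  timings_per_token: true
};

// One SSE "data:" payload, parsed:
const chunk: ApiCompletionStreamChunk = {
  content: ' there',
  stop: false,
  model: 'gpt-oss',
  timings: { prompt_n: 4, prompt_ms: 12.5, predicted_n: 2, predicted_ms: 40.1 }
};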
@@ -21,6 +21,7 @@

   let isChatRoute = $derived(page.route.id === '/chat/[id]');
   let isHomeRoute = $derived(page.route.id === '/');
+  let isNotebookRoute = $derived(page.route.id === '/notebook');
   let isNewChatMode = $derived(page.url.searchParams.get('new_chat') === 'true');
   let showSidebarByDefault = $derived(activeMessages().length > 0 || isLoading());
   let alwaysShowSidebarOnDesktop = $derived(config().alwaysShowSidebarOnDesktop);

@@ -93,7 +94,7 @@
     } else if (isHomeRoute && isNewChatMode) {
       // Keep sidebar open in new chat mode
       sidebarOpen = true;
-    } else if (isChatRoute) {
+    } else if (isChatRoute || isNotebookRoute) {
       // On chat routes, only auto-show sidebar if setting is enabled
       if (autoShowSidebarOnNewChat) {
         sidebarOpen = true;
@@ -0,0 +1,9 @@
<script lang="ts">
  import { NotebookScreen } from '$lib/components/app';
</script>

<svelte:head>
  <title>Notebook - llama.cpp</title>
</svelte:head>

<NotebookScreen />