diff --git a/tools/server/public/index.html.gz b/tools/server/public/index.html.gz index 07f7b7e422..20523afa33 100644 Binary files a/tools/server/public/index.html.gz and b/tools/server/public/index.html.gz differ diff --git a/tools/server/webui/src/lib/components/app/models/ModelId.svelte b/tools/server/webui/src/lib/components/app/models/ModelId.svelte index 9b25d05c13..5fda493429 100644 --- a/tools/server/webui/src/lib/components/app/models/ModelId.svelte +++ b/tools/server/webui/src/lib/components/app/models/ModelId.svelte @@ -28,6 +28,11 @@ let parsed = $derived(ModelsService.parseModelId(modelId)); let resolvedShowRaw = $derived(showRaw ?? (config().showRawModelNames as boolean) ?? false); + let displayName = $derived( + aliases && aliases.length > 0 ? aliases[0] : (parsed.modelName ?? modelId) + ); + let remainingAliases = $derived(aliases && aliases.length > 1 ? aliases.slice(1) : []); + let allTags = $derived([...(parsed.tags ?? []), ...(tags ?? [])]); {#if resolvedShowRaw} @@ -35,7 +40,7 @@ {:else} - {#if showOrgName && parsed.orgName}{parsed.orgName}/{/if}{parsed.modelName ?? modelId} + {#if showOrgName && parsed.orgName && !(aliases && aliases.length > 0)}{parsed.orgName}/{/if}{displayName} {#if parsed.params} @@ -50,14 +55,14 @@ {/if} - {#if aliases && aliases.length > 0} - {#each aliases as alias (alias)} + {#if remainingAliases.length > 0} + {#each remainingAliases as alias (alias)} {alias} {/each} {/if} - {#if tags && tags.length > 0} - {#each tags as tag (tag)} + {#if allTags.length > 0} + {#each allTags as tag (tag)} {tag} {/each} {/if} diff --git a/tools/server/webui/src/lib/constants/model-id.ts b/tools/server/webui/src/lib/constants/model-id.ts index eb6662a02d..ee314d1674 100644 --- a/tools/server/webui/src/lib/constants/model-id.ts +++ b/tools/server/webui/src/lib/constants/model-id.ts @@ -11,10 +11,16 @@ export const MODEL_ID_SEGMENT_SEPARATOR = '-'; export const MODEL_ID_QUANTIZATION_SEPARATOR = ':'; /** - * Matches a trailing ALL-CAPS format segment, e.g. `GGUF`, `BF16`, `Q4_K_M`. - * Must be at least 2 uppercase letters, optionally followed by uppercase letters or digits. + * Matches a quantization/precision segment, e.g. `Q4_K_M`, `IQ4_XS`, `F16`, `BF16`, `MXFP4`. + * Case-insensitive to handle both uppercase and lowercase inputs. */ -export const MODEL_FORMAT_SEGMENT_RE = /^[A-Z]{2,}[A-Z0-9]*$/; +export const MODEL_QUANTIZATION_SEGMENT_RE = + /^(I?Q\d+(_[A-Z0-9]+)*|F\d+|BF\d+|MXFP\d+(_[A-Z0-9]+)*)$/i; + +/** + * Matches prefix for custom quantization types, e.g. `UD-Q8_K_XL`. + */ +export const MODEL_CUSTOM_QUANTIZATION_PREFIX_RE = /^UD$/i; /** * Matches a parameter-count segment, e.g. `7B`, `1.5b`, `120M`. @@ -22,7 +28,12 @@ export const MODEL_FORMAT_SEGMENT_RE = /^[A-Z]{2,}[A-Z0-9]*$/; export const MODEL_PARAMS_RE = /^\d+(\.\d+)?[BbMmKkTt]$/; /** - * Matches an activated-parameter-count segment, e.g. `A10B`, `A2.4b`. - * The leading `A` distinguishes it from a regular params segment. + * Matches an activated-parameter-count segment, e.g. `A10B`, `a2.4b`. + * The leading `A`/`a` distinguishes it from a regular params segment. */ -export const MODEL_ACTIVATED_PARAMS_RE = /^A\d+(\.\d+)?[BbMmKkTt]$/; +export const MODEL_ACTIVATED_PARAMS_RE = /^[Aa]\d+(\.\d+)?[BbMmKkTt]$/; + +/** + * Container format segments to exclude from tags (every model uses these). + */ +export const MODEL_IGNORED_SEGMENTS = new Set(['GGUF', 'GGML']); diff --git a/tools/server/webui/src/lib/services/models.service.ts b/tools/server/webui/src/lib/services/models.service.ts index de90c48cf0..209bd7caba 100644 --- a/tools/server/webui/src/lib/services/models.service.ts +++ b/tools/server/webui/src/lib/services/models.service.ts @@ -2,9 +2,11 @@ import { ServerModelStatus } from '$lib/enums'; import { apiFetch, apiPost } from '$lib/utils'; import type { ParsedModelId } from '$lib/types/models'; import { - MODEL_FORMAT_SEGMENT_RE, + MODEL_QUANTIZATION_SEGMENT_RE, + MODEL_CUSTOM_QUANTIZATION_PREFIX_RE, MODEL_PARAMS_RE, MODEL_ACTIVATED_PARAMS_RE, + MODEL_IGNORED_SEGMENTS, MODEL_ID_NOT_FOUND, MODEL_ID_ORG_SEPARATOR, MODEL_ID_SEGMENT_SEPARATOR, @@ -119,8 +121,9 @@ export class ModelsService { /** * Parse a model ID string into its structured components. * - * Handles the convention: - * `/-(-)-:` + * Handles conventions like: + * `/-(-)(-)(-):` + * `.` (dot-separated quantization, e.g. `model.Q4_K_M`) * * @param modelId - Raw model identifier string * @returns Structured {@link ParsedModelId} with all detected fields @@ -132,11 +135,11 @@ export class ModelsService { modelName: null, params: null, activatedParams: null, - format: null, quantization: null, tags: [] }; + // 1. Extract colon-separated quantization (e.g. `model:Q4_K_M`) const colonIdx = modelId.indexOf(MODEL_ID_QUANTIZATION_SEPARATOR); let modelPath: string; @@ -147,6 +150,7 @@ export class ModelsService { modelPath = modelId; } + // 2. Extract org name (e.g. `org/model` -> org = "org") const slashIdx = modelPath.indexOf(MODEL_ID_ORG_SEPARATOR); let modelStr: string; @@ -157,37 +161,66 @@ export class ModelsService { modelStr = modelPath; } - const segments = modelStr.split(MODEL_ID_SEGMENT_SEPARATOR); + // 3. Handle dot-separated quantization (e.g. `model-name.Q4_K_M`) + const dotIdx = modelStr.lastIndexOf('.'); - if (segments.length > 0 && MODEL_FORMAT_SEGMENT_RE.test(segments[segments.length - 1])) { - result.format = segments.pop()!; + if (dotIdx !== MODEL_ID_NOT_FOUND && !result.quantization) { + const afterDot = modelStr.slice(dotIdx + 1); + + if (MODEL_QUANTIZATION_SEGMENT_RE.test(afterDot)) { + result.quantization = afterDot; + modelStr = modelStr.slice(0, dotIdx); + } } - const paramsRe = MODEL_PARAMS_RE; - const activatedParamsRe = MODEL_ACTIVATED_PARAMS_RE; + const segments = modelStr.split(MODEL_ID_SEGMENT_SEPARATOR); + // 4. Detect trailing quantization from dash-separated segments + // Handle UD-prefixed quantization (e.g. `UD-Q8_K_XL`) and + // standalone quantization (e.g. `Q4_K_M`, `BF16`, `F16`, `MXFP4`) + if (!result.quantization && segments.length > 1) { + const last = segments[segments.length - 1]; + const secondLast = segments.length > 2 ? segments[segments.length - 2] : null; + + if (MODEL_QUANTIZATION_SEGMENT_RE.test(last)) { + if (secondLast && MODEL_CUSTOM_QUANTIZATION_PREFIX_RE.test(secondLast)) { + result.quantization = `${secondLast}-${last}`; + segments.splice(segments.length - 2, 2); + } else { + result.quantization = last; + segments.pop(); + } + } + } + + // 5. Find params and activated params let paramsIdx = MODEL_ID_NOT_FOUND; let activatedParamsIdx = MODEL_ID_NOT_FOUND; for (let i = 0; i < segments.length; i++) { const seg = segments[i]; - if (paramsIdx === -1 && paramsRe.test(seg)) { + + if (paramsIdx === MODEL_ID_NOT_FOUND && MODEL_PARAMS_RE.test(seg)) { paramsIdx = i; result.params = seg.toUpperCase(); - } else if (activatedParamsRe.test(seg)) { + } else if (paramsIdx !== MODEL_ID_NOT_FOUND && MODEL_ACTIVATED_PARAMS_RE.test(seg)) { activatedParamsIdx = i; result.activatedParams = seg.toUpperCase(); } } + // 6. Model name = segments before params; tags = remaining segments after params const pivotIdx = paramsIdx !== MODEL_ID_NOT_FOUND ? paramsIdx : segments.length; result.modelName = segments.slice(0, pivotIdx).join(MODEL_ID_SEGMENT_SEPARATOR) || null; if (paramsIdx !== MODEL_ID_NOT_FOUND) { - result.tags = segments - .slice(paramsIdx + 1) - .filter((_, relIdx) => paramsIdx + 1 + relIdx !== activatedParamsIdx); + result.tags = segments.slice(paramsIdx + 1).filter((_, relIdx) => { + const absIdx = paramsIdx + 1 + relIdx; + if (absIdx === activatedParamsIdx) return false; + + return !MODEL_IGNORED_SEGMENTS.has(segments[absIdx].toUpperCase()); + }); } return result; diff --git a/tools/server/webui/src/lib/types/models.d.ts b/tools/server/webui/src/lib/types/models.d.ts index dc8e86485c..b4d5f11f57 100644 --- a/tools/server/webui/src/lib/types/models.d.ts +++ b/tools/server/webui/src/lib/types/models.d.ts @@ -25,7 +25,6 @@ export interface ParsedModelId { modelName: string | null; params: string | null; activatedParams: string | null; - format: string | null; quantization: string | null; tags: string[]; } diff --git a/tools/server/webui/tests/unit/model-id-parser.test.ts b/tools/server/webui/tests/unit/model-id-parser.test.ts new file mode 100644 index 0000000000..3c2937d356 --- /dev/null +++ b/tools/server/webui/tests/unit/model-id-parser.test.ts @@ -0,0 +1,270 @@ +import { describe, expect, it } from 'vitest'; +import { ModelsService } from '$lib/services/models.service'; + +const { parseModelId } = ModelsService; + +describe('parseModelId', () => { + it('handles unknown patterns correctly', () => { + expect(parseModelId('model-name-1')).toStrictEqual({ + activatedParams: null, + modelName: 'model-name-1', + orgName: null, + params: null, + quantization: null, + raw: 'model-name-1', + tags: [] + }); + + expect(parseModelId('org/model-name-2')).toStrictEqual({ + activatedParams: null, + modelName: 'model-name-2', + orgName: 'org', + params: null, + quantization: null, + raw: 'org/model-name-2', + tags: [] + }); + }); + + it('extracts model parameters correctly', () => { + expect(parseModelId('model-100B-BF16')).toMatchObject({ params: '100B' }); + expect(parseModelId('model-100B:Q4_K_M')).toMatchObject({ params: '100B' }); + }); + + it('extracts model parameters correctly in lowercase', () => { + expect(parseModelId('model-100b-bf16')).toMatchObject({ params: '100B' }); + expect(parseModelId('model-100b:q4_k_m')).toMatchObject({ params: '100B' }); + }); + + it('extracts activated parameters correctly', () => { + expect(parseModelId('model-100B-A10B-BF16')).toMatchObject({ activatedParams: 'A10B' }); + expect(parseModelId('model-100B-A10B:Q4_K_M')).toMatchObject({ activatedParams: 'A10B' }); + }); + + it('extracts activated parameters correctly in lowercase', () => { + expect(parseModelId('model-100b-a10b-bf16')).toMatchObject({ activatedParams: 'A10B' }); + expect(parseModelId('model-100b-a10b:q4_k_m')).toMatchObject({ activatedParams: 'A10B' }); + }); + + it('extracts quantization correctly', () => { + // Dash-separated quantization + expect(parseModelId('model-100B-UD-IQ1_S')).toMatchObject({ quantization: 'UD-IQ1_S' }); + expect(parseModelId('model-100B-IQ4_XS')).toMatchObject({ quantization: 'IQ4_XS' }); + expect(parseModelId('model-100B-Q4_K_M')).toMatchObject({ quantization: 'Q4_K_M' }); + expect(parseModelId('model-100B-Q8_0')).toMatchObject({ quantization: 'Q8_0' }); + expect(parseModelId('model-100B-UD-Q8_K_XL')).toMatchObject({ quantization: 'UD-Q8_K_XL' }); + expect(parseModelId('model-100B-F16')).toMatchObject({ quantization: 'F16' }); + expect(parseModelId('model-100B-BF16')).toMatchObject({ quantization: 'BF16' }); + expect(parseModelId('model-100B-MXFP4')).toMatchObject({ quantization: 'MXFP4' }); + + // Colon-separated quantization + expect(parseModelId('model-100B:UD-IQ1_S')).toMatchObject({ quantization: 'UD-IQ1_S' }); + expect(parseModelId('model-100B:IQ4_XS')).toMatchObject({ quantization: 'IQ4_XS' }); + expect(parseModelId('model-100B:Q4_K_M')).toMatchObject({ quantization: 'Q4_K_M' }); + expect(parseModelId('model-100B:Q8_0')).toMatchObject({ quantization: 'Q8_0' }); + expect(parseModelId('model-100B:UD-Q8_K_XL')).toMatchObject({ quantization: 'UD-Q8_K_XL' }); + expect(parseModelId('model-100B:F16')).toMatchObject({ quantization: 'F16' }); + expect(parseModelId('model-100B:BF16')).toMatchObject({ quantization: 'BF16' }); + expect(parseModelId('model-100B:MXFP4')).toMatchObject({ quantization: 'MXFP4' }); + + // Dot-separated quantization + expect(parseModelId('nomic-embed-text-v2-moe.Q4_K_M')).toMatchObject({ + quantization: 'Q4_K_M' + }); + }); + + it('extracts additional tags correctly', () => { + expect(parseModelId('model-100B-foobar-Q4_K_M')).toMatchObject({ tags: ['foobar'] }); + expect(parseModelId('model-100B-A10B-foobar-1M-BF16')).toMatchObject({ + tags: ['foobar', '1M'] + }); + expect(parseModelId('model-100B-1M-foobar:UD-Q8_K_XL')).toMatchObject({ + tags: ['1M', 'foobar'] + }); + }); + + it('filters out container format segments from tags', () => { + expect(parseModelId('model-100B-GGUF-Instruct-BF16')).toMatchObject({ + tags: ['Instruct'] + }); + expect(parseModelId('model-100B-GGML-Instruct:Q4_K_M')).toMatchObject({ + tags: ['Instruct'] + }); + }); + + it('handles real-world examples correctly', () => { + expect(parseModelId('meta-llama/Llama-3.1-8B')).toStrictEqual({ + activatedParams: null, + modelName: 'Llama-3.1', + orgName: 'meta-llama', + params: '8B', + quantization: null, + raw: 'meta-llama/Llama-3.1-8B', + tags: [] + }); + + expect(parseModelId('openai/gpt-oss-120b-MXFP4')).toStrictEqual({ + activatedParams: null, + modelName: 'gpt-oss', + orgName: 'openai', + params: '120B', + quantization: 'MXFP4', + raw: 'openai/gpt-oss-120b-MXFP4', + tags: [] + }); + + expect(parseModelId('openai/gpt-oss-20b:Q4_K_M')).toStrictEqual({ + activatedParams: null, + modelName: 'gpt-oss', + orgName: 'openai', + params: '20B', + quantization: 'Q4_K_M', + raw: 'openai/gpt-oss-20b:Q4_K_M', + tags: [] + }); + + expect(parseModelId('Qwen/Qwen3-Coder-30B-A3B-Instruct-1M-BF16')).toStrictEqual({ + activatedParams: 'A3B', + modelName: 'Qwen3-Coder', + orgName: 'Qwen', + params: '30B', + quantization: 'BF16', + raw: 'Qwen/Qwen3-Coder-30B-A3B-Instruct-1M-BF16', + tags: ['Instruct', '1M'] + }); + }); + + it('handles real-world examples with quantization in segments', () => { + expect(parseModelId('meta-llama/Llama-4-Scout-17B-16E-Instruct-Q4_K_M')).toStrictEqual({ + activatedParams: null, + modelName: 'Llama-4-Scout', + orgName: 'meta-llama', + params: '17B', + quantization: 'Q4_K_M', + raw: 'meta-llama/Llama-4-Scout-17B-16E-Instruct-Q4_K_M', + tags: ['16E', 'Instruct'] + }); + + expect(parseModelId('MiniMaxAI/MiniMax-M2-IQ4_XS')).toStrictEqual({ + activatedParams: null, + modelName: 'MiniMax-M2', + orgName: 'MiniMaxAI', + params: null, + quantization: 'IQ4_XS', + raw: 'MiniMaxAI/MiniMax-M2-IQ4_XS', + tags: [] + }); + + expect(parseModelId('MiniMaxAI/MiniMax-M2-UD-Q3_K_XL')).toStrictEqual({ + activatedParams: null, + modelName: 'MiniMax-M2', + orgName: 'MiniMaxAI', + params: null, + quantization: 'UD-Q3_K_XL', + raw: 'MiniMaxAI/MiniMax-M2-UD-Q3_K_XL', + tags: [] + }); + + expect(parseModelId('mistralai/Devstral-2-123B-Instruct-2512-Q4_K_M')).toStrictEqual({ + activatedParams: null, + modelName: 'Devstral-2', + orgName: 'mistralai', + params: '123B', + quantization: 'Q4_K_M', + raw: 'mistralai/Devstral-2-123B-Instruct-2512-Q4_K_M', + tags: ['Instruct', '2512'] + }); + + expect(parseModelId('mistralai/Devstral-Small-2-24B-Instruct-2512-Q8_0')).toStrictEqual({ + activatedParams: null, + modelName: 'Devstral-Small-2', + orgName: 'mistralai', + params: '24B', + quantization: 'Q8_0', + raw: 'mistralai/Devstral-Small-2-24B-Instruct-2512-Q8_0', + tags: ['Instruct', '2512'] + }); + + expect(parseModelId('noctrex/GLM-4.7-Flash-MXFP4_MOE')).toStrictEqual({ + activatedParams: null, + modelName: 'GLM-4.7-Flash', + orgName: 'noctrex', + params: null, + quantization: 'MXFP4_MOE', + raw: 'noctrex/GLM-4.7-Flash-MXFP4_MOE', + tags: [] + }); + + expect(parseModelId('Qwen/Qwen3-Coder-Next-Q4_K_M')).toStrictEqual({ + activatedParams: null, + modelName: 'Qwen3-Coder-Next', + orgName: 'Qwen', + params: null, + quantization: 'Q4_K_M', + raw: 'Qwen/Qwen3-Coder-Next-Q4_K_M', + tags: [] + }); + + expect(parseModelId('openai/gpt-oss-120b-Q4_K_M')).toStrictEqual({ + activatedParams: null, + modelName: 'gpt-oss', + orgName: 'openai', + params: '120B', + quantization: 'Q4_K_M', + raw: 'openai/gpt-oss-120b-Q4_K_M', + tags: [] + }); + + expect(parseModelId('openai/gpt-oss-20b-F16')).toStrictEqual({ + activatedParams: null, + modelName: 'gpt-oss', + orgName: 'openai', + params: '20B', + quantization: 'F16', + raw: 'openai/gpt-oss-20b-F16', + tags: [] + }); + + expect(parseModelId('nomic-embed-text-v2-moe.Q4_K_M')).toStrictEqual({ + activatedParams: null, + modelName: 'nomic-embed-text-v2-moe', + orgName: null, + params: null, + quantization: 'Q4_K_M', + raw: 'nomic-embed-text-v2-moe.Q4_K_M', + tags: [] + }); + }); + + it('handles ambiguous model names', () => { + // Qwen3.5 Instruct vs Thinking — tags should distinguish them + expect(parseModelId('Qwen/Qwen3.5-30B-A3B-Instruct')).toMatchObject({ + modelName: 'Qwen3.5', + params: '30B', + activatedParams: 'A3B', + tags: ['Instruct'] + }); + + expect(parseModelId('Qwen/Qwen3.5-30B-A3B-Thinking')).toMatchObject({ + modelName: 'Qwen3.5', + params: '30B', + activatedParams: 'A3B', + tags: ['Thinking'] + }); + + // Dot-separated quantization with variant suffixes + expect(parseModelId('gemma-3-27b-it-heretic-v2.Q8_0')).toMatchObject({ + modelName: 'gemma-3', + params: '27B', + quantization: 'Q8_0', + tags: ['it', 'heretic', 'v2'] + }); + + expect(parseModelId('gemma-3-27b-it.Q8_0')).toMatchObject({ + modelName: 'gemma-3', + params: '27B', + quantization: 'Q8_0', + tags: ['it'] + }); + }); +});