diff --git a/tools/server/public/index.html.gz b/tools/server/public/index.html.gz
index 07f7b7e422..20523afa33 100644
Binary files a/tools/server/public/index.html.gz and b/tools/server/public/index.html.gz differ
diff --git a/tools/server/webui/src/lib/components/app/models/ModelId.svelte b/tools/server/webui/src/lib/components/app/models/ModelId.svelte
index 9b25d05c13..5fda493429 100644
--- a/tools/server/webui/src/lib/components/app/models/ModelId.svelte
+++ b/tools/server/webui/src/lib/components/app/models/ModelId.svelte
@@ -28,6 +28,11 @@
let parsed = $derived(ModelsService.parseModelId(modelId));
let resolvedShowRaw = $derived(showRaw ?? (config().showRawModelNames as boolean) ?? false);
+ // The first alias, when present, replaces the parsed model name as the primary label.
+ let displayName = $derived(aliases?.length ? aliases[0] : (parsed.modelName ?? modelId));
+ let remainingAliases = $derived(aliases && aliases.length > 1 ? aliases.slice(1) : []);
+ // `parsed.tags` and `tags` may overlap; dedupe so the keyed {#each} never sees a repeated key.
+ let allTags = $derived([...new Set([...(parsed.tags ?? []), ...(tags ?? [])])]);
{#if resolvedShowRaw}
@@ -35,7 +40,7 @@
{:else}
- {#if showOrgName && parsed.orgName}{parsed.orgName}/{/if}{parsed.modelName ?? modelId}
+ {#if showOrgName && parsed.orgName && !(aliases && aliases.length > 0)}{parsed.orgName}/{/if}{displayName}
{#if parsed.params}
@@ -50,14 +55,14 @@
{/if}
- {#if aliases && aliases.length > 0}
- {#each aliases as alias (alias)}
+ {#if remainingAliases.length > 0}
+ {#each remainingAliases as alias (alias)}
{alias}
{/each}
{/if}
- {#if tags && tags.length > 0}
- {#each tags as tag (tag)}
+ {#if allTags.length > 0}
+ {#each allTags as tag (tag)}
{tag}
{/each}
{/if}
diff --git a/tools/server/webui/src/lib/constants/model-id.ts b/tools/server/webui/src/lib/constants/model-id.ts
index eb6662a02d..ee314d1674 100644
--- a/tools/server/webui/src/lib/constants/model-id.ts
+++ b/tools/server/webui/src/lib/constants/model-id.ts
@@ -11,10 +11,16 @@ export const MODEL_ID_SEGMENT_SEPARATOR = '-';
export const MODEL_ID_QUANTIZATION_SEPARATOR = ':';
/**
- * Matches a trailing ALL-CAPS format segment, e.g. `GGUF`, `BF16`, `Q4_K_M`.
- * Must be at least 2 uppercase letters, optionally followed by uppercase letters or digits.
+ * Matches a quantization/precision segment, e.g. `Q4_K_M`, `IQ4_XS`, `TQ1_0`, `F16`, `BF16`, `MXFP4`.
+ * Case-insensitive to handle both uppercase and lowercase inputs.
*/
-export const MODEL_FORMAT_SEGMENT_RE = /^[A-Z]{2,}[A-Z0-9]*$/;
+export const MODEL_QUANTIZATION_SEGMENT_RE =
+ /^([IT]?Q\d+(_[A-Z0-9]+)*|F\d+|BF\d+|MXFP\d+(_[A-Z0-9]+)*)$/i;
+
+/**
+ * Matches the prefix segment of a custom quantization type, e.g. the `UD` in `UD-Q8_K_XL`.
+ */
+export const MODEL_CUSTOM_QUANTIZATION_PREFIX_RE = /^UD$/i;
/**
* Matches a parameter-count segment, e.g. `7B`, `1.5b`, `120M`.
@@ -22,7 +28,12 @@ export const MODEL_FORMAT_SEGMENT_RE = /^[A-Z]{2,}[A-Z0-9]*$/;
export const MODEL_PARAMS_RE = /^\d+(\.\d+)?[BbMmKkTt]$/;
/**
- * Matches an activated-parameter-count segment, e.g. `A10B`, `A2.4b`.
- * The leading `A` distinguishes it from a regular params segment.
+ * Matches an activated-parameter-count segment, e.g. `A10B`, `a2.4b`.
+ * The leading `A`/`a` distinguishes it from a regular params segment.
*/
-export const MODEL_ACTIVATED_PARAMS_RE = /^A\d+(\.\d+)?[BbMmKkTt]$/;
+export const MODEL_ACTIVATED_PARAMS_RE = /^[Aa]\d+(\.\d+)?[BbMmKkTt]$/;
+
+/**
+ * Uppercase container-format segments to exclude from tags (every model uses these).
+ */
+export const MODEL_IGNORED_SEGMENTS = new Set(['GGUF', 'GGML']);
diff --git a/tools/server/webui/src/lib/services/models.service.ts b/tools/server/webui/src/lib/services/models.service.ts
index de90c48cf0..209bd7caba 100644
--- a/tools/server/webui/src/lib/services/models.service.ts
+++ b/tools/server/webui/src/lib/services/models.service.ts
@@ -2,9 +2,11 @@ import { ServerModelStatus } from '$lib/enums';
import { apiFetch, apiPost } from '$lib/utils';
import type { ParsedModelId } from '$lib/types/models';
import {
- MODEL_FORMAT_SEGMENT_RE,
+ MODEL_QUANTIZATION_SEGMENT_RE,
+ MODEL_CUSTOM_QUANTIZATION_PREFIX_RE,
MODEL_PARAMS_RE,
MODEL_ACTIVATED_PARAMS_RE,
+ MODEL_IGNORED_SEGMENTS,
MODEL_ID_NOT_FOUND,
MODEL_ID_ORG_SEPARATOR,
MODEL_ID_SEGMENT_SEPARATOR,
@@ -119,8 +121,9 @@ export class ModelsService {
/**
* Parse a model ID string into its structured components.
*
- * Handles the convention:
- * `<org>/<ModelName>-<Parameters>(-<ActivatedParameters>)-<Format>:<QuantizationType>`
+ * Handles conventions like:
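+ * `<org>/<ModelName>-<Parameters>(-<ActivatedParameters>)(-<Tags>)(-<Quantization>):<Quantization>`
+ * `<ModelName>.<Quantization>` (dot-separated quantization, e.g. `model.Q4_K_M`)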
*
* @param modelId - Raw model identifier string
* @returns Structured {@link ParsedModelId} with all detected fields
@@ -132,11 +135,11 @@ export class ModelsService {
modelName: null,
params: null,
activatedParams: null,
- format: null,
quantization: null,
tags: []
};
+ // 1. Extract colon-separated quantization (e.g. `model:Q4_K_M`)
const colonIdx = modelId.indexOf(MODEL_ID_QUANTIZATION_SEPARATOR);
let modelPath: string;
@@ -147,6 +150,7 @@ export class ModelsService {
modelPath = modelId;
}
+ // 2. Extract org name (e.g. `org/model` -> org = "org")
const slashIdx = modelPath.indexOf(MODEL_ID_ORG_SEPARATOR);
let modelStr: string;
@@ -157,37 +161,66 @@ export class ModelsService {
modelStr = modelPath;
}
- const segments = modelStr.split(MODEL_ID_SEGMENT_SEPARATOR);
+ // 3. Handle dot-separated quantization (e.g. `model-name.Q4_K_M`)
+ const dotIdx = modelStr.lastIndexOf('.');
- if (segments.length > 0 && MODEL_FORMAT_SEGMENT_RE.test(segments[segments.length - 1])) {
- result.format = segments.pop()!;
+ if (dotIdx !== MODEL_ID_NOT_FOUND && !result.quantization) {
+ const afterDot = modelStr.slice(dotIdx + 1);
+
+ if (MODEL_QUANTIZATION_SEGMENT_RE.test(afterDot)) {
+ result.quantization = afterDot;
+ modelStr = modelStr.slice(0, dotIdx);
+ }
}
- const paramsRe = MODEL_PARAMS_RE;
- const activatedParamsRe = MODEL_ACTIVATED_PARAMS_RE;
+ const segments = modelStr.split(MODEL_ID_SEGMENT_SEPARATOR);
+ // 4. Detect trailing quantization from dash-separated segments
+ // Handle UD-prefixed quantization (e.g. `UD-Q8_K_XL`) and
+ // standalone quantization (e.g. `Q4_K_M`, `BF16`, `F16`, `MXFP4`)
+ if (!result.quantization && segments.length > 1) {
+ const last = segments[segments.length - 1];
+ const secondLast = segments.length > 2 ? segments[segments.length - 2] : null;
+
+ if (MODEL_QUANTIZATION_SEGMENT_RE.test(last)) {
+ if (secondLast && MODEL_CUSTOM_QUANTIZATION_PREFIX_RE.test(secondLast)) {
+ result.quantization = `${secondLast}-${last}`;
+ segments.splice(segments.length - 2, 2);
+ } else {
+ result.quantization = last;
+ segments.pop();
+ }
+ }
+ }
+
+ // 5. Find params and activated params
let paramsIdx = MODEL_ID_NOT_FOUND;
let activatedParamsIdx = MODEL_ID_NOT_FOUND;
for (let i = 0; i < segments.length; i++) {
const seg = segments[i];
- if (paramsIdx === -1 && paramsRe.test(seg)) {
+
+ if (paramsIdx === MODEL_ID_NOT_FOUND && MODEL_PARAMS_RE.test(seg)) {
paramsIdx = i;
result.params = seg.toUpperCase();
- } else if (activatedParamsRe.test(seg)) {
+ } else if (paramsIdx !== MODEL_ID_NOT_FOUND && MODEL_ACTIVATED_PARAMS_RE.test(seg)) {
activatedParamsIdx = i;
result.activatedParams = seg.toUpperCase();
}
}
+ // 6. Model name = segments before params; tags = remaining segments after params
const pivotIdx = paramsIdx !== MODEL_ID_NOT_FOUND ? paramsIdx : segments.length;
result.modelName = segments.slice(0, pivotIdx).join(MODEL_ID_SEGMENT_SEPARATOR) || null;
if (paramsIdx !== MODEL_ID_NOT_FOUND) {
- result.tags = segments
- .slice(paramsIdx + 1)
- .filter((_, relIdx) => paramsIdx + 1 + relIdx !== activatedParamsIdx);
+ result.tags = segments.slice(paramsIdx + 1).filter((_, relIdx) => {
+ const absIdx = paramsIdx + 1 + relIdx;
+ if (absIdx === activatedParamsIdx) return false;
+
+ return !MODEL_IGNORED_SEGMENTS.has(segments[absIdx].toUpperCase());
+ });
}
return result;
diff --git a/tools/server/webui/src/lib/types/models.d.ts b/tools/server/webui/src/lib/types/models.d.ts
index dc8e86485c..b4d5f11f57 100644
--- a/tools/server/webui/src/lib/types/models.d.ts
+++ b/tools/server/webui/src/lib/types/models.d.ts
@@ -25,7 +25,6 @@ export interface ParsedModelId {
modelName: string | null;
params: string | null;
activatedParams: string | null;
- format: string | null;
quantization: string | null;
tags: string[];
}
diff --git a/tools/server/webui/tests/unit/model-id-parser.test.ts b/tools/server/webui/tests/unit/model-id-parser.test.ts
new file mode 100644
index 0000000000..3c2937d356
--- /dev/null
+++ b/tools/server/webui/tests/unit/model-id-parser.test.ts
@@ -0,0 +1,270 @@
+import { describe, expect, it } from 'vitest';
+import { ModelsService } from '$lib/services/models.service';
+
+const parseModelId = (id: string) => ModelsService.parseModelId(id); // stays bound to the class
+
+describe('parseModelId', () => {
+ it('handles unknown patterns correctly', () => {
+ expect(parseModelId('model-name-1')).toStrictEqual({
+ activatedParams: null,
+ modelName: 'model-name-1',
+ orgName: null,
+ params: null,
+ quantization: null,
+ raw: 'model-name-1',
+ tags: []
+ });
+
+ expect(parseModelId('org/model-name-2')).toStrictEqual({
+ activatedParams: null,
+ modelName: 'model-name-2',
+ orgName: 'org',
+ params: null,
+ quantization: null,
+ raw: 'org/model-name-2',
+ tags: []
+ });
+ });
+
+ it('extracts model parameters correctly', () => {
+ expect(parseModelId('model-100B-BF16')).toMatchObject({ params: '100B' });
+ expect(parseModelId('model-100B:Q4_K_M')).toMatchObject({ params: '100B' });
+ });
+
+ it('extracts model parameters correctly in lowercase', () => {
+ expect(parseModelId('model-100b-bf16')).toMatchObject({ params: '100B' });
+ expect(parseModelId('model-100b:q4_k_m')).toMatchObject({ params: '100B' });
+ });
+
+ it('extracts activated parameters correctly', () => {
+ expect(parseModelId('model-100B-A10B-BF16')).toMatchObject({ activatedParams: 'A10B' });
+ expect(parseModelId('model-100B-A10B:Q4_K_M')).toMatchObject({ activatedParams: 'A10B' });
+ });
+
+ it('extracts activated parameters correctly in lowercase', () => {
+ expect(parseModelId('model-100b-a10b-bf16')).toMatchObject({ activatedParams: 'A10B' });
+ expect(parseModelId('model-100b-a10b:q4_k_m')).toMatchObject({ activatedParams: 'A10B' });
+ });
+
+ it('extracts quantization correctly', () => {
+ // Dash-separated quantization
+ expect(parseModelId('model-100B-UD-IQ1_S')).toMatchObject({ quantization: 'UD-IQ1_S' });
+ expect(parseModelId('model-100B-IQ4_XS')).toMatchObject({ quantization: 'IQ4_XS' });
+ expect(parseModelId('model-100B-Q4_K_M')).toMatchObject({ quantization: 'Q4_K_M' });
+ expect(parseModelId('model-100B-Q8_0')).toMatchObject({ quantization: 'Q8_0' });
+ expect(parseModelId('model-100B-UD-Q8_K_XL')).toMatchObject({ quantization: 'UD-Q8_K_XL' });
+ expect(parseModelId('model-100B-F16')).toMatchObject({ quantization: 'F16' });
+ expect(parseModelId('model-100B-BF16')).toMatchObject({ quantization: 'BF16' });
+ expect(parseModelId('model-100B-MXFP4')).toMatchObject({ quantization: 'MXFP4' });
+
+ // Colon-separated quantization
+ expect(parseModelId('model-100B:UD-IQ1_S')).toMatchObject({ quantization: 'UD-IQ1_S' });
+ expect(parseModelId('model-100B:IQ4_XS')).toMatchObject({ quantization: 'IQ4_XS' });
+ expect(parseModelId('model-100B:Q4_K_M')).toMatchObject({ quantization: 'Q4_K_M' });
+ expect(parseModelId('model-100B:Q8_0')).toMatchObject({ quantization: 'Q8_0' });
+ expect(parseModelId('model-100B:UD-Q8_K_XL')).toMatchObject({ quantization: 'UD-Q8_K_XL' });
+ expect(parseModelId('model-100B:F16')).toMatchObject({ quantization: 'F16' });
+ expect(parseModelId('model-100B:BF16')).toMatchObject({ quantization: 'BF16' });
+ expect(parseModelId('model-100B:MXFP4')).toMatchObject({ quantization: 'MXFP4' });
+
+ // Dot-separated quantization
+ expect(parseModelId('nomic-embed-text-v2-moe.Q4_K_M')).toMatchObject({
+ quantization: 'Q4_K_M'
+ });
+ });
+
+ it('extracts additional tags correctly', () => {
+ expect(parseModelId('model-100B-foobar-Q4_K_M')).toMatchObject({ tags: ['foobar'] });
+ expect(parseModelId('model-100B-A10B-foobar-1M-BF16')).toMatchObject({
+ tags: ['foobar', '1M']
+ });
+ expect(parseModelId('model-100B-1M-foobar:UD-Q8_K_XL')).toMatchObject({
+ tags: ['1M', 'foobar']
+ });
+ });
+
+ it('filters out container format segments from tags', () => {
+ expect(parseModelId('model-100B-GGUF-Instruct-BF16')).toMatchObject({
+ tags: ['Instruct']
+ });
+ expect(parseModelId('model-100B-GGML-Instruct:Q4_K_M')).toMatchObject({
+ tags: ['Instruct']
+ });
+ });
+
+ it('handles real-world examples correctly', () => {
+ expect(parseModelId('meta-llama/Llama-3.1-8B')).toStrictEqual({
+ activatedParams: null,
+ modelName: 'Llama-3.1',
+ orgName: 'meta-llama',
+ params: '8B',
+ quantization: null,
+ raw: 'meta-llama/Llama-3.1-8B',
+ tags: []
+ });
+
+ expect(parseModelId('openai/gpt-oss-120b-MXFP4')).toStrictEqual({
+ activatedParams: null,
+ modelName: 'gpt-oss',
+ orgName: 'openai',
+ params: '120B',
+ quantization: 'MXFP4',
+ raw: 'openai/gpt-oss-120b-MXFP4',
+ tags: []
+ });
+
+ expect(parseModelId('openai/gpt-oss-20b:Q4_K_M')).toStrictEqual({
+ activatedParams: null,
+ modelName: 'gpt-oss',
+ orgName: 'openai',
+ params: '20B',
+ quantization: 'Q4_K_M',
+ raw: 'openai/gpt-oss-20b:Q4_K_M',
+ tags: []
+ });
+
+ expect(parseModelId('Qwen/Qwen3-Coder-30B-A3B-Instruct-1M-BF16')).toStrictEqual({
+ activatedParams: 'A3B',
+ modelName: 'Qwen3-Coder',
+ orgName: 'Qwen',
+ params: '30B',
+ quantization: 'BF16',
+ raw: 'Qwen/Qwen3-Coder-30B-A3B-Instruct-1M-BF16',
+ tags: ['Instruct', '1M']
+ });
+ });
+
+ it('handles real-world examples with quantization in segments', () => {
+ expect(parseModelId('meta-llama/Llama-4-Scout-17B-16E-Instruct-Q4_K_M')).toStrictEqual({
+ activatedParams: null,
+ modelName: 'Llama-4-Scout',
+ orgName: 'meta-llama',
+ params: '17B',
+ quantization: 'Q4_K_M',
+ raw: 'meta-llama/Llama-4-Scout-17B-16E-Instruct-Q4_K_M',
+ tags: ['16E', 'Instruct']
+ });
+
+ expect(parseModelId('MiniMaxAI/MiniMax-M2-IQ4_XS')).toStrictEqual({
+ activatedParams: null,
+ modelName: 'MiniMax-M2',
+ orgName: 'MiniMaxAI',
+ params: null,
+ quantization: 'IQ4_XS',
+ raw: 'MiniMaxAI/MiniMax-M2-IQ4_XS',
+ tags: []
+ });
+
+ expect(parseModelId('MiniMaxAI/MiniMax-M2-UD-Q3_K_XL')).toStrictEqual({
+ activatedParams: null,
+ modelName: 'MiniMax-M2',
+ orgName: 'MiniMaxAI',
+ params: null,
+ quantization: 'UD-Q3_K_XL',
+ raw: 'MiniMaxAI/MiniMax-M2-UD-Q3_K_XL',
+ tags: []
+ });
+
+ expect(parseModelId('mistralai/Devstral-2-123B-Instruct-2512-Q4_K_M')).toStrictEqual({
+ activatedParams: null,
+ modelName: 'Devstral-2',
+ orgName: 'mistralai',
+ params: '123B',
+ quantization: 'Q4_K_M',
+ raw: 'mistralai/Devstral-2-123B-Instruct-2512-Q4_K_M',
+ tags: ['Instruct', '2512']
+ });
+
+ expect(parseModelId('mistralai/Devstral-Small-2-24B-Instruct-2512-Q8_0')).toStrictEqual({
+ activatedParams: null,
+ modelName: 'Devstral-Small-2',
+ orgName: 'mistralai',
+ params: '24B',
+ quantization: 'Q8_0',
+ raw: 'mistralai/Devstral-Small-2-24B-Instruct-2512-Q8_0',
+ tags: ['Instruct', '2512']
+ });
+
+ expect(parseModelId('noctrex/GLM-4.7-Flash-MXFP4_MOE')).toStrictEqual({
+ activatedParams: null,
+ modelName: 'GLM-4.7-Flash',
+ orgName: 'noctrex',
+ params: null,
+ quantization: 'MXFP4_MOE',
+ raw: 'noctrex/GLM-4.7-Flash-MXFP4_MOE',
+ tags: []
+ });
+
+ expect(parseModelId('Qwen/Qwen3-Coder-Next-Q4_K_M')).toStrictEqual({
+ activatedParams: null,
+ modelName: 'Qwen3-Coder-Next',
+ orgName: 'Qwen',
+ params: null,
+ quantization: 'Q4_K_M',
+ raw: 'Qwen/Qwen3-Coder-Next-Q4_K_M',
+ tags: []
+ });
+
+ expect(parseModelId('openai/gpt-oss-120b-Q4_K_M')).toStrictEqual({
+ activatedParams: null,
+ modelName: 'gpt-oss',
+ orgName: 'openai',
+ params: '120B',
+ quantization: 'Q4_K_M',
+ raw: 'openai/gpt-oss-120b-Q4_K_M',
+ tags: []
+ });
+
+ expect(parseModelId('openai/gpt-oss-20b-F16')).toStrictEqual({
+ activatedParams: null,
+ modelName: 'gpt-oss',
+ orgName: 'openai',
+ params: '20B',
+ quantization: 'F16',
+ raw: 'openai/gpt-oss-20b-F16',
+ tags: []
+ });
+
+ expect(parseModelId('nomic-embed-text-v2-moe.Q4_K_M')).toStrictEqual({
+ activatedParams: null,
+ modelName: 'nomic-embed-text-v2-moe',
+ orgName: null,
+ params: null,
+ quantization: 'Q4_K_M',
+ raw: 'nomic-embed-text-v2-moe.Q4_K_M',
+ tags: []
+ });
+ });
+
+ it('handles ambiguous model names', () => {
+ // Qwen3.5 Instruct vs Thinking — tags should distinguish them
+ expect(parseModelId('Qwen/Qwen3.5-30B-A3B-Instruct')).toMatchObject({
+ modelName: 'Qwen3.5',
+ params: '30B',
+ activatedParams: 'A3B',
+ tags: ['Instruct']
+ });
+
+ expect(parseModelId('Qwen/Qwen3.5-30B-A3B-Thinking')).toMatchObject({
+ modelName: 'Qwen3.5',
+ params: '30B',
+ activatedParams: 'A3B',
+ tags: ['Thinking']
+ });
+
+ // Dot-separated quantization with variant suffixes
+ expect(parseModelId('gemma-3-27b-it-heretic-v2.Q8_0')).toMatchObject({
+ modelName: 'gemma-3',
+ params: '27B',
+ quantization: 'Q8_0',
+ tags: ['it', 'heretic', 'v2']
+ });
+
+ expect(parseModelId('gemma-3-27b-it.Q8_0')).toMatchObject({
+ modelName: 'gemma-3',
+ params: '27B',
+ quantization: 'Q8_0',
+ tags: ['it']
+ });
+ });
+});