Merge branch 'dev_backend_openvino' into xuejun/ov-bk-add-func-is-splited-model

This commit is contained in:
Xuejun Zhai 2026-03-18 13:21:51 +08:00 committed by GitHub
commit 37f6bca87b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
58 changed files with 1958 additions and 772 deletions

17
.github/labeler.yml vendored
View File

@ -104,3 +104,20 @@ OpenCL:
- any-glob-to-any-file:
- ggml/include/ggml-opencl.h
- ggml/src/ggml-opencl/**
- docs/backend/OPENCL.md
Hexagon:
- changed-files:
- any-glob-to-any-file:
- ggml/include/ggml-hexagon.h
- ggml/src/ggml-hexagon/**
WebGPU:
- changed-files:
- any-glob-to-any-file:
- ggml/include/ggml-webgpu.h
- ggml/src/ggml-webgpu/**
OpenVINO:
- changed-files:
- any-glob-to-any-file:
- ggml/include/ggml-openvino.h
- ggml/src/ggml-openvino/**
- docs/backend/OPENVINO.md

View File

@ -97,19 +97,21 @@ jobs:
vulkaninfo --summary
GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
ggml-ci-cpu-amx:
runs-on: [self-hosted, Linux, CPU, AMX]
# TODO: provision AMX-compatible machine
#ggml-ci-cpu-amx:
# runs-on: [self-hosted, Linux, CPU, AMX]
steps:
- name: Clone
id: checkout
uses: actions/checkout@v6
# steps:
# - name: Clone
# id: checkout
# uses: actions/checkout@v6
- name: Test
id: ggml-ci
run: |
bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
# - name: Test
# id: ggml-ci
# run: |
# bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
# TODO: provision AMD GPU machine
# ggml-ci-amd-vulkan:
# runs-on: [self-hosted, Linux, AMD]
@ -124,6 +126,7 @@ jobs:
# vulkaninfo --summary
# GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
# TODO: provision AMD GPU machine
# ggml-ci-amd-rocm:
# runs-on: [self-hosted, Linux, AMD]

View File

@ -24,9 +24,9 @@ Fri Mar 6 11:39:45 2026
+-----------------------------------------+------------------------+----------------------+
```
## ggml-org/nemotron-3-super-120b-GGUF
## ggml-org/Nemotron-3-Super-120B-GGUF
Model: https://huggingface.co/ggml-org/nemotron-3-super-120b-GGUF
Model: https://huggingface.co/ggml-org/Nemotron-3-Super-120B-GGUF
- `llama-batched-bench`
@ -53,7 +53,6 @@ main: n_kv_max = 303104, n_batch = 2048, n_ubatch = 2048, flash_attn = 1, is_pp_
| 8192 | 32 | 16 | 131584 | 171.066 | 766.21 | 10.774 | 47.52 | 181.840 | 723.62 |
| 8192 | 32 | 32 | 263168 | 342.140 | 766.19 | 18.969 | 53.98 | 361.109 | 728.78 |
- `llama-bench`
| model | size | params | backend | n_ubatch | fa | test | t/s |
@ -70,3 +69,49 @@ main: n_kv_max = 303104, n_batch = 2048, n_ubatch = 2048, flash_attn = 1, is_pp_
| nemotron 120B.A12B Q4_K | 65.10 GiB | 120.67 B | CUDA | 2048 | 1 | tg32 @ d32768 | 19.45 ± 0.18 |
build: 04a65daab (8268)
## ggml-org/Nemotron-3-Nano-4B-GGUF
Model: https://huggingface.co/ggml-org/Nemotron-3-Nano-4B-GGUF
- `llama-batched-bench`
main: n_kv_max = 303104, n_batch = 2048, n_ubatch = 2048, flash_attn = 1, is_pp_shared = 0, is_tg_separate = 0, n_gpu_layers = 99, n_threads = 20, n_threads_batch = 20
| PP | TG | B | N_KV | T_PP s | S_PP t/s | T_TG s | S_TG t/s | T s | S t/s |
|-------|--------|------|--------|----------|----------|----------|----------|----------|----------|
| 512 | 32 | 1 | 544 | 0.152 | 3371.61 | 0.597 | 53.64 | 0.748 | 726.90 |
| 512 | 32 | 2 | 1088 | 0.319 | 3208.68 | 0.857 | 74.66 | 1.176 | 924.89 |
| 512 | 32 | 4 | 2176 | 0.720 | 2843.56 | 1.323 | 96.78 | 2.043 | 1065.18 |
| 512 | 32 | 8 | 4352 | 1.428 | 2867.96 | 2.311 | 110.76 | 3.739 | 1163.82 |
| 512 | 32 | 16 | 8704 | 2.857 | 2866.94 | 4.203 | 121.82 | 7.060 | 1232.82 |
| 512 | 32 | 32 | 17408 | 5.709 | 2869.76 | 7.964 | 128.58 | 13.673 | 1273.14 |
| 4096 | 32 | 1 | 4128 | 1.458 | 2809.76 | 0.605 | 52.92 | 2.062 | 2001.52 |
| 4096 | 32 | 2 | 8256 | 2.905 | 2819.95 | 0.875 | 73.12 | 3.780 | 2183.95 |
| 4096 | 32 | 4 | 16512 | 5.790 | 2829.74 | 1.361 | 94.07 | 7.151 | 2309.17 |
| 4096 | 32 | 8 | 33024 | 11.598 | 2825.32 | 2.378 | 107.65 | 13.976 | 2362.89 |
| 4096 | 32 | 16 | 66048 | 23.208 | 2823.88 | 4.348 | 117.76 | 27.556 | 2396.89 |
| 4096 | 32 | 32 | 132096 | 46.515 | 2817.85 | 8.279 | 123.69 | 54.794 | 2410.79 |
| 8192 | 32 | 1 | 8224 | 2.950 | 2776.95 | 0.617 | 51.89 | 3.567 | 2305.75 |
| 8192 | 32 | 2 | 16448 | 5.921 | 2767.32 | 0.896 | 71.45 | 6.816 | 2413.05 |
| 8192 | 32 | 4 | 32896 | 11.842 | 2767.21 | 1.401 | 91.34 | 13.243 | 2484.03 |
| 8192 | 32 | 8 | 65792 | 23.726 | 2762.17 | 2.461 | 104.03 | 26.187 | 2512.38 |
| 8192 | 32 | 16 | 131584 | 47.777 | 2743.43 | 4.577 | 111.86 | 52.354 | 2513.36 |
| 8192 | 32 | 32 | 263168 | 96.691 | 2711.16 | 8.772 | 116.73 | 105.463 | 2495.36 |
- `llama-bench`
| model | size | params | backend | n_ubatch | fa | test | t/s |
| ----------------------- | ---------: | ---------: | ---------- | -------: | -: | --------------: | -------------------: |
| nemotron 4B Q8_0 | 3.94 GiB | 3.97 B | CUDA | 2048 | 1 | pp2048 | 2761.90 ± 19.31 |
| nemotron 4B Q8_0 | 3.94 GiB | 3.97 B | CUDA | 2048 | 1 | tg32 | 52.85 ± 0.12 |
| nemotron 4B Q8_0 | 3.94 GiB | 3.97 B | CUDA | 2048 | 1 | pp2048 @ d4096 | 2687.07 ± 21.84 |
| nemotron 4B Q8_0 | 3.94 GiB | 3.97 B | CUDA | 2048 | 1 | tg32 @ d4096 | 52.32 ± 0.23 |
| nemotron 4B Q8_0 | 3.94 GiB | 3.97 B | CUDA | 2048 | 1 | pp2048 @ d8192 | 2564.52 ± 57.69 |
| nemotron 4B Q8_0 | 3.94 GiB | 3.97 B | CUDA | 2048 | 1 | tg32 @ d8192 | 51.27 ± 0.34 |
| nemotron 4B Q8_0 | 3.94 GiB | 3.97 B | CUDA | 2048 | 1 | pp2048 @ d16384 | 2334.02 ± 37.83 |
| nemotron 4B Q8_0 | 3.94 GiB | 3.97 B | CUDA | 2048 | 1 | tg32 @ d16384 | 49.71 ± 0.14 |
| nemotron 4B Q8_0 | 3.94 GiB | 3.97 B | CUDA | 2048 | 1 | pp2048 @ d32768 | 2041.46 ± 40.45 |
| nemotron 4B Q8_0 | 3.94 GiB | 3.97 B | CUDA | 2048 | 1 | tg32 @ d32768 | 46.71 ± 0.13 |
build: 1bbec6a75 (8382)

View File

@ -1519,7 +1519,6 @@ static common_chat_params common_chat_templates_apply_jinja(const struct common_
// map developer to system for all models except for GPT-OSS
workaround::map_developer_role_to_system(params.messages);
}
workaround::func_args_not_string(params.messages);
if (!tmpl.original_caps().supports_system_role) {
workaround::system_message_not_supported(params.messages);
@ -1532,6 +1531,10 @@ static common_chat_params common_chat_templates_apply_jinja(const struct common_
workaround::requires_non_null_content(params.messages);
}
if (tmpl.original_caps().supports_object_arguments) {
workaround::func_args_not_string(params.messages);
}
params.extra_context = common_chat_extra_context();
for (auto el : inputs.chat_template_kwargs) {
params.extra_context[el.first] = json::parse(el.second);

View File

@ -75,6 +75,7 @@ std::map<std::string, bool> caps::to_map() const {
{"supports_parallel_tool_calls", supports_parallel_tool_calls},
{"supports_system_role", supports_system_role},
{"supports_preserve_reasoning", supports_preserve_reasoning},
{"supports_object_arguments", supports_object_arguments},
};
}
@ -158,9 +159,9 @@ caps caps_get(jinja::program & prog) {
}
);
JJ_DEBUG("%s\n", ">>> Running capability check: single tool support");
JJ_DEBUG("%s\n", ">>> Running capability check: single tool with object arguments support");
// case: tools support: single call
// case: tools support: single call with object arguments
caps_try_execute(
prog,
[&]() {
@ -226,9 +227,7 @@ caps caps_get(jinja::program & prog) {
},
[&](bool success, value & messages, value & tools) {
if (!success) {
result.supports_tool_calls = false;
result.supports_tools = false;
return;
return; // Nothing can be inferred
}
auto & tool_name = tools->at(0)->at("function")->at("name");
@ -242,16 +241,117 @@ caps caps_get(jinja::program & prog) {
caps_print_stats(tool_calls, "messages[1].tool_calls");
if (!tool_calls->stats.used) {
result.supports_tool_calls = false;
return;
}
auto & tool_arg = tool_calls->at(0)->at("function")->at("arguments")->at("arg");
caps_print_stats(tool_arg, "messages[1].tool_calls[0].function.arguments.arg");
if (tool_arg->stats.used) {
result.supports_object_arguments = true;
}
}
);
if (!result.supports_object_arguments) {
JJ_DEBUG("%s\n", ">>> Running capability check: single tool with string arguments support");
// case: tools support: single call with string arguments
caps_try_execute(
prog,
[&]() {
// messages
return json::array({
{
{"role", "user"},
{"content", "User message"},
},
{
{"role", "assistant"},
{"content", ""}, // Some templates expect content to be empty with tool calls
{"tool_calls", json::array({
{
{"id", "call00001"},
{"type", "function"},
{"function", {
{"name", "tool1"},
{"arguments", R"({"arg": "value"})"}
}}
}
})}
},
{
{"role", "tool"},
{"content", "Tool response"},
{"tool_call_id", "call00001"}
},
{
{"role", "assistant"},
{"content", "The tool response was 'tool response'"}
},
{
{"role", "user"},
{"content", "User message"},
},
});
},
[&]() {
// tools
return json::array({
{
{"name", "tool"},
{"type", "function"},
{"function", {
{"name", "tool1"},
{"description", "Tool description"},
{"parameters", {
{"type", "object"},
{"properties", {
{"arg", {
{"type", "string"},
{"description", "Arg description"},
}},
}},
{"required", json::array({ "arg" })},
}},
}},
},
});
},
[&](bool success, value & messages, value & tools) {
if (!success) {
result.supports_tool_calls = false;
result.supports_tools = false;
return;
}
auto & tool_name = tools->at(0)->at("function")->at("name");
caps_print_stats(tool_name, "tools[0].function.name");
caps_print_stats(tools, "tools");
if (!tool_name->stats.used) {
result.supports_tools = false;
}
auto & tool_calls = messages->at(1)->at("tool_calls");
caps_print_stats(tool_calls, "messages[1].tool_calls");
if (!tool_calls->stats.used) {
result.supports_tool_calls = false;
return;
}
}
);
}
JJ_DEBUG("%s\n", ">>> Running capability check: parallel tool support");
// case: tools support: parallel calls
caps_try_execute(
prog,
[&]() {
json args = json(R"({"arg": "value"})");
if (result.supports_object_arguments) {
args = json{{"arg", "value"}};
}
// messages
return json::array({
{
@ -267,9 +367,7 @@ caps caps_get(jinja::program & prog) {
{"type", "function"},
{"function", {
{"name", "tool1"},
{"arguments", {
{"arg", "value"}
}}
{"arguments", args}
}}
},
{
@ -277,9 +375,7 @@ caps caps_get(jinja::program & prog) {
{"type", "function"},
{"function", {
{"name", "tool1"},
{"arguments", {
{"arg", "value"}
}}
{"arguments", args}
}}
}
})}
@ -328,7 +424,7 @@ caps caps_get(jinja::program & prog) {
return;
}
auto & tool_calls = messages->at(1)->at("tool_calls");;
auto & tool_calls = messages->at(1)->at("tool_calls");
caps_print_stats(tool_calls, "messages[1].tool_calls");
// check for second tool call usage

View File

@ -18,6 +18,8 @@ struct caps {
bool supports_string_content = true;
bool supports_typed_content = false;
bool supports_object_arguments = false;
// for reporting on server
std::map<std::string, bool> to_map() const;

View File

@ -102,7 +102,7 @@ std::string regex_to_reversed_partial_regex(const std::string & pattern) {
auto is_star = *it == '*';
++it;
if (is_star) {
if (*it == '?') {
if (it != end && *it == '?') {
++it;
}
}

View File

@ -272,8 +272,9 @@ class ModelBase:
return tensors
def dequant_model(self):
if self._is_nvfp4:
return # NVFP4 weights are repacked in _generate_nvfp4_tensors
# If all quantized tensors were already handled (e.g. pure NVFP4), skip
if self._is_nvfp4 and not any(k.endswith((".weight_scale", ".weight_scale_inv")) for k in self.model_tensors):
return
tensors_to_remove: list[str] = []
new_tensors: dict[str, Callable[[], Tensor]] = {}
@ -297,11 +298,16 @@ class ModelBase:
scale = scale.float()
if block_size is not None:
dim_offset = scale.ndim - len(block_size)
for i, size in enumerate(block_size):
scale = scale.repeat_interleave(size, i)
scale = scale.repeat_interleave(size, dim_offset + i)
# unpad the scale (e.g. when the tensor size isn't a multiple of the block size)
scale = scale[tuple(slice(0, size) for size in weight.shape)]
# align scale dims to weight for correct broadcasting (e.g. [128] -> [128, 1, 1])
while scale.ndim < weight.ndim:
scale = scale.unsqueeze(-1)
return weight.float() * scale
# ref: https://github.com/ModelCloud/GPTQModel/blob/037c5c0f6c9e33c500d975b038d02e7ca437546d/gptqmodel/nn_modules/qlinear/__init__.py#L437-L476
@ -392,7 +398,7 @@ class ModelBase:
elif quant_method == "fp8":
block_size = quant_config.get("weight_block_size")
for name in self.model_tensors.keys():
if name.endswith(".weight_scale_inv"):
if name.endswith("_scale_inv"):
weight_name = name.removesuffix("_scale_inv")
w = self.model_tensors[weight_name]
s = self.model_tensors[name]
@ -400,6 +406,8 @@ class ModelBase:
tensors_to_remove.append(name)
if name.endswith(".activation_scale"): # unused
tensors_to_remove.append(name)
if name.endswith("_activation_scale"): # Mistral-Small-4-119B-2602, unused
tensors_to_remove.append(name)
# mistral format
if name.endswith(".qscale_weight"):
weight_name = name.removesuffix("qscale_weight") + "weight"
@ -474,7 +482,20 @@ class ModelBase:
tensors_to_remove.append(base_name + "_zero_point")
else:
raise NotImplementedError(f"Quant format {quant_format!r} for method {quant_method!r} is not yet supported")
else:
elif quant_method == "modelopt":
# Mixed-precision ModelOpt models: NVFP4 tensors are handled by
# _generate_nvfp4_tensors; FP8 tensors have 1D weight_scale and
# are dequantized here. input_scale tensors are unused.
for name in self.model_tensors.keys():
if name.endswith(".weight_scale"):
weight_name = name.removesuffix("_scale")
w = self.model_tensors[weight_name]
s = self.model_tensors[name]
self.model_tensors[weight_name] = lambda w=w, s=s: dequant_simple(w(), s(), None)
tensors_to_remove.append(name)
if name.endswith((".input_scale", ".k_scale", ".v_scale")):
tensors_to_remove.append(name)
elif quant_method is not None:
raise NotImplementedError(f"Quant method is not yet supported: {quant_method!r}")
for name in tensors_to_remove:
@ -520,12 +541,6 @@ class ModelBase:
raise NotImplementedError("set_gguf_parameters() must be implemented in subclasses")
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
# skip NVFP4 auxiliary tensors (handled in _generate_nvfp4_tensors)
if self._is_nvfp4:
if name.endswith((".weight_scale", ".weight_scale_2", ".input_scale", ".k_scale", ".v_scale")):
return []
if name.endswith(".weight") and name.replace(".weight", ".weight_scale") in self.model_tensors:
return []
new_name = self.map_tensor_name(name)
@ -609,6 +624,7 @@ class ModelBase:
expert_scales: dict[tuple[int, str], list[tuple[int, float]]] = {}
expert_shapes: dict[tuple[int, str], list[int]] = {}
n_experts = self.find_hparam(["num_local_experts", "num_experts"], optional=True) or 0
consumed: list[str] = []
for name in list(self.model_tensors.keys()):
if not name.endswith(".weight"):
@ -620,8 +636,18 @@ class ModelBase:
# Force eager materialization of lazy tensors
weight = LazyTorchTensor.to_eager(self.model_tensors[name]())
scale = LazyTorchTensor.to_eager(self.model_tensors[scale_name]())
# Skip non-NVFP4 tensors (e.g. FP8 with per-channel 1D scales)
if scale.ndim < 2:
continue
scale2 = LazyTorchTensor.to_eager(self.model_tensors.get(scale2_name, lambda: torch.tensor(1.0))())
# Mark tensors for removal from model_tensors (already written to gguf)
consumed.extend([name, scale_name])
if scale2_name in self.model_tensors:
consumed.append(scale2_name)
# Check if this is a per-expert tensor
m = re.search(r'\.experts\.(\d+)\.(gate_proj|up_proj|down_proj)\.weight$', name)
if m:
@ -652,6 +678,15 @@ class ModelBase:
for (bid, proj_type) in list(expert_blocks.keys()):
self._flush_nvfp4_experts((bid, proj_type), expert_blocks, expert_scales, expert_shapes, bid, proj_type)
# Remove consumed tensors so get_tensors/modify_tensors won't see them
for name in consumed:
self.model_tensors.pop(name, None)
# Remove unused auxiliary tensors (input_scale, k_scale, v_scale)
for name in list(self.model_tensors.keys()):
if name.endswith((".input_scale", ".k_scale", ".v_scale")):
del self.model_tensors[name]
def _flush_nvfp4_experts(self, key, expert_blocks, expert_scales, expert_shapes, bid, proj_type):
experts = expert_blocks.pop(key)
scales = expert_scales.pop(key)
@ -677,20 +712,31 @@ class ModelBase:
def prepare_tensors(self):
# detect NVFP4 quantization (ModelOpt format)
quant_algo = (self.hparams.get("quantization_config") or {}).get("quant_algo")
quant_layers = (self.hparams.get("quantization_config") or {}).get("quantized_layers") or {}
quant_config_file = self.dir_model / "hf_quant_config.json"
if not quant_algo and quant_config_file.is_file():
if (not quant_algo or not quant_layers) and quant_config_file.is_file():
with open(quant_config_file, "r", encoding="utf-8") as f:
quant_algo = (json.load(f).get("quantization") or {}).get("quant_algo")
quant_config = json.load(f).get("quantization") or {}
quant_algo = quant_config.get("quant_algo", quant_algo)
quant_layers = quant_config.get("quantized_layers", quant_layers) or {}
# Some models use per-tensor quant_algo (e.g. "MIXED_PRECISION" with
# per-layer NVFP4/FP8) instead of a single global "NVFP4" value.
if quant_algo != "NVFP4":
if any(v.get("quant_algo") == "NVFP4" for v in quant_layers.values() if isinstance(v, dict)):
quant_algo = "NVFP4"
self._is_nvfp4 = quant_algo == "NVFP4"
self.dequant_model()
# NVFP4 weights are repacked and written directly to gguf_writer
# NVFP4 weights are repacked and written directly to gguf_writer.
# This must run before dequant_model so NVFP4 tensors are removed
# from model_tensors, leaving only non-NVFP4 (e.g. FP8) for dequant.
if self._is_nvfp4:
self._generate_nvfp4_tensors()
self.dequant_model()
# Handle empty tensor_map for models with block_count=0 (like MobileNetV5)
if self.tensor_map.mapping:
max_name_len = max(len(s) for _, s in self.tensor_map.mapping.values()) + len(".weight,")
@ -2992,10 +3038,16 @@ class LlavaVisionModel(MmprojModel):
def get_token_id(self, token: str) -> int:
tokenizer_config_file = self.dir_model / 'tokenizer_config.json'
with open(tokenizer_config_file, "r", encoding="utf-8") as f:
added_tokens_decoder = json.load(f)['added_tokens_decoder']
added_tokens_decoder = json.load(f).get('added_tokens_decoder') or {}
for id_, token_data in added_tokens_decoder.items():
if token_data["content"] == token:
if token_data.get("content") == token:
return int(id_)
# fallthrough to tokenizer.json
with open(self.dir_model / "tokenizer.json", "r", encoding="utf-8") as f:
tokenizer_json = json.load(f)
for token_data in tokenizer_json["added_tokens"]:
if token_data["content"] == token:
return int(token_data["id"])
raise ValueError(f"Token '{token}' not found in tokenizer config.")
def set_gguf_parameters(self):
@ -3159,40 +3211,6 @@ class Llama4VisionModel(MmprojModel):
yield from super().modify_tensors(data_torch, name, bid)
@ModelBase.register(
"Mistral3ForConditionalGeneration",
"Ministral3ForCausalLM",
)
class Mistral3Model(LlamaModel):
model_arch = gguf.MODEL_ARCH.MISTRAL3
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
# for compatibility, we use LLAMA arch for older models
# TODO: remove this once everyone has migrated to newer version of llama.cpp
if self.hparams.get("model_type") != "ministral3":
self.model_arch = gguf.MODEL_ARCH.LLAMA
self.gguf_writer.arch = gguf.MODEL_ARCH_NAMES[self.model_arch]
self.gguf_writer.add_architecture()
self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count)
def set_gguf_parameters(self):
super().set_gguf_parameters()
rope_params = self.rope_parameters
if self.hparams.get("model_type") == "ministral3":
assert rope_params, "ministral3 must have 'rope_parameters' config"
assert rope_params["rope_type"] == "yarn", "ministral3 rope_type must be 'yarn'"
self.gguf_writer.add_rope_scaling_yarn_log_mul(rope_params["mscale_all_dim"])
self.gguf_writer.add_attn_temperature_scale(rope_params["llama_4_scaling_beta"])
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None):
name = name.replace("language_model.", "")
if "multi_modal_projector" in name or "vision_tower" in name:
return
yield from super().modify_tensors(data_torch, name, bid)
@ModelBase.register("DeciLMForCausalLM")
class DeciModel(TextModel):
model_arch = gguf.MODEL_ARCH.DECI
@ -8232,6 +8250,8 @@ class DeepseekV2Model(TextModel):
# TODO @ngxson : remove this when we support MTP for deepseek models
skip_mtp = True
merge_expert = True
def set_vocab(self):
try:
self._set_vocab_gpt2()
@ -8370,7 +8390,7 @@ class DeepseekV2Model(TextModel):
return
# process the experts separately
if name.find("mlp.experts") != -1:
if self.merge_expert and name.find("mlp.experts") != -1:
n_experts = self.hparams["n_routed_experts"]
assert bid is not None
@ -8429,6 +8449,69 @@ class DeepseekV2Model(TextModel):
raise ValueError(f"Unprocessed experts: {experts}")
@ModelBase.register(
"Mistral3ForConditionalGeneration",
"Ministral3ForCausalLM",
)
class Mistral3Model(TextModel):
class Ministral3Model(LlamaModel):
model_arch = gguf.MODEL_ARCH.MISTRAL3
def set_gguf_parameters(self):
super().set_gguf_parameters()
rope_params = self.rope_parameters
if self.hparams.get("model_type") == "ministral3":
assert rope_params, "ministral3 must have 'rope_parameters' config"
assert rope_params["rope_type"] == "yarn", "ministral3 rope_type must be 'yarn'"
self.gguf_writer.add_rope_scaling_yarn_log_mul(rope_params["mscale_all_dim"])
self.gguf_writer.add_attn_temperature_scale(rope_params["llama_4_scaling_beta"])
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None):
name = name.replace("language_model.", "")
if "multi_modal_projector" in name or "vision_tower" in name:
return
yield from super().modify_tensors(data_torch, name, bid)
class Mistral4Model(DeepseekV2Model):
model_arch = gguf.MODEL_ARCH.MISTRAL4
skip_mtp = False # model contains no MTP layers, so no need to skip
merge_expert = False # experts are already stacked as 3D
def modify_tensors(self, data_torch, name, bid):
if name.endswith(".down_proj") or name.endswith(".gate_up_proj"):
name = name + ".weight"
yield from super().modify_tensors(data_torch, name, bid)
model_arch = gguf.MODEL_ARCH.MISTRAL3 # unused
impl: TextModel
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
if self.hparams.get("model_type") == "mistral4":
self.impl = Mistral3Model.Mistral4Model(*args, **kwargs)
else:
self.impl = Mistral3Model.Ministral3Model(*args, **kwargs)
def set_vocab(self):
self.impl.set_vocab()
def set_gguf_parameters(self):
self.impl.set_gguf_parameters()
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None):
yield from self.impl.modify_tensors(data_torch, name, bid)
def prepare_tensors(self):
self.impl.prepare_tensors()
def write_vocab(self):
self.impl.write_vocab()
def write(self):
self.impl.write()
@ModelBase.register("MiniMaxM2ForCausalLM")
class MiniMaxM2Model(TextModel):
model_arch = gguf.MODEL_ARCH.MINIMAXM2

View File

@ -117,5 +117,5 @@ Legend:
| TOP_K | ❌ | ❌ | ✅ | ❌ | ✅ | ❌ | 🟡 | 🟡 | ✅ | ❌ | ❌ |
| TRI | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ |
| TRUNC | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | 🟡 | 🟡 | ✅ | ❌ | ❌ |
| UPSCALE | ❌ | 🟡 | ✅ | ✅ | 🟡 | 🟡 | 🟡 | ✅ | ❌ | ❌ | ❌ |
| UPSCALE | ❌ | 🟡 | ✅ | ✅ | 🟡 | 🟡 | | ✅ | ❌ | ❌ | ❌ |
| XIELU | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ |

View File

@ -5937,6 +5937,20 @@
"SYCL0","RMS_NORM_BACK","type=f32,ne=[1025,5,4,3],eps=0.100000","support","1","yes","SYCL"
"SYCL0","L2_NORM","type=f32,ne=[1025,5,4,3],eps=0.100000,v=0","support","1","yes","SYCL"
"SYCL0","L2_NORM","type=f32,ne=[1025,5,4,3],eps=0.100000,v=1","support","1","yes","SYCL"
"SYCL0","NORM","type=f32,ne=[64,5,4,3],v=0,eps=10.000000","support","1","yes","SYCL"
"SYCL0","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=10.000000,inplace=0","support","1","yes","SYCL"
"SYCL0","NORM","type=f32,ne=[64,5,4,3],v=1,eps=10.000000","support","1","yes","SYCL"
"SYCL0","RMS_NORM","type=f32,ne=[64,5,4,3],v=1,eps=10.000000,inplace=0","support","1","yes","SYCL"
"SYCL0","RMS_NORM_BACK","type=f32,ne=[64,5,4,3],eps=10.000000","support","1","yes","SYCL"
"SYCL0","L2_NORM","type=f32,ne=[64,5,4,3],eps=10.000000,v=0","support","1","yes","SYCL"
"SYCL0","L2_NORM","type=f32,ne=[64,5,4,3],eps=10.000000,v=1","support","1","yes","SYCL"
"SYCL0","NORM","type=f32,ne=[1025,5,4,3],v=0,eps=10.000000","support","1","yes","SYCL"
"SYCL0","RMS_NORM","type=f32,ne=[1025,5,4,3],v=0,eps=10.000000,inplace=0","support","1","yes","SYCL"
"SYCL0","NORM","type=f32,ne=[1025,5,4,3],v=1,eps=10.000000","support","1","yes","SYCL"
"SYCL0","RMS_NORM","type=f32,ne=[1025,5,4,3],v=1,eps=10.000000,inplace=0","support","1","yes","SYCL"
"SYCL0","RMS_NORM_BACK","type=f32,ne=[1025,5,4,3],eps=10.000000","support","1","yes","SYCL"
"SYCL0","L2_NORM","type=f32,ne=[1025,5,4,3],eps=10.000000,v=0","support","1","yes","SYCL"
"SYCL0","L2_NORM","type=f32,ne=[1025,5,4,3],eps=10.000000,v=1","support","1","yes","SYCL"
"SYCL0","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000001,inplace=1","support","1","yes","SYCL"
"SYCL0","SSM_CONV","type=f32,ne_a=[3,1024,1,1],ne_b=[3,1024,1,1]","support","1","yes","SYCL"
"SYCL0","SSM_CONV","type=f32,ne_a=[6,1024,1,1],ne_b=[3,1024,1,1]","support","1","yes","SYCL"
@ -10209,24 +10223,24 @@
"SYCL0","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=nearest,transpose=1","support","1","yes","SYCL"
"SYCL0","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=nearest","support","1","yes","SYCL"
"SYCL0","UPSCALE","type=f32,ne=[5,7,11,13],ne_tgt=[2,5,7,11],mode=nearest","support","1","yes","SYCL"
"SYCL0","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=bilinear,transpose=0","support","0","no","SYCL"
"SYCL0","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=bilinear,transpose=1","support","0","no","SYCL"
"SYCL0","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=bilinear","support","0","no","SYCL"
"SYCL0","UPSCALE","type=f32,ne=[5,7,11,13],ne_tgt=[2,5,7,11],mode=bilinear","support","0","no","SYCL"
"SYCL0","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=bicubic,transpose=0","support","0","no","SYCL"
"SYCL0","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=bicubic,transpose=1","support","0","no","SYCL"
"SYCL0","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=bicubic","support","0","no","SYCL"
"SYCL0","UPSCALE","type=f32,ne=[5,7,11,13],ne_tgt=[2,5,7,11],mode=bicubic","support","0","no","SYCL"
"SYCL0","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=bilinear|antialias,transpose=0","support","0","no","SYCL"
"SYCL0","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=bilinear|antialias,transpose=1","support","0","no","SYCL"
"SYCL0","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=bilinear|antialias","support","0","no","SYCL"
"SYCL0","UPSCALE","type=f32,ne=[5,7,11,13],ne_tgt=[2,5,7,11],mode=bilinear|antialias","support","0","no","SYCL"
"SYCL0","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=bilinear|align_corners","support","0","no","SYCL"
"SYCL0","UPSCALE","type=f32,ne=[1,4,3,2],ne_tgt=[2,8,3,2],mode=bilinear|align_corners","support","0","no","SYCL"
"SYCL0","UPSCALE","type=f32,ne=[4,1,3,2],ne_tgt=[1,1,3,2],mode=bilinear|align_corners","support","0","no","SYCL"
"SYCL0","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=bicubic|align_corners","support","0","no","SYCL"
"SYCL0","UPSCALE","type=f32,ne=[1,4,3,2],ne_tgt=[2,8,3,2],mode=bicubic|align_corners","support","0","no","SYCL"
"SYCL0","UPSCALE","type=f32,ne=[4,1,3,2],ne_tgt=[1,1,3,2],mode=bicubic|align_corners","support","0","no","SYCL"
"SYCL0","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=bilinear,transpose=0","support","1","yes","SYCL"
"SYCL0","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=bilinear,transpose=1","support","1","yes","SYCL"
"SYCL0","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=bilinear","support","1","yes","SYCL"
"SYCL0","UPSCALE","type=f32,ne=[5,7,11,13],ne_tgt=[2,5,7,11],mode=bilinear","support","1","yes","SYCL"
"SYCL0","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=bicubic,transpose=0","support","1","yes","SYCL"
"SYCL0","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=bicubic,transpose=1","support","1","yes","SYCL"
"SYCL0","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=bicubic","support","1","yes","SYCL"
"SYCL0","UPSCALE","type=f32,ne=[5,7,11,13],ne_tgt=[2,5,7,11],mode=bicubic","support","1","yes","SYCL"
"SYCL0","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=bilinear|antialias,transpose=0","support","1","yes","SYCL"
"SYCL0","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=bilinear|antialias,transpose=1","support","1","yes","SYCL"
"SYCL0","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=bilinear|antialias","support","1","yes","SYCL"
"SYCL0","UPSCALE","type=f32,ne=[5,7,11,13],ne_tgt=[2,5,7,11],mode=bilinear|antialias","support","1","yes","SYCL"
"SYCL0","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=bilinear|align_corners","support","1","yes","SYCL"
"SYCL0","UPSCALE","type=f32,ne=[1,4,3,2],ne_tgt=[2,8,3,2],mode=bilinear|align_corners","support","1","yes","SYCL"
"SYCL0","UPSCALE","type=f32,ne=[4,1,3,2],ne_tgt=[1,1,3,2],mode=bilinear|align_corners","support","1","yes","SYCL"
"SYCL0","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=bicubic|align_corners","support","1","yes","SYCL"
"SYCL0","UPSCALE","type=f32,ne=[1,4,3,2],ne_tgt=[2,8,3,2],mode=bicubic|align_corners","support","1","yes","SYCL"
"SYCL0","UPSCALE","type=f32,ne=[4,1,3,2],ne_tgt=[1,1,3,2],mode=bicubic|align_corners","support","1","yes","SYCL"
"SYCL0","SUM","type=f32,ne=[10,5,4,3]","support","1","yes","SYCL"
"SYCL0","SUM","type=f32,ne=[11,5,6,3],permute=[0,2,1,3]","support","0","no","SYCL"
"SYCL0","SUM","type=f32,ne=[11,5,6,3],permute=[0,3,2,1]","support","0","no","SYCL"
@ -13325,6 +13339,262 @@
"SYCL0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=75,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=113,nb=75,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=512,nb=75,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=512,nb=75,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=1024,nb=75,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[4,1],kv=512,nb=75,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[4,1],kv=512,nb=75,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[32,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[32,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[32,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[32,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[32,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[32,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[32,1],kv=512,nb=75,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[32,1],kv=512,nb=75,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=113,nb=75,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=512,nb=75,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=512,nb=75,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=75,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[4,1],kv=512,nb=75,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[4,1],kv=512,nb=75,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=113,nb=75,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=512,nb=75,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=1024,nb=75,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[4,1],kv=512,nb=75,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[32,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[32,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[32,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[32,1],kv=512,nb=75,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=113,nb=75,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=512,nb=75,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=75,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[4,1],kv=512,nb=75,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=113,nb=75,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=512,nb=75,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=512,nb=75,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=1024,nb=75,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[4,1],kv=512,nb=75,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[4,1],kv=512,nb=75,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[32,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[32,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[32,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[32,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[32,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[32,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[32,1],kv=512,nb=75,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[32,1],kv=512,nb=75,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=113,nb=75,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=512,nb=75,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=512,nb=75,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=75,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[4,1],kv=512,nb=75,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[4,1],kv=512,nb=75,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=113,nb=75,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=512,nb=75,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=1024,nb=75,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[4,1],kv=512,nb=75,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[32,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[32,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[32,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[32,1],kv=512,nb=75,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=113,nb=75,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=512,nb=75,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=75,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[4,1],kv=512,nb=75,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=113,nb=75,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=512,nb=75,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=1024,nb=75,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[4,1],kv=512,nb=75,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[32,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[32,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[32,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[32,1],kv=512,nb=75,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=113,nb=75,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=512,nb=75,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=75,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[4,1],kv=512,nb=75,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=113,nb=75,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=512,nb=75,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[1,1],kv=1024,nb=75,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[4,1],kv=512,nb=75,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[32,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[32,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[32,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=1,nr23=[32,1],kv=512,nb=75,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=113,nb=75,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=512,nb=75,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=75,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=320,hsv=256,nh=4,nr23=[4,1],kv=512,nb=75,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=1,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=1,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"
"SYCL0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=1,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","SYCL"

Can't render this file because it is too large.

View File

@ -666,7 +666,7 @@ void ggml_vec_dot_nvfp4_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo
float sumf = 0;
#if defined __ARM_NEON
#if defined(__ARM_NEON) && defined(__ARM_FEATURE_FMA)
const int8x16_t values = vld1q_s8(kvalues_mxfp4);
const uint8x16_t m4b = vdupq_n_u8(0x0f);
float32x4_t acc = vdupq_n_f32(0.0f);

View File

@ -1473,10 +1473,12 @@ class extra_buffer_type : ggml::cpu::extra_buffer_type {
if (op->src[0]->buffer && op->src[0]->buffer->buft == ggml_backend_cpu_kleidiai_buffer_type()) {
return (ggml::cpu::tensor_traits *) op->src[0]->extra;
} else {
if (op->src[0]->type != GGML_TYPE_F16) {
return nullptr;
}
std::array<ggml_kleidiai_kernels *, GGML_KLEIDIAI_MAX_KERNEL_SLOTS> kernel_chain;
const int slot_total = kleidiai_collect_kernel_chain(op, kernel_chain);
const bool has_kernel = slot_total > 0;
if (has_kernel && op->src[1]->ne[1] > 1) {
if (slot_total > 0 && op->src[1]->ne[1] > 1) {
if ((op->src[0]->nb[1] * op->src[0]->ne[1] != op->src[0]->nb[2]) ||
(op->src[1]->nb[1] * op->src[1]->ne[1] != op->src[1]->nb[2])) {
return nullptr;

View File

@ -6205,7 +6205,7 @@ static void ggml_compute_forward_im2col_f16(
const ggml_tensor * src1 = dst->src[1];
GGML_ASSERT(src0->type == GGML_TYPE_F16);
GGML_ASSERT(src1->type == GGML_TYPE_F32);
GGML_ASSERT(src1->type == GGML_TYPE_F16 || src1->type == GGML_TYPE_F32);
GGML_ASSERT( dst->type == GGML_TYPE_F16);
GGML_TENSOR_BINARY_OP_LOCALS;
@ -6236,7 +6236,7 @@ static void ggml_compute_forward_im2col_f16(
int ofs1 = is_2D ? nb12 : nb11;
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
GGML_ASSERT(nb10 == sizeof(float));
GGML_ASSERT(nb10 == ggml_type_size(src1->type));
// im2col: [N, IC, IH, IW] => [N, OH, OW, IC*KH*KW]
{
@ -6249,7 +6249,12 @@ static void ggml_compute_forward_im2col_f16(
// micro kernel
ggml_fp16_t * dst_data = wdata + (in*OH*OW + ioh*OW + iow)*(IC*KH*KW); // [IC, KH, KW]
const float * const src_data = (float *)((char *) src1->data + in*ofs0 + iic*ofs1); // [IH, IW]
const float * const src_data_f32 = src1->type == GGML_TYPE_F32
? (const float *)((const char *) src1->data + in*ofs0 + iic*ofs1)
: nullptr; // [IH, IW]
const ggml_fp16_t * const src_data_f16 = src1->type == GGML_TYPE_F16
? (const ggml_fp16_t *)((const char *) src1->data + in*ofs0 + iic*ofs1)
: nullptr; // [IH, IW]
for (int64_t ikh = 0; ikh < KH; ikh++) { // 1
for (int64_t ikw = 0; ikw < KW; ikw++) {
@ -6259,7 +6264,11 @@ static void ggml_compute_forward_im2col_f16(
if (iih < 0 || iih >= IH || iiw < 0 || iiw >= IW) {
dst_data[iic*(KH*KW) + ikh*KW + ikw] = 0;
} else {
dst_data[iic*(KH*KW) + ikh*KW + ikw] = GGML_CPU_FP32_TO_FP16(src_data[iih*IW + iiw]);
if (src_data_f32 != nullptr) {
dst_data[iic*(KH*KW) + ikh*KW + ikw] = GGML_CPU_FP32_TO_FP16(src_data_f32[iih*IW + iiw]);
} else {
dst_data[iic*(KH*KW) + ikh*KW + ikw] = src_data_f16[iih*IW + iiw];
}
}
}
}

View File

@ -1,7 +1,8 @@
#include "gated_delta_net.cuh"
template <int S_v, bool KDA>
__global__ void gated_delta_net_cuda(const float * q,
__global__ void __launch_bounds__((ggml_cuda_get_physical_warp_size() < S_v ? ggml_cuda_get_physical_warp_size() : S_v) * 4, 2)
gated_delta_net_cuda(const float * q,
const float * k,
const float * v,
const float * g,
@ -38,7 +39,7 @@ __global__ void gated_delta_net_cuda(const float * q,
const int64_t state_offset = (sequence * H + h_idx) * S_v * S_v;
state += state_offset;
curr_state += state_offset;
curr_state += state_offset + col * S_v;
attn_data += (sequence * n_tokens * H + h_idx) * S_v;
constexpr int warp_size = ggml_cuda_get_physical_warp_size() < S_v ? ggml_cuda_get_physical_warp_size() : S_v;
@ -46,10 +47,11 @@ __global__ void gated_delta_net_cuda(const float * q,
constexpr int rows_per_lane = (S_v + warp_size - 1) / warp_size;
float s_shard[rows_per_lane];
// state is stored transposed: M[col][i] = S[i][col], row col is contiguous
#pragma unroll
for (int r = 0; r < rows_per_lane; r++) {
const int i = r * warp_size + lane;
s_shard[r] = curr_state[col * S_v + i];
s_shard[r] = curr_state[i];
}
for (int t = 0; t < n_tokens; t++) {
@ -63,6 +65,16 @@ __global__ void gated_delta_net_cuda(const float * q,
const float beta_val = *beta_t;
// Cache k and q in registers
float k_reg[rows_per_lane];
float q_reg[rows_per_lane];
#pragma unroll
for (int r = 0; r < rows_per_lane; r++) {
const int i = r * warp_size + lane;
k_reg[r] = k_t[i];
q_reg[r] = q_t[i];
}
if constexpr (!KDA) {
const float g_val = expf(*g_t);
@ -70,8 +82,7 @@ __global__ void gated_delta_net_cuda(const float * q,
float kv_shard = 0.0f;
#pragma unroll
for (int r = 0; r < rows_per_lane; r++) {
const int i = r * warp_size + lane;
kv_shard += s_shard[r] * k_t[i];
kv_shard += s_shard[r] * k_reg[r];
}
float kv_col = warp_reduce_sum<warp_size>(kv_shard);
@ -83,9 +94,8 @@ __global__ void gated_delta_net_cuda(const float * q,
float attn_partial = 0.0f;
#pragma unroll
for (int r = 0; r < rows_per_lane; r++) {
const int i = r * warp_size + lane;
s_shard[r] = g_val * s_shard[r] + k_t[i] * delta_col;
attn_partial += s_shard[r] * q_t[i];
s_shard[r] = g_val * s_shard[r] + k_reg[r] * delta_col;
attn_partial += s_shard[r] * q_reg[r];
}
float attn_col = warp_reduce_sum<warp_size>(attn_partial);
@ -99,7 +109,7 @@ __global__ void gated_delta_net_cuda(const float * q,
#pragma unroll
for (int r = 0; r < rows_per_lane; r++) {
const int i = r * warp_size + lane;
kv_shard += expf(g_t[i]) * s_shard[r] * k_t[i];
kv_shard += expf(g_t[i]) * s_shard[r] * k_reg[r];
}
float kv_col = warp_reduce_sum<warp_size>(kv_shard);
@ -113,8 +123,8 @@ __global__ void gated_delta_net_cuda(const float * q,
#pragma unroll
for (int r = 0; r < rows_per_lane; r++) {
const int i = r * warp_size + lane;
s_shard[r] = expf(g_t[i]) * s_shard[r] + k_t[i] * delta_col;
attn_partial += s_shard[r] * q_t[i];
s_shard[r] = expf(g_t[i]) * s_shard[r] + k_reg[r] * delta_col;
attn_partial += s_shard[r] * q_reg[r];
}
float attn_col = warp_reduce_sum<warp_size>(attn_partial);

View File

@ -19,7 +19,6 @@
#include <iomanip>
#include <map>
#include <memory>
#include <mutex>
#include <openvino/core/dimension.hpp>
#include <openvino/core/except.hpp>
#include <openvino/core/node.hpp>
@ -70,6 +69,7 @@ GgmlOvDecoder::GgmlOvDecoder(ggml_cgraph * cgraph,
validate_cgraph();
set_input_output();
compute_node_dynamic_dims();
compute_model_inputs();
compute_model_outputs();
@ -332,7 +332,7 @@ void GgmlOvDecoder::validate_cgraph() const {
}
}
ov::PartialShape GgmlOvDecoder::get_graph_input_shape(const ggml_tensor * op, const ggml_tensor * input) const {
ov::PartialShape GgmlOvDecoder::get_graph_input_shape(const ggml_tensor * op, const ggml_tensor * input, int dynamic_dim_index) const {
if (m_naive) {
return input!= nullptr ? ov::PartialShape{get_shape(input)} : ov::PartialShape{get_shape(op)};
}
@ -383,6 +383,9 @@ ov::PartialShape GgmlOvDecoder::get_graph_input_shape(const ggml_tensor * op, co
} else {
input_shape = ov::PartialShape{get_shape(input)};
}
if (dynamic_dim_index != -1) {
input_shape[3 - dynamic_dim_index] = -1;
}
return input_shape;
}
@ -445,7 +448,7 @@ void GgmlOvDecoder::compute_model_inputs() {
if (m_model_weights.find(node_name) == m_model_weights.end()) {
m_inputs[node_name] = node;
auto param_node =
std::make_shared<ov::op::v0::Parameter>(get_ov_type(node), get_graph_input_shape(node, nullptr));
std::make_shared<ov::op::v0::Parameter>(get_ov_type(node), get_graph_input_shape(node, nullptr, m_node_dynamic_dims[node]));
param_node->set_friendly_name(node_name);
param_node->output(0).get_tensor().set_names({node_name});
m_model_inputs[node_name] = param_node;
@ -489,7 +492,7 @@ void GgmlOvDecoder::compute_model_inputs() {
m_model_params.kv_names.push_back(src_name);
}
}
ov::PartialShape param_shape = get_graph_input_shape(node, src);
ov::PartialShape param_shape = get_graph_input_shape(node, src, m_node_dynamic_dims[src]);
auto param_node = std::make_shared<ov::op::v0::Parameter>(get_ov_type(src), param_shape);
param_node->set_friendly_name(src_name);
param_node->output(0).get_tensor().set_names({src_name});
@ -575,9 +578,6 @@ std::map<std::string, std::string> GgmlOvDecoder::get_kv_param_res_names() const
}
std::map<std::string, std::shared_ptr<ov::Node>> GgmlOvDecoder::create_weight_nodes(ggml_cgraph * cgraph, bool naive) {
static std::mutex weights_mutex;
std::lock_guard<std::mutex> lock(weights_mutex);
std::map<std::string, std::shared_ptr<ov::Node>> model_weights;
auto * nodes = cgraph->nodes;
auto n_nodes = cgraph->n_nodes;
@ -974,4 +974,266 @@ const std::string & GgmlOvDecoder::get_op_type(int node_idx) const {
const std::string & GgmlOvDecoder::get_op_type() const {
static const std::string unknown_op = "UNKNOWN_GGML_OP";
return unknown_op;
}
void GgmlOvDecoder::compute_node_dynamic_dims() {
auto visit_node = [&](auto && self, ggml_tensor * node) -> void {
if (!node) {
return;
}
if (node->op == GGML_OP_CPY) {
m_node_dynamic_dims[node] = -1;
}
if (m_node_dynamic_dims.count(node)) {
return;
}
for (int i = 0; i < GGML_MAX_SRC; i++) {
ggml_tensor * src = node->src[i];
if (src == nullptr) {
continue;
}
struct ggml_tensor *root_src = nullptr;
// if (src->org_src) {
// root_src = src->org_src;
// }
if (root_src) {
if (is_inp_tok(root_src, node) || is_inp_pos(root_src, node) ||
is_output_idx(root_src, node)) {
m_node_dynamic_dims[root_src] = 0;
m_node_dynamic_dims[src] = m_node_dynamic_dims[root_src];
continue;
}
self(self, root_src);
m_node_dynamic_dims[src] = m_node_dynamic_dims[root_src];
} else {
if (is_inp_tok(src, node) || is_inp_pos(src, node) || is_output_idx(src, node)) {
m_node_dynamic_dims[src] = 0;
continue;
}
self(self, src);
}
}
switch (node->op) {
case GGML_OP_NONE:
m_node_dynamic_dims[node] = -1;
break;
case GGML_OP_GET_ROWS:
m_node_dynamic_dims[node] = -1;
if (m_node_dynamic_dims[node->src[1]] != -1) {
auto dynamic_dim_idx = m_node_dynamic_dims[node->src[1]];
auto dynamic_dim_value = node->src[1]->ne[dynamic_dim_idx];
if (dynamic_dim_idx == 0) {
m_node_dynamic_dims[node] = 1;
} else {
auto dynamic_dim_stride = node->src[1]->nb[dynamic_dim_idx] / ggml_type_size(node->src[1]->type) *
ggml_type_size(node->src[0]->type);
for (int i = 0; i < GGML_MAX_DIMS; i++) {
if (dynamic_dim_stride == node->src[0]->nb[i]) {
m_node_dynamic_dims[node] = i;
break;
}
}
}
OPENVINO_ASSERT(dynamic_dim_value == node->ne[m_node_dynamic_dims[node]],
"Dynamic dim value mismatch for node: " + std::string(node->name) +
" and its src[1]: " + std::string(node->src[1]->name));
}
break;
case GGML_OP_MUL:
case GGML_OP_MUL_MAT:
m_node_dynamic_dims[node] = -1;
if (m_node_dynamic_dims[node->src[0]] != -1) {
m_node_dynamic_dims[node] = m_node_dynamic_dims[node->src[0]];
}
if (m_node_dynamic_dims[node->src[1]] != -1) {
m_node_dynamic_dims[node] = m_node_dynamic_dims[node->src[1]];
}
break;
case GGML_OP_PERMUTE:
m_node_dynamic_dims[node] = -1;
if (m_node_dynamic_dims[node->src[0]] != -1) {
auto dynamic_dim_idx = m_node_dynamic_dims[node->src[0]];
auto dynamic_dim_value = node->src[0]->ne[dynamic_dim_idx];
for (int i = 0; i < GGML_MAX_DIMS; i++) {
if (node->op_params[i] == dynamic_dim_idx) {
m_node_dynamic_dims[node] = i;
break;
}
}
OPENVINO_ASSERT(dynamic_dim_value == node->ne[m_node_dynamic_dims[node]],
"Dynamic dim value mismatch for node: " + std::string(node->name) +
" and its src[0]: " + std::string(node->src[0]->name));
}
break;
case GGML_OP_VIEW: {
// Use stride-based matching: the stride of a VIEW dimension directly
// encodes which source dimension it indexes into, so it uniquely
// identifies the dynamic dim even when two dims share the same size.
m_node_dynamic_dims[node] = -1;
if (m_node_dynamic_dims[node->src[0]] != -1) {
auto dynamic_dim_idx = m_node_dynamic_dims[node->src[0]];
auto dynamic_dim_value = node->src[0]->ne[dynamic_dim_idx];
auto dynamic_dim_stride =
node->src[0]->nb[dynamic_dim_idx] / ggml_type_size(node->src[0]->type) *
ggml_type_size(node->type);
for (int i = 0; i < GGML_MAX_DIMS; i++) {
if (node->nb[i] == dynamic_dim_stride) {
m_node_dynamic_dims[node] = i;
break;
}
}
OPENVINO_ASSERT(m_node_dynamic_dims[node] != -1 &&
dynamic_dim_value == node->ne[m_node_dynamic_dims[node]],
"Dynamic dim value mismatch for node: " + std::string(node->name) +
" and its src[0]: " + std::string(node->src[0]->name));
}
break;
}
case GGML_OP_RESHAPE: {
// RESHAPE requires src[0] to be contiguous, so both src and result
// have standard compact strides: nb[i] = type_size * prod(ne[0..i-1]).
// Match src->nb[dynamic_dim] against result->nb[i] to find the output
// dimension whose flat-memory boundary aligns with the source dynamic
// boundary. This is unambiguous (result strides are strictly monotone)
// and handles merged-lower-dim cases that ne-value matching misses.
m_node_dynamic_dims[node] = -1;
if (m_node_dynamic_dims[node->src[0]] != -1) {
auto dynamic_dim_idx = m_node_dynamic_dims[node->src[0]];
auto dynamic_dim_stride = node->src[0]->nb[dynamic_dim_idx];
for (int i = 0; i < GGML_MAX_DIMS; i++) {
if (node->nb[i] == dynamic_dim_stride && node->ne[i] == node->src[0]->ne[dynamic_dim_idx]) {
m_node_dynamic_dims[node] = i;
break;
}
}
if (m_node_dynamic_dims[node] == -1) {
std::cout << "Cannot determine dynamic dim for RESHAPE node: " << node->name << std::endl;
}
}
break;
}
case GGML_OP_FLASH_ATTN_EXT: {
// Output shape is hard-coded in ggml_flash_attn_ext as:
// ne = { v->ne[0], q->ne[2], q->ne[1], q->ne[3] }
// i.e. output dim 0 <- v dim 0 (head_size, static)
// output dim 1 <- q dim 2 (n_heads, static)
// output dim 2 <- q dim 1 (n_tokens, potentially dynamic)
// output dim 3 <- q dim 3 (batch, static)
// Using the fixed q-dim -> output-dim mapping table.
// q is src[0]; the mapping from q's dynamic dim to the output dim is:
// q dim 1 -> output dim 2
// q dim 2 -> output dim 1
// q dim 3 -> output dim 3
// q dim 0 -> output dim 0 (head_size axis, unlikely to be dynamic)
constexpr int q_to_out[GGML_MAX_DIMS] = { 0, 2, 1, 3 };
m_node_dynamic_dims[node] = -1;
if (m_node_dynamic_dims[node->src[0]] != -1) {
auto q_dynamic_dim = m_node_dynamic_dims[node->src[0]];
m_node_dynamic_dims[node] = q_to_out[q_dynamic_dim];
}
break;
}
case GGML_OP_CONT:
m_node_dynamic_dims[node] = -1;
if (m_node_dynamic_dims[node->src[0]] != -1) {
auto dynamic_dim_idx = m_node_dynamic_dims[node->src[0]];
if (ggml_are_same_shape(node, node->src[0])) {
m_node_dynamic_dims[node] = dynamic_dim_idx;
} else {
size_t src_logical_nb[GGML_MAX_DIMS];
src_logical_nb[0] = ggml_type_size(node->src[0]->type);
src_logical_nb[1] = src_logical_nb[0] *
(node->src[0]->ne[0] / ggml_blck_size(node->src[0]->type));
for (int i = 2; i < GGML_MAX_DIMS; i++) {
src_logical_nb[i] = src_logical_nb[i - 1] * node->src[0]->ne[i - 1];
}
auto dynamic_dim_stride = src_logical_nb[dynamic_dim_idx] /
ggml_type_size(node->src[0]->type) *
ggml_type_size(node->type);
int matched_dim_count = 0;
for (int i = 0; i < GGML_MAX_DIMS; i++) {
if (node->nb[i] == dynamic_dim_stride && node->ne[i] == node->src[0]->ne[dynamic_dim_idx]) {
m_node_dynamic_dims[node] = i;
matched_dim_count++;
}
}
OPENVINO_ASSERT(matched_dim_count == 1,
"Cannot determine dynamic dim for CONT node: " + std::string(node->name));
}
}
break;
case GGML_OP_RMS_NORM:
case GGML_OP_ADD:
case GGML_OP_GLU:
case GGML_OP_ROPE:
case GGML_OP_SCALE:
case GGML_OP_TRANSPOSE:
case GGML_OP_SOFT_MAX:
case GGML_OP_ARGSORT:
case GGML_OP_ADD_ID:
m_node_dynamic_dims[node] = m_node_dynamic_dims[node->src[0]];
break;
case GGML_OP_MUL_MAT_ID:
m_node_dynamic_dims[node] = m_node_dynamic_dims[node->src[1]];
break;
case GGML_OP_CPY:
case GGML_OP_SET_ROWS:
m_node_dynamic_dims[node] = -1;
break;
default:
std::cout << "Doesn't handle node name: " << node->name << " op: " << ggml_op_name(node->op) << std::endl;
break;
}
};
for (int i = 0; i < m_cgraph->n_nodes; i++) {
ggml_tensor * node = m_cgraph->nodes[i];
visit_node(visit_node, node);
}
// print the nodes in m_cgraph name & shape with the dynamic dim (the dynamic dim is the dimension with -1 in m_node_dynamic_dims) for debugging
if (0) {
for (int i = 0; i < m_cgraph->n_nodes; i++) {
ggml_tensor * node = m_cgraph->nodes[i];
int dynamic_dim = m_node_dynamic_dims[node];
std::cout << "[" << i << "] " << "node_name: " << node->name << " op: " << ggml_op_name(node->op)
<< " shape: [";
for (int j = 0; j < 4; j++) {
if (j == dynamic_dim) {
std::cout << "*";
} else {
std::cout << node->ne[j];
}
if (j < 3) {
std::cout << ", ";
}
}
std::cout << "]" << std::endl;
// print the src name & shape with the dynamic dim for debugging
for (int j = 0; j < GGML_MAX_SRC; j++) {
ggml_tensor * src = node->src[j];
if (src == nullptr) {
continue;
}
int src_dynamic_dim = m_node_dynamic_dims[src];
std::cout << " [" << j << "] src_name: " << src->name << " [";
for (int k = 0; k < 4; k++) {
if (k == src_dynamic_dim) {
std::cout << "*";
} else {
std::cout << src->ne[k];
}
if (k < 3) {
std::cout << ", ";
}
}
std::cout << "]" << std::endl;
}
std::cout << std::endl;
}
}
}

View File

@ -180,7 +180,7 @@ public:
return m_model_is_splitted;
}
ov::PartialShape get_graph_input_shape(const ggml_tensor * op, const ggml_tensor * input) const;
ov::PartialShape get_graph_input_shape(const ggml_tensor * op, const ggml_tensor * input, int dynamic_dim_index=-1) const;
static void dump_cgraph(const ggml_cgraph * cgraph, std::string & filename);
@ -278,6 +278,9 @@ private:
void compute_model_inputs();
void compute_model_outputs();
// Infer and propagate dynamic-dimension indices for all tensors in the GGML graph.
void compute_node_dynamic_dims();
void validate_cgraph() const;
ggml_cgraph * m_cgraph = nullptr;
@ -290,6 +293,7 @@ private:
std::map<std::string, ggml_tensor *> m_model_outputs;
std::vector<std::string> m_model_output_names;
std::vector<NodeInfo> m_node_info_list;
std::map<ggml_tensor *, int> m_node_dynamic_dims;
ModelParams m_model_params;
ComputeParams m_compute_params;

View File

@ -108,17 +108,23 @@ enum ggml_status ov_graph_compute_dynamic(ggml_cgraph * cgraph, std::shared_ptr<
int64_t infer_end_time;
{
std::lock_guard<std::mutex> lock(r_ctx->ov_compute_mutex);
std::shared_ptr<std::mutex> mutex;
auto it = r_ctx->decoder_cache.find(key);
cache_hit = it != r_ctx->decoder_cache.end();
ModelParams old_m_params;
if (cache_hit) {
ggml_decoder = it->second;
mutex = it->second->mutex;
std::lock_guard<std::mutex> lock(*(mutex));
ggml_decoder = it->second->ptr;
old_m_params = ggml_decoder->get_model_params();
cache_hit = old_m_params.can_reuse_dynamically(m_params);
} else {
mutex = std::make_shared<std::mutex>();
r_ctx->decoder_cache[key] = std::make_shared<decoder_runtime_ctx>(mutex);
}
std::lock_guard<std::mutex> lock(*(mutex));
if (cache_hit) {
std::map<std::string, std::shared_ptr<ov::Node>> model_weights;
@ -202,7 +208,7 @@ enum ggml_status ov_graph_compute_dynamic(ggml_cgraph * cgraph, std::shared_ptr<
compile_end_time = ggml_time_us();
infer_request = std::make_shared<ov::InferRequest>(compiled_model.create_infer_request());
r_ctx->infer_request_cache[key] = infer_request;
r_ctx->decoder_cache[key] = ggml_decoder;
r_ctx->decoder_cache.at(key)->ptr = ggml_decoder;
std::vector<std::string> ov_input_names;
std::vector<std::string> ov_output_names;
@ -308,15 +314,23 @@ enum ggml_status ov_graph_compute_static(ggml_cgraph * cgraph, std::shared_ptr<o
int64_t compile_end_time;
int64_t infer_end_time;
std::shared_ptr<std::mutex> mutex;
auto it = r_ctx->decoder_cache.find(key);
cache_hit = it != r_ctx->decoder_cache.end();
ModelParams old_m_params;
if (cache_hit) {
ggml_decoder = it->second;
mutex = it->second->mutex;
std::lock_guard<std::mutex> lock(*(mutex));
ggml_decoder = it->second->ptr;
old_m_params = ggml_decoder->get_model_params();
cache_hit = old_m_params.can_reuse_statically(m_params);
} else {
mutex = std::make_shared<std::mutex>();
r_ctx->decoder_cache[key] = std::make_shared<decoder_runtime_ctx>(mutex);
}
std::lock_guard<std::mutex> lock(*(mutex));
if (cache_hit) {
std::map<std::string, std::shared_ptr<ov::Node>> model_weights;
@ -383,7 +397,7 @@ enum ggml_status ov_graph_compute_static(ggml_cgraph * cgraph, std::shared_ptr<o
model = is_prefill ? model_prefill : model_decode;
ggml_decoder = is_prefill ? ggml_decoder_prefill : ggml_decoder_decode;
infer_request = is_prefill ? r_ctx->infer_request_cache_prefill[key] : r_ctx->infer_request_cache[key];
r_ctx->decoder_cache[key] = ggml_decoder;
r_ctx->decoder_cache.at(key)->ptr = ggml_decoder;
std::vector<std::string> ov_input_names;
std::vector<std::string> ov_output_names;

View File

@ -40,11 +40,17 @@ struct graph_key_hash {
}
};
struct decoder_runtime_ctx {
decoder_runtime_ctx(std::shared_ptr<std::mutex> mutex) :
mutex(mutex) {}
std::shared_ptr<std::mutex> mutex;
std::shared_ptr<GgmlOvDecoder> ptr;
};
struct ov_runtime_context {
std::mutex ov_compute_mutex;
std::string device;
bool stateful;
std::unordered_map<graph_key, std::shared_ptr<GgmlOvDecoder>, graph_key_hash> decoder_cache;
std::unordered_map<graph_key, std::shared_ptr<decoder_runtime_ctx>, graph_key_hash> decoder_cache;
std::unordered_map<graph_key, std::shared_ptr<ov::InferRequest>, graph_key_hash> infer_request_cache;
std::unordered_map<graph_key, std::shared_ptr<ov::InferRequest>, graph_key_hash> infer_request_cache_prefill;
std::unordered_map<graph_key, std::vector<std::string>, graph_key_hash> ov_input_names_cache;

View File

@ -24,6 +24,7 @@
#include "dmmv.hpp"
#include "element_wise.hpp"
#include "fattn.hpp"
#include "gated_delta_net.hpp"
#include "gla.hpp"
#include "im2col.hpp"
#include "mmq.hpp"
@ -31,6 +32,7 @@
#include "norm.hpp"
#include "outprod.hpp"
#include "pad.hpp"
#include "pad_reflect_1d.hpp"
#include "quantize.hpp"
#include "quants.hpp"
#include "roll.hpp"
@ -39,8 +41,8 @@
#include "ssm_conv.hpp"
#include "softmax.hpp"
#include "tsembd.hpp"
#include "upscale.hpp"
#include "wkv.hpp"
#include "pad_reflect_1d.hpp"
#endif // GGML_SYCL_BACKEND_HPP

View File

@ -294,30 +294,6 @@ static void unary_op_trunc_kernel(const T * x, T * dst, const int k, const sycl:
}
}
template<typename T>
static void upscale(const T *x, T *dst, const int nb00, const int nb01,
const int nb02, const int nb03, const int ne10, const int ne11,
const int ne12, const int ne13, const float sf0, const float sf1,
const float sf2, const float sf3, const sycl::nd_item<1> &item_ct1) {
int index = item_ct1.get_local_id(0) +
item_ct1.get_group(0) * item_ct1.get_local_range(0);
if (index >= ne10 * ne11 * ne12 * ne13) {
return;
}
// operation
int i10 = index % ne10;
int i11 = (index / ne10) % ne11;
int i12 = (index / (ne10 * ne11)) % ne12;
int i13 = (index / (ne10 * ne11 * ne12)) % ne13;
int i00 = static_cast<int>(i10 / sf0);
int i01 = static_cast<int>(i11 / sf1);
int i02 = static_cast<int>(i12 / sf2);
int i03 = static_cast<int>(i13 / sf3);
dst[index] = *(const T *)((const char *)x + i03 * nb03 + i02 * nb02 + i01 * nb01 + i00 * nb00);
}
template<typename T>
static void clamp(const T * x, T * dst, const float min, const float max, const int k,
const sycl::nd_item<1> &item_ct1) {
@ -392,20 +368,6 @@ static void arange_kernel(T * dst, const int k, T start, T step,
}
}
template<typename T>
static void upscale_sycl(const T *x, T *dst, const int nb00, const int nb01,
const int nb02, const int nb03, const int ne10, const int ne11,
const int ne12, const int ne13, const float sf0, const float sf1,
const float sf2, const float sf3, queue_ptr stream) {
int dst_size = ne10 * ne11 * ne12 * ne13;
int num_blocks = ceil_div(dst_size, SYCL_UPSCALE_BLOCK_SIZE);
sycl::range<1> gridDim(num_blocks * SYCL_UPSCALE_BLOCK_SIZE);
stream->parallel_for(
sycl::nd_range<1>(gridDim, sycl::range<1>(SYCL_UPSCALE_BLOCK_SIZE)), [=](sycl::nd_item<1> item_ct1) {
upscale(x, dst, nb00, nb01, nb02, nb03, ne10, ne11, ne12, ne13, sf0, sf1, sf2, sf3, item_ct1);
});
}
template<typename KernelInvoker, typename... Args>
static inline void dispatch_ggml_sycl_op_unary(ggml_backend_sycl_context & ctx, ggml_tensor * dst, KernelInvoker kernel_invoker, Args&&... args) {
GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16);
@ -505,42 +467,6 @@ static inline void dispatch_ggml_sycl_op_fused_glu(ggml_backend_sycl_context & c
}
}
template<typename KernelInvoker, typename... Args>
static inline void dispatch_ggml_sycl_op_upscale(ggml_backend_sycl_context & ctx, ggml_tensor * dst, KernelInvoker kernel_invoker, Args&&... args) {
GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16);
GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16);
GGML_ASSERT(dst->src[0]->type == dst->type);
dpct::queue_ptr main_stream = ctx.stream();
SYCL_CHECK(ggml_sycl_set_device(ctx.device));
const float sf0 = (float) dst->ne[0] / dst->src[0]->ne[0];
const float sf1 = (float) dst->ne[1] / dst->src[0]->ne[1];
const float sf2 = (float) dst->ne[2] / dst->src[0]->ne[2];
const float sf3 = (float) dst->ne[3] / dst->src[0]->ne[3];
switch (dst->type) {
case GGML_TYPE_F16:
{
auto data_pts = cast_data<sycl::half>(dst);
kernel_invoker(data_pts.src, data_pts.dst, (int)dst->src[0]->nb[0], (int)dst->src[0]->nb[1], (int)dst->src[0]->nb[2],
(int)dst->src[0]->nb[3], (int)dst->ne[0], (int)dst->ne[1], (int)dst->ne[2], (int)dst->ne[3], sf0, sf1, sf2, sf3,
main_stream, std::forward<Args>(args)...);
break;
}
case GGML_TYPE_F32:
{
auto data_pts = cast_data<float>(dst);
kernel_invoker(data_pts.src, data_pts.dst, (int)dst->src[0]->nb[0], (int)dst->src[0]->nb[1], (int)dst->src[0]->nb[2],
(int)dst->src[0]->nb[3], (int)dst->ne[0], (int)dst->ne[1], (int)dst->ne[2], (int)dst->ne[3], sf0, sf1, sf2, sf3,
main_stream, std::forward<Args>(args)...);
break;
}
default:
GGML_ABORT("GGML tensor type not supported!\n");
}
}
template<typename F>
static inline void ggml_sycl_op_unary(
ggml_backend_sycl_context & ctx, ggml_tensor * dst, F func) {
@ -784,15 +710,6 @@ static inline void ggml_sycl_op_sqr(ggml_backend_sycl_context & ctx, ggml_tensor
});
}
static inline void ggml_sycl_op_upscale(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
ggml_sycl_detail::dispatch_ggml_sycl_op_upscale(ctx, dst,
[](const auto* src, auto* dst_ptr, int nb00, int nb01, int nb02, int nb03,
int ne10, int ne11, int ne12, int ne13, float sf0, float sf1, float sf2, float sf3,
queue_ptr stream) {
ggml_sycl_detail::upscale_sycl(src, dst_ptr, nb00, nb01, nb02, nb03, ne10, ne11, ne12, ne13, sf0, sf1, sf2, sf3, stream);
});
}
static inline void ggml_sycl_op_clamp(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
float min_val;
float max_val;
@ -1131,12 +1048,6 @@ void ggml_sycl_sqr(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
ggml_sycl_op_sqr(ctx, dst);
}
void ggml_sycl_upscale(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
ggml_sycl_op_upscale(ctx, dst);
}
void ggml_sycl_clamp(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
ggml_sycl_op_clamp(ctx, dst);

View File

@ -71,8 +71,6 @@ void ggml_sycl_leaky_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
void ggml_sycl_sqr(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
void ggml_sycl_upscale(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
void ggml_sycl_clamp(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
void ggml_sycl_sgn(ggml_backend_sycl_context & ctx, ggml_tensor * dst);

View File

@ -44,7 +44,6 @@
#include "ggml-sycl/backend.hpp"
#include "ggml-sycl/common.hpp"
#include "ggml-sycl/element_wise.hpp"
#include "ggml-sycl/gated_delta_net.hpp"
#include "ggml-sycl/gemm.hpp"
#include "ggml-sycl/getrows.hpp"
#include "ggml-sycl/norm.hpp"
@ -4863,9 +4862,8 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g
case GGML_OP_ROPE:
case GGML_OP_ROPE_BACK:
case GGML_OP_IM2COL:
return true;
case GGML_OP_UPSCALE:
return op->src[0]->type == GGML_TYPE_F32 && op->op_params[0] == GGML_SCALE_MODE_NEAREST && !(op->op_params[0] & GGML_SCALE_FLAG_ANTIALIAS);
return true;
case GGML_OP_SUM:
case GGML_OP_SUM_ROWS:
case GGML_OP_MEAN:

View File

@ -0,0 +1,410 @@
#include "upscale.hpp"
static void upscale_f32(const float * x, float * dst,
const int nb00, const int nb01, const int nb02, const int nb03,
const int ne10, const int ne11, const int ne12, const int ne13,
const float sf0, const float sf1, const float sf2, const float sf3) {
auto item_ct1 = sycl::ext::oneapi::this_work_item::get_nd_item<3>();
int index = item_ct1.get_local_id(2) + item_ct1.get_group(2) * item_ct1.get_local_range(2);
if (index >= ne10 * ne11 * ne12 * ne13) {
return;
}
int i10 = index % ne10;
int i11 = (index / ne10) % ne11;
int i12 = (index / (ne10 * ne11)) % ne12;
int i13 = (index / (ne10 * ne11 * ne12)) % ne13;
int i00 = i10 / sf0;
int i01 = i11 / sf1;
int i02 = i12 / sf2;
int i03 = i13 / sf3;
dst[index] = *((const float*)((const char*)x + i03 * nb03 + i02 * nb02 +
i01 * nb01 + i00 * nb00));
}
static void upscale_f32_bilinear(const float * x, float * dst,
const int nb00, const int nb01, const int nb02, const int nb03,
const int ne00_src, const int ne01_src,
const int ne10_dst, const int ne11_dst, const int ne12_dst, const int ne13_dst,
const float sf0, const float sf1, const float sf2, const float sf3,
const float pixel_offset) {
auto item_ct1 = sycl::ext::oneapi::this_work_item::get_nd_item<3>();
const int64_t index = item_ct1.get_local_id(2) +
item_ct1.get_group(2) * item_ct1.get_local_range(2);
const int64_t dst_total_elements = ne10_dst * ne11_dst * ne12_dst * ne13_dst;
if (index >= dst_total_elements) {
return;
}
const int i10_dst = index % ne10_dst;
const int i11_dst = (index / ne10_dst) % ne11_dst;
const int i12_dst = (index / (ne10_dst * ne11_dst)) % ne12_dst;
const int i13_dst = index / (ne10_dst * ne11_dst * ne12_dst);
const int i02_src = (int)(i12_dst / sf2);
const int i03_src = (int)(i13_dst / sf3);
const float y_src_f = ((float)i11_dst + pixel_offset) / sf1 - pixel_offset;
int y0_src = (int) sycl::floor((float) y_src_f);
int y1_src = y0_src + 1;
y0_src = sycl::max(0, sycl::min(y0_src, ne01_src - 1));
y1_src = sycl::max(0, sycl::min(y1_src, ne01_src - 1));
float dy = y_src_f - (float)y0_src;
dy = sycl::max(0.0f, sycl::min(dy, 1.0f));
float x_src_f = ((float)i10_dst + pixel_offset) / sf0 - pixel_offset;
int x0_src = (int) sycl::floor(x_src_f);
int x1_src = x0_src + 1;
x0_src = sycl::max(0, sycl::min(x0_src, ne00_src - 1));
x1_src = sycl::max(0, sycl::min(x1_src, ne00_src - 1));
float dx = x_src_f - (float)x0_src;
dx = sycl::max(0.0f, sycl::min(dx, 1.0f));
const float* p_a =
(const float*)((const char*)x + (int64_t)x0_src * nb00 +
(int64_t)y0_src * nb01 + (int64_t)i02_src * nb02 +
(int64_t)i03_src * nb03);
const float* p_b =
(const float*)((const char*)x + (int64_t)x1_src * nb00 +
(int64_t)y0_src * nb01 + (int64_t)i02_src * nb02 +
(int64_t)i03_src * nb03);
const float* p_c =
(const float*)((const char*)x + (int64_t)x0_src * nb00 +
(int64_t)y1_src * nb01 + (int64_t)i02_src * nb02 +
(int64_t)i03_src * nb03);
const float* p_d =
(const float*)((const char*)x + (int64_t)x1_src * nb00 +
(int64_t)y1_src * nb01 + (int64_t)i02_src * nb02 +
(int64_t)i03_src * nb03);
const float val_a = *p_a;
const float val_b = *p_b;
const float val_c = *p_c;
const float val_d = *p_d;
float result = val_a * (1.0f - dx) * (1.0f - dy) +
val_b * dx * (1.0f - dy) +
val_c * (1.0f - dx) * dy +
val_d * dx * dy;
dst[index] = result;
}
// Similar to F.interpolate(..., mode="bilinear", align_corners=False, antialias=True)
// https://github.com/pytorch/pytorch/blob/8871ff29b743948d1225389d5b7068f37b22750b/aten/src/ATen/native/cpu/UpSampleKernel.cpp
static void upscale_f32_bilinear_antialias(const float * src0,
float * dst,
const int nb00,
const int nb01,
const int nb02,
const int nb03,
const int ne00_src,
const int ne01_src,
const int ne10_dst,
const int ne11_dst,
const int ne12_dst,
const int ne13_dst,
const float sf0,
const float sf1,
const float sf2,
const float sf3,
const float pixel_offset) {
auto item_ct1 = sycl::ext::oneapi::this_work_item::get_nd_item<3>();
const int64_t index = item_ct1.get_local_id(2) +
item_ct1.get_group(2) * item_ct1.get_local_range(2);
const int64_t dst_total_elements = ne10_dst * ne11_dst * ne12_dst * ne13_dst;
if (index >= dst_total_elements) {
return;
}
const int i10_dst = index % ne10_dst;
const int i11_dst = (index / ne10_dst) % ne11_dst;
const int i12_dst = (index / (ne10_dst * ne11_dst)) % ne12_dst;
const int i13_dst = index / (ne10_dst * ne11_dst * ne12_dst);
const int i02_src = (int)(i12_dst / sf2);
const int i03_src = (int)(i13_dst / sf3);
const float y = ((float)i11_dst + pixel_offset) / sf1;
const float x = ((float)i10_dst + pixel_offset) / sf0;
// support and invscale, minimum 1 pixel for bilinear
const float support1 = sycl::max(1.0f / sf1, 1.0f);
const float invscale1 = 1.0f / support1;
const float support0 = sycl::max(1.0f / sf0, 1.0f);
const float invscale0 = 1.0f / support0;
// the range of source pixels that contribute
const int64_t x_min = sycl::max(int64_t(0), int64_t(x - support0 + pixel_offset));
const int64_t x_max = sycl::min(int64_t(ne00_src), int64_t(x + support0 + pixel_offset));
const int64_t y_min = sycl::max(int64_t(0), int64_t(y - support1 + pixel_offset));
const int64_t y_max = sycl::min(int64_t(ne01_src), int64_t(y + support1 + pixel_offset));
// bilinear filter with antialiasing
float val = 0.0f;
float total_weight = 0.0f;
auto triangle_filter = [](float x) -> float {
return sycl::max(1.0f - sycl::fabs(x), 0.0f);
};
for (int64_t sy = y_min; sy < y_max; sy++) {
const float weight_y = triangle_filter((sy - y + pixel_offset) * invscale1);
for (int64_t sx = x_min; sx < x_max; sx++) {
const float weight_x = triangle_filter((sx - x + pixel_offset) * invscale0);
const float weight = weight_x * weight_y;
if (weight <= 0.0f) {
continue;
}
const float pixel =
*(const float*)((const char*)src0 + sx * nb00 + sy * nb01 +
i02_src * nb02 + i03_src * nb03);
val += pixel * weight;
total_weight += weight;
}
}
if (total_weight > 0.0f) {
val /= total_weight;
}
dst[index] = val;
}
namespace bicubic_interpolation {
static float weight1(float x, const float &a) { return ((a + 2) * x - (a + 3)) * x * x + 1; };
static float weight2(float x, const float &a) { return ((a * x - 5 * a) * x + 8 * a) * x - 4 * a; };
static float bicubic(float p0, float p1, float p2, float p3, float x, float a) {
const float w0 = weight2(x + 1, a);
const float w1 = weight1(x + 0, a);
const float w2 = weight1(1 - x, a);
const float w3 = weight2(2 - x, a);
return p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
};
}
static void upscale_f32_bicubic(const float * x, float * dst,
const int nb00, const int nb01, const int nb02, const int nb03,
const int ne00_src, const int ne01_src,
const int ne10_dst, const int ne11_dst, const int ne12_dst, const int ne13_dst,
const float sf0, const float sf1, const float sf2, const float sf3,
const float pixel_offset) {
auto item_ct1 = sycl::ext::oneapi::this_work_item::get_nd_item<3>();
const float a = -0.75f;
using bicubic_interpolation::bicubic;
const int64_t index = item_ct1.get_local_id(2) +
item_ct1.get_group(2) * item_ct1.get_local_range(2);
const int64_t dst_total_elements =
ne10_dst * ne11_dst * ne12_dst * ne13_dst;
if (index >= dst_total_elements) {
return;
}
const int i10_dst = index % ne10_dst;
const int i11_dst = (index / ne10_dst) % ne11_dst;
const int i12_dst = (index / (ne10_dst * ne11_dst)) % ne12_dst;
const int i13_dst = index / (ne10_dst * ne11_dst * ne12_dst);
const int i02_src = (int)(i12_dst / sf2);
const int i03_src = (int)(i13_dst / sf3);
const float y_src_f = ((float)i11_dst + pixel_offset) / sf1 - pixel_offset;
const int y0_src = (int) sycl::floor((float) y_src_f);
const float dy = y_src_f - (float)y0_src;
const float x_src_f = ((float)i10_dst + pixel_offset) / sf0 - pixel_offset;
const int x0_src = (int) sycl::floor((float) x_src_f);
const float dx = x_src_f - (float)x0_src;
const char * x_base = (const char *)x + (int64_t)i02_src * nb02 + (int64_t)i03_src * nb03;
auto load = [=](int x_off, int y_off) -> float {
int i00_src = sycl::max(0, sycl::min(x0_src + x_off, ne00_src - 1));
int i01_src = sycl::max(0, sycl::min(y0_src + y_off, ne01_src - 1));
return *(const float *)(x_base + (int64_t)i00_src * nb00 + (int64_t)i01_src * nb01);
};
const float result = bicubic(
bicubic(load(-1, -1), load(0, -1), load(1, -1), load(2, -1), dx, a),
bicubic(load(-1, 0), load(0, 0), load(1, 0), load(2, 0), dx, a),
bicubic(load(-1, 1), load(0, 1), load(1, 1), load(2, 1), dx, a),
bicubic(load(-1, 2), load(0, 2), load(1, 2), load(2, 2), dx, a),
dy,
a);
dst[index] = result;
}
static void upscale_f32_sycl(const float * x,
float * dst,
const int nb00,
const int nb01,
const int nb02,
const int nb03,
const int ne10,
const int ne11,
const int ne12,
const int ne13,
const float sf0,
const float sf1,
const float sf2,
const float sf3,
dpct::queue_ptr stream) {
const int64_t dst_size = ne10 * ne11 * ne12 * ne13;
const int64_t num_blocks = (dst_size + SYCL_UPSCALE_BLOCK_SIZE - 1) / SYCL_UPSCALE_BLOCK_SIZE;
stream->parallel_for(
sycl::nd_range<3>(
sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_UPSCALE_BLOCK_SIZE),
sycl::range<3>(1, 1, SYCL_UPSCALE_BLOCK_SIZE)),
[=](sycl::nd_item<3> item_ct1) {
upscale_f32(x, dst, nb00, nb01, nb02, nb03, ne10, ne11, ne12, ne13, sf0, sf1, sf2, sf3);
});
}
static void upscale_f32_bilinear_sycl(const float * x,
float * dst,
const int nb00,
const int nb01,
const int nb02,
const int nb03,
const int ne00_src,
const int ne01_src,
const int ne10_dst,
const int ne11_dst,
const int ne12_dst,
const int ne13_dst,
const float sf0,
const float sf1,
const float sf2,
const float sf3,
const float pixel_offset,
bool antialias,
dpct::queue_ptr stream) {
const int64_t dst_size = ne10_dst * ne11_dst * ne12_dst * ne13_dst;
const int64_t num_blocks = (dst_size + SYCL_UPSCALE_BLOCK_SIZE - 1) / SYCL_UPSCALE_BLOCK_SIZE;
if (antialias) {
stream->parallel_for(
sycl::nd_range<3>(
sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_UPSCALE_BLOCK_SIZE),
sycl::range<3>(1, 1, SYCL_UPSCALE_BLOCK_SIZE)),
[=](sycl::nd_item<3> item_ct1) {
upscale_f32_bilinear_antialias(
x, dst, nb00, nb01, nb02, nb03, ne00_src, ne01_src, ne10_dst, ne11_dst,
ne12_dst, ne13_dst, sf0, sf1, sf2, sf3, pixel_offset);
});
} else {
stream->parallel_for(
sycl::nd_range<3>(
sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_UPSCALE_BLOCK_SIZE),
sycl::range<3>(1, 1, SYCL_UPSCALE_BLOCK_SIZE)),
[=](sycl::nd_item<3> item_ct1) {
upscale_f32_bilinear(
x, dst, nb00, nb01, nb02, nb03, ne00_src, ne01_src, ne10_dst, ne11_dst, ne12_dst,
ne13_dst, sf0, sf1, sf2, sf3, pixel_offset);
});
}
}
static void upscale_f32_bicubic_sycl(const float * x,
float * dst,
const int nb00,
const int nb01,
const int nb02,
const int nb03,
const int ne00_src,
const int ne01_src,
const int ne10_dst,
const int ne11_dst,
const int ne12_dst,
const int ne13_dst,
const float sf0,
const float sf1,
const float sf2,
const float sf3,
const float pixel_offset,
dpct::queue_ptr stream) {
const int64_t dst_size = ne10_dst * ne11_dst * ne12_dst * ne13_dst;
const int64_t num_blocks = (dst_size + SYCL_UPSCALE_BLOCK_SIZE - 1) / SYCL_UPSCALE_BLOCK_SIZE;
{
stream->submit([&](sycl::handler & cgh) {
cgh.parallel_for(
sycl::nd_range<3>(
sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_UPSCALE_BLOCK_SIZE),
sycl::range<3>(1, 1, SYCL_UPSCALE_BLOCK_SIZE)),
[=](sycl::nd_item<3> item_ct1) {
upscale_f32_bicubic(
x, dst, nb00, nb01, nb02, nb03, ne00_src, ne01_src, ne10_dst, ne11_dst,
ne12_dst, ne13_dst, sf0, sf1, sf2, sf3, pixel_offset);
});
});
}
}
void ggml_sycl_op_upscale(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
const ggml_tensor * src0 = dst->src[0];
const float * src0_d = (const float *)src0->data;
float * dst_d = (float *)dst->data;
dpct::queue_ptr stream = ctx.stream();
GGML_ASSERT(src0->type == GGML_TYPE_F32);
GGML_ASSERT( dst->type == GGML_TYPE_F32);
const int mode_flags = dst->op_params[0];
const ggml_scale_mode mode = (ggml_scale_mode)(mode_flags & 0xFF);
float sf0 = (float)dst->ne[0]/src0->ne[0];
float sf1 = (float)dst->ne[1]/src0->ne[1];
float sf2 = (float)dst->ne[2]/src0->ne[2];
const float sf3 = (float)dst->ne[3]/src0->ne[3];
float pixel_offset = 0.5f;
if (mode_flags & GGML_SCALE_FLAG_ALIGN_CORNERS) {
sf0 = dst->ne[0] > 1 && src0->ne[0] > 1
? (float)(dst->ne[0] - 1) / (src0->ne[0] - 1)
: sf0;
sf1 = dst->ne[1] > 1 && src0->ne[1] > 1
? (float)(dst->ne[1] - 1) / (src0->ne[1] - 1)
: sf1;
pixel_offset = 0.0f;
}
if (mode == GGML_SCALE_MODE_NEAREST) {
upscale_f32_sycl(
src0_d, dst_d, src0->nb[0], src0->nb[1], src0->nb[2], src0->nb[3],
dst->ne[0], dst->ne[1], dst->ne[2], dst->ne[3], sf0, sf1, sf2, sf3, stream);
} else if (mode == GGML_SCALE_MODE_BILINEAR) {
const bool antialias = (mode_flags & GGML_SCALE_FLAG_ANTIALIAS);
upscale_f32_bilinear_sycl(
src0_d, dst_d, src0->nb[0], src0->nb[1], src0->nb[2], src0->nb[3],
src0->ne[0], src0->ne[1], dst->ne[0], dst->ne[1], dst->ne[2], dst->ne[3],
sf0, sf1, sf2, sf3, pixel_offset, antialias, stream);
} else if (mode == GGML_SCALE_MODE_BICUBIC) {
upscale_f32_bicubic_sycl(
src0_d, dst_d, src0->nb[0], src0->nb[1], src0->nb[2], src0->nb[3],
src0->ne[0], src0->ne[1], dst->ne[0], dst->ne[1], dst->ne[2], dst->ne[3],
sf0, sf1, sf2, sf3, pixel_offset, stream);
}
}
void ggml_sycl_upscale(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
ggml_sycl_op_upscale(ctx, dst);
}

View File

@ -0,0 +1,9 @@
#pragma once
#include <sycl/sycl.hpp>
#include "dpct/helper.hpp"
#include "common.hpp"
#define SYCL_UPSCALE_BLOCK_SIZE 256
void ggml_sycl_upscale(ggml_backend_sycl_context & ctx, ggml_tensor * dst);

View File

@ -245,7 +245,7 @@ void main() {
#endif
}
[[unroll]] for (uint32_t r = 0; r < rows_per_thread; ++r) {
Sf[r][c] += ACC_TYPE(dot(Q_cache[r], K_Tf));
Sf[r][c] += dot(ACC_TYPEV4(Q_cache[r]), ACC_TYPEV4(K_Tf));
}
}
}
@ -270,7 +270,7 @@ void main() {
#endif
}
[[unroll]] for (uint32_t r = 0; r < rows_per_thread; ++r) {
Sf[r][c] += ACC_TYPE(dot(Qf[tile_row(r) * qf_stride + d * D_split + d_tid], K_Tf));
Sf[r][c] += dot(ACC_TYPEV4(Qf[tile_row(r) * qf_stride + d * D_split + d_tid]), ACC_TYPEV4(K_Tf));
}
}
}

View File

@ -478,6 +478,7 @@ class MODEL_ARCH(IntEnum):
RND1 = auto()
PANGU_EMBED = auto()
MISTRAL3 = auto()
MISTRAL4 = auto()
PADDLEOCR = auto()
MIMO2 = auto()
STEP35 = auto()
@ -924,6 +925,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
MODEL_ARCH.RND1: "rnd1",
MODEL_ARCH.PANGU_EMBED: "pangu-embedded",
MODEL_ARCH.MISTRAL3: "mistral3",
MODEL_ARCH.MISTRAL4: "mistral4",
MODEL_ARCH.PADDLEOCR: "paddleocr",
MODEL_ARCH.MIMO2: "mimo2",
MODEL_ARCH.STEP35: "step35",
@ -3538,6 +3540,37 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
MODEL_TENSOR.FFN_DOWN_EXP,
MODEL_TENSOR.FFN_UP_EXP,
],
MODEL_ARCH.MISTRAL4: [
MODEL_TENSOR.TOKEN_EMBD,
MODEL_TENSOR.OUTPUT_NORM,
MODEL_TENSOR.OUTPUT,
MODEL_TENSOR.ROPE_FREQS,
MODEL_TENSOR.ATTN_NORM,
MODEL_TENSOR.ATTN_Q,
MODEL_TENSOR.ATTN_Q_A,
MODEL_TENSOR.ATTN_Q_B,
MODEL_TENSOR.ATTN_KV_A_MQA,
MODEL_TENSOR.ATTN_KV_B,
MODEL_TENSOR.ATTN_K_B,
MODEL_TENSOR.ATTN_V_B,
MODEL_TENSOR.ATTN_Q_A_NORM,
MODEL_TENSOR.ATTN_KV_A_NORM,
MODEL_TENSOR.ATTN_OUT,
MODEL_TENSOR.ATTN_ROT_EMBD,
MODEL_TENSOR.FFN_GATE_INP,
MODEL_TENSOR.FFN_NORM,
MODEL_TENSOR.FFN_GATE,
MODEL_TENSOR.FFN_DOWN,
MODEL_TENSOR.FFN_UP,
MODEL_TENSOR.FFN_GATE_EXP,
MODEL_TENSOR.FFN_DOWN_EXP,
MODEL_TENSOR.FFN_UP_EXP,
MODEL_TENSOR.FFN_GATE_UP_EXP,
MODEL_TENSOR.FFN_GATE_SHEXP,
MODEL_TENSOR.FFN_DOWN_SHEXP,
MODEL_TENSOR.FFN_UP_SHEXP,
MODEL_TENSOR.FFN_EXP_PROBS_B,
],
MODEL_ARCH.MIMO2: [
MODEL_TENSOR.TOKEN_EMBD,
MODEL_TENSOR.OUTPUT_NORM,

View File

@ -1 +1 @@
d6754f3d0e6d0acd21c12442353c9fd2f94188e7
553552e1d88be2b214b85e5159eedd39a63e2c34

View File

@ -123,6 +123,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
{ LLM_ARCH_RND1, "rnd1" },
{ LLM_ARCH_PANGU_EMBED, "pangu-embedded" },
{ LLM_ARCH_MISTRAL3, "mistral3" },
{ LLM_ARCH_MISTRAL4, "mistral4" },
{ LLM_ARCH_PADDLEOCR, "paddleocr" },
{ LLM_ARCH_MIMO2, "mimo2" },
{ LLM_ARCH_STEP35, "step35" },
@ -1589,6 +1590,7 @@ static std::set<llm_tensor> llm_get_tensor_names(llm_arch arch) {
LLM_TENSOR_FFN_UP_SHEXP,
};
case LLM_ARCH_DEEPSEEK2:
case LLM_ARCH_MISTRAL4:
return {
LLM_TENSOR_TOKEN_EMBD,
LLM_TENSOR_OUTPUT_NORM,

View File

@ -127,6 +127,7 @@ enum llm_arch {
LLM_ARCH_RND1,
LLM_ARCH_PANGU_EMBED,
LLM_ARCH_MISTRAL3,
LLM_ARCH_MISTRAL4,
LLM_ARCH_PADDLEOCR,
LLM_ARCH_MIMO2,
LLM_ARCH_STEP35,

View File

@ -1587,6 +1587,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
}
} break;
case LLM_ARCH_DEEPSEEK2:
case LLM_ARCH_MISTRAL4:
{
// lite variants include DeepSeek-V2-Lite, GigaChat3-10B-A1.8B, Kanana-2-30B-A3B
const bool is_lite = (hparams.n_layer == 27 || hparams.n_layer == 26 || (hparams.n_layer == 48 && n_vocab == 128256));
@ -4883,6 +4884,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
}
} break;
case LLM_ARCH_DEEPSEEK2:
case LLM_ARCH_MISTRAL4:
{
const bool is_mla = hparams.is_mla();
@ -7501,6 +7503,9 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
}
// recurrent / linear-attention weight scales (per-tensor, shape {1})
if (!layer.ssm_in_s && layer.ssm_in) {
layer.ssm_in_s = create_tensor(tn(LLM_TENSOR_SSM_IN, "scale", i), {1}, TENSOR_NOT_REQUIRED);
}
if (!layer.ssm_out_s && layer.ssm_out) {
layer.ssm_out_s = create_tensor(tn(LLM_TENSOR_SSM_OUT, "scale", i), {1}, TENSOR_NOT_REQUIRED);
}
@ -7847,7 +7852,7 @@ void llama_model::print_info() const {
LLAMA_LOG_INFO("%s: expert_weights_scale = %.1f\n", __func__, hparams.expert_weights_scale);
}
if (arch == LLM_ARCH_DEEPSEEK2 || arch == LLM_ARCH_GLM_DSA) {
if (arch == LLM_ARCH_DEEPSEEK2 || arch == LLM_ARCH_GLM_DSA || arch == LLM_ARCH_MISTRAL4) {
LLAMA_LOG_INFO("%s: n_layer_dense_lead = %d\n", __func__, hparams.n_layer_dense_lead);
LLAMA_LOG_INFO("%s: n_lora_q = %d\n", __func__, hparams.n_lora_q);
LLAMA_LOG_INFO("%s: n_lora_kv = %d\n", __func__, hparams.n_lora_kv);
@ -8425,6 +8430,7 @@ ggml_cgraph * llama_model::build_graph(const llm_graph_params & params) const {
} break;
case LLM_ARCH_DEEPSEEK2:
case LLM_ARCH_GLM_DSA:
case LLM_ARCH_MISTRAL4:
{
llm = std::make_unique<llm_build_deepseek2>(*this, params);
} break;
@ -8836,6 +8842,7 @@ llama_rope_type llama_model_rope_type(const llama_model * model) {
case LLM_ARCH_ERNIE4_5:
case LLM_ARCH_ERNIE4_5_MOE:
case LLM_ARCH_MISTRAL3:
case LLM_ARCH_MISTRAL4:
case LLM_ARCH_LLAMA_EMBED:
case LLM_ARCH_MAINCODER:
case LLM_ARCH_GLM_DSA:

View File

@ -409,7 +409,8 @@ struct llama_layer {
struct ggml_tensor * ffn_gate_shexp_s = nullptr;
struct ggml_tensor * ffn_up_shexp_s = nullptr;
struct ggml_tensor * ffn_down_shexp_s = nullptr;
struct ggml_tensor * ssm_out_s = nullptr;
struct ggml_tensor * ssm_in_s = nullptr;
struct ggml_tensor * ssm_out_s = nullptr;
struct ggml_tensor * ssm_alpha_s = nullptr;
struct ggml_tensor * ssm_beta_s = nullptr;

View File

@ -42,7 +42,7 @@ ggml_tensor * llm_build_mamba_base::build_mamba_layer(llm_graph_input_rs * inp,
cur = ggml_reshape_3d(ctx0, cur, cur->ne[0], n_seq_tokens, n_seqs);
// {n_embd, 2*d_inner} @ {n_embd, n_seq_tokens, n_seqs} => {2*d_inner, n_seq_tokens, n_seqs}
ggml_tensor * xz = build_lora_mm(layer.ssm_in, cur);
ggml_tensor * xz = build_lora_mm(layer.ssm_in, cur, layer.ssm_in_s);
// split the above in two
// => {d_inner, n_seq_tokens, n_seqs}
ggml_tensor * x = ggml_view_3d(ctx0, xz, d_inner, xz->ne[1], xz->ne[2], xz->nb[1], xz->nb[2], 0);
@ -137,7 +137,7 @@ ggml_tensor * llm_build_mamba_base::build_mamba_layer(llm_graph_input_rs * inp,
y = ggml_swiglu_split(ctx0, ggml_cont(ctx0, z), y);
// {d_inner, n_embd} @ {d_inner, n_seq_tokens, n_seqs} => {n_embd, n_seq_tokens, n_seqs}
cur = build_lora_mm(layer.ssm_out, y);
cur = build_lora_mm(layer.ssm_out, y, layer.ssm_out_s);
}
// {n_embd, n_seq_tokens, n_seqs} => {n_embd, n_tokens}
@ -184,7 +184,7 @@ ggml_tensor * llm_build_mamba_base::build_mamba2_layer(llm_graph_input_rs * inp,
// d_in_proj = 2 * self.d_inner + 2 * self.ngroups * self.d_state + self.nheads
// {n_embd, d_in_proj} @ {n_embd, n_seq_tokens, n_seqs} => {d_in_proj, n_seq_tokens, n_seqs}
ggml_tensor * zxBCdt = build_lora_mm(model.layers[il].ssm_in, cur);
ggml_tensor * zxBCdt = build_lora_mm(model.layers[il].ssm_in, cur, model.layers[il].ssm_in_s);
// split the above in three
ggml_tensor * z = ggml_view_4d(ctx0, zxBCdt, head_dim, n_head, n_seq_tokens, n_seqs, head_dim * zxBCdt->nb[0],
@ -278,7 +278,7 @@ ggml_tensor * llm_build_mamba_base::build_mamba2_layer(llm_graph_input_rs * inp,
y = ggml_reshape_3d(ctx0, y, d_inner, n_seq_tokens, n_seqs);
// {d_inner, n_embd} @ {d_inner, n_seq_tokens, n_seqs} => {n_embd, n_seq_tokens, n_seqs}
cur = build_lora_mm(model.layers[il].ssm_out, y);
cur = build_lora_mm(model.layers[il].ssm_out, y, model.layers[il].ssm_out_s);
}
// {n_embd, n_seq_tokens, n_seqs} => {n_embd, n_tokens}

View File

@ -107,9 +107,9 @@ ggml_tensor * llm_build_nemotron_h::build_attention_layer(ggml_tensor *
ggml_tensor * llm_build_nemotron_h::build_ffn_layer(ggml_tensor * cur, const llama_model & model, int il) {
if (model.layers[il].ffn_gate_inp == nullptr) {
cur = build_ffn(cur,
model.layers[il].ffn_up, model.layers[il].ffn_up_b, NULL,
model.layers[il].ffn_up, model.layers[il].ffn_up_b, model.layers[il].ffn_up_s,
NULL, NULL, NULL,
model.layers[il].ffn_down, model.layers[il].ffn_down_b, NULL,
model.layers[il].ffn_down, model.layers[il].ffn_down_b, model.layers[il].ffn_down_s,
NULL,
LLM_FFN_RELU_SQR, LLM_FFN_PAR, il);
cb(cur, "ffn_out", il);
@ -136,7 +136,10 @@ ggml_tensor * llm_build_nemotron_h::build_ffn_layer(ggml_tensor * cur, const lla
hparams.expert_weights_scale,
LLAMA_EXPERT_GATING_FUNC_TYPE_SIGMOID,
il,
router_logits);
router_logits, nullptr,
model.layers[il].ffn_up_exps_s,
nullptr, // no gate
model.layers[il].ffn_down_exps_s);
cb(moe_out, "ffn_moe_out", il);
if (model.layers[il].ffn_latent_up) {
@ -144,9 +147,9 @@ ggml_tensor * llm_build_nemotron_h::build_ffn_layer(ggml_tensor * cur, const lla
}
ggml_tensor * ffn_shexp = build_ffn(inp_emb,
model.layers[il].ffn_up_shexp, NULL, NULL,
NULL /* no gate */ , NULL, NULL,
model.layers[il].ffn_down_shexp, NULL, NULL,
model.layers[il].ffn_up_shexp, NULL, model.layers[il].ffn_up_shexp_s,
NULL /* no gate */ , NULL, NULL,
model.layers[il].ffn_down_shexp, NULL, model.layers[il].ffn_down_shexp_s,
NULL,
LLM_FFN_RELU_SQR, LLM_FFN_PAR, il);
cb(ffn_shexp, "ffn_shexp", il);

View File

@ -224,7 +224,7 @@ ggml_tensor * llm_build_qwen35::build_layer_attn_linear(
beta = ggml_sigmoid(ctx0, beta);
ggml_tensor * alpha = build_lora_mm(model.layers[il].ssm_alpha, cur, model.layers[il].ssm_alpha_s);
alpha = ggml_cont_3d(ctx0, alpha, num_v_heads, n_seq_tokens, n_seqs);
alpha = ggml_reshape_3d(ctx0, alpha, num_v_heads, n_seq_tokens, n_seqs);
cb(alpha, "alpha", il);
ggml_tensor * alpha_biased = ggml_add(ctx0, alpha, model.layers[il].ssm_dt);

View File

@ -224,7 +224,7 @@ ggml_tensor * llm_build_qwen35moe ::build_layer_attn_linear(
beta = ggml_sigmoid(ctx0, beta);
ggml_tensor * alpha = build_lora_mm(model.layers[il].ssm_alpha, cur, model.layers[il].ssm_alpha_s);
alpha = ggml_cont_3d(ctx0, alpha, num_v_heads, n_seq_tokens, n_seqs);
alpha = ggml_reshape_3d(ctx0, alpha, num_v_heads, n_seq_tokens, n_seqs);
cb(alpha, "alpha", il);
ggml_tensor * alpha_biased = ggml_add(ctx0, alpha, model.layers[il].ssm_dt);

View File

@ -1915,7 +1915,7 @@ env.globals["raise_exception"] = raise_exception
template = env.from_string(tmpl)
result = template.render(**vars_json)
print(result, end='')
sys.stdout.buffer.write(result.encode())
)";
static void test_template_py(testing & t, const std::string & name, const std::string & tmpl, const json & vars, const std::string & expect) {

View File

@ -90,7 +90,10 @@ static gguf_context_ptr get_gguf_ctx(const llm_arch arch, const bool moe) {
n_embd = 64;
n_head = 1;
n_ff = 96;
} else if (arch == LLM_ARCH_DEEPSEEK2 || arch == LLM_ARCH_GLM_DSA || arch == LLM_ARCH_KIMI_LINEAR) {
} else if (arch == LLM_ARCH_DEEPSEEK2
|| arch == LLM_ARCH_GLM_DSA
|| arch == LLM_ARCH_KIMI_LINEAR
|| arch == LLM_ARCH_MISTRAL4) {
n_embd = 128;
n_head = 1;
n_ff = 192;
@ -145,7 +148,10 @@ static gguf_context_ptr get_gguf_ctx(const llm_arch arch, const bool moe) {
}
ms.add_kv(LLM_KV_ATTENTION_MAX_ALIBI_BIAS, 8.0f);
if (arch == LLM_ARCH_DEEPSEEK2 || arch == LLM_ARCH_GLM_DSA || arch == LLM_ARCH_KIMI_LINEAR) {
if (arch == LLM_ARCH_DEEPSEEK2
|| arch == LLM_ARCH_GLM_DSA
|| arch == LLM_ARCH_KIMI_LINEAR
|| arch == LLM_ARCH_MISTRAL4) {
ms.add_kv(LLM_KV_ATTENTION_KEY_LENGTH, uint32_t(576));
ms.add_kv(LLM_KV_ATTENTION_VALUE_LENGTH, uint32_t(512));
ms.add_kv(LLM_KV_ROPE_DIMENSION_COUNT, uint32_t(64));
@ -319,6 +325,7 @@ static bool moe_mandatory(const llm_arch arch) {
case LLM_ARCH_MIMO2:
case LLM_ARCH_KIMI_LINEAR:
case LLM_ARCH_STEP35:
case LLM_ARCH_MISTRAL4:
return true;
default:
return false;

Binary file not shown.

View File

@ -1273,17 +1273,27 @@ json convert_responses_to_chatcmpl(const json & response_body) {
for (const auto & output_text : item.at("content")) {
const std::string type = json_value(output_text, "type", std::string());
if (type != "output_text") {
throw std::invalid_argument("'type' must be 'output_text'");
if (type == "output_text") {
if (!exists_and_is_string(output_text, "text")) {
throw std::invalid_argument("'Output text' requires 'text'");
// Ignore annotations and logprobs for now
chatcmpl_content.push_back({
{"text", output_text.at("text")},
{"type", "text"},
});
}
} else if (type == "refusal") {
if (!exists_and_is_string(output_text, "refusal")) {
throw std::invalid_argument("'Refusal' requires 'refusal'");
// Ignore annotations and logprobs for now
chatcmpl_content.push_back({
{"refusal", output_text.at("refusal")},
{"type", "refusal"},
});
}
} else {
throw std::invalid_argument("'type' must be one of 'output_text' or 'refusal'");
}
if (!exists_and_is_string(output_text, "text")) {
throw std::invalid_argument("'Output text' requires 'text'");
}
// Ignore annotations and logprobs for now
chatcmpl_content.push_back({
{"text", output_text.at("text")},
{"type", "text"},
});
}
if (merge_prev) {

View File

@ -939,7 +939,6 @@
"integrity": "sha512-oJrXtQiAXLvT9clCf1K4kxp3eKsQhIaZqxEyowkBcsvZDdZkbWrVmnGknxs5flTD0VGsxrxKgBCZty1EzoiMzA==",
"dev": true,
"license": "Apache-2.0",
"peer": true,
"dependencies": {
"@swc/helpers": "^0.5.0"
}
@ -2161,7 +2160,6 @@
"integrity": "sha512-W9R51zUCd2iHOQBg/D93+bdpYv6kbtFx+kft5X8lPKQl6yEu0aKs9i5N5GyCASOhIApgx/tkqZIJ7vgM4cqrHA==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"ts-dedent": "^2.0.0",
"type-fest": "~2.19"
@ -2245,7 +2243,6 @@
"integrity": "sha512-875hTUkEbz+MyJIxWbQjfMaekqdmEKUUfR7JyKcpfMRZqcGyrO9Gd+iS1D/Dx8LpE5FEtutWGOtlAh4ReSAiOA==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"@standard-schema/spec": "^1.0.0",
"@sveltejs/acorn-typescript": "^1.0.5",
@ -2289,7 +2286,6 @@
"integrity": "sha512-YZs/OSKOQAQCnJvM/P+F1URotNnYNeU3P2s4oIpzm1uFaqUEqRxUB0g5ejMjEb5Gjb9/PiBI5Ktrq4rUUF8UVQ==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"@sveltejs/vite-plugin-svelte-inspector": "^5.0.0",
"debug": "^4.4.1",
@ -2705,7 +2701,6 @@
"integrity": "sha512-pemlzrSESWbdAloYml3bAJMEfNh1Z7EduzqPKprCH5S341frlpYnUEW0H72dLxa6IsYr+mPno20GiSm+h9dEdQ==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"@babel/code-frame": "^7.10.4",
"@babel/runtime": "^7.12.5",
@ -2873,7 +2868,6 @@
"integrity": "sha512-+0/4J266CBGPUq/ELg7QUHhN25WYjE0wYTPSQJn1xeu8DOlIOPxXxrNGiLmfAWl7HMMgWFWXpt9IDjMWrF5Iow==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"undici-types": "~7.16.0"
}
@ -2940,7 +2934,6 @@
"integrity": "sha512-IgSWvLobTDOjnaxAfDTIHaECbkNlAlKv2j5SjpB2v7QHKv1FIfjwMy8FsDbVfDX/KjmCmYICcw7uGaXLhtsLNg==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"@typescript-eslint/scope-manager": "8.56.0",
"@typescript-eslint/types": "8.56.0",
@ -3177,7 +3170,6 @@
"integrity": "sha512-tJxiPrWmzH8a+w9nLKlQMzAKX/7VjFs50MWgcAj7p9XQ7AQ9/35fByFYptgPELyLw+0aixTnC4pUWV+APcZ/kw==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"@testing-library/dom": "^10.4.0",
"@testing-library/user-event": "^14.6.1",
@ -3305,7 +3297,6 @@
"integrity": "sha512-oukfKT9Mk41LreEW09vt45f8wx7DordoWUZMYdY/cyAk7w5TWkTRCNZYF7sX7n2wB7jyGAl74OxgwhPgKaqDMQ==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"@vitest/utils": "3.2.4",
"pathe": "^2.0.3",
@ -3376,7 +3367,6 @@
"resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz",
"integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==",
"license": "MIT",
"peer": true,
"bin": {
"acorn": "bin/acorn"
},
@ -4094,7 +4084,8 @@
"resolved": "https://registry.npmjs.org/csstype/-/csstype-3.1.3.tgz",
"integrity": "sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw==",
"dev": true,
"license": "MIT"
"license": "MIT",
"peer": true
},
"node_modules/debug": {
"version": "4.4.3",
@ -4404,7 +4395,6 @@
"dev": true,
"hasInstallScript": true,
"license": "MIT",
"peer": true,
"bin": {
"esbuild": "bin/esbuild"
},
@ -4465,7 +4455,6 @@
"integrity": "sha512-LEyamqS7W5HB3ujJyvi0HQK/dtVINZvd5mAAp9eT5S/ujByGjiZLCzPcHVzuXbpJDJF/cxwHlfceVUDZ2lnSTw==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"@eslint-community/eslint-utils": "^4.8.0",
"@eslint-community/regexpp": "^4.12.1",
@ -5672,7 +5661,6 @@
"resolved": "https://registry.npmjs.org/hono/-/hono-4.11.7.tgz",
"integrity": "sha512-l7qMiNee7t82bH3SeyUCt9UF15EVmaBvsppY2zQtrbIhl/yzBTny+YUxsVjSjQ6gaqaeVtZmGocom8TzBlA4Yw==",
"license": "MIT",
"peer": true,
"engines": {
"node": ">=16.9.0"
}
@ -8097,7 +8085,6 @@
}
],
"license": "MIT",
"peer": true,
"dependencies": {
"nanoid": "^3.3.11",
"picocolors": "^1.1.1",
@ -8231,7 +8218,6 @@
"integrity": "sha512-I7AIg5boAr5R0FFtJ6rCfD+LFsWHp81dolrFD8S79U9tb8Az2nGrJncnMSnys+bpQJfRUzqs9hnA81OAA3hCuQ==",
"dev": true,
"license": "MIT",
"peer": true,
"bin": {
"prettier": "bin/prettier.cjs"
},
@ -8248,7 +8234,6 @@
"integrity": "sha512-pn1ra/0mPObzqoIQn/vUTR3ZZI6UuZ0sHqMK5x2jMLGrs53h0sXhkVuDcrlssHwIMk7FYrMjHBPoUSyyEEDlBQ==",
"dev": true,
"license": "MIT",
"peer": true,
"peerDependencies": {
"prettier": "^3.0.0",
"svelte": "^3.2.0 || ^4.0.0-next.0 || ^5.0.0-next.0"
@ -8480,7 +8465,6 @@
"integrity": "sha512-FS+XFBNvn3GTAWq26joslQgWNoFu08F4kl0J4CgdNKADkdSGXQyTCnKteIAJy96Br6YbpEU1LSzV5dYtjMkMDg==",
"dev": true,
"license": "MIT",
"peer": true,
"engines": {
"node": ">=0.10.0"
}
@ -8491,7 +8475,6 @@
"integrity": "sha512-Xs1hdnE+DyKgeHJeJznQmYMIBG3TKIHJJT95Q58nHLSrElKlGQqDTR2HQ9fx5CN/Gk6Vh/kupBTDLU11/nDk/g==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"scheduler": "^0.26.0"
},
@ -8766,7 +8749,6 @@
"integrity": "sha512-4iya7Jb76fVpQyLoiVpzUrsjQ12r3dM7fIVz+4NwoYvZOShknRmiv+iu9CClZml5ZLGb0XMcYLutK6w9tgxHDw==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"@types/estree": "1.0.8"
},
@ -8877,7 +8859,6 @@
"integrity": "sha512-elOcIZRTM76dvxNAjqYrucTSI0teAF/L2Lv0s6f6b7FOwcwIuA357bIE871580AjHJuSvLIRUosgV+lIWx6Rgg==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"chokidar": "^4.0.0",
"immutable": "^5.0.2",
@ -9172,7 +9153,6 @@
"integrity": "sha512-LwF0VZsT4qkgx66Ad/q0QgZZrU2a5WftaADDEcJ3bGq3O2fHvwWPlSZjM1HiXD4vqP9U5JiMqQkV1gkyH0XJkw==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"@storybook/global": "^5.0.0",
"@storybook/icons": "^2.0.1",
@ -9387,7 +9367,6 @@
"resolved": "https://registry.npmjs.org/svelte/-/svelte-5.48.3.tgz",
"integrity": "sha512-w7QZ398cdNherTdiQ/v3SYLLGOO4948Jgjh04PYqtTYVohmBvbmFwLmo7pp8gp4/1tceRWfSTjHgjtfpCVNJmQ==",
"license": "MIT",
"peer": true,
"dependencies": {
"@jridgewell/remapping": "^2.3.4",
"@jridgewell/sourcemap-codec": "^1.5.0",
@ -9633,7 +9612,6 @@
"integrity": "sha512-gBXpgUm/3rp1lMZZrM/w7D8GKqshif0zAymAhbCyIt8KMe+0v9DQ7cdYLR4FHH/cKpdTXb+A/tKKU3eolfsI+g==",
"dev": true,
"license": "MIT",
"peer": true,
"funding": {
"type": "github",
"url": "https://github.com/sponsors/dcastil"
@ -9664,8 +9642,7 @@
"resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.1.11.tgz",
"integrity": "sha512-2E9TBm6MDD/xKYe+dvJZAmg3yxIEDNRc0jwlNyDg/4Fil2QcSLjFKGVff0lAf1jjeaArlG/M75Ey/EYr/OJtBA==",
"dev": true,
"license": "MIT",
"peer": true
"license": "MIT"
},
"node_modules/tapable": {
"version": "2.2.2",
@ -9942,7 +9919,6 @@
"integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==",
"dev": true,
"license": "Apache-2.0",
"peer": true,
"bin": {
"tsc": "bin/tsc",
"tsserver": "bin/tsserver"
@ -10336,7 +10312,6 @@
"integrity": "sha512-BxAKBWmIbrDgrokdGZH1IgkIk/5mMHDreLDmCJ0qpyJaAteP8NvMhkwr/ZCQNqNH97bw/dANTE9PDzqwJghfMQ==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"esbuild": "^0.25.0",
"fdir": "^6.5.0",
@ -10497,7 +10472,6 @@
"integrity": "sha512-LUCP5ev3GURDysTWiP47wRRUpLKMOfPh+yKTx3kVIEiu5KOMeqzpnYNsKyOoVrULivR8tLcks4+lga33Whn90A==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"@types/chai": "^5.2.2",
"@vitest/expect": "3.2.4",
@ -10819,7 +10793,6 @@
"resolved": "https://registry.npmjs.org/zod/-/zod-4.2.1.tgz",
"integrity": "sha512-0wZ1IRqGGhMP76gLqz8EyfBXKk0J2qo2+H3fi4mcUP/KtTocoX08nmIAHl1Z2kJIZbZee8KOpBCSNPRgauucjw==",
"license": "MIT",
"peer": true,
"funding": {
"url": "https://github.com/sponsors/colinhacks"
}

View File

@ -11,7 +11,7 @@
iconSize?: string;
class?: string;
disabled?: boolean;
onclick: () => void;
onclick: (e?: MouseEvent) => void;
'aria-label'?: string;
}

View File

@ -65,7 +65,8 @@
$effect(() => {
if (conversationModel) {
modelsStore.selectModelByName(conversationModel);
} else if (isRouter && modelsStore.loadedModelIds.length > 0) {
} else if (isRouter && !modelsStore.selectedModelId && modelsStore.loadedModelIds.length > 0) {
// auto-select the first loaded model only when nothing is selected yet
const first = modelOptions().find((m) => modelsStore.loadedModelIds.includes(m.model));
if (first) modelsStore.selectModelById(first.id);
}

View File

@ -3,6 +3,7 @@
import { Button } from '$lib/components/ui/button';
import { DialogConversationSelection, DialogConfirmation } from '$lib/components/app';
import { createMessageCountMap } from '$lib/utils';
import { ISO_DATE_TIME_SEPARATOR } from '$lib/constants';
import { conversationsStore, conversations } from '$lib/stores/conversations.svelte';
import { toast } from 'svelte-sonner';
@ -55,18 +56,10 @@
})
);
const blob = new Blob([JSON.stringify(allData, null, 2)], {
type: 'application/json'
});
const url = URL.createObjectURL(blob);
const a = document.createElement('a');
a.href = url;
a.download = `conversations_${new Date().toISOString().split('T')[0]}.json`;
document.body.appendChild(a);
a.click();
document.body.removeChild(a);
URL.revokeObjectURL(url);
conversationsStore.downloadConversationFile(
allData,
`${new Date().toISOString().split(ISO_DATE_TIME_SEPARATOR)[0]}_conversations.json`
);
exportedConversations = selectedConversations;
showExportSummary = true;

View File

@ -5,21 +5,38 @@
import { serverStore } from '$lib/stores/server.svelte';
import { modelsStore, modelOptions, modelsLoading } from '$lib/stores/models.svelte';
import { formatFileSize, formatParameters, formatNumber } from '$lib/utils';
import type { ApiLlamaCppServerProps } from '$lib/types';
interface Props {
open?: boolean;
onOpenChange?: (open: boolean) => void;
// when set, fetch props from the child process (router mode)
modelId?: string | null;
}
let { open = $bindable(), onOpenChange }: Props = $props();
let { open = $bindable(), onOpenChange, modelId = null }: Props = $props();
let serverProps = $derived(serverStore.props);
let modelName = $derived(modelsStore.singleModelName);
let isRouter = $derived(serverStore.isRouterMode);
// per-model props fetched from the child process
let routerModelProps = $state<ApiLlamaCppServerProps | null>(null);
let isLoadingRouterProps = $state(false);
// in router mode use per-model props, otherwise use global props
let serverProps = $derived(isRouter && modelId ? routerModelProps : serverStore.props);
let modelName = $derived(isRouter && modelId ? modelId : modelsStore.singleModelName);
let models = $derived(modelOptions());
let isLoadingModels = $derived(modelsLoading());
// Get the first model for single-model mode display
let firstModel = $derived(models[0] ?? null);
// in router mode, find the model option matching modelId
// in single mode, use the first model as before
let firstModel = $derived.by(() => {
if (isRouter && modelId) {
return models.find((m) => m.model === modelId) ?? null;
}
return models[0] ?? null;
});
// Get modalities from modelStore using the model ID from the first model
let modalities = $derived.by(() => {
@ -33,10 +50,31 @@
modelsStore.fetch();
}
});
// fetch per-model props from child process when dialog opens in router mode
$effect(() => {
if (open && isRouter && modelId) {
isLoadingRouterProps = true;
modelsStore
.fetchModelProps(modelId)
.then((props) => {
routerModelProps = props;
})
.catch(() => {
routerModelProps = null;
})
.finally(() => {
isLoadingRouterProps = false;
});
}
if (!open) {
routerModelProps = null;
}
});
</script>
<Dialog.Root bind:open {onOpenChange}>
<Dialog.Content class="@container z-9999 !max-w-[60rem] max-w-full">
<Dialog.Content class="@container z-9999 !max-h-[80dvh] !max-w-[60rem] max-w-full">
<style>
@container (max-width: 56rem) {
.resizable-text-container {
@ -52,7 +90,7 @@
</Dialog.Header>
<div class="space-y-6 py-4">
{#if isLoadingModels}
{#if isLoadingModels || isLoadingRouterProps}
<div class="flex items-center justify-center py-8">
<div class="text-sm text-muted-foreground">Loading model information...</div>
</div>
@ -212,7 +250,7 @@
<Table.Cell class="align-middle font-medium">Chat Template</Table.Cell>
<Table.Cell class="py-10">
<div class="max-h-120 overflow-y-auto rounded-md bg-muted p-4">
<div class="rounded-md bg-muted p-4">
<pre
class="font-mono text-xs whitespace-pre-wrap">{serverProps.chat_template}</pre>
</div>

View File

@ -6,6 +6,7 @@
import { parseHeadersToArray, serializeHeaders } from '$lib/utils';
import { UrlProtocol } from '$lib/enums';
import { MCP_SERVER_URL_PLACEHOLDER } from '$lib/constants';
import { mcpStore } from '$lib/stores/mcp.svelte';
interface Props {
url: string;
@ -62,14 +63,33 @@
{/if}
{#if !isWebSocket && onUseProxyChange}
<label class="mt-3 flex cursor-pointer items-center gap-2">
<label
class="mt-3 flex items-start gap-2"
class:cursor-pointer={mcpStore.isProxyAvailable}
class:opacity-80={!mcpStore.isProxyAvailable}
>
<Switch
class="mt-1"
id="use-proxy-{id}"
checked={useProxy}
disabled={!mcpStore.isProxyAvailable}
onCheckedChange={(checked) => onUseProxyChange?.(checked)}
/>
<span class="text-xs text-muted-foreground">Use llama-server proxy</span>
<span>
<span class="text-xs text-muted-foreground">Use llama-server proxy</span>
<br />
{#if !mcpStore.isProxyAvailable}
<span class="inline-flex gap-0.75 text-xs text-muted-foreground/60"
>(Run <pre>llama-server</pre>
with
<pre>--webui-mcp-proxy</pre>
flag)</span
>
{/if}
</span>
</label>
{/if}
</div>

View File

@ -1,6 +1,5 @@
<script lang="ts">
import { onMount } from 'svelte';
import { SvelteMap } from 'svelte/reactivity';
import { ChevronDown, Loader2, Package } from '@lucide/svelte';
import * as DropdownMenu from '$lib/components/ui/dropdown-menu';
import * as Tooltip from '$lib/components/ui/tooltip';
@ -19,9 +18,11 @@
DialogModelInformation,
DropdownMenuSearchable,
ModelId,
ModelsSelectorList,
ModelsSelectorOption
} from '$lib/components/app';
import type { ModelOption } from '$lib/types/models';
import { filterModelOptions, groupModelOptions, type ModelItem } from './utils';
interface Props {
class?: string;
@ -73,89 +74,13 @@
let searchTerm = $state('');
let highlightedIndex = $state<number>(-1);
let filteredOptions: ModelOption[] = $derived.by(() => {
const term = searchTerm.trim().toLowerCase();
if (!term) return options;
let filteredOptions = $derived(filterModelOptions(options, searchTerm));
return options.filter(
(option) =>
option.model.toLowerCase().includes(term) ||
option.name?.toLowerCase().includes(term) ||
option.aliases?.some((alias: string) => alias.toLowerCase().includes(term)) ||
option.tags?.some((tag: string) => tag.toLowerCase().includes(term))
);
});
let groupedFilteredOptions = $derived.by(() => {
const favIds = modelsStore.favouriteModelIds;
const result: {
orgName: string | null;
isFavouritesGroup: boolean;
isLoadedGroup: boolean;
items: { option: ModelOption; flatIndex: number }[];
}[] = [];
// Loaded models group (top)
const loadedItems: { option: ModelOption; flatIndex: number }[] = [];
for (let i = 0; i < filteredOptions.length; i++) {
if (modelsStore.isModelLoaded(filteredOptions[i].model)) {
loadedItems.push({ option: filteredOptions[i], flatIndex: i });
}
}
if (loadedItems.length > 0) {
result.push({
orgName: null,
isFavouritesGroup: false,
isLoadedGroup: true,
items: loadedItems
});
}
// Favourites group
const loadedModelIds = new Set(loadedItems.map((item) => item.option.model));
const favItems: { option: ModelOption; flatIndex: number }[] = [];
for (let i = 0; i < filteredOptions.length; i++) {
if (favIds.has(filteredOptions[i].model) && !loadedModelIds.has(filteredOptions[i].model)) {
favItems.push({ option: filteredOptions[i], flatIndex: i });
}
}
if (favItems.length > 0) {
result.push({
orgName: null,
isFavouritesGroup: true,
isLoadedGroup: false,
items: favItems
});
}
// Org groups (excluding loaded and favourites)
const orgGroups = new SvelteMap<string, { option: ModelOption; flatIndex: number }[]>();
for (let i = 0; i < filteredOptions.length; i++) {
const option = filteredOptions[i];
if (loadedModelIds.has(option.model) || favIds.has(option.model)) continue;
const orgName = option.parsedId?.orgName ?? null;
const key = orgName ?? '';
if (!orgGroups.has(key)) orgGroups.set(key, []);
orgGroups.get(key)!.push({ option, flatIndex: i });
}
for (const [orgName, items] of orgGroups) {
result.push({
orgName: orgName || null,
isFavouritesGroup: false,
isLoadedGroup: false,
items
});
}
return result;
});
let groupedFilteredOptions = $derived(
groupModelOptions(filteredOptions, modelsStore.favouriteModelIds, (m) =>
modelsStore.isModelLoaded(m)
)
);
$effect(() => {
void searchTerm;
@ -164,6 +89,12 @@
let isOpen = $state(false);
let showModelDialog = $state(false);
let infoModelId = $state<string | null>(null);
function handleInfoClick(modelName: string) {
infoModelId = modelName;
showModelDialog = true;
}
onMount(() => {
modelsStore.fetch().catch((error) => {
@ -418,45 +349,39 @@
<p class="px-4 py-3 text-sm text-muted-foreground">No models found.</p>
{/if}
{#each groupedFilteredOptions as group (group.isLoadedGroup ? '__loaded__' : group.isFavouritesGroup ? '__favourites__' : group.orgName)}
{#if group.isLoadedGroup}
<p class="px-2 py-2 text-xs font-semibold text-muted-foreground/60 select-none">
Loaded models
</p>
{:else if group.isFavouritesGroup}
<p class="px-2 py-2 text-xs font-semibold text-muted-foreground/60 select-none">
Favourite models
</p>
{:else if group.orgName}
<p
class="px-2 py-2 text-xs font-semibold text-muted-foreground/60 select-none [&:not(:first-child)]:mt-2"
>
{group.orgName}
</p>
{/if}
{#snippet modelOption(item: ModelItem, showOrgName: boolean)}
{@const { option, flatIndex } = item}
{@const isSelected = currentModel === option.model || activeId === option.id}
{@const isHighlighted = flatIndex === highlightedIndex}
{@const isFav = modelsStore.favouriteModelIds.has(option.model)}
{#each group.items as { option, flatIndex } (group.isLoadedGroup ? `loaded-${option.id}` : group.isFavouritesGroup ? `fav-${option.id}` : option.id)}
{@const isSelected = currentModel === option.model || activeId === option.id}
{@const isHighlighted = flatIndex === highlightedIndex}
{@const isFav = modelsStore.favouriteModelIds.has(option.model)}
<ModelsSelectorOption
{option}
{isSelected}
{isHighlighted}
{isFav}
{showOrgName}
onSelect={handleSelect}
onInfoClick={handleInfoClick}
onMouseEnter={() => (highlightedIndex = flatIndex)}
onKeyDown={(e) => {
if (e.key === KeyboardKey.ENTER || e.key === KeyboardKey.SPACE) {
e.preventDefault();
handleSelect(option.id);
}
}}
/>
{/snippet}
<ModelsSelectorOption
{option}
{isSelected}
{isHighlighted}
{isFav}
showOrgName={group.isFavouritesGroup || group.isLoadedGroup}
onSelect={handleSelect}
onMouseEnter={() => (highlightedIndex = flatIndex)}
onKeyDown={(e) => {
if (e.key === KeyboardKey.ENTER || e.key === KeyboardKey.SPACE) {
e.preventDefault();
handleSelect(option.id);
}
}}
/>
{/each}
{/each}
<ModelsSelectorList
groups={groupedFilteredOptions}
{currentModel}
{activeId}
sectionHeaderClass="my-1.5 px-2 py-2 text-[13px] font-semibold text-muted-foreground/70 select-none"
onSelect={handleSelect}
onInfoClick={handleInfoClick}
renderOption={modelOption}
/>
</div>
</DropdownMenuSearchable>
</DropdownMenu.Content>
@ -500,6 +425,6 @@
{/if}
</div>
{#if showModelDialog && !isRouter}
<DialogModelInformation bind:open={showModelDialog} />
{#if showModelDialog}
<DialogModelInformation bind:open={showModelDialog} modelId={infoModelId} />
{/if}

View File

@ -0,0 +1,72 @@
<script lang="ts">
import { modelsStore } from '$lib/stores/models.svelte';
import { ModelsSelectorOption } from '$lib/components/app';
import type { GroupedModelOptions, ModelItem } from './utils';
interface Props {
groups: GroupedModelOptions;
currentModel: string | null;
activeId: string | null;
sectionHeaderClass?: string;
orgHeaderClass?: string;
onSelect: (modelId: string) => void;
onInfoClick: (modelName: string) => void;
renderOption?: import('svelte').Snippet<[ModelItem, boolean]>;
}
let {
groups,
currentModel,
activeId,
sectionHeaderClass = 'my-1 px-2 py-2 text-[13px] font-semibold text-muted-foreground/70 select-none',
orgHeaderClass = 'px-2 py-2 text-[11px] font-semibold text-muted-foreground/50 select-none [&:not(:first-child)]:mt-1',
onSelect,
onInfoClick,
renderOption
}: Props = $props();
let render = $derived(renderOption ?? defaultOption);
</script>
{#snippet defaultOption(item: ModelItem, showOrgName: boolean)}
{@const { option } = item}
{@const isSelected = currentModel === option.model || activeId === option.id}
{@const isFav = modelsStore.favouriteModelIds.has(option.model)}
<ModelsSelectorOption
{option}
{isSelected}
isHighlighted={false}
{isFav}
{showOrgName}
{onSelect}
{onInfoClick}
onMouseEnter={() => {}}
onKeyDown={() => {}}
/>
{/snippet}
{#if groups.loaded.length > 0}
<p class={sectionHeaderClass}>Loaded models</p>
{#each groups.loaded as item (`loaded-${item.option.id}`)}
{@render render(item, true)}
{/each}
{/if}
{#if groups.favourites.length > 0}
<p class={sectionHeaderClass}>Favourite models</p>
{#each groups.favourites as item (`fav-${item.option.id}`)}
{@render render(item, true)}
{/each}
{/if}
{#if groups.available.length > 0}
<p class={sectionHeaderClass}>Available models</p>
{#each groups.available as group (group.orgName)}
{#if group.orgName}
<p class={orgHeaderClass}>{group.orgName}</p>
{/if}
{#each group.items as item (item.option.id)}
{@render render(item, false)}
{/each}
{/each}
{/if}

View File

@ -1,5 +1,14 @@
<script lang="ts">
import { CircleAlert, Heart, HeartOff, Loader2, Power, PowerOff, RotateCw } from '@lucide/svelte';
import {
CircleAlert,
Heart,
HeartOff,
Info,
Loader2,
Power,
PowerOff,
RotateCw
} from '@lucide/svelte';
import { cn } from '$lib/components/ui/utils';
import { ActionIcon, ModelId } from '$lib/components/app';
import type { ModelOption } from '$lib/types/models';
@ -15,6 +24,7 @@
onSelect: (modelId: string) => void;
onMouseEnter: () => void;
onKeyDown: (e: KeyboardEvent) => void;
onInfoClick?: (modelName: string) => void;
}
let {
@ -25,7 +35,8 @@
showOrgName = false,
onSelect,
onMouseEnter,
onKeyDown
onKeyDown,
onInfoClick
}: Props = $props();
let currentRouterModels = $derived(routerModels());
@ -63,11 +74,11 @@
class="flex-1"
/>
<div class="flex shrink-0 items-center gap-2.5">
<div class="flex shrink-0 items-center gap-1">
<!-- svelte-ignore a11y_no_static_element_interactions -->
<!-- svelte-ignore a11y_click_events_have_key_events -->
<div
class="pointer-events-none flex w-4 items-center justify-center pl-2 opacity-0 group-hover:pointer-events-auto group-hover:opacity-100"
class="pointer-events-none flex items-center justify-center gap-0.75 pl-2 opacity-0 group-hover:pointer-events-auto group-hover:opacity-100"
onclick={(e) => e.stopPropagation()}
>
{#if isFav}
@ -87,7 +98,19 @@
onclick={() => modelsStore.toggleFavourite(option.model)}
/>
{/if}
<!-- info button: only shown when model is loaded and callback is provided -->
{#if isLoaded && onInfoClick}
<ActionIcon
iconSize="h-2.5 w-2.5"
icon={Info}
tooltip="Model information"
class="h-3 w-3 hover:text-foreground"
onclick={() => onInfoClick(option.model)}
/>
{/if}
</div>
{#if isLoading}
<Loader2 class="h-4 w-4 animate-spin text-muted-foreground" />
{:else if isFailed}

View File

@ -1,6 +1,5 @@
<script lang="ts">
import { onMount } from 'svelte';
import { SvelteMap } from 'svelte/reactivity';
import { ChevronDown, Loader2, Package } from '@lucide/svelte';
import * as Sheet from '$lib/components/ui/sheet';
import { cn } from '$lib/components/ui/utils';
@ -15,11 +14,12 @@
import { isRouterMode } from '$lib/stores/server.svelte';
import {
DialogModelInformation,
ModelsSelectorList,
SearchInput,
TruncatedText,
ModelsSelectorOption
TruncatedText
} from '$lib/components/app';
import type { ModelOption } from '$lib/types/models';
import { filterModelOptions, groupModelOptions } from './utils';
interface Props {
class?: string;
@ -73,85 +73,22 @@
let searchTerm = $state('');
let filteredOptions: ModelOption[] = $derived.by(() => {
const term = searchTerm.trim().toLowerCase();
if (!term) return options;
let filteredOptions = $derived(filterModelOptions(options, searchTerm));
return options.filter(
(option) =>
option.model.toLowerCase().includes(term) ||
option.name?.toLowerCase().includes(term) ||
option.aliases?.some((alias: string) => alias.toLowerCase().includes(term)) ||
option.tags?.some((tag: string) => tag.toLowerCase().includes(term))
);
});
let groupedFilteredOptions = $derived.by(() => {
const favIds = modelsStore.favouriteModelIds;
const result: {
orgName: string | null;
isFavouritesGroup: boolean;
isLoadedGroup: boolean;
items: { option: ModelOption; flatIndex: number }[];
}[] = [];
// Loaded models group (top)
const loadedItems: { option: ModelOption; flatIndex: number }[] = [];
for (let i = 0; i < filteredOptions.length; i++) {
if (modelsStore.isModelLoaded(filteredOptions[i].model)) {
loadedItems.push({ option: filteredOptions[i], flatIndex: i });
}
}
if (loadedItems.length > 0) {
result.push({
orgName: null,
isFavouritesGroup: false,
isLoadedGroup: true,
items: loadedItems
});
}
// Favourites group
const loadedModelIds = new Set(loadedItems.map((item) => item.option.model));
const favItems: { option: ModelOption; flatIndex: number }[] = [];
for (let i = 0; i < filteredOptions.length; i++) {
if (favIds.has(filteredOptions[i].model) && !loadedModelIds.has(filteredOptions[i].model)) {
favItems.push({ option: filteredOptions[i], flatIndex: i });
}
}
if (favItems.length > 0) {
result.push({
orgName: null,
isFavouritesGroup: true,
isLoadedGroup: false,
items: favItems
});
}
// Org groups (excluding loaded and favourites)
const orgGroups = new SvelteMap<string, { option: ModelOption; flatIndex: number }[]>();
for (let i = 0; i < filteredOptions.length; i++) {
const option = filteredOptions[i];
if (loadedModelIds.has(option.model) || favIds.has(option.model)) continue;
const orgName = option.parsedId?.orgName ?? null;
const key = orgName ?? '';
if (!orgGroups.has(key)) orgGroups.set(key, []);
orgGroups.get(key)!.push({ option, flatIndex: i });
}
for (const [orgName, items] of orgGroups) {
result.push({
orgName: orgName || null,
isFavouritesGroup: false,
isLoadedGroup: false,
items
});
}
return result;
});
let groupedFilteredOptions = $derived(
groupModelOptions(filteredOptions, modelsStore.favouriteModelIds, (m) =>
modelsStore.isModelLoaded(m)
)
);
let sheetOpen = $state(false);
let showModelDialog = $state(false);
let infoModelId = $state<string | null>(null);
function handleInfoClick(modelName: string) {
infoModelId = modelName;
showModelDialog = true;
}
onMount(() => {
modelsStore.fetch().catch((error) => {
@ -339,38 +276,15 @@
<p class="px-3 py-3 text-center text-sm text-muted-foreground">No models found.</p>
{/if}
{#each groupedFilteredOptions as group (group.isLoadedGroup ? '__loaded__' : group.isFavouritesGroup ? '__favourites__' : group.orgName)}
{#if group.isLoadedGroup}
<p class="px-2 py-2 text-xs font-semibold text-muted-foreground/60 select-none">
Loaded models
</p>
{:else if group.isFavouritesGroup}
<p class="px-2 py-2 text-xs font-semibold text-muted-foreground/60 select-none">
Favourite models
</p>
{:else if group.orgName}
<p
class="px-2 py-2 text-xs font-semibold text-muted-foreground/60 select-none [&:not(:first-child)]:mt-2"
>
{group.orgName}
</p>
{/if}
{#each group.items as { option } (group.isLoadedGroup ? `loaded-${option.id}` : group.isFavouritesGroup ? `fav-${option.id}` : option.id)}
{@const isSelected = currentModel === option.model || activeId === option.id}
{@const isFav = modelsStore.favouriteModelIds.has(option.model)}
<ModelsSelectorOption
{option}
{isSelected}
isHighlighted={false}
{isFav}
showOrgName={group.isFavouritesGroup || group.isLoadedGroup}
onSelect={handleSelect}
onMouseEnter={() => {}}
onKeyDown={() => {}}
/>
{/each}
{/each}
<ModelsSelectorList
groups={groupedFilteredOptions}
{currentModel}
{activeId}
sectionHeaderClass="px-2 py-2 text-xs font-semibold text-muted-foreground/60 select-none"
orgHeaderClass="px-2 py-2 text-xs font-semibold text-muted-foreground/60 select-none [&:not(:first-child)]:mt-2"
onSelect={handleSelect}
onInfoClick={handleInfoClick}
/>
</div>
</div>
</Sheet.Content>
@ -403,6 +317,6 @@
{/if}
</div>
{#if showModelDialog && !isRouter}
<DialogModelInformation bind:open={showModelDialog} />
{#if showModelDialog}
<DialogModelInformation bind:open={showModelDialog} modelId={infoModelId} />
{/if}

View File

@ -44,6 +44,27 @@
*/
export { default as ModelsSelector } from './ModelsSelector.svelte';
/**
* **ModelsSelectorList** - Grouped model options list
*
* Renders grouped model options (loaded, favourites, available) with section
* headers and org subgroups. Shared between ModelsSelector and ModelsSelectorSheet
* to avoid template duplication.
*
* Accepts an optional `renderOption` snippet to customize how each option is
* rendered (e.g., to add keyboard navigation or highlighting).
*/
export { default as ModelsSelectorList } from './ModelsSelectorList.svelte';
/**
* **ModelsSelectorOption** - Single model option row
*
* Renders a single model option with selection state, favourite toggle,
* load/unload actions, status indicators, and an info button.
* Used inside ModelsSelectorList or directly in custom render snippets.
*/
export { default as ModelsSelectorOption } from './ModelsSelectorOption.svelte';
/**
* **ModelsSelectorSheet** - Mobile model selection sheet
*
@ -80,5 +101,12 @@ export { default as ModelsSelectorSheet } from './ModelsSelectorSheet.svelte';
* ```
*/
export { default as ModelBadge } from './ModelBadge.svelte';
/**
* **ModelId** - Parsed model identifier display
*
* Displays a model ID with optional org name, parameter badges, quantization,
* aliases, and tags. Supports raw mode to show the unprocessed model name.
* Respects the user's `showRawModelNames` setting.
*/
export { default as ModelId } from './ModelId.svelte';
export { default as ModelsSelectorOption } from './ModelsSelectorOption.svelte';

View File

@ -0,0 +1,75 @@
import { SvelteMap } from 'svelte/reactivity';
import type { ModelOption } from '$lib/types/models';
export interface ModelItem {
option: ModelOption;
flatIndex: number;
}
export interface OrgGroup {
orgName: string | null;
items: ModelItem[];
}
export interface GroupedModelOptions {
loaded: ModelItem[];
favourites: ModelItem[];
available: OrgGroup[];
}
export function filterModelOptions(options: ModelOption[], searchTerm: string): ModelOption[] {
const term = searchTerm.trim().toLowerCase();
if (!term) return options;
return options.filter(
(option) =>
option.model.toLowerCase().includes(term) ||
option.name?.toLowerCase().includes(term) ||
option.aliases?.some((alias: string) => alias.toLowerCase().includes(term)) ||
option.tags?.some((tag: string) => tag.toLowerCase().includes(term))
);
}
export function groupModelOptions(
filteredOptions: ModelOption[],
favouriteIds: Set<string>,
isModelLoaded: (model: string) => boolean
): GroupedModelOptions {
// Loaded models
const loaded: ModelItem[] = [];
for (let i = 0; i < filteredOptions.length; i++) {
if (isModelLoaded(filteredOptions[i].model)) {
loaded.push({ option: filteredOptions[i], flatIndex: i });
}
}
// Favourites (excluding loaded)
const loadedModelIds = new Set(loaded.map((item) => item.option.model));
const favourites: ModelItem[] = [];
for (let i = 0; i < filteredOptions.length; i++) {
if (
favouriteIds.has(filteredOptions[i].model) &&
!loadedModelIds.has(filteredOptions[i].model)
) {
favourites.push({ option: filteredOptions[i], flatIndex: i });
}
}
// Available models grouped by org (excluding loaded and favourites)
const available: OrgGroup[] = [];
const orgGroups = new SvelteMap<string, ModelItem[]>();
for (let i = 0; i < filteredOptions.length; i++) {
const option = filteredOptions[i];
if (loadedModelIds.has(option.model) || favouriteIds.has(option.model)) continue;
const key = option.parsedId?.orgName ?? '';
if (!orgGroups.has(key)) orgGroups.set(key, []);
orgGroups.get(key)!.push({ option, flatIndex: i });
}
for (const [orgName, items] of orgGroups) {
available.push({ orgName: orgName || null, items });
}
return { loaded, favourites, available };
}

View File

@ -24,6 +24,7 @@ export * from './max-bundle-size';
export * from './mcp';
export * from './mcp-form';
export * from './mcp-resource';
export * from './message-export';
export * from './model-id';
export * from './precision';
export * from './processing-info';

View File

@ -0,0 +1,20 @@
// Conversation filename constants
// Length of the trimmed conversation ID in the filename
export const EXPORT_CONV_ID_TRIM_LENGTH = 8;
// Maximum length of the sanitized conversation name snippet
export const EXPORT_CONV_NAME_SUFFIX_MAX_LENGTH = 20;
// Characters to keep in the ISO timestamp. 19 keeps 2026-01-01T00:00:00
export const ISO_TIMESTAMP_SLICE_LENGTH = 19;
// Replacements for making the conversation title filename-friendly
export const NON_ALPHANUMERIC_REGEX = /[^a-z0-9]/gi;
export const EXPORT_CONV_NONALNUM_REPLACEMENT = '_';
export const MULTIPLE_UNDERSCORE_REGEX = /_+/g;
// Replacements to the ISO date for use in the export filename
export const ISO_DATE_TIME_SEPARATOR = 'T';
export const ISO_DATE_TIME_SEPARATOR_REPLACEMENT = '_';
export const ISO_TIME_SEPARATOR = ':';
export const ISO_TIME_SEPARATOR_REPLACEMENT = '-';

View File

@ -26,6 +26,18 @@ import { config } from '$lib/stores/settings.svelte';
import { filterByLeafNodeId, findLeafNode } from '$lib/utils';
import type { McpServerOverride } from '$lib/types/database';
import { MessageRole } from '$lib/enums';
import {
ISO_DATE_TIME_SEPARATOR,
ISO_DATE_TIME_SEPARATOR_REPLACEMENT,
ISO_TIMESTAMP_SLICE_LENGTH,
EXPORT_CONV_ID_TRIM_LENGTH,
EXPORT_CONV_NONALNUM_REPLACEMENT,
EXPORT_CONV_NAME_SUFFIX_MAX_LENGTH,
ISO_TIME_SEPARATOR,
ISO_TIME_SEPARATOR_REPLACEMENT,
NON_ALPHANUMERIC_REGEX,
MULTIPLE_UNDERSCORE_REGEX
} from '$lib/constants';
class ConversationsStore {
/**
@ -619,6 +631,66 @@ class ConversationsStore {
*
*/
/**
* Generates a sanitized filename for a conversation export
* @param conversation - The conversation metadata
* @param msgs - Optional array of messages belonging to the conversation
* @returns The generated filename string
*/
generateConversationFilename(
conversation: { id?: string; name?: string },
msgs?: DatabaseMessage[]
): string {
const conversationName = (conversation.name ?? '').trim().toLowerCase();
const sanitizedName = conversationName
.replace(NON_ALPHANUMERIC_REGEX, EXPORT_CONV_NONALNUM_REPLACEMENT)
.replace(MULTIPLE_UNDERSCORE_REGEX, '_')
.substring(0, EXPORT_CONV_NAME_SUFFIX_MAX_LENGTH);
// If we have messages, use the timestamp of the newest message
const referenceDate = msgs?.length
? new Date(Math.max(...msgs.map((m) => m.timestamp)))
: new Date();
const iso = referenceDate.toISOString().slice(0, ISO_TIMESTAMP_SLICE_LENGTH);
const formattedDate = iso
.replace(ISO_DATE_TIME_SEPARATOR, ISO_DATE_TIME_SEPARATOR_REPLACEMENT)
.replaceAll(ISO_TIME_SEPARATOR, ISO_TIME_SEPARATOR_REPLACEMENT);
const trimmedConvId = conversation.id?.slice(0, EXPORT_CONV_ID_TRIM_LENGTH) ?? '';
return `${formattedDate}_conv_${trimmedConvId}_${sanitizedName}.json`;
}
/**
* Triggers a browser download of the provided exported conversation data
* @param data - The exported conversation payload (either a single conversation or array of them)
* @param filename - Filename; if omitted, a deterministic name is generated
*/
downloadConversationFile(data: ExportedConversations, filename?: string): void {
// Choose the first conversation or message
const conversation =
'conv' in data ? data.conv : Array.isArray(data) ? data[0]?.conv : undefined;
const msgs =
'messages' in data ? data.messages : Array.isArray(data) ? data[0]?.messages : undefined;
if (!conversation) {
console.error('Invalid data: missing conversation');
return;
}
const downloadFilename = filename ?? this.generateConversationFilename(conversation, msgs);
const blob = new Blob([JSON.stringify(data, null, 2)], { type: 'application/json' });
const url = URL.createObjectURL(blob);
const a = document.createElement('a');
a.href = url;
a.download = downloadFilename;
document.body.appendChild(a);
a.click();
document.body.removeChild(a);
URL.revokeObjectURL(url);
}
/**
* Downloads a conversation as JSON file.
* @param convId - The conversation ID to download
@ -636,40 +708,7 @@ class ConversationsStore {
messages = await DatabaseService.getConversationMessages(convId);
}
this.triggerDownload({ conv: conversation, messages });
}
/**
* Exports all conversations with their messages as a JSON file
* @returns The list of exported conversations
*/
async exportAllConversations(): Promise<DatabaseConversation[]> {
const allConversations = await DatabaseService.getAllConversations();
if (allConversations.length === 0) {
throw new Error('No conversations to export');
}
const allData = await Promise.all(
allConversations.map(async (conv) => {
const messages = await DatabaseService.getConversationMessages(conv.id);
return { conv, messages };
})
);
const blob = new Blob([JSON.stringify(allData, null, 2)], { type: 'application/json' });
const url = URL.createObjectURL(blob);
const a = document.createElement('a');
a.href = url;
a.download = `all_conversations_${new Date().toISOString().split('T')[0]}.json`;
document.body.appendChild(a);
a.click();
document.body.removeChild(a);
URL.revokeObjectURL(url);
toast.success(`All conversations (${allConversations.length}) prepared for download`);
return allConversations;
this.downloadConversationFile({ conv: conversation, messages });
}
/**
@ -743,37 +782,6 @@ class ConversationsStore {
await this.loadConversations();
return result;
}
/**
* Triggers file download in browser
*/
private triggerDownload(data: ExportedConversations, filename?: string): void {
const conversation =
'conv' in data ? data.conv : Array.isArray(data) ? data[0]?.conv : undefined;
if (!conversation) {
console.error('Invalid data: missing conversation');
return;
}
const conversationName = conversation.name?.trim() || '';
const truncatedSuffix = conversationName
.toLowerCase()
.replace(/[^a-z0-9]/gi, '_')
.replace(/_+/g, '_')
.substring(0, 20);
const downloadFilename = filename || `conversation_${conversation.id}_${truncatedSuffix}.json`;
const blob = new Blob([JSON.stringify(data, null, 2)], { type: 'application/json' });
const url = URL.createObjectURL(blob);
const a = document.createElement('a');
a.href = url;
a.download = downloadFilename;
document.body.appendChild(a);
a.click();
document.body.removeChild(a);
URL.revokeObjectURL(url);
}
}
export const conversationsStore = new ConversationsStore();

View File

@ -20,6 +20,7 @@
*/
import { browser } from '$app/environment';
import { base } from '$app/paths';
import { MCPService } from '$lib/services/mcp.service';
import { config, settingsStore } from '$lib/stores/settings.svelte';
import { mcpResourceStore } from '$lib/stores/mcp-resources.svelte';
@ -42,6 +43,7 @@ import {
ToolCallType
} from '$lib/enums';
import {
CORS_PROXY_ENDPOINT,
DEFAULT_CACHE_TTL_MS,
DEFAULT_MCP_CONFIG,
EXPECTED_THEMED_ICON_PAIR_COUNT,
@ -78,165 +80,13 @@ import type { ListChangedHandlers } from '@modelcontextprotocol/sdk/types.js';
import type { DatabaseMessageExtraMcpResource, McpServerOverride } from '$lib/types/database';
import type { SettingsConfigType } from '$lib/types/settings';
export function buildMcpClientConfig(
cfg: SettingsConfigType,
perChatOverrides?: McpServerOverride[]
): MCPClientConfig | undefined {
return buildMcpClientConfigInternal(cfg, perChatOverrides);
}
/**
* Internal helper to build MCP client config.
* Kept as standalone function for external use and tests.
*/
export function buildMcpClientConfigInternal(
cfg: SettingsConfigType,
perChatOverrides?: McpServerOverride[]
): MCPClientConfig | undefined {
const rawServers = parseServerSettings(cfg.mcpServers);
if (!rawServers.length) {
return undefined;
}
const servers: Record<string, MCPServerConfig> = {};
for (const [index, entry] of rawServers.entries()) {
if (!checkServerEnabled(entry, perChatOverrides)) continue;
const normalized = buildServerConfig(entry);
if (normalized) servers[generateMcpServerId(entry.id, index)] = normalized;
}
if (Object.keys(servers).length === 0) {
return undefined;
}
return {
protocolVersion: DEFAULT_MCP_CONFIG.protocolVersion,
capabilities: DEFAULT_MCP_CONFIG.capabilities,
clientInfo: DEFAULT_MCP_CONFIG.clientInfo,
requestTimeoutMs: Math.round(DEFAULT_MCP_CONFIG.requestTimeoutSeconds * 1000),
servers
};
}
/**
* Generates a unique server ID from an optional ID string or index.
* @deprecated Use MCPStore.#generateServerId instead
*/
function generateMcpServerId(id: unknown, index: number): string {
if (typeof id === 'string' && id.trim()) {
return id.trim();
}
return `${MCP_SERVER_ID_PREFIX}-${index + 1}`;
}
/**
* Parses raw server settings from config into MCPServerSettingsEntry array.
* @deprecated Use MCPStore.#parseServerSettings instead
*/
function parseServerSettings(rawServers: unknown): MCPServerSettingsEntry[] {
if (!rawServers) {
return [];
}
let parsed: unknown;
if (typeof rawServers === 'string') {
const trimmed = rawServers.trim();
if (!trimmed) {
return [];
}
try {
parsed = JSON.parse(trimmed);
} catch (error) {
console.warn('[MCP] Failed to parse mcpServers JSON:', error);
return [];
}
} else {
parsed = rawServers;
}
if (!Array.isArray(parsed)) {
return [];
}
return parsed.map((entry, index) => {
const url = typeof entry?.url === 'string' ? entry.url.trim() : '';
const headers = typeof entry?.headers === 'string' ? entry.headers.trim() : undefined;
return {
id: generateMcpServerId((entry as { id?: unknown })?.id, index),
enabled: Boolean((entry as { enabled?: unknown })?.enabled),
url,
name: (entry as { name?: string })?.name,
requestTimeoutSeconds: DEFAULT_MCP_CONFIG.requestTimeoutSeconds,
headers: headers || undefined,
useProxy: Boolean((entry as { useProxy?: unknown })?.useProxy)
} satisfies MCPServerSettingsEntry;
});
}
/**
* Builds server configuration from a settings entry.
* @deprecated Use MCPStore.#buildServerConfig instead
*/
function buildServerConfig(
entry: MCPServerSettingsEntry,
connectionTimeoutMs = DEFAULT_MCP_CONFIG.connectionTimeoutMs
): MCPServerConfig | undefined {
if (!entry?.url) {
return undefined;
}
let headers: Record<string, string> | undefined;
if (entry.headers) {
try {
const parsed = JSON.parse(entry.headers);
if (typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed))
headers = parsed as Record<string, string>;
} catch {
console.warn('[MCP] Failed to parse custom headers JSON:', entry.headers);
}
}
return {
url: entry.url,
transport: detectMcpTransportFromUrl(entry.url),
handshakeTimeoutMs: connectionTimeoutMs,
requestTimeoutMs: Math.round(entry.requestTimeoutSeconds * 1000),
headers,
useProxy: entry.useProxy
};
}
/**
* Checks if a server is enabled, considering per-chat overrides.
* @deprecated Use MCPStore.#checkServerEnabled instead
*/
function checkServerEnabled(
server: MCPServerSettingsEntry,
perChatOverrides?: McpServerOverride[]
): boolean {
if (!server.enabled) {
return false;
}
if (perChatOverrides) {
const override = perChatOverrides.find((o) => o.serverId === server.id);
return override?.enabled ?? false;
}
return false;
}
class MCPStore {
private _isInitializing = $state(false);
private _error = $state<string | null>(null);
private _toolCount = $state(0);
private _connectedServers = $state<string[]>([]);
private _healthChecks = $state<Record<string, HealthCheckState>>({});
private _proxyAvailable = $state(false);
private connections = new Map<string, MCPConnection>();
private toolsIndex = new Map<string, string>();
@ -246,6 +96,29 @@ class MCPStore {
private initPromise: Promise<boolean> | null = null;
private activeFlowCount = 0;
constructor() {
if (browser) {
this.probeProxy();
}
}
/**
* Probes the CORS proxy endpoint to determine availability.
* The endpoint is only registered when llama-server runs with --webui-mcp-proxy.
*/
async probeProxy(): Promise<void> {
try {
const response = await fetch(`${base}${CORS_PROXY_ENDPOINT}`, { method: 'HEAD' });
this._proxyAvailable = response.status !== 404;
} catch {
this._proxyAvailable = false;
}
}
get isProxyAvailable(): boolean {
return this._proxyAvailable;
}
/**
* Generates a unique server ID from an optional ID string or index.
*/
@ -520,6 +393,7 @@ class MCPStore {
getServerLabel(server: MCPServerSettingsEntry): string {
const healthState = this.getHealthCheckState(server.id);
if (healthState?.status === HealthCheckStatus.SUCCESS)
return (
healthState.serverInfo?.title || healthState.serverInfo?.name || server.name || server.url
@ -603,6 +477,7 @@ class MCPStore {
*/
#proxyIconSrc(src: string): string {
if (src.startsWith('data:')) return src;
if (!this._proxyAvailable) return src;
return getProxiedUrlString(src);
}
@ -629,7 +504,7 @@ class MCPStore {
}
}
return getFaviconUrl(server.url);
return getFaviconUrl(server.url, this._proxyAvailable);
}
isAnyServerLoading(): boolean {
@ -2072,6 +1947,7 @@ export const mcpIsInitializing = () => mcpStore.isInitializing;
export const mcpIsInitialized = () => mcpStore.isInitialized;
export const mcpError = () => mcpStore.error;
export const mcpIsEnabled = () => mcpStore.isEnabled;
export const mcpIsProxyAvailable = () => mcpStore.isProxyAvailable;
export const mcpAvailableTools = () => mcpStore.availableTools;
export const mcpConnectedServerCount = () => mcpStore.connectedServerCount;
export const mcpConnectedServerNames = () => mcpStore.connectedServerNames;

View File

@ -1,6 +1,7 @@
/**
* Utility functions for conversation data manipulation
*/
import type { DatabaseMessage } from '$lib/types';
/**
* Creates a map of conversation IDs to their message counts from exported conversation data

View File

@ -17,7 +17,7 @@ import {
* @param urlString - The URL to get the favicon for
* @returns The favicon URL or null if invalid
*/
export function getFaviconUrl(urlString: string): string | null {
export function getFaviconUrl(urlString: string, useProxy = true): string | null {
try {
const url = new URL(urlString);
const hostnameParts = url.hostname.split(DOMAIN_SEPARATOR);
@ -27,7 +27,7 @@ export function getFaviconUrl(urlString: string): string | null {
: url.hostname;
const googleFaviconUrl = `${GOOGLE_FAVICON_BASE_URL}?domain=${rootDomain}&sz=${DEFAULT_FAVICON_SIZE}`;
return getProxiedUrlString(googleFaviconUrl);
return useProxy ? getProxiedUrlString(googleFaviconUrl) : googleFaviconUrl;
} catch {
return null;
}

View File

@ -231,7 +231,7 @@
<Sidebar.Trigger
class="transition-left absolute left-0 z-[900] duration-200 ease-linear {sidebarOpen
? 'md:left-[var(--sidebar-width)]'
: ''}"
: 'md:left-0!'}"
style="translate: 1rem 1rem;"
/>
{/if}