From a81b202f570c038342ad227702316957e7425705 Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Mon, 12 Jan 2026 10:47:16 -0800 Subject: [PATCH] requant to f16 for Q6 embed on NPU --- ggml/src/ggml-openvino/ggml-openvino-extra.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggml/src/ggml-openvino/ggml-openvino-extra.cpp b/ggml/src/ggml-openvino/ggml-openvino-extra.cpp index b6fec855fc..bc0362ee46 100644 --- a/ggml/src/ggml-openvino/ggml-openvino-extra.cpp +++ b/ggml/src/ggml-openvino/ggml-openvino-extra.cpp @@ -165,7 +165,7 @@ clEnqueueMemcpyINTEL_fn ggml_openvino_get_clEnqueueMemcpyINTEL() { // Get requantization type for a tensor type (returns nullopt if no requant needed) std::optional ggml_openvino_get_requant_type(const ggml_tensor * tensor) { if (strncmp(tensor->name, "token_embd.weight", 17) == 0) { - return (ggml_openvino_is_npu() ? ExtraQuantType::F16 : ExtraQuantType::Q8_0_C); + return ((ggml_openvino_is_npu() && tensor->type == GGML_TYPE_Q6_K) ? ExtraQuantType::F16 : ExtraQuantType::Q8_0_C); } if (strncmp(tensor->name, "output.weight", 13) == 0) { return ExtraQuantType::Q8_0_C;