NPU perf fix: use F16 instead of Q8_0_C as the requant type for token_embd.weight on NPU

2026-01-09 11:29:40 -08:00 · 2026-01-09 11:29:40 -08:00 · a40a5dfc60
parent 981ec6571d
commit a40a5dfc60
1 changed file with 1 addition and 1 deletion
--- a/ggml/src/ggml-openvino/ggml-openvino-extra.cpp
+++ b/ggml/src/ggml-openvino/ggml-openvino-extra.cpp
@@ -165,7 +165,7 @@ clEnqueueMemcpyINTEL_fn ggml_openvino_get_clEnqueueMemcpyINTEL() {
 // Get requantization type for a tensor type (returns nullopt if no requant needed)
 std::optional<ExtraQuantType> ggml_openvino_get_requant_type(const ggml_tensor * tensor) {
    if (strncmp(tensor->name, "token_embd.weight", 17) == 0) {
-        return ExtraQuantType::Q8_0_C;
+        return (ggml_openvino_is_npu() ? ExtraQuantType::F16 : ExtraQuantType::Q8_0_C);
    }
    if (strncmp(tensor->name, "output.weight", 13) == 0) {
        return ExtraQuantType::Q8_0_C;