Fix accuracy: disable cpu_repack

This commit is contained in:
Yu, Zijun 2025-08-07 14:25:20 +08:00 committed by Mustafa Cavus
parent 663a0b8cce
commit 6ab76ed10a
3 changed files with 6 additions and 3 deletions

View File

@ -754,7 +754,7 @@ git switch dev_backend_openvino
# Build with OpenVINO support
source /opt/intel/openvino/setupvars.sh
cmake -B build/ReleaseOV -G Ninja -DCMAKE_BUILD_TYPE=Release -DGGML_OPENVINO=ON
cmake -B build/ReleaseOV -G Ninja -DCMAKE_BUILD_TYPE=Release -DGGML_OPENVINO=ON -DGGML_CPU_REPACK=OFF
cmake --build build/ReleaseOV --config Release -j $(nproc)
```

View File

@ -432,6 +432,10 @@ std::shared_ptr<ov::Node> GgmlOvDecoder::create_weight_node(ggml_tensor* tensor)
}
// Quantized case
OPENVINO_ASSERT(
tensor->extra == nullptr,
"Unsupported weight tensor: " + std::string(tensor->name) + " Possibly this is a repacked quantized weights");
node_shape.erase(node_shape.begin());
uint64_t weights_per_byte;

View File

@ -420,8 +420,7 @@ static bool ggml_backend_openvino_device_supports_op(ggml_backend_dev_t dev, con
}
// Returns whether the buffer type `buft` is accepted: true when ggml_backend_buft_is_host(buft)
// holds; the first return below additionally accepts a buffer type whose device is named "CPU".
// `dev` is not consulted.
// NOTE(review): this span looks like a rendered diff hunk with the +/- markers stripped — the
// TODO line and the first return are presumably the removed lines and the second return the
// added one. Confirm against the actual file before relying on either return.
static bool ggml_backend_openvino_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
// TODO quantized weights are cpu_repack_buffer_type which does not implement ggml_backend_buft_is_host
return ggml_backend_buft_is_host(buft) || strcmp(buft->device->iface.get_name(buft->device), "CPU") == 0;
return ggml_backend_buft_is_host(buft);
GGML_UNUSED(dev);
}