qnn fix: update device capabilities for quantized types in qnn-lib to improve compatibility

hongruichen 2025-06-20 20:16:23 +08:00
parent af620a12f7
commit 332514cd5c
2 changed files with 6 additions and 3 deletions


@@ -283,8 +283,11 @@ ggml_backend_t ggml_backend_qnn_init_with_device_context(ggml_backend_dev_t dev,
                   qnn::get_backend_desc(dev_ctx->device));
         dev_ctx->description = buffer;
     }
 
+#ifdef GGML_HEXAGON_ENABLE_QUANTIZED_TENSORS
+    // TODO: remove npu from here if hardware quantization is supported
     dev_ctx->enable_cpu_dequantize = device == QNN_BACKEND_CPU;
+#endif
 
     ggml_backend_t qnn_backend = new ggml_backend{
         /* .guid = */ ggml_backend_qnn_guid(),

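The effect of the first hunk: when quantized-tensor support is compiled in, only the CPU backend opts into dequantizing quantized tensors on the CPU, and the TODO notes that the NPU should stop relying on this path once hardware quantization works. Below is a minimal sketch of how such a flag could feed a support check; the QNN_BACKEND_* values and the enable_cpu_dequantize field come from the diff, but the context struct layout and the helper itself are assumptions, not code from this commit.

    // Sketch only: qnn_device_context_sketch and can_accept_quantized are
    // hypothetical; QNN_BACKEND_* and enable_cpu_dequantize appear in the diff.
    enum QNNBackend { QNN_BACKEND_CPU, QNN_BACKEND_GPU, QNN_BACKEND_NPU };

    struct qnn_device_context_sketch {
        QNNBackend device;
        bool       enable_cpu_dequantize = false;  // set only for QNN_BACKEND_CPU above
    };

    // A quantized type passes either because the device caps list it directly
    // or because the backend may dequantize it to float32 on the CPU first.
    static bool can_accept_quantized(const qnn_device_context_sketch & ctx, bool listed_in_caps) {
        return listed_in_caps || ctx.enable_cpu_dequantize;
    }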

@@ -38,7 +38,7 @@ constexpr const qnn::device_caps kDeviceCaps[] = {
         // all quantized types can be offload to CPU, at current implementation, those types will be dequantized into float32 on cpu
         0xFFFFFE,
 #else
-        0,
+        (1L << GGML_TYPE_F32),
 #endif
         0, // 0 for no limitation
     },
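A note on the mask encoding in kDeviceCaps: each bit position corresponds to a ggml_type enum value, so GGML_TYPE_F32 (value 0) is bit 0. 0xFFFFFE sets bits 1 through 23 and leaves bit 0 clear, which is what the "all quantized types" comment means in practice, and the replacement value (1L << GGML_TYPE_F32) advertises exactly F32 instead of nothing. A sketch of the membership test this convention implies (the helper is illustrative, not taken from the repository):

    #include <cstdint>
    #include "ggml.h"  // ggml_type, GGML_TYPE_* enum values

    // Bit i of the mask set <=> the ggml type with enum value i is supported.
    static bool type_in_mask(uint64_t mask, ggml_type type) {
        return (mask & (uint64_t(1) << type)) != 0;
    }

    // type_in_mask(1L << GGML_TYPE_F32, GGML_TYPE_F32) -> true
    // type_in_mask(0xFFFFFE, GGML_TYPE_F32)            -> false (bit 0 clear)
    // type_in_mask(0xFFFFFE, GGML_TYPE_Q4_0)           -> true  (bit 2 set)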
@@ -50,7 +50,7 @@ constexpr const qnn::device_caps kDeviceCaps[] = {
         // all quantized types can be offload to GPU, at current implementation, those types will be dequantized into float32 on cpu
         0xFFFFFE,
 #else
-        0,
+        (1L << GGML_TYPE_F32) | (1L << GGML_TYPE_F16),
 #endif
         (128256L * 4096 *
          sizeof(float)), // tested on 8 gen 2, failed to allocate tensor with size 128256x4096 and float32
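For scale, the GPU size cap works out to just under 2 GiB, and 128256 x 4096 matches the token-embedding shape of a Llama-3-family model (128256 is that tokenizer's vocabulary size), presumably the tensor that failed on the 8 Gen 2:

    // 128256 * 4096 * sizeof(float) = 128256 * 4096 * 4
    //                               = 2,101,346,304 bytes  (~1.96 GiB)
    static_assert(128256L * 4096 * sizeof(float) == 2101346304ULL,
                  "GPU max-tensor-size cap from the hunk above");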
@@ -62,7 +62,7 @@ constexpr const qnn::device_caps kDeviceCaps[] = {
         (1L << GGML_TYPE_F32) | (1L << GGML_TYPE_F16) | (1L << GGML_TYPE_I16),
         (1L << GGML_TYPE_Q2_K) | (1L << GGML_TYPE_Q3_K) | (1L << GGML_TYPE_Q4_K) | (1L << GGML_TYPE_Q8_K),
 #else
-        0,
+        (1L << GGML_TYPE_F32) | (1L << GGML_TYPE_F16),
 #endif
         (8192L * 2048 + 8192 * 512 + 2048 * 512) * sizeof(float), // TODO: should have a better way to get this value
     },
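Putting the table to use: each entry pairs a supported-type mask with a per-tensor byte budget, where 0 means no limit. The sketch below shows how a backend could consult such an entry when deciding whether to offload a tensor; the field names and can_offload are assumptions, since the real qnn::device_caps layout is not visible in this diff.

    #include <cstdint>
    #include "ggml.h"

    struct device_caps_sketch {
        uint64_t supported_types;  // bit i set => ggml type i is supported
        uint64_t max_tensor_size;  // bytes; 0 means no limitation
    };

    static bool can_offload(const device_caps_sketch & caps, const ggml_tensor * t) {
        if ((caps.supported_types & (uint64_t(1) << t->type)) == 0) {
            return false;  // element type not handled by this device
        }
        // Respect the empirically determined allocation ceiling.
        return caps.max_tensor_size == 0 || ggml_nbytes(t) <= caps.max_tensor_size;
    }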