bugfix: block large tensor calc in npu

This commit is contained in:
hongruichen 2024-11-29 14:19:34 +08:00
parent a2df09b6af
commit 5103b166ba
1 changed file with 9 additions and 0 deletions

View File

@ -566,6 +566,10 @@ bool ggml_qnn_supports_tensor(ggml_backend_qnn_device_context *ctx, const ggml_t
}
bool ggml_qnn_supports_matmul_op(ggml_backend_qnn_device_context *ctx, const ggml_tensor *op) {
constexpr const auto get_tensor_size = [](const ggml_tensor *tensor) -> size_t {
return tensor->ne[0] * tensor->ne[1] * tensor->ne[2] * tensor->ne[3];
};
auto *src0 = op->src[0];
auto *src1 = op->src[1];
switch (ctx->device) {
@ -578,6 +582,11 @@ bool ggml_qnn_supports_matmul_op(ggml_backend_qnn_device_context *ctx, const ggm
QNN_LOG_DEBUG("[qnn-npu] src0 and src1 dimensions are not equal, support/unsupported: %d/%d",
ctx->support_op_count.load(), ++(ctx->unsupported_op_count));
return false;
} else if (get_tensor_size(src0) + get_tensor_size(src1) + get_tensor_size(op) >=
(8192 * 2048 + 8192 * 512 + 2048 * 512)) {
QNN_LOG_DEBUG("[qnn-npu] tensor size is too large, support/unsupported: %d/%d",
ctx->support_op_count.load(), ++(ctx->unsupported_op_count));
return false;
}
// fall through, from test here, the convert op is super slow on NPU:
// https://github.com/usefulsensors/qc_npu_benchmark