diff --git a/ggml/src/ggml-qnn/backend-ops.cpp b/ggml/src/ggml-qnn/backend-ops.cpp
index 3a401dd037..95fe35b465 100644
--- a/ggml/src/ggml-qnn/backend-ops.cpp
+++ b/ggml/src/ggml-qnn/backend-ops.cpp
@@ -369,6 +369,31 @@ bool ggnl_qnn_supports_op_tensor(ggml_backend_qnn_device_context * ctx, const gg
     return true;
 }
 
+bool ggml_qnn_have_same_tensor_types(ggml_backend_qnn_device_context * ctx, const ggml_tensor * op) {
+    auto * src0 = op->src[0];
+    auto * src1 = op->src[1];
+    if (src1) {
+        if (src0->type != op->type || src1->type != op->type) {
+            QNN_LOG_DEBUG("[%s][%s]type src0(%s), src1(%s) and op(%s) are not equal\n",
+                          qnn::get_backend_name(ctx->device), ggml_op_name(op->op), ggml_type_name(src0->type),
+                          ggml_type_name(src1->type), ggml_type_name(op->type));
+            return false;
+        }
+    } else {
+        if (src0->type != op->type) {
+            QNN_LOG_DEBUG("[%s][%s]type src0(%s) and op(%s) are not equal\n", qnn::get_backend_name(ctx->device),
+                          ggml_op_name(op->op), ggml_type_name(src0->type), ggml_type_name(op->type));
+            return false;
+        }
+    }
+
+#ifdef NDEBUG
+    GGML_UNUSED(ctx);
+#endif
+
+    return true;
+}
+
 bool ggml_qnn_supports_matmul_op(ggml_backend_qnn_device_context * ctx, const ggml_tensor * op) {
     constexpr const size_t kMaxNpuTensorSize = 8192L * 2048 + 8192 * 512 + 2048 * 512;
     constexpr const auto get_tensor_size = [](const ggml_tensor * tensor) -> size_t {
@@ -393,10 +418,8 @@ bool ggml_qnn_supports_matmul_op(ggml_backend_qnn_device_context * ctx, const gg
             // fall through, from test here, the convert op is super slow on NPU:
             // https://github.com/usefulsensors/qc_npu_benchmark
         case QNN_BACKEND_GPU:
-            if (src0->type != src1->type || src0->type != op->type) {
+            if (!ggml_qnn_have_same_tensor_types(ctx, op)) {
                 // there's no convert op for GPU.
-                QNN_LOG_DEBUG("[qnn-gpu][MUL_MAT]type src0(%s), src1(%s) and op(%s) are not equal\n",
-                              ggml_type_name(src0->type), ggml_type_name(src1->type), ggml_type_name(op->type));
                 return false;
             }
             break;
@@ -472,7 +495,7 @@ bool device_supports_op(ggml_backend_qnn_device_context * ctx, const ggml_tensor
                 break;
 
             default:
-                // default to supported
+                is_op_supported = ggml_qnn_have_same_tensor_types(ctx, op);
                 break;
         }
     }
diff --git a/ggml/src/ggml-qnn/graph.cpp b/ggml/src/ggml-qnn/graph.cpp
index b3ab161e9f..2a282771c2 100644
--- a/ggml/src/ggml-qnn/graph.cpp
+++ b/ggml/src/ggml-qnn/graph.cpp
@@ -246,7 +246,7 @@ qnn_graph::qnn_graph(const std::string & graph_name, QNNBackend device, std::sha
         return;
     }
 
-    QNN_LOG_INFO("[%s][%s]create succeed\n", get_backend_name(device), graph_name.c_str());
+    QNN_LOG_DEBUG("[%s][%s]create succeed\n", get_backend_name(device), graph_name.c_str());
     _graph_handle  = graph_handle;
     _qnn_interface = qnn_interface;
 }
diff --git a/ggml/src/ggml-qnn/logger.cpp b/ggml/src/ggml-qnn/logger.cpp
index 5418d03be4..0ffa12e7b1 100644
--- a/ggml/src/ggml-qnn/logger.cpp
+++ b/ggml/src/ggml-qnn/logger.cpp
@@ -13,7 +13,7 @@ void qnn::sdk_logcallback(const char * fmt, QnnLog_Level_t level, uint64_t /*tim
     static std::mutex log_mutex;
     static char       s_ggml_qnn_logbuf[4096];
-    char log_level_desc = 'U';
+    char log_level_desc;
     switch (level) {
         case QNN_LOG_LEVEL_ERROR:
             log_level_desc = 'E';
             break;
@@ -30,6 +30,9 @@ void qnn::sdk_logcallback(const char * fmt, QnnLog_Level_t level, uint64_t /*tim
         case QNN_LOG_LEVEL_VERBOSE:
             log_level_desc = 'V';
             break;
+        default:
+            log_level_desc = 'U';
+            break;
     }
 
     {
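
For reference, the rule the new ggml_qnn_have_same_tensor_types() helper enforces is simply "every source tensor that is present must have the same type as the op's output tensor"; the GPU path needs this because it has no convert op, and the default branch of device_supports_op now applies the same check instead of unconditionally reporting support. The snippet below is a minimal standalone sketch of that rule only; fake_type, fake_tensor, and have_same_tensor_types are stand-ins invented for illustration and are not part of the ggml-qnn sources.

// Illustrative sketch: the type-matching rule, using simplified stand-in
// types instead of the real ggml/QNN structures.
#include <cstdio>

enum fake_type { F32, F16, Q4_0 };

struct fake_tensor {
    fake_type           type;
    const fake_tensor * src[2];  // src[1] may be null for single-input ops
};

// True only when every present source tensor has the same type as the
// op's output tensor -- i.e. no implicit type conversion would be needed.
static bool have_same_tensor_types(const fake_tensor & op) {
    for (const fake_tensor * src : op.src) {
        if (src && src->type != op.type) {
            return false;
        }
    }
    return true;
}

int main() {
    fake_tensor a       = { F16, { nullptr, nullptr } };
    fake_tensor b       = { F32, { nullptr, nullptr } };
    fake_tensor mul_mat = { F32, { &a, &b } };  // mixed F16/F32 inputs

    // Prints "supported: no": mixed input/output types are rejected,
    // mirroring the early return in the GPU matmul path above.
    std::printf("supported: %s\n", have_same_tensor_types(mul_mat) ? "yes" : "no");
    return 0;
}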