From aed9b4f5bbecf9bcabd3245c72c158fb83bc432d Mon Sep 17 00:00:00 2001
From: hongruichen
Date: Sat, 9 Aug 2025 00:35:01 +0800
Subject: [PATCH] fix compiling error

---
 ggml/src/ggml-qnn/qnn/backend-ops.cpp    | 103 +++++++++++++++--------
 ggml/src/ggml-qnn/qnn/op-config-caps.cpp |  59 ++++++++-----
 2 files changed, 110 insertions(+), 52 deletions(-)

diff --git a/ggml/src/ggml-qnn/qnn/backend-ops.cpp b/ggml/src/ggml-qnn/qnn/backend-ops.cpp
index f3e06cf09e..a90d802433 100644
--- a/ggml/src/ggml-qnn/qnn/backend-ops.cpp
+++ b/ggml/src/ggml-qnn/qnn/backend-ops.cpp
@@ -1,8 +1,6 @@
 #include "backend-ops.hpp"
 
-#include 
-
 #include "ggml-impl.h"
 #include "graph.hpp"
 #include "logger.hpp"
 
@@ -10,6 +8,8 @@
 #include "tensor.hpp"
 #include "utils.hpp"
 
+#include 
+
 namespace {
 
 qnn::qnn_graph * get_qnn_graph_from_cache(qnn::ggml_backend_qnn_device_context * ctx, const ggml_cgraph * cgraph) {
@@ -17,8 +17,10 @@ qnn::qnn_graph * get_qnn_graph_from_cache(qnn::ggml_backend_qnn_device_context *
 
     std::string graph_key;
     auto op_data_type = qnn::qnn_graph::get_graph_key_from_cgraph(cgraph, graph_key);
     if (graph_key.empty()) {
-        QNN_LOG_DEBUG("[%s]empty graph key for cgraph: %p, size: %d\n", qnn::get_backend_name(ctx->device),
-                      (const void *) cgraph, (int) cgraph->n_nodes);
+        QNN_LOG_DEBUG("[%s]empty graph key for cgraph: %p, size: %d\n",
+                      qnn::get_backend_name(ctx->device),
+                      (const void *) cgraph,
+                      (int) cgraph->n_nodes);
         return nullptr;
     }
@@ -26,19 +28,21 @@ qnn::qnn_graph * get_qnn_graph_from_cache(qnn::ggml_backend_qnn_device_context *
     auto it = graph_cache.find(graph_key);
     qnn::qnn_graph * graph_ptr = nullptr;
     if (it != graph_cache.end()) {
-        QNN_LOG_DEBUG("[%s]found graph %s in cache, cache size: %d\n", qnn::get_backend_name(ctx->device),
-                      graph_key.c_str(), (int) graph_cache.size());
+        QNN_LOG_DEBUG("[%s]found graph %s in cache, cache size: %d\n",
+                      qnn::get_backend_name(ctx->device),
+                      graph_key.c_str(),
+                      (int) graph_cache.size());
         graph_ptr = it->second.get();
     } else {
         auto precision = qnn::qnn_graph::kHtpDefault;
         if (op_data_type == GGML_TYPE_F16) {
-            QNN_LOG_DEBUG("[%s][%s]set graph precision to FP16\n", qnn::get_backend_name(ctx->device),
-                          graph_key.c_str());
+            QNN_LOG_DEBUG(
+                "[%s][%s]set graph precision to FP16\n", qnn::get_backend_name(ctx->device), graph_key.c_str());
             precision = qnn::qnn_graph::kHtpFp16;
         }
-        auto graph = std::make_unique<qnn::qnn_graph>(graph_key, ctx->device, ctx->instance, precision,
-                                                      ctx->socinfo.vtcm_size_in_mb);
+        auto graph = std::make_unique<qnn::qnn_graph>(
+            graph_key, ctx->device, ctx->instance, precision, ctx->socinfo.vtcm_size_in_mb);
         if (!graph->is_valid()) {
             return nullptr;
         }
 
@@ -50,8 +54,10 @@ qnn::qnn_graph * get_qnn_graph_from_cache(qnn::ggml_backend_qnn_device_context *
         graph_ptr = graph.get();
         graph_cache[graph_key] = std::move(graph);
-        QNN_LOG_DEBUG("[%s]add graph %s to cache, cache size: %d\n", qnn::get_backend_name(ctx->device),
-                      graph_key.c_str(), (int) graph_cache.size());
+        QNN_LOG_DEBUG("[%s]add graph %s to cache, cache size: %d\n",
+                      qnn::get_backend_name(ctx->device),
+                      graph_key.c_str(),
+                      (int) graph_cache.size());
     }
 
     return graph_ptr;
 }
@@ -62,6 +68,7 @@ constexpr const bool kQnnSupportedOps[] = {
     true,   // GGML_OP_NONE
     false,  // GGML_OP_DUP
     true,   // GGML_OP_ADD
+    false,  // GGML_OP_ADD_ID
     false,  // GGML_OP_ADD1
     false,  // GGML_OP_ACC
     true,   // GGML_OP_SUB
@@ -197,8 +204,13 @@ inline bool is_tensor_size_valid(qnn::ggml_backend_qnn_device_context * ctx, con
     const auto tensor_size = get_tensor_size_in_bytes(tensor, type);
     if (ctx->max_tensor_size_in_bytes && tensor_size >= 
ctx->max_tensor_size_in_bytes) { QNN_LOG_DEBUG("[%s]tensor(%s_%dx%dx%dx%d) size(%lld) exceeds the limit(%lld)\n", - qnn::get_backend_name(ctx->device), ggml_get_name(tensor), (int) tensor->ne[0], - (int) tensor->ne[1], (int) tensor->ne[2], (int) tensor->ne[3], (long long int) tensor_size, + qnn::get_backend_name(ctx->device), + ggml_get_name(tensor), + (int) tensor->ne[0], + (int) tensor->ne[1], + (int) tensor->ne[2], + (int) tensor->ne[3], + (long long int) tensor_size, (long long int) ctx->max_tensor_size_in_bytes); return false; } @@ -215,10 +227,18 @@ bool is_tensor_type_valid(qnn::ggml_backend_qnn_device_context * ctx, const ggml #ifndef NDEBUG if (tensor->view_src) { auto * src_tensor = tensor->view_src; - QNN_LOG_DEBUG("[%s]tensor(%s_%dx%dx%dx%d) is a view, src: %s_%dx%dx%dx%d\n", qnn::get_backend_name(ctx->device), - ggml_get_name(tensor), (int) tensor->ne[0], (int) tensor->ne[1], (int) tensor->ne[2], - (int) tensor->ne[3], ggml_get_name(src_tensor), (int) src_tensor->ne[0], (int) src_tensor->ne[1], - (int) src_tensor->ne[2], (int) src_tensor->ne[3]); + QNN_LOG_DEBUG("[%s]tensor(%s_%dx%dx%dx%d) is a view, src: %s_%dx%dx%dx%d\n", + qnn::get_backend_name(ctx->device), + ggml_get_name(tensor), + (int) tensor->ne[0], + (int) tensor->ne[1], + (int) tensor->ne[2], + (int) tensor->ne[3], + ggml_get_name(src_tensor), + (int) src_tensor->ne[0], + (int) src_tensor->ne[1], + (int) src_tensor->ne[2], + (int) src_tensor->ne[3]); } #endif @@ -227,14 +247,15 @@ bool is_tensor_type_valid(qnn::ggml_backend_qnn_device_context * ctx, const ggml case GGML_TYPE_F16: if (!is_type_bit_enabled(ctx->supported_types, tensor->type)) { QNN_LOG_DEBUG("[%s]unsupported data type %s, supported_types: 0x%x\n", - qnn::get_backend_name(ctx->device), ggml_type_name(tensor->type), + qnn::get_backend_name(ctx->device), + ggml_type_name(tensor->type), (unsigned int) ctx->supported_types); return false; } break; default: - QNN_LOG_DEBUG("[%s]unsupported data type %s\n", qnn::get_backend_name(ctx->device), - ggml_type_name(tensor->type)); + QNN_LOG_DEBUG( + "[%s]unsupported data type %s\n", qnn::get_backend_name(ctx->device), ggml_type_name(tensor->type)); return false; } @@ -277,14 +298,20 @@ bool ggml_qnn_have_same_tensor_types(qnn::ggml_backend_qnn_device_context * ctx, if (src1) { if (src0->type != op->type || src1->type != op->type) { QNN_LOG_DEBUG("[%s][%s]type src0(%s), src1(%s) and op(%s) are not equal\n", - qnn::get_backend_name(ctx->device), ggml_op_name(op->op), ggml_type_name(src0->type), - ggml_type_name(src1->type), ggml_type_name(op->type)); + qnn::get_backend_name(ctx->device), + ggml_op_name(op->op), + ggml_type_name(src0->type), + ggml_type_name(src1->type), + ggml_type_name(op->type)); return false; } } else { if (src0->type != op->type) { - QNN_LOG_DEBUG("[%s][%s]type src0(%s) and op(%s) are not equal\n", qnn::get_backend_name(ctx->device), - ggml_op_name(op->op), ggml_type_name(src0->type), ggml_type_name(op->type)); + QNN_LOG_DEBUG("[%s][%s]type src0(%s) and op(%s) are not equal\n", + qnn::get_backend_name(ctx->device), + ggml_op_name(op->op), + ggml_type_name(src0->type), + ggml_type_name(op->type)); return false; } } @@ -303,7 +330,9 @@ bool ggml_qnn_supports_matmul_op(qnn::ggml_backend_qnn_device_context * ctx, con if (is_data_reinterpretation_op(src0->op) || is_data_reinterpretation_op(src1->op)) { // TODO: remove the blocker here when we support permute op QNN_LOG_DEBUG("[%s][MUL_MAT]data reorganization op is not supported, (%s, %s)\n", - qnn::get_backend_name(ctx->device), 
ggml_op_name(src0->op), ggml_op_name(src1->op)); + qnn::get_backend_name(ctx->device), + ggml_op_name(src0->op), + ggml_op_name(src1->op)); return false; } @@ -330,7 +359,8 @@ bool ggml_qnn_supports_matmul_op(qnn::ggml_backend_qnn_device_context * ctx, con !is_type_bit_enabled(ctx->cpu_preprocess_types, src0->type)) { // for such cases that src0 is quantized and op is float32, check if the quant type is enabled QNN_LOG_DEBUG("[%s][MUL_MAT]quantized src0 type %s is not enabled\n", - qnn::get_backend_name(ctx->device), ggml_type_name(src0->type)); + qnn::get_backend_name(ctx->device), + ggml_type_name(src0->type)); return false; } break; @@ -354,8 +384,12 @@ void print_tensor_info(qnn::ggml_backend_qnn_device_context * ctx, const ggml_te std::string op_key; qnn::get_qnn_op_desc(op, true, GGML_TYPE_COUNT, op_key); - QNN_LOG_DEBUG("[%s][%s]op was %s, support/unsupported: %d/%d\n", qnn::get_backend_name(ctx->device), op_key.c_str(), - supported, ctx->supported_op_count.load(), ctx->unsupported_op_count.load()); + QNN_LOG_DEBUG("[%s][%s]op was %s, support/unsupported: %d/%d\n", + qnn::get_backend_name(ctx->device), + op_key.c_str(), + supported, + ctx->supported_op_count.load(), + ctx->unsupported_op_count.load()); } #endif @@ -402,7 +436,9 @@ bool device_supports_op(qnn::ggml_backend_qnn_device_context * ctx, const ggml_t // TODO: fix this when we have the support for mul with rms_norm if (ctx->enable_cpu_dequantize && (src0->op == GGML_OP_RMS_NORM || src1->op == GGML_OP_RMS_NORM)) { QNN_LOG_DEBUG("[%s][%s]skip unsupported mul with rms norm, (%s, %s)\n", - qnn::get_backend_name(ctx->device), ggml_op_desc(op), ggml_op_desc(src0), + qnn::get_backend_name(ctx->device), + ggml_op_desc(op), + ggml_op_desc(src0), ggml_op_desc(src1)); is_op_supported = false; break; @@ -414,7 +450,8 @@ bool device_supports_op(qnn::ggml_backend_qnn_device_context * ctx, const ggml_t // TODO: move to op caps array? 
if (!ggml_are_same_shape(src0, src1)) { QNN_LOG_DEBUG("[%s][%s] src0 and src1 dimensions are not equal\n", - qnn::get_backend_name(ctx->device), ggml_op_desc(op)); + qnn::get_backend_name(ctx->device), + ggml_op_desc(op)); is_op_supported = false; } break; @@ -442,8 +479,8 @@ bool device_supports_op(qnn::ggml_backend_qnn_device_context * ctx, const ggml_t } bool device_compute_graph(qnn::ggml_backend_qnn_device_context * ctx, ggml_cgraph * cgraph) { - QNN_LOG_DEBUG("[%s]compute graph start, nodes count: %d\n", qnn::get_backend_name(ctx->device), - (int) cgraph->n_nodes); + QNN_LOG_DEBUG( + "[%s]compute graph start, nodes count: %d\n", qnn::get_backend_name(ctx->device), (int) cgraph->n_nodes); auto qnn_graph = get_qnn_graph_from_cache(ctx, cgraph); bool success = qnn_graph && qnn_graph->execute(cgraph, ctx->convert_context); diff --git a/ggml/src/ggml-qnn/qnn/op-config-caps.cpp b/ggml/src/ggml-qnn/qnn/op-config-caps.cpp index 95c4067655..214dd4efed 100644 --- a/ggml/src/ggml-qnn/qnn/op-config-caps.cpp +++ b/ggml/src/ggml-qnn/qnn/op-config-caps.cpp @@ -3,11 +3,14 @@ namespace { -using op_constructor_t = std::shared_ptr (*)(const ggml_tensor *, const std::string &, +using op_constructor_t = std::shared_ptr (*)(const ggml_tensor *, + const std::string &, std::shared_ptr); -using op_description_generator_t = void (*)(const ggml_tensor * op, bool append_dimensions, - ggml_type override_data_type, std::string & output); +using op_description_generator_t = void (*)(const ggml_tensor * op, + bool append_dimensions, + ggml_type override_data_type, + std::string & output); void append_tensor_shape_and_type_impl(const ggml_tensor * tensor, ggml_type override_data_type, std::string & output) { char buffer[256] = {}; @@ -21,13 +24,24 @@ void append_tensor_shape_and_type_impl(const ggml_tensor * tensor, ggml_type ove len = snprintf(buffer, sizeof(buffer), "%ldx%ld%s", (long) tensor->ne[0], (long) tensor->ne[1], type_name); break; case 3: - len = snprintf(buffer, sizeof(buffer), "%ldx%ldx%ld%s", (long) tensor->ne[0], (long) tensor->ne[1], - (long) tensor->ne[2], type_name); + len = snprintf(buffer, + sizeof(buffer), + "%ldx%ldx%ld%s", + (long) tensor->ne[0], + (long) tensor->ne[1], + (long) tensor->ne[2], + type_name); break; case 4: default: - len = snprintf(buffer, sizeof(buffer), "%ldx%ldx%ldx%ld%s", (long) tensor->ne[0], (long) tensor->ne[1], - (long) tensor->ne[2], (long) tensor->ne[3], type_name); + len = snprintf(buffer, + sizeof(buffer), + "%ldx%ldx%ldx%ld%s", + (long) tensor->ne[0], + (long) tensor->ne[1], + (long) tensor->ne[2], + (long) tensor->ne[3], + type_name); break; } GGML_ASSERT(len > 0 && len < (int) sizeof(buffer)); @@ -61,8 +75,10 @@ void get_op_key_with_src_op_desc(const ggml_tensor * op, std::string & output) { output += ')'; } -void generic_get_op_desc(const ggml_tensor * op, bool append_dimensions, ggml_type override_data_type, - std::string & output) { +void generic_get_op_desc(const ggml_tensor * op, + bool append_dimensions, + ggml_type override_data_type, + std::string & output) { if (append_dimensions) { get_graph_key_from_op(op, override_data_type, output); } else { @@ -83,6 +99,7 @@ constexpr const qnn_op_caps_t kOpCaps[] = { // GGML_OP_ADD QNN_OP_ELEMENT_WISE_ADD, // qnn_op_name }, + {}, // GGML_OP_ADD_ID {}, // GGML_OP_ADD1 {}, // GGML_OP_ACC { @@ -235,8 +252,8 @@ std::shared_ptr mat_mul_op_constructor(const ggml_tenso qnn::qnn_instance_ptr qnn_instance) { if (qnn_instance->has_custom_op_package() && ggml_n_dims(op) == 2) { QNN_LOG_DEBUG("create GgmlMulMat, name %s, use 
GgmlOpPackage\n", instance_name.c_str()); - return std::make_shared(instance_name, "GgmlOpPackage", "GgmlMulMat", - qnn_instance); + return std::make_shared( + instance_name, "GgmlOpPackage", "GgmlMulMat", qnn_instance); } QNN_LOG_DEBUG("create QNN_OP_MAT_MUL, name %s\n", instance_name.c_str()); @@ -250,8 +267,8 @@ std::shared_ptr generic_op_constructor(const ggml_tenso GGML_UNUSED(op); static_assert(_op < std::size(kOpCaps)); static_assert(kOpCaps[_op].qnn_op_name != nullptr); - return std::make_shared(instance_name, QNN_OP_PACKAGE_NAME_QTI_AISW, - kOpCaps[_op].qnn_op_name, qnn_instance); + return std::make_shared( + instance_name, QNN_OP_PACKAGE_NAME_QTI_AISW, kOpCaps[_op].qnn_op_name, qnn_instance); } void add_type_parameters(std::shared_ptr op, const char * name, float value) { @@ -273,8 +290,8 @@ std::shared_ptr op_constructor_with_type_param(const gg _ggml_op_param_type op_param; memcpy(&op_param, op->op_params, sizeof(op_param)); - auto qnn_op = std::make_shared<_qnn_op_type_name>(instance_name, QNN_OP_PACKAGE_NAME_QTI_AISW, op_caps.qnn_op_name, - qnn_instance); + auto qnn_op = std::make_shared<_qnn_op_type_name>( + instance_name, QNN_OP_PACKAGE_NAME_QTI_AISW, op_caps.qnn_op_name, qnn_instance); if (op_caps.qnn_param_name) { add_type_parameters(qnn_op, op_caps.qnn_param_name, op_param); } @@ -285,6 +302,7 @@ constexpr const op_constructor_t kOpConstructors[] = { nullptr, // GGML_OP_NONE nullptr, // GGML_OP_DUP generic_op_constructor, // GGML_OP_ADD + nullptr, // GGML_OP_ADD_ID nullptr, // GGML_OP_ADD1 nullptr, // GGML_OP_ACC generic_op_constructor, // GGML_OP_SUB @@ -425,8 +443,10 @@ const char * get_qnn_op_name(const ggml_tensor * op) { return kOpCaps[op_index].qnn_op_name; } -void get_qnn_op_desc(const ggml_tensor * op, bool append_dimensions, ggml_type override_data_type, - std::string & output) { +void get_qnn_op_desc(const ggml_tensor * op, + bool append_dimensions, + ggml_type override_data_type, + std::string & output) { auto op_index = get_qnn_op_index(op); GGML_ASSERT(op_index < std::size(kOpCaps)); auto get_desc = kOpCaps[op_index].get_desc; @@ -437,8 +457,9 @@ void get_qnn_op_desc(const ggml_tensor * op, bool append_dimensions, ggml_type o } } -std::shared_ptr create_op(const ggml_tensor * op, const std::string & name, - qnn_instance_ptr qnn_instance) { +std::shared_ptr create_op(const ggml_tensor * op, + const std::string & name, + qnn_instance_ptr qnn_instance) { auto op_index = get_qnn_op_index(op); GGML_ASSERT(op_index < std::size(kOpCaps)); auto op_constructor = kOpConstructors[op_index];