diff --git a/ggml/src/ggml-qnn/qnn/backend-ops.cpp b/ggml/src/ggml-qnn/qnn/backend-ops.cpp
index aace760784..86719b1343 100644
--- a/ggml/src/ggml-qnn/qnn/backend-ops.cpp
+++ b/ggml/src/ggml-qnn/qnn/backend-ops.cpp
@@ -17,10 +17,8 @@ qnn::qnn_graph * get_qnn_graph_from_cache(qnn::ggml_backend_qnn_device_context *
     std::string graph_key;
     auto op_data_type = qnn::qnn_graph::get_graph_key_from_cgraph(cgraph, graph_key);
     if (graph_key.empty()) {
-        QNN_LOG_DEBUG("[%s]empty graph key for cgraph: %p, size: %d\n",
-                      qnn::get_backend_name(ctx->device),
-                      (const void *) cgraph,
-                      (int) cgraph->n_nodes);
+        QNN_LOG_DEBUG("[%s]empty graph key for cgraph: %p, size: %d\n", qnn::get_backend_name(ctx->device),
+                      (const void *) cgraph, (int) cgraph->n_nodes);
         return nullptr;
     }
 
@@ -28,21 +26,19 @@ qnn::qnn_graph * get_qnn_graph_from_cache(qnn::ggml_backend_qnn_device_context *
     auto it = graph_cache.find(graph_key);
     qnn::qnn_graph * graph_ptr = nullptr;
     if (it != graph_cache.end()) {
-        QNN_LOG_DEBUG("[%s]found graph %s in cache, cache size: %d\n",
-                      qnn::get_backend_name(ctx->device),
-                      graph_key.c_str(),
-                      (int) graph_cache.size());
+        QNN_LOG_DEBUG("[%s]found graph %s in cache, cache size: %d\n", qnn::get_backend_name(ctx->device),
+                      graph_key.c_str(), (int) graph_cache.size());
         graph_ptr = it->second.get();
     } else {
         auto precision = qnn::qnn_graph::kHtpDefault;
         if (op_data_type == GGML_TYPE_F16) {
-            QNN_LOG_DEBUG(
-                "[%s][%s]set graph precision to FP16\n", qnn::get_backend_name(ctx->device), graph_key.c_str());
+            QNN_LOG_DEBUG("[%s][%s]set graph precision to FP16\n", qnn::get_backend_name(ctx->device),
+                          graph_key.c_str());
             precision = qnn::qnn_graph::kHtpFp16;
         }
 
-        auto graph = std::make_unique<qnn::qnn_graph>(
-            graph_key, ctx->device, ctx->instance, precision, ctx->socinfo.vtcm_size_in_mb);
+        auto graph = std::make_unique<qnn::qnn_graph>(graph_key, ctx->device, ctx->instance, precision,
+                                                      ctx->socinfo.vtcm_size_in_mb);
         if (!graph->is_valid()) {
             return nullptr;
         }
@@ -54,10 +50,8 @@ qnn::qnn_graph * get_qnn_graph_from_cache(qnn::ggml_backend_qnn_device_context *
 
         graph_ptr = graph.get();
         graph_cache[graph_key] = std::move(graph);
-        QNN_LOG_DEBUG("[%s]add graph %s to cache, cache size: %d\n",
-                      qnn::get_backend_name(ctx->device),
-                      graph_key.c_str(),
-                      (int) graph_cache.size());
+        QNN_LOG_DEBUG("[%s]add graph %s to cache, cache size: %d\n", qnn::get_backend_name(ctx->device),
+                      graph_key.c_str(), (int) graph_cache.size());
     }
 
     return graph_ptr;
@@ -179,6 +173,7 @@ constexpr const bool kQnnSupportedOps[] = {
     false, // GGML_UNARY_OP_HARDSIGMOID
     false, // GGML_UNARY_OP_EXP
     false, // GGML_UNARY_OP_GELU_ERF
+    false, // GGML_UNARY_OP_XIELU
 };
 
 static_assert(kQnnSupportedOps[GGML_OP_NONE], "GGML_OP_NONE is not true");
@@ -207,13 +202,8 @@ inline bool is_tensor_size_valid(qnn::ggml_backend_qnn_device_context * ctx, con
     const auto tensor_size = get_tensor_size_in_bytes(tensor, type);
     if (ctx->max_tensor_size_in_bytes && tensor_size >= ctx->max_tensor_size_in_bytes) {
         QNN_LOG_DEBUG("[%s]tensor(%s_%dx%dx%dx%d) size(%lld) exceeds the limit(%lld)\n",
-                      qnn::get_backend_name(ctx->device),
-                      ggml_get_name(tensor),
-                      (int) tensor->ne[0],
-                      (int) tensor->ne[1],
-                      (int) tensor->ne[2],
-                      (int) tensor->ne[3],
-                      (long long int) tensor_size,
+                      qnn::get_backend_name(ctx->device), ggml_get_name(tensor), (int) tensor->ne[0],
+                      (int) tensor->ne[1], (int) tensor->ne[2], (int) tensor->ne[3], (long long int) tensor_size,
                       (long long int) ctx->max_tensor_size_in_bytes);
         return false;
     }
@@ -230,18 +220,10 @@ bool is_tensor_type_valid(qnn::ggml_backend_qnn_device_context * ctx, const ggml
 #ifndef NDEBUG
     if (tensor->view_src) {
         auto * src_tensor = tensor->view_src;
-        QNN_LOG_DEBUG("[%s]tensor(%s_%dx%dx%dx%d) is a view, src: %s_%dx%dx%dx%d\n",
-                      qnn::get_backend_name(ctx->device),
-                      ggml_get_name(tensor),
-                      (int) tensor->ne[0],
-                      (int) tensor->ne[1],
-                      (int) tensor->ne[2],
-                      (int) tensor->ne[3],
-                      ggml_get_name(src_tensor),
-                      (int) src_tensor->ne[0],
-                      (int) src_tensor->ne[1],
-                      (int) src_tensor->ne[2],
-                      (int) src_tensor->ne[3]);
+        QNN_LOG_DEBUG("[%s]tensor(%s_%dx%dx%dx%d) is a view, src: %s_%dx%dx%dx%d\n", qnn::get_backend_name(ctx->device),
+                      ggml_get_name(tensor), (int) tensor->ne[0], (int) tensor->ne[1], (int) tensor->ne[2],
+                      (int) tensor->ne[3], ggml_get_name(src_tensor), (int) src_tensor->ne[0], (int) src_tensor->ne[1],
+                      (int) src_tensor->ne[2], (int) src_tensor->ne[3]);
     }
 #endif
 
@@ -250,15 +232,14 @@ bool is_tensor_type_valid(qnn::ggml_backend_qnn_device_context * ctx, const ggml
         case GGML_TYPE_F16:
             if (!is_type_bit_enabled(ctx->supported_types, tensor->type)) {
                 QNN_LOG_DEBUG("[%s]unsupported data type %s, supported_types: 0x%x\n",
-                              qnn::get_backend_name(ctx->device),
-                              ggml_type_name(tensor->type),
+                              qnn::get_backend_name(ctx->device), ggml_type_name(tensor->type),
                               (unsigned int) ctx->supported_types);
                 return false;
             }
             break;
         default:
-            QNN_LOG_DEBUG(
-                "[%s]unsupported data type %s\n", qnn::get_backend_name(ctx->device), ggml_type_name(tensor->type));
+            QNN_LOG_DEBUG("[%s]unsupported data type %s\n", qnn::get_backend_name(ctx->device),
+                          ggml_type_name(tensor->type));
             return false;
     }
 
@@ -301,20 +282,14 @@ bool ggml_qnn_have_same_tensor_types(qnn::ggml_backend_qnn_device_context * ctx,
     if (src1) {
         if (src0->type != op->type || src1->type != op->type) {
             QNN_LOG_DEBUG("[%s][%s]type src0(%s), src1(%s) and op(%s) are not equal\n",
-                          qnn::get_backend_name(ctx->device),
-                          ggml_op_name(op->op),
-                          ggml_type_name(src0->type),
-                          ggml_type_name(src1->type),
-                          ggml_type_name(op->type));
+                          qnn::get_backend_name(ctx->device), ggml_op_name(op->op), ggml_type_name(src0->type),
+                          ggml_type_name(src1->type), ggml_type_name(op->type));
             return false;
         }
     } else {
         if (src0->type != op->type) {
-            QNN_LOG_DEBUG("[%s][%s]type src0(%s) and op(%s) are not equal\n",
-                          qnn::get_backend_name(ctx->device),
-                          ggml_op_name(op->op),
-                          ggml_type_name(src0->type),
-                          ggml_type_name(op->type));
+            QNN_LOG_DEBUG("[%s][%s]type src0(%s) and op(%s) are not equal\n", qnn::get_backend_name(ctx->device),
+                          ggml_op_name(op->op), ggml_type_name(src0->type), ggml_type_name(op->type));
             return false;
         }
     }
@@ -333,9 +308,7 @@ bool ggml_qnn_supports_matmul_op(qnn::ggml_backend_qnn_device_context * ctx, con
     if (is_data_reinterpretation_op(src0->op) || is_data_reinterpretation_op(src1->op)) {
         // TODO: remove the blocker here when we support permute op
         QNN_LOG_DEBUG("[%s][MUL_MAT]data reorganization op is not supported, (%s, %s)\n",
-                      qnn::get_backend_name(ctx->device),
-                      ggml_op_name(src0->op),
-                      ggml_op_name(src1->op));
+                      qnn::get_backend_name(ctx->device), ggml_op_name(src0->op), ggml_op_name(src1->op));
         return false;
     }
 
@@ -362,8 +335,7 @@ bool ggml_qnn_supports_matmul_op(qnn::ggml_backend_qnn_device_context * ctx, con
                 !is_type_bit_enabled(ctx->cpu_preprocess_types, src0->type)) {
                 // for such cases that src0 is quantized and op is float32, check if the quant type is enabled
                 QNN_LOG_DEBUG("[%s][MUL_MAT]quantized src0 type %s is not enabled\n",
-                              qnn::get_backend_name(ctx->device),
-                              ggml_type_name(src0->type));
+                              qnn::get_backend_name(ctx->device), ggml_type_name(src0->type));
                 return false;
             }
             break;
@@ -387,12 +359,8 @@ void print_tensor_info(qnn::ggml_backend_qnn_device_context * ctx, const ggml_te
 
     std::string op_key;
     qnn::get_qnn_op_desc(op, true, GGML_TYPE_COUNT, op_key);
-    QNN_LOG_DEBUG("[%s][%s]op was %s, support/unsupported: %d/%d\n",
-                  qnn::get_backend_name(ctx->device),
-                  op_key.c_str(),
-                  supported,
-                  ctx->supported_op_count.load(),
-                  ctx->unsupported_op_count.load());
+    QNN_LOG_DEBUG("[%s][%s]op was %s, support/unsupported: %d/%d\n", qnn::get_backend_name(ctx->device), op_key.c_str(),
+                  supported, ctx->supported_op_count.load(), ctx->unsupported_op_count.load());
 }
 #endif
 
@@ -439,9 +407,7 @@ bool device_supports_op(qnn::ggml_backend_qnn_device_context * ctx, const ggml_t
             // TODO: fix this when we have the support for mul with rms_norm
             if (ctx->enable_cpu_dequantize && (src0->op == GGML_OP_RMS_NORM || src1->op == GGML_OP_RMS_NORM)) {
                 QNN_LOG_DEBUG("[%s][%s]skip unsupported mul with rms norm, (%s, %s)\n",
-                              qnn::get_backend_name(ctx->device),
-                              ggml_op_desc(op),
-                              ggml_op_desc(src0),
+                              qnn::get_backend_name(ctx->device), ggml_op_desc(op), ggml_op_desc(src0),
                               ggml_op_desc(src1));
                 is_op_supported = false;
                 break;
@@ -453,8 +419,7 @@ bool device_supports_op(qnn::ggml_backend_qnn_device_context * ctx, const ggml_t
             // TODO: move to op caps array?
             if (!ggml_are_same_shape(src0, src1)) {
                 QNN_LOG_DEBUG("[%s][%s] src0 and src1 dimensions are not equal\n",
-                              qnn::get_backend_name(ctx->device),
-                              ggml_op_desc(op));
+                              qnn::get_backend_name(ctx->device), ggml_op_desc(op));
                 is_op_supported = false;
             }
             break;
@@ -482,8 +447,8 @@ bool device_supports_op(qnn::ggml_backend_qnn_device_context * ctx, const ggml_t
 }
 
 bool device_compute_graph(qnn::ggml_backend_qnn_device_context * ctx, ggml_cgraph * cgraph) {
-    QNN_LOG_DEBUG(
-        "[%s]compute graph start, nodes count: %d\n", qnn::get_backend_name(ctx->device), (int) cgraph->n_nodes);
+    QNN_LOG_DEBUG("[%s]compute graph start, nodes count: %d\n", qnn::get_backend_name(ctx->device),
+                  (int) cgraph->n_nodes);
 
     auto qnn_graph = get_qnn_graph_from_cache(ctx, cgraph);
     bool success = qnn_graph && qnn_graph->execute(cgraph, ctx->convert_context);
diff --git a/ggml/src/ggml-qnn/qnn/op-config-caps.cpp b/ggml/src/ggml-qnn/qnn/op-config-caps.cpp
index 7bb8b42ce7..34c50707c6 100644
--- a/ggml/src/ggml-qnn/qnn/op-config-caps.cpp
+++ b/ggml/src/ggml-qnn/qnn/op-config-caps.cpp
@@ -24,24 +24,13 @@ void append_tensor_shape_and_type_impl(const ggml_tensor * tensor, ggml_type ove
             len = snprintf(buffer, sizeof(buffer), "%ldx%ld%s", (long) tensor->ne[0], (long) tensor->ne[1], type_name);
             break;
         case 3:
-            len = snprintf(buffer,
-                           sizeof(buffer),
-                           "%ldx%ldx%ld%s",
-                           (long) tensor->ne[0],
-                           (long) tensor->ne[1],
-                           (long) tensor->ne[2],
-                           type_name);
+            len = snprintf(buffer, sizeof(buffer), "%ldx%ldx%ld%s", (long) tensor->ne[0], (long) tensor->ne[1],
+                           (long) tensor->ne[2], type_name);
             break;
         case 4:
         default:
-            len = snprintf(buffer,
-                           sizeof(buffer),
-                           "%ldx%ldx%ldx%ld%s",
-                           (long) tensor->ne[0],
-                           (long) tensor->ne[1],
-                           (long) tensor->ne[2],
-                           (long) tensor->ne[3],
-                           type_name);
+            len = snprintf(buffer, sizeof(buffer), "%ldx%ldx%ldx%ld%s", (long) tensor->ne[0], (long) tensor->ne[1],
+                           (long) tensor->ne[2], (long) tensor->ne[3], type_name);
             break;
     }
     GGML_ASSERT(len > 0 && len < (int) sizeof(buffer));
@@ -238,6 +227,7 @@ constexpr const qnn_op_caps_t kOpCaps[] = {
     {}, // GGML_UNARY_OP_HARDSIGMOID
     {}, // GGML_UNARY_OP_EXP
     {}, // GGML_UNARY_OP_GELU_ERF
+    {}, // GGML_UNARY_OP_XIELU
 };
 
 static_assert(kOpCaps[GGML_OP_NONE].get_desc == nullptr, "GGML_OP_NONE should not have get_desc function");
@@ -255,8 +245,8 @@ std::shared_ptr mat_mul_op_constructor(const ggml_tenso
                                        qnn::qnn_instance_ptr qnn_instance) {
     if (qnn_instance->has_custom_op_package() && ggml_n_dims(op) == 2) {
         QNN_LOG_DEBUG("create GgmlMulMat, name %s, use GgmlOpPackage\n", instance_name.c_str());
-        return std::make_shared(
-            instance_name, "GgmlOpPackage", "GgmlMulMat", qnn_instance);
+        return std::make_shared(instance_name, "GgmlOpPackage", "GgmlMulMat",
+                                qnn_instance);
     }
 
     QNN_LOG_DEBUG("create QNN_OP_MAT_MUL, name %s\n", instance_name.c_str());
@@ -270,8 +260,8 @@ std::shared_ptr generic_op_constructor(const ggml_tenso
     GGML_UNUSED(op);
     static_assert(_op < std::size(kOpCaps));
     static_assert(kOpCaps[_op].qnn_op_name != nullptr);
-    return std::make_shared(
-        instance_name, QNN_OP_PACKAGE_NAME_QTI_AISW, kOpCaps[_op].qnn_op_name, qnn_instance);
+    return std::make_shared(instance_name, QNN_OP_PACKAGE_NAME_QTI_AISW,
+                            kOpCaps[_op].qnn_op_name, qnn_instance);
 }
 
 void add_type_parameters(std::shared_ptr op, const char * name, float value) {
@@ -293,8 +283,8 @@ std::shared_ptr op_constructor_with_type_param(const gg
 
     _ggml_op_param_type op_param;
    memcpy(&op_param, op->op_params, sizeof(op_param));
-    auto qnn_op = std::make_shared<_qnn_op_type_name>(
-        instance_name, QNN_OP_PACKAGE_NAME_QTI_AISW, op_caps.qnn_op_name, qnn_instance);
+    auto qnn_op = std::make_shared<_qnn_op_type_name>(instance_name, QNN_OP_PACKAGE_NAME_QTI_AISW, op_caps.qnn_op_name,
+                                                      qnn_instance);
     if (op_caps.qnn_param_name) {
         add_type_parameters(qnn_op, op_caps.qnn_param_name, op_param);
     }
@@ -416,6 +406,7 @@ constexpr const op_constructor_t kOpConstructors[] = {
     nullptr, // GGML_UNARY_OP_HARDSIGMOID
     nullptr, // GGML_UNARY_OP_EXP
     nullptr, // GGML_UNARY_OP_GELU_ERF
+    nullptr, // GGML_UNARY_OP_XIELU
 };
 
 static_assert(kOpConstructors[GGML_OP_NONE] == nullptr, "GGML_OP_NONE does not match the nullptr function");