From cb15cdb020ec49d98865e19eb2bdbbc852aca436 Mon Sep 17 00:00:00 2001 From: hipudding Date: Sat, 28 Mar 2026 07:16:07 +0000 Subject: [PATCH] CANN: add SOFTPLUS unary op support Implement GGML_UNARY_OP_SOFTPLUS using aclnnSoftplus with beta=1.0 and threshold=20.0. This enables hybrid models like Qwen3.5 to run entirely on the CANN backend without graph splitting, which fixes graph cache instability caused by the backend scheduler fragmenting the computation graph when SOFTPLUS falls back to CPU. --- ggml/src/ggml-cann/aclnn_ops.h | 1 + ggml/src/ggml-cann/ggml-cann.cpp | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/ggml/src/ggml-cann/aclnn_ops.h b/ggml/src/ggml-cann/aclnn_ops.h index 2fe0874f24..f5aadb38ae 100644 --- a/ggml/src/ggml-cann/aclnn_ops.h +++ b/ggml/src/ggml-cann/aclnn_ops.h @@ -50,6 +50,7 @@ #include #include #include +#include #include #include #include diff --git a/ggml/src/ggml-cann/ggml-cann.cpp b/ggml/src/ggml-cann/ggml-cann.cpp index 7ef4089147..fa8e9ee50a 100644 --- a/ggml/src/ggml-cann/ggml-cann.cpp +++ b/ggml/src/ggml-cann/ggml-cann.cpp @@ -1748,6 +1748,20 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context & ctx, struct gg case GGML_UNARY_OP_STEP: ggml_cann_step(ctx, dst); break; + case GGML_UNARY_OP_SOFTPLUS: + { + auto lambda = [](ggml_backend_cann_context & ctx, aclTensor * acl_src, aclTensor * acl_dst) { + float beta_val = 1.0f; + float threshold_val = 20.0f; + aclScalar * beta = aclCreateScalar(&beta_val, aclDataType::ACL_FLOAT); + aclScalar * threshold = aclCreateScalar(&threshold_val, aclDataType::ACL_FLOAT); + GGML_CANN_CALL_ACLNN_OP(ctx, Softplus, acl_src, beta, threshold, acl_dst); + aclDestroyScalar(beta); + aclDestroyScalar(threshold); + }; + ggml_cann_op_unary(lambda, ctx, dst); + } + break; default: return false; } @@ -2258,6 +2272,7 @@ static enum ggml_status ggml_backend_cann_graph_compute(ggml_backend_t backend, if (use_cann_graph) { // If no matching graph is found, the graph needs to be recaptured. graph_capture_required = !cann_ctx->graph_lru_cache.find_and_move_to_front(cgraph); + if (graph_capture_required) { // If no matching graph is found, add a new ACL graph. ggml_cann_graph * new_graph = ggml_cann_graph::create_from_cgraph(cgraph); @@ -2316,6 +2331,7 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev, const ggml_ten case GGML_UNARY_OP_SGN: case GGML_UNARY_OP_STEP: case GGML_UNARY_OP_GELU_ERF: + case GGML_UNARY_OP_SOFTPLUS: return true; default: return false;