CANN: add SOFTPLUS unary op support
Implement GGML_UNARY_OP_SOFTPLUS using aclnnSoftplus with beta=1.0 and threshold=20.0. Previously SOFTPLUS fell back to the CPU, so the backend scheduler fragmented the computation graph, which made the graph cache unstable. With this change, hybrid models like Qwen3.5 can run entirely on the CANN backend without graph splitting, which fixes that instability.
This commit is contained in:
parent
168d05f3d5
commit
cb15cdb020
|
|
@@ -50,6 +50,7 @@
|
|||
#include <aclnnop/aclnn_sign.h>
|
||||
#include <aclnnop/aclnn_silu.h>
|
||||
#include <aclnnop/aclnn_sin.h>
|
||||
#include <aclnnop/aclnn_softplus.h>
|
||||
#include <aclnnop/aclnn_slice.h>
|
||||
#include <aclnnop/aclnn_sqrt.h>
|
||||
#include <aclnnop/aclnn_tanh.h>
|
||||
|
|
|
|||
|
|
@@ -1748,6 +1748,20 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context & ctx, struct gg
|
|||
case GGML_UNARY_OP_STEP:
|
||||
ggml_cann_step(ctx, dst);
|
||||
break;
|
||||
case GGML_UNARY_OP_SOFTPLUS:
|
||||
{
|
||||
auto lambda = [](ggml_backend_cann_context & ctx, aclTensor * acl_src, aclTensor * acl_dst) {
|
||||
float beta_val = 1.0f;
|
||||
float threshold_val = 20.0f;
|
||||
aclScalar * beta = aclCreateScalar(&beta_val, aclDataType::ACL_FLOAT);
|
||||
aclScalar * threshold = aclCreateScalar(&threshold_val, aclDataType::ACL_FLOAT);
|
||||
GGML_CANN_CALL_ACLNN_OP(ctx, Softplus, acl_src, beta, threshold, acl_dst);
|
||||
aclDestroyScalar(beta);
|
||||
aclDestroyScalar(threshold);
|
||||
};
|
||||
ggml_cann_op_unary(lambda, ctx, dst);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
|
@@ -2258,6 +2272,7 @@ static enum ggml_status ggml_backend_cann_graph_compute(ggml_backend_t backend,
|
|||
if (use_cann_graph) {
|
||||
// If no matching graph is found, the graph needs to be recaptured.
|
||||
graph_capture_required = !cann_ctx->graph_lru_cache.find_and_move_to_front(cgraph);
|
||||
|
||||
if (graph_capture_required) {
|
||||
// If no matching graph is found, add a new ACL graph.
|
||||
ggml_cann_graph * new_graph = ggml_cann_graph::create_from_cgraph(cgraph);
|
||||
|
|
@@ -2316,6 +2331,7 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev, const ggml_ten
|
|||
case GGML_UNARY_OP_SGN:
|
||||
case GGML_UNARY_OP_STEP:
|
||||
case GGML_UNARY_OP_GELU_ERF:
|
||||
case GGML_UNARY_OP_SOFTPLUS:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
|
|
|
|||
Loading…
Reference in New Issue