CANN: implement GGML_OP_SET for CANN backend

Add SET operator support using aclnnInplaceCopy, modeled after the existing ACC implementation. This enables the scheduler to assign SET ops to CANN when the output tensor resides on device memory, avoiding cross-device write issues with delta-net hybrid models. All 12 test-backend-ops SET tests pass (f32/i32, inplace/non-inplace, dim 1/2/3).
2026-03-27 07:42:54 +00:00 · 2026-03-27 07:42:54 +00:00 · c0e78773e9
parent be1492d21f
commit c0e78773e9
3 changed files with 34 additions and 0 deletions
--- a/ggml/src/ggml-cann/aclnn_ops.cpp
+++ b/ggml/src/ggml-cann/aclnn_ops.cpp
@ -579,6 +579,33 @@ void ggml_cann_group_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
                            acl_mean_out.get(), acl_rstd_out.get());
 }

+void ggml_cann_set(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
+    ggml_tensor * src0 = dst->src[0];
+    ggml_tensor * src1 = dst->src[1];
+
+    size_t nb1     = ((int32_t *) dst->op_params)[0];
+    size_t nb2     = ((int32_t *) dst->op_params)[1];
+    size_t nb3     = ((int32_t *) dst->op_params)[2];
+    size_t offset  = ((int32_t *) dst->op_params)[3];
+    bool   inplace = (bool) ((int32_t *) dst->op_params)[4];
+
+    size_t param_nb[] = { ggml_element_size(src0), nb1, nb2, nb3 };
+
+    // Create a view of dst at the target offset with src1's dimensions
+    acl_tensor_ptr acl_dst  = ggml_cann_create_tensor(dst, src1->ne, param_nb, GGML_MAX_DIMS, ACL_FORMAT_ND, offset);
+    acl_tensor_ptr acl_src1 = ggml_cann_create_tensor(src1);
+
+    if (!inplace) {
+        // First copy src0 to dst entirely
+        size_t cpy_size = ggml_nbytes(dst);
+        ACL_CHECK(
+            aclrtMemcpyAsync(dst->data, cpy_size, src0->data, cpy_size, ACL_MEMCPY_DEVICE_TO_DEVICE, ctx.stream()));
+    }
+
+    // Copy src1 into the target region of dst
+    GGML_CANN_CALL_ACLNN_OP(ctx, InplaceCopy, acl_dst.get(), acl_src1.get());
+}
+
 void ggml_cann_acc(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
    ggml_tensor * src0 = dst->src[0];
    ggml_tensor * src1 = dst->src[1];
--- a/ggml/src/ggml-cann/aclnn_ops.h
+++ b/ggml/src/ggml-cann/aclnn_ops.h
@ -461,6 +461,9 @@ void ggml_cann_timestep_embedding(ggml_backend_cann_context & ctx, ggml_tensor *
 // @see ggml_cann_dup.
 void ggml_cann_cpy(ggml_backend_cann_context & ctx, ggml_tensor * dst);

+// @see ggml_cann_acc, but copies src1 into dst instead of adding.
+void ggml_cann_set(ggml_backend_cann_context & ctx, ggml_tensor * dst);
+
 /**
 * @brief   Computes the softmax activation with optional masking.
 *
--- a/ggml/src/ggml-cann/ggml-cann.cpp
+++ b/ggml/src/ggml-cann/ggml-cann.cpp
@ -1833,6 +1833,9 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context & ctx, struct gg
        case GGML_OP_CPY:
            ggml_cann_cpy(ctx, dst);
            break;
+        case GGML_OP_SET:
+            ggml_cann_set(ctx, dst);
+            break;
        case GGML_OP_CONT:
            ggml_cann_dup(ctx, dst);
            break;
@ -2485,6 +2488,7 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev, const ggml_ten
        case GGML_OP_SUM_ROWS:
        case GGML_OP_ARGSORT:
        case GGML_OP_ACC:
+        case GGML_OP_SET:
        case GGML_OP_GROUP_NORM:
            return true;
        case GGML_OP_PAD: