CANN: implement GGML_OP_SET for CANN backend

Add SET operator support using aclnnInplaceCopy, modeled after the
existing ACC implementation. This enables the scheduler to assign
SET ops to CANN when the output tensor resides in device memory,
avoiding cross-device write issues with delta-net hybrid models.

All 12 test-backend-ops SET tests pass (f32/i32, inplace/non-inplace, dim 1/2/3).
This commit is contained in:
hipudding 2026-03-27 07:42:54 +00:00
parent be1492d21f
commit c0e78773e9
3 changed files with 34 additions and 0 deletions

View File

@ -579,6 +579,33 @@ void ggml_cann_group_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
acl_mean_out.get(), acl_rstd_out.get());
}
/**
 * @brief Executes GGML_OP_SET: overwrites a region of dst with src1.
 *
 * Five int32 values are read from dst->op_params: three strides (used for
 * dimensions 1..3 of the target view), an offset, and an in-place flag. The
 * view of dst is built with src1's dimensions, the element size of src0 as the
 * innermost stride, and the given offset. When the in-place flag is zero, the
 * full contents of src0 are first copied into dst with an asynchronous
 * device-to-device memcpy issued on ctx.stream(). Finally src1 is copied into
 * the view through the aclnn InplaceCopy operator.
 *
 * @param ctx CANN backend context; supplies the stream for the async copy.
 * @param dst Destination tensor; dst->src[0] is the base tensor and
 *            dst->src[1] holds the values written into the target region.
 */
void ggml_cann_set(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
    ggml_tensor * base  = dst->src[0];
    ggml_tensor * patch = dst->src[1];

    // Layout of op_params as consumed here: [0..2] strides, [3] offset, [4] in-place flag.
    // NOTE(review): strides/offset are presumably in bytes, as the innermost stride is an element size — confirm.
    const int32_t * params = (const int32_t *) dst->op_params;

    size_t view_nb[] = {
        ggml_element_size(base),
        static_cast<size_t>(params[0]),
        static_cast<size_t>(params[1]),
        static_cast<size_t>(params[2]),
    };
    const size_t view_offset = static_cast<size_t>(params[3]);
    const bool   in_place    = params[4] != 0;

    // Target region: a view over dst shaped like src1, starting at view_offset.
    acl_tensor_ptr acl_region =
        ggml_cann_create_tensor(dst, patch->ne, view_nb, GGML_MAX_DIMS, ACL_FORMAT_ND, view_offset);
    acl_tensor_ptr acl_patch = ggml_cann_create_tensor(patch);

    if (!in_place) {
        // dst does not alias src0 here, so seed it with the whole of src0 before patching.
        const size_t total_bytes = ggml_nbytes(dst);
        ACL_CHECK(aclrtMemcpyAsync(dst->data, total_bytes, base->data, total_bytes, ACL_MEMCPY_DEVICE_TO_DEVICE,
                                   ctx.stream()));
    }

    // Overwrite the target region with src1.
    GGML_CANN_CALL_ACLNN_OP(ctx, InplaceCopy, acl_region.get(), acl_patch.get());
}
void ggml_cann_acc(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
ggml_tensor * src0 = dst->src[0];
ggml_tensor * src1 = dst->src[1];

View File

@ -461,6 +461,9 @@ void ggml_cann_timestep_embedding(ggml_backend_cann_context & ctx, ggml_tensor *
// @see ggml_cann_dup.
void ggml_cann_cpy(ggml_backend_cann_context & ctx, ggml_tensor * dst);
/**
 * @brief Copies src1 into a region of dst (GGML_OP_SET).
 *
 * Follows the same structure as ggml_cann_acc, but the values of dst->src[1]
 * replace the target region of dst instead of being added to it.
 *
 * @param ctx The CANN backend context.
 * @param dst The destination tensor; dst->src[0] and dst->src[1] are its inputs.
 * @see ggml_cann_acc
 */
void ggml_cann_set(ggml_backend_cann_context & ctx, ggml_tensor * dst);
/**
* @brief Computes the softmax activation with optional masking.
*

View File

@ -1833,6 +1833,9 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context & ctx, struct gg
case GGML_OP_CPY:
ggml_cann_cpy(ctx, dst);
break;
case GGML_OP_SET:
ggml_cann_set(ctx, dst);
break;
case GGML_OP_CONT:
ggml_cann_dup(ctx, dst);
break;
@ -2485,6 +2488,7 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev, const ggml_ten
case GGML_OP_SUM_ROWS:
case GGML_OP_ARGSORT:
case GGML_OP_ACC:
case GGML_OP_SET:
case GGML_OP_GROUP_NORM:
return true;
case GGML_OP_PAD: