diff --git a/ggml/src/ggml-cann/aclnn_ops.cpp b/ggml/src/ggml-cann/aclnn_ops.cpp index adb4d68e86..315bea0c84 100644 --- a/ggml/src/ggml-cann/aclnn_ops.cpp +++ b/ggml/src/ggml-cann/aclnn_ops.cpp @@ -579,6 +579,33 @@ void ggml_cann_group_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst) { acl_mean_out.get(), acl_rstd_out.get()); } +void ggml_cann_set(ggml_backend_cann_context & ctx, ggml_tensor * dst) { + ggml_tensor * src0 = dst->src[0]; + ggml_tensor * src1 = dst->src[1]; + + size_t nb1 = ((int32_t *) dst->op_params)[0]; + size_t nb2 = ((int32_t *) dst->op_params)[1]; + size_t nb3 = ((int32_t *) dst->op_params)[2]; + size_t offset = ((int32_t *) dst->op_params)[3]; + bool inplace = (bool) ((int32_t *) dst->op_params)[4]; + + size_t param_nb[] = { ggml_element_size(src0), nb1, nb2, nb3 }; + + // Create a view of dst at the target offset with src1's dimensions + acl_tensor_ptr acl_dst = ggml_cann_create_tensor(dst, src1->ne, param_nb, GGML_MAX_DIMS, ACL_FORMAT_ND, offset); + acl_tensor_ptr acl_src1 = ggml_cann_create_tensor(src1); + + if (!inplace) { + // First copy src0 to dst entirely + size_t cpy_size = ggml_nbytes(dst); + ACL_CHECK( + aclrtMemcpyAsync(dst->data, cpy_size, src0->data, cpy_size, ACL_MEMCPY_DEVICE_TO_DEVICE, ctx.stream())); + } + + // Copy src1 into the target region of dst + GGML_CANN_CALL_ACLNN_OP(ctx, InplaceCopy, acl_dst.get(), acl_src1.get()); +} + void ggml_cann_acc(ggml_backend_cann_context & ctx, ggml_tensor * dst) { ggml_tensor * src0 = dst->src[0]; ggml_tensor * src1 = dst->src[1]; diff --git a/ggml/src/ggml-cann/aclnn_ops.h b/ggml/src/ggml-cann/aclnn_ops.h index 7f5ba4d330..a68e9119ae 100644 --- a/ggml/src/ggml-cann/aclnn_ops.h +++ b/ggml/src/ggml-cann/aclnn_ops.h @@ -461,6 +461,9 @@ void ggml_cann_timestep_embedding(ggml_backend_cann_context & ctx, ggml_tensor * // @see ggml_cann_dup. void ggml_cann_cpy(ggml_backend_cann_context & ctx, ggml_tensor * dst); +// @see ggml_cann_acc, but copies src1 into dst instead of adding. +void ggml_cann_set(ggml_backend_cann_context & ctx, ggml_tensor * dst); + /** * @brief Computes the softmax activation with optional masking. * diff --git a/ggml/src/ggml-cann/ggml-cann.cpp b/ggml/src/ggml-cann/ggml-cann.cpp index 4281fd0f9d..f768de4d86 100644 --- a/ggml/src/ggml-cann/ggml-cann.cpp +++ b/ggml/src/ggml-cann/ggml-cann.cpp @@ -1833,6 +1833,9 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context & ctx, struct gg case GGML_OP_CPY: ggml_cann_cpy(ctx, dst); break; + case GGML_OP_SET: + ggml_cann_set(ctx, dst); + break; case GGML_OP_CONT: ggml_cann_dup(ctx, dst); break; @@ -2485,6 +2488,7 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev, const ggml_ten case GGML_OP_SUM_ROWS: case GGML_OP_ARGSORT: case GGML_OP_ACC: + case GGML_OP_SET: case GGML_OP_GROUP_NORM: return true; case GGML_OP_PAD: