ggml : allow fill node alloc inplace (#17870)
This commit is contained in:
parent
63908b631a
commit
86a3f0fad8
|
|
@@ -25,6 +25,7 @@ static bool ggml_is_view(const struct ggml_tensor * t) {
|
|||
// ops that return true for this function must not use restrict pointers for their backend implementations
|
||||
bool ggml_op_can_inplace(enum ggml_op op) {
|
||||
switch (op) {
|
||||
+ case GGML_OP_FILL:
|
||||
case GGML_OP_SCALE:
|
||||
case GGML_OP_DIAG_MASK_ZERO:
|
||||
case GGML_OP_DIAG_MASK_INF:
|
||||
|
|
|
|||
|
|
@@ -4,7 +4,7 @@
|
|||
#define CUDA_FILL_BLOCK_SIZE 256
|
||||
|
||||
template <typename T>
|
||||
- static __global__ void fill_kernel(T * __restrict__ dst, const int64_t k, const T value) {
|
||||
+ static __global__ void fill_kernel(T * dst, const int64_t k, const T value) {
|
||||
const int64_t i = (int64_t)blockDim.x * blockIdx.x + threadIdx.x;
|
||||
if (i >= k) {
|
||||
return;
|
||||
|
|
|
|||
Loading…
Reference in New Issue