ggml: allow casting between f32 and i32 (#15783)
* ggml: allow casting between f32 and i32
* fix cuda
* add vulkan
* fix CPU non-cont
* add non-cont test case
* add note
* extend test number range
* correct note
* add cont version for vulkan
parent 5ef22d281d
commit 9fcb29f22f
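A minimal usage sketch of the new cast path (illustration only, not part of the commit; assumes the CPU backend and the standard ggml context/graph helpers):

    #include "ggml.h"
    #include "ggml-cpu.h"

    int main(void) {
        struct ggml_init_params ip = {
            /*.mem_size   =*/ 16*1024*1024,
            /*.mem_buffer =*/ NULL,
            /*.no_alloc   =*/ false,
        };
        struct ggml_context * ctx = ggml_init(ip);

        struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
        for (int i = 0; i < 4; ++i) {
            ((float *) a->data)[i] = i + 0.9f; // 0.9, 1.9, 2.9, 3.9
        }

        // new in this commit: F32 <-> I32 casts; the fractional part is discarded
        struct ggml_tensor * b = ggml_cast(ctx, a, GGML_TYPE_I32);

        struct ggml_cgraph * gf = ggml_new_graph(ctx);
        ggml_build_forward_expand(gf, b);
        ggml_graph_compute_with_ctx(ctx, gf, /*n_threads =*/ 1);

        // ((int32_t *) b->data)[i] is now 0, 1, 2, 3
        ggml_free(ctx);
        return 0;
    }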
ggml/include/ggml-cpu.h
@@ -134,6 +134,7 @@ extern "C" {
     GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cpu_reg(void);
 
     GGML_BACKEND_API void ggml_cpu_fp32_to_fp32(const float *, float *, int64_t);
+    GGML_BACKEND_API void ggml_cpu_fp32_to_i32 (const float *, int32_t *, int64_t);
     GGML_BACKEND_API void ggml_cpu_fp32_to_fp16(const float *, ggml_fp16_t *, int64_t);
     GGML_BACKEND_API void ggml_cpu_fp16_to_fp32(const ggml_fp16_t *, float *, int64_t);
     GGML_BACKEND_API void ggml_cpu_fp32_to_bf16(const float *, ggml_bf16_t *, int64_t);
ggml/include/ggml.h
@@ -1404,6 +1404,7 @@ extern "C" {
             struct ggml_tensor * a,
             struct ggml_tensor * b);
 
+    // note: casting from f32 to i32 will discard the fractional part
     GGML_API struct ggml_tensor * ggml_cast(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
ggml/src/ggml-cpu/ggml-cpu.c
@@ -373,6 +373,9 @@ static const struct ggml_type_traits_cpu type_traits_cpu[GGML_TYPE_COUNT] = {
         .vec_dot_type = GGML_TYPE_Q8_K,
         .nrows = 1,
     },
+    [GGML_TYPE_I32] = {
+        .from_float = (ggml_from_float_t) ggml_cpu_fp32_to_i32,
+    },
 };
 
 const struct ggml_type_traits_cpu * ggml_get_type_traits_cpu(enum ggml_type type) {
@@ -2696,7 +2699,10 @@ struct ggml_cplan ggml_graph_plan(
                     if (ggml_is_quantized(node->type) ||
                         // F16 -> BF16 and BF16 -> F16 copies go through intermediate F32
                         (node->src[0]->type == GGML_TYPE_F16 && node->src[1] && node->src[1]->type == GGML_TYPE_BF16) ||
-                        (node->src[0]->type == GGML_TYPE_BF16 && node->src[1] && node->src[1]->type == GGML_TYPE_F16)) {
+                        (node->src[0]->type == GGML_TYPE_BF16 && node->src[1] && node->src[1]->type == GGML_TYPE_F16) ||
+                        // conversion between F32 and I32
+                        (node->src[0]->type == GGML_TYPE_F32 && node->src[1] && node->src[1]->type == GGML_TYPE_I32) ||
+                        (node->src[0]->type == GGML_TYPE_I32 && node->src[1] && node->src[1]->type == GGML_TYPE_F32)) {
                         cur = ggml_type_size(GGML_TYPE_F32) * node->ne[0] * n_tasks;
                     }
                 } break;
@@ -3258,6 +3264,13 @@ void ggml_cpu_fp32_to_bf16(const float * x, ggml_bf16_t * y, int64_t n) {
     }
 }
 
+void ggml_cpu_fp32_to_i32(const float * x, int32_t * y, int64_t n) {
+    int64_t i = 0;
+    for (; i < n; ++i) {
+        y[i] = x[i];
+    }
+}
+
 void ggml_cpu_bf16_to_fp32(const ggml_bf16_t * x, float * y, int64_t n) {
     int64_t i = 0;
 #if defined(__AVX2__)
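The scalar kernel relies on C's implicit float-to-int conversion, which truncates toward zero (and is undefined for values outside the i32 range). A self-contained check of the semantics `y[i] = x[i]` inherits (illustration, not part of the commit):

    #include <assert.h>
    #include <stdint.h>

    int main(void) {
        const float x[4] = { 1.9f, -1.9f, 0.5f, -0.5f };
        int32_t     y[4];
        for (int i = 0; i < 4; ++i) {
            y[i] = x[i]; // same implicit conversion as ggml_cpu_fp32_to_i32
        }
        assert(y[0] == 1 && y[1] == -1 && y[2] == 0 && y[3] == 0);
        return 0;
    }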
ggml/src/ggml-cpu/ops.cpp
@@ -776,6 +776,24 @@ static void ggml_compute_forward_dup_f32(
                     id += ne00 * (ne01 - ir1);
                 }
             }
+        } else if (dst->type == GGML_TYPE_I32) {
+            size_t id = 0;
+            int32_t * dst_ptr = (int32_t *) dst->data;
+
+            for (int i03 = 0; i03 < ne03; i03++) {
+                for (int i02 = 0; i02 < ne02; i02++) {
+                    id += ne00 * ir0;
+                    for (int i01 = ir0; i01 < ir1; i01++) {
+                        for (int i00 = 0; i00 < ne00; i00++) {
+                            const float * src0_ptr = (float *) ((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03);
+
+                            dst_ptr[id] = *src0_ptr;
+                            id++;
+                        }
+                    }
+                    id += ne00 * (ne01 - ir1);
+                }
+            }
         } else {
             GGML_ABORT("fatal error"); // TODO: implement
         }
@@ -947,6 +965,144 @@ static void ggml_compute_forward_dup_f32(
                 }
             }
         }
+    } else if (dst->type == GGML_TYPE_I32) {
+        for (int64_t i03 = 0; i03 < ne03; i03++) {
+            for (int64_t i02 = 0; i02 < ne02; i02++) {
+                i10 += ne00 * ir0;
+                while (i10 >= ne0) {
+                    i10 -= ne0;
+                    if (++i11 == ne1) {
+                        i11 = 0;
+                        if (++i12 == ne2) {
+                            i12 = 0;
+                            if (++i13 == ne3) {
+                                i13 = 0;
+                            }
+                        }
+                    }
+                }
+                for (int64_t i01 = ir0; i01 < ir1; i01++) {
+                    for (int64_t i00 = 0; i00 < ne00; i00++) {
+                        const char * src0_ptr = ((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03);
+                        char * dst_ptr = ((char *) dst->data + i10*nb0 + i11*nb1 + i12*nb2 + i13*nb3);
+
+                        *(int32_t *) dst_ptr = *(const float *) src0_ptr;
+
+                        if (++i10 == ne0) {
+                            i10 = 0;
+                            if (++i11 == ne1) {
+                                i11 = 0;
+                                if (++i12 == ne2) {
+                                    i12 = 0;
+                                    if (++i13 == ne3) {
+                                        i13 = 0;
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+                i10 += ne00 * (ne01 - ir1);
+                while (i10 >= ne0) {
+                    i10 -= ne0;
+                    if (++i11 == ne1) {
+                        i11 = 0;
+                        if (++i12 == ne2) {
+                            i12 = 0;
+                            if (++i13 == ne3) {
+                                i13 = 0;
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    } else {
+        GGML_ABORT("fatal error"); // TODO: implement
+    }
+}
+
+static void ggml_compute_forward_dup_i32(
+        const ggml_compute_params * params,
+        ggml_tensor * dst) {
+
+    const ggml_tensor * src0 = dst->src[0];
+
+    GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
+
+    GGML_TENSOR_UNARY_OP_LOCALS
+
+    const int ith = params->ith; // thread index
+    const int nth = params->nth; // number of threads
+
+    // parallelize by rows
+    const int nr = ne01;
+    // number of rows per thread
+    const int dr = (nr + nth - 1) / nth;
+    // row range for this thread
+    const int ir0 = dr * ith;
+    const int ir1 = MIN(ir0 + dr, nr);
+
+    // dst counters
+
+    int64_t i10 = 0;
+    int64_t i11 = 0;
+    int64_t i12 = 0;
+    int64_t i13 = 0;
+
+    // TODO: not optimal, but works
+    if (dst->type == GGML_TYPE_F32) {
+        for (int64_t i03 = 0; i03 < ne03; i03++) {
+            for (int64_t i02 = 0; i02 < ne02; i02++) {
+                i10 += ne00 * ir0;
+                while (i10 >= ne0) {
+                    i10 -= ne0;
+                    if (++i11 == ne1) {
+                        i11 = 0;
+                        if (++i12 == ne2) {
+                            i12 = 0;
+                            if (++i13 == ne3) {
+                                i13 = 0;
+                            }
+                        }
+                    }
+                }
+                for (int64_t i01 = ir0; i01 < ir1; i01++) {
+                    for (int64_t i00 = 0; i00 < ne00; i00++) {
+                        const char * src0_ptr = ((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03);
+                        char * dst_ptr = ((char *) dst->data + i10*nb0 + i11*nb1 + i12*nb2 + i13*nb3);
+
+                        *(float *) dst_ptr = *(const int32_t *) src0_ptr;
+
+                        if (++i10 == ne0) {
+                            i10 = 0;
+                            if (++i11 == ne1) {
+                                i11 = 0;
+                                if (++i12 == ne2) {
+                                    i12 = 0;
+                                    if (++i13 == ne3) {
+                                        i13 = 0;
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+                i10 += ne00 * (ne01 - ir1);
+                while (i10 >= ne0) {
+                    i10 -= ne0;
+                    if (++i11 == ne1) {
+                        i11 = 0;
+                        if (++i12 == ne2) {
+                            i12 = 0;
+                            if (++i13 == ne3) {
+                                i13 = 0;
+                            }
+                        }
+                    }
+                }
+            }
+        }
     } else {
         GGML_ABORT("fatal error"); // TODO: implement
     }
@@ -1177,6 +1333,10 @@ void ggml_compute_forward_dup(
             {
                 ggml_compute_forward_dup_f32(params, dst);
             } break;
+        case GGML_TYPE_I32:
+            {
+                ggml_compute_forward_dup_i32(params, dst);
+            } break;
         default:
             {
                 if (ggml_is_quantized(src0->type) && dst->type == GGML_TYPE_F32) {
ggml/src/ggml-cuda/convert.cuh
@@ -38,6 +38,8 @@ template<typename dst_t, typename src_t>
         return __float2bfloat16(float(x));
     } else if constexpr(std::is_same_v<src_t, nv_bfloat16>) {
         return __bfloat162float(x);
+    } else if constexpr(std::is_same_v<dst_t, int32_t>) {
+        return int32_t(x);
     } else {
         return float(x);
     }
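On the CUDA side the conversion is selected at compile time with if-constexpr on the destination type; a plain C++17 stand-in for the same dispatch pattern (simplified sketch, not the actual CUDA helper, which also handles half/nv_bfloat16):

    #include <cstdint>
    #include <type_traits>

    template <typename dst_t, typename src_t>
    dst_t convert(src_t x) {
        if constexpr (std::is_same_v<dst_t, int32_t>) {
            return int32_t(x);      // f32 -> i32: fractional part discarded
        } else {
            return dst_t(float(x)); // other conversions go through float
        }
    }

    int main() {
        return convert<int32_t, float>(3.9f) == 3 ? 0 : 1; // returns 0
    }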
ggml/src/ggml-cuda/cpy.cu
@@ -374,6 +374,10 @@ void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, gg
         ggml_cpy_flt_cuda<nv_bfloat16, half> (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream, dest_ptrs_d, graph_cpynode_index);
     } else if (src0->type == GGML_TYPE_BF16 && src1->type == GGML_TYPE_F32) {
         ggml_cpy_flt_cuda<nv_bfloat16, float> (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream, dest_ptrs_d, graph_cpynode_index);
+    } else if (src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_I32) {
+        ggml_cpy_flt_cuda<float, int32_t> (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream, dest_ptrs_d, graph_cpynode_index);
+    } else if (src0->type == GGML_TYPE_I32 && src1->type == GGML_TYPE_F32) {
+        ggml_cpy_flt_cuda<int32_t, float> (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream, dest_ptrs_d, graph_cpynode_index);
     } else {
         GGML_ABORT("%s: unsupported type combination (%s to %s)\n", __func__,
                 ggml_type_name(src0->type), ggml_type_name(src1->type));
ggml/src/ggml-cuda/ggml-cuda.cu
@@ -3461,6 +3461,12 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
                 if (src0_type == GGML_TYPE_F32 && src1_type == GGML_TYPE_IQ4_NL) {
                     return true;
                 }
+                if (src0_type == GGML_TYPE_F32 && src1_type == GGML_TYPE_I32) {
+                    return true;
+                }
+                if (src0_type == GGML_TYPE_I32 && src1_type == GGML_TYPE_F32) {
+                    return true;
+                }
                 if (src0_type == src1_type && ggml_is_contiguous(op->src[0]) && ggml_is_contiguous(op->src[1])) {
                     return true;
                 }
ggml/src/ggml-metal/ggml-metal.m
@@ -583,6 +583,8 @@ enum ggml_metal_kernel_type {
     GGML_METAL_KERNEL_TYPE_CPY_F16_F32,
     GGML_METAL_KERNEL_TYPE_CPY_BF16_F32,
     GGML_METAL_KERNEL_TYPE_CPY_BF16_BF16,
+    GGML_METAL_KERNEL_TYPE_CPY_F32_I32,
+    GGML_METAL_KERNEL_TYPE_CPY_I32_F32,
     GGML_METAL_KERNEL_TYPE_CPY_F32_Q8_0,
     GGML_METAL_KERNEL_TYPE_CPY_F32_Q4_0,
     GGML_METAL_KERNEL_TYPE_CPY_F32_Q4_1,
@@ -1616,6 +1618,8 @@ static struct ggml_backend_metal_context * ggml_metal_init(ggml_backend_dev_t de
         GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_F16_F16, cpy_f16_f16, true);
         GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_BF16_F32, cpy_bf16_f32, use_bfloat);
         GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_BF16_BF16, cpy_bf16_bf16, use_bfloat);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_F32_I32, cpy_f32_i32, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_I32_F32, cpy_i32_f32, true);
         GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_F32_Q8_0, cpy_f32_q8_0, true);
         GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_F32_Q4_0, cpy_f32_q4_0, true);
         GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_F32_Q4_1, cpy_f32_q4_1, true);
@@ -1945,6 +1949,7 @@ static bool ggml_metal_supports_op(const struct ggml_backend_metal_device_contex
                case GGML_TYPE_Q5_0:
                case GGML_TYPE_Q5_1:
                case GGML_TYPE_IQ4_NL:
+               case GGML_TYPE_I32:
                    return true;
                default:
                    return false;
@@ -1977,6 +1982,8 @@ static bool ggml_metal_supports_op(const struct ggml_backend_metal_device_contex
                default:
                    return false;
                }
+           case GGML_TYPE_I32:
+               return op->type == GGML_TYPE_F32;
            default:
                return false;
        };
@@ -5680,6 +5687,7 @@ static int ggml_metal_encode_node(
 
                switch (dstt) {
                    case GGML_TYPE_F32: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_F32_F32].pipeline; break;
+                   case GGML_TYPE_I32: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_F32_I32].pipeline; break;
                    case GGML_TYPE_F16: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_F32_F16].pipeline; break;
                    case GGML_TYPE_BF16: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_F32_BF16].pipeline; break;
                    case GGML_TYPE_Q8_0: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_F32_Q8_0].pipeline; break;
@@ -5691,6 +5699,13 @@ static int ggml_metal_encode_node(
                    default: GGML_ABORT("not implemented");
                };
            } break;
+       case GGML_TYPE_I32:
+           {
+               switch (dstt) {
+                   case GGML_TYPE_F32: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_I32_F32].pipeline; break;
+                   default: GGML_ABORT("not implemented");
+               };
+           } break;
        case GGML_TYPE_F16:
            {
                switch (dstt) {
ggml/src/ggml-metal/ggml-metal.metal
@@ -5338,6 +5338,8 @@ typedef decltype(kernel_cpy<float, float>) kernel_cpy_t;
 
 template [[host_name("kernel_cpy_f32_f32")]] kernel kernel_cpy_t kernel_cpy<float, float>;
 template [[host_name("kernel_cpy_f32_f16")]] kernel kernel_cpy_t kernel_cpy<float, half>;
+template [[host_name("kernel_cpy_f32_i32")]] kernel kernel_cpy_t kernel_cpy<float, int32_t>;
+template [[host_name("kernel_cpy_i32_f32")]] kernel kernel_cpy_t kernel_cpy<int32_t, float>;
 #if defined(GGML_METAL_USE_BF16)
 template [[host_name("kernel_cpy_f32_bf16")]] kernel kernel_cpy_t kernel_cpy<float, bfloat>;
 #endif
ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -506,8 +506,8 @@ struct vk_device_struct {
     vk_pipeline pipeline_pad_f32;
     vk_pipeline pipeline_roll_f32;
     vk_pipeline pipeline_repeat_f32, pipeline_repeat_back_f32;
-    vk_pipeline pipeline_cpy_f32_f32, pipeline_cpy_f32_f16, pipeline_cpy_f16_f16, pipeline_cpy_f16_f32, pipeline_cpy_f32_bf16;
-    vk_pipeline pipeline_contig_cpy_f32_f32, pipeline_contig_cpy_f32_f16, pipeline_contig_cpy_f16_f16, pipeline_contig_cpy_f16_f32, pipeline_contig_cpy_f32_bf16;
+    vk_pipeline pipeline_cpy_f32_f32, pipeline_cpy_f32_f16, pipeline_cpy_f16_f16, pipeline_cpy_f16_f32, pipeline_cpy_f32_bf16, pipeline_cpy_f32_i32, pipeline_cpy_i32_f32;
+    vk_pipeline pipeline_contig_cpy_f32_f32, pipeline_contig_cpy_f32_f16, pipeline_contig_cpy_f16_f16, pipeline_contig_cpy_f16_f32, pipeline_contig_cpy_f32_bf16, pipeline_contig_cpy_f32_i32, pipeline_contig_cpy_i32_f32;
     vk_pipeline pipeline_cpy_f32_quant[GGML_TYPE_COUNT];
     vk_pipeline pipeline_cpy_quant_f32[GGML_TYPE_COUNT];
     vk_pipeline pipeline_set_rows[GGML_TYPE_COUNT];
@@ -3226,12 +3226,16 @@ static void ggml_vk_load_shaders(vk_device& device) {
     ggml_vk_create_pipeline(device, device->pipeline_cpy_f16_f16, "cpy_f16_f16", cpy_f16_f16_len, cpy_f16_f16_data, "main", 2, sizeof(vk_op_unary_push_constants), {512, 1, 1}, {}, 1);
     ggml_vk_create_pipeline(device, device->pipeline_cpy_f16_f32, "cpy_f16_f32", cpy_f16_f32_len, cpy_f16_f32_data, "main", 2, sizeof(vk_op_unary_push_constants), {512, 1, 1}, {}, 1);
     ggml_vk_create_pipeline(device, device->pipeline_cpy_f32_bf16,"cpy_f32_bf16",cpy_f32_bf16_len,cpy_f32_bf16_data,"main", 2, sizeof(vk_op_unary_push_constants), {512, 1, 1}, {}, 1);
+    ggml_vk_create_pipeline(device, device->pipeline_cpy_i32_f32, "cpy_i32_f32", cpy_i32_f32_len, cpy_i32_f32_data, "main", 2, sizeof(vk_op_unary_push_constants), {512, 1, 1}, {}, 1);
+    ggml_vk_create_pipeline(device, device->pipeline_cpy_f32_i32, "cpy_f32_i32", cpy_f32_i32_len, cpy_f32_i32_data, "main", 2, sizeof(vk_op_unary_push_constants), {512, 1, 1}, {}, 1);
 
     ggml_vk_create_pipeline(device, device->pipeline_contig_cpy_f32_f32, "contig_cpy_f32_f32", contig_cpy_f32_f32_len, contig_cpy_f32_f32_data, "main", 2, sizeof(vk_op_unary_push_constants), {512, 1, 1}, {}, 1);
     ggml_vk_create_pipeline(device, device->pipeline_contig_cpy_f32_f16, "contig_cpy_f32_f16", contig_cpy_f32_f16_len, contig_cpy_f32_f16_data, "main", 2, sizeof(vk_op_unary_push_constants), {512, 1, 1}, {}, 1);
     ggml_vk_create_pipeline(device, device->pipeline_contig_cpy_f16_f16, "contig_cpy_f16_f16", contig_cpy_f16_f16_len, contig_cpy_f16_f16_data, "main", 2, sizeof(vk_op_unary_push_constants), {512, 1, 1}, {}, 1);
     ggml_vk_create_pipeline(device, device->pipeline_contig_cpy_f16_f32, "contig_cpy_f16_f32", contig_cpy_f16_f32_len, contig_cpy_f16_f32_data, "main", 2, sizeof(vk_op_unary_push_constants), {512, 1, 1}, {}, 1);
     ggml_vk_create_pipeline(device, device->pipeline_contig_cpy_f32_bf16,"contig_cpy_f32_bf16",contig_cpy_f32_bf16_len,contig_cpy_f32_bf16_data,"main", 2, sizeof(vk_op_unary_push_constants), {512, 1, 1}, {}, 1);
+    ggml_vk_create_pipeline(device, device->pipeline_contig_cpy_i32_f32, "contig_cpy_i32_f32", contig_cpy_i32_f32_len, contig_cpy_i32_f32_data, "main", 2, sizeof(vk_op_unary_push_constants), {512, 1, 1}, {}, 1);
+    ggml_vk_create_pipeline(device, device->pipeline_contig_cpy_f32_i32, "contig_cpy_f32_i32", contig_cpy_f32_i32_len, contig_cpy_f32_i32_data, "main", 2, sizeof(vk_op_unary_push_constants), {512, 1, 1}, {}, 1);
 
     if (device->float_controls_rte_fp16) {
         ggml_vk_create_pipeline(device, device->pipeline_cpy_f32_quant[GGML_TYPE_Q4_0], "cpy_f32_q4_0", cpy_f32_q4_0_rte_len, cpy_f32_q4_0_rte_data, "main", 2, sizeof(vk_op_unary_push_constants), {32, 1, 1}, {}, 1);
@@ -5693,6 +5697,20 @@ static vk_pipeline ggml_vk_get_cpy_pipeline(ggml_backend_vk_context * ctx, const
             return ctx->device->pipeline_cpy_f32_bf16;
         }
     }
+    if (src->type == GGML_TYPE_F32 && to == GGML_TYPE_I32) {
+        if (contig) {
+            return ctx->device->pipeline_contig_cpy_f32_i32;
+        } else {
+            return ctx->device->pipeline_cpy_f32_i32;
+        }
+    }
+    if (src->type == GGML_TYPE_I32 && to == GGML_TYPE_F32) {
+        if (contig) {
+            return ctx->device->pipeline_contig_cpy_i32_f32;
+        } else {
+            return ctx->device->pipeline_cpy_i32_f32;
+        }
+    }
     if (src->type == GGML_TYPE_F32) {
         switch (to) {
             case GGML_TYPE_Q4_0:
@@ -12224,6 +12242,13 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm
                 return true;
             }
 
+            if (
+                src0_type == GGML_TYPE_F32 && src1_type == GGML_TYPE_I32 ||
+                src0_type == GGML_TYPE_I32 && src1_type == GGML_TYPE_F32
+            ) {
+                return true;
+            }
+
             // We can handle copying from a type to the same type if it's
             // contiguous (memcpy). We use f16 or f32 shaders to do the copy,
             // so the type/block size must be a multiple of 4.
ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp
@@ -560,10 +560,14 @@ void process_shaders() {
     string_to_spv("cpy_f16_f32", "copy.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float"}, {"OPTIMIZATION_ERROR_WORKAROUND", "1"}});
     string_to_spv("cpy_f32_bf16","copy.comp", {{"A_TYPE", "float"}, {"D_TYPE", "uint16_t"}, {"DATA_D_BF16", "1"}});
     string_to_spv("contig_cpy_f32_f32", "contig_copy.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
+    string_to_spv("contig_cpy_f32_i32", "contig_copy.comp", {{"A_TYPE", "float"}, {"D_TYPE", "int"}});
+    string_to_spv("contig_cpy_i32_f32", "contig_copy.comp", {{"A_TYPE", "int"}, {"D_TYPE", "float"}});
     string_to_spv("contig_cpy_f32_f16", "contig_copy.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float16_t"}});
     string_to_spv("contig_cpy_f16_f16", "contig_copy.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}, {"OPTIMIZATION_ERROR_WORKAROUND", "1"}});
     string_to_spv("contig_cpy_f16_f32", "contig_copy.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float"}, {"OPTIMIZATION_ERROR_WORKAROUND", "1"}});
     string_to_spv("contig_cpy_f32_bf16","contig_copy.comp",{{"A_TYPE", "float"}, {"D_TYPE", "uint16_t"}, {"DATA_D_BF16", "1"}});
+    string_to_spv("cpy_f32_i32", "copy.comp", {{"A_TYPE", "float"}, {"D_TYPE", "int"}});
+    string_to_spv("cpy_i32_f32", "copy.comp", {{"A_TYPE", "int"}, {"D_TYPE", "float"}});
 
     for (std::string t : {"q4_0", "q4_1", "q5_0", "q5_1", "q8_0", "iq4_nl"}) {
         string_to_spv("cpy_f32_" + t, "copy_to_quant.comp", {{"DATA_A_" + to_uppercase(t), "1"}, {"D_TYPE", "float"}, {"FLOAT_TYPE", "float"}});
tests/test-backend-ops.cpp
@@ -2457,6 +2457,13 @@ struct test_cpy : public test_case {
 
         return out;
     }
+
+    void initialize_tensors(ggml_context * ctx) override {
+        for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
+            // test extended range of values to check if casting between f32 and i32 is consistent
+            init_tensor_uniform(t, -150.f, 150.f);
+        }
+    }
 };
 
 // GGML_OP_CONT
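The ±150 test range is deliberate: every integer of that magnitude is far below 2^24, so i32 values survive a round trip through f32 exactly, and any backend that truncates the same way must agree bit-for-bit. A standalone check of that property (illustration, not part of the test suite):

    #include <assert.h>
    #include <stdint.h>

    int main(void) {
        for (float v = -150.f; v <= 150.f; v += 0.25f) { // 0.25 is exact in binary
            const int32_t i = (int32_t) v; // truncate toward zero, like the kernels
            const float   r = (float) i;   // cast back to f32
            assert((int32_t) r == i);      // lossless: |i| < 2^24 fits f32 exactly
        }
        return 0;
    }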
tests/test-backend-ops.cpp
@@ -6007,6 +6014,10 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
             test_cases.emplace_back(new test_cpy(type_src, type_dst, {256, 2, 3, 4}, {1, 0, 2, 3})); // cpy not-contiguous
         }
     }
+    test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, GGML_TYPE_I32, {256, 2, 3, 4}));
+    test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, GGML_TYPE_I32, {256, 2, 3, 4}, {1, 0, 2, 3}));
+    test_cases.emplace_back(new test_cpy(GGML_TYPE_I32, GGML_TYPE_F32, {256, 2, 3, 4}));
+    test_cases.emplace_back(new test_cpy(GGML_TYPE_I32, GGML_TYPE_F32, {256, 2, 3, 4}, {1, 0, 2, 3}));
 
     test_cases.emplace_back(new test_cont());
     test_cases.emplace_back(new test_cont(GGML_TYPE_F32, {2, 1, 1 ,1}));