ggml : simplified testing for nh being power of 2 in Hadamard transform implementations
This commit is contained in:
parent
6011bdd92b
commit
4aec6a86bd
|
|
@ -11207,18 +11207,6 @@ void ggml_compute_forward_opt_step_sgd(const ggml_compute_params * params, ggml_
|
||||||
// MIT license
|
// MIT license
|
||||||
// SPDX-License-Identifier: MIT
|
// SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
#if defined(_MSC_VER)
|
|
||||||
#pragma warning(disable: 4244 4267) // possible loss of data
|
|
||||||
#include <intrin.h>
|
|
||||||
#include <ammintrin.h>
|
|
||||||
#include <nmmintrin.h>
|
|
||||||
#include <immintrin.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
// Counts the set bits of x using the MSVC __popcnt intrinsic.
// Declared static so this generically named, file-local helper has internal
// linkage and cannot clash with another popcount defined in a different
// translation unit.
static inline int popcount(uint32_t x) {
    return (int)__popcnt(x);
}
|
|
||||||
#else
|
|
||||||
// Counts the set bits of x using the GCC/Clang __builtin_popcount builtin.
// Declared static so this generically named, file-local helper has internal
// linkage and cannot clash with another popcount defined in a different
// translation unit.
static inline int popcount(uint32_t x) {
    return __builtin_popcount(x);
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void fast_ht(int n, T * values) {
|
void fast_ht(int n, T * values) {
|
||||||
constexpr float ksqrt2 = 0.707106781f;
|
constexpr float ksqrt2 = 0.707106781f;
|
||||||
|
|
@ -11250,7 +11238,7 @@ static void ggml_compute_forward_hadamard_f32(
|
||||||
const int nth = params->nth;
|
const int nth = params->nth;
|
||||||
|
|
||||||
int nh = dst->op_params[0];
|
int nh = dst->op_params[0];
|
||||||
GGML_ASSERT(nh > 1 && popcount(uint32_t(nh)) == 1);
|
GGML_ASSERT(nh > 1 && ((nh & (nh - 1)) == 0)); // power of 2
|
||||||
GGML_ASSERT(dst->ne[0] % nh == 0);
|
GGML_ASSERT(dst->ne[0] % nh == 0);
|
||||||
|
|
||||||
int nc = dst->ne[0]/nh;
|
int nc = dst->ne[0]/nh;
|
||||||
|
|
|
||||||
|
|
@ -58,19 +58,6 @@ static void hadamard_f32_cuda(int nh, const char * x, char * y, int ne0, int ne1
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(_MSC_VER)
|
|
||||||
#pragma warning(disable: 4244 4267) // possible loss of data
|
|
||||||
#include <intrin.h>
|
|
||||||
#include <ammintrin.h>
|
|
||||||
#include <nmmintrin.h>
|
|
||||||
#include <immintrin.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
// Number of 1-bits in x, computed with the MSVC __popcnt intrinsic.
static inline int popcount(uint32_t x) {
    const int set_bits = __popcnt(x);
    return set_bits;
}
|
|
||||||
#else
|
|
||||||
// Number of 1-bits in x, computed with the GCC/Clang __builtin_popcount builtin.
static inline int popcount(uint32_t x) {
    const int set_bits = __builtin_popcount(x);
    return set_bits;
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
void ggml_cuda_op_hadamard(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
|
void ggml_cuda_op_hadamard(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
|
||||||
const ggml_tensor * src = dst->src[0];
|
const ggml_tensor * src = dst->src[0];
|
||||||
GGML_ASSERT(src->type == GGML_TYPE_F32);
|
GGML_ASSERT(src->type == GGML_TYPE_F32);
|
||||||
|
|
@ -78,8 +65,8 @@ void ggml_cuda_op_hadamard(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
|
||||||
GGML_ASSERT(ggml_are_same_shape(src, dst));
|
GGML_ASSERT(ggml_are_same_shape(src, dst));
|
||||||
|
|
||||||
int nh = dst->op_params[0];
|
int nh = dst->op_params[0];
|
||||||
GGML_ASSERT(dst->ne[0]%nh == 0);
|
GGML_ASSERT(dst->ne[0] % nh == 0);
|
||||||
GGML_ASSERT(nh > 1 && popcount(nh) == 1);
|
GGML_ASSERT(nh > 1 && ((nh & (nh - 1)) == 0)); // power of 2
|
||||||
|
|
||||||
hadamard_f32_cuda(nh, (const char *)src->data, (char *)dst->data, src->ne[0], src->ne[1], src->ne[2], src->ne[3],
|
hadamard_f32_cuda(nh, (const char *)src->data, (char *)dst->data, src->ne[0], src->ne[1], src->ne[2], src->ne[3],
|
||||||
src->nb[1], src->nb[2], src->nb[3], dst->nb[1], dst->nb[2], dst->nb[3], ctx.stream());
|
src->nb[1], src->nb[2], src->nb[3], dst->nb[1], dst->nb[2], dst->nb[3], ctx.stream());
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue