ggml : refactor forward_dup for cpu backend (#16062)
* ggml : refactor forward_dup for cpu backend
* clean up a bit
* add quant/dequant perf test
This commit is contained in:
parent
69ffd89163
commit
0dd58b6877
|
|
@ -28,6 +28,14 @@ static inline float bf16_to_f32(ggml_bf16_t x) {
|
||||||
return GGML_BF16_TO_FP32(x);
|
return GGML_BF16_TO_FP32(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Converts a 32-bit signed integer to float.
//
// The conversion is spelled out with static_cast because it is not
// value-preserving for every input: a float has a 24-bit significand, so
// integers whose magnitude exceeds 2^24 may be rounded to a nearby
// representable value.
static inline float i32_to_f32(int32_t x) {
    return static_cast<float>(x);
}
|
||||||
|
|
||||||
|
// Converts a float to a 32-bit signed integer, discarding the fractional
// part (truncation toward zero).
//
// The narrowing is made explicit with static_cast. As with any C++
// floating-to-integer conversion, the result is undefined when the truncated
// value cannot be represented in int32_t (this includes NaN and infinities),
// so callers are expected to pass in-range values.
static inline int32_t f32_to_i32(float x) {
    return static_cast<int32_t>(x);
}
|
||||||
|
|
||||||
// Identity conversion: returns the float argument unchanged.
static inline float f32_to_f32(float x) {
|
static inline float f32_to_f32(float x) {
|
||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
|
|
@ -54,6 +62,12 @@ struct type_conversion_table<ggml_bf16_t> {
|
||||||
static constexpr ggml_bf16_t (*from_f32)(float) = f32_to_bf16;
|
static constexpr ggml_bf16_t (*from_f32)(float) = f32_to_bf16;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Specialization of type_conversion_table for int32_t elements.
// It exposes two function pointers: to_f32 is bound to i32_to_f32 and
// from_f32 is bound to f32_to_i32.
template <>
|
||||||
|
struct type_conversion_table<int32_t> {
|
||||||
|
// int32_t -> float
static constexpr float (*to_f32)(int32_t) = i32_to_f32;
|
||||||
|
// float -> int32_t
static constexpr int32_t (*from_f32)(float) = f32_to_i32;
|
||||||
|
};
|
||||||
|
|
||||||
static std::pair<int64_t, int64_t> get_thread_range(const struct ggml_compute_params * params, const struct ggml_tensor * src0) {
|
static std::pair<int64_t, int64_t> get_thread_range(const struct ggml_compute_params * params, const struct ggml_tensor * src0) {
|
||||||
const int64_t ith = params->ith;
|
const int64_t ith = params->ith;
|
||||||
const int64_t nth = params->nth;
|
const int64_t nth = params->nth;
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load Diff
|
|
@ -6629,9 +6629,11 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() {
|
||||||
test_cases.emplace_back(new test_bin_bcast(ggml_add, GGML_TYPE_F32, {4096, 1, 1, 1}, {1, 1, 1, 1}));
|
test_cases.emplace_back(new test_bin_bcast(ggml_add, GGML_TYPE_F32, {4096, 1, 1, 1}, {1, 1, 1, 1}));
|
||||||
test_cases.emplace_back(new test_bin_bcast(ggml_add, GGML_TYPE_F32, {4096, 1, 1, 1}, {1, 512, 1, 1}));
|
test_cases.emplace_back(new test_bin_bcast(ggml_add, GGML_TYPE_F32, {4096, 1, 1, 1}, {1, 512, 1, 1}));
|
||||||
|
|
||||||
test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, GGML_TYPE_F16, {512, 3072, 1, 1}));
|
test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, GGML_TYPE_F16, {512, 3072, 1, 1}));
|
||||||
test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, GGML_TYPE_F32, {8192, 512, 2, 1}, {0, 2, 1, 3}));
|
test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, GGML_TYPE_F32, {8192, 512, 2, 1}, {0, 2, 1, 3}));
|
||||||
test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, GGML_TYPE_F32, {3072, 512, 2, 1}, {0, 2, 1, 3}));
|
test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, GGML_TYPE_F32, {3072, 512, 2, 1}, {0, 2, 1, 3}));
|
||||||
|
test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, GGML_TYPE_Q4_0, {8192, 512, 2, 1}));
|
||||||
|
test_cases.emplace_back(new test_cpy(GGML_TYPE_Q4_0, GGML_TYPE_F32, {8192, 512, 2, 1}));
|
||||||
|
|
||||||
test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {4096, 4096, 5, 1}, false, false, GGML_TYPE_F32, {1, 1}, 1.0f, 0.0f));
|
test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {4096, 4096, 5, 1}, false, false, GGML_TYPE_F32, {1, 1}, 1.0f, 0.0f));
|
||||||
test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {12888, 256, 5, 1}, false, false, GGML_TYPE_F32, {1, 1}, 1.0f, 0.0f));
|
test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {12888, 256, 5, 1}, false, false, GGML_TYPE_F32, {1, 1}, 1.0f, 0.0f));
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue