corrections:

> tensor_traits "backend"
- AMX
- migrate kleidiai
- migrate spacemit

> ggml-blas backend
> update GGML_API for c++ support

Not related to this PR:
> ggml-webgpu backend
> ggml-metal backend
This commit is contained in:
Djip007 2026-03-05 23:33:56 +01:00
parent d53d4e687b
commit 44a86ee644
16 changed files with 294 additions and 303 deletions

View File

@ -70,6 +70,7 @@ if (MSVC)
add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:/utf-8>") add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:/utf-8>")
add_compile_options("$<$<COMPILE_LANGUAGE:C>:/bigobj>") add_compile_options("$<$<COMPILE_LANGUAGE:C>:/bigobj>")
add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:/bigobj>") add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:/bigobj>")
add_compile_options(/Zc:__cplusplus)
endif() endif()
if (LLAMA_STANDALONE) if (LLAMA_STANDALONE)

View File

@ -7,6 +7,7 @@ function(llama_add_compile_flags)
list(APPEND CXX_FLAGS -Werror) list(APPEND CXX_FLAGS -Werror)
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") elseif (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
add_compile_options(/WX) add_compile_options(/WX)
add_compile_options(/Zc:__cplusplus)
endif() endif()
endif() endif()

View File

@ -174,17 +174,34 @@
// //
#ifdef GGML_SHARED #ifdef GGML_SHARED
# if defined(_WIN32) && !defined(__MINGW32__) # ifdef __cplusplus
# ifdef GGML_BUILD # if defined(_WIN32) && !defined(__MINGW32__)
# define GGML_API __declspec(dllexport) extern # ifdef GGML_BUILD
# else # define GGML_API __declspec(dllexport)
# define GGML_API __declspec(dllimport) extern # else
# endif # define GGML_API __declspec(dllimport)
# endif
# else
//# define GGML_API [[gnu::visibility ("default")]]
# define GGML_API __attribute__ ((visibility ("default")))
# endif
# else # else
# define GGML_API __attribute__ ((visibility ("default"))) extern # if defined(_WIN32) && !defined(__MINGW32__)
# ifdef GGML_BUILD
# define GGML_API __declspec(dllexport) extern
# else
# define GGML_API __declspec(dllimport) extern
# endif
# else
# define GGML_API __attribute__ ((visibility ("default"))) extern
# endif
# endif # endif
#else #else
# define GGML_API extern # ifdef __cplusplus
# define GGML_API
# else
# define GGML_API extern
# endif
#endif #endif
// TODO: support for clang // TODO: support for clang

View File

@ -8,6 +8,10 @@ if (CMAKE_SYSTEM_NAME MATCHES "Linux")
add_compile_definitions($<$<CONFIG:Debug>:_GLIBCXX_ASSERTIONS>) add_compile_definitions($<$<CONFIG:Debug>:_GLIBCXX_ASSERTIONS>)
endif() endif()
if (MSVC)
add_compile_options(/Zc:__cplusplus)
endif()
if (NOT MSVC) if (NOT MSVC)
if (GGML_SANITIZE_THREAD) if (GGML_SANITIZE_THREAD)
add_compile_options(-fsanitize=thread) add_compile_options(-fsanitize=thread)
@ -25,6 +29,7 @@ if (NOT MSVC)
endif() endif()
endif() endif()
if (GGML_FATAL_WARNINGS) if (GGML_FATAL_WARNINGS)
if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
list(APPEND C_FLAGS -Werror) list(APPEND C_FLAGS -Werror)

View File

@ -9,7 +9,7 @@ ggml_add_backend_library(ggml-blas
ggml-blas.cpp ggml-blas.cpp
) )
if (GGML_OPENMP) if (GGML_OPENMP_ENABLED STREQUAL "ON")
find_package(OpenMP REQUIRED) find_package(OpenMP REQUIRED)
add_compile_definitions(GGML_USE_OPENMP) add_compile_definitions(GGML_USE_OPENMP)
target_link_libraries(ggml-blas PRIVATE OpenMP::OpenMP_C OpenMP::OpenMP_CXX) target_link_libraries(ggml-blas PRIVATE OpenMP::OpenMP_C OpenMP::OpenMP_CXX)
@ -104,8 +104,10 @@ if (BLAS_FOUND)
add_compile_definitions(GGML_BLAS_USE_NVPL) add_compile_definitions(GGML_BLAS_USE_NVPL)
endif() endif()
target_compile_features (ggml-blas PRIVATE c_std_11 cxx_std_20) if (MSVC)
#target_link_libraries (ggml-blas PRIVATE ${BLAS_LIBRARIES}) add_compile_options(/Zc:__cplusplus)
endif()
target_compile_features (ggml-blas PRIVATE c_std_11 cxx_std_17)
target_link_libraries (ggml-blas PRIVATE BLAS::BLAS) target_link_libraries (ggml-blas PRIVATE BLAS::BLAS)
target_include_directories(ggml-blas SYSTEM PRIVATE ${BLAS_INCLUDE_DIRS}) target_include_directories(ggml-blas SYSTEM PRIVATE ${BLAS_INCLUDE_DIRS})
@ -128,7 +130,7 @@ elseif (${GGML_BLAS_VENDOR} MATCHES "OpenBLAS_BUILD")
set(BUILD_TESTING OFF) set(BUILD_TESTING OFF)
set(BUILD_STATIC_LIBS ON) set(BUILD_STATIC_LIBS ON)
set(BUILD_SHARED_LIBS OFF) set(BUILD_SHARED_LIBS OFF)
if (GGML_OPENMP) if (GGML_OPENMP_ENABLED STREQUAL "ON")
set(USE_OPENMP 1) set(USE_OPENMP 1)
set(USE_THREAD 1) set(USE_THREAD 1)
else() else()
@ -151,6 +153,9 @@ endif()
#add_compile_definitions(GGML_BLAS_USE_SBGEMM_BATCHED) #add_compile_definitions(GGML_BLAS_USE_SBGEMM_BATCHED)
#[...] #[...]
if (MSVC)
add_compile_options(/Zc:__cplusplus)
endif()
target_compile_features (ggml-blas PRIVATE c_std_11 cxx_std_17) target_compile_features (ggml-blas PRIVATE c_std_11 cxx_std_17)
target_link_directories (ggml-blas PRIVATE ${openblas_BINARY_DIR}/lib) target_link_directories (ggml-blas PRIVATE ${openblas_BINARY_DIR}/lib)
target_link_libraries (ggml-blas PRIVATE openblas) target_link_libraries (ggml-blas PRIVATE openblas)

View File

@ -11,6 +11,8 @@
#include <string> #include <string>
#include <algorithm> #include <algorithm>
static_assert(__cplusplus >= 201703L, "This file expects a C++17 compatible compiler.");
#if defined(GGML_BLAS_USE_ACCELERATE) #if defined(GGML_BLAS_USE_ACCELERATE)
# include <Accelerate/Accelerate.h> # include <Accelerate/Accelerate.h>
#elif defined(GGML_BLAS_USE_MKL) #elif defined(GGML_BLAS_USE_MKL)
@ -32,7 +34,7 @@
#endif #endif
namespace ggml::backend::blas { namespace ggml::backend::blas {
static constexpr std::size_t MEMORY_ALIGNMENT = 64; // 512 bits static constexpr std::size_t MEMORY_ALIGNMENT = 64; // 512 bits
// backend class // backend class
@ -40,26 +42,28 @@ namespace ggml::backend::blas {
int n_threads = GGML_DEFAULT_N_THREADS; int n_threads = GGML_DEFAULT_N_THREADS;
std::unique_ptr<char[]> work_data; // for tensor convert
size_t work_size = 0;
// for tensor convert (TODO: remove work_data)
// TODO: have a stack off buffer if we need 2+ work_data // TODO: have a stack off buffer if we need 2+ work_data
void* m_work_data = nullptr; // nead C++17 for correct aligned buffer
struct alignas(MEMORY_ALIGNMENT) aligned_uint8_t {
uint8_t val;
};
aligned_uint8_t* m_work_data = nullptr;
std::size_t m_work_size = 0; std::size_t m_work_size = 0;
template<typename T> template<typename T>
T* get_work(std::size_t size) { T* get_work(std::size_t size) {
std::size_t nb_byte = size * sizeof(T); std::size_t nb_byte = size * sizeof(T);
if (nb_byte > m_work_size) { if (nb_byte > m_work_size) {
nb_byte = std::max(nb_byte , 2*m_work_size); nb_byte = std::max(nb_byte , 2*m_work_size);
// force "aligned size" // force "aligned" size
nb_byte = ((nb_byte-1)/MEMORY_ALIGNMENT)+1; nb_byte = ((nb_byte-1)/MEMORY_ALIGNMENT)+1;
nb_byte *= MEMORY_ALIGNMENT; nb_byte *= MEMORY_ALIGNMENT;
if (m_work_data) std::free(m_work_data); if (m_work_data) delete[] m_work_data;
m_work_size = nb_byte; m_work_size = nb_byte;
m_work_data = aligned_alloc(MEMORY_ALIGNMENT, m_work_size); m_work_data = new aligned_uint8_t[m_work_size];
GGML_ASSERT(reinterpret_cast<uintptr_t>(m_work_data) % MEMORY_ALIGNMENT == 0);
} }
return (T*) m_work_data; return reinterpret_cast<T*> (m_work_data);
} }
#ifndef GGML_USE_OPENMP #ifndef GGML_USE_OPENMP
@ -67,8 +71,8 @@ namespace ggml::backend::blas {
#endif #endif
private: private:
//void cblas_sgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, //void cblas_sgemm(OPENBLAS_CONST enum CBLAS_ORDER Order,
// OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, // OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
// OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, // OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB,
// OPENBLAS_CONST blasint M, // OPENBLAS_CONST blasint M,
@ -79,7 +83,7 @@ namespace ggml::backend::blas {
// OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, // OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb,
// OPENBLAS_CONST float beta, // OPENBLAS_CONST float beta,
// float *C, OPENBLAS_CONST blasint ldc); // float *C, OPENBLAS_CONST blasint ldc);
//void cblas_sgemm_batch(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE * TransA_array, OPENBLAS_CONST enum CBLAS_TRANSPOSE * TransB_array, OPENBLAS_CONST blasint * M_array, OPENBLAS_CONST blasint * N_array, OPENBLAS_CONST blasint * K_array, //void cblas_sgemm_batch(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE * TransA_array, OPENBLAS_CONST enum CBLAS_TRANSPOSE * TransB_array, OPENBLAS_CONST blasint * M_array, OPENBLAS_CONST blasint * N_array, OPENBLAS_CONST blasint * K_array,
// OPENBLAS_CONST float * alpha_array, OPENBLAS_CONST float ** A_array, OPENBLAS_CONST blasint * lda_array, OPENBLAS_CONST float ** B_array, OPENBLAS_CONST blasint * ldb_array, OPENBLAS_CONST float * beta_array, float ** C_array, OPENBLAS_CONST blasint * ldc_array, OPENBLAS_CONST blasint group_count, OPENBLAS_CONST blasint * group_size); // OPENBLAS_CONST float * alpha_array, OPENBLAS_CONST float ** A_array, OPENBLAS_CONST blasint * lda_array, OPENBLAS_CONST float ** B_array, OPENBLAS_CONST blasint * ldb_array, OPENBLAS_CONST float * beta_array, float ** C_array, OPENBLAS_CONST blasint * ldc_array, OPENBLAS_CONST blasint group_count, OPENBLAS_CONST blasint * group_size);
//void cblas_sgemm_batch_strided(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float alpha, OPENBLAS_CONST float * A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST blasint stridea, OPENBLAS_CONST float * B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST blasint strideb, OPENBLAS_CONST float beta, float * C, OPENBLAS_CONST blasint ldc, OPENBLAS_CONST blasint stridec, OPENBLAS_CONST blasint group_size); //void cblas_sgemm_batch_strided(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float alpha, OPENBLAS_CONST float * A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST blasint stridea, OPENBLAS_CONST float * B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST blasint strideb, OPENBLAS_CONST float beta, float * C, OPENBLAS_CONST blasint ldc, OPENBLAS_CONST blasint stridec, OPENBLAS_CONST blasint group_size);
@ -90,18 +94,18 @@ namespace ggml::backend::blas {
GGML_ASSERT(A.ne[0] == B.ne[0]); // K GGML_ASSERT(A.ne[0] == B.ne[0]); // K
GGML_ASSERT(B.ne[1] == C.ne[1]); // N GGML_ASSERT(B.ne[1] == C.ne[1]); // N
GGML_ASSERT(A.ne[1] == C.ne[0]); // M GGML_ASSERT(A.ne[1] == C.ne[0]); // M
// for now! // for now only this case:
GGML_ASSERT(A.type == GGML_TYPE_BF16); GGML_ASSERT(A.type == GGML_TYPE_BF16);
GGML_ASSERT(B.type == GGML_TYPE_F32); GGML_ASSERT(B.type == GGML_TYPE_F32);
GGML_ASSERT(C.type == GGML_TYPE_F32); GGML_ASSERT(C.type == GGML_TYPE_F32);
// convert B to BF16: // convert B to BF16:
// - B contigue: (TODO: other case?) // - B contigue: (TODO: other case?)
GGML_ASSERT(((size_t)4*B.ne[0]*B.ne[1]*B.ne[2]) == B.nb[3]); GGML_ASSERT(((size_t)4*B.ne[0]*B.ne[1]*B.ne[2]) == B.nb[3]);
std::size_t sizeB = B.ne[0]*B.ne[1]*B.ne[2]*B.ne[3]; std::size_t sizeB = B.ne[0]*B.ne[1]*B.ne[2]*B.ne[3];
auto* B_work = get_work<bfloat16>(std::max(sizeB, B.ne[0]*(std::size_t)256)); auto* B_work = get_work<bfloat16>(std::max(sizeB, B.ne[0]*(std::size_t)256));
cblas_sbstobf16(sizeB, (const float*)B.data, 1, B_work, 1); cblas_sbstobf16(sizeB, (const float*)B.data, 1, B_work, 1);
// compute: // compute:
if (B.ne[2]*B.ne[3] == 1) { if (B.ne[2]*B.ne[3] == 1) {
if (B.ne[1] == 1) { if (B.ne[1] == 1) {
@ -195,13 +199,8 @@ namespace ggml::backend::blas {
const int64_t r3 = ne13/ne03; const int64_t r3 = ne13/ne03;
const int64_t ne_plane = ne01*ne00; const int64_t ne_plane = ne01*ne00;
const size_t desired_wsize = type == GGML_TYPE_F32 ? 0 : ne03*ne02*ne_plane*sizeof(float);
if (work_size < desired_wsize) { auto * wdata = get_work<float>(type == GGML_TYPE_F32 ? 0 : ne03*ne02*ne_plane);
work_data.reset(new char[desired_wsize]);
work_size = desired_wsize;
}
void * wdata = work_data.get();
// convert src0 to float // convert src0 to float
if (type != GGML_TYPE_F32) { if (type != GGML_TYPE_F32) {
@ -219,7 +218,7 @@ namespace ggml::backend::blas {
for (int64_t i03 = 0; i03 < ne03; i03++) { for (int64_t i03 = 0; i03 < ne03; i03++) {
for (int64_t i02 = 0; i02 < ne02; i02++) { for (int64_t i02 = 0; i02 < ne02; i02++) {
for (int64_t i01 = 0; i01 < ne01; i01++) { for (int64_t i01 = 0; i01 < ne01; i01++) {
to_float(x + i03*nb03 + i02*nb02 + i01*nb01, to_float(x + i03*nb03 + i02*nb02 + i01*nb01,
wplane + i03*nf_plane03 + i02*nf_plane02 + i01*nf_plane01, wplane + i03*nf_plane03 + i02*nf_plane02 + i01*nf_plane01,
ne00); ne00);
} }
@ -354,13 +353,13 @@ namespace ggml::backend::blas {
public: public:
static constexpr ggml_guid s_guid = { 0x12, 0xa8, 0xae, 0xf4, 0xc0, 0x1e, 0x61, 0x97, 0x8f, 0xeb, 0x33, 0x04, 0xa1, 0x33, 0x51, 0x2d }; static constexpr ggml_guid s_guid = { 0x12, 0xa8, 0xae, 0xf4, 0xc0, 0x1e, 0x61, 0x97, 0x8f, 0xeb, 0x33, 0x04, 0xa1, 0x33, 0x51, 0x2d };
backend(const std::string& /*params*/, ggml::cpp::backend::device& dev) : backend(const std::string& /*params*/, ggml::cpp::backend::device& dev) :
ggml::cpp::backend::backend(dev) ggml::cpp::backend::backend(dev)
{ } { }
virtual ~backend() { virtual ~backend() {
if (m_work_data) std::free(m_work_data); if (m_work_data) delete[] m_work_data;
} }
const std::string& get_name() override { const std::string& get_name() override {
@ -490,7 +489,7 @@ namespace ggml::backend::blas {
} }
bool caps_buffer_from_host_ptr() override { return true; } bool caps_buffer_from_host_ptr() override { return true; }
ggml::cpp::backend::buffer_type* get_from_host_ptr_buffer_type() override { ggml::cpp::backend::buffer_type* get_from_host_ptr_buffer_type() override {
return m_cpu_buffer_from_ptr_type; return m_cpu_buffer_from_ptr_type;
} }
@ -523,7 +522,7 @@ namespace ggml::backend::blas {
( src0->type == GGML_TYPE_F32 || ( src0->type == GGML_TYPE_F32 ||
#ifdef GGML_BLAS_USE_SBGEMM #ifdef GGML_BLAS_USE_SBGEMM
( src0->type == GGML_TYPE_BF16 && ( src0->type == GGML_TYPE_BF16 &&
ne1 >= min_batch ne1 >= min_batch
) || ) ||
#endif #endif
( (ne0 >= min_batch && ne1 >= min_batch && ne10 >= min_batch) && ( (ne0 >= min_batch && ne1 >= min_batch && ne10 >= min_batch) &&
@ -561,7 +560,7 @@ namespace ggml::backend::blas {
public: public:
reg() { reg() {
m_device = new device(); m_device = new device();
#if defined(GGML_BLAS_USE_OPENBLAS) #if defined(GGML_BLAS_USE_OPENBLAS)
if (openblas_get_parallel() == OPENBLAS_SEQUENTIAL) { if (openblas_get_parallel() == OPENBLAS_SEQUENTIAL) {
GGML_LOG_WARN("%s: warning: OpenBLAS was compiled without parallel support\n", __func__); GGML_LOG_WARN("%s: warning: OpenBLAS was compiled without parallel support\n", __func__);
} }

View File

@ -54,6 +54,10 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
ggml-cpu/ops.cpp ggml-cpu/ops.cpp
) )
if (MSVC)
add_compile_options(/Zc:__cplusplus)
endif()
target_compile_features(${GGML_CPU_NAME} PRIVATE c_std_11 cxx_std_17) target_compile_features(${GGML_CPU_NAME} PRIVATE c_std_11 cxx_std_17)
target_include_directories(${GGML_CPU_NAME} PRIVATE . ggml-cpu) target_include_directories(${GGML_CPU_NAME} PRIVATE . ggml-cpu)

View File

@ -51,7 +51,7 @@ public:
virtual ~buffer() { } virtual ~buffer() { }
ggml_status init_tensor(ggml_tensor& tensor) override { ggml_status init_tensor(ggml_tensor& tensor) override {
tensor->extra = (void *) ggml::cpu::amx::get_tensor_traits(&tensor); tensor.extra = (void *) ggml::cpu::amx::get_tensor_traits(&tensor);
return GGML_STATUS_SUCCESS; return GGML_STATUS_SUCCESS;
} }
@ -66,7 +66,7 @@ public:
}; };
class extra_buffer_type : ggml::cpu::extra_buffer_type { class extra_buffer_type : public ggml::cpu::extra_buffer_type {
const std::string& get_name() override { const std::string& get_name() override {
static const std::string name {"AMX"}; static const std::string name {"AMX"};

View File

@ -1337,111 +1337,98 @@ public:
} }
}; };
static ggml::cpu::tensor_traits * get_tensor_traits(ggml_backend_buffer_t, struct ggml_tensor *) { static ggml::cpu::tensor_traits * get_tensor_traits(struct ggml_tensor *) {
static tensor_traits traits; static tensor_traits traits;
return &traits; return &traits;
} }
} // namespace ggml::cpu::kleidiai
static enum ggml_status ggml_backend_cpu_kleidiai_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) { // kleidiai buffer
tensor->extra = (void *) ggml::cpu::kleidiai::get_tensor_traits(buffer, tensor); class buffer : public ggml::cpu::buffer {
public:
buffer(std::size_t size) : ggml::cpu::buffer(size) { }
return GGML_STATUS_SUCCESS; virtual ~buffer() { }
GGML_UNUSED(buffer);
}
static void ggml_backend_cpu_kleidiai_buffer_set_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, ggml_status init_tensor(ggml_tensor& tensor) override {
const void * data, size_t offset, size_t size) { tensor.extra = (void *) ggml::cpu::kleidiai::get_tensor_traits(&tensor);
GGML_ASSERT(offset == 0); return GGML_STATUS_SUCCESS;
GGML_ASSERT(size == ggml_nbytes(tensor));
auto tensor_traits = (ggml::cpu::kleidiai::tensor_traits *) tensor->extra;
auto OK = tensor_traits->repack(tensor, data, size);
GGML_ASSERT(OK == 0);
GGML_UNUSED(buffer);
}
static const char * ggml_backend_cpu_kleidiai_buffer_type_get_name(ggml_backend_buffer_type_t buft) {
GGML_UNUSED(buft);
return "CPU_KLEIDIAI";
}
static ggml_backend_buffer_t ggml_backend_cpu_kleidiai_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
ggml_backend_buffer_t buffer = ggml_backend_buft_alloc_buffer(ggml_backend_cpu_buffer_type(), size);
if (buffer == nullptr) {
return nullptr;
} }
buffer->buft = buft; void set_tensor(ggml_tensor & tensor, const void * data, std::size_t offset, std::size_t size) override {
buffer->iface.init_tensor = ggml_backend_cpu_kleidiai_buffer_init_tensor; GGML_ASSERT(offset == 0);
buffer->iface.set_tensor = ggml_backend_cpu_kleidiai_buffer_set_tensor; GGML_ASSERT(size == ggml_nbytes(&tensor));
buffer->iface.get_tensor = nullptr;
buffer->iface.cpy_tensor = nullptr;
return buffer;
}
static size_t ggml_backend_cpu_kleidiai_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) { auto tensor_traits = (ggml::cpu::kleidiai::tensor_traits *) tensor.extra;
GGML_UNUSED(buft); auto OK = tensor_traits->repack(&tensor, data, size);
return TENSOR_ALIGNMENT;
}
static size_t ggml_backend_cpu_kleidiai_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft, const struct ggml_tensor * tensor) { GGML_ASSERT(OK == 0);
GGML_UNUSED(buft);
if (tensor->type != GGML_TYPE_Q4_0 && tensor->type != GGML_TYPE_Q8_0) {
return ggml_nbytes(tensor);
} }
const size_t n = tensor->ne[1]; };
const size_t k = tensor->ne[0];
size_t cursor = sizeof(kleidiai_weight_header); class extra_buffer_type : public ggml::cpu::extra_buffer_type {
cursor = align_up(cursor, GGML_KLEIDIAI_PACK_ALIGN);
std::array<ggml_kleidiai_kernels *, GGML_KLEIDIAI_MAX_KERNEL_SLOTS> kernel_chain; const std::string& get_name() override {
const bool want_q8 = tensor->type == GGML_TYPE_Q8_0; static const std::string name {"CPU_KLEIDIAI"};
const int slot_total = want_q8 ? kleidiai_collect_q8_chain(kernel_chain) return name;
: kleidiai_collect_q4_chain(kernel_chain); }
const bool allow_fallback = kleidiai_pack_fallback_allowed();
size_t slot_count = 0; ggml::cpp::backend::buffer* alloc_buffer(std::size_t size) override {
for (int slot = 0; slot < slot_total; ++slot) { return new buffer(size);
if (!allow_fallback && slot > 0) { }
break;
} size_t get_alloc_size(const ggml_tensor& tensor) override {
ggml_kleidiai_kernels * kernels = kernel_chain[slot];
if (!kernels) { if (tensor.type != GGML_TYPE_Q4_0 && tensor.type != GGML_TYPE_Q8_0) {
continue; return ggml_nbytes(&tensor);
}
kernel_info * kernel = &kernels->gemm;
rhs_packing_info * rhs_info = &kernels->rhs_info;
if (!kernel || !rhs_info || !rhs_info->packed_size_ex) {
continue;
} }
const ggml_type rhs_type = kernels->rhs_type; const size_t n = tensor.ne[1];
const size_t block_len = rhs_type == GGML_TYPE_Q4_0 ? QK4_0 : const size_t k = tensor.ne[0];
rhs_type == GGML_TYPE_Q8_0 ? QK8_0 : 0;
if (block_len == 0) {
continue;
}
size_t cursor = sizeof(kleidiai_weight_header);
cursor = align_up(cursor, GGML_KLEIDIAI_PACK_ALIGN); cursor = align_up(cursor, GGML_KLEIDIAI_PACK_ALIGN);
cursor += rhs_info->packed_size_ex(n, k, kernel->get_nr(), kernel->get_kr(), block_len);
++slot_count; std::array<ggml_kleidiai_kernels *, GGML_KLEIDIAI_MAX_KERNEL_SLOTS> kernel_chain;
const bool want_q8 = tensor.type == GGML_TYPE_Q8_0;
const int slot_total = want_q8 ? kleidiai_collect_q8_chain(kernel_chain)
: kleidiai_collect_q4_chain(kernel_chain);
const bool allow_fallback = kleidiai_pack_fallback_allowed();
size_t slot_count = 0;
for (int slot = 0; slot < slot_total; ++slot) {
if (!allow_fallback && slot > 0) {
break;
}
ggml_kleidiai_kernels * kernels = kernel_chain[slot];
if (!kernels) {
continue;
}
kernel_info * kernel = &kernels->gemm;
rhs_packing_info * rhs_info = &kernels->rhs_info;
if (!kernel || !rhs_info || !rhs_info->packed_size_ex) {
continue;
}
const ggml_type rhs_type = kernels->rhs_type;
const size_t block_len = rhs_type == GGML_TYPE_Q4_0 ? QK4_0 :
rhs_type == GGML_TYPE_Q8_0 ? QK8_0 : 0;
if (block_len == 0) {
continue;
}
cursor = align_up(cursor, GGML_KLEIDIAI_PACK_ALIGN);
cursor += rhs_info->packed_size_ex(n, k, kernel->get_nr(), kernel->get_kr(), block_len);
++slot_count;
}
if (slot_count == 0) {
return ggml_nbytes(&tensor);
}
return std::max(cursor, ggml_nbytes(&tensor));
} }
if (slot_count == 0) {
return ggml_nbytes(tensor);
}
return std::max(cursor, ggml_nbytes(tensor));
}
namespace ggml::cpu::kleidiai {
class extra_buffer_type : ggml::cpu::extra_buffer_type {
bool supports_op(ggml_backend_dev_t, const struct ggml_tensor * op) override { bool supports_op(ggml_backend_dev_t, const struct ggml_tensor * op) override {
std::array<ggml_kleidiai_kernels *, GGML_KLEIDIAI_MAX_KERNEL_SLOTS> kernel_chain; std::array<ggml_kleidiai_kernels *, GGML_KLEIDIAI_MAX_KERNEL_SLOTS> kernel_chain;
const int slot_total = kleidiai_collect_kernel_chain(op, kernel_chain); const int slot_total = kleidiai_collect_kernel_chain(op, kernel_chain);
@ -1481,7 +1468,7 @@ class extra_buffer_type : ggml::cpu::extra_buffer_type {
(op->src[1]->nb[1] * op->src[1]->ne[1] != op->src[1]->nb[2])) { (op->src[1]->nb[1] * op->src[1]->ne[1] != op->src[1]->nb[2])) {
return nullptr; return nullptr;
} }
return ggml::cpu::kleidiai::get_tensor_traits(NULL, NULL); return ggml::cpu::kleidiai::get_tensor_traits(nullptr);
} }
} }
} }
@ -1491,21 +1478,7 @@ class extra_buffer_type : ggml::cpu::extra_buffer_type {
} // namespace ggml::cpu::kleidiai } // namespace ggml::cpu::kleidiai
ggml_backend_buffer_type_t ggml_backend_cpu_kleidiai_buffer_type(void) { ggml_backend_buffer_type_t ggml_backend_cpu_kleidiai_buffer_type(void) {
static ggml::cpu::kleidiai::extra_buffer_type ctx; static auto* buffer_type = ggml::cpu::c_wrapper(new ggml::cpu::kleidiai::extra_buffer_type());
static struct ggml_backend_buffer_type ggml_backend_cpu_buffer_type_kleidiai = {
/* .iface = */ {
/* .get_name = */ ggml_backend_cpu_kleidiai_buffer_type_get_name,
/* .alloc_buffer = */ ggml_backend_cpu_kleidiai_buffer_type_alloc_buffer,
/* .get_alignment = */ ggml_backend_cpu_kleidiai_buffer_type_get_alignment,
/* .get_max_size = */ nullptr, // defaults to SIZE_MAX
/* .get_alloc_size = */ ggml_backend_cpu_kleidiai_buffer_type_get_alloc_size,
/* .is_host = */ nullptr,
},
/* .device = */ ggml_backend_reg_dev_get(ggml_backend_cpu_reg(), 0),
/* .context = */ &ctx,
};
init_kleidiai_context(); init_kleidiai_context();
return buffer_type;
return &ggml_backend_cpu_buffer_type_kleidiai;
} }

View File

@ -864,98 +864,80 @@ static const ggml::cpu::tensor_traits * ggml_riscv64_spacemit_get_optimal_repack
return nullptr; return nullptr;
} }
static enum ggml_status ggml_backend_riscv64_spacemit_buffer_init_tensor(ggml_backend_buffer_t buffer,
struct ggml_tensor * tensor) {
tensor->extra =
(void *) const_cast<ggml::cpu::tensor_traits *>(ggml_riscv64_spacemit_get_optimal_repack_type(tensor));
GGML_UNUSED(buffer);
return GGML_STATUS_SUCCESS;
}
static void ggml_backend_riscv64_spacemit_buffer_set_tensor(ggml_backend_buffer_t buffer,
struct ggml_tensor * tensor,
const void * data,
size_t offset,
size_t size) {
GGML_ASSERT(offset == 0);
GGML_ASSERT(size == ggml_nbytes(tensor));
auto tensor_traits = (ggml::cpu::riscv64_spacemit::tensor_traits_base *) tensor->extra;
if (tensor_traits) {
auto OK = tensor_traits->repack(tensor, data, size);
GGML_ASSERT(OK == 0);
}
GGML_UNUSED(buffer);
}
static const char * ggml_backend_cpu_riscv64_spacemit_buffer_type_get_name(ggml_backend_buffer_type_t buft) {
return "CPU_RISCV64_SPACEMIT";
GGML_UNUSED(buft);
}
static ggml_backend_buffer_t ggml_backend_cpu_riscv64_spacemit_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft,
size_t size) {
ggml_backend_buffer_t buffer = ggml_backend_buft_alloc_buffer(ggml_backend_cpu_buffer_type(), size);
if (buffer == nullptr) {
return nullptr;
}
buffer->buft = buft;
buffer->iface.init_tensor = ggml_backend_riscv64_spacemit_buffer_init_tensor;
buffer->iface.set_tensor = ggml_backend_riscv64_spacemit_buffer_set_tensor;
buffer->iface.get_tensor = nullptr;
buffer->iface.cpy_tensor = nullptr;
return buffer;
}
static size_t ggml_backend_cpu_riscv64_spacemit_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) {
return 64;
GGML_UNUSED(buft);
}
static size_t ggml_backend_cpu_riscv64_spacemit_nbytes(ggml_backend_buffer_type_t buft,
const struct ggml_tensor * tensor) {
for (int i = 0; i < GGML_MAX_DIMS; ++i) {
if (tensor->ne[i] <= 0) {
return 0;
}
}
size_t nbytes;
const size_t blck_size = ggml_blck_size(tensor->type);
if (blck_size == 1) {
nbytes = ggml_type_size(tensor->type);
for (int i = 0; i < GGML_MAX_DIMS; ++i) {
nbytes += (tensor->ne[i] - 1) * tensor->nb[i];
}
} else {
nbytes = tensor->ne[0] * tensor->nb[0] / blck_size;
if (tensor->type == GGML_TYPE_Q4_K) {
GGML_ASSERT(nbytes % sizeof(block_q4_K) == 0);
nbytes = (nbytes / sizeof(block_q4_K)) * sizeof(block_q4_1) * 8;
for (int i = 1; i < GGML_MAX_DIMS; ++i) {
nbytes += (tensor->ne[i] - 1) * (tensor->nb[i] / sizeof(block_q4_K)) * sizeof(block_q4_1) * 8;
}
} else {
for (int i = 1; i < GGML_MAX_DIMS; ++i) {
nbytes += (tensor->ne[i] - 1) * tensor->nb[i];
}
}
}
GGML_UNUSED(buft);
return nbytes;
}
namespace ggml::cpu::riscv64_spacemit { namespace ggml::cpu::riscv64_spacemit {
class extra_buffer_type : ggml::cpu::extra_buffer_type { class buffer : public ggml::cpu::buffer {
public:
buffer(std::size_t size) : ggml::cpu::buffer(size) { }
virtual ~buffer() { }
ggml_status init_tensor(ggml_tensor& tensor) override {
tensor.extra =
(void *) const_cast<ggml::cpu::tensor_traits *>(ggml_riscv64_spacemit_get_optimal_repack_type(&tensor));
return GGML_STATUS_SUCCESS;
}
void set_tensor(ggml_tensor & tensor, const void * data, std::size_t offset, std::size_t size) override {
GGML_ASSERT(offset == 0);
GGML_ASSERT(size == ggml_nbytes(&tensor));
auto tensor_traits = (ggml::cpu::riscv64_spacemit::tensor_traits_base *) tensor.extra;
if (tensor_traits) {
auto OK = tensor_traits->repack(&tensor, data, size);
GGML_ASSERT(OK == 0);
}
}
};
class extra_buffer_type : public ggml::cpu::extra_buffer_type {
const std::string& get_name() override {
static const std::string name {"CPU_RISCV64_SPACEMIT"};
return name;
}
ggml::cpp::backend::buffer* alloc_buffer(std::size_t size) override {
return new buffer(size);
}
std::size_t get_alignment() override {
return 64;
}
std::size_t get_alloc_size(const ggml_tensor& tensor) override {
for (int i = 0; i < GGML_MAX_DIMS; ++i) {
if (tensor.ne[i] <= 0) {
return 0;
}
}
size_t nbytes;
const size_t blck_size = ggml_blck_size(tensor.type);
if (blck_size == 1) {
nbytes = ggml_type_size(tensor.type);
for (int i = 0; i < GGML_MAX_DIMS; ++i) {
nbytes += (tensor.ne[i] - 1) * tensor.nb[i];
}
} else {
nbytes = tensor.ne[0] * tensor.nb[0] / blck_size;
if (tensor.type == GGML_TYPE_Q4_K) {
GGML_ASSERT(nbytes % sizeof(block_q4_K) == 0);
nbytes = (nbytes / sizeof(block_q4_K)) * sizeof(block_q4_1) * 8;
for (int i = 1; i < GGML_MAX_DIMS; ++i) {
nbytes += (tensor.ne[i] - 1) * (tensor.nb[i] / sizeof(block_q4_K)) * sizeof(block_q4_1) * 8;
}
} else {
for (int i = 1; i < GGML_MAX_DIMS; ++i) {
nbytes += (tensor.ne[i] - 1) * tensor.nb[i];
}
}
}
return nbytes;
}
bool supports_op(ggml_backend_dev_t, const struct ggml_tensor * op) override { bool supports_op(ggml_backend_dev_t, const struct ggml_tensor * op) override {
switch (op->op) { switch (op->op) {
case GGML_OP_MUL_MAT: case GGML_OP_MUL_MAT:
@ -1005,21 +987,6 @@ class extra_buffer_type : ggml::cpu::extra_buffer_type {
} // namespace ggml::cpu::riscv64_spacemit } // namespace ggml::cpu::riscv64_spacemit
ggml_backend_buffer_type_t ggml_backend_cpu_riscv64_spacemit_buffer_type(void) { ggml_backend_buffer_type_t ggml_backend_cpu_riscv64_spacemit_buffer_type(void) {
static struct ggml_backend_buffer_type ggml_backend_cpu_buffer_type_riscv64_spacemit = { static auto* buffer_type = ggml::cpu::c_wrapper(new ggml::cpu::riscv64_spacemit::extra_buffer_type());
/* .iface = */ return buffer_type;
{
/* .get_name = */ ggml_backend_cpu_riscv64_spacemit_buffer_type_get_name,
/* .alloc_buffer = */ ggml_backend_cpu_riscv64_spacemit_buffer_type_alloc_buffer,
/* .get_alignment = */ ggml_backend_cpu_riscv64_spacemit_buffer_type_get_alignment,
/* .get_max_size = */ nullptr,
/* .get_alloc_size = */ ggml_backend_cpu_riscv64_spacemit_nbytes,
/* .is_host = */ nullptr,
},
/* .device = */
ggml_backend_reg_dev_get(ggml_backend_cpu_reg(), 0),
/* .context = */
new ggml::cpu::riscv64_spacemit::extra_buffer_type(),
};
return &ggml_backend_cpu_buffer_type_riscv64_spacemit;
} }

View File

@ -6,11 +6,14 @@
#include <new> #include <new>
static_assert(__cplusplus >= 201703L, "This file expects a C++17 compatible compiler.");
namespace ggml::cpu { namespace ggml::cpu {
buffer::buffer(std::size_t size) : m_size(size) { buffer::buffer(std::size_t size) : m_size(size) {
m_data = new (std::align_val_t(32)) uint8_t[m_size]; m_data = new aligned_uint8_t[m_size];
GGML_ASSERT(m_data); GGML_ASSERT(m_data);
GGML_ASSERT(reinterpret_cast<uintptr_t>(m_data) % TENSOR_ALIGNMENT == 0);
} }
buffer::~buffer() { buffer::~buffer() {

View File

@ -36,8 +36,9 @@ public:
void get_tensor(const ggml_tensor &, void *, std::size_t, std::size_t size) override; void get_tensor(const ggml_tensor &, void *, std::size_t, std::size_t size) override;
void clear(uint8_t value) override; void clear(uint8_t value) override;
protected: protected:
struct alignas(TENSOR_ALIGNMENT) aligned_uint8_t { uint8_t val; };
const std::size_t m_size; const std::size_t m_size;
uint8_t* m_data; aligned_uint8_t* m_data;
}; };
class extra_buffer_type { class extra_buffer_type {

View File

@ -590,9 +590,7 @@ ggml_backend_t ggml_backend_metal_init(void) {
return NULL; return NULL;
} }
ggml_backend_t backend = (ggml_backend_t) malloc(sizeof(ggml_backend)); auto * backend = new ggml_backend {
*backend = {
/* .guid = */ ggml_backend_metal_guid(), /* .guid = */ ggml_backend_metal_guid(),
/* .interface = */ ggml_backend_metal_i, /* .interface = */ ggml_backend_metal_i,
/* .device = */ dev, /* .device = */ dev,
@ -684,9 +682,7 @@ static ggml_backend_t ggml_backend_metal_device_init_backend(ggml_backend_dev_t
return NULL; return NULL;
} }
ggml_backend_t backend = (ggml_backend_t) malloc(sizeof(ggml_backend)); auto * backend = new ggml_backend {
*backend = {
/* .guid = */ ggml_backend_metal_guid(), /* .guid = */ ggml_backend_metal_guid(),
/* .interface = */ ggml_backend_metal_i, /* .interface = */ ggml_backend_metal_i,
/* .device = */ dev, /* .device = */ dev,

View File

@ -2946,8 +2946,7 @@ static ggml_backend_t ggml_backend_webgpu_backend_init(ggml_backend_dev_t dev, c
backend_ctx->webgpu_ctx = initialize_webgpu_context(dev); backend_ctx->webgpu_ctx = initialize_webgpu_context(dev);
// See GGML Backend Interface section // See GGML Backend Interface section
auto * backend = new ggml_backend(); auto * backend = new ggml_backend {
*backend = {
/* .guid = */ ggml_backend_webgpu_guid(), /* .guid = */ ggml_backend_webgpu_guid(),
/* .interface = */ ggml_backend_webgpu_i, /* .interface = */ ggml_backend_webgpu_i,
/* .device = */ dev, /* .device = */ dev,

View File

@ -7,9 +7,11 @@
#include <map> #include <map>
#include <memory> #include <memory>
static_assert(__cplusplus >= 201703L, "This file expects a C++17 compatible compiler.");
namespace ggml::cpp::backend { namespace ggml::cpp::backend {
// TODO: voir si on ne cree pas une fontion static plutot que friend. // may be best with a static methode than with a friend function.
ggml_backend_buffer_type_t* backend_dev_get_extra_bufts(ggml_backend_dev_t device) { ggml_backend_buffer_type_t* backend_dev_get_extra_bufts(ggml_backend_dev_t device) {
auto& ctx = *((ggml::cpp::backend::device*) (device->context)); auto& ctx = *((ggml::cpp::backend::device*) (device->context));
if (ctx.m_ggml_extra_buffers_type.size() == 0) { // need init of extra buffer wrappers if (ctx.m_ggml_extra_buffers_type.size() == 0) { // need init of extra buffer wrappers
@ -22,14 +24,14 @@ ggml_backend_buffer_type_t* backend_dev_get_extra_bufts(ggml_backend_dev_t devic
return ctx.m_ggml_extra_buffers_type.data(); return ctx.m_ggml_extra_buffers_type.data();
} }
namespace { // unnamed namespace namespace { // unamed namespace
//========================================================= //=========================================================
// les wrappper pour ggml_backend_buffer // wrappper for ggml_backend_buffer
void buffer_free_buffer(ggml_backend_buffer_t buf) { void buffer_free_buffer(ggml_backend_buffer_t buf) {
auto* ctx = (ggml::cpp::backend::buffer*) (buf->context); auto* ctx = (ggml::cpp::backend::buffer*) (buf->context);
delete ctx; delete ctx;
// delete buf; NO => deleted by the core. // delete buf; NO => deleted by the ggml_core: ggml_backend_buffer_free().
} }
void * buffer_get_base(ggml_backend_buffer_t buf) { void * buffer_get_base(ggml_backend_buffer_t buf) {
auto& ctx = *((ggml::cpp::backend::buffer*) (buf->context)); auto& ctx = *((ggml::cpp::backend::buffer*) (buf->context));
@ -169,7 +171,6 @@ ggml_backend_buffer_type_t* backend_dev_get_extra_bufts(ggml_backend_dev_t devic
return c_wrapper(dev, &ctx.init_backend(params?params:"")); return c_wrapper(dev, &ctx.init_backend(params?params:""));
} }
ggml_backend_buffer_type_t device_get_buffer_type(ggml_backend_dev_t dev) { ggml_backend_buffer_type_t device_get_buffer_type(ggml_backend_dev_t dev) {
// Note: nothing to delete it.
auto& ctx = *((ggml::cpp::backend::device*) (dev->context)); auto& ctx = *((ggml::cpp::backend::device*) (dev->context));
return c_wrapper(dev, &ctx.get_buffer_type()); return c_wrapper(dev, &ctx.get_buffer_type());
} }
@ -187,7 +188,6 @@ ggml_backend_buffer_type_t* backend_dev_get_extra_bufts(ggml_backend_dev_t devic
if (!bft) { return nullptr; } if (!bft) { return nullptr; }
auto* buf = bft->register_buffer(ptr, size, max_tensor_size); auto* buf = bft->register_buffer(ptr, size, max_tensor_size);
if (!buf) { return nullptr; } if (!buf) { return nullptr; }
// comment / ou memoriser ce wrapper, il n'y a pas de "delete"
auto * ggml_buf_type = c_wrapper(dev, bft); auto * ggml_buf_type = c_wrapper(dev, bft);
return c_wrapper(ggml_buf_type, buf, size); return c_wrapper(ggml_buf_type, buf, size);
} }
@ -247,25 +247,26 @@ ggml_backend_buffer_type_t* backend_dev_get_extra_bufts(ggml_backend_dev_t devic
if (name == "ggml_backend_dev_get_extra_bufts") { if (name == "ggml_backend_dev_get_extra_bufts") {
return (void*) backend_dev_get_extra_bufts; return (void*) backend_dev_get_extra_bufts;
} }
// TODO: add the other elements as needed.
// see how to manage them optionally if useful.
return nullptr; return nullptr;
} }
} }
// les destructeurs... // virtual destructors
buffer::~buffer() {} buffer::~buffer() {}
buffer_type::~buffer_type() {} buffer_type::~buffer_type() {}
event::~event() {} event::~event() {}
backend::backend(device& dev): m_device(dev) {} backend::backend(device& dev): m_device(dev) {}
backend::~backend() { } backend::~backend() { }
device::~device() { device::~device() { }
// TODO: il faut detruire des wrapper des buffer_type???
}
reg::~reg() {} reg::~reg() {}
// non virtual fct: // non virtual fct:
void device::register_extra_buffer_type(buffer_type* buft) { void device::register_extra_buffer_type(buffer_type* buft) {
GGML_ASSERT(m_ggml_extra_buffers_type.size() == 0); // pas encore initialisé! // have to be call early before any app ask for them.
GGML_ASSERT(m_ggml_extra_buffers_type.size() == 0);
m_extra_buffers_type.push_back(buft); m_extra_buffers_type.push_back(buft);
} }
@ -300,7 +301,7 @@ ggml_backend_buffer_type_t* backend_dev_get_extra_bufts(ggml_backend_dev_t devic
typedef std::unique_ptr<ggml_backend_buffer_type, buffer_type_deleter> c_buffer_type_ptr; typedef std::unique_ptr<ggml_backend_buffer_type, buffer_type_deleter> c_buffer_type_ptr;
ggml_backend_buffer_type_t c_wrapper(ggml_backend_dev_t device, buffer_type* ctx) { ggml_backend_buffer_type_t c_wrapper(ggml_backend_dev_t device, buffer_type* ctx) {
// the ctx have to be "static". // the ctx have to be "~static": owned by a device (or static).
static std::map<buffer_type*, c_buffer_type_ptr> map; static std::map<buffer_type*, c_buffer_type_ptr> map;
if (!ctx) { return nullptr; } if (!ctx) { return nullptr; }
@ -417,11 +418,9 @@ ggml_backend_buffer_type_t* backend_dev_get_extra_bufts(ggml_backend_dev_t devic
/* .context = */ ctx, /* .context = */ ctx,
}; };
map[ctx] = c_register_ptr(wrapper); map[ctx] = c_register_ptr(wrapper);
//map[ctx] = wrapper;
return wrapper; return wrapper;
} }
return it->second.get(); return it->second.get();
//return it->second;
} }
} }
@ -431,21 +430,27 @@ ggml_backend_buffer_type_t* backend_dev_get_extra_bufts(ggml_backend_dev_t devic
namespace ggml::cpp::backend::cpu { namespace ggml::cpp::backend::cpu {
// buffer // buffer
template<std::size_t ALIGNMENT>
class buffer : public ggml::cpp::backend::buffer { class buffer : public ggml::cpp::backend::buffer {
uint8_t* m_data = nullptr; // correct aligned data for c++17.
struct alignas(ALIGNMENT) aligned_uint8_t {
uint8_t val;
};
aligned_uint8_t* m_data = nullptr;
const std::size_t m_size; const std::size_t m_size;
public: public:
buffer(std::size_t size, std::size_t alignment): m_size(size) { buffer(std::size_t size): m_size(size) {
m_data = new (std::align_val_t(alignment)) uint8_t[m_size]; m_data = new aligned_uint8_t[m_size];
GGML_ASSERT(reinterpret_cast<uintptr_t>(m_data) % ALIGNMENT == 0);
} }
buffer(void* ptr, std::size_t /*size*/): m_size(0) { buffer(void* ptr, std::size_t /*size*/): m_size(0) {
m_data = (uint8_t*) ptr; m_data = (aligned_uint8_t*) ptr;
} }
virtual ~buffer() { virtual ~buffer() {
if (m_size>0 && m_data) { if (m_size>0 && m_data) {
delete[] m_data; delete[] m_data;
} }
m_data = nullptr; m_data = nullptr;
@ -481,14 +486,14 @@ namespace ggml::cpp::backend::cpu {
}; };
// buffer_type // buffer_type
template<std::size_t ALIGNMENT>
class buffer_type : public ggml::cpp::backend::buffer_type { class buffer_type : public ggml::cpp::backend::buffer_type {
const std::string m_name; const std::string m_name;
const std::size_t m_alignment;
const bool m_from_ptr; const bool m_from_ptr;
public: public:
buffer_type(const std::string& name, bool from_ptr, std::size_t alignment) : buffer_type(const std::string& name, bool from_ptr) :
m_name(name), m_alignment(alignment), m_from_ptr(from_ptr) m_name(name), m_from_ptr(from_ptr)
{} {}
virtual ~buffer_type() {} virtual ~buffer_type() {}
@ -497,23 +502,23 @@ namespace ggml::cpp::backend::cpu {
return m_name; return m_name;
} }
buffer* alloc_buffer(std::size_t size) override { buffer<ALIGNMENT>* alloc_buffer(std::size_t size) override {
GGML_ASSERT(!m_from_ptr && "buffer type not for allocatable buffer"); GGML_ASSERT(!m_from_ptr && "buffer type not for allocatable buffer");
return new buffer(size, m_alignment); return new buffer<ALIGNMENT>(size);
} }
std::size_t get_alignment() override { std::size_t get_alignment() override {
return m_alignment; return ALIGNMENT;
} }
bool is_host() override { bool is_host() override {
return true; return true;
} }
buffer* register_buffer(void * ptr, std::size_t size, std::size_t /*max_tensor_size*/) override { buffer<ALIGNMENT>* register_buffer(void * ptr, std::size_t size, std::size_t /*max_tensor_size*/) override {
GGML_ASSERT(m_from_ptr && "buffer type not for ptr memory"); GGML_ASSERT(m_from_ptr && "buffer type not for ptr memory");
GGML_ASSERT((uintptr_t)ptr % m_alignment == 0 && "buffer pointer must be aligned"); // GGML_ASSERT((uintptr_t)ptr % ALIGNMENT == 0 && "buffer pointer must be aligned");
return new buffer(ptr, size); return new buffer<ALIGNMENT>(ptr, size);
} }
}; };
@ -526,7 +531,21 @@ namespace ggml::cpp::backend {
bool from_ptr, bool from_ptr,
std::size_t alignment std::size_t alignment
) { ) {
return new ggml::cpp::backend::cpu::buffer_type(name, from_ptr, alignment); // May be define alignment with supported SIMD size?
if (alignment <= 8) { // 64 bits
return new ggml::cpp::backend::cpu::buffer_type<8>(name, from_ptr);
} else
if (alignment <= 16) { // 128 bits (AVX)
return new ggml::cpp::backend::cpu::buffer_type<16>(name, from_ptr);
} else
if (alignment <= 32) { // 256 bits (AVX2)
return new ggml::cpp::backend::cpu::buffer_type<32>(name, from_ptr);
} else
if (alignment <= 64) { // 256 bits (AVX512)
return new ggml::cpp::backend::cpu::buffer_type<64>(name, from_ptr);
} else { // do we need more?
return new ggml::cpp::backend::cpu::buffer_type<128>(name, from_ptr);
}
} }
} }

View File

@ -13,7 +13,7 @@
namespace ggml::cpp::backend { namespace ggml::cpp::backend {
class buffer { // ggml_backend_buffer_t class GGML_API buffer { // ggml_backend_buffer_t
public: public:
virtual ~buffer(); virtual ~buffer();
@ -30,7 +30,7 @@ namespace ggml::cpp::backend {
virtual void reset () {} virtual void reset () {}
}; };
class buffer_type { // ggml_backend_buffer_type_t class GGML_API buffer_type { // ggml_backend_buffer_type_t
public: public:
virtual ~buffer_type(); virtual ~buffer_type();
@ -45,7 +45,7 @@ namespace ggml::cpp::backend {
}; };
// TODO: manage event // TODO: manage event
class event { class GGML_API event {
public: public:
virtual ~event(); virtual ~event();
}; };
@ -58,7 +58,7 @@ namespace ggml::cpp::backend {
class device; class device;
class backend { // ggml_backend_t class GGML_API backend { // ggml_backend_t
backend() = delete; backend() = delete;
public: public:
backend(device& dev); backend(device& dev);
@ -92,7 +92,7 @@ namespace ggml::cpp::backend {
device& m_device; device& m_device;
}; };
class device { // ggml_backend_dev_t class GGML_API device { // ggml_backend_dev_t
protected: protected:
friend ggml_backend_buffer_type_t* backend_dev_get_extra_bufts(ggml_backend_dev_t device); friend ggml_backend_buffer_type_t* backend_dev_get_extra_bufts(ggml_backend_dev_t device);
std::vector<buffer_type*> m_extra_buffers_type; std::vector<buffer_type*> m_extra_buffers_type;
@ -125,11 +125,12 @@ namespace ggml::cpp::backend {
virtual bool caps_events() { return false; } virtual bool caps_events() { return false; }
protected: protected:
// have to be call by the device at init.
void register_extra_buffer_type(buffer_type* buft); void register_extra_buffer_type(buffer_type* buft);
}; };
class reg { // ggml_backend_reg_t class GGML_API reg { // ggml_backend_reg_t
public: public:
virtual ~reg(); virtual ~reg();
@ -138,14 +139,14 @@ namespace ggml::cpp::backend {
virtual device& get_device(std::size_t index) = 0; virtual device& get_device(std::size_t index) = 0;
}; };
ggml_backend_buffer_t c_wrapper(ggml_backend_buffer_type_t buft, buffer* ctx, std::size_t size); GGML_API ggml_backend_buffer_t c_wrapper(ggml_backend_buffer_type_t buft, buffer* ctx, std::size_t size);
ggml_backend_buffer_type_t c_wrapper(ggml_backend_dev_t device, buffer_type* ctx); GGML_API ggml_backend_buffer_type_t c_wrapper(ggml_backend_dev_t device, buffer_type* ctx);
ggml_backend_t c_wrapper(ggml_backend_dev_t device, backend* ctx); GGML_API ggml_backend_t c_wrapper(ggml_backend_dev_t device, backend* ctx);
ggml_backend_dev_t c_wrapper(ggml_backend_reg_t reg, device* ctx); GGML_API ggml_backend_dev_t c_wrapper(ggml_backend_reg_t reg, device* ctx);
ggml_backend_reg_t c_wrapper(reg* ctx); GGML_API ggml_backend_reg_t c_wrapper(reg* ctx);
// for simple cpu buffer: // helper for simple cpu buffer type:
buffer_type* new_cpu_buffer_type( GGML_API buffer_type* new_cpu_buffer_type(
const std::string& name, const std::string& name,
bool from_ptr=false, bool from_ptr=false,
std::size_t alignment = TENSOR_ALIGNMENT std::size_t alignment = TENSOR_ALIGNMENT