cont : manage buffer type context memory
This commit is contained in:
parent
7758a58aa8
commit
4d19e61cd6
|
|
@ -175,6 +175,14 @@ struct ggml_backend_metal_buffer_type {
|
||||||
std::string name;
|
std::string name;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct ggml_backend_metal_buffer_type_deleter {
|
||||||
|
void operator()(ggml_backend_metal_buffer_type * ctx) const {
|
||||||
|
delete ctx;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef std::unique_ptr<ggml_backend_metal_buffer_type, ggml_backend_metal_buffer_type_deleter> ggml_backend_metal_buffer_type_ptr;
|
||||||
|
|
||||||
// common method for allocating shread or private Metal buffers
|
// common method for allocating shread or private Metal buffers
|
||||||
static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size, bool shared) {
|
static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size, bool shared) {
|
||||||
ggml_metal_device_t ctx_dev = (ggml_metal_device_t)buft->device->context;
|
ggml_metal_device_t ctx_dev = (ggml_metal_device_t)buft->device->context;
|
||||||
|
|
@ -259,12 +267,23 @@ static ggml_backend_buffer_type_t ggml_backend_metal_buffer_type_shared(int devi
|
||||||
static std::mutex mutex;
|
static std::mutex mutex;
|
||||||
std::lock_guard<std::mutex> lock(mutex);
|
std::lock_guard<std::mutex> lock(mutex);
|
||||||
|
|
||||||
static ggml_backend_buffer_type bufts[GGML_METAL_MAX_DEVICES];
|
static std::vector<ggml_backend_buffer_type> bufts;
|
||||||
|
static std::vector<ggml_backend_metal_buffer_type_ptr> ctxs;
|
||||||
|
|
||||||
static bool initialized = false;
|
static bool initialized = false;
|
||||||
if (!initialized) {
|
if (!initialized) {
|
||||||
|
bufts.reserve(g_devices);
|
||||||
|
ctxs.reserve(g_devices);
|
||||||
|
|
||||||
for (int i = 0; i < g_devices; ++i) {
|
for (int i = 0; i < g_devices; ++i) {
|
||||||
bufts[i] = {
|
ggml_backend_metal_buffer_type * raw_ctx =
|
||||||
|
new ggml_backend_metal_buffer_type {
|
||||||
|
/* .device = */ i,
|
||||||
|
/* .name = */ GGML_METAL_NAME + std::to_string(i),
|
||||||
|
};
|
||||||
|
ctxs.emplace_back(raw_ctx);
|
||||||
|
|
||||||
|
ggml_backend_buffer_type buft = {
|
||||||
/* .iface = */ {
|
/* .iface = */ {
|
||||||
/* .get_name = */ ggml_backend_metal_buffer_type_shared_get_name,
|
/* .get_name = */ ggml_backend_metal_buffer_type_shared_get_name,
|
||||||
/* .alloc_buffer = */ ggml_backend_metal_buffer_type_shared_alloc_buffer,
|
/* .alloc_buffer = */ ggml_backend_metal_buffer_type_shared_alloc_buffer,
|
||||||
|
|
@ -274,12 +293,14 @@ static ggml_backend_buffer_type_t ggml_backend_metal_buffer_type_shared(int devi
|
||||||
/* .is_host = */ ggml_backend_metal_buffer_type_shared_is_host,
|
/* .is_host = */ ggml_backend_metal_buffer_type_shared_is_host,
|
||||||
},
|
},
|
||||||
/* .device = */ ggml_backend_reg_dev_get(ggml_backend_metal_reg(), i),
|
/* .device = */ ggml_backend_reg_dev_get(ggml_backend_metal_reg(), i),
|
||||||
/* .context = */ new ggml_backend_metal_buffer_type{i, GGML_METAL_NAME + std::to_string(i)},
|
/* .context = */ raw_ctx,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
bufts.emplace_back(buft);
|
||||||
}
|
}
|
||||||
|
|
||||||
initialized = true;
|
initialized = true;
|
||||||
};
|
}
|
||||||
|
|
||||||
return &bufts[device];
|
return &bufts[device];
|
||||||
}
|
}
|
||||||
|
|
@ -322,12 +343,22 @@ static ggml_backend_buffer_type_t ggml_backend_metal_buffer_type_private(int dev
|
||||||
static std::mutex mutex;
|
static std::mutex mutex;
|
||||||
std::lock_guard<std::mutex> lock(mutex);
|
std::lock_guard<std::mutex> lock(mutex);
|
||||||
|
|
||||||
static ggml_backend_buffer_type bufts[GGML_METAL_MAX_DEVICES];
|
static std::vector<ggml_backend_buffer_type> bufts;
|
||||||
|
static std::vector<ggml_backend_metal_buffer_type_ptr> ctxs;
|
||||||
|
|
||||||
static bool initialized = false;
|
static bool initialized = false;
|
||||||
if (!initialized) {
|
if (!initialized) {
|
||||||
|
bufts.reserve(g_devices);
|
||||||
|
ctxs.reserve(g_devices);
|
||||||
|
|
||||||
for (int i = 0; i < g_devices; ++i) {
|
for (int i = 0; i < g_devices; ++i) {
|
||||||
bufts[i] = {
|
ggml_backend_metal_buffer_type * raw_ctx = new ggml_backend_metal_buffer_type{
|
||||||
|
/* .device = */ i,
|
||||||
|
/* .name = */ GGML_METAL_NAME + std::to_string(i) + "_Private"
|
||||||
|
};
|
||||||
|
ctxs.emplace_back(raw_ctx);
|
||||||
|
|
||||||
|
ggml_backend_buffer_type buft = {
|
||||||
/* .iface = */ {
|
/* .iface = */ {
|
||||||
/* .get_name = */ ggml_backend_metal_buffer_type_private_get_name,
|
/* .get_name = */ ggml_backend_metal_buffer_type_private_get_name,
|
||||||
/* .alloc_buffer = */ ggml_backend_metal_buffer_type_private_alloc_buffer,
|
/* .alloc_buffer = */ ggml_backend_metal_buffer_type_private_alloc_buffer,
|
||||||
|
|
@ -337,12 +368,14 @@ static ggml_backend_buffer_type_t ggml_backend_metal_buffer_type_private(int dev
|
||||||
/* .is_host = */ ggml_backend_metal_buffer_type_private_is_host,
|
/* .is_host = */ ggml_backend_metal_buffer_type_private_is_host,
|
||||||
},
|
},
|
||||||
/* .device = */ ggml_backend_reg_dev_get(ggml_backend_metal_reg(), i),
|
/* .device = */ ggml_backend_reg_dev_get(ggml_backend_metal_reg(), i),
|
||||||
/* .context = */ new ggml_backend_metal_buffer_type{i, GGML_METAL_NAME + std::to_string(i) + "_Private"},
|
/* .context = */ raw_ctx,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
bufts.emplace_back(buft);
|
||||||
}
|
}
|
||||||
|
|
||||||
initialized = true;
|
initialized = true;
|
||||||
};
|
}
|
||||||
|
|
||||||
return &bufts[device];
|
return &bufts[device];
|
||||||
}
|
}
|
||||||
|
|
@ -386,14 +419,24 @@ static ggml_backend_buffer_type_t ggml_backend_metal_buffer_type_mapped(int devi
|
||||||
static std::mutex mutex;
|
static std::mutex mutex;
|
||||||
std::lock_guard<std::mutex> lock(mutex);
|
std::lock_guard<std::mutex> lock(mutex);
|
||||||
|
|
||||||
static ggml_backend_buffer_type bufts[GGML_METAL_MAX_DEVICES];
|
static std::vector<ggml_backend_buffer_type> bufts;
|
||||||
|
static std::vector<ggml_backend_metal_buffer_type_ptr> ctxs;
|
||||||
|
|
||||||
static bool initialized = false;
|
static bool initialized = false;
|
||||||
if (!initialized) {
|
if (!initialized) {
|
||||||
|
bufts.reserve(g_devices);
|
||||||
|
ctxs.reserve(g_devices);
|
||||||
|
|
||||||
for (int i = 0; i < g_devices; ++i) {
|
for (int i = 0; i < g_devices; ++i) {
|
||||||
|
ggml_backend_metal_buffer_type * raw_ctx = new ggml_backend_metal_buffer_type{
|
||||||
|
/* .device = */ i,
|
||||||
|
/* .name = */ GGML_METAL_NAME + std::to_string(i) + "_Mapped"
|
||||||
|
};
|
||||||
|
ctxs.emplace_back(raw_ctx);
|
||||||
|
|
||||||
// note: not obvious, but this buffer type still needs to implement .alloc_buffer:
|
// note: not obvious, but this buffer type still needs to implement .alloc_buffer:
|
||||||
// https://github.com/ggml-org/llama.cpp/pull/15832#discussion_r2333177099
|
// https://github.com/ggml-org/llama.cpp/pull/15832#discussion_r2333177099
|
||||||
bufts[i] = {
|
ggml_backend_buffer_type buft = {
|
||||||
/* .iface = */ {
|
/* .iface = */ {
|
||||||
/* .get_name = */ ggml_backend_metal_buffer_type_mapped_get_name,
|
/* .get_name = */ ggml_backend_metal_buffer_type_mapped_get_name,
|
||||||
/* .alloc_buffer = */ ggml_backend_metal_buffer_type_mapped_alloc_buffer,
|
/* .alloc_buffer = */ ggml_backend_metal_buffer_type_mapped_alloc_buffer,
|
||||||
|
|
@ -403,12 +446,14 @@ static ggml_backend_buffer_type_t ggml_backend_metal_buffer_type_mapped(int devi
|
||||||
/* .is_host = */ ggml_backend_metal_buffer_type_mapped_is_host,
|
/* .is_host = */ ggml_backend_metal_buffer_type_mapped_is_host,
|
||||||
},
|
},
|
||||||
/* .device = */ ggml_backend_reg_dev_get(ggml_backend_metal_reg(), i),
|
/* .device = */ ggml_backend_reg_dev_get(ggml_backend_metal_reg(), i),
|
||||||
/* .context = */ new ggml_backend_metal_buffer_type{i, GGML_METAL_NAME + std::to_string(i) + "_Mapped"},
|
/* .context = */ raw_ctx,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
bufts.emplace_back(buft);
|
||||||
}
|
}
|
||||||
|
|
||||||
initialized = true;
|
initialized = true;
|
||||||
};
|
}
|
||||||
|
|
||||||
return &bufts[device];
|
return &bufts[device];
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue