145 lines
5.4 KiB
C++
145 lines
5.4 KiB
C++
#include "ggml-remoting.h"
|
|
|
|
static const char * ggml_backend_remoting_device_get_name(ggml_backend_dev_t dev) {
|
|
virtgpu * gpu = DEV_TO_GPU(dev);
|
|
|
|
return apir_device_get_name(gpu);
|
|
}
|
|
|
|
static const char * ggml_backend_remoting_device_get_description(ggml_backend_dev_t dev) {
|
|
virtgpu * gpu = DEV_TO_GPU(dev);
|
|
|
|
return apir_device_get_description(gpu);
|
|
}
|
|
|
|
static enum ggml_backend_dev_type ggml_backend_remoting_device_get_type(ggml_backend_dev_t dev) {
|
|
virtgpu * gpu = DEV_TO_GPU(dev);
|
|
|
|
static enum ggml_backend_dev_type type;
|
|
static bool has_type = false;
|
|
if (!has_type) {
|
|
has_type = true;
|
|
type = (enum ggml_backend_dev_type) apir_device_get_type(gpu);
|
|
}
|
|
|
|
return type;
|
|
}
|
|
|
|
static void ggml_backend_remoting_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
|
|
virtgpu * gpu = DEV_TO_GPU(dev);
|
|
|
|
return apir_device_get_memory(gpu, free, total);
|
|
}
|
|
|
|
static bool ggml_backend_remoting_device_supports_op(ggml_backend_dev_t dev, const ggml_tensor * op) {
|
|
#if USE_ALWAYS_TRUE_SUPPORTS_OP == 1
|
|
/* ggml-rpc cheats it like this */
|
|
/* with the current implementation of serialize_tensor, the src/view aren't properly passed */
|
|
UNUSED(dev);
|
|
UNUSED(op);
|
|
|
|
return true;
|
|
#else
|
|
virtgpu * gpu = DEV_TO_GPU(dev);
|
|
|
|
return apir_device_supports_op(gpu, op);
|
|
#endif
|
|
}
|
|
|
|
static bool ggml_backend_remoting_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
|
|
bool supported = buft->device == dev;
|
|
|
|
return supported;
|
|
}
|
|
|
|
static bool ggml_backend_remoting_device_offload_op(ggml_backend_dev_t dev, const ggml_tensor * op) {
|
|
UNUSED(dev);
|
|
UNUSED(op);
|
|
|
|
return false;
|
|
}
|
|
|
|
static void ggml_backend_remoting_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) {
|
|
props->name = ggml_backend_remoting_device_get_name(dev);
|
|
props->description = ggml_backend_remoting_device_get_description(dev);
|
|
props->type = ggml_backend_remoting_device_get_type(dev);
|
|
ggml_backend_remoting_device_get_memory(dev, &props->memory_free, &props->memory_total);
|
|
|
|
virtgpu * gpu = DEV_TO_GPU(dev);
|
|
apir_device_get_props(gpu, &props->caps.async, &props->caps.host_buffer, &props->caps.buffer_from_host_ptr,
|
|
&props->caps.events);
|
|
|
|
props->caps.buffer_from_host_ptr = false;
|
|
props->caps.async = false;
|
|
props->caps.events = false;
|
|
}
|
|
|
|
ggml_backend_buffer_type_t ggml_backend_remoting_device_get_buffer_type(ggml_backend_dev_t dev) {
|
|
virtgpu * gpu = DEV_TO_GPU(dev);
|
|
|
|
apir_buffer_type_host_handle_t ctx = apir_device_get_buffer_type(gpu);
|
|
|
|
static ggml_backend_buffer_type buft{
|
|
/* .iface = */ ggml_backend_remoting_buffer_type_interface,
|
|
/* .device = */ dev,
|
|
/* .context = */ (void *) ctx,
|
|
};
|
|
|
|
return &buft;
|
|
}
|
|
|
|
static ggml_backend_buffer_type_t ggml_backend_remoting_device_get_buffer_from_ptr_type(ggml_backend_dev_t dev) {
|
|
virtgpu * gpu = DEV_TO_GPU(dev);
|
|
|
|
apir_buffer_type_host_handle_t ctx = apir_device_get_buffer_type(gpu);
|
|
|
|
static ggml_backend_buffer_type buft{
|
|
/* .iface = */ ggml_backend_remoting_buffer_from_ptr_type_interface,
|
|
/* .device = */ dev,
|
|
/* .context = */ (void *) ctx,
|
|
};
|
|
|
|
return &buft;
|
|
}
|
|
|
|
static ggml_backend_buffer_t ggml_backend_remoting_device_buffer_from_ptr(ggml_backend_dev_t dev,
|
|
void * ptr,
|
|
size_t size,
|
|
size_t max_tensor_size) {
|
|
virtgpu * gpu = DEV_TO_GPU(dev);
|
|
|
|
ggml_backend_remoting_buffer_context * context = (ggml_backend_remoting_buffer_context *) malloc(sizeof(*context));
|
|
if (!context) {
|
|
GGML_ABORT("Couldn't allocate the buffer context ...");
|
|
}
|
|
|
|
context->gpu = gpu;
|
|
context->apir_context = apir_device_buffer_from_ptr(gpu, size, max_tensor_size);
|
|
context->base = ptr;
|
|
context->is_from_ptr = true;
|
|
|
|
ggml_backend_buffer_t buffer =
|
|
ggml_backend_buffer_init(ggml_backend_remoting_device_get_buffer_from_ptr_type(dev),
|
|
ggml_backend_remoting_buffer_from_ptr_interface, (void *) context, size);
|
|
|
|
return buffer;
|
|
}
|
|
|
|
const ggml_backend_device_i ggml_backend_remoting_device_interface = {
|
|
/* .get_name = */ ggml_backend_remoting_device_get_name,
|
|
/* .get_description = */ ggml_backend_remoting_device_get_description,
|
|
/* .get_memory = */ ggml_backend_remoting_device_get_memory,
|
|
/* .get_type = */ ggml_backend_remoting_device_get_type,
|
|
/* .get_props = */ ggml_backend_remoting_device_get_props,
|
|
/* .init_backend = */ ggml_backend_remoting_device_init,
|
|
/* .get_buffer_type = */ ggml_backend_remoting_device_get_buffer_type,
|
|
/* .get_host_buffer_type = */ NULL,
|
|
/* .buffer_from_host_ptr = */ ggml_backend_remoting_device_buffer_from_ptr,
|
|
/* .supports_op = */ ggml_backend_remoting_device_supports_op,
|
|
/* .supports_buft = */ ggml_backend_remoting_device_supports_buft,
|
|
/* .offload_op = */ ggml_backend_remoting_device_offload_op,
|
|
/* .event_new = */ NULL,
|
|
/* .event_free = */ NULL,
|
|
/* .event_synchronize = */ NULL,
|
|
};
|