ggml-virtgpu: improve the reliability of the code (#19846)

* ggml-virtgpu-backend: validate the consistency of the received objects

This patch adds consistency checks in the
ggml-virtgpu-backend (running on the host side) to ensure that the
data received from the guest is consistent (valid pointers, valid
sizes and offsets).

* ggml-virtgpu-backend: add fallback/skips for optional ggml backend methods

```
  1. bck->iface.synchronize(bck)
  2. buft->iface.get_alloc_size(buft, op)
  3. buft->iface.get_max_size(buft)
```

these three methods are optional in the GGML interface. `get_max_size`
was already properly defaulted, but `backend sychronize` and `butf
get_max_size` would have segfaulted the backend if not implemented.

* ggml-virtgpu-backend: fix log format missing argument

* ggml-virtgpu-backend: improve the abort message

* ggml-virtgpu-backend: more safety checks

* ggml-virtgpu-backend: new error code

* ggml-virtgpu-backend: initialize all the error codes

* ggml-virtgpu: add a missing comment generated by the code generator

* ggml-virtgpu: add the '[virtgpu]' prefix to the device/buffer names

* ggml-virtgpu: apir_device_buffer_from_ptr: improve the error message

* ggml-virtgpu: shared: make it match the latest api_remoting.h of Virglrenderer APIR

(still unmerged)

* ggml-virtgpu: update the code generator to have dispatch_command_name in a host/guest shared file

* ggml-virtgpu: REMOTE_CALL: fail if the backend returns an error

* docs/backend/VirtGPU.md: indicate that the RAM+VRAM size is limed to 64 GB with libkrun

* ggml-virtgpu: turn off clang-format header ordering for some of the files

Compilation breaks when ordered alphabetically.

* ggml-virtgpu: clang-format

* ggml-virtgpu/backend/shared/api_remoting: better comments for the APIR return codes
This commit is contained in:
Kevin Pouget 2026-02-26 13:00:57 +01:00 committed by GitHub
parent efba35a860
commit ffaafde16f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
30 changed files with 398 additions and 257 deletions

View File

@ -152,7 +152,9 @@ Commands and data are serialized using a custom binary protocol with:
- **VM-specific**: Only works in virtual machines with virtio-gpu support - **VM-specific**: Only works in virtual machines with virtio-gpu support
- **Host dependency**: Requires properly configured host-side backend - **Host dependency**: Requires properly configured host-side backend
- **Latency**: Small overhead from VM escaping for each operation - **Latency**: Small overhead from VM escaping for each operation
- **Shared-memory size**: with the `libkrun` hypervisor, the RAM + VRAM
addressable memory is limited to 64 GB. So the maximum GPU memory
will be `64GB - RAM`, regardless of the hardware VRAM size.
* This work is pending upstream changes in the VirglRenderer * This work is pending upstream changes in the VirglRenderer
project. project.

View File

@ -7,9 +7,21 @@
#include <cstdint> #include <cstdint>
static uint32_t validate_graph_operation(size_t cgraph_size, uint32_t shmem_res_id, const char * operation) {
if (cgraph_size == 0) {
GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Zero-size computation graph\n", operation);
return 1;
}
// place-holder: validate that the size of shmem_res_id is <= cgraph_size
// need to add another method in the Virgl->APIR callback interface
GGML_UNUSED(shmem_res_id);
return 0; // Valid
}
uint32_t backend_backend_graph_compute(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) { uint32_t backend_backend_graph_compute(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) {
GGML_UNUSED(ctx); GGML_UNUSED(ctx);
GGML_UNUSED(enc);
static bool async_backend_initialized = false; static bool async_backend_initialized = false;
static bool async_backend; static bool async_backend;
@ -34,10 +46,26 @@ uint32_t backend_backend_graph_compute(apir_encoder * enc, apir_decoder * dec, v
size_t cgraph_size; size_t cgraph_size;
apir_decode_size_t(dec, &cgraph_size); apir_decode_size_t(dec, &cgraph_size);
if (validate_graph_operation(cgraph_size, shmem_res_id, __func__) != 0) {
apir_decoder_set_fatal(dec);
return 1;
}
apir_decoder secondary_dec = apir_new_decoder((const char *) shmem_data, cgraph_size); apir_decoder secondary_dec = apir_new_decoder((const char *) shmem_data, cgraph_size);
ggml_cgraph * cgraph = apir_decode_ggml_cgraph(&secondary_dec, cgraph_size); ggml_cgraph * cgraph = apir_decode_ggml_cgraph(&secondary_dec, cgraph_size);
if (!cgraph || apir_decoder_get_fatal(&secondary_dec)) {
GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Failed to deserialize computation graph\n", __func__);
return 1;
}
if (cgraph->n_nodes < 0 || cgraph->n_leafs < 0) {
GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Invalid negative node/leaf count: nodes=%d leafs=%d\n", __func__,
cgraph->n_nodes, cgraph->n_leafs);
return 1;
}
ggml_status status; ggml_status status;
#if APIR_BACKEND_CHECK_SUPPORTS_OP == 1 #if APIR_BACKEND_CHECK_SUPPORTS_OP == 1
for (int idx = 0; idx < cgraph->n_nodes; idx++) { for (int idx = 0; idx < cgraph->n_nodes; idx++) {
@ -45,7 +73,8 @@ uint32_t backend_backend_graph_compute(apir_encoder * enc, apir_decoder * dec, v
if (dev->iface.supports_op(dev, op)) { if (dev->iface.supports_op(dev, op)) {
continue; continue;
} }
GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Graph node %d (%s) not supported by the backend\n", idx, ggml_op_desc(op)); GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Graph node %d (%s) not supported by the backend\n", __func__, idx,
ggml_op_desc(op));
status = GGML_STATUS_ABORTED; status = GGML_STATUS_ABORTED;
apir_encode_ggml_status(enc, &status); apir_encode_ggml_status(enc, &status);
@ -53,9 +82,17 @@ uint32_t backend_backend_graph_compute(apir_encoder * enc, apir_decoder * dec, v
return 0; return 0;
} }
#endif #endif
// Check if backend is properly initialized
if (!bck) {
GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Backend not initialized (bck is null)\n", __func__);
return 1;
}
status = bck->iface.graph_compute(bck, cgraph); status = bck->iface.graph_compute(bck, cgraph);
if (async_backend) { if (async_backend && bck->iface.synchronize) {
bck->iface.synchronize(bck); bck->iface.synchronize(bck);
} }

View File

@ -85,7 +85,19 @@ uint32_t backend_buffer_type_get_alloc_size(apir_encoder * enc, apir_decoder * d
const ggml_tensor * op = apir_decode_ggml_tensor_inplace(dec); const ggml_tensor * op = apir_decode_ggml_tensor_inplace(dec);
size_t value = buft->iface.get_alloc_size(buft, op); // Check for decode error
if (op == nullptr) {
GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Failed to decode tensor\n", __func__);
apir_decoder_set_fatal(dec);
return 1;
}
size_t value;
if (buft->iface.get_alloc_size) {
value = buft->iface.get_alloc_size(buft, op);
} else {
value = ggml_nbytes(op); // Default fallback
}
apir_encode_size_t(enc, &value); apir_encode_size_t(enc, &value);

View File

@ -6,11 +6,26 @@
#include <cstdint> #include <cstdint>
static uint32_t validate_buffer_operation(size_t offset, size_t size, const char * operation) {
// Only check for critical integer overflow - no arbitrary size limits
if (offset > SIZE_MAX - size) {
GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Integer overflow in offset+size: %zu + %zu\n", operation, offset, size);
return 1;
}
return 0; // Valid
}
uint32_t backend_buffer_get_base(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) { uint32_t backend_buffer_get_base(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) {
GGML_UNUSED(ctx); GGML_UNUSED(ctx);
ggml_backend_buffer_t buffer; ggml_backend_buffer_t buffer;
buffer = apir_decode_ggml_buffer(dec); buffer = apir_decode_ggml_buffer(dec);
if (!buffer || apir_decoder_get_fatal(dec)) {
GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Invalid buffer handle from guest\n", __func__);
return 1;
}
uintptr_t base = (uintptr_t) buffer->iface.get_base(buffer); uintptr_t base = (uintptr_t) buffer->iface.get_base(buffer);
apir_encode_uintptr_t(enc, &base); apir_encode_uintptr_t(enc, &base);
@ -24,6 +39,11 @@ uint32_t backend_buffer_set_tensor(apir_encoder * enc, apir_decoder * dec, virgl
ggml_backend_buffer_t buffer; ggml_backend_buffer_t buffer;
buffer = apir_decode_ggml_buffer(dec); buffer = apir_decode_ggml_buffer(dec);
if (!buffer || apir_decoder_get_fatal(dec)) {
GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Invalid buffer handle from guest\n", __func__);
return 1;
}
ggml_tensor * tensor; ggml_tensor * tensor;
// safe to remove the const qualifier here // safe to remove the const qualifier here
tensor = (ggml_tensor *) (uintptr_t) apir_decode_ggml_tensor(dec); tensor = (ggml_tensor *) (uintptr_t) apir_decode_ggml_tensor(dec);
@ -37,6 +57,10 @@ uint32_t backend_buffer_set_tensor(apir_encoder * enc, apir_decoder * dec, virgl
size_t size; size_t size;
apir_decode_size_t(dec, &size); apir_decode_size_t(dec, &size);
if (validate_buffer_operation(offset, size, __func__) != 0) {
return 1;
}
void * shmem_data = ctx->iface->get_shmem_ptr(ctx->ctx_id, shmem_res_id); void * shmem_data = ctx->iface->get_shmem_ptr(ctx->ctx_id, shmem_res_id);
if (!shmem_data) { if (!shmem_data) {
@ -56,6 +80,11 @@ uint32_t backend_buffer_get_tensor(apir_encoder * enc, apir_decoder * dec, virgl
ggml_backend_buffer_t buffer; ggml_backend_buffer_t buffer;
buffer = apir_decode_ggml_buffer(dec); buffer = apir_decode_ggml_buffer(dec);
if (!buffer || apir_decoder_get_fatal(dec)) {
GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Invalid buffer handle from guest\n", __func__);
return 1;
}
const ggml_tensor * tensor; const ggml_tensor * tensor;
// safe to remove the const qualifier here // safe to remove the const qualifier here
tensor = apir_decode_ggml_tensor(dec); tensor = apir_decode_ggml_tensor(dec);
@ -69,6 +98,10 @@ uint32_t backend_buffer_get_tensor(apir_encoder * enc, apir_decoder * dec, virgl
size_t size; size_t size;
apir_decode_size_t(dec, &size); apir_decode_size_t(dec, &size);
if (validate_buffer_operation(offset, size, __func__) != 0) {
return 1;
}
void * shmem_data = ctx->iface->get_shmem_ptr(ctx->ctx_id, shmem_res_id); void * shmem_data = ctx->iface->get_shmem_ptr(ctx->ctx_id, shmem_res_id);
if (!shmem_data) { if (!shmem_data) {
GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Couldn't get the shmem addr from virgl\n", __func__); GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Couldn't get the shmem addr from virgl\n", __func__);
@ -86,6 +119,11 @@ uint32_t backend_buffer_cpy_tensor(apir_encoder * enc, apir_decoder * dec, virgl
ggml_backend_buffer_t buffer; ggml_backend_buffer_t buffer;
buffer = apir_decode_ggml_buffer(dec); buffer = apir_decode_ggml_buffer(dec);
if (!buffer || apir_decoder_get_fatal(dec)) {
GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Invalid buffer handle from guest\n", __func__);
return 1;
}
const ggml_tensor * src; const ggml_tensor * src;
// safe to remove the const qualifier here // safe to remove the const qualifier here
src = apir_decode_ggml_tensor(dec); src = apir_decode_ggml_tensor(dec);
@ -105,6 +143,11 @@ uint32_t backend_buffer_clear(apir_encoder * enc, apir_decoder * dec, virgl_apir
ggml_backend_buffer_t buffer; ggml_backend_buffer_t buffer;
buffer = apir_decode_ggml_buffer(dec); buffer = apir_decode_ggml_buffer(dec);
if (!buffer || apir_decoder_get_fatal(dec)) {
GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Invalid buffer handle from guest\n", __func__);
return 1;
}
uint8_t value; uint8_t value;
apir_decode_uint8_t(dec, &value); apir_decode_uint8_t(dec, &value);
@ -120,6 +163,11 @@ uint32_t backend_buffer_free_buffer(apir_encoder * enc, apir_decoder * dec, virg
ggml_backend_buffer_t buffer; ggml_backend_buffer_t buffer;
buffer = apir_decode_ggml_buffer(dec); buffer = apir_decode_ggml_buffer(dec);
if (!buffer || apir_decoder_get_fatal(dec)) {
GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Invalid buffer handle from guest\n", __func__);
return 1;
}
if (!apir_untrack_backend_buffer(buffer)) { if (!apir_untrack_backend_buffer(buffer)) {
GGML_LOG_WARN(GGML_VIRTGPU_BCK "%s: unknown buffer %p\n", __func__, (void *) buffer); GGML_LOG_WARN(GGML_VIRTGPU_BCK "%s: unknown buffer %p\n", __func__, (void *) buffer);
return 1; return 1;

View File

@ -1,6 +1,6 @@
#include "backend-dispatched.h" #include "backend-dispatched.h"
#include "backend-virgl-apir.h"
#include "backend-virgl-apir.h"
#include "ggml-backend-impl.h" #include "ggml-backend-impl.h"
#include "ggml-backend.h" #include "ggml-backend.h"
#include "ggml-impl.h" #include "ggml-impl.h"
@ -28,19 +28,24 @@ uint32_t backend_dispatch_initialize(void * ggml_backend_reg_fct_p) {
return APIR_BACKEND_INITIALIZE_BACKEND_REG_FAILED; return APIR_BACKEND_INITIALIZE_BACKEND_REG_FAILED;
} }
if (!reg->iface.get_device_count(reg)) { size_t device_count = reg->iface.get_device_count(reg);
GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: backend initialization failed: no device found\n", __func__); if (!device_count) {
GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: no device found\n", __func__);
return APIR_BACKEND_INITIALIZE_NO_DEVICE; return APIR_BACKEND_INITIALIZE_NO_DEVICE;
} }
dev = reg->iface.get_device(reg, 0); dev = reg->iface.get_device(reg, 0);
if (!dev) { if (!dev) {
GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: backend initialization failed: no device received\n", __func__); GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: failed to get device\n", __func__);
return APIR_BACKEND_INITIALIZE_NO_DEVICE; return APIR_BACKEND_INITIALIZE_NO_DEVICE;
} }
bck = dev->iface.init_backend(dev, NULL); bck = dev->iface.init_backend(dev, NULL);
if (!bck) {
GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: backend initialization failed\n", __func__);
return APIR_BACKEND_INITIALIZE_BACKEND_INIT_FAILED;
}
return APIR_BACKEND_INITIALIZE_SUCCESS; return APIR_BACKEND_INITIALIZE_SUCCESS;
} }

View File

@ -32,64 +32,6 @@ uint32_t backend_buffer_free_buffer(apir_encoder * enc, apir_decoder * dec, virg
/* backend */ /* backend */
uint32_t backend_backend_graph_compute(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx); uint32_t backend_backend_graph_compute(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
static inline const char * backend_dispatch_command_name(ApirBackendCommandType type) {
switch (type) {
/* device */
case APIR_COMMAND_TYPE_DEVICE_GET_DEVICE_COUNT:
return "backend_device_get_device_count";
case APIR_COMMAND_TYPE_DEVICE_GET_COUNT:
return "backend_device_get_count";
case APIR_COMMAND_TYPE_DEVICE_GET_NAME:
return "backend_device_get_name";
case APIR_COMMAND_TYPE_DEVICE_GET_DESCRIPTION:
return "backend_device_get_description";
case APIR_COMMAND_TYPE_DEVICE_GET_TYPE:
return "backend_device_get_type";
case APIR_COMMAND_TYPE_DEVICE_GET_MEMORY:
return "backend_device_get_memory";
case APIR_COMMAND_TYPE_DEVICE_SUPPORTS_OP:
return "backend_device_supports_op";
case APIR_COMMAND_TYPE_DEVICE_GET_BUFFER_TYPE:
return "backend_device_get_buffer_type";
case APIR_COMMAND_TYPE_DEVICE_GET_PROPS:
return "backend_device_get_props";
case APIR_COMMAND_TYPE_DEVICE_BUFFER_FROM_PTR:
return "backend_device_buffer_from_ptr";
/* buffer-type */
case APIR_COMMAND_TYPE_BUFFER_TYPE_GET_NAME:
return "backend_buffer_type_get_name";
case APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALIGNMENT:
return "backend_buffer_type_get_alignment";
case APIR_COMMAND_TYPE_BUFFER_TYPE_GET_MAX_SIZE:
return "backend_buffer_type_get_max_size";
case APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST:
return "backend_buffer_type_is_host (DEPRECATED)";
case APIR_COMMAND_TYPE_BUFFER_TYPE_ALLOC_BUFFER:
return "backend_buffer_type_alloc_buffer";
case APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALLOC_SIZE:
return "backend_buffer_type_get_alloc_size";
/* buffer */
case APIR_COMMAND_TYPE_BUFFER_GET_BASE:
return "backend_buffer_get_base";
case APIR_COMMAND_TYPE_BUFFER_SET_TENSOR:
return "backend_buffer_set_tensor";
case APIR_COMMAND_TYPE_BUFFER_GET_TENSOR:
return "backend_buffer_get_tensor";
case APIR_COMMAND_TYPE_BUFFER_CPY_TENSOR:
return "backend_buffer_cpy_tensor";
case APIR_COMMAND_TYPE_BUFFER_CLEAR:
return "backend_buffer_clear";
case APIR_COMMAND_TYPE_BUFFER_FREE_BUFFER:
return "backend_buffer_free_buffer";
/* backend */
case APIR_COMMAND_TYPE_BACKEND_GRAPH_COMPUTE:
return "backend_backend_graph_compute";
default:
return "unknown";
}
}
extern "C" { extern "C" {
static const backend_dispatch_t apir_backend_dispatch_table[APIR_BACKEND_DISPATCH_TABLE_COUNT] = { static const backend_dispatch_t apir_backend_dispatch_table[APIR_BACKEND_DISPATCH_TABLE_COUNT] = {

View File

@ -1,5 +1,6 @@
#pragma once #pragma once
// clang-format off
#include <cstdint> #include <cstdint>
#include <cstddef> #include <cstddef>
@ -10,6 +11,7 @@
#include "shared/apir_backend.h" #include "shared/apir_backend.h"
#include "shared/apir_cs.h" #include "shared/apir_cs.h"
#include "shared/apir_cs_ggml.h" #include "shared/apir_cs_ggml.h"
// clang-format on
#define GGML_VIRTGPU_BCK "ggml-virtgpu-backend: " #define GGML_VIRTGPU_BCK "ggml-virtgpu-backend: "

View File

@ -19,7 +19,7 @@ struct virgl_apir_callbacks {
}; };
extern "C" { extern "C" {
ApirLoadLibraryReturnCode apir_backend_initialize(uint32_t virgl_ctx_id, struct virgl_apir_callbacks *virgl_cbs); ApirLoadLibraryReturnCode apir_backend_initialize(uint32_t virgl_ctx_id, struct virgl_apir_callbacks * virgl_cbs);
void apir_backend_deinit(uint32_t virgl_ctx_id); void apir_backend_deinit(uint32_t virgl_ctx_id);
uint32_t apir_backend_dispatcher(uint32_t virgl_ctx_id, uint32_t apir_backend_dispatcher(uint32_t virgl_ctx_id,
virgl_apir_callbacks * virgl_cbs, virgl_apir_callbacks * virgl_cbs,

View File

@ -1,6 +1,5 @@
#include "backend-dispatched.h" #include "backend-dispatched.h"
#include "backend-virgl-apir.h" #include "backend-virgl-apir.h"
#include "shared/api_remoting.h" #include "shared/api_remoting.h"
#include "shared/apir_backend.h" #include "shared/apir_backend.h"
#include "shared/apir_cs.h" #include "shared/apir_cs.h"
@ -17,10 +16,10 @@
#define GGML_DEFAULT_BACKEND_REG "ggml_backend_init" #define GGML_DEFAULT_BACKEND_REG "ggml_backend_init"
static void * backend_library_handle = NULL; static void * backend_library_handle = NULL;
static FILE * apir_logfile = NULL; static FILE * apir_logfile = NULL;
static void log_to_file_callback(enum ggml_log_level level, const char * text, void * user_data) { static void log_to_file_callback(enum ggml_log_level level, const char * text, void * user_data) {
FILE * logfile = (FILE *)user_data; FILE * logfile = (FILE *) user_data;
fprintf(logfile, "[%d] %s", level, text); fprintf(logfile, "[%d] %s", level, text);
fflush(logfile); fflush(logfile);
} }
@ -48,9 +47,9 @@ void apir_backend_deinit(uint32_t virgl_ctx_id) {
} }
#define APIR_GGML_LIBRARY_PATH_KEY "ggml.library.path" #define APIR_GGML_LIBRARY_PATH_KEY "ggml.library.path"
#define APIR_GGML_LIBRARY_REG_KEY "ggml.library.reg" #define APIR_GGML_LIBRARY_REG_KEY "ggml.library.reg"
ApirLoadLibraryReturnCode apir_backend_initialize(uint32_t virgl_ctx_id, struct virgl_apir_callbacks *virgl_cbs) { ApirLoadLibraryReturnCode apir_backend_initialize(uint32_t virgl_ctx_id, struct virgl_apir_callbacks * virgl_cbs) {
const char * dlsym_error; const char * dlsym_error;
const char * apir_log_to_file = getenv(APIR_LLAMA_CPP_LOG_TO_FILE_ENV); const char * apir_log_to_file = getenv(APIR_LLAMA_CPP_LOG_TO_FILE_ENV);
@ -63,15 +62,13 @@ ApirLoadLibraryReturnCode apir_backend_initialize(uint32_t virgl_ctx_id, struct
} }
} }
const char * library_name = virgl_cbs->get_config(virgl_ctx_id, APIR_GGML_LIBRARY_PATH_KEY); const char * library_name = virgl_cbs->get_config(virgl_ctx_id, APIR_GGML_LIBRARY_PATH_KEY);
const char * virgl_library_reg = virgl_cbs->get_config(virgl_ctx_id, APIR_GGML_LIBRARY_REG_KEY); const char * virgl_library_reg = virgl_cbs->get_config(virgl_ctx_id, APIR_GGML_LIBRARY_REG_KEY);
const char * library_reg = virgl_library_reg ? virgl_library_reg : GGML_DEFAULT_BACKEND_REG; const char * library_reg = virgl_library_reg ? virgl_library_reg : GGML_DEFAULT_BACKEND_REG;
if (!library_name) { if (!library_name) {
GGML_LOG_ERROR(GGML_VIRTGPU_BCK GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: cannot open the GGML library: env var '%s' not defined\n", __func__,
"%s: cannot open the GGML library: env var '%s' not defined\n", APIR_LLAMA_CPP_GGML_LIBRARY_PATH_ENV);
__func__, APIR_LLAMA_CPP_GGML_LIBRARY_PATH_ENV);
return APIR_LOAD_LIBRARY_ENV_VAR_MISSING; return APIR_LOAD_LIBRARY_ENV_VAR_MISSING;
} }
@ -79,16 +76,14 @@ ApirLoadLibraryReturnCode apir_backend_initialize(uint32_t virgl_ctx_id, struct
backend_library_handle = dlopen(library_name, RTLD_LAZY); backend_library_handle = dlopen(library_name, RTLD_LAZY);
if (!backend_library_handle) { if (!backend_library_handle) {
GGML_LOG_ERROR(GGML_VIRTGPU_BCK GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: cannot open the GGML library: %s\n", __func__, dlerror());
"%s: cannot open the GGML library: %s\n", __func__, dlerror());
return APIR_LOAD_LIBRARY_CANNOT_OPEN; return APIR_LOAD_LIBRARY_CANNOT_OPEN;
} }
if (!library_reg) { if (!library_reg) {
GGML_LOG_ERROR(GGML_VIRTGPU_BCK GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: cannot register the GGML library: env var '%s' not defined\n", __func__,
"%s: cannot register the GGML library: env var '%s' not defined\n", APIR_LLAMA_CPP_GGML_LIBRARY_REG_ENV);
__func__, APIR_LLAMA_CPP_GGML_LIBRARY_REG_ENV);
return APIR_LOAD_LIBRARY_ENV_VAR_MISSING; return APIR_LOAD_LIBRARY_ENV_VAR_MISSING;
} }
@ -96,11 +91,9 @@ ApirLoadLibraryReturnCode apir_backend_initialize(uint32_t virgl_ctx_id, struct
void * ggml_backend_reg_fct = dlsym(backend_library_handle, library_reg); void * ggml_backend_reg_fct = dlsym(backend_library_handle, library_reg);
dlsym_error = dlerror(); dlsym_error = dlerror();
if (dlsym_error) { if (dlsym_error) {
GGML_LOG_ERROR(GGML_VIRTGPU_BCK GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: cannot find the GGML backend registration symbol '%s' (from %s): %s\n",
"%s: cannot find the GGML backend registration symbol '%s' (from %s): %s\n",
__func__, library_reg, APIR_LLAMA_CPP_GGML_LIBRARY_REG_ENV, dlsym_error); __func__, library_reg, APIR_LLAMA_CPP_GGML_LIBRARY_REG_ENV, dlsym_error);
return APIR_LOAD_LIBRARY_SYMBOL_MISSING; return APIR_LOAD_LIBRARY_SYMBOL_MISSING;
} }
@ -132,13 +125,12 @@ uint32_t apir_backend_dispatcher(uint32_t virgl_ctx_id,
virgl_apir_context ctx = { virgl_apir_context ctx = {
.ctx_id = virgl_ctx_id, .ctx_id = virgl_ctx_id,
.iface = virgl_cbs, .iface = virgl_cbs,
}; };
if (cmd_type >= APIR_BACKEND_DISPATCH_TABLE_COUNT) { if (cmd_type >= APIR_BACKEND_DISPATCH_TABLE_COUNT) {
GGML_LOG_ERROR(GGML_VIRTGPU_BCK GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Received an invalid dispatch index (%d >= %d)\n", __func__, cmd_type,
"%s: Received an invalid dispatch index (%d >= %d)\n", APIR_BACKEND_DISPATCH_TABLE_COUNT);
__func__, cmd_type, APIR_BACKEND_DISPATCH_TABLE_COUNT);
return APIR_BACKEND_FORWARD_INDEX_INVALID; return APIR_BACKEND_FORWARD_INDEX_INVALID;
} }

View File

@ -16,28 +16,32 @@ enum ApirCommandType {
APIR_COMMAND_TYPE_LOADLIBRARY = 1, APIR_COMMAND_TYPE_LOADLIBRARY = 1,
APIR_COMMAND_TYPE_FORWARD = 2, APIR_COMMAND_TYPE_FORWARD = 2,
APIR_COMMAND_TYPE_LENGTH = 3, APIR_COMMAND_TYPE_LENGTH = 3,
}; };
typedef uint64_t ApirCommandFlags; typedef uint64_t ApirCommandFlags;
enum ApirLoadLibraryReturnCode { enum ApirLoadLibraryReturnCode {
APIR_LOAD_LIBRARY_SUCCESS = 0, APIR_LOAD_LIBRARY_SUCCESS = 0,
// these error codes are returned by the Virglrenderer APIR component
APIR_LOAD_LIBRARY_HYPERCALL_INITIALIZATION_ERROR = 1, APIR_LOAD_LIBRARY_HYPERCALL_INITIALIZATION_ERROR = 1,
APIR_LOAD_LIBRARY_ALREADY_LOADED = 2, APIR_LOAD_LIBRARY_ALREADY_LOADED = 2,
APIR_LOAD_LIBRARY_ENV_VAR_MISSING = 3, APIR_LOAD_LIBRARY_ENV_VAR_MISSING = 3,
APIR_LOAD_LIBRARY_CANNOT_OPEN = 4, APIR_LOAD_LIBRARY_CANNOT_OPEN = 4,
APIR_LOAD_LIBRARY_SYMBOL_MISSING = 5, APIR_LOAD_LIBRARY_SYMBOL_MISSING = 5,
APIR_LOAD_LIBRARY_INIT_BASE_INDEX = 6, // anything above this is a APIR backend library initialization return code // any value greater than this is an APIR *backend library* initialization return code
APIR_LOAD_LIBRARY_INIT_BASE_INDEX = 6,
}; };
enum ApirForwardReturnCode { enum ApirForwardReturnCode {
APIR_FORWARD_SUCCESS = 0, APIR_FORWARD_SUCCESS = 0,
APIR_FORWARD_NO_DISPATCH_FCT = 1, // these error codes are returned by the Virglrenderer APIR component
APIR_FORWARD_TIMEOUT = 2, APIR_FORWARD_NO_DISPATCH_FCT = 1,
APIR_FORWARD_TIMEOUT = 2,
APIR_FORWARD_BASE_INDEX = 3, // anything above this is a APIR backend library forward return code APIR_FORWARD_FAILED_TO_SYNC_STREAMS = 3,
} ; // any value greater than this index an APIR *backend library* forward return code
APIR_FORWARD_BASE_INDEX = 4,
};
__attribute__((unused)) static inline const char * apir_command_name(ApirCommandType type) { __attribute__((unused)) static inline const char * apir_command_name(ApirCommandType type) {
switch (type) { switch (type) {
@ -82,6 +86,7 @@ __attribute__((unused)) static const char * apir_forward_error(ApirForwardReturn
APIR_FORWARD_ERROR(APIR_FORWARD_SUCCESS); APIR_FORWARD_ERROR(APIR_FORWARD_SUCCESS);
APIR_FORWARD_ERROR(APIR_FORWARD_NO_DISPATCH_FCT); APIR_FORWARD_ERROR(APIR_FORWARD_NO_DISPATCH_FCT);
APIR_FORWARD_ERROR(APIR_FORWARD_TIMEOUT); APIR_FORWARD_ERROR(APIR_FORWARD_TIMEOUT);
APIR_FORWARD_ERROR(APIR_FORWARD_FAILED_TO_SYNC_STREAMS);
APIR_FORWARD_ERROR(APIR_FORWARD_BASE_INDEX); APIR_FORWARD_ERROR(APIR_FORWARD_BASE_INDEX);
return "Unknown APIR_COMMAND_TYPE_FORWARD error"; return "Unknown APIR_COMMAND_TYPE_FORWARD error";

View File

@ -34,3 +34,61 @@ typedef enum ApirBackendCommandType {
// last command_type index + 1 // last command_type index + 1
APIR_BACKEND_DISPATCH_TABLE_COUNT = 23, APIR_BACKEND_DISPATCH_TABLE_COUNT = 23,
} ApirBackendCommandType; } ApirBackendCommandType;
static inline const char * apir_dispatch_command_name(ApirBackendCommandType type) {
switch (type) {
/* device */
case APIR_COMMAND_TYPE_DEVICE_GET_DEVICE_COUNT:
return "device_get_device_count";
case APIR_COMMAND_TYPE_DEVICE_GET_COUNT:
return "device_get_count";
case APIR_COMMAND_TYPE_DEVICE_GET_NAME:
return "device_get_name";
case APIR_COMMAND_TYPE_DEVICE_GET_DESCRIPTION:
return "device_get_description";
case APIR_COMMAND_TYPE_DEVICE_GET_TYPE:
return "device_get_type";
case APIR_COMMAND_TYPE_DEVICE_GET_MEMORY:
return "device_get_memory";
case APIR_COMMAND_TYPE_DEVICE_SUPPORTS_OP:
return "device_supports_op";
case APIR_COMMAND_TYPE_DEVICE_GET_BUFFER_TYPE:
return "device_get_buffer_type";
case APIR_COMMAND_TYPE_DEVICE_GET_PROPS:
return "device_get_props";
case APIR_COMMAND_TYPE_DEVICE_BUFFER_FROM_PTR:
return "device_buffer_from_ptr";
/* buffer-type */
case APIR_COMMAND_TYPE_BUFFER_TYPE_GET_NAME:
return "buffer_type_get_name";
case APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALIGNMENT:
return "buffer_type_get_alignment";
case APIR_COMMAND_TYPE_BUFFER_TYPE_GET_MAX_SIZE:
return "buffer_type_get_max_size";
case APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST:
return "buffer_type_is_host";
case APIR_COMMAND_TYPE_BUFFER_TYPE_ALLOC_BUFFER:
return "buffer_type_alloc_buffer";
case APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALLOC_SIZE:
return "buffer_type_get_alloc_size";
/* buffer */
case APIR_COMMAND_TYPE_BUFFER_GET_BASE:
return "buffer_get_base";
case APIR_COMMAND_TYPE_BUFFER_SET_TENSOR:
return "buffer_set_tensor";
case APIR_COMMAND_TYPE_BUFFER_GET_TENSOR:
return "buffer_get_tensor";
case APIR_COMMAND_TYPE_BUFFER_CPY_TENSOR:
return "buffer_cpy_tensor";
case APIR_COMMAND_TYPE_BUFFER_CLEAR:
return "buffer_clear";
case APIR_COMMAND_TYPE_BUFFER_FREE_BUFFER:
return "buffer_free_buffer";
/* backend */
case APIR_COMMAND_TYPE_BACKEND_GRAPH_COMPUTE:
return "backend_graph_compute";
default:
return "unknown";
}
}

View File

@ -14,7 +14,7 @@
#define APIR_BACKEND_INITIALIZE_BACKEND_REG_FAILED 6 #define APIR_BACKEND_INITIALIZE_BACKEND_REG_FAILED 6
#define APIR_BACKEND_INITIALIZE_ALREADY_INITED 7 #define APIR_BACKEND_INITIALIZE_ALREADY_INITED 7
#define APIR_BACKEND_INITIALIZE_NO_DEVICE 8 #define APIR_BACKEND_INITIALIZE_NO_DEVICE 8
#define APIR_BACKEND_INITIALIZE_BACKEND_INIT_FAILED 9
// new entries here need to be added to the apir_backend_initialize_error function below // new entries here need to be added to the apir_backend_initialize_error function below
@ -39,6 +39,10 @@ static const char * apir_backend_initialize_error(int code) {
APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_MISSING_BACKEND_SYMBOLS); APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_MISSING_BACKEND_SYMBOLS);
APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_MISSING_GGML_SYMBOLS); APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_MISSING_GGML_SYMBOLS);
APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_BACKEND_FAILED); APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_BACKEND_FAILED);
APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_BACKEND_REG_FAILED);
APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_ALREADY_INITED);
APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_NO_DEVICE);
APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_BACKEND_INIT_FAILED);
return "Unknown APIR_BACKEND_INITIALIZE error:/"; return "Unknown APIR_BACKEND_INITIALIZE error:/";

View File

@ -13,7 +13,6 @@ struct apir_encoder {
const char * start; const char * start;
const char * end; const char * end;
bool fatal; bool fatal;
}; };
struct apir_decoder { struct apir_decoder {
@ -28,8 +27,8 @@ struct apir_decoder {
static apir_decoder apir_new_decoder(const char * ptr, size_t size) { static apir_decoder apir_new_decoder(const char * ptr, size_t size) {
apir_decoder dec = { apir_decoder dec = {
.cur = ptr, .cur = ptr,
.end = ptr + size, .end = ptr + size,
.fatal = false, .fatal = false,
}; };
@ -79,10 +78,7 @@ static inline bool apir_decoder_get_fatal(const apir_decoder * dec) {
* encode peek * encode peek
*/ */
static inline bool apir_decoder_peek_internal(apir_decoder * dec, static inline bool apir_decoder_peek_internal(apir_decoder * dec, size_t size, void * val, size_t val_size) {
size_t size,
void * val,
size_t val_size) {
assert(val_size <= size); assert(val_size <= size);
if (unlikely(size > (size_t) (dec->end - dec->cur))) { if (unlikely(size > (size_t) (dec->end - dec->cur))) {
@ -332,8 +328,7 @@ static inline void apir_decode_char_array(apir_decoder * dec, char * val, size_t
static inline void * apir_decoder_alloc_array(size_t size, size_t count) { static inline void * apir_decoder_alloc_array(size_t size, size_t count) {
size_t alloc_size; size_t alloc_size;
if (unlikely(__builtin_mul_overflow(size, count, &alloc_size))) { if (unlikely(__builtin_mul_overflow(size, count, &alloc_size))) {
GGML_LOG_ERROR("%s: overflow in array allocation of %zu * %zu bytes\n", GGML_LOG_ERROR("%s: overflow in array allocation of %zu * %zu bytes\n", __func__, size, count);
__func__, size, count);
return NULL; return NULL;
} }
@ -352,20 +347,19 @@ static inline void apir_decode_bool_t(apir_decoder * dec, bool * val) {
/* apir_buffer_type_host_handle_t */ /* apir_buffer_type_host_handle_t */
static inline void apir_encode_apir_buffer_type_host_handle_t(apir_encoder * enc, static inline void apir_encode_apir_buffer_type_host_handle_t(apir_encoder * enc,
const apir_buffer_type_host_handle_t * val) { const apir_buffer_type_host_handle_t * val) {
apir_encode(enc, sizeof(apir_buffer_type_host_handle_t), val, sizeof(apir_buffer_type_host_handle_t)); apir_encode(enc, sizeof(apir_buffer_type_host_handle_t), val, sizeof(apir_buffer_type_host_handle_t));
} }
static inline void apir_decode_apir_buffer_type_host_handle_t(apir_decoder * dec, static inline void apir_decode_apir_buffer_type_host_handle_t(apir_decoder * dec,
apir_buffer_type_host_handle_t * val) { apir_buffer_type_host_handle_t * val) {
apir_decode(dec, sizeof(apir_buffer_type_host_handle_t), val, sizeof(apir_buffer_type_host_handle_t)); apir_decode(dec, sizeof(apir_buffer_type_host_handle_t), val, sizeof(apir_buffer_type_host_handle_t));
} }
/* apir_buffer_host_handle_t */ /* apir_buffer_host_handle_t */
static inline void apir_encode_apir_buffer_host_handle_t(apir_encoder * enc, static inline void apir_encode_apir_buffer_host_handle_t(apir_encoder * enc, const apir_buffer_host_handle_t * val) {
const apir_buffer_host_handle_t * val) {
apir_encode(enc, sizeof(apir_buffer_host_handle_t), val, sizeof(apir_buffer_host_handle_t)); apir_encode(enc, sizeof(apir_buffer_host_handle_t), val, sizeof(apir_buffer_host_handle_t));
} }

View File

@ -1,11 +1,10 @@
#include "ggml-impl.h"
#include "apir_cs.h" #include "apir_cs.h"
#include "apir_cs_rpc.h" #include "apir_cs_rpc.h"
#include "ggml-impl.h"
// ggml_buffer_to_apir_host_handle(ggml_backend_buffer_t buffer); // ggml_buffer_to_apir_host_handle(ggml_backend_buffer_t buffer);
static inline void apir_encode_ggml_buffer_host_handle(apir_encoder * enc, static inline void apir_encode_ggml_buffer_host_handle(apir_encoder * enc, const apir_buffer_host_handle_t * handle);
const apir_buffer_host_handle_t * handle);
static inline ggml_backend_buffer_t apir_decode_ggml_buffer(apir_decoder * dec); static inline ggml_backend_buffer_t apir_decode_ggml_buffer(apir_decoder * dec);
@ -22,8 +21,7 @@ static inline apir_rpc_tensor * apir_decode_apir_rpc_tensor_inplace(apir_decoder
return (apir_rpc_tensor *) (uintptr_t) apir_decoder_use_inplace(dec, apir_rpc_tensor_size); return (apir_rpc_tensor *) (uintptr_t) apir_decoder_use_inplace(dec, apir_rpc_tensor_size);
} }
static inline apir_rpc_tensor * apir_decode_apir_rpc_tensor_array_inplace(apir_decoder * dec, static inline apir_rpc_tensor * apir_decode_apir_rpc_tensor_array_inplace(apir_decoder * dec, uint32_t n_tensors) {
uint32_t n_tensors) {
size_t apir_rpc_tensor_size = sizeof(apir_rpc_tensor) * n_tensors; size_t apir_rpc_tensor_size = sizeof(apir_rpc_tensor) * n_tensors;
return (apir_rpc_tensor *) (uintptr_t) apir_decoder_use_inplace(dec, apir_rpc_tensor_size); return (apir_rpc_tensor *) (uintptr_t) apir_decoder_use_inplace(dec, apir_rpc_tensor_size);
@ -45,9 +43,9 @@ static inline const ggml_tensor * apir_decode_ggml_tensor(apir_decoder * dec) {
} }
ggml_init_params params{ ggml_init_params params{
/*.mem_size =*/ ggml_tensor_overhead(), /*.mem_size =*/ggml_tensor_overhead(),
/*.mem_buffer =*/ NULL, /*.mem_buffer =*/NULL,
/*.no_alloc =*/ true, /*.no_alloc =*/true,
}; };
ggml_context * ctx = ggml_init(params); ggml_context * ctx = ggml_init(params);
@ -105,6 +103,19 @@ static inline ggml_backend_buffer_t apir_decode_ggml_buffer(apir_decoder * dec)
apir_decoder_read(dec, buffer_ptr_size, &buffer, buffer_ptr_size); apir_decoder_read(dec, buffer_ptr_size, &buffer, buffer_ptr_size);
// SECURITY: Validate buffer handle against tracked buffers to prevent
// guest VM from providing arbitrary host memory addresses
if (buffer) {
extern std::unordered_set<ggml_backend_buffer_t> backend_buffers;
if (backend_buffers.find(buffer) == backend_buffers.end()) {
GGML_LOG_WARN("ggml-virtgpu-backend: %s: Invalid buffer handle from guest: %p\n", __func__,
(void *) buffer);
// Set fatal flag to prevent further processing with invalid handle
apir_decoder_set_fatal(dec);
return NULL;
}
}
return buffer; return buffer;
} }

View File

@ -1,3 +1,6 @@
#pragma once
// clang-format off
#include "ggml.h" #include "ggml.h"
#include "ggml-backend-impl.h" #include "ggml-backend-impl.h"
@ -5,6 +8,7 @@
#include <unordered_set> #include <unordered_set>
#include <vector> #include <vector>
#include <cstdint> #include <cstdint>
// clang-format on
// ggml_tensor is serialized into apir_rpc_tensor // ggml_tensor is serialized into apir_rpc_tensor
struct apir_rpc_tensor { struct apir_rpc_tensor {

View File

@ -34,6 +34,7 @@ static ggml_backend_buffer_t ggml_backend_remoting_buffer_type_alloc_buffer(ggml
static const char * ggml_backend_remoting_buffer_type_get_name(ggml_backend_buffer_type_t buft) { static const char * ggml_backend_remoting_buffer_type_get_name(ggml_backend_buffer_type_t buft) {
virtgpu * gpu = BUFT_TO_GPU(buft); virtgpu * gpu = BUFT_TO_GPU(buft);
// Return the prefixed name that was built once during initialization
return gpu->cached_buffer_type.name; return gpu->cached_buffer_type.name;
} }
@ -53,9 +54,8 @@ static size_t ggml_backend_remoting_buffer_type_get_alloc_size(ggml_backend_buff
const ggml_tensor * tensor) { const ggml_tensor * tensor) {
virtgpu * gpu = BUFT_TO_GPU(buft); virtgpu * gpu = BUFT_TO_GPU(buft);
if (tensor->buffer == NULL if (tensor->buffer == NULL || !tensor->buffer->context ||
|| !tensor->buffer->context !buft->device->iface.supports_buft(buft->device, tensor->buffer->buft)) {
|| !buft->device->iface.supports_buft(buft->device, tensor->buffer->buft)) {
return ggml_nbytes(tensor); return ggml_nbytes(tensor);
} }

View File

@ -3,6 +3,7 @@
static const char * ggml_backend_remoting_device_get_name(ggml_backend_dev_t dev) { static const char * ggml_backend_remoting_device_get_name(ggml_backend_dev_t dev) {
virtgpu * gpu = DEV_TO_GPU(dev); virtgpu * gpu = DEV_TO_GPU(dev);
// Return the prefixed name that was built once during initialization
return gpu->cached_device_info.name; return gpu->cached_device_info.name;
} }
@ -22,7 +23,7 @@ static enum ggml_backend_dev_type ggml_backend_remoting_device_get_type(ggml_bac
static void ggml_backend_remoting_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) { static void ggml_backend_remoting_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
virtgpu * gpu = DEV_TO_GPU(dev); virtgpu * gpu = DEV_TO_GPU(dev);
*free = gpu->cached_device_info.memory_free; *free = gpu->cached_device_info.memory_free;
*total = gpu->cached_device_info.memory_total; *total = gpu->cached_device_info.memory_total;
} }
@ -72,7 +73,7 @@ static void ggml_backend_remoting_device_get_props(ggml_backend_dev_t dev, ggml_
ggml_backend_buffer_type_t ggml_backend_remoting_device_get_buffer_type(ggml_backend_dev_t dev) { ggml_backend_buffer_type_t ggml_backend_remoting_device_get_buffer_type(ggml_backend_dev_t dev) {
virtgpu * gpu = DEV_TO_GPU(dev); virtgpu * gpu = DEV_TO_GPU(dev);
static std::atomic<bool> initialized = false; static std::atomic<bool> initialized = false;
static ggml_backend_buffer_type buft; static ggml_backend_buffer_type buft;
if (!initialized) { if (!initialized) {
@ -95,7 +96,7 @@ ggml_backend_buffer_type_t ggml_backend_remoting_device_get_buffer_type(ggml_bac
static ggml_backend_buffer_type_t ggml_backend_remoting_device_get_buffer_from_ptr_type(ggml_backend_dev_t dev) { static ggml_backend_buffer_type_t ggml_backend_remoting_device_get_buffer_from_ptr_type(ggml_backend_dev_t dev) {
virtgpu * gpu = DEV_TO_GPU(dev); virtgpu * gpu = DEV_TO_GPU(dev);
static std::atomic<bool> initialized = false; static std::atomic<bool> initialized = false;
static ggml_backend_buffer_type buft; static ggml_backend_buffer_type buft;
if (!initialized) { if (!initialized) {

View File

@ -7,8 +7,8 @@
void ggml_virtgpu_cleanup(virtgpu * gpu); void ggml_virtgpu_cleanup(virtgpu * gpu);
static virtgpu * apir_initialize() { static virtgpu * apir_initialize() {
static virtgpu * gpu = NULL; static virtgpu * gpu = NULL;
static std::atomic<bool> initialized = false; static std::atomic<bool> initialized = false;
if (initialized) { if (initialized) {
// fast track // fast track
@ -31,29 +31,53 @@ static virtgpu * apir_initialize() {
} }
// Pre-fetch and cache all device information, it will not change // Pre-fetch and cache all device information, it will not change
gpu->cached_device_info.description = apir_device_get_description(gpu); gpu->cached_device_info.description = apir_device_get_description(gpu);
if (!gpu->cached_device_info.description) { if (!gpu->cached_device_info.description) {
GGML_ABORT(GGML_VIRTGPU "%s: failed to initialize the virtgpu device description", __func__); GGML_ABORT(GGML_VIRTGPU "%s: failed to initialize the virtgpu device description", __func__);
} }
gpu->cached_device_info.name = apir_device_get_name(gpu);
if (!gpu->cached_device_info.name) {
GGML_ABORT(GGML_VIRTGPU "%s: failed to initialize the virtgpu device name", __func__);
}
gpu->cached_device_info.device_count = apir_device_get_count(gpu); gpu->cached_device_info.device_count = apir_device_get_count(gpu);
gpu->cached_device_info.type = apir_device_get_type(gpu); gpu->cached_device_info.type = apir_device_get_type(gpu);
apir_device_get_memory(gpu, {
&gpu->cached_device_info.memory_free, // Get the remote name and create prefixed version
&gpu->cached_device_info.memory_total); char * rmt_device_name = apir_device_get_name(gpu);
if (!rmt_device_name) {
GGML_ABORT(GGML_VIRTGPU "%s: failed to get the virtgpu device name", __func__);
}
size_t device_name_len = strlen(rmt_device_name) + 11; // "[virtgpu] " + null terminator
gpu->cached_device_info.name = (char *) malloc(device_name_len);
if (!gpu->cached_device_info.name) {
free(rmt_device_name);
GGML_ABORT(GGML_VIRTGPU "%s: failed to allocate memory for prefixed device name", __func__);
}
snprintf(gpu->cached_device_info.name, device_name_len, "[virtgpu] %s", rmt_device_name);
free(rmt_device_name);
}
apir_device_get_memory(gpu, &gpu->cached_device_info.memory_free, &gpu->cached_device_info.memory_total);
apir_buffer_type_host_handle_t buft_host_handle = apir_device_get_buffer_type(gpu); apir_buffer_type_host_handle_t buft_host_handle = apir_device_get_buffer_type(gpu);
gpu->cached_buffer_type.host_handle = buft_host_handle; gpu->cached_buffer_type.host_handle = buft_host_handle;
gpu->cached_buffer_type.name = apir_buffer_type_get_name(gpu, buft_host_handle); {
if (!gpu->cached_buffer_type.name) { // Get the remote name and create prefixed version
GGML_ABORT(GGML_VIRTGPU "%s: failed to initialize the virtgpu buffer type name", __func__); char * rmt_name = apir_buffer_type_get_name(gpu, buft_host_handle);
if (!rmt_name) {
GGML_ABORT(GGML_VIRTGPU "%s: failed to get the virtgpu buffer type name", __func__);
}
size_t prefixed_len = strlen(rmt_name) + 11; // "[virtgpu] " + null terminator
gpu->cached_buffer_type.name = (char *) malloc(prefixed_len);
if (!gpu->cached_buffer_type.name) {
free(rmt_name);
GGML_ABORT(GGML_VIRTGPU "%s: failed to allocate memory for prefixed buffer type name", __func__);
}
snprintf(gpu->cached_buffer_type.name, prefixed_len, "[virtgpu] %s", rmt_name);
free(rmt_name);
} }
gpu->cached_buffer_type.alignment = apir_buffer_type_get_alignment(gpu, buft_host_handle);
gpu->cached_buffer_type.max_size = apir_buffer_type_get_max_size(gpu, buft_host_handle); gpu->cached_buffer_type.alignment = apir_buffer_type_get_alignment(gpu, buft_host_handle);
gpu->cached_buffer_type.max_size = apir_buffer_type_get_max_size(gpu, buft_host_handle);
initialized = true; initialized = true;
} }
@ -98,7 +122,7 @@ static void ggml_backend_remoting_reg_init_devices(ggml_backend_reg_t reg) {
static std::atomic<bool> initialized = false; static std::atomic<bool> initialized = false;
if (initialized) { if (initialized) {
return; // fast track return; // fast track
} }
{ {

View File

@ -1,5 +1,5 @@
#include "ggml-remoting.h"
#include "../../include/ggml-virtgpu.h" #include "../../include/ggml-virtgpu.h"
#include "ggml-remoting.h"
static const char * ggml_backend_remoting_get_name(ggml_backend_t backend) { static const char * ggml_backend_remoting_get_name(ggml_backend_t backend) {
UNUSED(backend); UNUSED(backend);

View File

@ -9,7 +9,7 @@
#include <string> #include <string>
#define GGML_VIRTGPU_NAME "ggml-virtgpu" #define GGML_VIRTGPU_NAME "ggml-virtgpu"
#define GGML_VIRTGPU "ggml-virtgpu: " #define GGML_VIRTGPU "ggml-virtgpu: "
// USE_ALWAYS_TRUE_SUPPORTS_OP: 1 is fast, 0 avoid micro-benchmark crashes // USE_ALWAYS_TRUE_SUPPORTS_OP: 1 is fast, 0 avoid micro-benchmark crashes

View File

@ -3,7 +3,7 @@
#include <stdint.h> #include <stdint.h>
struct virgl_renderer_capset_apir { struct virgl_renderer_capset_apir {
uint32_t apir_version; uint32_t apir_version;
uint32_t supports_blob_resources; uint32_t supports_blob_resources;
uint32_t reserved[4]; // For future expansion uint32_t reserved[4]; // For future expansion
}; };

View File

@ -145,8 +145,31 @@ class RemotingCodebaseGenerator:
enum_lines.append(f" APIR_BACKEND_DISPATCH_TABLE_COUNT = {total_count},") enum_lines.append(f" APIR_BACKEND_DISPATCH_TABLE_COUNT = {total_count},")
enum_lines.append("} ApirBackendCommandType;") enum_lines.append("} ApirBackendCommandType;")
# Generate function name mapping
func_lines = []
func_lines.append("static inline const char * apir_dispatch_command_name(ApirBackendCommandType type) {")
func_lines.append(" switch (type) {")
current_group = None
for func in functions:
# Add comment for new group
if func['group_name'] != current_group:
func_lines.append(f" /* {func['group_description']} */")
current_group = func['group_name']
# Generate clean function name without backend_ prefix
clean_name = f"{func['group_name']}_{func['function_name']}"
func_lines.append(f" case {func['enum_name']}:")
func_lines.append(f" return \"{clean_name}\";")
func_lines.append("")
func_lines.append(" default:")
func_lines.append(" return \"unknown\";")
func_lines.append(" }")
func_lines.append("}")
# Full header template # Full header template
header_content = NL.join(enum_lines) + "\n" header_content = NL.join(enum_lines) + "\n\n" + NL.join(func_lines) + "\n"
return header_content return header_content
@ -170,19 +193,6 @@ class RemotingCodebaseGenerator:
decl_lines.append(f"{signature} {func['backend_function']}({params});") decl_lines.append(f"{signature} {func['backend_function']}({params});")
# Switch cases
switch_lines = []
current_group = None
for func in functions:
if func['group_name'] != current_group:
switch_lines.append(f" /* {func['group_description']} */")
current_group = func['group_name']
deprecated = " (DEPRECATED)" if func['deprecated'] else ""
switch_lines.append(f" case {func['enum_name']}: return \"{func['backend_function']}{deprecated}\";")
# Dispatch table # Dispatch table
table_lines = [] table_lines = []
current_group = None current_group = None
@ -201,15 +211,6 @@ class RemotingCodebaseGenerator:
{NL.join(decl_lines)} {NL.join(decl_lines)}
static inline const char *backend_dispatch_command_name(ApirBackendCommandType type)
{{
switch (type) {{
{NL.join(switch_lines)}
default: return "unknown";
}}
}}
extern "C" {{ extern "C" {{
static const backend_dispatch_t apir_backend_dispatch_table[APIR_BACKEND_DISPATCH_TABLE_COUNT] = {{ static const backend_dispatch_t apir_backend_dispatch_table[APIR_BACKEND_DISPATCH_TABLE_COUNT] = {{
{NL.join(table_lines)} {NL.join(table_lines)}

View File

@ -17,8 +17,8 @@ ggml_status apir_backend_graph_compute(virtgpu * gpu, ggml_cgraph * cgraph) {
size_t cgraph_size = apir_serialize_ggml_cgraph(cgraph, cgraph_data); size_t cgraph_size = apir_serialize_ggml_cgraph(cgraph, cgraph_data);
virtgpu_shmem temp_shmem; // Local storage for large buffers virtgpu_shmem temp_shmem; // Local storage for large buffers
virtgpu_shmem * shmem = &temp_shmem; virtgpu_shmem * shmem = &temp_shmem;
bool using_shared_shmem = false; bool using_shared_shmem = false;
if (cgraph_size <= gpu->data_shmem.mmap_size) { if (cgraph_size <= gpu->data_shmem.mmap_size) {
// Lock mutex before using shared data_shmem buffer // Lock mutex before using shared data_shmem buffer
@ -26,7 +26,7 @@ ggml_status apir_backend_graph_compute(virtgpu * gpu, ggml_cgraph * cgraph) {
GGML_ABORT(GGML_VIRTGPU "%s: Failed to lock data_shmem mutex", __func__); GGML_ABORT(GGML_VIRTGPU "%s: Failed to lock data_shmem mutex", __func__);
} }
using_shared_shmem = true; using_shared_shmem = true;
shmem = &gpu->data_shmem; shmem = &gpu->data_shmem;
} else if (virtgpu_shmem_create(gpu, cgraph_size, shmem)) { } else if (virtgpu_shmem_create(gpu, cgraph_size, shmem)) {
GGML_ABORT(GGML_VIRTGPU "%s: Couldn't allocate the guest-host shared buffer", __func__); GGML_ABORT(GGML_VIRTGPU "%s: Couldn't allocate the guest-host shared buffer", __func__);
} }

View File

@ -62,7 +62,9 @@ size_t apir_buffer_type_get_max_size(virtgpu * gpu, apir_buffer_type_host_handle
return max_size; return max_size;
} }
apir_buffer_context_t apir_buffer_type_alloc_buffer(virtgpu * gpu, apir_buffer_type_host_handle_t host_handle, size_t size) { apir_buffer_context_t apir_buffer_type_alloc_buffer(virtgpu * gpu,
apir_buffer_type_host_handle_t host_handle,
size_t size) {
apir_encoder * encoder; apir_encoder * encoder;
apir_decoder * decoder; apir_decoder * decoder;
ApirForwardReturnCode ret; ApirForwardReturnCode ret;
@ -84,7 +86,9 @@ apir_buffer_context_t apir_buffer_type_alloc_buffer(virtgpu * gpu, apir_buffer_t
return buffer_context; return buffer_context;
} }
size_t apir_buffer_type_get_alloc_size(virtgpu * gpu, apir_buffer_type_host_handle_t host_handle, const ggml_tensor * op) { size_t apir_buffer_type_get_alloc_size(virtgpu * gpu,
apir_buffer_type_host_handle_t host_handle,
const ggml_tensor * op) {
apir_encoder * encoder; apir_encoder * encoder;
apir_decoder * decoder; apir_decoder * decoder;
ApirForwardReturnCode ret; ApirForwardReturnCode ret;

View File

@ -35,8 +35,8 @@ void apir_buffer_set_tensor(virtgpu * gpu,
apir_encode_ggml_tensor(encoder, tensor); apir_encode_ggml_tensor(encoder, tensor);
virtgpu_shmem temp_shmem; // Local storage for large buffers virtgpu_shmem temp_shmem; // Local storage for large buffers
virtgpu_shmem * shmem = &temp_shmem; virtgpu_shmem * shmem = &temp_shmem;
bool using_shared_shmem = false; bool using_shared_shmem = false;
if (size <= gpu->data_shmem.mmap_size) { if (size <= gpu->data_shmem.mmap_size) {
// Lock mutex before using shared data_shmem buffer // Lock mutex before using shared data_shmem buffer
@ -44,7 +44,7 @@ void apir_buffer_set_tensor(virtgpu * gpu,
GGML_ABORT(GGML_VIRTGPU "%s: Failed to lock data_shmem mutex", __func__); GGML_ABORT(GGML_VIRTGPU "%s: Failed to lock data_shmem mutex", __func__);
} }
using_shared_shmem = true; using_shared_shmem = true;
shmem = &gpu->data_shmem; shmem = &gpu->data_shmem;
} else if (virtgpu_shmem_create(gpu, size, shmem)) { } else if (virtgpu_shmem_create(gpu, size, shmem)) {
GGML_ABORT(GGML_VIRTGPU "%s: Couldn't allocate the guest-host shared buffer", __func__); GGML_ABORT(GGML_VIRTGPU "%s: Couldn't allocate the guest-host shared buffer", __func__);
@ -86,8 +86,8 @@ void apir_buffer_get_tensor(virtgpu * gpu,
apir_encode_ggml_tensor(encoder, tensor); apir_encode_ggml_tensor(encoder, tensor);
virtgpu_shmem temp_shmem; // Local storage for large buffers virtgpu_shmem temp_shmem; // Local storage for large buffers
virtgpu_shmem * shmem = &temp_shmem; virtgpu_shmem * shmem = &temp_shmem;
bool using_shared_shmem = false; bool using_shared_shmem = false;
if (size <= gpu->data_shmem.mmap_size) { if (size <= gpu->data_shmem.mmap_size) {
// Lock mutex before using shared data_shmem buffer // Lock mutex before using shared data_shmem buffer
@ -95,7 +95,7 @@ void apir_buffer_get_tensor(virtgpu * gpu,
GGML_ABORT(GGML_VIRTGPU "%s: Failed to lock data_shmem mutex", __func__); GGML_ABORT(GGML_VIRTGPU "%s: Failed to lock data_shmem mutex", __func__);
} }
using_shared_shmem = true; using_shared_shmem = true;
shmem = &gpu->data_shmem; shmem = &gpu->data_shmem;
} else if (virtgpu_shmem_create(gpu, size, shmem)) { } else if (virtgpu_shmem_create(gpu, size, shmem)) {
GGML_ABORT(GGML_VIRTGPU "%s: Couldn't allocate the guest-host shared buffer", __func__); GGML_ABORT(GGML_VIRTGPU "%s: Couldn't allocate the guest-host shared buffer", __func__);

View File

@ -26,7 +26,7 @@ char * apir_device_get_name(virtgpu * gpu) {
REMOTE_CALL(gpu, encoder, decoder, ret); REMOTE_CALL(gpu, encoder, decoder, ret);
const size_t string_size = apir_decode_array_size_unchecked(decoder); const size_t string_size = apir_decode_array_size_unchecked(decoder);
char * string = (char *) apir_decoder_alloc_array(sizeof(char), string_size); char * string = (char *) apir_decoder_alloc_array(sizeof(char), string_size);
if (!string) { if (!string) {
GGML_LOG_ERROR(GGML_VIRTGPU "%s: Could not allocate the device name buffer\n", __func__); GGML_LOG_ERROR(GGML_VIRTGPU "%s: Could not allocate the device name buffer\n", __func__);
return NULL; return NULL;
@ -173,7 +173,7 @@ apir_buffer_context_t apir_device_buffer_from_ptr(virtgpu * gpu, size_t size, si
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_BUFFER_FROM_PTR); REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_BUFFER_FROM_PTR);
if (virtgpu_shmem_create(gpu, size, &buffer_context.shmem)) { if (virtgpu_shmem_create(gpu, size, &buffer_context.shmem)) {
GGML_ABORT(GGML_VIRTGPU "Couldn't allocate the guest-host shared buffer"); GGML_ABORT(GGML_VIRTGPU "%s: Couldn't allocate %ldb of guest-host shared buffer", __func__, size);
} }
apir_encode_virtgpu_shmem_res_id(encoder, buffer_context.shmem.res_id); apir_encode_virtgpu_shmem_res_id(encoder, buffer_context.shmem.res_id);

View File

@ -1,29 +1,36 @@
#include "virtgpu.h" #pragma once
// clang-format off
#include "virtgpu.h"
#include "ggml-remoting.h" #include "ggml-remoting.h"
#include "backend/shared/apir_backend.h" #include "backend/shared/apir_backend.h"
#include "backend/shared/apir_cs_ggml.h" #include "backend/shared/apir_cs_ggml.h"
#include "ggml-backend-impl.h" #include "ggml-backend-impl.h"
// clang-format on
#define REMOTE_CALL_PREPARE(gpu_dev_name, encoder_name, apir_command_type__) \ #define REMOTE_CALL_PREPARE(gpu_dev_name, encoder_name, apir_command_type__) \
do { \ int32_t REMOTE_CALL_PREPARE_forward_flag = (int32_t) apir_command_type__; \
int32_t forward_flag = (int32_t) apir_command_type__; \ const char * REMOTE_CALL_PREPARE_command_name = apir_dispatch_command_name(apir_command_type__); \
encoder_name = remote_call_prepare(gpu_dev_name, APIR_COMMAND_TYPE_FORWARD, forward_flag); \ do { \
if (!encoder_name) { \ encoder_name = remote_call_prepare(gpu_dev_name, APIR_COMMAND_TYPE_FORWARD, REMOTE_CALL_PREPARE_forward_flag); \
GGML_ABORT(GGML_VIRTGPU "%s: failed to prepare the remote call encoder", __func__); \ if (!encoder_name) { \
} \ GGML_ABORT(GGML_VIRTGPU "%s: failed to prepare the remote call encoder", __func__); \
} \
} while (0) } while (0)
#define REMOTE_CALL(gpu_dev_name, encoder_name, decoder_name, ret_name) \ #define REMOTE_CALL(gpu_dev_name, encoder_name, decoder_name, ret_name) \
do { \ do { \
ret_name = (ApirForwardReturnCode) remote_call(gpu_dev_name, encoder_name, &decoder_name, 0, NULL); \ ret_name = (ApirForwardReturnCode) remote_call(gpu_dev_name, encoder_name, &decoder_name, 0, NULL); \
if (!decoder_name) { \ if (!decoder_name) { \
GGML_ABORT(GGML_VIRTGPU "%s: failed to kick the remote call", __func__); \ GGML_ABORT(GGML_VIRTGPU "%s: failed to kick the remote call", __func__); \
} \ } \
if (ret_name < APIR_FORWARD_BASE_INDEX) { \ if (ret_name < APIR_FORWARD_BASE_INDEX) { \
GGML_ABORT(GGML_VIRTGPU "%s: failed to forward the API call: %s: code %d", __func__, \ GGML_ABORT(GGML_VIRTGPU "%s: failed to forward the API call: %s: code %d", __func__, \
apir_forward_error(ret_name), ret_name); \ apir_forward_error(ret_name), ret_name); \
} \ } \
ret_name = (ApirForwardReturnCode) (ret_name - APIR_FORWARD_BASE_INDEX); \ ret_name = (ApirForwardReturnCode) (ret_name - APIR_FORWARD_BASE_INDEX); \
if (ret_name != 0) { \
GGML_ABORT(GGML_VIRTGPU "backend function '%s' failed (return code: %d)", \
REMOTE_CALL_PREPARE_command_name, ret_name); \
} \
} while (0) } while (0)

View File

@ -20,6 +20,7 @@ apir_buffer_context_t apir_device_buffer_from_ptr(struct virtgpu * gpu,
char * apir_buffer_type_get_name(struct virtgpu * gpu, apir_buffer_type_host_handle_t host_handle); char * apir_buffer_type_get_name(struct virtgpu * gpu, apir_buffer_type_host_handle_t host_handle);
size_t apir_buffer_type_get_alignment(struct virtgpu * gpu, apir_buffer_type_host_handle_t host_handle); size_t apir_buffer_type_get_alignment(struct virtgpu * gpu, apir_buffer_type_host_handle_t host_handle);
size_t apir_buffer_type_get_max_size(struct virtgpu * gpu, apir_buffer_type_host_handle_t host_handle); size_t apir_buffer_type_get_max_size(struct virtgpu * gpu, apir_buffer_type_host_handle_t host_handle);
/* apir_buffer_type_is_host is deprecated. */
apir_buffer_context_t apir_buffer_type_alloc_buffer(struct virtgpu * gpu, apir_buffer_context_t apir_buffer_type_alloc_buffer(struct virtgpu * gpu,
apir_buffer_type_host_handle_t host_handle, apir_buffer_type_host_handle_t host_handle,
size_t size); size_t size);

View File

@ -53,9 +53,9 @@ static int virtgpu_handshake(virtgpu * gpu) {
if (!decoder) { if (!decoder) {
GGML_ABORT(GGML_VIRTGPU GGML_ABORT(GGML_VIRTGPU
"%s: failed to initiate the communication with the virglrenderer library. " "%s: failed to initiate the communication with the virglrenderer library. "
"Most likely, the wrong virglrenderer library was loaded in the hypervisor.", "Most likely, the wrong virglrenderer library was loaded in the hypervisor.",
__func__); __func__);
return 1; return 1;
} }
@ -65,8 +65,7 @@ static int virtgpu_handshake(virtgpu * gpu) {
uint32_t host_minor; uint32_t host_minor;
if (ret_magic != APIR_HANDSHAKE_MAGIC) { if (ret_magic != APIR_HANDSHAKE_MAGIC) {
GGML_ABORT(GGML_VIRTGPU GGML_ABORT(GGML_VIRTGPU "%s: handshake with the virglrenderer failed (code=%d | %s)", __func__, ret_magic,
"%s: handshake with the virglrenderer failed (code=%d | %s)", __func__, ret_magic,
apir_backend_initialize_error(ret_magic)); apir_backend_initialize_error(ret_magic));
} else { } else {
apir_decode_uint32_t(decoder, &host_major); apir_decode_uint32_t(decoder, &host_major);
@ -140,15 +139,13 @@ static ApirLoadLibraryReturnCode virtgpu_load_library(virtgpu * gpu) {
"Make sure virglrenderer is correctly configured by the hypervisor. (%s) ", "Make sure virglrenderer is correctly configured by the hypervisor. (%s) ",
__func__, apir_load_library_error(ret)); __func__, apir_load_library_error(ret));
} else { } else {
GGML_ABORT(GGML_VIRTGPU GGML_ABORT(GGML_VIRTGPU "%s: virglrenderer could not load the API Remoting backend library. (%s - code %d)",
"%s: virglrenderer could not load the API Remoting backend library. (%s - code %d)", __func__, __func__, apir_load_library_error(ret), ret);
apir_load_library_error(ret), ret);
} }
return ret; return ret;
} }
GGML_LOG_INFO(GGML_VIRTGPU GGML_LOG_INFO(GGML_VIRTGPU "%s: virglrenderer successfully loaded the API Remoting backend library.\n", __func__);
"%s: virglrenderer successfully loaded the API Remoting backend library.\n", __func__);
ApirLoadLibraryReturnCode apir_ret = (ApirLoadLibraryReturnCode) (ret - APIR_LOAD_LIBRARY_INIT_BASE_INDEX); ApirLoadLibraryReturnCode apir_ret = (ApirLoadLibraryReturnCode) (ret - APIR_LOAD_LIBRARY_INIT_BASE_INDEX);
@ -158,10 +155,11 @@ static ApirLoadLibraryReturnCode virtgpu_load_library(virtgpu * gpu) {
"Make sure virglrenderer is correctly configured by the hypervisor. (%s)", "Make sure virglrenderer is correctly configured by the hypervisor. (%s)",
__func__, apir_load_library_error(apir_ret)); __func__, apir_load_library_error(apir_ret));
} else if (apir_ret == APIR_LOAD_LIBRARY_SYMBOL_MISSING) { } else if (apir_ret == APIR_LOAD_LIBRARY_SYMBOL_MISSING) {
GGML_ABORT(GGML_VIRTGPU GGML_ABORT(
"%s: the API Remoting backend library couldn't load the GGML backend library, some symbols are missing. " GGML_VIRTGPU
"Make sure virglrenderer is correctly configured by the hypervisor. (%s)", "%s: the API Remoting backend library couldn't load the GGML backend library, some symbols are missing. "
__func__, apir_load_library_error(apir_ret)); "Make sure virglrenderer is correctly configured by the hypervisor. (%s)",
__func__, apir_load_library_error(apir_ret));
} else if (apir_ret < APIR_LOAD_LIBRARY_INIT_BASE_INDEX) { } else if (apir_ret < APIR_LOAD_LIBRARY_INIT_BASE_INDEX) {
GGML_ABORT(GGML_VIRTGPU GGML_ABORT(GGML_VIRTGPU
"%s: the API Remoting backend library couldn't load the GGML backend library: apir code=%d | %s)", "%s: the API Remoting backend library couldn't load the GGML backend library: apir code=%d | %s)",
@ -169,8 +167,8 @@ static ApirLoadLibraryReturnCode virtgpu_load_library(virtgpu * gpu) {
} else { } else {
uint32_t lib_ret = apir_ret - APIR_LOAD_LIBRARY_INIT_BASE_INDEX; uint32_t lib_ret = apir_ret - APIR_LOAD_LIBRARY_INIT_BASE_INDEX;
GGML_ABORT(GGML_VIRTGPU GGML_ABORT(GGML_VIRTGPU
"%s: the API Remoting backend library initialize its backend library: apir code=%d)", __func__, "%s: the API Remoting backend library failed to initialize its backend library: apir code=%d)",
lib_ret); __func__, lib_ret);
} }
return ret; return ret;
} }
@ -184,55 +182,49 @@ virtgpu * create_virtgpu() {
// Initialize mutex to protect shared data_shmem buffer // Initialize mutex to protect shared data_shmem buffer
if (mtx_init(&gpu->data_shmem_mutex, mtx_plain) != thrd_success) { if (mtx_init(&gpu->data_shmem_mutex, mtx_plain) != thrd_success) {
delete gpu; delete gpu;
GGML_ABORT(GGML_VIRTGPU GGML_ABORT(GGML_VIRTGPU "%s: failed to initialize data_shmem mutex", __func__);
"%s: failed to initialize data_shmem mutex", __func__);
return NULL; return NULL;
} }
if (virtgpu_open(gpu) != APIR_SUCCESS) { if (virtgpu_open(gpu) != APIR_SUCCESS) {
GGML_LOG_ERROR(GGML_VIRTGPU GGML_LOG_ERROR(GGML_VIRTGPU "%s: failed to open the virtgpu device\n", __func__);
"%s: failed to open the virtgpu device\n", __func__);
return NULL; return NULL;
} }
if (virtgpu_init_capset(gpu) != APIR_SUCCESS) { if (virtgpu_init_capset(gpu) != APIR_SUCCESS) {
if (gpu->use_apir_capset) { if (gpu->use_apir_capset) {
GGML_ABORT(GGML_VIRTGPU GGML_ABORT(GGML_VIRTGPU
"%s: failed to initialize the virtgpu APIR capset. Make sure that the virglrenderer library supports it.", __func__); "%s: failed to initialize the virtgpu APIR capset. Make sure that the virglrenderer library "
"supports it.",
__func__);
} else { } else {
GGML_ABORT(GGML_VIRTGPU GGML_ABORT(GGML_VIRTGPU "%s: failed to initialize the virtgpu Venus capset", __func__);
"%s: failed to initialize the virtgpu Venus capset", __func__);
} }
return NULL; return NULL;
} }
if (virtgpu_init_context(gpu) != APIR_SUCCESS) { if (virtgpu_init_context(gpu) != APIR_SUCCESS) {
GGML_ABORT(GGML_VIRTGPU GGML_ABORT(GGML_VIRTGPU "%s: failed to initialize the GPU context", __func__);
"%s: failed to initialize the GPU context", __func__);
return NULL; return NULL;
} }
if (virtgpu_shmem_create(gpu, SHMEM_REPLY_SIZE, &gpu->reply_shmem)) { if (virtgpu_shmem_create(gpu, SHMEM_REPLY_SIZE, &gpu->reply_shmem)) {
GGML_ABORT(GGML_VIRTGPU GGML_ABORT(GGML_VIRTGPU "%s: failed to create the shared reply memory pages", __func__);
"%s: failed to create the shared reply memory pages", __func__);
return NULL; return NULL;
} }
if (virtgpu_shmem_create(gpu, SHMEM_DATA_SIZE, &gpu->data_shmem)) { if (virtgpu_shmem_create(gpu, SHMEM_DATA_SIZE, &gpu->data_shmem)) {
GGML_ABORT(GGML_VIRTGPU GGML_ABORT(GGML_VIRTGPU "%s: failed to create the shared data memory pages", __func__);
"%s: failed to create the shared data memory pages", __func__);
return NULL; return NULL;
} }
if (virtgpu_handshake(gpu)) { if (virtgpu_handshake(gpu)) {
GGML_ABORT(GGML_VIRTGPU GGML_ABORT(GGML_VIRTGPU "%s: failed to handshake with the virglrenderer library", __func__);
"%s: failed to handshake with the virglrenderer library", __func__);
return NULL; return NULL;
} }
if (virtgpu_load_library(gpu) != APIR_LOAD_LIBRARY_SUCCESS) { if (virtgpu_load_library(gpu) != APIR_LOAD_LIBRARY_SUCCESS) {
GGML_ABORT(GGML_VIRTGPU GGML_ABORT(GGML_VIRTGPU "%s: failed to load the backend library", __func__);
"%s: failed to load the backend library", __func__);
return NULL; return NULL;
} }
@ -243,8 +235,7 @@ static virt_gpu_result_t virtgpu_open(virtgpu * gpu) {
drmDevicePtr devs[8]; drmDevicePtr devs[8];
int count = drmGetDevices2(0, devs, ARRAY_SIZE(devs)); int count = drmGetDevices2(0, devs, ARRAY_SIZE(devs));
if (count < 0) { if (count < 0) {
GGML_LOG_ERROR(GGML_VIRTGPU GGML_LOG_ERROR(GGML_VIRTGPU "%s: failed to enumerate DRM devices\n", __func__);
"%s: failed to enumerate DRM devices\n", __func__);
return APIR_ERROR_INITIALIZATION_FAILED; return APIR_ERROR_INITIALIZATION_FAILED;
} }
@ -266,19 +257,17 @@ static virt_gpu_result_t virtgpu_open_device(virtgpu * gpu, const drmDevicePtr d
int fd = open(node_path, O_RDWR | O_CLOEXEC); int fd = open(node_path, O_RDWR | O_CLOEXEC);
if (fd < 0) { if (fd < 0) {
GGML_ABORT(GGML_VIRTGPU GGML_ABORT(GGML_VIRTGPU "%s: failed to open %s", __func__, node_path);
"%s: failed to open %s", __func__, node_path);
return APIR_ERROR_INITIALIZATION_FAILED; return APIR_ERROR_INITIALIZATION_FAILED;
} }
drmVersionPtr version = drmGetVersion(fd); drmVersionPtr version = drmGetVersion(fd);
if (!version || strcmp(version->name, "virtio_gpu") || version->version_major != 0) { if (!version || strcmp(version->name, "virtio_gpu") || version->version_major != 0) {
if (version) { if (version) {
GGML_LOG_ERROR(GGML_VIRTGPU GGML_LOG_ERROR(GGML_VIRTGPU "%s: unknown DRM driver %s version %d\n", __func__, version->name,
"%s: unknown DRM driver %s version %d\n", __func__, version->name, version->version_major); version->version_major);
} else { } else {
GGML_LOG_ERROR(GGML_VIRTGPU GGML_LOG_ERROR(GGML_VIRTGPU "%s: failed to get DRM driver version\n", __func__);
"%s: failed to get DRM driver version\n", __func__);
} }
if (version) { if (version) {
@ -322,9 +311,8 @@ static virt_gpu_result_t virtgpu_init_capset(virtgpu * gpu) {
virtgpu_ioctl_get_caps(gpu, gpu->capset.id, gpu->capset.version, &gpu->capset.data, sizeof(gpu->capset.data)); virtgpu_ioctl_get_caps(gpu, gpu->capset.id, gpu->capset.version, &gpu->capset.data, sizeof(gpu->capset.data));
if (ret) { if (ret) {
GGML_LOG_ERROR(GGML_VIRTGPU GGML_LOG_ERROR(GGML_VIRTGPU "%s: failed to get APIR v%d capset: %s\n", __func__, gpu->capset.version,
"%s: failed to get APIR v%d capset: %s\n", strerror(errno));
__func__, gpu->capset.version, strerror(errno));
return APIR_ERROR_INITIALIZATION_FAILED; return APIR_ERROR_INITIALIZATION_FAILED;
} }
@ -547,13 +535,10 @@ static void log_call_duration(long long call_duration_ns, const char * name) {
double call_duration_s = (double) call_duration_ns / 1e9; // 1 second = 1e9 nanoseconds double call_duration_s = (double) call_duration_ns / 1e9; // 1 second = 1e9 nanoseconds
if (call_duration_s > 1) { if (call_duration_s > 1) {
GGML_LOG_INFO(GGML_VIRTGPU GGML_LOG_INFO(GGML_VIRTGPU "waited %.2fs for the %s host reply...\n", call_duration_s, name);
"waited %.2fs for the %s host reply...\n", call_duration_s, name);
} else if (call_duration_ms > 1) { } else if (call_duration_ms > 1) {
GGML_LOG_INFO(GGML_VIRTGPU GGML_LOG_INFO(GGML_VIRTGPU "waited %.2fms for the %s host reply...\n", call_duration_ms, name);
"waited %.2fms for the %s host reply...\n", call_duration_ms, name);
} else { } else {
GGML_LOG_INFO(GGML_VIRTGPU GGML_LOG_INFO(GGML_VIRTGPU "waited %lldns for the %s host reply...\n", call_duration_ns, name);
"waited %lldns for the %s host reply...\n", call_duration_ns, name);
} }
} }

View File

@ -1,5 +1,6 @@
#pragma once #pragma once
// clang-format off
#include "virtgpu-utils.h" #include "virtgpu-utils.h"
#include "virtgpu-shm.h" #include "virtgpu-shm.h"
#include "virtgpu-apir.h" #include "virtgpu-apir.h"
@ -23,20 +24,21 @@
#include "apir_hw.h" #include "apir_hw.h"
#include <drm/virtgpu_drm.h> #include <drm/virtgpu_drm.h>
#include "venus_hw.h" #include "venus_hw.h"
// clang-format on
#ifndef VIRTGPU_DRM_CAPSET_APIR #ifndef VIRTGPU_DRM_CAPSET_APIR
// Will be defined include/drm/virtgpu_drm.h when // Will be defined include/drm/virtgpu_drm.h when
// https://gitlab.freedesktop.org/virgl/virglrenderer/-/merge_requests/1590/diffs // https://gitlab.freedesktop.org/virgl/virglrenderer/-/merge_requests/1590/diffs
// is merged // is merged
#define VIRTGPU_DRM_CAPSET_APIR 10 # define VIRTGPU_DRM_CAPSET_APIR 10
#endif #endif
// Mesa/Virlgrenderer Venus internal. Only necessary during the // Mesa/Virlgrenderer Venus internal. Only necessary during the
// Venus->APIR transition in Virglrenderer // Venus->APIR transition in Virglrenderer
#define VENUS_COMMAND_TYPE_LENGTH 331 #define VENUS_COMMAND_TYPE_LENGTH 331
#ifndef VIRTGPU_DRM_CAPSET_VENUS // only available with Linux >= v6.16 #ifndef VIRTGPU_DRM_CAPSET_VENUS // only available with Linux >= v6.16
#define VIRTGPU_DRM_CAPSET_VENUS 4 # define VIRTGPU_DRM_CAPSET_VENUS 4
#endif #endif
typedef uint32_t virgl_renderer_capset; typedef uint32_t virgl_renderer_capset;