ggml: new backend for Virglrenderer API Remoting acceleration (v2) (#18718)

Kevin Pouget 2026-01-28 10:49:40 +01:00 committed by GitHub
parent 6ad70c5a77
commit b7feacf7f3
47 changed files with 4711 additions and 0 deletions


@ -67,6 +67,7 @@
/ggml/src/ggml-rpc/ @rgerganov
/ggml/src/ggml-threading.* @ggerganov
/ggml/src/ggml-vulkan/ @0cc4m
/ggml/src/ggml-virtgpu/ @kpouget
/ggml/src/ggml-webgpu/ @reeselevine
/ggml/src/ggml-zdnn/ @taronaeo @Andreas-Krebbel @AlekseiNikiforovIBM
/ggml/src/ggml.c @ggerganov


@ -228,6 +228,8 @@ option(GGML_WEBGPU_CPU_PROFILE "ggml: enable WebGPU profiling (CPU)
option(GGML_WEBGPU_GPU_PROFILE "ggml: enable WebGPU profiling (GPU)" OFF)
option(GGML_WEBGPU_JSPI "ggml: use JSPI for WebGPU" ON)
option(GGML_ZDNN "ggml: use zDNN" OFF)
option(GGML_VIRTGPU "ggml: use the VirtGPU/Virglrenderer API Remoting frontend" OFF)
option(GGML_VIRTGPU_BACKEND "ggml: build the VirtGPU/Virglrenderer API Remoting backend" OFF)
option(GGML_METAL "ggml: use Metal" ${GGML_METAL_DEFAULT})
option(GGML_METAL_NDEBUG "ggml: disable Metal debugging" OFF)
option(GGML_METAL_SHADER_DEBUG "ggml: compile Metal with -fno-fast-math" OFF)
@ -320,6 +322,7 @@ set(GGML_PUBLIC_HEADERS
include/ggml-opt.h
include/ggml-metal.h
include/ggml-rpc.h
include/ggml-virtgpu.h
include/ggml-sycl.h
include/ggml-vulkan.h
include/ggml-webgpu.h


@ -0,0 +1,16 @@
#pragma once
#include "ggml.h"
#include "ggml-backend.h"
#ifdef __cplusplus
extern "C" {
#endif
#define GGML_REMOTING_FRONTEND_NAME "RemotingFrontend"
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_virtgpu_reg();
#ifdef __cplusplus
}
#endif
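For reference, a minimal sketch of how an application built with the frontend enabled could use this header to enumerate the remote devices. It relies only on the generic ggml-backend registry helpers (ggml_backend_reg_name, ggml_backend_reg_dev_count, ggml_backend_reg_dev_get, ggml_backend_dev_name, ggml_backend_dev_description) from ggml-backend.h; the program itself is illustrative and not part of this commit:

#include "ggml-backend.h"
#include "ggml-virtgpu.h"
#include <cstdio>

int main() {
    // register the API Remoting frontend and list the devices it forwards from the host
    ggml_backend_reg_t reg = ggml_backend_virtgpu_reg();
    if (reg == nullptr) {
        fprintf(stderr, "%s not available\n", GGML_REMOTING_FRONTEND_NAME);
        return 1;
    }
    for (size_t i = 0; i < ggml_backend_reg_dev_count(reg); i++) {
        ggml_backend_dev_t dev = ggml_backend_reg_dev_get(reg, i);
        printf("%s device %zu: %s (%s)\n", ggml_backend_reg_name(reg), i,
               ggml_backend_dev_name(dev), ggml_backend_dev_description(dev));
    }
    return 0;
}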


@ -451,6 +451,7 @@ ggml_add_backend(HIP)
ggml_add_backend(METAL)
ggml_add_backend(MUSA)
ggml_add_backend(RPC)
ggml_add_backend(VirtGPU)
ggml_add_backend(SYCL)
ggml_add_backend(Vulkan)
ggml_add_backend(WebGPU)


@ -69,6 +69,10 @@
#include "ggml-rpc.h" #include "ggml-rpc.h"
#endif #endif
#ifdef GGML_USE_VIRTGPU_FRONTEND
#include "ggml-virtgpu.h"
#endif
#ifdef GGML_USE_CANN #ifdef GGML_USE_CANN
#include "ggml-cann.h" #include "ggml-cann.h"
#endif #endif
@ -180,7 +184,12 @@ struct ggml_backend_registry {
register_backend(ggml_backend_sycl_reg());
#endif
#ifdef GGML_USE_VULKAN
// allow disabling the Vulkan backend at runtime
if (getenv("GGML_DISABLE_VULKAN") == nullptr) {
register_backend(ggml_backend_vk_reg());
} else {
GGML_LOG_DEBUG("Vulkan backend disabled by GGML_DISABLE_VULKAN environment variable\n");
}
#endif
#ifdef GGML_USE_WEBGPU
register_backend(ggml_backend_webgpu_reg());
@ -188,6 +197,10 @@ struct ggml_backend_registry {
#ifdef GGML_USE_ZDNN
register_backend(ggml_backend_zdnn_reg());
#endif
#ifdef GGML_USE_VIRTGPU_FRONTEND
register_backend(ggml_backend_virtgpu_reg());
#endif
#ifdef GGML_USE_OPENCL
register_backend(ggml_backend_opencl_reg());
#endif
@ -604,6 +617,7 @@ void ggml_backend_load_all_from_path(const char * dir_path) {
ggml_backend_load_best("rpc", silent, dir_path); ggml_backend_load_best("rpc", silent, dir_path);
ggml_backend_load_best("sycl", silent, dir_path); ggml_backend_load_best("sycl", silent, dir_path);
ggml_backend_load_best("vulkan", silent, dir_path); ggml_backend_load_best("vulkan", silent, dir_path);
ggml_backend_load_best("virtgpu", silent, dir_path);
ggml_backend_load_best("opencl", silent, dir_path); ggml_backend_load_best("opencl", silent, dir_path);
ggml_backend_load_best("hexagon", silent, dir_path); ggml_backend_load_best("hexagon", silent, dir_path);
ggml_backend_load_best("musa", silent, dir_path); ggml_backend_load_best("musa", silent, dir_path);


@ -0,0 +1,70 @@
cmake_minimum_required(VERSION 3.19)
cmake_policy(SET CMP0114 NEW)
include(ExternalProject)
message(STATUS "Including the VirtGPU/Virglrenderer API Remoting")
# Download venus_hw.h from virglrenderer repository
ExternalProject_Add(
venus_hw_header
URL https://gitlab.freedesktop.org/virgl/virglrenderer/-/raw/virglrenderer-1.2.0/src/venus_hw.h
DOWNLOAD_NO_EXTRACT YES
DOWNLOAD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include
DOWNLOAD_NAME venus_hw.h
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
LOG_DOWNLOAD ON
)
if (NOT GGML_VIRTGPU_BACKEND STREQUAL "ONLY")
message(STATUS "Enable the VirtGPU/Virglrenderer API Remoting frontend library")
find_package(PkgConfig REQUIRED)
pkg_check_modules(DRM REQUIRED libdrm)
if (NOT GGML_BACKEND_DL)
# cannot simply use USE_VIRTGPU, as in the 'else()' case the
# frontend isn't compiled
target_compile_definitions(ggml PUBLIC "GGML_USE_VIRTGPU_FRONTEND")
endif()
ggml_add_backend_library(ggml-virtgpu
ggml-backend-buffer.cpp
ggml-backend.cpp
ggml-backend-device.cpp
ggml-backend-reg.cpp
ggml-backend-buffer-type.cpp
virtgpu-apir.h
virtgpu-forward.gen.h
virtgpu.cpp
virtgpu-shm.cpp
virtgpu-utils.cpp
virtgpu-forward-device.cpp
virtgpu-forward-buffer-type.cpp
virtgpu-forward-buffer.cpp
virtgpu-forward-backend.cpp
virtgpu-forward-impl.h
apir_cs_ggml-rpc-front.cpp
../../include/ggml-virtgpu.h)
target_include_directories(ggml-virtgpu PUBLIC /usr/include/libdrm/)
target_link_libraries(ggml-virtgpu PUBLIC ${DRM_LIBRARIES})
target_include_directories(ggml-virtgpu PUBLIC ${DRM_INCLUDE_DIRS})
target_compile_options(ggml-virtgpu PUBLIC ${DRM_CFLAGS_OTHER})
target_include_directories(ggml-virtgpu PUBLIC ./include)
target_include_directories(ggml-virtgpu PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
# Ensure venus_hw.h is downloaded before building ggml-virtgpu
add_dependencies(ggml-virtgpu venus_hw_header)
target_compile_options(ggml-virtgpu PRIVATE -std=c++20)
else()
message(STATUS "Not building the VirtGPU/Virglrenderer API Remoting frontend library")
endif()
if (NOT GGML_VIRTGPU_BACKEND STREQUAL "OFF")
add_subdirectory("backend")
endif()


@ -0,0 +1,87 @@
#include "backend/shared/apir_cs_rpc.h"
#include "ggml-backend-impl.h"
#include "ggml-impl.h"
#include "ggml-remoting.h"
#include <cinttypes>
#include <unordered_map>
#include <unordered_set>
#include <vector>
apir_rpc_tensor apir_serialize_tensor(const ggml_tensor * tensor) {
apir_rpc_tensor result;
result.id = reinterpret_cast<uint64_t>(tensor);
result.type = tensor->type;
if (tensor->buffer) {
ggml_backend_buffer_t buffer = tensor->buffer;
result.buffer = BUFFER_TO_HOST_HANDLE(buffer);
} else {
result.buffer = 0;
}
for (uint32_t i = 0; i < GGML_MAX_DIMS; i++) {
result.ne[i] = tensor->ne[i];
result.nb[i] = tensor->nb[i];
}
result.op = tensor->op;
for (uint32_t i = 0; i < GGML_MAX_OP_PARAMS / sizeof(int32_t); i++) {
result.op_params[i] = tensor->op_params[i];
}
result.flags = tensor->flags;
for (uint32_t i = 0; i < GGML_MAX_SRC; i++) {
result.src[i] = reinterpret_cast<uint64_t>(tensor->src[i]);
}
result.view_src = reinterpret_cast<uint64_t>(tensor->view_src);
result.view_offs = tensor->view_offs;
result.data = reinterpret_cast<uint64_t>(tensor->data);
if (tensor->data) {
if (!tensor->buffer) {
GGML_ABORT("tensor has data but not buffer");
}
// tensor->data is serialized as an offset to the buffer base address
result.data -= reinterpret_cast<uint64_t>(BUFFER_TO_GGML_CONTEXT(tensor->buffer)->base);
}
snprintf(result.name, GGML_MAX_NAME, "%s", tensor->name);
return result;
}
void apir_add_tensor(ggml_tensor * tensor,
std::vector<apir_rpc_tensor> & tensors,
std::unordered_set<ggml_tensor *> & visited) {
if (tensor == nullptr) {
return;
}
if (visited.find(tensor) != visited.end()) {
return;
}
visited.insert(tensor);
for (int i = 0; i < GGML_MAX_SRC; i++) {
apir_add_tensor(tensor->src[i], tensors, visited);
}
apir_add_tensor(tensor->view_src, tensors, visited);
tensors.push_back(apir_serialize_tensor(tensor));
}
void apir_serialize_graph(const ggml_cgraph * cgraph, std::vector<uint8_t> & output) {
uint32_t n_nodes = cgraph->n_nodes;
std::vector<apir_rpc_tensor> tensors;
std::unordered_set<ggml_tensor *> visited;
for (uint32_t i = 0; i < n_nodes; i++) {
apir_add_tensor(cgraph->nodes[i], tensors, visited);
}
// serialization format:
// | n_nodes (4 bytes) | nodes (n_nodes * sizeof(uint64_t)) | n_tensors (4 bytes) | tensors (n_tensors * sizeof(apir_rpc_tensor)) |
uint32_t n_tensors = tensors.size();
size_t output_size =
sizeof(uint32_t) + n_nodes * sizeof(uint64_t) + sizeof(uint32_t) + n_tensors * sizeof(apir_rpc_tensor);
output.resize(output_size, 0);
memcpy(output.data(), &n_nodes, sizeof(n_nodes));
for (uint32_t i = 0; i < n_nodes; i++) {
memcpy(output.data() + sizeof(n_nodes) + i * sizeof(uint64_t), &cgraph->nodes[i], sizeof(uint64_t));
}
uint32_t * out_ntensors = (uint32_t *) (output.data() + sizeof(n_nodes) + n_nodes * sizeof(uint64_t));
*out_ntensors = n_tensors;
apir_rpc_tensor * out_tensors =
(apir_rpc_tensor *) (output.data() + sizeof(n_nodes) + n_nodes * sizeof(uint64_t) + sizeof(uint32_t));
memcpy(out_tensors, tensors.data(), n_tensors * sizeof(apir_rpc_tensor));
}
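As a worked example of the layout in the comment above: a hypothetical graph with 2 nodes that (transitively) reference 3 distinct tensors serializes to sizeof(uint32_t) + 2*sizeof(uint64_t) + sizeof(uint32_t) + 3*sizeof(apir_rpc_tensor) bytes. A small sketch checking that invariant; the cgraph argument is assumed to hold such a graph, and this helper is not part of the commit:

#include "backend/shared/apir_cs_rpc.h"   // apir_rpc_tensor, same include as above
#include <cassert>
#include <cstdint>
#include <vector>

// sketch: verify the size of the buffer produced by apir_serialize_graph()
static void check_serialized_size(const ggml_cgraph * cgraph /* assumed: 2 nodes, 3 tensors */) {
    std::vector<uint8_t> output;
    apir_serialize_graph(cgraph, output);
    size_t expected = sizeof(uint32_t)              // n_nodes
                    + 2 * sizeof(uint64_t)          // node ids (guest-side tensor pointers)
                    + sizeof(uint32_t)              // n_tensors
                    + 3 * sizeof(apir_rpc_tensor);  // serialized tensor descriptors
    assert(output.size() == expected);
}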


@ -0,0 +1,21 @@
cmake_minimum_required(VERSION 3.19)
cmake_policy(SET CMP0114 NEW)
message(STATUS "Enable the VirtGPU/Virglrenderer backend library")
ggml_add_backend_library(ggml-virtgpu-backend
backend.cpp
backend-dispatched.cpp
backend-dispatched-backend.cpp
backend-dispatched-device.cpp
backend-dispatched-buffer.cpp
backend-dispatched-buffer-type.cpp
shared/api_remoting.h
shared/apir_backend.h
shared/apir_cs.h
apir_cs_ggml-rpc-back.cpp)
target_compile_options(ggml-virtgpu-backend PRIVATE -std=c++20)
# Add include directory for ggml-backend-impl.h and other core headers
target_include_directories(ggml-virtgpu-backend PRIVATE ../..)


@ -0,0 +1,115 @@
#include "ggml-backend-impl.h"
#include "ggml-impl.h"
#include "shared/apir_cs_rpc.h"
#include <cinttypes>
#include <unordered_map>
#include <unordered_set>
#include <vector>
std::unordered_set<ggml_backend_buffer_t> backend_buffers;
void apir_track_backend_buffer(ggml_backend_buffer_t buffer) {
backend_buffers.insert(buffer);
}
bool apir_untrack_backend_buffer(ggml_backend_buffer_t buffer) {
auto it = backend_buffers.find(buffer);
if (it == backend_buffers.end()) {
return false;
}
backend_buffers.erase(it);
return true;
}
std::unordered_set<ggml_backend_buffer_t> apir_get_track_backend_buffers() {
return backend_buffers;
}
ggml_tensor * apir_deserialize_tensor(ggml_context * ctx, const apir_rpc_tensor * tensor) {
ggml_tensor * result =
ggml_new_tensor_4d(ctx, (ggml_type) tensor->type, tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3]);
for (uint32_t i = 0; i < GGML_MAX_DIMS; i++) {
result->nb[i] = tensor->nb[i];
}
result->buffer = reinterpret_cast<ggml_backend_buffer_t>(tensor->buffer);
if (result->buffer && backend_buffers.find(result->buffer) == backend_buffers.end()) {
printf("WARNING: HOST BUFFER NOT FOUND | %p\n", (void *) result->buffer);
result->buffer = nullptr;
}
uint64_t tensor_data = tensor->data;
if (result->buffer) {
// require that the tensor data does not go beyond the buffer end
uint64_t tensor_size = (uint64_t) ggml_nbytes(result);
uint64_t buffer_start = (uint64_t) ggml_backend_buffer_get_base(result->buffer);
uint64_t buffer_size = (uint64_t) ggml_backend_buffer_get_size(result->buffer);
// tensor->data is serialized as an offset to the buffer base address
tensor_data += buffer_start;
GGML_ASSERT(tensor_data + tensor_size >= tensor_data); // check for overflow
GGML_ASSERT(tensor_data >= buffer_start && tensor_data + tensor_size <= buffer_start + buffer_size);
}
result->op = (ggml_op) tensor->op;
for (uint32_t i = 0; i < GGML_MAX_OP_PARAMS / sizeof(int32_t); i++) {
result->op_params[i] = tensor->op_params[i];
}
result->flags = tensor->flags;
result->data = reinterpret_cast<void *>(tensor_data);
ggml_set_name(result, tensor->name);
return result;
}
ggml_tensor * apir_create_node(uint64_t id,
ggml_context * ctx,
const std::unordered_map<uint64_t, const apir_rpc_tensor *> & tensor_ptrs,
std::unordered_map<uint64_t, ggml_tensor *> & tensor_map) {
if (id == 0) {
return nullptr;
}
if (tensor_map.find(id) != tensor_map.end()) {
return tensor_map[id];
}
const apir_rpc_tensor * tensor = tensor_ptrs.at(id);
ggml_tensor * result = apir_deserialize_tensor(ctx, tensor);
if (result == nullptr) {
return nullptr;
}
tensor_map[id] = result;
for (int i = 0; i < GGML_MAX_SRC; i++) {
result->src[i] = apir_create_node(tensor->src[i], ctx, tensor_ptrs, tensor_map);
}
result->view_src = apir_create_node(tensor->view_src, ctx, tensor_ptrs, tensor_map);
result->view_offs = tensor->view_offs;
return result;
}
ggml_cgraph * apir_deserialize_graph(uint32_t n_nodes,
uint32_t n_tensors,
const apir_rpc_tensor * tensors,
const uint64_t * nodes) {
size_t buf_size = ggml_tensor_overhead() * (n_nodes + n_tensors) + ggml_graph_overhead_custom(n_nodes, false);
ggml_init_params params = {
/*.mem_size =*/buf_size,
/*.mem_buffer =*/NULL,
/*.no_alloc =*/true,
};
ggml_context * ctx = ggml_init(params);
ggml_cgraph * graph = ggml_new_graph_custom(ctx, n_nodes, false);
graph->n_nodes = n_nodes;
std::unordered_map<uint64_t, const apir_rpc_tensor *> tensor_ptrs;
for (uint32_t i = 0; i < n_tensors; i++) {
tensor_ptrs[tensors[i].id] = &tensors[i];
}
std::unordered_map<uint64_t, ggml_tensor *> tensor_map;
for (uint32_t i = 0; i < n_nodes; i++) {
int64_t id;
memcpy(&id, &nodes[i], sizeof(id));
graph->nodes[i] = apir_create_node(id, ctx, tensor_ptrs, tensor_map);
}
return graph;
}


@ -0,0 +1,13 @@
#include "shared/apir_backend.h"
#define BUFFER_TO_HOST_HANDLE(name) ggml_buffer_to_apir_handle(name)
static inline apir_buffer_host_handle_t ggml_buffer_to_apir_handle(ggml_backend_buffer_t buffer) {
// in the backend, the buffer handle is the buffer pointer
return (apir_buffer_host_handle_t) buffer;
}
static inline apir_buffer_type_host_handle_t ggml_buffer_type_to_apir_handle(ggml_backend_buffer_type_t buft) {
// in the backend, the buffer handle is the buffer pointer
return (apir_buffer_type_host_handle_t) buft;
}
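Since the host-side handle is nothing more than the buffer pointer reinterpreted as an integer, the round trip is a pair of casts. A small sketch of the convention (illustrative, not part of the commit):

#include <cassert>

// sketch: the guest never dereferences the handle, it only echoes it back to the host
static void handle_round_trip(ggml_backend_buffer_t buffer) {
    apir_buffer_host_handle_t handle = BUFFER_TO_HOST_HANDLE(buffer);
    ggml_backend_buffer_t same = (ggml_backend_buffer_t) handle;
    assert(same == buffer);
}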


@ -0,0 +1,65 @@
#include "backend-dispatched.h"
#include "backend-virgl-apir.h"
#include "ggml-backend-impl.h"
#include "ggml-backend.h"
#include "ggml-impl.h"
#include "shared/apir_backend.h"
#include <cstdint>
uint32_t backend_backend_graph_compute(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) {
static bool async_backend_initialized = false;
static bool async_backend;
if (!async_backend_initialized) {
ggml_backend_dev_props props;
dev->iface.get_props(dev, &props);
async_backend = props.caps.async;
async_backend_initialized = true;
}
uint32_t shmem_res_id;
apir_decode_virtgpu_shmem_res_id(dec, &shmem_res_id);
const void * shmem_data = ctx->iface->get_shmem_ptr(ctx->ctx_id, shmem_res_id);
if (!shmem_data) {
GGML_LOG_ERROR("Couldn't get the shmem addr from virgl\n");
apir_decoder_set_fatal(dec);
return 1;
}
size_t cgraph_size;
apir_decode_size_t(dec, &cgraph_size);
apir_decoder secondary_dec = apir_new_decoder((const char *) shmem_data, cgraph_size);
ggml_cgraph * cgraph = apir_decode_ggml_cgraph(&secondary_dec, cgraph_size);
ggml_status status;
#if APIR_BACKEND_CHECK_SUPPORTS_OP == 1
for (int idx = 0; idx < cgraph->n_nodes; idx++) {
ggml_tensor * op = ggml_graph_node(cgraph, idx);
if (dev->iface.supports_op(dev, op)) {
continue;
}
GGML_LOG_ERROR("Graph node %d (%s) not supported by the backend\n", idx, ggml_op_desc(op));
status = GGML_STATUS_ABORTED;
apir_encode_ggml_status(enc, &status);
return 0;
}
#endif
status = bck->iface.graph_compute(bck, cgraph);
if (async_backend) {
bck->iface.synchronize(bck);
}
apir_encode_ggml_status(enc, &status);
return 0;
}


@ -0,0 +1,89 @@
#include "backend-dispatched.h"
#include "backend-virgl-apir.h"
#include "ggml-backend-impl.h"
#include "ggml-backend.h"
#include "ggml-impl.h"
#include <cstdint>
uint32_t backend_buffer_type_get_name(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) {
GGML_UNUSED(ctx);
ggml_backend_buffer_type_t buft;
buft = apir_decode_ggml_buffer_type(dec);
const char * string = buft->iface.get_name(buft);
const size_t string_size = strlen(string) + 1;
apir_encode_array_size(enc, string_size);
apir_encode_char_array(enc, string, string_size);
return 0;
}
uint32_t backend_buffer_type_get_alignment(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) {
GGML_UNUSED(ctx);
ggml_backend_buffer_type_t buft;
buft = apir_decode_ggml_buffer_type(dec);
size_t value = buft->iface.get_alignment(buft);
apir_encode_size_t(enc, &value);
return 0;
}
uint32_t backend_buffer_type_get_max_size(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) {
GGML_UNUSED(ctx);
ggml_backend_buffer_type_t buft;
buft = apir_decode_ggml_buffer_type(dec);
size_t value = buft->iface.get_max_size(buft);
apir_encode_size_t(enc, &value);
return 0;
}
uint32_t backend_buffer_type_is_host(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) {
GGML_UNUSED(ctx);
ggml_backend_buffer_type_t buft;
buft = apir_decode_ggml_buffer_type(dec);
bool is_host = buft->iface.is_host(buft);
apir_encode_bool_t(enc, &is_host);
return 0;
}
uint32_t backend_buffer_type_alloc_buffer(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) {
GGML_UNUSED(ctx);
ggml_backend_buffer_type_t buft;
buft = apir_decode_ggml_buffer_type(dec);
size_t size;
apir_decode_size_t(dec, &size);
ggml_backend_buffer_t buffer;
buffer = buft->iface.alloc_buffer(buft, size);
apir_encode_ggml_buffer(enc, buffer);
if (buffer) {
apir_track_backend_buffer(buffer);
}
return 0;
}
uint32_t backend_buffer_type_get_alloc_size(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) {
GGML_UNUSED(ctx);
ggml_backend_buffer_type_t buft;
buft = apir_decode_ggml_buffer_type(dec);
const ggml_tensor * op = apir_decode_ggml_tensor_inplace(dec);
size_t value = buft->iface.get_alloc_size(buft, op);
apir_encode_size_t(enc, &value);
return 0;
}


@ -0,0 +1,131 @@
#include "backend-dispatched.h"
#include "backend-virgl-apir.h"
#include "ggml-backend-impl.h"
#include "ggml-backend.h"
#include "ggml-impl.h"
#include <cstdint>
uint32_t backend_buffer_get_base(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) {
GGML_UNUSED(ctx);
ggml_backend_buffer_t buffer;
buffer = apir_decode_ggml_buffer(dec);
uintptr_t base = (uintptr_t) buffer->iface.get_base(buffer);
apir_encode_uintptr_t(enc, &base);
return 0;
}
uint32_t backend_buffer_set_tensor(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) {
GGML_UNUSED(enc);
ggml_backend_buffer_t buffer;
buffer = apir_decode_ggml_buffer(dec);
ggml_tensor * tensor;
// safe to remove the const qualifier here
tensor = (ggml_tensor *) (uintptr_t) apir_decode_ggml_tensor(dec);
uint32_t shmem_res_id;
apir_decode_virtgpu_shmem_res_id(dec, &shmem_res_id);
size_t offset;
apir_decode_size_t(dec, &offset);
size_t size;
apir_decode_size_t(dec, &size);
void * shmem_data = ctx->iface->get_shmem_ptr(ctx->ctx_id, shmem_res_id);
if (!shmem_data) {
GGML_LOG_ERROR("Couldn't get the shmem addr from virgl\n");
return 1;
}
buffer->iface.set_tensor(buffer, tensor, shmem_data, offset, size);
return 0;
}
uint32_t backend_buffer_get_tensor(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) {
GGML_UNUSED(enc);
ggml_backend_buffer_t buffer;
buffer = apir_decode_ggml_buffer(dec);
const ggml_tensor * tensor;
// safe to remove the const qualifier here
tensor = apir_decode_ggml_tensor(dec);
uint32_t shmem_res_id;
apir_decode_virtgpu_shmem_res_id(dec, &shmem_res_id);
size_t offset;
apir_decode_size_t(dec, &offset);
size_t size;
apir_decode_size_t(dec, &size);
void * shmem_data = ctx->iface->get_shmem_ptr(ctx->ctx_id, shmem_res_id);
if (!shmem_data) {
GGML_LOG_ERROR("Couldn't get the shmem addr from virgl\n");
return 1;
}
buffer->iface.get_tensor(buffer, tensor, shmem_data, offset, size);
return 0;
}
uint32_t backend_buffer_cpy_tensor(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) {
GGML_UNUSED(ctx);
ggml_backend_buffer_t buffer;
buffer = apir_decode_ggml_buffer(dec);
const ggml_tensor * src;
// safe to remove the const qualifier here
src = apir_decode_ggml_tensor(dec);
ggml_tensor * dst = (ggml_tensor *) (uintptr_t) apir_decode_ggml_tensor(dec);
bool ret = buffer->iface.cpy_tensor(buffer, src, (ggml_tensor *) dst);
apir_encode_bool_t(enc, &ret);
return 0;
}
uint32_t backend_buffer_clear(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) {
GGML_UNUSED(ctx);
GGML_UNUSED(enc);
ggml_backend_buffer_t buffer;
buffer = apir_decode_ggml_buffer(dec);
uint8_t value;
apir_decode_uint8_t(dec, &value);
buffer->iface.clear(buffer, value);
return 0;
}
uint32_t backend_buffer_free_buffer(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) {
GGML_UNUSED(ctx);
GGML_UNUSED(enc);
ggml_backend_buffer_t buffer;
buffer = apir_decode_ggml_buffer(dec);
if (!apir_untrack_backend_buffer(buffer)) {
GGML_LOG_WARN("%s: unknown buffer %p\n", __func__, (void *) buffer);
return 1;
}
buffer->iface.free_buffer(buffer);
return 0;
}


@ -0,0 +1,148 @@
#include "backend-dispatched.h"
#include "backend-virgl-apir.h"
#include "ggml-backend-impl.h"
#include "ggml-backend.h"
#include "ggml-impl.h"
#include <cstdint>
uint32_t backend_device_get_device_count(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) {
GGML_UNUSED(ctx);
GGML_UNUSED(dec);
int32_t dev_count = reg->iface.get_device_count(reg);
apir_encode_int32_t(enc, &dev_count);
return 0;
}
uint32_t backend_device_get_count(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) {
GGML_UNUSED(ctx);
GGML_UNUSED(dec);
int32_t dev_count = reg->iface.get_device_count(reg);
apir_encode_int32_t(enc, &dev_count);
return 0;
}
uint32_t backend_device_get_name(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) {
GGML_UNUSED(ctx);
GGML_UNUSED(dec);
const char * string = dev->iface.get_name(dev);
const size_t string_size = strlen(string) + 1;
apir_encode_array_size(enc, string_size);
apir_encode_char_array(enc, string, string_size);
return 0;
}
uint32_t backend_device_get_description(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) {
GGML_UNUSED(ctx);
GGML_UNUSED(dec);
const char * string = dev->iface.get_description(dev);
const size_t string_size = strlen(string) + 1;
apir_encode_array_size(enc, string_size);
apir_encode_char_array(enc, string, string_size);
return 0;
}
uint32_t backend_device_get_type(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) {
GGML_UNUSED(ctx);
GGML_UNUSED(dec);
uint32_t type = dev->iface.get_type(dev);
apir_encode_uint32_t(enc, &type);
return 0;
}
uint32_t backend_device_get_memory(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) {
GGML_UNUSED(ctx);
GGML_UNUSED(dec);
size_t free, total;
dev->iface.get_memory(dev, &free, &total);
apir_encode_size_t(enc, &free);
apir_encode_size_t(enc, &total);
return 0;
}
uint32_t backend_device_supports_op(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) {
GGML_UNUSED(ctx);
const ggml_tensor * op = apir_decode_ggml_tensor_inplace(dec);
bool supports_op = dev->iface.supports_op(dev, op);
apir_encode_bool_t(enc, &supports_op);
return 0;
}
uint32_t backend_device_get_buffer_type(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) {
GGML_UNUSED(ctx);
GGML_UNUSED(dec);
ggml_backend_buffer_type_t bufft = dev->iface.get_buffer_type(dev);
apir_encode_ggml_buffer_type(enc, bufft);
return 0;
}
uint32_t backend_device_get_props(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) {
GGML_UNUSED(ctx);
GGML_UNUSED(dec);
ggml_backend_dev_props props;
dev->iface.get_props(dev, &props);
apir_encode_bool_t(enc, &props.caps.async);
apir_encode_bool_t(enc, &props.caps.host_buffer);
apir_encode_bool_t(enc, &props.caps.buffer_from_host_ptr);
apir_encode_bool_t(enc, &props.caps.events);
return 0;
}
uint32_t backend_device_buffer_from_ptr(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) {
uint32_t shmem_res_id;
apir_decode_virtgpu_shmem_res_id(dec, &shmem_res_id);
void * shmem_ptr = ctx->iface->get_shmem_ptr(ctx->ctx_id, shmem_res_id);
if (!shmem_ptr) {
GGML_LOG_ERROR("Couldn't get the shmem addr from virgl\n");
apir_decoder_set_fatal(dec);
return 1;
}
size_t size;
apir_decode_size_t(dec, &size);
size_t max_tensor_size;
apir_decode_size_t(dec, &max_tensor_size);
ggml_backend_buffer_t buffer;
buffer = dev->iface.buffer_from_host_ptr(dev, shmem_ptr, size, max_tensor_size);
apir_encode_ggml_buffer(enc, buffer);
apir_encode_ggml_buffer_type(enc, buffer->buft);
if (buffer) {
apir_track_backend_buffer(buffer);
}
return 0;
}


@ -0,0 +1,46 @@
#include "backend-dispatched.h"
#include "backend-virgl-apir.h"
#include "ggml-backend-impl.h"
#include "ggml-backend.h"
#include "ggml-impl.h"
#include <cstdint>
ggml_backend_reg_t reg = NULL;
ggml_backend_dev_t dev = NULL;
ggml_backend_t bck = NULL;
uint64_t timer_start = 0;
uint64_t timer_total = 0;
uint64_t timer_count = 0;
uint32_t backend_dispatch_initialize(void * ggml_backend_reg_fct_p) {
if (reg != NULL) {
GGML_LOG_WARN("%s: already initialized\n", __func__);
return APIR_BACKEND_INITIALIZE_ALREADY_INITED;
}
ggml_backend_reg_t (*ggml_backend_reg_fct)(void) = (ggml_backend_reg_t (*)()) ggml_backend_reg_fct_p;
reg = ggml_backend_reg_fct();
if (reg == NULL) {
GGML_LOG_ERROR("%s: backend registration failed\n", __func__);
return APIR_BACKEND_INITIALIZE_BACKEND_REG_FAILED;
}
if (!reg->iface.get_device_count(reg)) {
GGML_LOG_ERROR("%s: backend initialization failed: no device found\n", __func__);
return APIR_BACKEND_INITIALIZE_NO_DEVICE;
}
dev = reg->iface.get_device(reg, 0);
if (!dev) {
GGML_LOG_ERROR("%s: backend initialization failed: no device received\n", __func__);
return APIR_BACKEND_INITIALIZE_NO_DEVICE;
}
bck = dev->iface.init_backend(dev, NULL);
return APIR_BACKEND_INITIALIZE_SUCCESS;
}


@ -0,0 +1,130 @@
#pragma once
/* device */
uint32_t backend_device_get_device_count(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
uint32_t backend_device_get_count(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
uint32_t backend_device_get_name(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
uint32_t backend_device_get_description(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
uint32_t backend_device_get_type(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
uint32_t backend_device_get_memory(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
uint32_t backend_device_supports_op(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
uint32_t backend_device_get_buffer_type(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
uint32_t backend_device_get_props(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
uint32_t backend_device_buffer_from_ptr(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
/* buffer-type */
uint32_t backend_buffer_type_get_name(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
uint32_t backend_buffer_type_get_alignment(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
uint32_t backend_buffer_type_get_max_size(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
uint32_t backend_buffer_type_is_host(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
uint32_t backend_buffer_type_alloc_buffer(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
uint32_t backend_buffer_type_get_alloc_size(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
/* buffer */
uint32_t backend_buffer_get_base(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
uint32_t backend_buffer_set_tensor(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
uint32_t backend_buffer_get_tensor(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
uint32_t backend_buffer_cpy_tensor(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
uint32_t backend_buffer_clear(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
uint32_t backend_buffer_free_buffer(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
/* backend */
uint32_t backend_backend_graph_compute(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
static inline const char * backend_dispatch_command_name(ApirBackendCommandType type) {
switch (type) {
/* device */
case APIR_COMMAND_TYPE_DEVICE_GET_DEVICE_COUNT:
return "backend_device_get_device_count";
case APIR_COMMAND_TYPE_DEVICE_GET_COUNT:
return "backend_device_get_count";
case APIR_COMMAND_TYPE_DEVICE_GET_NAME:
return "backend_device_get_name";
case APIR_COMMAND_TYPE_DEVICE_GET_DESCRIPTION:
return "backend_device_get_description";
case APIR_COMMAND_TYPE_DEVICE_GET_TYPE:
return "backend_device_get_type";
case APIR_COMMAND_TYPE_DEVICE_GET_MEMORY:
return "backend_device_get_memory";
case APIR_COMMAND_TYPE_DEVICE_SUPPORTS_OP:
return "backend_device_supports_op";
case APIR_COMMAND_TYPE_DEVICE_GET_BUFFER_TYPE:
return "backend_device_get_buffer_type";
case APIR_COMMAND_TYPE_DEVICE_GET_PROPS:
return "backend_device_get_props";
case APIR_COMMAND_TYPE_DEVICE_BUFFER_FROM_PTR:
return "backend_device_buffer_from_ptr";
/* buffer-type */
case APIR_COMMAND_TYPE_BUFFER_TYPE_GET_NAME:
return "backend_buffer_type_get_name";
case APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALIGNMENT:
return "backend_buffer_type_get_alignment";
case APIR_COMMAND_TYPE_BUFFER_TYPE_GET_MAX_SIZE:
return "backend_buffer_type_get_max_size";
case APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST:
return "backend_buffer_type_is_host";
case APIR_COMMAND_TYPE_BUFFER_TYPE_ALLOC_BUFFER:
return "backend_buffer_type_alloc_buffer";
case APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALLOC_SIZE:
return "backend_buffer_type_get_alloc_size";
/* buffer */
case APIR_COMMAND_TYPE_BUFFER_GET_BASE:
return "backend_buffer_get_base";
case APIR_COMMAND_TYPE_BUFFER_SET_TENSOR:
return "backend_buffer_set_tensor";
case APIR_COMMAND_TYPE_BUFFER_GET_TENSOR:
return "backend_buffer_get_tensor";
case APIR_COMMAND_TYPE_BUFFER_CPY_TENSOR:
return "backend_buffer_cpy_tensor";
case APIR_COMMAND_TYPE_BUFFER_CLEAR:
return "backend_buffer_clear";
case APIR_COMMAND_TYPE_BUFFER_FREE_BUFFER:
return "backend_buffer_free_buffer";
/* backend */
case APIR_COMMAND_TYPE_BACKEND_GRAPH_COMPUTE:
return "backend_backend_graph_compute";
default:
return "unknown";
}
}
extern "C" {
static const backend_dispatch_t apir_backend_dispatch_table[APIR_BACKEND_DISPATCH_TABLE_COUNT] = {
/* device */
/* APIR_COMMAND_TYPE_DEVICE_GET_DEVICE_COUNT = */ backend_device_get_device_count,
/* APIR_COMMAND_TYPE_DEVICE_GET_COUNT = */ backend_device_get_count,
/* APIR_COMMAND_TYPE_DEVICE_GET_NAME = */ backend_device_get_name,
/* APIR_COMMAND_TYPE_DEVICE_GET_DESCRIPTION = */ backend_device_get_description,
/* APIR_COMMAND_TYPE_DEVICE_GET_TYPE = */ backend_device_get_type,
/* APIR_COMMAND_TYPE_DEVICE_GET_MEMORY = */ backend_device_get_memory,
/* APIR_COMMAND_TYPE_DEVICE_SUPPORTS_OP = */ backend_device_supports_op,
/* APIR_COMMAND_TYPE_DEVICE_GET_BUFFER_TYPE = */ backend_device_get_buffer_type,
/* APIR_COMMAND_TYPE_DEVICE_GET_PROPS = */ backend_device_get_props,
/* APIR_COMMAND_TYPE_DEVICE_BUFFER_FROM_PTR = */ backend_device_buffer_from_ptr,
/* buffer-type */
/* APIR_COMMAND_TYPE_BUFFER_TYPE_GET_NAME = */ backend_buffer_type_get_name,
/* APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALIGNMENT = */ backend_buffer_type_get_alignment,
/* APIR_COMMAND_TYPE_BUFFER_TYPE_GET_MAX_SIZE = */ backend_buffer_type_get_max_size,
/* APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST = */ backend_buffer_type_is_host,
/* APIR_COMMAND_TYPE_BUFFER_TYPE_ALLOC_BUFFER = */ backend_buffer_type_alloc_buffer,
/* APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALLOC_SIZE = */ backend_buffer_type_get_alloc_size,
/* buffer */
/* APIR_COMMAND_TYPE_BUFFER_GET_BASE = */ backend_buffer_get_base,
/* APIR_COMMAND_TYPE_BUFFER_SET_TENSOR = */ backend_buffer_set_tensor,
/* APIR_COMMAND_TYPE_BUFFER_GET_TENSOR = */ backend_buffer_get_tensor,
/* APIR_COMMAND_TYPE_BUFFER_CPY_TENSOR = */ backend_buffer_cpy_tensor,
/* APIR_COMMAND_TYPE_BUFFER_CLEAR = */ backend_buffer_clear,
/* APIR_COMMAND_TYPE_BUFFER_FREE_BUFFER = */ backend_buffer_free_buffer,
/* backend */
/* APIR_COMMAND_TYPE_BACKEND_GRAPH_COMPUTE = */ backend_backend_graph_compute,
};
}


@ -0,0 +1,23 @@
#pragma once
#include <cstdint>
#include <cstddef>
#include <ggml-backend.h>
#include "backend-convert.h"
#include "backend-virgl-apir.h"
#include "shared/apir_backend.h"
#include "shared/apir_cs.h"
#include "shared/apir_cs_ggml.h"
struct virgl_apir_context {
uint32_t ctx_id;
virgl_apir_callbacks * iface;
};
typedef uint32_t (*backend_dispatch_t)(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
#include "backend-dispatched.gen.h"
uint32_t backend_dispatch_initialize(void * ggml_backend_reg_fct_p);


@ -0,0 +1,32 @@
#pragma once
#include "ggml-backend-impl.h"
#include "ggml-backend.h"
#include "ggml-impl.h"
#include "shared/api_remoting.h"
#include <cstdarg>
#include <cstdio>
#include <cstdlib>
extern ggml_backend_reg_t reg;
extern ggml_backend_dev_t dev;
extern ggml_backend_t bck;
struct virgl_apir_callbacks {
const char * (*get_config)(uint32_t virgl_ctx_id, const char * key);
void * (*get_shmem_ptr)(uint32_t virgl_ctx_id, uint32_t res_id);
};
extern "C" {
ApirLoadLibraryReturnCode apir_backend_initialize(uint32_t virgl_ctx_id, struct virgl_apir_callbacks *virgl_cbs);
void apir_backend_deinit(uint32_t virgl_ctx_id);
uint32_t apir_backend_dispatcher(uint32_t virgl_ctx_id,
virgl_apir_callbacks * virgl_cbs,
uint32_t cmd_type,
char * dec_cur,
const char * dec_end,
char * enc_cur,
const char * enc_end,
char ** enc_cur_after);
}
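A rough sketch of how the host side (virglrenderer) could consume this extern "C" ABI by loading the backend library dynamically. The library name, the context id and the callback wiring are illustrative assumptions, not part of this commit:

#include "backend-virgl-apir.h"   // virgl_apir_callbacks, ApirLoadLibraryReturnCode
#include <dlfcn.h>
#include <cstdio>

typedef ApirLoadLibraryReturnCode (*apir_init_fn)(uint32_t, virgl_apir_callbacks *);

// sketch: dlopen the ggml-virtgpu backend and run its initialization entry point
static void * load_apir_backend(uint32_t ctx_id, virgl_apir_callbacks * cbs) {
    void * handle = dlopen("libggml-virtgpu-backend.so", RTLD_LAZY);  // illustrative name
    if (!handle) {
        fprintf(stderr, "dlopen failed: %s\n", dlerror());
        return nullptr;
    }
    apir_init_fn init = (apir_init_fn) dlsym(handle, "apir_backend_initialize");
    if (!init) {
        fprintf(stderr, "apir_backend_initialize symbol not found\n");
        dlclose(handle);
        return nullptr;
    }
    ApirLoadLibraryReturnCode rc = init(ctx_id, cbs);
    if (rc != APIR_LOAD_LIBRARY_SUCCESS) {
        fprintf(stderr, "apir_backend_initialize failed (code %d)\n", (int) rc);
        dlclose(handle);
        return nullptr;
    }
    return handle;  // later: dlsym apir_backend_dispatcher / apir_backend_deinit the same way
}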


@ -0,0 +1,148 @@
#include "backend-dispatched.h"
#include "backend-virgl-apir.h"
#include "shared/api_remoting.h"
#include "shared/apir_backend.h"
#include "shared/apir_cs.h"
#include <dlfcn.h>
#include <ggml-backend.h>
#include <iostream>
#define APIR_LLAMA_CPP_GGML_LIBRARY_PATH_ENV "APIR_LLAMA_CPP_GGML_LIBRARY_PATH"
#define APIR_LLAMA_CPP_GGML_LIBRARY_REG_ENV "APIR_LLAMA_CPP_GGML_LIBRARY_REG"
#define APIR_LLAMA_CPP_LOG_TO_FILE_ENV "APIR_LLAMA_CPP_LOG_TO_FILE"
#define GGML_DEFAULT_BACKEND_REG "ggml_backend_init"
static void * backend_library_handle = NULL;
static FILE * apir_logfile = NULL;
static void log_to_file_callback(enum ggml_log_level level, const char * text, void * user_data) {
FILE * logfile = (FILE *)user_data;
fprintf(logfile, "[%d] %s", level, text);
fflush(logfile);
}
extern "C" {
void apir_backend_deinit(uint32_t virgl_ctx_id) {
GGML_UNUSED(virgl_ctx_id);
auto buffers = apir_get_track_backend_buffers();
for (const auto & buffer : buffers) {
apir_untrack_backend_buffer(buffer);
buffer->iface.free_buffer(buffer);
}
if (dev) {
size_t free, total;
dev->iface.get_memory(dev, &free, &total);
GGML_LOG_INFO("%s: free memory: %ld MB\n", __func__, (size_t) free / 1024 / 1024);
}
if (backend_library_handle) {
GGML_LOG_INFO("%s: The GGML backend library was loaded. Unloading it.\n", __func__);
dlclose(backend_library_handle);
backend_library_handle = NULL;
}
if (apir_logfile) {
fclose(apir_logfile);
apir_logfile = NULL;
}
}
#define APIR_GGML_LIBRARY_PATH_KEY "ggml.library.path"
#define APIR_GGML_LIBRARY_REG_KEY "ggml.library.reg"
ApirLoadLibraryReturnCode apir_backend_initialize(uint32_t virgl_ctx_id, struct virgl_apir_callbacks *virgl_cbs) {
const char * dlsym_error;
const char * apir_log_to_file = getenv(APIR_LLAMA_CPP_LOG_TO_FILE_ENV);
if (apir_log_to_file) {
apir_logfile = fopen(apir_log_to_file, "w");
if (apir_logfile) {
ggml_log_set(log_to_file_callback, apir_logfile);
} else {
GGML_LOG_INFO("Could not open the log file at '%s'\n", apir_log_to_file);
}
}
const char * library_name = virgl_cbs->get_config(virgl_ctx_id, APIR_GGML_LIBRARY_PATH_KEY);
const char * virgl_library_reg = virgl_cbs->get_config(virgl_ctx_id, APIR_GGML_LIBRARY_REG_KEY);
const char * library_reg = virgl_library_reg ? virgl_library_reg : GGML_DEFAULT_BACKEND_REG;
if (!library_name) {
GGML_LOG_ERROR("cannot open the GGML library: env var '%s' not defined\n", APIR_LLAMA_CPP_GGML_LIBRARY_PATH_ENV);
return APIR_LOAD_LIBRARY_ENV_VAR_MISSING;
}
backend_library_handle = dlopen(library_name, RTLD_LAZY);
if (!backend_library_handle) {
GGML_LOG_ERROR("cannot open the GGML library: %s\n", dlerror());
return APIR_LOAD_LIBRARY_CANNOT_OPEN;
}
if (!library_reg) {
GGML_LOG_ERROR("cannot register the GGML library: env var '%s' not defined\n", APIR_LLAMA_CPP_GGML_LIBRARY_REG_ENV);
return APIR_LOAD_LIBRARY_ENV_VAR_MISSING;
}
void * ggml_backend_reg_fct = dlsym(backend_library_handle, library_reg);
dlsym_error = dlerror();
if (dlsym_error) {
GGML_LOG_ERROR("cannot find the GGML backend registration symbol '%s' (from %s): %s\n", library_reg,
APIR_LLAMA_CPP_GGML_LIBRARY_REG_ENV, dlsym_error);
return APIR_LOAD_LIBRARY_SYMBOL_MISSING;
}
uint32_t ret = backend_dispatch_initialize(ggml_backend_reg_fct);
return (ApirLoadLibraryReturnCode) (APIR_LOAD_LIBRARY_INIT_BASE_INDEX + ret);
}
uint32_t apir_backend_dispatcher(uint32_t virgl_ctx_id,
virgl_apir_callbacks * virgl_cbs,
uint32_t cmd_type,
char * dec_cur,
const char * dec_end,
char * enc_cur,
const char * enc_end,
char ** enc_cur_after) {
apir_encoder enc = {
.cur = enc_cur,
.start = enc_cur,
.end = enc_end,
.fatal = false,
};
apir_decoder dec = {
.cur = dec_cur,
.end = dec_end,
.fatal = false,
};
virgl_apir_context ctx = {
.ctx_id = virgl_ctx_id,
.iface = virgl_cbs,
};
if (cmd_type >= APIR_BACKEND_DISPATCH_TABLE_COUNT) {
GGML_LOG_ERROR("Received an invalid dispatch index (%d >= %d)\n", cmd_type, APIR_BACKEND_DISPATCH_TABLE_COUNT);
return APIR_BACKEND_FORWARD_INDEX_INVALID;
}
backend_dispatch_t forward_fct = apir_backend_dispatch_table[cmd_type];
uint32_t ret = forward_fct(&enc, &dec, &ctx);
*enc_cur_after = enc.cur;
return ret;
}
}


@ -0,0 +1,90 @@
#pragma once
/* the rest of this file must match virglrenderer/src/apir-protocol.h */
#include <unistd.h>
#include <cstdint>
#define APIR_PROTOCOL_MAJOR 0
#define APIR_PROTOCOL_MINOR 1
#define APIR_HANDSHAKE_MAGIC 0xab1e
enum ApirCommandType {
APIR_COMMAND_TYPE_HANDSHAKE = 0,
APIR_COMMAND_TYPE_LOADLIBRARY = 1,
APIR_COMMAND_TYPE_FORWARD = 2,
APIR_COMMAND_TYPE_LENGTH = 3,
};
typedef uint64_t ApirCommandFlags;
enum ApirLoadLibraryReturnCode {
APIR_LOAD_LIBRARY_SUCCESS = 0,
APIR_LOAD_LIBRARY_HYPERCALL_INITIALIZATION_ERROR = 1,
APIR_LOAD_LIBRARY_ALREADY_LOADED = 2,
APIR_LOAD_LIBRARY_ENV_VAR_MISSING = 3,
APIR_LOAD_LIBRARY_CANNOT_OPEN = 4,
APIR_LOAD_LIBRARY_SYMBOL_MISSING = 5,
APIR_LOAD_LIBRARY_INIT_BASE_INDEX = 6, // anything above this is an APIR backend library initialization return code
};
enum ApirForwardReturnCode {
APIR_FORWARD_SUCCESS = 0,
APIR_FORWARD_NO_DISPATCH_FCT = 1,
APIR_FORWARD_TIMEOUT = 2,
APIR_FORWARD_BASE_INDEX = 3, // anything above this is an APIR backend library forward return code
};
__attribute__((unused)) static inline const char * apir_command_name(ApirCommandType type) {
switch (type) {
case APIR_COMMAND_TYPE_HANDSHAKE:
return "HandShake";
case APIR_COMMAND_TYPE_LOADLIBRARY:
return "LoadLibrary";
case APIR_COMMAND_TYPE_FORWARD:
return "Forward";
default:
return "unknown";
}
}
__attribute__((unused)) static const char * apir_load_library_error(ApirLoadLibraryReturnCode code) {
#define APIR_LOAD_LIBRARY_ERROR(code_name) \
do { \
if (code == code_name) \
return #code_name; \
} while (0)
APIR_LOAD_LIBRARY_ERROR(APIR_LOAD_LIBRARY_SUCCESS);
APIR_LOAD_LIBRARY_ERROR(APIR_LOAD_LIBRARY_HYPERCALL_INITIALIZATION_ERROR);
APIR_LOAD_LIBRARY_ERROR(APIR_LOAD_LIBRARY_ALREADY_LOADED);
APIR_LOAD_LIBRARY_ERROR(APIR_LOAD_LIBRARY_ENV_VAR_MISSING);
APIR_LOAD_LIBRARY_ERROR(APIR_LOAD_LIBRARY_CANNOT_OPEN);
APIR_LOAD_LIBRARY_ERROR(APIR_LOAD_LIBRARY_SYMBOL_MISSING);
APIR_LOAD_LIBRARY_ERROR(APIR_LOAD_LIBRARY_INIT_BASE_INDEX);
return "Unknown APIR_COMMAND_TYPE_LoadLibrary error";
#undef APIR_LOAD_LIBRARY_ERROR
}
__attribute__((unused)) static const char * apir_forward_error(ApirForwardReturnCode code) {
#define APIR_FORWARD_ERROR(code_name) \
do { \
if (code == code_name) \
return #code_name; \
} while (0)
APIR_FORWARD_ERROR(APIR_FORWARD_SUCCESS);
APIR_FORWARD_ERROR(APIR_FORWARD_NO_DISPATCH_FCT);
APIR_FORWARD_ERROR(APIR_FORWARD_TIMEOUT);
APIR_FORWARD_ERROR(APIR_FORWARD_BASE_INDEX);
return "Unknown APIR_COMMAND_TYPE_FORWARD error";
#undef APIR_FORWARD_ERROR
}
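Since apir_backend_initialize() (backend.cpp above) folds the backend library's own initialization status into the LoadLibrary code as APIR_LOAD_LIBRARY_INIT_BASE_INDEX + ret, a caller that wants a readable name has to undo that offset. A small sketch, assuming shared/apir_backend.h (below) is also included for apir_backend_initialize_error(); not part of the commit:

// sketch: map an ApirLoadLibraryReturnCode back to a human-readable name
static const char * describe_load_library_code(ApirLoadLibraryReturnCode code) {
    if (code >= APIR_LOAD_LIBRARY_INIT_BASE_INDEX) {
        // the part above the base index is the backend library initialization status
        return apir_backend_initialize_error((int) code - APIR_LOAD_LIBRARY_INIT_BASE_INDEX);
    }
    return apir_load_library_error(code);
}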


@ -0,0 +1,36 @@
typedef enum ApirBackendCommandType {
/* device */
APIR_COMMAND_TYPE_DEVICE_GET_DEVICE_COUNT = 0,
APIR_COMMAND_TYPE_DEVICE_GET_COUNT = 1,
APIR_COMMAND_TYPE_DEVICE_GET_NAME = 2,
APIR_COMMAND_TYPE_DEVICE_GET_DESCRIPTION = 3,
APIR_COMMAND_TYPE_DEVICE_GET_TYPE = 4,
APIR_COMMAND_TYPE_DEVICE_GET_MEMORY = 5,
APIR_COMMAND_TYPE_DEVICE_SUPPORTS_OP = 6,
APIR_COMMAND_TYPE_DEVICE_GET_BUFFER_TYPE = 7,
APIR_COMMAND_TYPE_DEVICE_GET_PROPS = 8,
APIR_COMMAND_TYPE_DEVICE_BUFFER_FROM_PTR = 9,
/* buffer-type */
APIR_COMMAND_TYPE_BUFFER_TYPE_GET_NAME = 10,
APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALIGNMENT = 11,
APIR_COMMAND_TYPE_BUFFER_TYPE_GET_MAX_SIZE = 12,
APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST = 13,
APIR_COMMAND_TYPE_BUFFER_TYPE_ALLOC_BUFFER = 14,
APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALLOC_SIZE = 15,
/* buffer */
APIR_COMMAND_TYPE_BUFFER_GET_BASE = 16,
APIR_COMMAND_TYPE_BUFFER_SET_TENSOR = 17,
APIR_COMMAND_TYPE_BUFFER_GET_TENSOR = 18,
APIR_COMMAND_TYPE_BUFFER_CPY_TENSOR = 19,
APIR_COMMAND_TYPE_BUFFER_CLEAR = 20,
APIR_COMMAND_TYPE_BUFFER_FREE_BUFFER = 21,
/* backend */
APIR_COMMAND_TYPE_BACKEND_GRAPH_COMPUTE = 22,
// last command_type index + 1
APIR_BACKEND_DISPATCH_TABLE_COUNT = 23,
} ApirBackendCommandType;


@ -0,0 +1,46 @@
#pragma once
#include "apir_backend.gen.h"
#include <stdint.h> // for uintptr_t
#include <time.h> // for timespec, clock_gettime
#define APIR_BACKEND_INITIALIZE_SUCCESS 0
#define APIR_BACKEND_INITIALIZE_CANNOT_OPEN_BACKEND_LIBRARY 1
#define APIR_BACKEND_INITIALIZE_CANNOT_OPEN_GGML_LIBRARY 2
#define APIR_BACKEND_INITIALIZE_MISSING_BACKEND_SYMBOLS 3
#define APIR_BACKEND_INITIALIZE_MISSING_GGML_SYMBOLS 4
#define APIR_BACKEND_INITIALIZE_BACKEND_FAILED 5
#define APIR_BACKEND_INITIALIZE_BACKEND_REG_FAILED 6
#define APIR_BACKEND_INITIALIZE_ALREADY_INITED 7
#define APIR_BACKEND_INITIALIZE_NO_DEVICE 8
// new entries here need to be added to the apir_backend_initialize_error function below
#define APIR_BACKEND_FORWARD_INDEX_INVALID 6
// 0 is fast, 1 keeps the backend from crashing if an unsupported tensor is received
#define APIR_BACKEND_CHECK_SUPPORTS_OP 0
typedef uintptr_t apir_buffer_type_host_handle_t;
typedef uintptr_t apir_buffer_host_handle_t;
static const char * apir_backend_initialize_error(int code) {
#define APIR_BACKEND_INITIALIZE_ERROR(code_name) \
do { \
if (code == code_name) \
return #code_name; \
} while (0)
APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_SUCCESS);
APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_CANNOT_OPEN_BACKEND_LIBRARY);
APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_CANNOT_OPEN_GGML_LIBRARY);
APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_MISSING_BACKEND_SYMBOLS);
APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_MISSING_GGML_SYMBOLS);
APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_BACKEND_FAILED);
return "Unknown APIR_BACKEND_INITIALIZE error:/";
#undef APIR_BACKEND_INITIALIZE_ERROR
}


@ -0,0 +1,383 @@
#pragma once
#include "ggml-impl.h"
#include <cassert>
#include <cstdlib>
#include <cstring>
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
struct apir_encoder {
char * cur;
const char * start;
const char * end;
bool fatal;
};
struct apir_decoder {
const char * cur;
const char * end;
bool fatal;
};
/*
* new encoder and decoder
*/
static apir_decoder apir_new_decoder(const char * ptr, size_t size) {
apir_decoder dec = {
.cur = ptr,
.end = ptr + size,
.fatal = false,
};
return dec;
}
static apir_encoder apir_new_encoder(char * ptr, size_t size) {
apir_encoder enc = {
.cur = ptr,
.start = ptr,
.end = ptr + size,
.fatal = false,
};
return enc;
}
/*
* fatal flag handling
*/
static inline void apir_encoder_reset_fatal(apir_encoder * enc) {
enc->fatal = false;
}
static inline void apir_encoder_set_fatal(apir_encoder * enc) {
enc->fatal = true;
}
static inline bool apir_encoder_get_fatal(const apir_encoder * enc) {
return enc->fatal;
}
static inline void apir_decoder_reset_fatal(apir_decoder * dec) {
dec->fatal = false;
}
static inline void apir_decoder_set_fatal(apir_decoder * dec) {
dec->fatal = true;
}
static inline bool apir_decoder_get_fatal(const apir_decoder * dec) {
return dec->fatal;
}
/*
* decoder peek
*/
static inline bool apir_decoder_peek_internal(apir_decoder * dec,
size_t size,
void * val,
size_t val_size) {
assert(val_size <= size);
if (unlikely(size > (size_t) (dec->end - dec->cur))) {
GGML_LOG_ERROR("reading too much from the decoder ...\n");
apir_decoder_set_fatal(dec);
memset(val, 0, val_size);
return false;
}
/* we should not rely on the compiler to optimize away memcpy... */
memcpy(val, dec->cur, val_size);
return true;
}
static inline void apir_decoder_peek(apir_decoder * dec, size_t size, void * val, size_t val_size) {
apir_decoder_peek_internal(dec, size, val, val_size);
}
static inline const void * apir_decoder_use_inplace(apir_decoder * dec, size_t size) {
if (unlikely(size > (size_t) (dec->end - dec->cur))) {
GGML_LOG_ERROR("reading too much from the decoder ...\n");
apir_decoder_set_fatal(dec);
return NULL;
}
const void * addr = dec->cur;
dec->cur += size;
return addr;
}
/*
* read/write
*/
static inline void apir_decoder_read(apir_decoder * dec, size_t size, void * val, size_t val_size) {
if (apir_decoder_peek_internal(dec, size, val, val_size)) {
dec->cur += size;
}
}
static inline char * apir_encoder_write(apir_encoder * enc, size_t size, const void * val, size_t val_size) {
assert(val_size <= size);
assert(size <= ((size_t) (enc->end - enc->cur)));
char * write_addr = enc->cur;
/* we should not rely on the compiler to optimize away memcpy... */
memcpy(write_addr, val, val_size);
enc->cur += size;
return write_addr;
}
/*
* encode/decode
*/
static inline void apir_decode(apir_decoder * dec, size_t size, void * data, size_t data_size) {
assert(size % 4 == 0);
apir_decoder_read(dec, size, data, data_size);
}
static inline void apir_encode(apir_encoder * enc, size_t size, const void * data, size_t data_size) {
assert(size % 4 == 0);
apir_encoder_write(enc, size, data, data_size);
}
/*
* typed encode/decode
*/
/* uint8_t */
static inline void apir_encode_uint8_t(apir_encoder * enc, const uint8_t * val) {
apir_encode(enc, sizeof(int), val, sizeof(*val));
}
static inline void apir_decode_uint8_t(apir_decoder * dec, uint8_t * val) {
apir_decode(dec, sizeof(int), val, sizeof(*val));
}
/* uint64_t */
static inline void apir_encode_uint64_t(apir_encoder * enc, const uint64_t * val) {
apir_encode(enc, 8, val, sizeof(*val));
}
static inline void apir_decode_uint64_t(apir_decoder * dec, uint64_t * val) {
apir_decode(dec, 8, val, sizeof(*val));
}
static inline void apir_encode_uint64_t_array(apir_encoder * enc, const uint64_t * val, uint32_t count) {
const size_t size = sizeof(*val) * count;
assert(size >= count);
apir_encode(enc, size, val, size);
}
static inline void apir_decode_uint64_t_array(apir_decoder * dec, uint64_t * val, uint32_t count) {
const size_t size = sizeof(*val) * count;
assert(size >= count);
apir_decode(dec, size, val, size);
}
static inline const uint64_t * apir_decode_uint64_t_array_inplace(apir_decoder * dec, uint32_t count) {
return (uint64_t *) (uintptr_t) apir_decoder_use_inplace(dec, count * sizeof(uint64_t));
}
/* int32_t */
static inline void apir_encode_int32_t(apir_encoder * enc, const int32_t * val) {
apir_encode(enc, 4, val, sizeof(*val));
}
static inline void apir_decode_int32_t(apir_decoder * dec, int32_t * val) {
apir_decode(dec, 4, val, sizeof(*val));
}
static inline void apir_encode_int32_t_array(apir_encoder * enc, const int32_t * val, uint32_t count) {
const size_t size = sizeof(*val) * count;
assert(size >= count);
apir_encode(enc, size, val, size);
}
static inline void apir_decode_int32_t_array(apir_decoder * dec, int32_t * val, uint32_t count) {
const size_t size = sizeof(*val) * count;
assert(size >= count);
apir_decode(dec, size, val, size);
}
/* array size (uint64_t) */
static inline void apir_encode_array_size(apir_encoder * enc, uint64_t size) {
apir_encode_uint64_t(enc, &size);
}
static inline uint64_t apir_decode_array_size(apir_decoder * dec, uint64_t expected_size) {
uint64_t size;
apir_decode_uint64_t(dec, &size);
if (size != expected_size) {
GGML_LOG_ERROR("Couldn't decode array from the decoder\n");
apir_decoder_set_fatal(dec);
size = 0;
}
return size;
}
static inline uint64_t apir_decode_array_size_unchecked(apir_decoder * dec) {
uint64_t size;
apir_decode_uint64_t(dec, &size);
return size;
}
/* non-array pointer */
static inline bool apir_encode_simple_pointer(apir_encoder * enc, const void * val) {
apir_encode_array_size(enc, val ? 1 : 0);
return val;
}
static inline bool apir_decode_simple_pointer(apir_decoder * dec) {
return apir_decode_array_size_unchecked(dec);
}
/* uint32_t */
static inline void apir_encode_uint32_t(apir_encoder * enc, const uint32_t * val) {
apir_encode(enc, 4, val, sizeof(*val));
}
static inline void apir_decode_uint32_t(apir_decoder * dec, uint32_t * val) {
apir_decode(dec, 4, val, sizeof(*val));
}
static inline void apir_encode_uint32_t_array(apir_encoder * enc, const uint32_t * val, uint32_t count) {
const size_t size = sizeof(*val) * count;
assert(size >= count);
apir_encode(enc, size, val, size);
}
static inline void apir_decode_uint32_t_array(apir_decoder * dec, uint32_t * val, uint32_t count) {
const size_t size = sizeof(*val) * count;
assert(size >= count);
apir_decode(dec, size, val, size);
}
/* size_t */
static inline void apir_encode_size_t(apir_encoder * enc, const size_t * val) {
const uint64_t tmp = *val;
apir_encode_uint64_t(enc, &tmp);
}
static inline void apir_decode_size_t(apir_decoder * dec, size_t * val) {
uint64_t tmp;
apir_decode_uint64_t(dec, &tmp);
*val = tmp;
}
static inline void apir_encode_size_t_array(apir_encoder * enc, const size_t * val, uint32_t count) {
if (sizeof(size_t) == sizeof(uint64_t)) {
apir_encode_uint64_t_array(enc, (const uint64_t *) val, count);
} else {
for (uint32_t i = 0; i < count; i++) {
apir_encode_size_t(enc, &val[i]);
}
}
}
static inline void apir_decode_size_t_array(apir_decoder * dec, size_t * val, uint32_t count) {
if (sizeof(size_t) == sizeof(uint64_t)) {
apir_decode_uint64_t_array(dec, (uint64_t *) val, count);
} else {
for (uint32_t i = 0; i < count; i++) {
apir_decode_size_t(dec, &val[i]);
}
}
}
/* opaque blob */
static inline void apir_encode_blob_array(apir_encoder * enc, const void * val, size_t size) {
apir_encode(enc, (size + 3) & ~3, val, size);
}
static inline void apir_decode_blob_array(apir_decoder * dec, void * val, size_t size) {
apir_decode(dec, (size + 3) & ~3, val, size);
}
/* string */
static inline void apir_encode_char_array(apir_encoder * enc, const char * val, size_t size) {
assert(size && strlen(val) < size);
apir_encode_blob_array(enc, val, size);
}
static inline void apir_decode_char_array(apir_decoder * dec, char * val, size_t size) {
apir_decode_blob_array(dec, val, size);
if (size) {
val[size - 1] = '\0';
} else {
GGML_LOG_ERROR("Couldn't decode the blog array\n");
apir_decoder_set_fatal(dec);
}
}
/* (temp) buffer allocation */
static inline void * apir_decoder_alloc_array(size_t size, size_t count) {
size_t alloc_size;
if (unlikely(__builtin_mul_overflow(size, count, &alloc_size))) {
GGML_LOG_ERROR("overflow in array allocation of %zu * %zu bytes\n", size, count);
return NULL;
}
return malloc(alloc_size);
}
/* bool */
static inline void apir_encode_bool_t(apir_encoder * enc, const bool * val) {
apir_encode(enc, sizeof(int), val, sizeof(bool));
}
static inline void apir_decode_bool_t(apir_decoder * dec, bool * val) {
apir_decode(dec, sizeof(int), val, sizeof(bool));
}
/* apir_buffer_type_host_handle_t */
static inline void apir_encode_apir_buffer_type_host_handle_t(apir_encoder * enc,
const apir_buffer_type_host_handle_t * val) {
apir_encode(enc, sizeof(apir_buffer_type_host_handle_t), val, sizeof(apir_buffer_type_host_handle_t));
}
static inline void apir_decode_apir_buffer_type_host_handle_t(apir_decoder * dec,
apir_buffer_type_host_handle_t * val) {
apir_decode(dec, sizeof(apir_buffer_type_host_handle_t), val, sizeof(apir_buffer_type_host_handle_t));
}
/* apir_buffer_host_handle_t */
static inline void apir_encode_apir_buffer_host_handle_t(apir_encoder * enc,
const apir_buffer_host_handle_t * val) {
apir_encode(enc, sizeof(apir_buffer_host_handle_t), val, sizeof(apir_buffer_host_handle_t));
}
static inline void apir_decode_apir_buffer_host_handle_t(apir_decoder * dec, apir_buffer_host_handle_t * val) {
apir_decode(dec, sizeof(apir_buffer_host_handle_t), val, sizeof(apir_buffer_host_handle_t));
}
/* uintptr_t */
static inline void apir_encode_uintptr_t(apir_encoder * enc, const uintptr_t * val) {
apir_encode(enc, sizeof(*val), val, sizeof(*val));
}
static inline void apir_decode_uintptr_t(apir_decoder * dec, uintptr_t * val) {
apir_decode(dec, sizeof(*val), val, sizeof(*val));
}
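To illustrate the wire conventions above (every field padded to a 4-byte multiple, size_t widened to uint64_t, bool padded to sizeof(int)), a minimal encode/decode round trip over a local scratch buffer; this is a usage sketch, not part of the commit:

// sketch: encode a few values and read them back with the matching decoders
static void apir_cs_round_trip_demo(void) {
    char buf[64];

    apir_encoder enc = apir_new_encoder(buf, sizeof(buf));
    uint32_t res_id = 42;
    size_t   size   = 4096;
    bool     ok     = true;
    apir_encode_uint32_t(&enc, &res_id);  // 4 bytes
    apir_encode_size_t(&enc, &size);      // widened to uint64_t: 8 bytes
    apir_encode_bool_t(&enc, &ok);        // padded to sizeof(int): 4 bytes
    assert(!apir_encoder_get_fatal(&enc));

    apir_decoder dec = apir_new_decoder(buf, (size_t) (enc.cur - enc.start));
    uint32_t res_id_out; size_t size_out; bool ok_out;
    apir_decode_uint32_t(&dec, &res_id_out);
    apir_decode_size_t(&dec, &size_out);
    apir_decode_bool_t(&dec, &ok_out);
    assert(!apir_decoder_get_fatal(&dec));
    assert(res_id_out == 42 && size_out == 4096 && ok_out);
}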


@ -0,0 +1,211 @@
#include "ggml-impl.h"
#include "apir_cs.h"
#include "apir_cs_rpc.h"
// ggml_buffer_to_apir_host_handle(ggml_backend_buffer_t buffer);
static inline void apir_encode_ggml_buffer_host_handle(apir_encoder * enc,
const apir_buffer_host_handle_t * handle);
static inline ggml_backend_buffer_t apir_decode_ggml_buffer(apir_decoder * dec);
/* apir_rpc_tensor */
static inline void apir_encode_rcp_tensor(apir_encoder * enc, const apir_rpc_tensor * apir_rpc_tensor) {
size_t apir_rpc_tensor_size = sizeof(*apir_rpc_tensor);
apir_encode(enc, apir_rpc_tensor_size, apir_rpc_tensor, apir_rpc_tensor_size);
}
static inline apir_rpc_tensor * apir_decode_apir_rpc_tensor_inplace(apir_decoder * dec) {
size_t apir_rpc_tensor_size = sizeof(apir_rpc_tensor);
return (apir_rpc_tensor *) (uintptr_t) apir_decoder_use_inplace(dec, apir_rpc_tensor_size);
}
static inline apir_rpc_tensor * apir_decode_apir_rpc_tensor_array_inplace(apir_decoder * dec,
uint32_t n_tensors) {
size_t apir_rpc_tensor_size = sizeof(apir_rpc_tensor) * n_tensors;
return (apir_rpc_tensor *) (uintptr_t) apir_decoder_use_inplace(dec, apir_rpc_tensor_size);
}
/* ggml_tensor */
static inline void apir_encode_ggml_tensor(apir_encoder * enc, const ggml_tensor * tensor) {
apir_rpc_tensor serialized = apir_serialize_tensor(tensor);
apir_encode_rcp_tensor(enc, &serialized);
}
static inline const ggml_tensor * apir_decode_ggml_tensor(apir_decoder * dec) {
const apir_rpc_tensor * apir_rpc_tensor = apir_decode_apir_rpc_tensor_inplace(dec);
ggml_init_params params{
/*.mem_size =*/ ggml_tensor_overhead(),
/*.mem_buffer =*/ NULL,
/*.no_alloc =*/ true,
};
ggml_context * ctx = ggml_init(params);
const ggml_tensor * tensor = apir_deserialize_tensor(ctx, apir_rpc_tensor);
return tensor;
}
/* *** ggml_backend_buffer_type_t *** */
// ggml_backend_buffer_type_t is a POINTER (to a struct).
// Only the host pointer is shared between the host and guest.
// The guest stores it in `buft->context`.
// The host simply writes the pointer address in the buffer variable.
static inline void apir_encode_ggml_buffer_type(apir_encoder * enc, ggml_backend_buffer_type_t buft) {
apir_buffer_type_host_handle_t handle = ggml_buffer_type_to_apir_handle(buft);
apir_encoder_write(enc, sizeof(handle), &handle, sizeof(handle));
}
static inline ggml_backend_buffer_type_t apir_decode_ggml_buffer_type(apir_decoder * dec) {
apir_buffer_type_host_handle_t handle;
apir_decoder_read(dec, sizeof(handle), &handle, sizeof(handle));
return (ggml_backend_buffer_type_t) handle;
}
static inline apir_buffer_type_host_handle_t apir_decode_apir_buffer_type_host_handle(apir_decoder * dec) {
apir_buffer_type_host_handle_t handle;
apir_decoder_read(dec, sizeof(handle), &handle, sizeof(handle));
return handle;
}
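// Illustrative round trip (a hedged sketch, not additional upstream code): the guest encodes the
// host handle it keeps in buft->context, and the host casts the received value back to its own
// ggml_backend_buffer_type_t pointer. `guest_buft`, `enc` and `dec` are placeholder names.
//
//   apir_encode_ggml_buffer_type(enc, guest_buft);                            // guest / frontend
//   ggml_backend_buffer_type_t host_buft = apir_decode_ggml_buffer_type(dec); // host / backend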
/* *** ggml_backend_buffer_t *** */
// ggml_backend_buffer_t is a POINTER.
// Same logic as for ggml_backend_buffer_type_t.
static inline void apir_encode_ggml_buffer(apir_encoder * enc, const ggml_backend_buffer_t buffer) {
apir_buffer_host_handle_t handle = BUFFER_TO_HOST_HANDLE(buffer);
apir_encoder_write(enc, sizeof(handle), &handle, sizeof(handle));
}
static inline ggml_backend_buffer_t apir_decode_ggml_buffer(apir_decoder * dec) {
ggml_backend_buffer_t buffer;
size_t buffer_ptr_size = sizeof(buffer);
apir_decoder_read(dec, buffer_ptr_size, &buffer, buffer_ptr_size);
return buffer;
}
/* enum ggml_status */
static inline void apir_encode_ggml_status(apir_encoder * enc, const ggml_status * status) {
apir_encoder_write(enc, sizeof(*status), status, sizeof(*status));
}
static inline void apir_decode_ggml_status(apir_decoder * dec, ggml_status * status) {
apir_decoder_read(dec, sizeof(*status), status, sizeof(*status));
}
/* virtgpu_shmem */
static inline void apir_encode_virtgpu_shmem_res_id(apir_encoder * enc, uint32_t shmem_res_id) {
apir_encode_uint32_t(enc, &shmem_res_id);
}
static inline void apir_decode_virtgpu_shmem_res_id(apir_decoder * dec, uint32_t * shmem_res_id) {
apir_decode_uint32_t(dec, shmem_res_id);
}
/* ggml_cgraph */
static inline size_t apir_serialize_ggml_cgraph(ggml_cgraph * cgraph, std::vector<uint8_t> & cgraph_data) {
apir_serialize_graph(cgraph, cgraph_data);
return cgraph_data.size();
}
static inline void apir_encode_cgraph_data(apir_encoder * enc, std::vector<uint8_t> & cgraph_data) {
size_t cgraph_size = cgraph_data.size();
apir_encode(enc, cgraph_size, cgraph_data.data(), cgraph_size);
}
static inline ggml_cgraph * apir_decode_ggml_cgraph(apir_decoder * dec, size_t cgraph_size) {
GGML_UNUSED(cgraph_size);
uint32_t n_nodes;
apir_decode_uint32_t(dec, &n_nodes);
const uint64_t * nodes = apir_decode_uint64_t_array_inplace(dec, n_nodes);
uint32_t n_tensors;
apir_decode_uint32_t(dec, &n_tensors);
const apir_rpc_tensor * tensors = apir_decode_apir_rpc_tensor_array_inplace(dec, n_tensors);
return apir_deserialize_graph(n_nodes, n_tensors, tensors, nodes);
}
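// Hedged sketch (not the actual apir_serialize_graph implementation): the decoder above expects
// the serialized cgraph buffer to be laid out as
//   [uint32 n_nodes][uint64 node_id x n_nodes][uint32 n_tensors][apir_rpc_tensor x n_tensors]
// where each node_id refers to an entry of the tensor array. A writer producing that layout could
// look roughly like this; apir_example_write_graph is a hypothetical name used only for illustration.
static inline void apir_example_write_graph(std::vector<uint8_t> &               out,
                                            const std::vector<uint64_t> &        node_ids,
                                            const std::vector<apir_rpc_tensor> & tensors) {
    auto append_bytes = [&](const void * data, size_t size) {
        const uint8_t * bytes = (const uint8_t *) data;
        out.insert(out.end(), bytes, bytes + size);
    };
    uint32_t n_nodes   = (uint32_t) node_ids.size();
    uint32_t n_tensors = (uint32_t) tensors.size();
    append_bytes(&n_nodes, sizeof(n_nodes));
    append_bytes(node_ids.data(), n_nodes * sizeof(uint64_t));
    append_bytes(&n_tensors, sizeof(n_tensors));
    append_bytes(tensors.data(), n_tensors * sizeof(apir_rpc_tensor));
}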
static inline void apir_encode_ggml_buffer_handle(apir_encoder * enc, const apir_buffer_host_handle_t * handle) {
    apir_encoder_write(enc, sizeof(*handle), handle, sizeof(*handle));
}
static inline void apir_encode_ggml_tensor_inline(apir_encoder * enc, const ggml_tensor * tensor) {
size_t tensor_size = sizeof(*tensor);
if (tensor->extra) {
GGML_ABORT("Cannot pass tensors with extra");
}
if (tensor->src[0] && tensor->buffer) {
static int first = 1;
if (first) {
GGML_LOG_WARN("Cannot pass tensors with src and buffer\n");
first = 0;
}
}
apir_encoder_write(enc, tensor_size, tensor, tensor_size);
// tensor->data is a pointer inside the device buffer. No need to touch it
// tensor->buffer is a pointer to a buffer. Encoding the buffer handle in sequence.
// (could also make a copy of the tensor, and update locally.)
if (tensor->buffer) {
apir_buffer_host_handle_t buffer_handle = ggml_buffer_to_apir_handle(tensor->buffer);
apir_encode_ggml_buffer_handle(enc, &buffer_handle);
}
if (tensor->view_src) {
apir_encoder_write(enc, tensor_size, tensor->view_src, tensor_size);
}
for (int i = 0; tensor->src[i]; i++) {
const ggml_tensor * tensor_src = tensor->src[i];
apir_encoder_write(enc, tensor_size, tensor_src, tensor_size);
}
}
static inline const ggml_tensor * apir_decode_ggml_tensor_inplace(apir_decoder * dec) {
    // it is safe to remove the `const` qualifier here: we *do* want to
// modify the shared memory data to fix the `src` pointers.
ggml_tensor * tensor = (ggml_tensor *) (uintptr_t) apir_decoder_use_inplace(dec, sizeof(ggml_tensor));
// tensor->data is a pointer inside the device buffer. No need to touch it
// tensor->buffer is a pointer to a buffer. Decode the buffer handle encoded in sequence.
if (tensor->buffer) {
tensor->buffer = apir_decode_ggml_buffer(dec);
}
if (tensor->view_src) {
ggml_tensor * tensor_view_src = (ggml_tensor *) (uintptr_t) apir_decoder_use_inplace(dec, sizeof(ggml_tensor));
tensor->view_src = tensor_view_src;
}
for (int i = 0; tensor->src[i]; i++) {
ggml_tensor * tensor_src = (ggml_tensor *) (uintptr_t) apir_decoder_use_inplace(dec, sizeof(ggml_tensor));
        tensor->src[i] = tensor_src; // overwrite tensor->src[i] with the actual location of the src tensor
}
return tensor;
}

View File

@ -0,0 +1,54 @@
#include "ggml.h"
#include "ggml-backend-impl.h"
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include <cstdint>
// ggml_tensor is serialized into apir_rpc_tensor
struct apir_rpc_tensor {
uint64_t id;
uint32_t type;
uint64_t buffer;
uint32_t ne[GGML_MAX_DIMS];
uint32_t nb[GGML_MAX_DIMS];
uint32_t op;
int32_t op_params[GGML_MAX_OP_PARAMS / sizeof(int32_t)];
int32_t flags;
uint64_t src[GGML_MAX_SRC];
uint64_t view_src;
uint64_t view_offs;
uint64_t data;
char name[GGML_MAX_NAME];
char padding[4];
};
/* frontend */
apir_rpc_tensor apir_serialize_tensor(const ggml_tensor * tensor);
void apir_serialize_graph(const ggml_cgraph * cgraph, std::vector<uint8_t> & output);
/* backend */
void apir_track_backend_buffer(ggml_backend_buffer_t buffer);
bool apir_untrack_backend_buffer(ggml_backend_buffer_t buffer);
std::unordered_set<ggml_backend_buffer_t> apir_get_track_backend_buffers();
void apir_add_tensor(ggml_tensor * tensor,
std::vector<apir_rpc_tensor> & tensors,
std::unordered_set<ggml_tensor *> & visited);
ggml_tensor * apir_deserialize_tensor(ggml_context * ctx, const apir_rpc_tensor * tensor);
ggml_tensor * apir_create_node(uint64_t id,
ggml_context * ctx,
const std::unordered_map<uint64_t, const apir_rpc_tensor *> & tensor_ptrs,
std::unordered_map<uint64_t, ggml_tensor *> & tensor_map);
ggml_cgraph * apir_deserialize_graph(uint32_t n_nodes,
uint32_t n_tensors,
const apir_rpc_tensor * tensors,
const uint64_t * nodes);
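/* Hedged sketch (not the actual implementation): how a ggml_tensor is expected to map onto
 * apir_rpc_tensor, following the ggml-rpc convention of using the tensor's guest address as its
 * id; the real apir_serialize_tensor() may differ in details. apir_example_serialize_tensor is a
 * hypothetical name used only for illustration. */
static inline apir_rpc_tensor apir_example_serialize_tensor(const ggml_tensor * tensor) {
    apir_rpc_tensor result = {};
    result.id     = (uint64_t) (uintptr_t) tensor;          // identity: the guest-side address
    result.type   = (uint32_t) tensor->type;
    result.buffer = (uint64_t) (uintptr_t) tensor->buffer;  // resolved to a host handle on the other side
    for (int i = 0; i < GGML_MAX_DIMS; i++) {
        result.ne[i] = (uint32_t) tensor->ne[i];
        result.nb[i] = (uint32_t) tensor->nb[i];
    }
    result.op = (uint32_t) tensor->op;
    for (size_t i = 0; i < GGML_MAX_OP_PARAMS / sizeof(int32_t); i++) {
        result.op_params[i] = tensor->op_params[i];
    }
    result.flags = tensor->flags;
    for (int i = 0; i < GGML_MAX_SRC; i++) {
        result.src[i] = (uint64_t) (uintptr_t) tensor->src[i];
    }
    result.view_src  = (uint64_t) (uintptr_t) tensor->view_src;
    result.view_offs = (uint64_t) tensor->view_offs;
    result.data      = (uint64_t) (uintptr_t) tensor->data;
    for (int i = 0; i < GGML_MAX_NAME; i++) {
        result.name[i] = tensor->name[i];
    }
    return result;
}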

View File

@ -0,0 +1,98 @@
#include "ggml-remoting.h"
static ggml_backend_buffer_t ggml_backend_remoting_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft,
size_t size) {
virtgpu * gpu = BUFT_TO_GPU(buft);
ggml_backend_remoting_buffer_context * context = (ggml_backend_remoting_buffer_context *) malloc(sizeof(*context));
if (!context) {
GGML_ABORT("Couldn't allocate the buffer context ...");
}
context->gpu = gpu;
bool async__unused, host_buffer__unused, events__unused;
bool buffer_from_host_ptr;
apir_device_get_props(gpu, &async__unused, &host_buffer__unused, &buffer_from_host_ptr, &events__unused);
if (buffer_from_host_ptr) {
context->apir_context = apir_device_buffer_from_ptr(gpu, size, size);
context->base = context->apir_context.shmem.mmap_ptr;
context->is_from_ptr = true;
} else {
context->apir_context = apir_buffer_type_alloc_buffer(gpu, buft, size);
context->is_from_ptr = false;
context->base = NULL;
}
ggml_backend_buffer_t buffer =
ggml_backend_buffer_init(buft, ggml_backend_remoting_buffer_interface, (void *) context, size);
return buffer;
}
static const char * ggml_backend_remoting_buffer_type_get_name(ggml_backend_buffer_type_t buft) {
virtgpu * gpu = BUFT_TO_GPU(buft);
return apir_buffer_type_get_name(gpu, buft);
}
static size_t ggml_backend_remoting_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) {
virtgpu * gpu = BUFT_TO_GPU(buft);
static size_t align = 0;
if (align == 0) {
align = apir_buffer_type_get_alignment(gpu, buft);
}
return align;
}
static size_t ggml_backend_remoting_buffer_type_get_max_size(ggml_backend_buffer_type_t buft) {
virtgpu * gpu = BUFT_TO_GPU(buft);
static size_t max_size = 0;
if (max_size == 0) {
max_size = apir_buffer_type_get_max_size(gpu, buft);
}
return max_size;
}
static bool ggml_backend_remoting_buffer_type_is_host(ggml_backend_buffer_type_t buft) {
virtgpu * gpu = BUFT_TO_GPU(buft);
return apir_buffer_type_is_host(gpu, buft);
}
static size_t ggml_backend_remoting_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft,
const ggml_tensor * tensor) {
virtgpu * gpu = BUFT_TO_GPU(buft);
if (tensor->buffer == NULL
|| !tensor->buffer->context
|| !buft->device->iface.supports_buft(buft->device, tensor->buffer->buft)) {
return ggml_nbytes(tensor);
}
return apir_buffer_type_get_alloc_size(gpu, buft, tensor);
}
const ggml_backend_buffer_type_i ggml_backend_remoting_buffer_type_interface = {
/* .get_name = */ ggml_backend_remoting_buffer_type_get_name,
/* .alloc_buffer = */ ggml_backend_remoting_buffer_type_alloc_buffer,
/* .get_alignment = */ ggml_backend_remoting_buffer_type_get_alignment,
/* .get_max_size = */ ggml_backend_remoting_buffer_type_get_max_size,
/* .get_alloc_size = */ ggml_backend_remoting_buffer_type_get_alloc_size,
/* .is_host = */ NULL,
};
const ggml_backend_buffer_type_i ggml_backend_remoting_buffer_from_ptr_type_interface = {
/* .get_name = */ ggml_backend_remoting_buffer_type_get_name,
/* .alloc_buffer = */ NULL,
/* .get_alignment = */ ggml_backend_remoting_buffer_type_get_alignment,
/* .get_max_size = */ ggml_backend_remoting_buffer_type_get_max_size,
/* .get_alloc_size = */ ggml_backend_remoting_buffer_type_get_alloc_size,
/* .is_host = */ NULL,
};

View File

@ -0,0 +1,119 @@
#include "ggml-remoting.h"
#define BUFFER_TO_GPU(name) ((ggml_backend_remoting_buffer_context *) (name)->context)->gpu
static void * ggml_backend_remoting_buffer_get_base(ggml_backend_buffer_t buffer) {
ggml_backend_remoting_buffer_context * context = (ggml_backend_remoting_buffer_context *) buffer->context;
if (context->base) {
return context->base;
}
context->base = apir_buffer_get_base(BUFFER_TO_GPU(buffer), BUFFER_TO_APIR_CONTEXT(buffer));
return context->base;
}
static void ggml_backend_remoting_buffer_set_tensor(ggml_backend_buffer_t buffer,
ggml_tensor * tensor,
const void * data,
size_t offset,
size_t size) {
virtgpu * gpu = BUFFER_TO_GPU(buffer);
ggml_backend_remoting_buffer_context * context = BUFFER_TO_GGML_CONTEXT(buffer);
if (context->is_from_ptr) {
memcpy((char *) tensor->data + offset, data, size);
} else {
apir_buffer_set_tensor(gpu, BUFFER_TO_APIR_CONTEXT(buffer), tensor, data, offset, size);
}
return;
}
static void ggml_backend_remoting_buffer_get_tensor(ggml_backend_buffer_t buffer,
const ggml_tensor * tensor,
void * data,
size_t offset,
size_t size) {
virtgpu * gpu = BUFFER_TO_GPU(buffer);
ggml_backend_remoting_buffer_context * context = BUFFER_TO_GGML_CONTEXT(buffer);
if (context->is_from_ptr) {
memcpy(data, (const char *) tensor->data + offset, size);
} else {
apir_buffer_get_tensor(gpu, BUFFER_TO_APIR_CONTEXT(buffer), tensor, data, offset, size);
}
}
static void ggml_backend_remoting_buffer_set_tensor_from_ptr(ggml_backend_buffer_t buffer,
ggml_tensor * tensor,
const void * data,
size_t offset,
size_t size) {
UNUSED(buffer);
memcpy((char *) tensor->data + offset, data, size);
return;
}
static void ggml_backend_remoting_buffer_get_tensor_from_ptr(ggml_backend_buffer_t buffer,
const ggml_tensor * tensor,
void * data,
size_t offset,
size_t size) {
UNUSED(buffer);
memcpy(data, (const char *) tensor->data + offset, size);
}
static bool ggml_backend_remoting_buffer_cpy_tensor(ggml_backend_buffer_t buffer,
const ggml_tensor * src,
ggml_tensor * dst) {
virtgpu * gpu = BUFFER_TO_GPU(buffer);
bool ret = apir_buffer_cpy_tensor(gpu, BUFFER_TO_APIR_CONTEXT(buffer), src, dst);
return ret;
}
static void ggml_backend_remoting_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
virtgpu * gpu = BUFFER_TO_GPU(buffer);
apir_buffer_clear(gpu, BUFFER_TO_APIR_CONTEXT(buffer), value);
return;
}
static void ggml_backend_remoting_buffer_free_buffer(ggml_backend_buffer_t buffer) {
virtgpu * gpu = BUFFER_TO_GPU(buffer);
apir_buffer_free_buffer(gpu, BUFFER_TO_APIR_CONTEXT(buffer));
ggml_backend_remoting_buffer_context * context = BUFFER_TO_GGML_CONTEXT(buffer);
free(context);
buffer->context = NULL;
}
const ggml_backend_buffer_i ggml_backend_remoting_buffer_interface = {
/* .free_buffer = */ ggml_backend_remoting_buffer_free_buffer,
/* .get_base = */ ggml_backend_remoting_buffer_get_base,
/* .init_tensor = */ NULL,
/* .memset_tensor = */ NULL,
/* .set_tensor = */ ggml_backend_remoting_buffer_set_tensor,
/* .get_tensor = */ ggml_backend_remoting_buffer_get_tensor,
/* .cpy_tensor = */ ggml_backend_remoting_buffer_cpy_tensor,
/* .clear = */ ggml_backend_remoting_buffer_clear,
/* .reset = */ NULL,
};
const ggml_backend_buffer_i ggml_backend_remoting_buffer_from_ptr_interface = {
/* .free_buffer = */ ggml_backend_remoting_buffer_free_buffer,
/* .get_base = */ ggml_backend_remoting_buffer_get_base,
/* .init_tensor = */ NULL,
/* .memset_tensor = */ NULL,
/* .set_tensor = */ ggml_backend_remoting_buffer_set_tensor_from_ptr,
/* .get_tensor = */ ggml_backend_remoting_buffer_get_tensor_from_ptr,
/* .cpy_tensor = */ ggml_backend_remoting_buffer_cpy_tensor,
/* .clear = */ ggml_backend_remoting_buffer_clear,
/* .reset = */ NULL,
};

View File

@ -0,0 +1,144 @@
#include "ggml-remoting.h"
static const char * ggml_backend_remoting_device_get_name(ggml_backend_dev_t dev) {
virtgpu * gpu = DEV_TO_GPU(dev);
return apir_device_get_name(gpu);
}
static const char * ggml_backend_remoting_device_get_description(ggml_backend_dev_t dev) {
virtgpu * gpu = DEV_TO_GPU(dev);
return apir_device_get_description(gpu);
}
static enum ggml_backend_dev_type ggml_backend_remoting_device_get_type(ggml_backend_dev_t dev) {
virtgpu * gpu = DEV_TO_GPU(dev);
static enum ggml_backend_dev_type type;
static bool has_type = false;
if (!has_type) {
has_type = true;
type = (enum ggml_backend_dev_type) apir_device_get_type(gpu);
}
return type;
}
static void ggml_backend_remoting_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
virtgpu * gpu = DEV_TO_GPU(dev);
return apir_device_get_memory(gpu, free, total);
}
static bool ggml_backend_remoting_device_supports_op(ggml_backend_dev_t dev, const ggml_tensor * op) {
#if USE_ALWAYS_TRUE_SUPPORTS_OP == 1
    /* ggml-rpc takes the same shortcut */
    /* with the current implementation of serialize_tensor, the src/view tensors aren't properly passed */
UNUSED(dev);
UNUSED(op);
return true;
#else
virtgpu * gpu = DEV_TO_GPU(dev);
return apir_device_supports_op(gpu, op);
#endif
}
static bool ggml_backend_remoting_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
bool supported = buft->device == dev;
return supported;
}
static bool ggml_backend_remoting_device_offload_op(ggml_backend_dev_t dev, const ggml_tensor * op) {
UNUSED(dev);
UNUSED(op);
return false;
}
static void ggml_backend_remoting_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) {
props->name = ggml_backend_remoting_device_get_name(dev);
props->description = ggml_backend_remoting_device_get_description(dev);
props->type = ggml_backend_remoting_device_get_type(dev);
ggml_backend_remoting_device_get_memory(dev, &props->memory_free, &props->memory_total);
virtgpu * gpu = DEV_TO_GPU(dev);
apir_device_get_props(gpu, &props->caps.async, &props->caps.host_buffer, &props->caps.buffer_from_host_ptr,
&props->caps.events);
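    // override the host-reported capabilities below: the frontend does not currently use them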
props->caps.buffer_from_host_ptr = false;
props->caps.async = false;
props->caps.events = false;
}
ggml_backend_buffer_type_t ggml_backend_remoting_device_get_buffer_type(ggml_backend_dev_t dev) {
virtgpu * gpu = DEV_TO_GPU(dev);
apir_buffer_type_host_handle_t ctx = apir_device_get_buffer_type(gpu);
static ggml_backend_buffer_type buft{
/* .iface = */ ggml_backend_remoting_buffer_type_interface,
/* .device = */ dev,
/* .context = */ (void *) ctx,
};
return &buft;
}
static ggml_backend_buffer_type_t ggml_backend_remoting_device_get_buffer_from_ptr_type(ggml_backend_dev_t dev) {
virtgpu * gpu = DEV_TO_GPU(dev);
apir_buffer_type_host_handle_t ctx = apir_device_get_buffer_type(gpu);
static ggml_backend_buffer_type buft{
/* .iface = */ ggml_backend_remoting_buffer_from_ptr_type_interface,
/* .device = */ dev,
/* .context = */ (void *) ctx,
};
return &buft;
}
static ggml_backend_buffer_t ggml_backend_remoting_device_buffer_from_ptr(ggml_backend_dev_t dev,
void * ptr,
size_t size,
size_t max_tensor_size) {
virtgpu * gpu = DEV_TO_GPU(dev);
ggml_backend_remoting_buffer_context * context = (ggml_backend_remoting_buffer_context *) malloc(sizeof(*context));
if (!context) {
GGML_ABORT("Couldn't allocate the buffer context ...");
}
context->gpu = gpu;
context->apir_context = apir_device_buffer_from_ptr(gpu, size, max_tensor_size);
context->base = ptr;
context->is_from_ptr = true;
ggml_backend_buffer_t buffer =
ggml_backend_buffer_init(ggml_backend_remoting_device_get_buffer_from_ptr_type(dev),
ggml_backend_remoting_buffer_from_ptr_interface, (void *) context, size);
return buffer;
}
const ggml_backend_device_i ggml_backend_remoting_device_interface = {
/* .get_name = */ ggml_backend_remoting_device_get_name,
/* .get_description = */ ggml_backend_remoting_device_get_description,
/* .get_memory = */ ggml_backend_remoting_device_get_memory,
/* .get_type = */ ggml_backend_remoting_device_get_type,
/* .get_props = */ ggml_backend_remoting_device_get_props,
/* .init_backend = */ ggml_backend_remoting_device_init,
/* .get_buffer_type = */ ggml_backend_remoting_device_get_buffer_type,
/* .get_host_buffer_type = */ NULL,
/* .buffer_from_host_ptr = */ ggml_backend_remoting_device_buffer_from_ptr,
/* .supports_op = */ ggml_backend_remoting_device_supports_op,
/* .supports_buft = */ ggml_backend_remoting_device_supports_buft,
/* .offload_op = */ ggml_backend_remoting_device_offload_op,
/* .event_new = */ NULL,
/* .event_free = */ NULL,
/* .event_synchronize = */ NULL,
};

View File

@ -0,0 +1,137 @@
#include "ggml-remoting.h"
#include "ggml-virtgpu.h"
#include <iostream>
#include <mutex>
static virtgpu * apir_initialize() {
static virtgpu * apir_gpu_instance = NULL;
static bool apir_initialized = false;
{
static std::mutex mutex;
std::lock_guard<std::mutex> lock(mutex);
if (apir_initialized) {
return apir_gpu_instance;
}
apir_gpu_instance = create_virtgpu();
if (!apir_gpu_instance) {
GGML_ABORT("failed to initialize the virtgpu");
}
apir_initialized = true;
}
return apir_gpu_instance;
}
static int ggml_backend_remoting_get_device_count() {
virtgpu * gpu = apir_initialize();
if (!gpu) {
GGML_LOG_WARN("apir_initialize failed\n");
return 0;
}
return apir_device_get_count(gpu);
}
static size_t ggml_backend_remoting_reg_get_device_count(ggml_backend_reg_t reg) {
UNUSED(reg);
return ggml_backend_remoting_get_device_count();
}
static std::vector<ggml_backend_dev_t> devices;
ggml_backend_dev_t ggml_backend_remoting_get_device(size_t device) {
GGML_ASSERT(device < devices.size());
return devices[device];
}
static void ggml_backend_remoting_reg_init_devices(ggml_backend_reg_t reg) {
if (devices.size() > 0) {
GGML_LOG_INFO("%s: already initialized\n", __func__);
return;
}
virtgpu * gpu = apir_initialize();
if (!gpu) {
GGML_LOG_ERROR("apir_initialize failed\n");
return;
}
static bool initialized = false;
{
static std::mutex mutex;
std::lock_guard<std::mutex> lock(mutex);
if (!initialized) {
for (int i = 0; i < ggml_backend_remoting_get_device_count(); i++) {
ggml_backend_remoting_device_context * ctx = new ggml_backend_remoting_device_context;
char desc[256] = "API Remoting device";
ctx->device = i;
ctx->name = GGML_REMOTING_FRONTEND_NAME + std::to_string(i);
ctx->description = desc;
ctx->gpu = gpu;
ggml_backend_dev_t dev = new ggml_backend_device{
/* .iface = */ ggml_backend_remoting_device_interface,
/* .reg = */ reg,
/* .context = */ ctx,
};
devices.push_back(dev);
}
initialized = true;
}
}
}
static ggml_backend_dev_t ggml_backend_remoting_reg_get_device(ggml_backend_reg_t reg, size_t device) {
UNUSED(reg);
return ggml_backend_remoting_get_device(device);
}
static const char * ggml_backend_remoting_reg_get_name(ggml_backend_reg_t reg) {
UNUSED(reg);
return GGML_REMOTING_FRONTEND_NAME;
}
static const ggml_backend_reg_i ggml_backend_remoting_reg_i = {
/* .get_name = */ ggml_backend_remoting_reg_get_name,
/* .get_device_count = */ ggml_backend_remoting_reg_get_device_count,
/* .get_device = */ ggml_backend_remoting_reg_get_device,
/* .get_proc_address = */ NULL,
};
ggml_backend_reg_t ggml_backend_virtgpu_reg() {
virtgpu * gpu = apir_initialize();
if (!gpu) {
GGML_LOG_ERROR("virtgpu_apir_initialize failed\n");
return NULL;
}
static ggml_backend_reg reg = {
/* .api_version = */ GGML_BACKEND_API_VERSION,
/* .iface = */ ggml_backend_remoting_reg_i,
/* .context = */ gpu,
};
static bool initialized = false;
if (initialized) {
return &reg;
}
initialized = true;
ggml_backend_remoting_reg_init_devices(&reg);
GGML_LOG_INFO("%s: initialized\n", __func__);
return &reg;
}
GGML_BACKEND_DL_IMPL(ggml_backend_virtgpu_reg)
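// Hedged usage sketch (not part of the upstream commit): once this backend is compiled in, an
// application enumerates the remoting devices through the regular ggml-backend registry API;
// nothing VirtGPU-specific is needed on the application side. example_list_remoting_devices is a
// hypothetical name used only for illustration.
static void example_list_remoting_devices() {
    ggml_backend_reg_t reg = ggml_backend_virtgpu_reg();
    if (!reg) {
        return;  // virtgpu initialization failed (e.g. not running inside a guest VM)
    }
    for (size_t i = 0; i < ggml_backend_reg_dev_count(reg); i++) {
        ggml_backend_dev_t dev = ggml_backend_reg_dev_get(reg, i);
        GGML_LOG_INFO("device %zu: %s (%s)\n", i, ggml_backend_dev_name(dev), ggml_backend_dev_description(dev));
        ggml_backend_t backend = ggml_backend_dev_init(dev, /* params */ NULL);
        // ... build and compute graphs with `backend` ...
        ggml_backend_free(backend);
    }
}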

View File

@ -0,0 +1,69 @@
#include "ggml-remoting.h"
#include "../../include/ggml-virtgpu.h"
static const char * ggml_backend_remoting_get_name(ggml_backend_t backend) {
UNUSED(backend);
return "API Remoting backend";
}
static void ggml_backend_remoting_free(ggml_backend_t backend) {
delete backend;
}
static ggml_status ggml_backend_remoting_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
virtgpu * gpu = DEV_TO_GPU(backend->device);
return apir_backend_graph_compute(gpu, cgraph);
}
static void ggml_backend_remoting_graph_optimize(ggml_backend_t backend, ggml_cgraph * cgraph) {
virtgpu * gpu = DEV_TO_GPU(backend->device);
#if true
UNUSED(gpu);
UNUSED(cgraph);
#else
// not working yet
apir_backend_graph_optimize(gpu, cgraph);
#endif
}
static ggml_backend_i ggml_backend_remoting_interface = {
/* .get_name = */ ggml_backend_remoting_get_name,
/* .free = */ ggml_backend_remoting_free,
/* .set_tensor_async = */ NULL, // ggml_backend_remoting_set_tensor_async,
/* .get_tensor_async = */ NULL, // ggml_backend_remoting_get_tensor_async,
/* .cpy_tensor_async = */ NULL, // ggml_backend_remoting_cpy_tensor_async,
/* .synchronize = */ NULL, // ggml_backend_remoting_synchronize,
/* .graph_plan_create = */ NULL,
/* .graph_plan_free = */ NULL,
/* .graph_plan_update = */ NULL,
/* .graph_plan_compute = */ NULL,
/* .graph_compute = */ ggml_backend_remoting_graph_compute,
/* .event_record = */ NULL,
/* .event_wait = */ NULL,
/* .graph_optimize = */ ggml_backend_remoting_graph_optimize,
};
static ggml_guid_t ggml_backend_remoting_guid() {
static ggml_guid guid = { 0xb8, 0xf7, 0x4f, 0x86, 0x14, 0x03, 0x86, 0x02,
0x91, 0xc8, 0xdd, 0xe9, 0x02, 0x3f, 0xc0, 0x2b };
return &guid;
}
ggml_backend_t ggml_backend_remoting_device_init(ggml_backend_dev_t dev, const char * params) {
UNUSED(params);
ggml_backend_remoting_device_context * ctx = (ggml_backend_remoting_device_context *) dev->context;
ggml_backend_t remoting_backend = new ggml_backend{
/* .guid = */ ggml_backend_remoting_guid(),
/* .interface = */ ggml_backend_remoting_interface,
/* .device = */ ggml_backend_reg_dev_get(ggml_backend_virtgpu_reg(), ctx->device),
/* .context = */ ctx,
};
return remoting_backend;
}

View File

@ -0,0 +1,68 @@
#pragma once
#include "ggml-backend-impl.h"
#include "ggml-backend.h"
#include "ggml-impl.h"
#include "virtgpu.h"
#include <memory>
#include <string>
// USE_ALWAYS_TRUE_SUPPORTS_OP: 1 is fast, 0 avoids micro-benchmark crashes
#define USE_ALWAYS_TRUE_SUPPORTS_OP 1
#define USE_METAL_GUEST_SUPPORTS_OP 0
#define DEV_TO_GPU(name) ((ggml_backend_remoting_device_context *) (name)->context)->gpu
#define BUFFER_TO_GGML_CONTEXT(name) ((ggml_backend_remoting_buffer_context *) (name)->context)
#define BUFFER_TO_APIR_CONTEXT(name) &((ggml_backend_remoting_buffer_context *) (name)->context)->apir_context
#define BUFFER_TO_HOST_HANDLE(name) ((ggml_backend_remoting_buffer_context *) (name)->context)->apir_context.host_handle
#define GET_DEVICE_CONTEXT() (ggml_backend_remoting_device_context *) ggml_backend_remoting_get_device(0)->context
#define BUFT_TO_GPU(name) ((ggml_backend_remoting_device_context *) (name)->device->context)->gpu
struct ggml_backend_remoting_device_context {
size_t device;
std::string name;
std::string description;
std::vector<std::tuple<void *, size_t, virtgpu_shmem *>> shared_memory;
virtgpu * gpu;
};
struct ggml_backend_remoting_buffer_context {
apir_buffer_context_t apir_context;
virtgpu * gpu;
void * base;
bool is_from_ptr;
};
extern const ggml_backend_buffer_type_i ggml_backend_remoting_buffer_type_interface;
extern const ggml_backend_device_i ggml_backend_remoting_device_interface;
extern const ggml_backend_buffer_i ggml_backend_remoting_buffer_interface;
extern const ggml_backend_buffer_type_i ggml_backend_remoting_buffer_from_ptr_type_interface;
extern const ggml_backend_buffer_i ggml_backend_remoting_buffer_from_ptr_interface;
ggml_backend_dev_t ggml_backend_remoting_get_device(size_t device);
ggml_backend_t ggml_backend_remoting_device_init(ggml_backend_dev_t dev, const char * params);
ggml_backend_buffer_type_t ggml_backend_remoting_device_get_buffer_type(ggml_backend_dev_t dev);
static inline apir_buffer_type_host_handle_t ggml_buffer_type_to_apir_handle(ggml_backend_buffer_type_t buft) {
    // the guest keeps the host-side buffer-type pointer in buft->context; that pointer is the handle
return (apir_buffer_type_host_handle_t) buft->context;
}
static inline apir_buffer_host_handle_t ggml_buffer_to_apir_handle(ggml_backend_buffer_t buffer) {
if (!buffer->context) {
GGML_ABORT("%s: no context available :/", __func__);
}
return BUFFER_TO_HOST_HANDLE(buffer);
}

View File

@ -0,0 +1,168 @@
# YAML schema for GGML remoting API functions
# This defines the structure for generating the remoting layer code
# Configuration for the generated files
config:
# Base path for the generated files
base_path: "ggml/src"
# Header files to update
files:
apir_backend_header: "ggml-virtgpu-apir/backend/shared/apir_backend.gen.h"
backend_dispatched_header: "ggml-virtgpu-apir/backend/backend-dispatched.gen.h"
virtgpu_forward_header: "ggml-virtgpu-apir/virtgpu-forward.gen.h"
# Simplified function definitions with grouping and metadata combined
functions:
device:
group_description: "device"
functions:
get_device_count:
# No specific metadata - uses default void return and base params
get_count:
frontend_return: "int"
get_name:
frontend_return: "const char *"
get_description:
frontend_return: "const char *"
get_type:
frontend_return: "uint32_t"
get_memory:
frontend_return: "void"
frontend_extra_params:
- "size_t *free"
- "size_t *total"
supports_op:
frontend_return: "bool"
frontend_extra_params:
- "const ggml_tensor *op"
get_buffer_type:
frontend_return: "apir_buffer_type_host_handle_t"
get_props:
frontend_return: "void"
frontend_extra_params:
- "bool *async"
- "bool *host_buffer"
- "bool *buffer_from_host_ptr"
- "bool *events"
buffer_from_ptr:
frontend_return: "apir_buffer_context_t"
frontend_extra_params:
- "size_t size"
- "size_t max_tensor_size"
buffer_type:
group_description: "buffer-type"
functions:
get_name:
frontend_return: "const char *"
frontend_extra_params:
- "ggml_backend_buffer_type_t buft"
get_alignment:
frontend_return: "size_t"
frontend_extra_params:
- "ggml_backend_buffer_type_t buft"
get_max_size:
frontend_return: "size_t"
frontend_extra_params:
- "ggml_backend_buffer_type_t buft"
is_host:
frontend_return: "bool"
frontend_extra_params:
- "ggml_backend_buffer_type_t buft"
alloc_buffer:
frontend_return: "apir_buffer_context_t"
frontend_extra_params:
- "ggml_backend_buffer_type_t buffer_buft"
- "size_t size"
get_alloc_size:
frontend_return: "size_t"
frontend_extra_params:
- "ggml_backend_buffer_type_t buft"
- "const ggml_tensor *op"
buffer:
group_description: "buffer"
functions:
get_base:
frontend_return: "void *"
frontend_extra_params:
- "apir_buffer_context_t *buffer_context"
set_tensor:
frontend_return: "void"
frontend_extra_params:
- "apir_buffer_context_t *buffer_context"
- "ggml_tensor *tensor"
- "const void *data"
- "size_t offset"
- "size_t size"
get_tensor:
frontend_return: "void"
frontend_extra_params:
- "apir_buffer_context_t *buffer_context"
- "const ggml_tensor *tensor"
- "void *data"
- "size_t offset"
- "size_t size"
cpy_tensor:
frontend_return: "bool"
frontend_extra_params:
- "apir_buffer_context_t *buffer_context"
- "const ggml_tensor *src"
- "const ggml_tensor *dst"
clear:
frontend_return: "void"
frontend_extra_params:
- "apir_buffer_context_t *buffer_context"
- "uint8_t value"
free_buffer:
frontend_return: "void"
frontend_extra_params:
- "apir_buffer_context_t *buffer_context"
backend:
group_description: "backend"
functions:
graph_compute:
frontend_return: "ggml_status"
frontend_extra_params:
- "ggml_cgraph *cgraph"
graph_optimize:
frontend_return: "ggml_cgraph *"
frontend_extra_params:
- "ggml_cgraph *cgraph"
enabled: false
# Naming patterns used for code generation
naming_patterns:
# How to generate enum names
enum_prefix: "APIR_COMMAND_TYPE_"
# How to generate backend function names
backend_function_prefix: "backend_"
# How to generate frontend function names
frontend_function_prefix: "apir_"
# Standard frontend first parameter
frontend_base_param: "struct virtgpu *gpu"

View File

@ -0,0 +1,9 @@
#pragma once
#include <stdint.h>
struct virgl_renderer_capset_apir {
uint32_t apir_version;
uint32_t supports_blob_resources;
uint32_t reserved[4]; // For future expansion
};

View File

@ -0,0 +1,322 @@
#!/usr/bin/env python3
"""
# Generated by Claude AI
Script to completely regenerate the GGML remoting codebase from YAML configuration.
This script reads ggmlremoting_functions.yaml and regenerates all the header files and
implementation templates for the GGML remoting layer.
Usage:
python regenerate_remoting.py
The script will:
1. Read ggmlremoting_functions.yaml configuration
2. Generate updated header files
3. Generate implementation templates in dedicated files
4. Show a summary of what was generated
"""
import yaml
from typing import Dict, List, Any
from pathlib import Path
import os
import subprocess
import shutil
import logging
NL = '\n' # can't have f"{'\n'}" in f-strings
class RemotingCodebaseGenerator:
def __init__(self, yaml_path: str = "ggmlremoting_functions.yaml"):
"""Initialize the generator with the YAML configuration."""
self.yaml_path = yaml_path
if not Path(yaml_path).exists():
raise FileNotFoundError(f"Configuration file {yaml_path} not found")
with open(yaml_path, 'r') as f:
self.config = yaml.safe_load(f)
self.functions = self.config['functions']
self.naming_patterns = self.config['naming_patterns']
self.config_data = self.config['config']
# Check if clang-format is available
self.clang_format_available = self._check_clang_format_available()
def _check_clang_format_available(self) -> bool:
"""Check if clang-format is available in the system PATH."""
return shutil.which("clang-format") is not None
def _format_file_with_clang_format(self, file_path: Path) -> bool:
"""Format a file with clang-format -i. Returns True if successful, False otherwise."""
if not self.clang_format_available:
return False
try:
subprocess.run(
["clang-format", "-i", str(file_path)],
check=True,
capture_output=True,
text=True
)
return True
except subprocess.CalledProcessError:
logging.exception(f" ⚠️ clang-format failed for {file_path}")
return False
except Exception as e:
logging.exception(f" ⚠️ Unexpected error formatting {file_path}: {e}")
return False
def generate_enum_name(self, group_name: str, function_name: str) -> str:
"""Generate the APIR_COMMAND_TYPE enum name for a function."""
prefix = self.naming_patterns['enum_prefix']
return f"{prefix}{group_name.upper()}_{function_name.upper()}"
def generate_backend_function_name(self, group_name: str, function_name: str) -> str:
"""Generate the backend function name."""
function_key = f"{group_name}_{function_name}"
overrides = self.naming_patterns.get('backend_function_overrides', {})
if function_key in overrides:
return overrides[function_key]
prefix = self.naming_patterns['backend_function_prefix']
return f"{prefix}{group_name}_{function_name}"
def generate_frontend_function_name(self, group_name: str, function_name: str) -> str:
"""Generate the frontend function name."""
prefix = self.naming_patterns['frontend_function_prefix']
return f"{prefix}{group_name}_{function_name}"
def get_enabled_functions(self) -> List[Dict[str, Any]]:
"""Get all enabled functions with their metadata."""
functions = []
enum_value = 0
for group_name, group_data in self.functions.items():
group_description = group_data['group_description']
for function_name, func_metadata in group_data['functions'].items():
# Handle case where func_metadata is None or empty (functions with only comments)
if func_metadata is None:
func_metadata = {}
# Functions are enabled by default unless explicitly disabled
if func_metadata.get('enabled', True):
functions.append({
'group_name': group_name,
'function_name': function_name,
'enum_name': self.generate_enum_name(group_name, function_name),
'enum_value': enum_value,
'backend_function': self.generate_backend_function_name(group_name, function_name),
'frontend_function': self.generate_frontend_function_name(group_name, function_name),
'frontend_return': func_metadata.get('frontend_return', 'void'),
'frontend_extra_params': func_metadata.get('frontend_extra_params', []),
'group_description': group_description,
'newly_added': func_metadata.get('newly_added', False)
})
enum_value += 1
return functions
def generate_apir_backend_header(self) -> str:
"""Generate the complete apir_backend.h file."""
functions = self.get_enabled_functions()
# Generate the enum section
enum_lines = ["typedef enum ApirBackendCommandType {"]
current_group = None
for func in functions:
# Add comment for new group
if func['group_name'] != current_group:
enum_lines.append("")
enum_lines.append(f" /* {func['group_description']} */")
current_group = func['group_name']
enum_lines.append(f" {func['enum_name']} = {func['enum_value']},")
# Add the count
total_count = len(functions)
enum_lines.append("\n // last command_type index + 1")
enum_lines.append(f" APIR_BACKEND_DISPATCH_TABLE_COUNT = {total_count},")
enum_lines.append("} ApirBackendCommandType;")
# Full header template
header_content = NL.join(enum_lines) + "\n"
return header_content
def generate_backend_dispatched_header(self) -> str:
"""Generate the complete backend-dispatched.h file."""
functions = self.get_enabled_functions()
# Function declarations
decl_lines = []
current_group = None
for func in functions:
if func['group_name'] != current_group:
decl_lines.append(f"\n/* {func['group_description']} */")
current_group = func['group_name']
signature = "uint32_t"
params = "apir_encoder *enc, apir_decoder *dec, virgl_apir_context *ctx"
decl_lines.append(f"{signature} {func['backend_function']}({params});")
# Switch cases
switch_lines = []
current_group = None
for func in functions:
if func['group_name'] != current_group:
switch_lines.append(f" /* {func['group_description']} */")
current_group = func['group_name']
switch_lines.append(f" case {func['enum_name']}: return \"{func['backend_function']}\";")
# Dispatch table
table_lines = []
current_group = None
for func in functions:
if func['group_name'] != current_group:
table_lines.append(f"\n /* {func['group_description']} */")
table_lines.append("")
current_group = func['group_name']
table_lines.append(f" /* {func['enum_name']} = */ {func['backend_function']},")
header_content = f'''\
#pragma once
{NL.join(decl_lines)}
static inline const char *backend_dispatch_command_name(ApirBackendCommandType type)
{{
switch (type) {{
{NL.join(switch_lines)}
default: return "unknown";
}}
}}
extern "C" {{
static const backend_dispatch_t apir_backend_dispatch_table[APIR_BACKEND_DISPATCH_TABLE_COUNT] = {{
{NL.join(table_lines)}
}};
}}
'''
return header_content
def generate_virtgpu_forward_header(self) -> str:
"""Generate the complete virtgpu-forward.gen.h file."""
functions = self.get_enabled_functions()
decl_lines = []
current_group = None
for func in functions:
if func['group_name'] != current_group:
decl_lines.append("")
decl_lines.append(f"/* {func['group_description']} */")
current_group = func['group_name']
# Build parameter list
params = [self.naming_patterns['frontend_base_param']]
params.extend(func['frontend_extra_params'])
param_str = ', '.join(params)
decl_lines.append(f"{func['frontend_return']} {func['frontend_function']}({param_str});")
header_content = f'''\
#pragma once
{NL.join(decl_lines)}
'''
return header_content
def regenerate_codebase(self) -> None:
"""Regenerate the entire remoting codebase."""
logging.info("🔄 Regenerating GGML Remoting Codebase...")
logging.info("=" * 50)
# Detect if we're running from frontend directory
current_dir = os.getcwd()
is_frontend_dir = current_dir.endswith('ggml-virtgpu')
if is_frontend_dir:
        # Running from ggml/src/ggml-virtgpu
logging.info("📍 Detected frontend directory execution")
frontend_base = Path(".")
else:
# Running from project root (fallback to original behavior)
logging.info("📍 Detected project root execution")
base_path = self.config_data.get('base_path', 'ggml/src')
frontend_base = Path(base_path) / "ggml-virtgpu"
# Compute final file paths
backend_base = frontend_base / "backend"
apir_backend_path = backend_base / "shared" / "apir_backend.gen.h"
backend_dispatched_path = backend_base / "backend-dispatched.gen.h"
virtgpu_forward_path = frontend_base / "virtgpu-forward.gen.h"
# Create output directories for each file
apir_backend_path.parent.mkdir(parents=True, exist_ok=True)
backend_dispatched_path.parent.mkdir(parents=True, exist_ok=True)
virtgpu_forward_path.parent.mkdir(parents=True, exist_ok=True)
# Generate header files
logging.info("📁 Generating header files...")
apir_backend_content = self.generate_apir_backend_header()
apir_backend_path.write_text(apir_backend_content)
logging.info(f"{apir_backend_path.resolve()}")
backend_dispatched_content = self.generate_backend_dispatched_header()
backend_dispatched_path.write_text(backend_dispatched_content)
logging.info(f"{backend_dispatched_path.resolve()}")
virtgpu_forward_content = self.generate_virtgpu_forward_header()
virtgpu_forward_path.write_text(virtgpu_forward_content)
logging.info(f"{virtgpu_forward_path.resolve()}")
# Format generated files with clang-format
generated_files = [apir_backend_path, backend_dispatched_path, virtgpu_forward_path]
if not self.clang_format_available:
logging.warning("\nclang-format not found in PATH. Generated files will not be formatted."
" Install clang-format to enable automatic code formatting.")
else:
logging.info("\n🎨 Formatting files with clang-format...")
for file_path in generated_files:
if self._format_file_with_clang_format(file_path):
logging.info(f" ✅ Formatted {file_path.name}")
else:
logging.warning(f" ❌ Failed to format {file_path.name}")
# Generate summary
functions = self.get_enabled_functions()
total_functions = len(functions)
logging.info("\n📊 Generation Summary:")
logging.info("=" * 50)
logging.info(f" Total functions: {total_functions}")
logging.info(f" Function groups: {len(self.functions)}")
logging.info(" Header files: 3")
logging.info(f" Working directory: {current_dir}")
def main():
    # configure logging so the script's progress messages (logging.info) are actually emitted
    logging.basicConfig(level=logging.INFO, format="%(message)s")
    try:
generator = RemotingCodebaseGenerator()
generator.regenerate_codebase()
except Exception as e:
logging.exception(f"❌ Error: {e}")
exit(1)
if __name__ == "__main__":
main()

View File

@ -0,0 +1,15 @@
#include "backend/shared/apir_backend.h"
#include "ggml-alloc.h"
#include "ggml-impl.h"
#include "ggml.h"
#include "virtgpu-shm.h"
#include "virtgpu-utils.h"
struct apir_buffer_context_t {
apir_buffer_host_handle_t host_handle;
struct virtgpu_shmem shmem;
apir_buffer_type_host_handle_t buft_host_handle;
};
#include "virtgpu-forward.gen.h"

View File

@ -0,0 +1,50 @@
#include "virtgpu-forward-impl.h"
static long long current_time_ms() {
    timespec ts;
    clock_gettime(CLOCK_REALTIME, &ts);  // use CLOCK_MONOTONIC instead for elapsed-time measurements
    return (long long) ts.tv_sec * 1000LL + ts.tv_nsec / 1000000LL;  // seconds + nanoseconds -> milliseconds
}
ggml_status apir_backend_graph_compute(virtgpu * gpu, ggml_cgraph * cgraph) {
apir_encoder * encoder;
apir_decoder * decoder;
ApirForwardReturnCode ret;
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BACKEND_GRAPH_COMPUTE);
std::vector<uint8_t> cgraph_data;
size_t cgraph_size = apir_serialize_ggml_cgraph(cgraph, cgraph_data);
virtgpu_shmem temp_shmem; // Local storage for large buffers
virtgpu_shmem * shmem = &temp_shmem;
if (cgraph_size <= gpu->data_shmem.mmap_size) {
// prefer the init-time allocated page, if large enough
shmem = &gpu->data_shmem;
} else if (virtgpu_shmem_create(gpu, cgraph_size, shmem)) {
GGML_ABORT("Couldn't allocate the guest-host shared buffer");
}
apir_encode_virtgpu_shmem_res_id(encoder, shmem->res_id);
apir_encode_size_t(encoder, &cgraph_size);
char * shmem_data = (char *) shmem->mmap_ptr;
apir_encoder secondary_enc = apir_new_encoder(shmem_data, cgraph_size);
apir_encode_cgraph_data(&secondary_enc, cgraph_data);
REMOTE_CALL(gpu, encoder, decoder, ret);
ggml_status status = GGML_STATUS_ABORTED;
apir_decode_ggml_status(decoder, &status);
remote_call_finish(gpu, encoder, decoder);
if (shmem != &gpu->data_shmem) {
virtgpu_shmem_destroy(gpu, shmem);
}
return status;
}

View File

@ -0,0 +1,125 @@
#include "virtgpu-forward-impl.h"
const char * apir_buffer_type_get_name(virtgpu * gpu, ggml_backend_buffer_type_t buft) {
apir_encoder * encoder;
apir_decoder * decoder;
ApirForwardReturnCode ret;
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_GET_NAME);
apir_encode_ggml_buffer_type(encoder, buft);
REMOTE_CALL(gpu, encoder, decoder, ret);
const size_t string_size = apir_decode_array_size_unchecked(decoder);
char * string = (char *) apir_decoder_alloc_array(sizeof(char), string_size);
    if (!string) {
        GGML_LOG_ERROR("%s: Could not allocate the buffer-type name buffer\n", __func__);
        apir_decoder_set_fatal(decoder);
        remote_call_finish(gpu, encoder, decoder);
        return NULL;
    }
    apir_decode_char_array(decoder, string, string_size);
remote_call_finish(gpu, encoder, decoder);
return string;
}
size_t apir_buffer_type_get_alignment(virtgpu * gpu, ggml_backend_buffer_type_t buft) {
apir_encoder * encoder;
apir_decoder * decoder;
ApirForwardReturnCode ret;
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALIGNMENT);
apir_encode_ggml_buffer_type(encoder, buft);
REMOTE_CALL(gpu, encoder, decoder, ret);
size_t alignment;
apir_decode_size_t(decoder, &alignment);
remote_call_finish(gpu, encoder, decoder);
return alignment;
}
size_t apir_buffer_type_get_max_size(virtgpu * gpu, ggml_backend_buffer_type_t buft) {
apir_encoder * encoder;
apir_decoder * decoder;
ApirForwardReturnCode ret;
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_GET_MAX_SIZE);
apir_encode_ggml_buffer_type(encoder, buft);
REMOTE_CALL(gpu, encoder, decoder, ret);
size_t max_size;
apir_decode_size_t(decoder, &max_size);
remote_call_finish(gpu, encoder, decoder);
return max_size;
}
bool apir_buffer_type_is_host(virtgpu * gpu, ggml_backend_buffer_type_t buft) {
apir_encoder * encoder;
apir_decoder * decoder;
ApirForwardReturnCode ret;
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST);
apir_encode_ggml_buffer_type(encoder, buft);
REMOTE_CALL(gpu, encoder, decoder, ret);
bool is_host;
apir_decode_bool_t(decoder, &is_host);
remote_call_finish(gpu, encoder, decoder);
return is_host;
}
apir_buffer_context_t apir_buffer_type_alloc_buffer(virtgpu * gpu, ggml_backend_buffer_type_t buft, size_t size) {
apir_encoder * encoder;
apir_decoder * decoder;
ApirForwardReturnCode ret;
apir_buffer_context_t buffer_context;
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_ALLOC_BUFFER);
apir_encode_ggml_buffer_type(encoder, buft);
apir_encode_size_t(encoder, &size);
REMOTE_CALL(gpu, encoder, decoder, ret);
apir_decode_apir_buffer_host_handle_t(decoder, &buffer_context.host_handle);
remote_call_finish(gpu, encoder, decoder);
return buffer_context;
}
size_t apir_buffer_type_get_alloc_size(virtgpu * gpu, ggml_backend_buffer_type_t buft, const ggml_tensor * op) {
apir_encoder * encoder;
apir_decoder * decoder;
ApirForwardReturnCode ret;
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALLOC_SIZE);
apir_encode_ggml_buffer_type(encoder, buft);
apir_encode_ggml_tensor_inline(encoder, op);
REMOTE_CALL(gpu, encoder, decoder, ret);
size_t alloc_size;
apir_decode_size_t(decoder, &alloc_size);
remote_call_finish(gpu, encoder, decoder);
return alloc_size;
}

View File

@ -0,0 +1,157 @@
#include "virtgpu-forward-impl.h"
void * apir_buffer_get_base(virtgpu * gpu, apir_buffer_context_t * buffer_context) {
apir_encoder * encoder;
apir_decoder * decoder;
ApirForwardReturnCode ret;
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_GET_BASE);
apir_encode_apir_buffer_host_handle_t(encoder, &buffer_context->host_handle);
REMOTE_CALL(gpu, encoder, decoder, ret);
uintptr_t base;
apir_decode_uintptr_t(decoder, &base);
remote_call_finish(gpu, encoder, decoder);
return (void *) base;
}
void apir_buffer_set_tensor(virtgpu * gpu,
apir_buffer_context_t * buffer_context,
ggml_tensor * tensor,
const void * data,
size_t offset,
size_t size) {
apir_encoder * encoder;
apir_decoder * decoder;
ApirForwardReturnCode ret;
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_SET_TENSOR);
apir_encode_apir_buffer_host_handle_t(encoder, &buffer_context->host_handle);
apir_encode_ggml_tensor(encoder, tensor);
virtgpu_shmem temp_shmem; // Local storage for large buffers
virtgpu_shmem * shmem = &temp_shmem;
if (size <= gpu->data_shmem.mmap_size) {
// prefer the init-time allocated page, if large enough
shmem = &gpu->data_shmem;
} else if (virtgpu_shmem_create(gpu, size, shmem)) {
GGML_ABORT("Couldn't allocate the guest-host shared buffer");
}
memcpy(shmem->mmap_ptr, data, size);
apir_encode_virtgpu_shmem_res_id(encoder, shmem->res_id);
apir_encode_size_t(encoder, &offset);
apir_encode_size_t(encoder, &size);
REMOTE_CALL(gpu, encoder, decoder, ret);
remote_call_finish(gpu, encoder, decoder);
if (shmem != &gpu->data_shmem) {
virtgpu_shmem_destroy(gpu, shmem);
}
return;
}
void apir_buffer_get_tensor(virtgpu * gpu,
apir_buffer_context_t * buffer_context,
const ggml_tensor * tensor,
void * data,
size_t offset,
size_t size) {
apir_encoder * encoder;
apir_decoder * decoder;
ApirForwardReturnCode ret;
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_GET_TENSOR);
apir_encode_apir_buffer_host_handle_t(encoder, &buffer_context->host_handle);
apir_encode_ggml_tensor(encoder, tensor);
virtgpu_shmem temp_shmem; // Local storage for large buffers
virtgpu_shmem * shmem = &temp_shmem;
if (size <= gpu->data_shmem.mmap_size) {
// prefer the init-time allocated page, if large enough
shmem = &gpu->data_shmem;
} else if (virtgpu_shmem_create(gpu, size, shmem)) {
GGML_ABORT("Couldn't allocate the guest-host shared buffer");
}
apir_encode_virtgpu_shmem_res_id(encoder, shmem->res_id);
apir_encode_size_t(encoder, &offset);
apir_encode_size_t(encoder, &size);
REMOTE_CALL(gpu, encoder, decoder, ret);
memcpy(data, shmem->mmap_ptr, size);
remote_call_finish(gpu, encoder, decoder);
if (shmem != &gpu->data_shmem) {
virtgpu_shmem_destroy(gpu, shmem);
}
}
bool apir_buffer_cpy_tensor(virtgpu * gpu,
apir_buffer_context_t * buffer_context,
const ggml_tensor * src,
const ggml_tensor * dst) {
apir_encoder * encoder;
apir_decoder * decoder;
ApirForwardReturnCode ret;
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_CPY_TENSOR);
apir_encode_apir_buffer_host_handle_t(encoder, &buffer_context->host_handle);
apir_encode_ggml_tensor(encoder, src);
apir_encode_ggml_tensor(encoder, dst);
REMOTE_CALL(gpu, encoder, decoder, ret);
bool ret_val;
apir_decode_bool_t(decoder, &ret_val);
remote_call_finish(gpu, encoder, decoder);
return ret_val;
}
void apir_buffer_clear(virtgpu * gpu, apir_buffer_context_t * buffer_context, uint8_t value) {
apir_encoder * encoder;
apir_decoder * decoder;
ApirForwardReturnCode ret;
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_CLEAR);
apir_encode_apir_buffer_host_handle_t(encoder, &buffer_context->host_handle);
apir_encode_uint8_t(encoder, &value);
REMOTE_CALL(gpu, encoder, decoder, ret);
remote_call_finish(gpu, encoder, decoder);
}
void apir_buffer_free_buffer(virtgpu * gpu, apir_buffer_context_t * buffer_context) {
apir_encoder * encoder;
apir_decoder * decoder;
ApirForwardReturnCode ret;
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_FREE_BUFFER);
apir_encode_apir_buffer_host_handle_t(encoder, &buffer_context->host_handle);
REMOTE_CALL(gpu, encoder, decoder, ret);
remote_call_finish(gpu, encoder, decoder);
}

View File

@ -0,0 +1,200 @@
#include "virtgpu-forward-impl.h"
#include "virtgpu-shm.h"
int apir_device_get_count(virtgpu * gpu) {
static int32_t dev_count = -1;
if (dev_count != -1) {
return dev_count;
}
apir_encoder * encoder;
apir_decoder * decoder;
ApirForwardReturnCode ret;
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_COUNT);
REMOTE_CALL(gpu, encoder, decoder, ret);
apir_decode_int32_t(decoder, &dev_count);
remote_call_finish(gpu, encoder, decoder);
return dev_count;
}
const char * apir_device_get_name(virtgpu * gpu) {
static char * string = nullptr;
if (string) {
return string;
}
apir_encoder * encoder;
apir_decoder * decoder;
ApirForwardReturnCode ret;
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_NAME);
REMOTE_CALL(gpu, encoder, decoder, ret);
const size_t string_size = apir_decode_array_size_unchecked(decoder);
string = (char *) apir_decoder_alloc_array(sizeof(char), string_size);
if (!string) {
GGML_LOG_ERROR("%s: Could not allocate the device name buffer\n", __func__);
return NULL;
}
apir_decode_char_array(decoder, string, string_size);
remote_call_finish(gpu, encoder, decoder);
return string;
}
const char * apir_device_get_description(virtgpu * gpu) {
apir_encoder * encoder;
apir_decoder * decoder;
ApirForwardReturnCode ret;
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_DESCRIPTION);
REMOTE_CALL(gpu, encoder, decoder, ret);
const size_t string_size = apir_decode_array_size_unchecked(decoder);
char * string = (char *) apir_decoder_alloc_array(sizeof(char), string_size);
if (!string) {
GGML_LOG_ERROR("%s: Could not allocate the device description buffer\n", __func__);
return NULL;
}
apir_decode_char_array(decoder, string, string_size);
remote_call_finish(gpu, encoder, decoder);
return string;
}
uint32_t apir_device_get_type(virtgpu * gpu) {
static uint32_t dev_type = 255;
if (dev_type != 255) {
return dev_type;
}
apir_encoder * encoder;
apir_decoder * decoder;
ApirForwardReturnCode ret;
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_TYPE);
REMOTE_CALL(gpu, encoder, decoder, ret);
apir_decode_uint32_t(decoder, &dev_type);
remote_call_finish(gpu, encoder, decoder);
return dev_type;
}
void apir_device_get_memory(virtgpu * gpu, size_t * free, size_t * total) {
    apir_encoder * encoder;
    apir_decoder * decoder;
    ApirForwardReturnCode ret;
    REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_MEMORY);
    REMOTE_CALL(gpu, encoder, decoder, ret);
    apir_decode_size_t(decoder, free);
    apir_decode_size_t(decoder, total);
    remote_call_finish(gpu, encoder, decoder);
}
bool apir_device_supports_op(virtgpu * gpu, const ggml_tensor * op) {
apir_encoder * encoder;
apir_decoder * decoder;
ApirForwardReturnCode ret;
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_SUPPORTS_OP);
apir_encode_ggml_tensor_inline(encoder, op);
REMOTE_CALL(gpu, encoder, decoder, ret);
bool supports_op;
apir_decode_bool_t(decoder, &supports_op);
remote_call_finish(gpu, encoder, decoder);
return supports_op;
}
apir_buffer_type_host_handle_t apir_device_get_buffer_type(virtgpu * gpu) {
apir_encoder * encoder;
apir_decoder * decoder;
ApirForwardReturnCode ret;
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_BUFFER_TYPE);
REMOTE_CALL(gpu, encoder, decoder, ret);
apir_buffer_type_host_handle_t buft_handle;
apir_decode_apir_buffer_type_host_handle_t(decoder, &buft_handle);
remote_call_finish(gpu, encoder, decoder);
return buft_handle;
}
void apir_device_get_props(virtgpu * gpu,
bool * async,
bool * host_buffer,
bool * buffer_from_host_ptr,
bool * events) {
apir_encoder * encoder;
apir_decoder * decoder;
ApirForwardReturnCode ret;
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_PROPS);
REMOTE_CALL(gpu, encoder, decoder, ret);
apir_decode_bool_t(decoder, async);
apir_decode_bool_t(decoder, host_buffer);
apir_decode_bool_t(decoder, buffer_from_host_ptr);
apir_decode_bool_t(decoder, events);
remote_call_finish(gpu, encoder, decoder);
return;
}
apir_buffer_context_t apir_device_buffer_from_ptr(virtgpu * gpu, size_t size, size_t max_tensor_size) {
apir_encoder * encoder;
apir_decoder * decoder;
ApirForwardReturnCode ret;
apir_buffer_context_t buffer_context;
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_BUFFER_FROM_PTR);
if (virtgpu_shmem_create(gpu, size, &buffer_context.shmem)) {
GGML_ABORT("Couldn't allocate the guest-host shared buffer");
}
apir_encode_virtgpu_shmem_res_id(encoder, buffer_context.shmem.res_id);
apir_encode_size_t(encoder, &size);
apir_encode_size_t(encoder, &max_tensor_size);
REMOTE_CALL(gpu, encoder, decoder, ret);
apir_decode_apir_buffer_host_handle_t(decoder, &buffer_context.host_handle);
buffer_context.buft_host_handle = apir_decode_apir_buffer_type_host_handle(decoder);
remote_call_finish(gpu, encoder, decoder);
return buffer_context;
}

View File

@ -0,0 +1,29 @@
#include "virtgpu.h"
#include "ggml-remoting.h"
#include "backend/shared/apir_backend.h"
#include "backend/shared/apir_cs_ggml.h"
#include "ggml-backend-impl.h"
#define REMOTE_CALL_PREPARE(gpu_dev_name, encoder_name, apir_command_type__) \
do { \
int32_t forward_flag = (int32_t) apir_command_type__; \
encoder_name = remote_call_prepare(gpu_dev_name, APIR_COMMAND_TYPE_FORWARD, forward_flag); \
if (!encoder_name) { \
GGML_ABORT("%s: failed to prepare the remote call encoder", __func__); \
} \
} while (0)
#define REMOTE_CALL(gpu_dev_name, encoder_name, decoder_name, ret_name) \
do { \
ret_name = (ApirForwardReturnCode) remote_call(gpu_dev_name, encoder_name, &decoder_name, 0, NULL); \
if (!decoder_name) { \
GGML_ABORT("%s: failed to kick the remote call", __func__); \
} \
if (ret_name < APIR_FORWARD_BASE_INDEX) { \
GGML_ABORT("%s: failed to forward the API call: %s: code %d", __func__, \
apir_forward_error(ret_name), ret_name); \
} \
ret_name = (ApirForwardReturnCode) (ret_name - APIR_FORWARD_BASE_INDEX); \
} while (0)

View File

@ -0,0 +1,51 @@
#pragma once
/* device */
void apir_device_get_device_count(struct virtgpu * gpu);
int apir_device_get_count(struct virtgpu * gpu);
const char * apir_device_get_name(struct virtgpu * gpu);
const char * apir_device_get_description(struct virtgpu * gpu);
uint32_t apir_device_get_type(struct virtgpu * gpu);
void apir_device_get_memory(struct virtgpu * gpu, size_t * free, size_t * total);
bool apir_device_supports_op(struct virtgpu * gpu, const ggml_tensor * op);
apir_buffer_type_host_handle_t apir_device_get_buffer_type(struct virtgpu * gpu);
void apir_device_get_props(struct virtgpu * gpu,
bool * async,
bool * host_buffer,
bool * buffer_from_host_ptr,
bool * events);
apir_buffer_context_t apir_device_buffer_from_ptr(struct virtgpu * gpu, size_t size, size_t max_tensor_size);
/* buffer-type */
const char * apir_buffer_type_get_name(struct virtgpu * gpu, ggml_backend_buffer_type_t buft);
size_t apir_buffer_type_get_alignment(struct virtgpu * gpu, ggml_backend_buffer_type_t buft);
size_t apir_buffer_type_get_max_size(struct virtgpu * gpu, ggml_backend_buffer_type_t buft);
bool apir_buffer_type_is_host(struct virtgpu * gpu, ggml_backend_buffer_type_t buft);
apir_buffer_context_t apir_buffer_type_alloc_buffer(struct virtgpu * gpu,
ggml_backend_buffer_type_t buffer_buft,
size_t size);
size_t apir_buffer_type_get_alloc_size(struct virtgpu * gpu, ggml_backend_buffer_type_t buft, const ggml_tensor * op);
/* buffer */
void * apir_buffer_get_base(struct virtgpu * gpu, apir_buffer_context_t * buffer_context);
void apir_buffer_set_tensor(struct virtgpu * gpu,
apir_buffer_context_t * buffer_context,
ggml_tensor * tensor,
const void * data,
size_t offset,
size_t size);
void apir_buffer_get_tensor(struct virtgpu * gpu,
apir_buffer_context_t * buffer_context,
const ggml_tensor * tensor,
void * data,
size_t offset,
size_t size);
bool apir_buffer_cpy_tensor(struct virtgpu * gpu,
apir_buffer_context_t * buffer_context,
const ggml_tensor * src,
const ggml_tensor * dst);
void apir_buffer_clear(struct virtgpu * gpu, apir_buffer_context_t * buffer_context, uint8_t value);
void apir_buffer_free_buffer(struct virtgpu * gpu, apir_buffer_context_t * buffer_context);
/* backend */
ggml_status apir_backend_graph_compute(struct virtgpu * gpu, ggml_cgraph * cgraph);

View File

@ -0,0 +1,99 @@
#include "virtgpu-shm.h"
#include "virtgpu.h"
#include <assert.h>
static uint32_t virtgpu_ioctl_resource_create_blob(virtgpu * gpu,
uint32_t blob_mem,
uint32_t blob_flags,
size_t blob_size,
uint64_t blob_id,
uint32_t * res_id) {
#ifdef SIMULATE_BO_SIZE_FIX
blob_size = align64(blob_size, 4096);
#endif
drm_virtgpu_resource_create_blob args = {
.blob_mem = blob_mem,
.blob_flags = blob_flags,
.bo_handle = 0,
.res_handle = 0,
.size = blob_size,
.pad = 0,
.cmd_size = 0,
.cmd = 0,
.blob_id = blob_id,
};
if (virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_RESOURCE_CREATE_BLOB, &args)) {
return 0;
}
*res_id = args.res_handle;
return args.bo_handle;
}
static void virtgpu_ioctl_gem_close(virtgpu * gpu, uint32_t gem_handle) {
drm_gem_close args = {
.handle = gem_handle,
.pad = 0,
};
const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_GEM_CLOSE, &args);
assert(!ret);
#ifdef NDEBUG
UNUSED(ret);
#endif
}
static void * virtgpu_ioctl_map(virtgpu * gpu, uint32_t gem_handle, size_t size) {
drm_virtgpu_map args = {
.offset = 0,
.handle = gem_handle,
.pad = 0,
};
if (virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_MAP, &args)) {
return NULL;
}
void * ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, gpu->fd, args.offset);
if (ptr == MAP_FAILED) {
return NULL;
}
return ptr;
}
void virtgpu_shmem_destroy(virtgpu * gpu, virtgpu_shmem * shmem) {
munmap(shmem->mmap_ptr, shmem->mmap_size);
virtgpu_ioctl_gem_close(gpu, shmem->gem_handle);
}
int virtgpu_shmem_create(virtgpu * gpu, size_t size, virtgpu_shmem * shmem) {
size = align64(size, 16384);
uint32_t res_id;
uint32_t gem_handle = virtgpu_ioctl_resource_create_blob(gpu, VIRTGPU_BLOB_MEM_HOST3D,
VIRTGPU_BLOB_FLAG_USE_MAPPABLE, size, 0, &res_id);
if (!gem_handle) {
return 1;
}
void * ptr = virtgpu_ioctl_map(gpu, gem_handle, size);
if (!ptr) {
virtgpu_ioctl_gem_close(gpu, gem_handle);
GGML_LOG_ERROR("virtgpu_ioctl_map FAILED\n");
        return 1;
}
shmem->res_id = res_id;
shmem->mmap_size = size;
shmem->mmap_ptr = ptr;
shmem->gem_handle = gem_handle;
return 0;
}
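As a usage note, callers allocate a shared page, hand its res_id to the host, access it through mmap_ptr, and eventually release both the mapping and the GEM handle. A minimal sketch of that lifecycle (the helper name is hypothetical):
// Hypothetical helper showing the intended create/use/destroy lifecycle.
static bool with_scratch_page(virtgpu * gpu, size_t size) {
    virtgpu_shmem shmem;
    if (virtgpu_shmem_create(gpu, size, &shmem)) {
        return false;                                // allocation or mapping failed
    }
    memset(shmem.mmap_ptr, 0, shmem.mmap_size);      // guest-side access to the shared pages
    // ... send shmem.res_id to the host so it can map the same blob ...
    virtgpu_shmem_destroy(gpu, &shmem);              // munmap + GEM close
    return true;
}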

View File

@ -0,0 +1,23 @@
#pragma once
#include "virtgpu-utils.h"
#include <sys/mman.h>
#include <atomic>
#include <cassert>
#include <cstddef>
#include <cstdint>
struct virtgpu;
struct virtgpu_shmem {
uint32_t res_id;
size_t mmap_size;
void * mmap_ptr;
uint32_t gem_handle;
};
int virtgpu_shmem_create(virtgpu * gpu, size_t size, virtgpu_shmem * shmem);
void virtgpu_shmem_destroy(virtgpu * gpu, virtgpu_shmem * shmem);

View File

@ -0,0 +1,179 @@
#include "virtgpu-utils.h"
#include <malloc.h>
#include <stdlib.h>
#include <cstring>
#define NODE_ALLOC_ALIGN 64
#define NODE_PTR_MASK (~((uintptr_t) NODE_ALLOC_ALIGN - 1))
#define NODE_LEVEL_MASK ((uintptr_t) NODE_ALLOC_ALIGN - 1)
#define NULL_NODE 0
#define os_free_aligned(_ptr) free(_ptr)
#define p_atomic_cmpxchg(v, old, _new) __sync_val_compare_and_swap((v), (old), (_new))
static inline uint64_t util_logbase2_64(uint64_t n) {
#if defined(HAVE___BUILTIN_CLZLL)
return ((sizeof(uint64_t) * 8 - 1) - __builtin_clzll(n | 1));
#else
uint64_t pos = 0ull;
if (n >= 1ull << 32) {
n >>= 32;
pos += 32;
}
if (n >= 1ull << 16) {
n >>= 16;
pos += 16;
}
if (n >= 1ull << 8) {
n >>= 8;
pos += 8;
}
if (n >= 1ull << 4) {
n >>= 4;
pos += 4;
}
if (n >= 1ull << 2) {
n >>= 2;
pos += 2;
}
if (n >= 1ull << 1) {
pos += 1;
}
return pos;
#endif
}
void util_sparse_array_init(util_sparse_array * arr, size_t elem_size, size_t node_size) {
memset(arr, 0, sizeof(*arr));
arr->elem_size = elem_size;
arr->node_size_log2 = util_logbase2_64(node_size);
assert(node_size >= 2 && node_size == (1ull << arr->node_size_log2));
}
static inline void * os_malloc_aligned(size_t size, size_t alignment) {
void * ptr;
alignment = (alignment + sizeof(void *) - 1) & ~(sizeof(void *) - 1);
if (posix_memalign(&ptr, alignment, size) != 0) {
return NULL;
}
return ptr;
}
static inline void * _util_sparse_array_node_data(uintptr_t handle) {
return (void *) (handle & NODE_PTR_MASK);
}
static inline unsigned _util_sparse_array_node_level(uintptr_t handle) {
return handle & NODE_LEVEL_MASK;
}
static inline void _util_sparse_array_node_finish(util_sparse_array * arr, uintptr_t node) {
if (_util_sparse_array_node_level(node) > 0) {
uintptr_t * children = (uintptr_t *) _util_sparse_array_node_data(node);
size_t node_size = 1ull << arr->node_size_log2;
for (size_t i = 0; i < node_size; i++) {
if (children[i]) {
_util_sparse_array_node_finish(arr, children[i]);
}
}
}
os_free_aligned(_util_sparse_array_node_data(node));
}
static inline uintptr_t _util_sparse_array_node(void * data, unsigned level) {
assert(data != NULL);
assert(((uintptr_t) data & NODE_LEVEL_MASK) == 0);
assert((level & NODE_PTR_MASK) == 0);
return (uintptr_t) data | level;
}
static inline uintptr_t _util_sparse_array_node_alloc(util_sparse_array * arr, unsigned level) {
size_t size;
if (level == 0) {
size = arr->elem_size << arr->node_size_log2;
} else {
size = sizeof(uintptr_t) << arr->node_size_log2;
}
void * data = os_malloc_aligned(size, NODE_ALLOC_ALIGN);
memset(data, 0, size);
return _util_sparse_array_node(data, level);
}
static inline uintptr_t _util_sparse_array_set_or_free_node(uintptr_t * node_ptr, uintptr_t cmp_node, uintptr_t node) {
uintptr_t prev_node = p_atomic_cmpxchg(node_ptr, cmp_node, node);
if (prev_node != cmp_node) {
/* We lost the race. Free this one and return the one that was already
* allocated.
*/
os_free_aligned(_util_sparse_array_node_data(node));
return prev_node;
} else {
return node;
}
}
void * util_sparse_array_get(util_sparse_array * arr, uint64_t idx) {
const unsigned node_size_log2 = arr->node_size_log2;
uintptr_t root = p_atomic_read(&arr->root);
if (unlikely(!root)) {
unsigned root_level = 0;
uint64_t idx_iter = idx >> node_size_log2;
while (idx_iter) {
idx_iter >>= node_size_log2;
root_level++;
}
uintptr_t new_root = _util_sparse_array_node_alloc(arr, root_level);
root = _util_sparse_array_set_or_free_node(&arr->root, NULL_NODE, new_root);
}
while (1) {
unsigned root_level = _util_sparse_array_node_level(root);
uint64_t root_idx = idx >> (root_level * node_size_log2);
if (likely(root_idx < (1ull << node_size_log2))) {
break;
}
/* In this case, we have a root but its level is low enough that the
* requested index is out-of-bounds.
*/
uintptr_t new_root = _util_sparse_array_node_alloc(arr, root_level + 1);
uintptr_t * new_root_children = (uintptr_t *) _util_sparse_array_node_data(new_root);
new_root_children[0] = root;
/* We only add one at a time instead of the whole tree because it's
* easier to ensure correctness of both the tree building and the
* clean-up path. Because we're only adding one node we never have to
* worry about trying to free multiple things without freeing the old
* things.
*/
root = _util_sparse_array_set_or_free_node(&arr->root, root, new_root);
}
void * node_data = _util_sparse_array_node_data(root);
unsigned node_level = _util_sparse_array_node_level(root);
while (node_level > 0) {
uint64_t child_idx = (idx >> (node_level * node_size_log2)) & ((1ull << node_size_log2) - 1);
uintptr_t * children = (uintptr_t *) node_data;
uintptr_t child = p_atomic_read(&children[child_idx]);
if (unlikely(!child)) {
child = _util_sparse_array_node_alloc(arr, node_level - 1);
child = _util_sparse_array_set_or_free_node(&children[child_idx], NULL_NODE, child);
}
node_data = _util_sparse_array_node_data(child);
node_level = _util_sparse_array_node_level(child);
}
uint64_t elem_idx = idx & ((1ull << node_size_log2) - 1);
return (void *) ((char *) node_data + (elem_idx * arr->elem_size));
}
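For reference, the sparse array behaves like an arbitrarily large, zero-initialized array whose storage is allocated lazily on first access; a short usage sketch mirroring how virtgpu.cpp sets up shmem_array:
util_sparse_array arr;
util_sparse_array_init(&arr, sizeof(virtgpu_shmem), 1024);              // 1024 elements per node
virtgpu_shmem * slot = (virtgpu_shmem *) util_sparse_array_get(&arr, 42);
slot->res_id = 7;   // the returned pointer is stable: index 42 always maps to this slot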

View File

@ -0,0 +1,86 @@
#pragma once
#include <atomic>
#include <cassert>
#include <cerrno>
#include <cstdarg>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <ctime>
#define unlikely(x) __builtin_expect(!!(x), 0)
#define likely(x) __builtin_expect(!!(x), 1)
#ifndef UNUSED
# define UNUSED(x) (void) (x)
#endif
/** Checks if a value is a power of two. Does not handle zero. */
#define IS_POT(v) (((v) & ((v) - 1)) == 0)
/** Checks if a value is a power of two. Zero is handled. */
#define IS_POT_NONZERO(v) ((v) != 0 && IS_POT(v))
/** Align a value to a power of two */
#define ALIGN_POT(x, pot_align) (((x) + (pot_align) - 1) & ~((pot_align) - 1))
#define p_atomic_read(_v) __atomic_load_n((_v), __ATOMIC_ACQUIRE)
static inline bool util_is_power_of_two_nonzero64(uint64_t v) {
return IS_POT_NONZERO(v);
}
static inline uint64_t align64(uint64_t value, uint64_t alignment) {
assert(util_is_power_of_two_nonzero64(alignment));
return ALIGN_POT(value, alignment);
}
struct list_head {
list_head * prev;
list_head * next;
};
struct util_sparse_array {
size_t elem_size;
unsigned node_size_log2;
uintptr_t root;
};
void * util_sparse_array_get(util_sparse_array * arr, uint64_t idx);
void util_sparse_array_init(util_sparse_array * arr, size_t elem_size, size_t node_size);
inline void os_time_sleep(int64_t usecs) {
timespec time;
time.tv_sec = usecs / 1000000;
time.tv_nsec = (usecs % 1000000) * 1000;
while (clock_nanosleep(CLOCK_MONOTONIC, 0, &time, &time) == EINTR)
;
}
struct timer_data {
long long start;
long long total;
long long count;
};
static inline void start_timer(timer_data * timer) {
timespec ts;
clock_gettime(CLOCK_MONOTONIC, &ts);
timer->start = (long long) ts.tv_sec * 1000000000LL + ts.tv_nsec;
}
// returns the duration in ns
static inline long long stop_timer(timer_data * timer) {
timespec ts;
clock_gettime(CLOCK_MONOTONIC, &ts);
long long timer_end = (long long) ts.tv_sec * 1000000000LL + ts.tv_nsec;
long long duration = (timer_end - timer->start);
timer->total += duration;
timer->count += 1;
return duration;
}
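The timer helpers are meant to be used in start/stop pairs, as remote_call() does in virtgpu.cpp; a minimal sketch:
timer_data t = { 0, 0, 0 };
start_timer(&t);
// ... work being measured ...
long long ns = stop_timer(&t);   // duration of this measurement, in ns
// t.total accumulates the durations and t.count the number of measurements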

View File

@ -0,0 +1,498 @@
#include "virtgpu.h"
#include <stdio.h>
#include <unistd.h>
#include <cassert>
#include <cerrno>
#include <cstdlib>
static virt_gpu_result_t virtgpu_open_device(virtgpu * gpu, const drmDevicePtr dev);
static virt_gpu_result_t virtgpu_open(virtgpu * gpu);
static virt_gpu_result_t virtgpu_init_capset(virtgpu * gpu);
static virt_gpu_result_t virtgpu_init_context(virtgpu * gpu);
static int virtgpu_ioctl_context_init(virtgpu * gpu, virgl_renderer_capset capset_id);
static int virtgpu_ioctl_get_caps(virtgpu * gpu,
virgl_renderer_capset id,
uint32_t version,
void * capset,
size_t capset_size);
static uint64_t virtgpu_ioctl_getparam(virtgpu * gpu, uint64_t param);
static void virtgpu_init_renderer_info(virtgpu * gpu);
static void log_call_duration(long long call_duration_ns, const char * name);
const uint64_t APIR_HANDSHAKE_MAX_WAIT_MS = 2 * 1000; // 2s
const uint64_t APIR_LOADLIBRARY_MAX_WAIT_MS = 60 * 1000; // 60s
static int virtgpu_handshake(virtgpu * gpu) {
apir_encoder * encoder;
apir_decoder * decoder;
encoder = remote_call_prepare(gpu, APIR_COMMAND_TYPE_HANDSHAKE, 0);
if (!encoder) {
GGML_ABORT("%s: failed to prepare the remote call encoder", __func__);
return 1;
}
/* write handshake props */
uint32_t guest_major = APIR_PROTOCOL_MAJOR;
uint32_t guest_minor = APIR_PROTOCOL_MINOR;
apir_encode_uint32_t(encoder, &guest_major);
apir_encode_uint32_t(encoder, &guest_minor);
/* *** */
uint32_t ret_magic;
long long call_duration_ns;
ret_magic = remote_call(gpu, encoder, &decoder, APIR_HANDSHAKE_MAX_WAIT_MS, &call_duration_ns);
log_call_duration(call_duration_ns, "API Remoting handshake");
if (!decoder) {
GGML_ABORT(
"%s: failed to initiate the communication with the virglrenderer library. "
"Most likely, the wrong virglrenderer library was loaded in the hypervisor.",
__func__);
return 1;
}
/* read handshake return values */
uint32_t host_major;
uint32_t host_minor;
if (ret_magic != APIR_HANDSHAKE_MAGIC) {
GGML_ABORT("%s: handshake with the virglrenderer failed (code=%d | %s)", __func__, ret_magic,
apir_backend_initialize_error(ret_magic));
} else {
apir_decode_uint32_t(decoder, &host_major);
apir_decode_uint32_t(decoder, &host_minor);
}
remote_call_finish(gpu, encoder, decoder);
if (ret_magic != APIR_HANDSHAKE_MAGIC) {
return 1;
}
GGML_LOG_INFO("%s: Guest is running with %u.%u\n", __func__, guest_major, guest_minor);
GGML_LOG_INFO("%s: Host is running with %u.%u\n", __func__, host_major, host_minor);
if (guest_major != host_major) {
GGML_LOG_ERROR("Host major (%d) and guest major (%d) version differ\n", host_major, guest_major);
} else if (guest_minor != host_minor) {
GGML_LOG_WARN("Host minor (%d) and guest minor (%d) version differ\n", host_minor, guest_minor);
}
return 0;
}
static ApirLoadLibraryReturnCode virtgpu_load_library(virtgpu * gpu) {
apir_encoder * encoder;
apir_decoder * decoder;
ApirLoadLibraryReturnCode ret;
encoder = remote_call_prepare(gpu, APIR_COMMAND_TYPE_LOADLIBRARY, 0);
if (!encoder) {
GGML_ABORT("%s: hypercall error: failed to prepare the remote call encoder", __func__);
return APIR_LOAD_LIBRARY_HYPERCALL_INITIALIZATION_ERROR;
}
long long call_duration_ns;
ret = (ApirLoadLibraryReturnCode) remote_call(gpu, encoder, &decoder, APIR_LOADLIBRARY_MAX_WAIT_MS,
&call_duration_ns);
log_call_duration(call_duration_ns, "API Remoting LoadLibrary");
if (!decoder) {
GGML_ABORT("%s: hypercall error: failed to kick the API remoting hypercall.\n", __func__);
return APIR_LOAD_LIBRARY_HYPERCALL_INITIALIZATION_ERROR;
}
remote_call_finish(gpu, encoder, decoder);
if (ret == APIR_LOAD_LIBRARY_SUCCESS) {
GGML_LOG_INFO("%s: The API Remoting backend was successfully loaded and initialized\n", __func__);
return ret;
}
// something wrong happened, find out what.
if (ret < APIR_LOAD_LIBRARY_INIT_BASE_INDEX) {
GGML_ABORT("%s: virglrenderer could not load the API Remoting backend library: %s (code %d)", __func__,
apir_load_library_error(ret), ret);
return ret;
}
GGML_LOG_INFO("%s: virglrenderer successfully loaded the API Remoting backend library", __func__);
ApirLoadLibraryReturnCode apir_ret = (ApirLoadLibraryReturnCode) (ret - APIR_LOAD_LIBRARY_INIT_BASE_INDEX);
if (apir_ret < APIR_LOAD_LIBRARY_INIT_BASE_INDEX) {
GGML_ABORT("%s: the API Remoting backend library couldn't load the backend library: apir code=%d | %s)",
__func__, apir_ret, apir_load_library_error(apir_ret));
} else {
uint32_t lib_ret = apir_ret - APIR_LOAD_LIBRARY_INIT_BASE_INDEX;
GGML_ABORT("%s: the API Remoting backend library initialize its backend library: apir code=%d)", __func__,
lib_ret);
}
return ret;
}
virtgpu * create_virtgpu() {
virtgpu * gpu = new virtgpu();
gpu->use_apir_capset = getenv("GGML_REMOTING_USE_APIR_CAPSET") != nullptr;
util_sparse_array_init(&gpu->shmem_array, sizeof(virtgpu_shmem), 1024);
if (virtgpu_open(gpu) != APIR_SUCCESS) {
GGML_ABORT("%s: failed to open the virtgpu device", __func__);
return NULL;
}
if (virtgpu_init_capset(gpu) != APIR_SUCCESS) {
GGML_ABORT("%s: failed to initialize the GPU capset", __func__);
return NULL;
}
if (virtgpu_init_context(gpu) != APIR_SUCCESS) {
GGML_ABORT("%s: failed to initialize the GPU context", __func__);
return NULL;
}
if (virtgpu_shmem_create(gpu, SHMEM_REPLY_SIZE, &gpu->reply_shmem)) {
GGML_ABORT("%s: failed to create the shared reply memory pages", __func__);
return NULL;
}
if (virtgpu_shmem_create(gpu, SHMEM_DATA_SIZE, &gpu->data_shmem)) {
GGML_ABORT("%s: failed to create the shared data memory pages", __func__);
return NULL;
}
if (virtgpu_handshake(gpu)) {
GGML_ABORT("%s: failed to handshake with the virglrenderer library", __func__);
return NULL;
}
if (virtgpu_load_library(gpu) != APIR_LOAD_LIBRARY_SUCCESS) {
GGML_ABORT("%s: failed to load the backend library", __func__);
return NULL;
}
return gpu;
}
static virt_gpu_result_t virtgpu_open(virtgpu * gpu) {
drmDevicePtr devs[8];
int count = drmGetDevices2(0, devs, ARRAY_SIZE(devs));
if (count < 0) {
GGML_LOG_ERROR("%s: failed to enumerate DRM devices\n", __func__);
return APIR_ERROR_INITIALIZATION_FAILED;
}
virt_gpu_result_t result = APIR_ERROR_INITIALIZATION_FAILED;
for (int i = 0; i < count; i++) {
result = virtgpu_open_device(gpu, devs[i]);
if (result == APIR_SUCCESS) {
break;
}
}
drmFreeDevices(devs, count);
return result;
}
static virt_gpu_result_t virtgpu_open_device(virtgpu * gpu, const drmDevicePtr dev) {
const char * node_path = dev->nodes[DRM_NODE_RENDER];
    int fd = open(node_path, O_RDWR | O_CLOEXEC);
    if (fd < 0) {
        GGML_LOG_ERROR("failed to open %s\n", node_path);
        return APIR_ERROR_INITIALIZATION_FAILED;
    }
    drmVersionPtr version = drmGetVersion(fd);
    if (!version || strcmp(version->name, "virtio_gpu") || version->version_major != 0) {
        if (version) {
            GGML_LOG_ERROR("skipping DRM device %s: unsupported driver %s version %d\n",
                           node_path, version->name, version->version_major);
            drmFreeVersion(version);
        } else {
            GGML_LOG_ERROR("skipping DRM device %s: failed to get the DRM driver version\n", node_path);
        }
        close(fd);
        return APIR_ERROR_INITIALIZATION_FAILED;
    }
gpu->fd = fd;
drmFreeVersion(version);
GGML_LOG_INFO("using DRM device %s\n", node_path);
return APIR_SUCCESS;
}
static virt_gpu_result_t virtgpu_init_context(virtgpu * gpu) {
assert(!gpu->capset.version);
const int ret = virtgpu_ioctl_context_init(gpu, gpu->capset.id);
if (ret) {
GGML_LOG_INFO("failed to initialize context: %s\n", strerror(errno));
return APIR_ERROR_INITIALIZATION_FAILED;
}
return APIR_SUCCESS;
}
static virt_gpu_result_t virtgpu_init_capset(virtgpu * gpu) {
if (gpu->use_apir_capset) {
GGML_LOG_INFO("Using the APIR capset\n");
gpu->capset.id = VIRTGPU_DRM_CAPSET_APIR;
} else {
GGML_LOG_INFO("Using the Venus capset\n");
gpu->capset.id = VIRTGPU_DRM_CAPSET_VENUS;
}
gpu->capset.version = 0;
int ret =
virtgpu_ioctl_get_caps(gpu, gpu->capset.id, gpu->capset.version, &gpu->capset.data, sizeof(gpu->capset.data));
if (ret) {
GGML_LOG_INFO("failed to get APIR v%d capset: %s\n", gpu->capset.version, strerror(errno));
return APIR_ERROR_INITIALIZATION_FAILED;
}
assert(gpu->capset.data.supports_blob_resources);
return APIR_SUCCESS;
}
static int virtgpu_ioctl_context_init(virtgpu * gpu, virgl_renderer_capset capset_id) {
drm_virtgpu_context_set_param ctx_set_params[3] = {
{
.param = VIRTGPU_CONTEXT_PARAM_CAPSET_ID,
.value = capset_id,
},
{
.param = VIRTGPU_CONTEXT_PARAM_NUM_RINGS,
.value = 1,
},
{
.param = VIRTGPU_CONTEXT_PARAM_POLL_RINGS_MASK,
.value = 0, /* don't generate drm_events on fence signaling */
},
};
drm_virtgpu_context_init args = {
.num_params = ARRAY_SIZE(ctx_set_params),
.pad = 0,
.ctx_set_params = (uintptr_t) &ctx_set_params,
};
return virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_CONTEXT_INIT, &args);
}
static int virtgpu_ioctl_get_caps(virtgpu * gpu,
virgl_renderer_capset id,
uint32_t version,
void * capset,
size_t capset_size) {
drm_virtgpu_get_caps args = {
.cap_set_id = id,
.cap_set_ver = version,
.addr = (uintptr_t) capset,
.size = (__u32) capset_size,
.pad = 0,
};
return virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_GET_CAPS, &args);
}
static uint64_t virtgpu_ioctl_getparam(virtgpu * gpu, uint64_t param) {
/* val must be zeroed because kernel only writes the lower 32 bits */
uint64_t val = 0;
drm_virtgpu_getparam args = {
.param = param,
.value = (uintptr_t) &val,
};
const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_GETPARAM, &args);
return ret ? 0 : val;
}
apir_encoder * remote_call_prepare(virtgpu * gpu, ApirCommandType apir_cmd_type, int32_t cmd_flags) {
/*
* Prepare the command encoder and its buffer
*/
static char encoder_buffer[4096];
static apir_encoder enc;
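    // NB: the encoder buffer and state are static (as is the reply decoder in
    // remote_call()), so only one remote call can be in flight at a time and
    // callers must serialize their remote calls.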
enc = {
.cur = encoder_buffer,
.start = encoder_buffer,
.end = encoder_buffer + sizeof(encoder_buffer),
.fatal = false,
};
/*
* Fill the command encoder with the common args:
* - cmd_type (int32_t)
* - cmd_flags (int32_t)
* - reply res id (uint32_t)
*/
int32_t cmd_type = apir_cmd_type;
// for testing during the hypervisor transition
if (!gpu->use_apir_capset) {
cmd_type += VENUS_COMMAND_TYPE_LENGTH;
}
apir_encode_int32_t(&enc, &cmd_type);
apir_encode_int32_t(&enc, &cmd_flags);
uint32_t reply_res_id = gpu->reply_shmem.res_id;
apir_encode_uint32_t(&enc, &reply_res_id);
return &enc;
}
void remote_call_finish(virtgpu * gpu, apir_encoder * enc, apir_decoder * dec) {
UNUSED(gpu);
if (!enc) {
GGML_LOG_ERROR("Invalid (null) encoder\n");
}
if (!dec) {
GGML_LOG_ERROR("Invalid (null) decoder\n");
}
if (apir_encoder_get_fatal(enc)) {
GGML_LOG_ERROR("Failed to encode the output parameters.\n");
}
if (apir_decoder_get_fatal(dec)) {
GGML_LOG_ERROR("Failed to decode the input parameters.\n");
}
}
uint32_t remote_call(virtgpu * gpu,
apir_encoder * encoder,
apir_decoder ** decoder,
float max_wait_ms,
long long * call_duration_ns) {
/*
* Prepare the reply notification pointer
*/
volatile std::atomic_uint * atomic_reply_notif = (volatile std::atomic_uint *) gpu->reply_shmem.mmap_ptr;
*atomic_reply_notif = 0;
/*
* Trigger the execbuf ioctl
*/
drm_virtgpu_execbuffer args = {
.flags = VIRTGPU_EXECBUF_RING_IDX,
.size = (uint32_t) (encoder->cur - encoder->start),
.command = (uintptr_t) encoder->start,
.bo_handles = 0,
.num_bo_handles = 0,
.fence_fd = 0,
.ring_idx = 0,
.syncobj_stride = 0,
.num_in_syncobjs = 0,
.num_out_syncobjs = 0,
.in_syncobjs = 0,
.out_syncobjs = 0,
};
*decoder = NULL;
int ret = drmIoctl(gpu->fd, DRM_IOCTL_VIRTGPU_EXECBUFFER, &args);
if (ret != 0) {
GGML_ABORT("%s: the virtgpu EXECBUFFER ioctl failed (%d)", __func__, ret);
}
/*
* Wait for the response notification
*/
timer_data wait_host_reply_timer = { 0, 0, 0 };
start_timer(&wait_host_reply_timer);
timespec ts_start, ts_end;
clock_gettime(CLOCK_MONOTONIC, &ts_start);
long long start_time = (long long) ts_start.tv_sec * 1000000000LL + ts_start.tv_nsec;
bool timedout = false;
uint32_t notif_value = 0;
while (true) {
notif_value = std::atomic_load_explicit(atomic_reply_notif, std::memory_order_acquire);
if (notif_value != 0) {
break;
}
int64_t base_sleep_us = 15;
os_time_sleep(base_sleep_us);
if (max_wait_ms) {
clock_gettime(CLOCK_MONOTONIC, &ts_end);
long long end_time = (long long) ts_end.tv_sec * 1000000000LL + ts_end.tv_nsec;
            float duration_ms = (float) (end_time - start_time) / 1e6f;
if (duration_ms > max_wait_ms) {
timedout = true;
break;
}
}
}
if (call_duration_ns) {
*call_duration_ns = stop_timer(&wait_host_reply_timer);
}
if (max_wait_ms && timedout) {
GGML_LOG_ERROR("timed out waiting for the host answer...\n");
return APIR_FORWARD_TIMEOUT;
}
/*
* Prepare the decoder
*/
static apir_decoder response_dec;
response_dec.cur = (char *) gpu->reply_shmem.mmap_ptr + sizeof(*atomic_reply_notif);
response_dec.end = (char *) gpu->reply_shmem.mmap_ptr + gpu->reply_shmem.mmap_size;
*decoder = &response_dec;
// extract the actual return value from the notif flag
uint32_t returned_value = notif_value - 1;
return returned_value;
}
static void log_call_duration(long long call_duration_ns, const char * name) {
double call_duration_ms = (double) call_duration_ns / 1e6; // 1 millisecond = 1e6 nanoseconds
double call_duration_s = (double) call_duration_ns / 1e9; // 1 second = 1e9 nanoseconds
if (call_duration_s > 1) {
GGML_LOG_INFO("%s: waited %.2fs for the %s host reply...\n", __func__, call_duration_s, name);
} else if (call_duration_ms > 1) {
GGML_LOG_INFO("%s: waited %.2fms for the %s host reply...\n", __func__, call_duration_ms, name);
} else {
GGML_LOG_INFO("%s: waited %lldns for the %s host reply...\n", __func__, call_duration_ns, name);
}
}
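For reference, remote_call() implies a simple reply convention: the host writes the encoded reply data into the reply page and then stores (return code + 1) into the page's first 32-bit word, so that a zero word still means "no reply yet". A hedged host-side sketch, assuming the host has mapped the same blob (the function name is hypothetical):
// Host-side sketch: mirrors what the guest's remote_call() expects.
static void host_post_reply(void * reply_ptr, uint32_t return_code,
                            const void * payload, size_t payload_size) {
    char * base = (char *) reply_ptr;
    memcpy(base + sizeof(std::atomic_uint), payload, payload_size);   // reply data first
    std::atomic_uint * notif = (std::atomic_uint *) base;
    std::atomic_store_explicit(notif, return_code + 1,
                               std::memory_order_release);            // then the notification word
}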

View File

@ -0,0 +1,92 @@
#pragma once
#include "virtgpu-utils.h"
#include "virtgpu-shm.h"
#include "virtgpu-apir.h"
#include "backend/shared/api_remoting.h"
#include "backend/shared/apir_cs.h"
#include <fcntl.h>
#include <stdbool.h>
#include <stdio.h>
#include <sys/stat.h>
#include <sys/sysmacros.h>
#include <threads.h>
#include <xf86drm.h>
#include <cstring>
#define VIRGL_RENDERER_UNSTABLE_APIS 1
#include "apir_hw.h"
#include <drm/virtgpu_drm.h>
#include "venus_hw.h"
#ifndef VIRTGPU_DRM_CAPSET_APIR
// Will be defined in include/drm/virtgpu_drm.h when
// https://gitlab.freedesktop.org/virgl/virglrenderer/-/merge_requests/1590/diffs
// is merged
#define VIRTGPU_DRM_CAPSET_APIR 10
#endif
// Mesa/Virglrenderer Venus internal. Only necessary during the
// Venus->APIR transition in Virglrenderer
#define VENUS_COMMAND_TYPE_LENGTH 331
#ifndef VIRTGPU_DRM_CAPSET_VENUS // only available with Linux >= v6.16
#define VIRTGPU_DRM_CAPSET_VENUS 4
#endif
typedef uint32_t virgl_renderer_capset;
/* from src/virtio/vulkan/vn_renderer_virtgpu.c */
#define VIRTGPU_PCI_VENDOR_ID 0x1af4
#define VIRTGPU_PCI_DEVICE_ID 0x1050
#define VIRTGPU_BLOB_MEM_GUEST_VRAM 0x0004
#define VIRTGPU_PARAM_GUEST_VRAM 9
#define SHMEM_DATA_SIZE  0x1830000 // 24 MiB + 192 KiB
#define SHMEM_REPLY_SIZE 0x4000    // 16 KiB
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
enum virt_gpu_result_t {
APIR_SUCCESS = 0,
APIR_ERROR_INITIALIZATION_FAILED = -1,
};
#define PRINTFLIKE(f, a) __attribute__((format(__printf__, f, a)))
struct virtgpu {
bool use_apir_capset;
int fd;
struct {
virgl_renderer_capset id;
uint32_t version;
virgl_renderer_capset_apir data;
} capset;
util_sparse_array shmem_array;
/* APIR communication pages */
virtgpu_shmem reply_shmem;
virtgpu_shmem data_shmem;
};
static inline int virtgpu_ioctl(virtgpu * gpu, unsigned long request, void * args) {
return drmIoctl(gpu->fd, request, args);
}
virtgpu * create_virtgpu();
apir_encoder * remote_call_prepare(virtgpu * gpu, ApirCommandType apir_cmd_type, int32_t cmd_flags);
uint32_t remote_call(virtgpu * gpu,
apir_encoder * enc,
apir_decoder ** dec,
float max_wait_ms,
long long * call_duration_ns);
void remote_call_finish(virtgpu * gpu, apir_encoder * enc, apir_decoder * dec);