ggml: new backend for Virglrenderer API Remoting acceleration (v2) (#18718)

commit b7feacf7f3 (parent 6ad70c5a77)
@@ -67,6 +67,7 @@
/ggml/src/ggml-rpc/ @rgerganov
/ggml/src/ggml-threading.* @ggerganov
/ggml/src/ggml-vulkan/ @0cc4m
/ggml/src/ggml-virtgpu/ @kpouget
/ggml/src/ggml-webgpu/ @reeselevine
/ggml/src/ggml-zdnn/ @taronaeo @Andreas-Krebbel @AlekseiNikiforovIBM
/ggml/src/ggml.c @ggerganov
@@ -228,6 +228,8 @@ option(GGML_WEBGPU_CPU_PROFILE "ggml: enable WebGPU profiling (CPU)
option(GGML_WEBGPU_GPU_PROFILE "ggml: enable WebGPU profiling (GPU)" OFF)
option(GGML_WEBGPU_JSPI "ggml: use JSPI for WebGPU" ON)
option(GGML_ZDNN "ggml: use zDNN" OFF)
option(GGML_VIRTGPU "ggml: use the VirtGPU/Virglrenderer API Remoting frontend" OFF)
option(GGML_VIRTGPU_BACKEND "ggml: build the VirtGPU/Virglrenderer API Remoting backend" OFF)
option(GGML_METAL "ggml: use Metal" ${GGML_METAL_DEFAULT})
option(GGML_METAL_NDEBUG "ggml: disable Metal debugging" OFF)
option(GGML_METAL_SHADER_DEBUG "ggml: compile Metal with -fno-fast-math" OFF)
@@ -320,6 +322,7 @@ set(GGML_PUBLIC_HEADERS
    include/ggml-opt.h
    include/ggml-metal.h
    include/ggml-rpc.h
    include/ggml-virtgpu.h
    include/ggml-sycl.h
    include/ggml-vulkan.h
    include/ggml-webgpu.h
@@ -0,0 +1,16 @@
#pragma once

#include "ggml.h"
#include "ggml-backend.h"

#ifdef __cplusplus
extern "C" {
#endif

#define GGML_REMOTING_FRONTEND_NAME "RemotingFrontend"

GGML_BACKEND_API ggml_backend_reg_t ggml_backend_virtgpu_reg();

#ifdef __cplusplus
}
#endif
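For reference, a minimal usage sketch (not part of this commit), assuming the frontend is compiled in statically: ggml_backend_virtgpu_reg() is the entry point declared above, and the other calls are the standard ggml-backend registry API.

#include "ggml-backend.h"
#include "ggml-virtgpu.h"

#include <cstdio>

int main(void) {
    // enumerate the devices exposed by the VirtGPU / API Remoting frontend
    ggml_backend_reg_t reg = ggml_backend_virtgpu_reg();
    size_t n_devs = ggml_backend_reg_dev_count(reg);
    printf("%s: %zu device(s)\n", ggml_backend_reg_name(reg), n_devs);

    if (n_devs > 0) {
        ggml_backend_dev_t dev = ggml_backend_reg_dev_get(reg, 0);
        ggml_backend_t backend = ggml_backend_dev_init(dev, /*params=*/nullptr);
        // ... allocate buffers and compute graphs with `backend` as usual ...
        ggml_backend_free(backend);
    }
    return 0;
}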
@@ -451,6 +451,7 @@ ggml_add_backend(HIP)
ggml_add_backend(METAL)
ggml_add_backend(MUSA)
ggml_add_backend(RPC)
ggml_add_backend(VirtGPU)
ggml_add_backend(SYCL)
ggml_add_backend(Vulkan)
ggml_add_backend(WebGPU)
@@ -69,6 +69,10 @@
#include "ggml-rpc.h"
#endif

#ifdef GGML_USE_VIRTGPU_FRONTEND
#include "ggml-virtgpu.h"
#endif

#ifdef GGML_USE_CANN
#include "ggml-cann.h"
#endif
@@ -180,7 +184,12 @@ struct ggml_backend_registry {
        register_backend(ggml_backend_sycl_reg());
#endif
#ifdef GGML_USE_VULKAN
        // allow disabling the Vulkan backend at runtime via GGML_DISABLE_VULKAN
        if (getenv("GGML_DISABLE_VULKAN") == nullptr) {
            register_backend(ggml_backend_vk_reg());
        } else {
            GGML_LOG_DEBUG("Vulkan backend disabled by GGML_DISABLE_VULKAN environment variable\n");
        }
#endif
#ifdef GGML_USE_WEBGPU
        register_backend(ggml_backend_webgpu_reg());
@@ -188,6 +197,10 @@ struct ggml_backend_registry {
#ifdef GGML_USE_ZDNN
        register_backend(ggml_backend_zdnn_reg());
#endif
#ifdef GGML_USE_VIRTGPU_FRONTEND
        register_backend(ggml_backend_virtgpu_reg());
#endif

#ifdef GGML_USE_OPENCL
        register_backend(ggml_backend_opencl_reg());
#endif
@@ -604,6 +617,7 @@ void ggml_backend_load_all_from_path(const char * dir_path) {
    ggml_backend_load_best("rpc", silent, dir_path);
    ggml_backend_load_best("sycl", silent, dir_path);
    ggml_backend_load_best("vulkan", silent, dir_path);
    ggml_backend_load_best("virtgpu", silent, dir_path);
    ggml_backend_load_best("opencl", silent, dir_path);
    ggml_backend_load_best("hexagon", silent, dir_path);
    ggml_backend_load_best("musa", silent, dir_path);
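For reference, when backends are built as loadable modules (GGML_BACKEND_DL), the line added above makes a "virtgpu" module one of the candidates searched at startup. A hedged sketch of discovering it from an application; the registry name "RemotingFrontend" is taken from GGML_REMOTING_FRONTEND_NAME and is assumed to match the name reported by the loaded module.

#include "ggml-backend.h"

#include <cstdio>

int main(void) {
    // load every backend module found in the default search paths, including "virtgpu" if present
    ggml_backend_load_all();

    ggml_backend_reg_t reg = ggml_backend_reg_by_name("RemotingFrontend");
    if (reg == nullptr) {
        fprintf(stderr, "VirtGPU frontend module not found\n");
        return 1;
    }
    printf("loaded %s with %zu device(s)\n", ggml_backend_reg_name(reg), ggml_backend_reg_dev_count(reg));
    return 0;
}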
@@ -0,0 +1,70 @@
cmake_minimum_required(VERSION 3.19)
cmake_policy(SET CMP0114 NEW)

include(ExternalProject)

message(STATUS "Including the VirtGPU/Virglrenderer API Remoting")

# Download venus_hw.h from virglrenderer repository
ExternalProject_Add(
    venus_hw_header
    URL https://gitlab.freedesktop.org/virgl/virglrenderer/-/raw/virglrenderer-1.2.0/src/venus_hw.h
    DOWNLOAD_NO_EXTRACT YES
    DOWNLOAD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include
    DOWNLOAD_NAME venus_hw.h
    CONFIGURE_COMMAND ""
    BUILD_COMMAND ""
    INSTALL_COMMAND ""
    LOG_DOWNLOAD ON
)

if (NOT GGML_VIRTGPU_BACKEND STREQUAL "ONLY")
    message(STATUS "Enable the VirtGPU/Virglrenderer API Remoting frontend library")

    find_package(PkgConfig REQUIRED)
    pkg_check_modules(DRM REQUIRED libdrm)
    if (NOT GGML_BACKEND_DL)
        # cannot simply use USE_VIRTGPU, as in the 'else()' case the
        # frontend isn't compiled
        target_compile_definitions(ggml PUBLIC "GGML_USE_VIRTGPU_FRONTEND")
    endif()

    ggml_add_backend_library(ggml-virtgpu
        ggml-backend-buffer.cpp
        ggml-backend.cpp
        ggml-backend-device.cpp
        ggml-backend-reg.cpp
        ggml-backend-buffer-type.cpp
        virtgpu-apir.h
        virtgpu-forward.gen.h
        virtgpu.cpp
        virtgpu-shm.cpp
        virtgpu-utils.cpp
        virtgpu-forward-device.cpp
        virtgpu-forward-buffer-type.cpp
        virtgpu-forward-buffer.cpp
        virtgpu-forward-backend.cpp
        virtgpu-forward-impl.h
        apir_cs_ggml-rpc-front.cpp
        ../../include/ggml-virtgpu.h)

    target_include_directories(ggml-virtgpu PUBLIC /usr/include/libdrm/)

    target_link_libraries(ggml-virtgpu PUBLIC ${DRM_LIBRARIES})
    target_include_directories(ggml-virtgpu PUBLIC ${DRM_INCLUDE_DIRS})
    target_compile_options(ggml-virtgpu PUBLIC ${DRM_CFLAGS_OTHER})

    target_include_directories(ggml-virtgpu PUBLIC ./include)
    target_include_directories(ggml-virtgpu PRIVATE ${CMAKE_CURRENT_BINARY_DIR})

    # Ensure venus_hw.h is downloaded before building ggml-virtgpu
    add_dependencies(ggml-virtgpu venus_hw_header)

    target_compile_options(ggml-virtgpu PRIVATE -std=c++20)
else()
    message(STATUS "Not building the VirtGPU/Virglrenderer API Remoting frontend library")
endif()

if (NOT GGML_VIRTGPU_BACKEND STREQUAL "OFF")
    add_subdirectory("backend")
endif()
@@ -0,0 +1,87 @@
#include "backend/shared/apir_cs_rpc.h"
#include "ggml-backend-impl.h"
#include "ggml-impl.h"
#include "ggml-remoting.h"

#include <cinttypes>
#include <unordered_map>
#include <unordered_set>
#include <vector>

apir_rpc_tensor apir_serialize_tensor(const ggml_tensor * tensor) {
    apir_rpc_tensor result;
    result.id = reinterpret_cast<uint64_t>(tensor);
    result.type = tensor->type;
    if (tensor->buffer) {
        ggml_backend_buffer_t buffer = tensor->buffer;

        result.buffer = BUFFER_TO_HOST_HANDLE(buffer);
    } else {
        result.buffer = 0;
    }
    for (uint32_t i = 0; i < GGML_MAX_DIMS; i++) {
        result.ne[i] = tensor->ne[i];
        result.nb[i] = tensor->nb[i];
    }
    result.op = tensor->op;
    for (uint32_t i = 0; i < GGML_MAX_OP_PARAMS / sizeof(int32_t); i++) {
        result.op_params[i] = tensor->op_params[i];
    }
    result.flags = tensor->flags;
    for (uint32_t i = 0; i < GGML_MAX_SRC; i++) {
        result.src[i] = reinterpret_cast<uint64_t>(tensor->src[i]);
    }
    result.view_src = reinterpret_cast<uint64_t>(tensor->view_src);
    result.view_offs = tensor->view_offs;
    result.data = reinterpret_cast<uint64_t>(tensor->data);
    if (tensor->data) {
        if (!tensor->buffer) {
            GGML_ABORT("tensor has data but not buffer");
        }
        // tensor->data is serialized as an offset to the buffer base address
        result.data -= reinterpret_cast<uint64_t>(BUFFER_TO_GGML_CONTEXT(tensor->buffer)->base);
    }
    snprintf(result.name, GGML_MAX_NAME, "%s", tensor->name);
    return result;
}

void apir_add_tensor(ggml_tensor * tensor,
                     std::vector<apir_rpc_tensor> & tensors,
                     std::unordered_set<ggml_tensor *> & visited) {
    if (tensor == nullptr) {
        return;
    }
    if (visited.find(tensor) != visited.end()) {
        return;
    }
    visited.insert(tensor);
    for (int i = 0; i < GGML_MAX_SRC; i++) {
        apir_add_tensor(tensor->src[i], tensors, visited);
    }
    apir_add_tensor(tensor->view_src, tensors, visited);
    tensors.push_back(apir_serialize_tensor(tensor));
}

void apir_serialize_graph(const ggml_cgraph * cgraph, std::vector<uint8_t> & output) {
    uint32_t n_nodes = cgraph->n_nodes;
    std::vector<apir_rpc_tensor> tensors;
    std::unordered_set<ggml_tensor *> visited;
    for (uint32_t i = 0; i < n_nodes; i++) {
        apir_add_tensor(cgraph->nodes[i], tensors, visited);
    }
    // serialization format:
    // | n_nodes (4 bytes) | nodes (n_nodes * sizeof(uint64_t)) | n_tensors (4 bytes) | tensors (n_tensors * sizeof(apir_rpc_tensor)) |
    uint32_t n_tensors = tensors.size();
    int output_size =
        sizeof(uint32_t) + n_nodes * sizeof(uint64_t) + sizeof(uint32_t) + n_tensors * sizeof(apir_rpc_tensor);
    output.resize(output_size, 0);
    memcpy(output.data(), &n_nodes, sizeof(n_nodes));
    for (uint32_t i = 0; i < n_nodes; i++) {
        memcpy(output.data() + sizeof(n_nodes) + i * sizeof(uint64_t), &cgraph->nodes[i], sizeof(uint64_t));
    }
    uint32_t * out_ntensors = (uint32_t *) (output.data() + sizeof(n_nodes) + n_nodes * sizeof(uint64_t));
    *out_ntensors = n_tensors;
    apir_rpc_tensor * out_tensors =
        (apir_rpc_tensor *) (output.data() + sizeof(n_nodes) + n_nodes * sizeof(uint64_t) + sizeof(uint32_t));
    memcpy(out_tensors, tensors.data(), n_tensors * sizeof(apir_rpc_tensor));
}
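For reference, a small standalone sketch (not part of this commit) that walks the header of a buffer produced by apir_serialize_graph() above; the real consumer is apir_deserialize_graph() in apir_cs_ggml-rpc-back.cpp further down in this diff.

#include <cstdint>
#include <cstdio>
#include <cstring>
#include <vector>

// hypothetical helper: prints the header of a buffer produced by apir_serialize_graph
// layout: | n_nodes (4 bytes) | node ids (n_nodes * 8 bytes) | n_tensors (4 bytes) | tensors |
static void apir_dump_graph_header(const std::vector<uint8_t> & buf) {
    uint32_t n_nodes = 0;
    memcpy(&n_nodes, buf.data(), sizeof(n_nodes));

    const uint8_t * node_ids = buf.data() + sizeof(n_nodes);

    uint32_t n_tensors = 0;
    memcpy(&n_tensors, node_ids + (size_t) n_nodes * sizeof(uint64_t), sizeof(n_tensors));

    printf("cgraph: %u node(s), %u unique tensor(s)\n", n_nodes, n_tensors);
    for (uint32_t i = 0; i < n_nodes; i++) {
        uint64_t id;  // the guest-side ggml_tensor pointer, used as a lookup key on the host
        memcpy(&id, node_ids + (size_t) i * sizeof(uint64_t), sizeof(id));
        printf("  node[%u] = 0x%016llx\n", i, (unsigned long long) id);
    }
}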
@@ -0,0 +1,21 @@
cmake_minimum_required(VERSION 3.19)
cmake_policy(SET CMP0114 NEW)

message(STATUS "Enable the VirtGPU/Virglrenderer backend library")

ggml_add_backend_library(ggml-virtgpu-backend
    backend.cpp
    backend-dispatched.cpp
    backend-dispatched-backend.cpp
    backend-dispatched-device.cpp
    backend-dispatched-buffer.cpp
    backend-dispatched-buffer-type.cpp
    shared/api_remoting.h
    shared/apir_backend.h
    shared/apir_cs.h
    apir_cs_ggml-rpc-back.cpp)

target_compile_options(ggml-virtgpu-backend PRIVATE -std=c++20)

# Add include directory for ggml-backend-impl.h and other core headers
target_include_directories(ggml-virtgpu-backend PRIVATE ../..)
@@ -0,0 +1,115 @@
|
|||
#include "ggml-backend-impl.h"
|
||||
#include "ggml-impl.h"
|
||||
#include "shared/apir_cs_rpc.h"
|
||||
|
||||
#include <cinttypes>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
|
||||
std::unordered_set<ggml_backend_buffer_t> backend_buffers;
|
||||
|
||||
void apir_track_backend_buffer(ggml_backend_buffer_t buffer) {
|
||||
backend_buffers.insert(buffer);
|
||||
}
|
||||
|
||||
bool apir_untrack_backend_buffer(ggml_backend_buffer_t buffer) {
|
||||
auto it = backend_buffers.find(buffer);
|
||||
if (it == backend_buffers.end()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
backend_buffers.erase(it);
|
||||
return true;
|
||||
}
|
||||
|
||||
std::unordered_set<ggml_backend_buffer_t> apir_get_track_backend_buffers() {
|
||||
return backend_buffers;
|
||||
}
|
||||
|
||||
ggml_tensor * apir_deserialize_tensor(ggml_context * ctx, const apir_rpc_tensor * tensor) {
|
||||
ggml_tensor * result =
|
||||
ggml_new_tensor_4d(ctx, (ggml_type) tensor->type, tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3]);
|
||||
for (uint32_t i = 0; i < GGML_MAX_DIMS; i++) {
|
||||
result->nb[i] = tensor->nb[i];
|
||||
}
|
||||
result->buffer = reinterpret_cast<ggml_backend_buffer_t>(tensor->buffer);
|
||||
if (result->buffer && backend_buffers.find(result->buffer) == backend_buffers.end()) {
|
||||
printf("WARNING: HOST BUFFER NOT FOUND | %p\n", (void *) result->buffer);
|
||||
result->buffer = nullptr;
|
||||
}
|
||||
|
||||
uint64_t tensor_data = tensor->data;
|
||||
if (result->buffer) {
|
||||
// require that the tensor data does not go beyond the buffer end
|
||||
uint64_t tensor_size = (uint64_t) ggml_nbytes(result);
|
||||
uint64_t buffer_start = (uint64_t) ggml_backend_buffer_get_base(result->buffer);
|
||||
uint64_t buffer_size = (uint64_t) ggml_backend_buffer_get_size(result->buffer);
|
||||
|
||||
// tensor->data is serialized as an offset to the buffer base address
|
||||
tensor_data += buffer_start;
|
||||
|
||||
GGML_ASSERT(tensor_data + tensor_size >= tensor_data); // check for overflow
|
||||
GGML_ASSERT(tensor_data >= buffer_start && tensor_data + tensor_size <= buffer_start + buffer_size);
|
||||
}
|
||||
|
||||
result->op = (ggml_op) tensor->op;
|
||||
for (uint32_t i = 0; i < GGML_MAX_OP_PARAMS / sizeof(int32_t); i++) {
|
||||
result->op_params[i] = tensor->op_params[i];
|
||||
}
|
||||
result->flags = tensor->flags;
|
||||
result->data = reinterpret_cast<void *>(tensor_data);
|
||||
ggml_set_name(result, tensor->name);
|
||||
return result;
|
||||
}
|
||||
|
||||
ggml_tensor * apir_create_node(uint64_t id,
|
||||
ggml_context * ctx,
|
||||
const std::unordered_map<uint64_t, const apir_rpc_tensor *> & tensor_ptrs,
|
||||
std::unordered_map<uint64_t, ggml_tensor *> & tensor_map) {
|
||||
if (id == 0) {
|
||||
return nullptr;
|
||||
}
|
||||
if (tensor_map.find(id) != tensor_map.end()) {
|
||||
return tensor_map[id];
|
||||
}
|
||||
const apir_rpc_tensor * tensor = tensor_ptrs.at(id);
|
||||
ggml_tensor * result = apir_deserialize_tensor(ctx, tensor);
|
||||
if (result == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
tensor_map[id] = result;
|
||||
for (int i = 0; i < GGML_MAX_SRC; i++) {
|
||||
result->src[i] = apir_create_node(tensor->src[i], ctx, tensor_ptrs, tensor_map);
|
||||
}
|
||||
result->view_src = apir_create_node(tensor->view_src, ctx, tensor_ptrs, tensor_map);
|
||||
result->view_offs = tensor->view_offs;
|
||||
return result;
|
||||
}
|
||||
|
||||
ggml_cgraph * apir_deserialize_graph(uint32_t n_nodes,
|
||||
uint32_t n_tensors,
|
||||
const apir_rpc_tensor * tensors,
|
||||
const uint64_t * nodes) {
|
||||
size_t buf_size = ggml_tensor_overhead() * (n_nodes + n_tensors) + ggml_graph_overhead_custom(n_nodes, false);
|
||||
ggml_init_params params = {
|
||||
/*.mem_size =*/buf_size,
|
||||
/*.mem_buffer =*/NULL,
|
||||
/*.no_alloc =*/true,
|
||||
};
|
||||
ggml_context * ctx = ggml_init(params);
|
||||
ggml_cgraph * graph = ggml_new_graph_custom(ctx, n_nodes, false);
|
||||
graph->n_nodes = n_nodes;
|
||||
std::unordered_map<uint64_t, const apir_rpc_tensor *> tensor_ptrs;
|
||||
for (uint32_t i = 0; i < n_tensors; i++) {
|
||||
tensor_ptrs[tensors[i].id] = &tensors[i];
|
||||
}
|
||||
std::unordered_map<uint64_t, ggml_tensor *> tensor_map;
|
||||
for (uint32_t i = 0; i < n_nodes; i++) {
|
||||
int64_t id;
|
||||
memcpy(&id, &nodes[i], sizeof(id));
|
||||
graph->nodes[i] = apir_create_node(id, ctx, tensor_ptrs, tensor_map);
|
||||
}
|
||||
|
||||
return graph;
|
||||
}
|
||||
|
|
@@ -0,0 +1,13 @@
#include "shared/apir_backend.h"

#define BUFFER_TO_HOST_HANDLE(name) ggml_buffer_to_apir_handle(name)

static inline apir_buffer_host_handle_t ggml_buffer_to_apir_handle(ggml_backend_buffer_t buffer) {
    // in the backend, the buffer handle is the buffer pointer
    return (apir_buffer_host_handle_t) buffer;
}

static inline apir_buffer_type_host_handle_t ggml_buffer_type_to_apir_handle(ggml_backend_buffer_type_t buft) {
    // in the backend, the buffer-type handle is the buffer-type pointer
    return (apir_buffer_type_host_handle_t) buft;
}
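For reference, a trivial sketch (not part of this commit) making the design choice above explicit: on the host side an APIR handle is just the integer value of the ggml pointer, so converting back and forth is an identity round-trip. The stand-in types below are hypothetical; the real ones come from ggml-backend-impl.h and shared/apir_backend.h.

#include <cassert>
#include <cstdint>

// stand-ins, only to make the identity property explicit
typedef uintptr_t apir_buffer_host_handle_t;
struct ggml_backend_buffer;  // opaque

static apir_buffer_host_handle_t to_handle(ggml_backend_buffer * buffer) {
    return (apir_buffer_host_handle_t) buffer;   // same idea as ggml_buffer_to_apir_handle()
}

static ggml_backend_buffer * from_handle(apir_buffer_host_handle_t handle) {
    return (ggml_backend_buffer *) handle;       // how the dispatch side resolves a guest handle
}

int main(void) {
    ggml_backend_buffer * fake = (ggml_backend_buffer *) 0x1000;  // hypothetical pointer value
    assert(from_handle(to_handle(fake)) == fake);                 // handle == pointer, identity round-trip
    return 0;
}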
@@ -0,0 +1,65 @@
|
|||
#include "backend-dispatched.h"
|
||||
#include "backend-virgl-apir.h"
|
||||
#include "ggml-backend-impl.h"
|
||||
#include "ggml-backend.h"
|
||||
#include "ggml-impl.h"
|
||||
#include "shared/apir_backend.h"
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
uint32_t backend_backend_graph_compute(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) {
|
||||
GGML_UNUSED(ctx);
|
||||
GGML_UNUSED(enc);
|
||||
|
||||
static bool async_backend_initialized = false;
|
||||
static bool async_backend;
|
||||
|
||||
if (!async_backend_initialized) {
|
||||
ggml_backend_dev_props props;
|
||||
|
||||
dev->iface.get_props(dev, &props);
|
||||
async_backend = props.caps.async;
|
||||
async_backend_initialized = true;
|
||||
}
|
||||
|
||||
uint32_t shmem_res_id;
|
||||
apir_decode_virtgpu_shmem_res_id(dec, &shmem_res_id);
|
||||
|
||||
const void * shmem_data = ctx->iface->get_shmem_ptr(ctx->ctx_id, shmem_res_id);
|
||||
if (!shmem_data) {
|
||||
GGML_LOG_ERROR("Couldn't get the shmem addr from virgl\n");
|
||||
apir_decoder_set_fatal(dec);
|
||||
return 1;
|
||||
}
|
||||
size_t cgraph_size;
|
||||
apir_decode_size_t(dec, &cgraph_size);
|
||||
|
||||
apir_decoder secondary_dec = apir_new_decoder((const char *) shmem_data, cgraph_size);
|
||||
|
||||
ggml_cgraph * cgraph = apir_decode_ggml_cgraph(&secondary_dec, cgraph_size);
|
||||
|
||||
ggml_status status;
|
||||
#if APIR_BACKEND_CHECK_SUPPORTS_OP == 1
|
||||
for (int idx = 0; idx < cgraph->n_nodes; idx++) {
|
||||
ggml_tensor * op = ggml_graph_node(cgraph, idx);
|
||||
if (dev->iface.supports_op(dev, op)) {
|
||||
continue;
|
||||
}
|
||||
GGML_LOG_ERROR("Graph node %d (%s) not supported by the backend\n", idx, ggml_op_desc(op));
|
||||
|
||||
status = GGML_STATUS_ABORTED;
|
||||
apir_encode_ggml_status(enc, &status);
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
status = bck->iface.graph_compute(bck, cgraph);
|
||||
|
||||
if (async_backend) {
|
||||
bck->iface.synchronize(bck);
|
||||
}
|
||||
|
||||
apir_encode_ggml_status(enc, &status);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
@@ -0,0 +1,89 @@
|
|||
#include "backend-dispatched.h"
|
||||
#include "backend-virgl-apir.h"
|
||||
#include "ggml-backend-impl.h"
|
||||
#include "ggml-backend.h"
|
||||
#include "ggml-impl.h"
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
uint32_t backend_buffer_type_get_name(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) {
|
||||
GGML_UNUSED(ctx);
|
||||
ggml_backend_buffer_type_t buft;
|
||||
buft = apir_decode_ggml_buffer_type(dec);
|
||||
|
||||
const char * string = buft->iface.get_name(buft);
|
||||
|
||||
const size_t string_size = strlen(string) + 1;
|
||||
apir_encode_array_size(enc, string_size);
|
||||
apir_encode_char_array(enc, string, string_size);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t backend_buffer_type_get_alignment(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) {
|
||||
GGML_UNUSED(ctx);
|
||||
ggml_backend_buffer_type_t buft;
|
||||
buft = apir_decode_ggml_buffer_type(dec);
|
||||
|
||||
size_t value = buft->iface.get_alignment(buft);
|
||||
apir_encode_size_t(enc, &value);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t backend_buffer_type_get_max_size(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) {
|
||||
GGML_UNUSED(ctx);
|
||||
ggml_backend_buffer_type_t buft;
|
||||
buft = apir_decode_ggml_buffer_type(dec);
|
||||
|
||||
size_t value = buft->iface.get_max_size(buft);
|
||||
apir_encode_size_t(enc, &value);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t backend_buffer_type_is_host(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) {
|
||||
GGML_UNUSED(ctx);
|
||||
ggml_backend_buffer_type_t buft;
|
||||
buft = apir_decode_ggml_buffer_type(dec);
|
||||
|
||||
bool is_host = buft->iface.is_host(buft);
|
||||
apir_encode_bool_t(enc, &is_host);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t backend_buffer_type_alloc_buffer(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) {
|
||||
GGML_UNUSED(ctx);
|
||||
ggml_backend_buffer_type_t buft;
|
||||
buft = apir_decode_ggml_buffer_type(dec);
|
||||
|
||||
size_t size;
|
||||
apir_decode_size_t(dec, &size);
|
||||
|
||||
ggml_backend_buffer_t buffer;
|
||||
|
||||
buffer = buft->iface.alloc_buffer(buft, size);
|
||||
|
||||
apir_encode_ggml_buffer(enc, buffer);
|
||||
|
||||
if (buffer) {
|
||||
apir_track_backend_buffer(buffer);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t backend_buffer_type_get_alloc_size(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) {
|
||||
GGML_UNUSED(ctx);
|
||||
ggml_backend_buffer_type_t buft;
|
||||
buft = apir_decode_ggml_buffer_type(dec);
|
||||
|
||||
const ggml_tensor * op = apir_decode_ggml_tensor_inplace(dec);
|
||||
|
||||
size_t value = buft->iface.get_alloc_size(buft, op);
|
||||
|
||||
apir_encode_size_t(enc, &value);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
@@ -0,0 +1,131 @@
|
|||
#include "backend-dispatched.h"
|
||||
#include "backend-virgl-apir.h"
|
||||
#include "ggml-backend-impl.h"
|
||||
#include "ggml-backend.h"
|
||||
#include "ggml-impl.h"
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
uint32_t backend_buffer_get_base(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) {
|
||||
GGML_UNUSED(ctx);
|
||||
ggml_backend_buffer_t buffer;
|
||||
buffer = apir_decode_ggml_buffer(dec);
|
||||
|
||||
uintptr_t base = (uintptr_t) buffer->iface.get_base(buffer);
|
||||
apir_encode_uintptr_t(enc, &base);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t backend_buffer_set_tensor(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) {
|
||||
GGML_UNUSED(ctx);
|
||||
GGML_UNUSED(enc);
|
||||
|
||||
ggml_backend_buffer_t buffer;
|
||||
buffer = apir_decode_ggml_buffer(dec);
|
||||
|
||||
ggml_tensor * tensor;
|
||||
// safe to remove the const qualifier here
|
||||
tensor = (ggml_tensor *) (uintptr_t) apir_decode_ggml_tensor(dec);
|
||||
|
||||
uint32_t shmem_res_id;
|
||||
apir_decode_virtgpu_shmem_res_id(dec, &shmem_res_id);
|
||||
|
||||
size_t offset;
|
||||
apir_decode_size_t(dec, &offset);
|
||||
|
||||
size_t size;
|
||||
apir_decode_size_t(dec, &size);
|
||||
|
||||
void * shmem_data = ctx->iface->get_shmem_ptr(ctx->ctx_id, shmem_res_id);
|
||||
|
||||
if (!shmem_data) {
|
||||
GGML_LOG_ERROR("Couldn't get the shmem addr from virgl\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
buffer->iface.set_tensor(buffer, tensor, shmem_data, offset, size);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t backend_buffer_get_tensor(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) {
|
||||
GGML_UNUSED(ctx);
|
||||
GGML_UNUSED(enc);
|
||||
|
||||
ggml_backend_buffer_t buffer;
|
||||
buffer = apir_decode_ggml_buffer(dec);
|
||||
|
||||
const ggml_tensor * tensor;
|
||||
// safe to remove the const qualifier here
|
||||
tensor = apir_decode_ggml_tensor(dec);
|
||||
|
||||
uint32_t shmem_res_id;
|
||||
apir_decode_virtgpu_shmem_res_id(dec, &shmem_res_id);
|
||||
|
||||
size_t offset;
|
||||
apir_decode_size_t(dec, &offset);
|
||||
|
||||
size_t size;
|
||||
apir_decode_size_t(dec, &size);
|
||||
|
||||
void * shmem_data = ctx->iface->get_shmem_ptr(ctx->ctx_id, shmem_res_id);
|
||||
if (!shmem_data) {
|
||||
GGML_LOG_ERROR("Couldn't get the shmem addr from virgl\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
buffer->iface.get_tensor(buffer, tensor, shmem_data, offset, size);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t backend_buffer_cpy_tensor(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) {
|
||||
GGML_UNUSED(ctx);
|
||||
|
||||
ggml_backend_buffer_t buffer;
|
||||
buffer = apir_decode_ggml_buffer(dec);
|
||||
|
||||
const ggml_tensor * src;
|
||||
// safe to remove the const qualifier here
|
||||
src = apir_decode_ggml_tensor(dec);
|
||||
ggml_tensor * dst = (ggml_tensor *) (uintptr_t) apir_decode_ggml_tensor(dec);
|
||||
|
||||
bool ret = buffer->iface.cpy_tensor(buffer, src, (ggml_tensor *) dst);
|
||||
|
||||
apir_encode_bool_t(enc, &ret);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t backend_buffer_clear(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) {
|
||||
GGML_UNUSED(ctx);
|
||||
GGML_UNUSED(enc);
|
||||
|
||||
ggml_backend_buffer_t buffer;
|
||||
buffer = apir_decode_ggml_buffer(dec);
|
||||
|
||||
uint8_t value;
|
||||
apir_decode_uint8_t(dec, &value);
|
||||
|
||||
buffer->iface.clear(buffer, value);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t backend_buffer_free_buffer(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) {
|
||||
GGML_UNUSED(ctx);
|
||||
GGML_UNUSED(enc);
|
||||
|
||||
ggml_backend_buffer_t buffer;
|
||||
buffer = apir_decode_ggml_buffer(dec);
|
||||
|
||||
if (!apir_untrack_backend_buffer(buffer)) {
|
||||
GGML_LOG_WARN("%s: unknown buffer %p\n", __func__, (void *) buffer);
|
||||
return 1;
|
||||
}
|
||||
|
||||
buffer->iface.free_buffer(buffer);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
@@ -0,0 +1,148 @@
|
|||
#include "backend-dispatched.h"
|
||||
#include "backend-virgl-apir.h"
|
||||
#include "ggml-backend-impl.h"
|
||||
#include "ggml-backend.h"
|
||||
#include "ggml-impl.h"
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
uint32_t backend_device_get_device_count(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) {
|
||||
GGML_UNUSED(ctx);
|
||||
GGML_UNUSED(dec);
|
||||
|
||||
int32_t dev_count = reg->iface.get_device_count(reg);
|
||||
apir_encode_int32_t(enc, &dev_count);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t backend_device_get_count(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) {
|
||||
GGML_UNUSED(ctx);
|
||||
GGML_UNUSED(dec);
|
||||
|
||||
int32_t dev_count = reg->iface.get_device_count(reg);
|
||||
apir_encode_int32_t(enc, &dev_count);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t backend_device_get_name(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) {
|
||||
GGML_UNUSED(ctx);
|
||||
GGML_UNUSED(dec);
|
||||
|
||||
const char * string = dev->iface.get_name(dev);
|
||||
|
||||
const size_t string_size = strlen(string) + 1;
|
||||
apir_encode_array_size(enc, string_size);
|
||||
apir_encode_char_array(enc, string, string_size);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t backend_device_get_description(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) {
|
||||
GGML_UNUSED(ctx);
|
||||
GGML_UNUSED(dec);
|
||||
|
||||
const char * string = dev->iface.get_description(dev);
|
||||
|
||||
const size_t string_size = strlen(string) + 1;
|
||||
apir_encode_array_size(enc, string_size);
|
||||
apir_encode_char_array(enc, string, string_size);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t backend_device_get_type(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) {
|
||||
GGML_UNUSED(ctx);
|
||||
GGML_UNUSED(dec);
|
||||
|
||||
uint32_t type = dev->iface.get_type(dev);
|
||||
apir_encode_uint32_t(enc, &type);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t backend_device_get_memory(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) {
|
||||
GGML_UNUSED(ctx);
|
||||
GGML_UNUSED(dec);
|
||||
|
||||
size_t free, total;
|
||||
dev->iface.get_memory(dev, &free, &total);
|
||||
|
||||
apir_encode_size_t(enc, &free);
|
||||
apir_encode_size_t(enc, &total);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t backend_device_supports_op(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) {
|
||||
GGML_UNUSED(ctx);
|
||||
|
||||
const ggml_tensor * op = apir_decode_ggml_tensor_inplace(dec);
|
||||
|
||||
bool supports_op = dev->iface.supports_op(dev, op);
|
||||
|
||||
apir_encode_bool_t(enc, &supports_op);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t backend_device_get_buffer_type(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) {
|
||||
GGML_UNUSED(ctx);
|
||||
GGML_UNUSED(dec);
|
||||
|
||||
ggml_backend_buffer_type_t bufft = dev->iface.get_buffer_type(dev);
|
||||
|
||||
apir_encode_ggml_buffer_type(enc, bufft);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t backend_device_get_props(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) {
|
||||
GGML_UNUSED(ctx);
|
||||
GGML_UNUSED(dec);
|
||||
|
||||
ggml_backend_dev_props props;
|
||||
dev->iface.get_props(dev, &props);
|
||||
|
||||
apir_encode_bool_t(enc, &props.caps.async);
|
||||
apir_encode_bool_t(enc, &props.caps.host_buffer);
|
||||
apir_encode_bool_t(enc, &props.caps.buffer_from_host_ptr);
|
||||
apir_encode_bool_t(enc, &props.caps.events);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t backend_device_buffer_from_ptr(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) {
|
||||
GGML_UNUSED(ctx);
|
||||
GGML_UNUSED(dec);
|
||||
|
||||
uint32_t shmem_res_id;
|
||||
apir_decode_virtgpu_shmem_res_id(dec, &shmem_res_id);
|
||||
|
||||
void * shmem_ptr = ctx->iface->get_shmem_ptr(ctx->ctx_id, shmem_res_id);
|
||||
if (!shmem_ptr) {
|
||||
GGML_LOG_ERROR("Couldn't get the shmem addr from virgl\n");
|
||||
apir_decoder_set_fatal(dec);
|
||||
return 1;
|
||||
}
|
||||
|
||||
size_t size;
|
||||
apir_decode_size_t(dec, &size);
|
||||
size_t max_tensor_size;
|
||||
apir_decode_size_t(dec, &max_tensor_size);
|
||||
|
||||
ggml_backend_buffer_t buffer;
|
||||
buffer = dev->iface.buffer_from_host_ptr(dev, shmem_ptr, size, max_tensor_size);
|
||||
|
||||
apir_encode_ggml_buffer(enc, buffer);
|
||||
apir_encode_ggml_buffer_type(enc, buffer->buft);
|
||||
|
||||
if (buffer) {
|
||||
apir_track_backend_buffer(buffer);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
@@ -0,0 +1,46 @@
|
|||
#include "backend-dispatched.h"
|
||||
#include "backend-virgl-apir.h"
|
||||
|
||||
#include "ggml-backend-impl.h"
|
||||
#include "ggml-backend.h"
|
||||
#include "ggml-impl.h"
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
ggml_backend_reg_t reg = NULL;
|
||||
ggml_backend_dev_t dev = NULL;
|
||||
ggml_backend_t bck = NULL;
|
||||
|
||||
uint64_t timer_start = 0;
|
||||
uint64_t timer_total = 0;
|
||||
uint64_t timer_count = 0;
|
||||
|
||||
uint32_t backend_dispatch_initialize(void * ggml_backend_reg_fct_p) {
|
||||
if (reg != NULL) {
|
||||
GGML_LOG_WARN("%s: already initialized\n", __func__);
|
||||
return APIR_BACKEND_INITIALIZE_ALREADY_INITED;
|
||||
}
|
||||
ggml_backend_reg_t (*ggml_backend_reg_fct)(void) = (ggml_backend_reg_t (*)()) ggml_backend_reg_fct_p;
|
||||
|
||||
reg = ggml_backend_reg_fct();
|
||||
if (reg == NULL) {
|
||||
GGML_LOG_ERROR("%s: backend registration failed\n", __func__);
|
||||
return APIR_BACKEND_INITIALIZE_BACKEND_REG_FAILED;
|
||||
}
|
||||
|
||||
if (!reg->iface.get_device_count(reg)) {
|
||||
GGML_LOG_ERROR("%s: backend initialization failed: no device found\n", __func__);
|
||||
return APIR_BACKEND_INITIALIZE_NO_DEVICE;
|
||||
}
|
||||
|
||||
dev = reg->iface.get_device(reg, 0);
|
||||
|
||||
if (!dev) {
|
||||
GGML_LOG_ERROR("%s: backend initialization failed: no device received\n", __func__);
|
||||
return APIR_BACKEND_INITIALIZE_NO_DEVICE;
|
||||
}
|
||||
|
||||
bck = dev->iface.init_backend(dev, NULL);
|
||||
|
||||
return APIR_BACKEND_INITIALIZE_SUCCESS;
|
||||
}
|
||||
|
|
@@ -0,0 +1,130 @@
|
|||
#pragma once
|
||||
|
||||
/* device */
|
||||
uint32_t backend_device_get_device_count(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
|
||||
uint32_t backend_device_get_count(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
|
||||
uint32_t backend_device_get_name(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
|
||||
uint32_t backend_device_get_description(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
|
||||
uint32_t backend_device_get_type(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
|
||||
uint32_t backend_device_get_memory(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
|
||||
uint32_t backend_device_supports_op(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
|
||||
uint32_t backend_device_get_buffer_type(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
|
||||
uint32_t backend_device_get_props(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
|
||||
uint32_t backend_device_buffer_from_ptr(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
|
||||
|
||||
/* buffer-type */
|
||||
uint32_t backend_buffer_type_get_name(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
|
||||
uint32_t backend_buffer_type_get_alignment(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
|
||||
uint32_t backend_buffer_type_get_max_size(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
|
||||
uint32_t backend_buffer_type_is_host(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
|
||||
uint32_t backend_buffer_type_alloc_buffer(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
|
||||
uint32_t backend_buffer_type_get_alloc_size(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
|
||||
|
||||
/* buffer */
|
||||
uint32_t backend_buffer_get_base(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
|
||||
uint32_t backend_buffer_set_tensor(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
|
||||
uint32_t backend_buffer_get_tensor(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
|
||||
uint32_t backend_buffer_cpy_tensor(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
|
||||
uint32_t backend_buffer_clear(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
|
||||
uint32_t backend_buffer_free_buffer(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
|
||||
|
||||
/* backend */
|
||||
uint32_t backend_backend_graph_compute(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
|
||||
|
||||
static inline const char * backend_dispatch_command_name(ApirBackendCommandType type) {
|
||||
switch (type) {
|
||||
/* device */
|
||||
case APIR_COMMAND_TYPE_DEVICE_GET_DEVICE_COUNT:
|
||||
return "backend_device_get_device_count";
|
||||
case APIR_COMMAND_TYPE_DEVICE_GET_COUNT:
|
||||
return "backend_device_get_count";
|
||||
case APIR_COMMAND_TYPE_DEVICE_GET_NAME:
|
||||
return "backend_device_get_name";
|
||||
case APIR_COMMAND_TYPE_DEVICE_GET_DESCRIPTION:
|
||||
return "backend_device_get_description";
|
||||
case APIR_COMMAND_TYPE_DEVICE_GET_TYPE:
|
||||
return "backend_device_get_type";
|
||||
case APIR_COMMAND_TYPE_DEVICE_GET_MEMORY:
|
||||
return "backend_device_get_memory";
|
||||
case APIR_COMMAND_TYPE_DEVICE_SUPPORTS_OP:
|
||||
return "backend_device_supports_op";
|
||||
case APIR_COMMAND_TYPE_DEVICE_GET_BUFFER_TYPE:
|
||||
return "backend_device_get_buffer_type";
|
||||
case APIR_COMMAND_TYPE_DEVICE_GET_PROPS:
|
||||
return "backend_device_get_props";
|
||||
case APIR_COMMAND_TYPE_DEVICE_BUFFER_FROM_PTR:
|
||||
return "backend_device_buffer_from_ptr";
|
||||
/* buffer-type */
|
||||
case APIR_COMMAND_TYPE_BUFFER_TYPE_GET_NAME:
|
||||
return "backend_buffer_type_get_name";
|
||||
case APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALIGNMENT:
|
||||
return "backend_buffer_type_get_alignment";
|
||||
case APIR_COMMAND_TYPE_BUFFER_TYPE_GET_MAX_SIZE:
|
||||
return "backend_buffer_type_get_max_size";
|
||||
case APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST:
|
||||
return "backend_buffer_type_is_host";
|
||||
case APIR_COMMAND_TYPE_BUFFER_TYPE_ALLOC_BUFFER:
|
||||
return "backend_buffer_type_alloc_buffer";
|
||||
case APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALLOC_SIZE:
|
||||
return "backend_buffer_type_get_alloc_size";
|
||||
/* buffer */
|
||||
case APIR_COMMAND_TYPE_BUFFER_GET_BASE:
|
||||
return "backend_buffer_get_base";
|
||||
case APIR_COMMAND_TYPE_BUFFER_SET_TENSOR:
|
||||
return "backend_buffer_set_tensor";
|
||||
case APIR_COMMAND_TYPE_BUFFER_GET_TENSOR:
|
||||
return "backend_buffer_get_tensor";
|
||||
case APIR_COMMAND_TYPE_BUFFER_CPY_TENSOR:
|
||||
return "backend_buffer_cpy_tensor";
|
||||
case APIR_COMMAND_TYPE_BUFFER_CLEAR:
|
||||
return "backend_buffer_clear";
|
||||
case APIR_COMMAND_TYPE_BUFFER_FREE_BUFFER:
|
||||
return "backend_buffer_free_buffer";
|
||||
/* backend */
|
||||
case APIR_COMMAND_TYPE_BACKEND_GRAPH_COMPUTE:
|
||||
return "backend_backend_graph_compute";
|
||||
|
||||
default:
|
||||
return "unknown";
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" {
|
||||
static const backend_dispatch_t apir_backend_dispatch_table[APIR_BACKEND_DISPATCH_TABLE_COUNT] = {
|
||||
|
||||
/* device */
|
||||
|
||||
/* APIR_COMMAND_TYPE_DEVICE_GET_DEVICE_COUNT = */ backend_device_get_device_count,
|
||||
/* APIR_COMMAND_TYPE_DEVICE_GET_COUNT = */ backend_device_get_count,
|
||||
/* APIR_COMMAND_TYPE_DEVICE_GET_NAME = */ backend_device_get_name,
|
||||
/* APIR_COMMAND_TYPE_DEVICE_GET_DESCRIPTION = */ backend_device_get_description,
|
||||
/* APIR_COMMAND_TYPE_DEVICE_GET_TYPE = */ backend_device_get_type,
|
||||
/* APIR_COMMAND_TYPE_DEVICE_GET_MEMORY = */ backend_device_get_memory,
|
||||
/* APIR_COMMAND_TYPE_DEVICE_SUPPORTS_OP = */ backend_device_supports_op,
|
||||
/* APIR_COMMAND_TYPE_DEVICE_GET_BUFFER_TYPE = */ backend_device_get_buffer_type,
|
||||
/* APIR_COMMAND_TYPE_DEVICE_GET_PROPS = */ backend_device_get_props,
|
||||
/* APIR_COMMAND_TYPE_DEVICE_BUFFER_FROM_PTR = */ backend_device_buffer_from_ptr,
|
||||
|
||||
/* buffer-type */
|
||||
|
||||
/* APIR_COMMAND_TYPE_BUFFER_TYPE_GET_NAME = */ backend_buffer_type_get_name,
|
||||
/* APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALIGNMENT = */ backend_buffer_type_get_alignment,
|
||||
/* APIR_COMMAND_TYPE_BUFFER_TYPE_GET_MAX_SIZE = */ backend_buffer_type_get_max_size,
|
||||
/* APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST = */ backend_buffer_type_is_host,
|
||||
/* APIR_COMMAND_TYPE_BUFFER_TYPE_ALLOC_BUFFER = */ backend_buffer_type_alloc_buffer,
|
||||
/* APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALLOC_SIZE = */ backend_buffer_type_get_alloc_size,
|
||||
|
||||
/* buffer */
|
||||
|
||||
/* APIR_COMMAND_TYPE_BUFFER_GET_BASE = */ backend_buffer_get_base,
|
||||
/* APIR_COMMAND_TYPE_BUFFER_SET_TENSOR = */ backend_buffer_set_tensor,
|
||||
/* APIR_COMMAND_TYPE_BUFFER_GET_TENSOR = */ backend_buffer_get_tensor,
|
||||
/* APIR_COMMAND_TYPE_BUFFER_CPY_TENSOR = */ backend_buffer_cpy_tensor,
|
||||
/* APIR_COMMAND_TYPE_BUFFER_CLEAR = */ backend_buffer_clear,
|
||||
/* APIR_COMMAND_TYPE_BUFFER_FREE_BUFFER = */ backend_buffer_free_buffer,
|
||||
|
||||
/* backend */
|
||||
|
||||
/* APIR_COMMAND_TYPE_BACKEND_GRAPH_COMPUTE = */ backend_backend_graph_compute,
|
||||
};
|
||||
}
|
||||
|
|
@@ -0,0 +1,23 @@
|
|||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstddef>
|
||||
|
||||
#include <ggml-backend.h>
|
||||
|
||||
#include "backend-convert.h"
|
||||
#include "backend-virgl-apir.h"
|
||||
#include "shared/apir_backend.h"
|
||||
#include "shared/apir_cs.h"
|
||||
#include "shared/apir_cs_ggml.h"
|
||||
|
||||
struct virgl_apir_context {
|
||||
uint32_t ctx_id;
|
||||
virgl_apir_callbacks * iface;
|
||||
};
|
||||
|
||||
typedef uint32_t (*backend_dispatch_t)(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
|
||||
|
||||
#include "backend-dispatched.gen.h"
|
||||
|
||||
uint32_t backend_dispatch_initialize(void * ggml_backend_reg_fct_p);
|
||||
|
|
@@ -0,0 +1,32 @@
|
|||
#pragma once
|
||||
|
||||
#include "ggml-backend-impl.h"
|
||||
#include "ggml-backend.h"
|
||||
#include "ggml-impl.h"
|
||||
#include "shared/api_remoting.h"
|
||||
|
||||
#include <cstdarg>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
|
||||
extern ggml_backend_reg_t reg;
|
||||
extern ggml_backend_dev_t dev;
|
||||
extern ggml_backend_t bck;
|
||||
|
||||
struct virgl_apir_callbacks {
|
||||
const char * (*get_config)(uint32_t virgl_ctx_id, const char * key);
|
||||
void * (*get_shmem_ptr)(uint32_t virgl_ctx_id, uint32_t res_id);
|
||||
};
|
||||
|
||||
extern "C" {
|
||||
ApirLoadLibraryReturnCode apir_backend_initialize(uint32_t virgl_ctx_id, struct virgl_apir_callbacks *virgl_cbs);
|
||||
void apir_backend_deinit(uint32_t virgl_ctx_id);
|
||||
uint32_t apir_backend_dispatcher(uint32_t virgl_ctx_id,
|
||||
virgl_apir_callbacks * virgl_cbs,
|
||||
uint32_t cmd_type,
|
||||
char * dec_cur,
|
||||
const char * dec_end,
|
||||
char * enc_cur,
|
||||
const char * enc_end,
|
||||
char ** enc_cur_after);
|
||||
}
|
||||
|
|
@@ -0,0 +1,148 @@
|
|||
#include "backend-dispatched.h"
|
||||
#include "backend-virgl-apir.h"
|
||||
|
||||
#include "shared/api_remoting.h"
|
||||
#include "shared/apir_backend.h"
|
||||
#include "shared/apir_cs.h"
|
||||
|
||||
#include <dlfcn.h>
|
||||
#include <ggml-backend.h>
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#define APIR_LLAMA_CPP_GGML_LIBRARY_PATH_ENV "APIR_LLAMA_CPP_GGML_LIBRARY_PATH"
|
||||
#define APIR_LLAMA_CPP_GGML_LIBRARY_REG_ENV "APIR_LLAMA_CPP_GGML_LIBRARY_REG"
|
||||
#define APIR_LLAMA_CPP_LOG_TO_FILE_ENV "APIR_LLAMA_CPP_LOG_TO_FILE"
|
||||
|
||||
#define GGML_DEFAULT_BACKEND_REG "ggml_backend_init"
|
||||
|
||||
static void * backend_library_handle = NULL;
|
||||
static FILE * apir_logfile = NULL;
|
||||
|
||||
static void log_to_file_callback(enum ggml_log_level level, const char * text, void * user_data) {
|
||||
FILE * logfile = (FILE *)user_data;
|
||||
fprintf(logfile, "[%d] %s", level, text);
|
||||
fflush(logfile);
|
||||
}
|
||||
|
||||
extern "C" {
|
||||
void apir_backend_deinit(uint32_t virgl_ctx_id) {
|
||||
GGML_UNUSED(virgl_ctx_id);
|
||||
|
||||
auto buffers = apir_get_track_backend_buffers();
|
||||
for (const auto & buffer : buffers) {
|
||||
apir_untrack_backend_buffer(buffer);
|
||||
buffer->iface.free_buffer(buffer);
|
||||
}
|
||||
|
||||
if (dev) {
|
||||
size_t free, total;
|
||||
dev->iface.get_memory(dev, &free, &total);
|
||||
GGML_LOG_INFO("%s: free memory: %ld MB\n", __func__, (size_t) free / 1024 / 1024);
|
||||
}
|
||||
|
||||
if (backend_library_handle) {
|
||||
GGML_LOG_INFO("%s: The GGML backend library was loaded. Unloading it.\n", __func__);
|
||||
dlclose(backend_library_handle);
|
||||
backend_library_handle = NULL;
|
||||
}
|
||||
|
||||
if (apir_logfile) {
|
||||
fclose(apir_logfile);
|
||||
apir_logfile = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
#define APIR_GGML_LIBRARY_PATH_KEY "ggml.library.path"
|
||||
#define APIR_GGML_LIBRARY_REG_KEY "ggml.library.reg"
|
||||
|
||||
ApirLoadLibraryReturnCode apir_backend_initialize(uint32_t virgl_ctx_id, struct virgl_apir_callbacks *virgl_cbs) {
|
||||
const char * dlsym_error;
|
||||
|
||||
const char * apir_log_to_file = getenv(APIR_LLAMA_CPP_LOG_TO_FILE_ENV);
|
||||
if (apir_log_to_file) {
|
||||
apir_logfile = fopen(apir_log_to_file, "w");
|
||||
if (apir_logfile) {
|
||||
ggml_log_set(log_to_file_callback, apir_logfile);
|
||||
} else {
|
||||
GGML_LOG_INFO("Could not open the log file at '%s'\n", apir_log_to_file);
|
||||
}
|
||||
}
|
||||
|
||||
const char * library_name = virgl_cbs->get_config(virgl_ctx_id, APIR_GGML_LIBRARY_PATH_KEY);
|
||||
const char * virgl_library_reg = virgl_cbs->get_config(virgl_ctx_id, APIR_GGML_LIBRARY_REG_KEY);
|
||||
const char * library_reg = virgl_library_reg ? virgl_library_reg : GGML_DEFAULT_BACKEND_REG;
|
||||
|
||||
if (!library_name) {
|
||||
GGML_LOG_ERROR("cannot open the GGML library: env var '%s' not defined\n", APIR_LLAMA_CPP_GGML_LIBRARY_PATH_ENV);
|
||||
|
||||
return APIR_LOAD_LIBRARY_ENV_VAR_MISSING;
|
||||
}
|
||||
|
||||
backend_library_handle = dlopen(library_name, RTLD_LAZY);
|
||||
|
||||
if (!backend_library_handle) {
|
||||
GGML_LOG_ERROR("cannot open the GGML library: %s\n", dlerror());
|
||||
|
||||
return APIR_LOAD_LIBRARY_CANNOT_OPEN;
|
||||
}
|
||||
|
||||
if (!library_reg) {
|
||||
GGML_LOG_ERROR("cannot register the GGML library: env var '%s' not defined\n", APIR_LLAMA_CPP_GGML_LIBRARY_REG_ENV);
|
||||
|
||||
return APIR_LOAD_LIBRARY_ENV_VAR_MISSING;
|
||||
}
|
||||
|
||||
void * ggml_backend_reg_fct = dlsym(backend_library_handle, library_reg);
|
||||
dlsym_error = dlerror();
|
||||
if (dlsym_error) {
|
||||
GGML_LOG_ERROR("cannot find the GGML backend registration symbol '%s' (from %s): %s\n", library_reg,
|
||||
APIR_LLAMA_CPP_GGML_LIBRARY_REG_ENV, dlsym_error);
|
||||
|
||||
return APIR_LOAD_LIBRARY_SYMBOL_MISSING;
|
||||
}
|
||||
|
||||
uint32_t ret = backend_dispatch_initialize(ggml_backend_reg_fct);
|
||||
|
||||
return (ApirLoadLibraryReturnCode) (APIR_LOAD_LIBRARY_INIT_BASE_INDEX + ret);
|
||||
}
|
||||
|
||||
uint32_t apir_backend_dispatcher(uint32_t virgl_ctx_id,
|
||||
virgl_apir_callbacks * virgl_cbs,
|
||||
uint32_t cmd_type,
|
||||
char * dec_cur,
|
||||
const char * dec_end,
|
||||
char * enc_cur,
|
||||
const char * enc_end,
|
||||
char ** enc_cur_after) {
|
||||
apir_encoder enc = {
|
||||
.cur = enc_cur,
|
||||
.start = enc_cur,
|
||||
.end = enc_end,
|
||||
.fatal = false,
|
||||
};
|
||||
|
||||
apir_decoder dec = {
|
||||
.cur = dec_cur,
|
||||
.end = dec_end,
|
||||
.fatal = false,
|
||||
};
|
||||
|
||||
virgl_apir_context ctx = {
|
||||
.ctx_id = virgl_ctx_id,
|
||||
.iface = virgl_cbs,
|
||||
};
|
||||
|
||||
if (cmd_type >= APIR_BACKEND_DISPATCH_TABLE_COUNT) {
|
||||
GGML_LOG_ERROR("Received an invalid dispatch index (%d >= %d)\n", cmd_type, APIR_BACKEND_DISPATCH_TABLE_COUNT);
|
||||
return APIR_BACKEND_FORWARD_INDEX_INVALID;
|
||||
}
|
||||
|
||||
backend_dispatch_t forward_fct = apir_backend_dispatch_table[cmd_type];
|
||||
uint32_t ret = forward_fct(&enc, &dec, &ctx);
|
||||
|
||||
*enc_cur_after = enc.cur;
|
||||
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
|
@@ -0,0 +1,90 @@
|
|||
#pragma once
|
||||
|
||||
/* the rest of this file must match virglrenderer/src/apir-protocol.h */
|
||||
|
||||
#include <unistd.h>
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
#define APIR_PROTOCOL_MAJOR 0
|
||||
#define APIR_PROTOCOL_MINOR 1
|
||||
|
||||
#define APIR_HANDSHAKE_MAGIC 0xab1e
|
||||
|
||||
enum ApirCommandType {
|
||||
APIR_COMMAND_TYPE_HANDSHAKE = 0,
|
||||
APIR_COMMAND_TYPE_LOADLIBRARY = 1,
|
||||
APIR_COMMAND_TYPE_FORWARD = 2,
|
||||
|
||||
APIR_COMMAND_TYPE_LENGTH = 3,
|
||||
};
|
||||
|
||||
typedef uint64_t ApirCommandFlags;
|
||||
|
||||
enum ApirLoadLibraryReturnCode {
|
||||
APIR_LOAD_LIBRARY_SUCCESS = 0,
|
||||
APIR_LOAD_LIBRARY_HYPERCALL_INITIALIZATION_ERROR = 1,
|
||||
APIR_LOAD_LIBRARY_ALREADY_LOADED = 2,
|
||||
APIR_LOAD_LIBRARY_ENV_VAR_MISSING = 3,
|
||||
APIR_LOAD_LIBRARY_CANNOT_OPEN = 4,
|
||||
APIR_LOAD_LIBRARY_SYMBOL_MISSING = 5,
|
||||
APIR_LOAD_LIBRARY_INIT_BASE_INDEX = 6, // anything above this is an APIR backend library initialization return code
|
||||
};
|
||||
|
||||
enum ApirForwardReturnCode {
|
||||
APIR_FORWARD_SUCCESS = 0,
|
||||
APIR_FORWARD_NO_DISPATCH_FCT = 1,
|
||||
APIR_FORWARD_TIMEOUT = 2,
|
||||
|
||||
APIR_FORWARD_BASE_INDEX = 3, // anything above this is an APIR backend library forward return code
|
||||
};
|
||||
|
||||
__attribute__((unused)) static inline const char * apir_command_name(ApirCommandType type) {
|
||||
switch (type) {
|
||||
case APIR_COMMAND_TYPE_HANDSHAKE:
|
||||
return "HandShake";
|
||||
case APIR_COMMAND_TYPE_LOADLIBRARY:
|
||||
return "LoadLibrary";
|
||||
case APIR_COMMAND_TYPE_FORWARD:
|
||||
return "Forward";
|
||||
default:
|
||||
return "unknown";
|
||||
}
|
||||
}
|
||||
|
||||
__attribute__((unused)) static const char * apir_load_library_error(ApirLoadLibraryReturnCode code) {
|
||||
#define APIR_LOAD_LIBRARY_ERROR(code_name) \
|
||||
do { \
|
||||
if (code == code_name) \
|
||||
return #code_name; \
|
||||
} while (0)
|
||||
|
||||
APIR_LOAD_LIBRARY_ERROR(APIR_LOAD_LIBRARY_SUCCESS);
|
||||
APIR_LOAD_LIBRARY_ERROR(APIR_LOAD_LIBRARY_HYPERCALL_INITIALIZATION_ERROR);
|
||||
APIR_LOAD_LIBRARY_ERROR(APIR_LOAD_LIBRARY_ALREADY_LOADED);
|
||||
APIR_LOAD_LIBRARY_ERROR(APIR_LOAD_LIBRARY_ENV_VAR_MISSING);
|
||||
APIR_LOAD_LIBRARY_ERROR(APIR_LOAD_LIBRARY_CANNOT_OPEN);
|
||||
APIR_LOAD_LIBRARY_ERROR(APIR_LOAD_LIBRARY_SYMBOL_MISSING);
|
||||
APIR_LOAD_LIBRARY_ERROR(APIR_LOAD_LIBRARY_INIT_BASE_INDEX);
|
||||
|
||||
return "Unknown APIR_COMMAND_TYPE_LoadLibrary error";
|
||||
|
||||
#undef APIR_LOAD_LIBRARY_ERROR
|
||||
}
|
||||
|
||||
__attribute__((unused)) static const char * apir_forward_error(ApirForwardReturnCode code) {
|
||||
#define APIR_FORWARD_ERROR(code_name) \
|
||||
do { \
|
||||
if (code == code_name) \
|
||||
return #code_name; \
|
||||
} while (0)
|
||||
|
||||
APIR_FORWARD_ERROR(APIR_FORWARD_SUCCESS);
|
||||
APIR_FORWARD_ERROR(APIR_FORWARD_NO_DISPATCH_FCT);
|
||||
APIR_FORWARD_ERROR(APIR_FORWARD_TIMEOUT);
|
||||
APIR_FORWARD_ERROR(APIR_FORWARD_BASE_INDEX);
|
||||
|
||||
return "Unknown APIR_COMMAND_TYPE_FORWARD error";
|
||||
|
||||
#undef APIR_FORWARD_ERROR
|
||||
}
|
||||
|
|
@@ -0,0 +1,36 @@
|
|||
typedef enum ApirBackendCommandType {
|
||||
|
||||
/* device */
|
||||
APIR_COMMAND_TYPE_DEVICE_GET_DEVICE_COUNT = 0,
|
||||
APIR_COMMAND_TYPE_DEVICE_GET_COUNT = 1,
|
||||
APIR_COMMAND_TYPE_DEVICE_GET_NAME = 2,
|
||||
APIR_COMMAND_TYPE_DEVICE_GET_DESCRIPTION = 3,
|
||||
APIR_COMMAND_TYPE_DEVICE_GET_TYPE = 4,
|
||||
APIR_COMMAND_TYPE_DEVICE_GET_MEMORY = 5,
|
||||
APIR_COMMAND_TYPE_DEVICE_SUPPORTS_OP = 6,
|
||||
APIR_COMMAND_TYPE_DEVICE_GET_BUFFER_TYPE = 7,
|
||||
APIR_COMMAND_TYPE_DEVICE_GET_PROPS = 8,
|
||||
APIR_COMMAND_TYPE_DEVICE_BUFFER_FROM_PTR = 9,
|
||||
|
||||
/* buffer-type */
|
||||
APIR_COMMAND_TYPE_BUFFER_TYPE_GET_NAME = 10,
|
||||
APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALIGNMENT = 11,
|
||||
APIR_COMMAND_TYPE_BUFFER_TYPE_GET_MAX_SIZE = 12,
|
||||
APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST = 13,
|
||||
APIR_COMMAND_TYPE_BUFFER_TYPE_ALLOC_BUFFER = 14,
|
||||
APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALLOC_SIZE = 15,
|
||||
|
||||
/* buffer */
|
||||
APIR_COMMAND_TYPE_BUFFER_GET_BASE = 16,
|
||||
APIR_COMMAND_TYPE_BUFFER_SET_TENSOR = 17,
|
||||
APIR_COMMAND_TYPE_BUFFER_GET_TENSOR = 18,
|
||||
APIR_COMMAND_TYPE_BUFFER_CPY_TENSOR = 19,
|
||||
APIR_COMMAND_TYPE_BUFFER_CLEAR = 20,
|
||||
APIR_COMMAND_TYPE_BUFFER_FREE_BUFFER = 21,
|
||||
|
||||
/* backend */
|
||||
APIR_COMMAND_TYPE_BACKEND_GRAPH_COMPUTE = 22,
|
||||
|
||||
// last command_type index + 1
|
||||
APIR_BACKEND_DISPATCH_TABLE_COUNT = 23,
|
||||
} ApirBackendCommandType;
|
||||
|
|
@@ -0,0 +1,46 @@
|
|||
#pragma once
|
||||
|
||||
#include "apir_backend.gen.h"
|
||||
|
||||
#include <stdint.h> // for uintptr_t
|
||||
#include <time.h> // for timespec, clock_gettime
|
||||
|
||||
#define APIR_BACKEND_INITIALIZE_SUCCESS 0
|
||||
#define APIR_BACKEND_INITIALIZE_CANNOT_OPEN_BACKEND_LIBRARY 1
|
||||
#define APIR_BACKEND_INITIALIZE_CANNOT_OPEN_GGML_LIBRARY 2
|
||||
#define APIR_BACKEND_INITIALIZE_MISSING_BACKEND_SYMBOLS 3
|
||||
#define APIR_BACKEND_INITIALIZE_MISSING_GGML_SYMBOLS 4
|
||||
#define APIR_BACKEND_INITIALIZE_BACKEND_FAILED 5
|
||||
#define APIR_BACKEND_INITIALIZE_BACKEND_REG_FAILED 6
|
||||
#define APIR_BACKEND_INITIALIZE_ALREADY_INITED 7
|
||||
#define APIR_BACKEND_INITIALIZE_NO_DEVICE 8
|
||||
|
||||
|
||||
// new entries here need to be added to the apir_backend_initialize_error function below
|
||||
|
||||
#define APIR_BACKEND_FORWARD_INDEX_INVALID 6
|
||||
|
||||
// 0 is fast, 1 prevents the backend from crashing if an unsupported tensor is received
|
||||
#define APIR_BACKEND_CHECK_SUPPORTS_OP 0
|
||||
|
||||
typedef uintptr_t apir_buffer_type_host_handle_t;
|
||||
typedef uintptr_t apir_buffer_host_handle_t;
|
||||
|
||||
static const char * apir_backend_initialize_error(int code) {
|
||||
#define APIR_BACKEND_INITIALIZE_ERROR(code_name) \
|
||||
do { \
|
||||
if (code == code_name) \
|
||||
return #code_name; \
|
||||
} while (0)
|
||||
|
||||
APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_SUCCESS);
|
||||
APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_CANNOT_OPEN_BACKEND_LIBRARY);
|
||||
APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_CANNOT_OPEN_GGML_LIBRARY);
|
||||
APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_MISSING_BACKEND_SYMBOLS);
|
||||
APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_MISSING_GGML_SYMBOLS);
|
||||
APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_BACKEND_FAILED);
|
||||
|
||||
return "Unknown APIR_BACKEND_INITIALIZE error:/";
|
||||
|
||||
#undef APIR_BACKEND_INITIALIZE_ERROR
|
||||
}
|
||||
|
|
@@ -0,0 +1,383 @@
|
|||
#pragma once
|
||||
|
||||
#include "ggml-impl.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <cstring>
|
||||
|
||||
#define likely(x) __builtin_expect(!!(x), 1)
|
||||
#define unlikely(x) __builtin_expect(!!(x), 0)
|
||||
|
||||
struct apir_encoder {
|
||||
char * cur;
|
||||
const char * start;
|
||||
const char * end;
|
||||
bool fatal;
|
||||
|
||||
};
|
||||
|
||||
struct apir_decoder {
|
||||
const char * cur;
|
||||
const char * end;
|
||||
bool fatal;
|
||||
};
|
||||
|
||||
/*
|
||||
* new encoder and decoder
|
||||
*/
|
||||
|
||||
static apir_decoder apir_new_decoder(const char * ptr, size_t size) {
|
||||
apir_decoder dec = {
|
||||
.cur = ptr,
|
||||
.end = ptr + size,
|
||||
.fatal = false,
|
||||
};
|
||||
|
||||
return dec;
|
||||
}
|
||||
|
||||
static apir_encoder apir_new_encoder(char * ptr, size_t size) {
|
||||
apir_encoder enc = {
|
||||
.cur = ptr,
|
||||
.start = ptr,
|
||||
.end = ptr + size,
|
||||
.fatal = false,
|
||||
};
|
||||
|
||||
return enc;
|
||||
}
|
||||
|
||||
/*
|
||||
* fatal flag handling
|
||||
*/
|
||||
|
||||
static inline void apir_encoder_reset_fatal(apir_encoder * enc) {
|
||||
enc->fatal = false;
|
||||
}
|
||||
|
||||
static inline void apir_encoder_set_fatal(apir_encoder * enc) {
|
||||
enc->fatal = true;
|
||||
}
|
||||
|
||||
static inline bool apir_encoder_get_fatal(const apir_encoder * enc) {
|
||||
return enc->fatal;
|
||||
}
|
||||
|
||||
static inline void apir_decoder_reset_fatal(apir_decoder * dec) {
|
||||
dec->fatal = false;
|
||||
}
|
||||
|
||||
static inline void apir_decoder_set_fatal(apir_decoder * dec) {
|
||||
dec->fatal = true;
|
||||
}
|
||||
|
||||
static inline bool apir_decoder_get_fatal(const apir_decoder * dec) {
|
||||
return dec->fatal;
|
||||
}
|
||||
|
||||
/*
|
||||
* encode peek
|
||||
*/
|
||||
|
||||
static inline bool apir_decoder_peek_internal(apir_decoder * dec,
                                              size_t size,
                                              void * val,
                                              size_t val_size) {
    assert(val_size <= size);

    if (unlikely(size > (size_t) (dec->end - dec->cur))) {
        GGML_LOG_ERROR("reading too much from the decoder ...\n");
        apir_decoder_set_fatal(dec);
        memset(val, 0, val_size);
        return false;
    }

    /* we should not rely on the compiler to optimize away memcpy... */
    memcpy(val, dec->cur, val_size);
    return true;
}

static inline void apir_decoder_peek(apir_decoder * dec, size_t size, void * val, size_t val_size) {
    apir_decoder_peek_internal(dec, size, val, val_size);
}

static inline const void * apir_decoder_use_inplace(apir_decoder * dec, size_t size) {
    if (unlikely(size > (size_t) (dec->end - dec->cur))) {
        GGML_LOG_ERROR("reading too much from the decoder ...\n");
        apir_decoder_set_fatal(dec);
        return NULL;
    }
    const void * addr = dec->cur;
    dec->cur += size;

    return addr;
}

/*
 * read/write
 */

static inline void apir_decoder_read(apir_decoder * dec, size_t size, void * val, size_t val_size) {
    if (apir_decoder_peek_internal(dec, size, val, val_size)) {
        dec->cur += size;
    }
}

static inline char * apir_encoder_write(apir_encoder * enc, size_t size, const void * val, size_t val_size) {
    assert(val_size <= size);
    assert(size <= ((size_t) (enc->end - enc->cur)));

    char * write_addr = enc->cur;
    /* we should not rely on the compiler to optimize away memcpy... */
    memcpy(write_addr, val, val_size);
    enc->cur += size;

    return write_addr;
}

/*
 * encode/decode
 */

static inline void apir_decode(apir_decoder * dec, size_t size, void * data, size_t data_size) {
    assert(size % 4 == 0);
    apir_decoder_read(dec, size, data, data_size);
}

static inline void apir_encode(apir_encoder * enc, size_t size, const void * data, size_t data_size) {
    assert(size % 4 == 0);
    apir_encoder_write(enc, size, data, data_size);
}

/*
 * typed encode/decode
 */

/* uint8_t */

static inline void apir_encode_uint8_t(apir_encoder * enc, const uint8_t * val) {
    apir_encode(enc, sizeof(int), val, sizeof(*val));
}

static inline void apir_decode_uint8_t(apir_decoder * dec, uint8_t * val) {
    apir_decode(dec, sizeof(int), val, sizeof(*val));
}

/* uint64_t */

static inline void apir_encode_uint64_t(apir_encoder * enc, const uint64_t * val) {
    apir_encode(enc, 8, val, sizeof(*val));
}

static inline void apir_decode_uint64_t(apir_decoder * dec, uint64_t * val) {
    apir_decode(dec, 8, val, sizeof(*val));
}

static inline void apir_encode_uint64_t_array(apir_encoder * enc, const uint64_t * val, uint32_t count) {
    const size_t size = sizeof(*val) * count;
    assert(size >= count);
    apir_encode(enc, size, val, size);
}

static inline void apir_decode_uint64_t_array(apir_decoder * dec, uint64_t * val, uint32_t count) {
    const size_t size = sizeof(*val) * count;
    assert(size >= count);
    apir_decode(dec, size, val, size);
}

static inline const uint64_t * apir_decode_uint64_t_array_inplace(apir_decoder * dec, uint32_t count) {
    return (uint64_t *) (uintptr_t) apir_decoder_use_inplace(dec, count * sizeof(uint64_t));
}

/* int32_t */

static inline void apir_encode_int32_t(apir_encoder * enc, const int32_t * val) {
    apir_encode(enc, 4, val, sizeof(*val));
}

static inline void apir_decode_int32_t(apir_decoder * dec, int32_t * val) {
    apir_decode(dec, 4, val, sizeof(*val));
}

static inline void apir_encode_int32_t_array(apir_encoder * enc, const int32_t * val, uint32_t count) {
    const size_t size = sizeof(*val) * count;
    assert(size >= count);
    apir_encode(enc, size, val, size);
}

static inline void apir_decode_int32_t_array(apir_decoder * dec, int32_t * val, uint32_t count) {
    const size_t size = sizeof(*val) * count;
    assert(size >= count);
    apir_decode(dec, size, val, size);
}

/* array size (uint64_t) */

static inline void apir_encode_array_size(apir_encoder * enc, uint64_t size) {
    apir_encode_uint64_t(enc, &size);
}

static inline uint64_t apir_decode_array_size(apir_decoder * dec, uint64_t expected_size) {
    uint64_t size;
    apir_decode_uint64_t(dec, &size);
    if (size != expected_size) {
        GGML_LOG_ERROR("Couldn't decode array from the decoder\n");
        apir_decoder_set_fatal(dec);
        size = 0;
    }
    return size;
}

static inline uint64_t apir_decode_array_size_unchecked(apir_decoder * dec) {
    uint64_t size;
    apir_decode_uint64_t(dec, &size);
    return size;
}

/* non-array pointer */

static inline bool apir_encode_simple_pointer(apir_encoder * enc, const void * val) {
    apir_encode_array_size(enc, val ? 1 : 0);
    return val;
}

static inline bool apir_decode_simple_pointer(apir_decoder * dec) {
    return apir_decode_array_size_unchecked(dec);
}

/* uint32_t */

static inline void apir_encode_uint32_t(apir_encoder * enc, const uint32_t * val) {
    apir_encode(enc, 4, val, sizeof(*val));
}

static inline void apir_decode_uint32_t(apir_decoder * dec, uint32_t * val) {
    apir_decode(dec, 4, val, sizeof(*val));
}

static inline void apir_encode_uint32_t_array(apir_encoder * enc, const uint32_t * val, uint32_t count) {
    const size_t size = sizeof(*val) * count;
    assert(size >= count);
    apir_encode(enc, size, val, size);
}

static inline void apir_decode_uint32_t_array(apir_decoder * dec, uint32_t * val, uint32_t count) {
    const size_t size = sizeof(*val) * count;
    assert(size >= count);
    apir_decode(dec, size, val, size);
}

/* size_t */

static inline void apir_encode_size_t(apir_encoder * enc, const size_t * val) {
    const uint64_t tmp = *val;
    apir_encode_uint64_t(enc, &tmp);
}

static inline void apir_decode_size_t(apir_decoder * dec, size_t * val) {
    uint64_t tmp;
    apir_decode_uint64_t(dec, &tmp);
    *val = tmp;
}

static inline void apir_encode_size_t_array(apir_encoder * enc, const size_t * val, uint32_t count) {
    if (sizeof(size_t) == sizeof(uint64_t)) {
        apir_encode_uint64_t_array(enc, (const uint64_t *) val, count);
    } else {
        for (uint32_t i = 0; i < count; i++) {
            apir_encode_size_t(enc, &val[i]);
        }
    }
}

static inline void apir_decode_size_t_array(apir_decoder * dec, size_t * val, uint32_t count) {
    if (sizeof(size_t) == sizeof(uint64_t)) {
        apir_decode_uint64_t_array(dec, (uint64_t *) val, count);
    } else {
        for (uint32_t i = 0; i < count; i++) {
            apir_decode_size_t(dec, &val[i]);
        }
    }
}

/* opaque blob */

static inline void apir_encode_blob_array(apir_encoder * enc, const void * val, size_t size) {
    apir_encode(enc, (size + 3) & ~3, val, size);
}

static inline void apir_decode_blob_array(apir_decoder * dec, void * val, size_t size) {
    apir_decode(dec, (size + 3) & ~3, val, size);
}

/* string */

static inline void apir_encode_char_array(apir_encoder * enc, const char * val, size_t size) {
    assert(size && strlen(val) < size);
    apir_encode_blob_array(enc, val, size);
}

static inline void apir_decode_char_array(apir_decoder * dec, char * val, size_t size) {
    apir_decode_blob_array(dec, val, size);
    if (size) {
        val[size - 1] = '\0';
    } else {
        GGML_LOG_ERROR("Couldn't decode the blob array\n");
        apir_decoder_set_fatal(dec);
    }
}

/* (temp) buffer allocation */

static inline void * apir_decoder_alloc_array(size_t size, size_t count) {
    size_t alloc_size;
    if (unlikely(__builtin_mul_overflow(size, count, &alloc_size))) {
        GGML_LOG_ERROR("overflow in array allocation of %zu * %zu bytes\n", size, count);
        return NULL;
    }

    return malloc(alloc_size);
}

/* bool */

static inline void apir_encode_bool_t(apir_encoder * enc, const bool * val) {
    apir_encode(enc, sizeof(int), val, sizeof(bool));
}

static inline void apir_decode_bool_t(apir_decoder * dec, bool * val) {
    apir_decode(dec, sizeof(int), val, sizeof(bool));
}

/* apir_buffer_type_host_handle_t */

static inline void apir_encode_apir_buffer_type_host_handle_t(apir_encoder * enc,
                                                              const apir_buffer_type_host_handle_t * val) {
    apir_encode(enc, sizeof(apir_buffer_type_host_handle_t), val, sizeof(apir_buffer_type_host_handle_t));
}

static inline void apir_decode_apir_buffer_type_host_handle_t(apir_decoder * dec,
                                                              apir_buffer_type_host_handle_t * val) {
    apir_decode(dec, sizeof(apir_buffer_type_host_handle_t), val, sizeof(apir_buffer_type_host_handle_t));
}

/* apir_buffer_host_handle_t */

static inline void apir_encode_apir_buffer_host_handle_t(apir_encoder * enc,
                                                         const apir_buffer_host_handle_t * val) {
    apir_encode(enc, sizeof(apir_buffer_host_handle_t), val, sizeof(apir_buffer_host_handle_t));
}

static inline void apir_decode_apir_buffer_host_handle_t(apir_decoder * dec, apir_buffer_host_handle_t * val) {
    apir_decode(dec, sizeof(apir_buffer_host_handle_t), val, sizeof(apir_buffer_host_handle_t));
}

/* uintptr_t */

static inline void apir_encode_uintptr_t(apir_encoder * enc, const uintptr_t * val) {
    apir_encode(enc, sizeof(*val), val, sizeof(*val));
}

static inline void apir_decode_uintptr_t(apir_decoder * dec, uintptr_t * val) {
    apir_decode(dec, sizeof(*val), val, sizeof(*val));
}
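The helpers above are the whole wire format: every value travels with an explicit 4-byte-aligned size, and an out-of-bounds read or write trips the encoder/decoder fatal flag instead of touching memory past the buffer. A minimal sketch of how a forward call could compose them (illustration only; the REMOTE_CALL_* macros and the function name are assumptions modelled on the usage in apir_backend_graph_compute() later in this patch, only the apir_encode_*/apir_decode_* helpers are taken from above):

// Illustrative sketch -- not part of the patch.
static uint32_t example_forward_get_device_type(virtgpu * gpu, uint32_t dev_id) {
    apir_encoder * enc;
    apir_decoder * dec;

    // hypothetical macros, mirroring apir_backend_graph_compute() below
    REMOTE_CALL_PREPARE(gpu, enc, APIR_COMMAND_TYPE_DEVICE_GET_TYPE);
    apir_encode_uint32_t(enc, &dev_id);   // 4-byte argument

    REMOTE_CALL(gpu, enc, dec);           // hypothetical: submit and wait for the reply

    uint32_t type = 0;
    apir_decode_uint32_t(dec, &type);     // reply payload
    if (apir_decoder_get_fatal(dec)) {
        type = 0;                         // reply was truncated or malformed
    }
    return type;
}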
@ -0,0 +1,211 @@
#include "ggml-impl.h"
#include "apir_cs.h"
#include "apir_cs_rpc.h"

// ggml_buffer_to_apir_host_handle(ggml_backend_buffer_t buffer);

static inline void apir_encode_ggml_buffer_host_handle(apir_encoder * enc,
                                                       const apir_buffer_host_handle_t * handle);

static inline ggml_backend_buffer_t apir_decode_ggml_buffer(apir_decoder * dec);

/* apir_rpc_tensor */

static inline void apir_encode_rcp_tensor(apir_encoder * enc, const apir_rpc_tensor * apir_rpc_tensor) {
    size_t apir_rpc_tensor_size = sizeof(*apir_rpc_tensor);
    apir_encode(enc, apir_rpc_tensor_size, apir_rpc_tensor, apir_rpc_tensor_size);
}

static inline apir_rpc_tensor * apir_decode_apir_rpc_tensor_inplace(apir_decoder * dec) {
    size_t apir_rpc_tensor_size = sizeof(apir_rpc_tensor);

    return (apir_rpc_tensor *) (uintptr_t) apir_decoder_use_inplace(dec, apir_rpc_tensor_size);
}

static inline apir_rpc_tensor * apir_decode_apir_rpc_tensor_array_inplace(apir_decoder * dec,
                                                                          uint32_t n_tensors) {
    size_t apir_rpc_tensor_size = sizeof(apir_rpc_tensor) * n_tensors;

    return (apir_rpc_tensor *) (uintptr_t) apir_decoder_use_inplace(dec, apir_rpc_tensor_size);
}

/* ggml_tensor */

static inline void apir_encode_ggml_tensor(apir_encoder * enc, const ggml_tensor * tensor) {
    apir_rpc_tensor serialized = apir_serialize_tensor(tensor);

    apir_encode_rcp_tensor(enc, &serialized);
}

static inline const ggml_tensor * apir_decode_ggml_tensor(apir_decoder * dec) {
    const apir_rpc_tensor * apir_rpc_tensor = apir_decode_apir_rpc_tensor_inplace(dec);
    ggml_init_params params{
        /*.mem_size   =*/ ggml_tensor_overhead(),
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ true,
    };
    ggml_context * ctx = ggml_init(params);

    const ggml_tensor * tensor = apir_deserialize_tensor(ctx, apir_rpc_tensor);

    return tensor;
}

/* *** ggml_backend_buffer_type_t *** */

// ggml_backend_buffer_type_t is a POINTER (to a struct).
// Only the host pointer is shared between the host and guest.
// The guest stores it in `buft->context`.
// The host simply writes the pointer address in the buffer variable.

static inline void apir_encode_ggml_buffer_type(apir_encoder * enc, ggml_backend_buffer_type_t buft) {
    apir_buffer_type_host_handle_t handle = ggml_buffer_type_to_apir_handle(buft);
    apir_encoder_write(enc, sizeof(handle), &handle, sizeof(handle));
}

static inline ggml_backend_buffer_type_t apir_decode_ggml_buffer_type(apir_decoder * dec) {
    apir_buffer_type_host_handle_t handle;

    apir_decoder_read(dec, sizeof(handle), &handle, sizeof(handle));

    return (ggml_backend_buffer_type_t) handle;
}

static inline apir_buffer_type_host_handle_t apir_decode_apir_buffer_type_host_handle(apir_decoder * dec) {
    apir_buffer_type_host_handle_t handle;

    apir_decoder_read(dec, sizeof(handle), &handle, sizeof(handle));

    return handle;
}

/* *** ggml_backend_buffer_t *** */

// ggml_backend_buffer_t is a POINTER.
// same logic as for ggml_backend_buffer_type_t

static inline void apir_encode_ggml_buffer(apir_encoder * enc, const ggml_backend_buffer_t buffer) {
    apir_buffer_host_handle_t handle = BUFFER_TO_HOST_HANDLE(buffer);
    apir_encoder_write(enc, sizeof(handle), &handle, sizeof(handle));
}

static inline ggml_backend_buffer_t apir_decode_ggml_buffer(apir_decoder * dec) {
    ggml_backend_buffer_t buffer;
    size_t buffer_ptr_size = sizeof(buffer);

    apir_decoder_read(dec, buffer_ptr_size, &buffer, buffer_ptr_size);

    return buffer;
}

/* enum ggml_status */

static inline void apir_encode_ggml_status(apir_encoder * enc, const ggml_status * status) {
    apir_encoder_write(enc, sizeof(*status), status, sizeof(*status));
}

static inline void apir_decode_ggml_status(apir_decoder * dec, ggml_status * status) {
    apir_decoder_read(dec, sizeof(*status), status, sizeof(*status));
}

/* virtgpu_shmem */

static inline void apir_encode_virtgpu_shmem_res_id(apir_encoder * enc, uint32_t shmem_res_id) {
    apir_encode_uint32_t(enc, &shmem_res_id);
}

static inline void apir_decode_virtgpu_shmem_res_id(apir_decoder * dec, uint32_t * shmem_res_id) {
    apir_decode_uint32_t(dec, shmem_res_id);
}

/* ggml_cgraph */

static inline size_t apir_serialize_ggml_cgraph(ggml_cgraph * cgraph, std::vector<uint8_t> & cgraph_data) {
    apir_serialize_graph(cgraph, cgraph_data);

    return cgraph_data.size();
}

static inline void apir_encode_cgraph_data(apir_encoder * enc, std::vector<uint8_t> & cgraph_data) {
    size_t cgraph_size = cgraph_data.size();

    apir_encode(enc, cgraph_size, cgraph_data.data(), cgraph_size);
}

static inline ggml_cgraph * apir_decode_ggml_cgraph(apir_decoder * dec, size_t cgraph_size) {
    GGML_UNUSED(cgraph_size);

    uint32_t n_nodes;
    apir_decode_uint32_t(dec, &n_nodes);
    const uint64_t * nodes = apir_decode_uint64_t_array_inplace(dec, n_nodes);

    uint32_t n_tensors;
    apir_decode_uint32_t(dec, &n_tensors);
    const apir_rpc_tensor * tensors = apir_decode_apir_rpc_tensor_array_inplace(dec, n_tensors);

    return apir_deserialize_graph(n_nodes, n_tensors, tensors, nodes);
}

static inline void apir_encode_ggml_buffer_handle(apir_encoder * enc, const apir_buffer_host_handle_t * handle) {
    apir_encoder_write(enc, sizeof(*handle), handle, sizeof(*handle));
}

static inline void apir_encode_ggml_tensor_inline(apir_encoder * enc, const ggml_tensor * tensor) {
    size_t tensor_size = sizeof(*tensor);

    if (tensor->extra) {
        GGML_ABORT("Cannot pass tensors with extra");
    }

    if (tensor->src[0] && tensor->buffer) {
        static int first = 1;
        if (first) {
            GGML_LOG_WARN("Cannot pass tensors with src and buffer\n");
            first = 0;
        }
    }

    apir_encoder_write(enc, tensor_size, tensor, tensor_size);

    // tensor->data is a pointer inside the device buffer. No need to touch it
    // tensor->buffer is a pointer to a buffer. Encoding the buffer handle in sequence.
    // (could also make a copy of the tensor, and update locally.)

    if (tensor->buffer) {
        apir_buffer_host_handle_t buffer_handle = ggml_buffer_to_apir_handle(tensor->buffer);
        apir_encode_ggml_buffer_handle(enc, &buffer_handle);
    }

    if (tensor->view_src) {
        apir_encoder_write(enc, tensor_size, tensor->view_src, tensor_size);
    }

    for (int i = 0; tensor->src[i]; i++) {
        const ggml_tensor * tensor_src = tensor->src[i];
        apir_encoder_write(enc, tensor_size, tensor_src, tensor_size);
    }
}

static inline const ggml_tensor * apir_decode_ggml_tensor_inplace(apir_decoder * dec) {
    // it is safe to remove the `const` qualifier here; we *do* want to
    // modify the shared memory data to fix the `src` pointers.
    ggml_tensor * tensor = (ggml_tensor *) (uintptr_t) apir_decoder_use_inplace(dec, sizeof(ggml_tensor));

    // tensor->data is a pointer inside the device buffer. No need to touch it
    // tensor->buffer is a pointer to a buffer. Decode the buffer handle encoded in sequence.
    if (tensor->buffer) {
        tensor->buffer = apir_decode_ggml_buffer(dec);
    }

    if (tensor->view_src) {
        ggml_tensor * tensor_view_src = (ggml_tensor *) (uintptr_t) apir_decoder_use_inplace(dec, sizeof(ggml_tensor));
        tensor->view_src = tensor_view_src;
    }

    for (int i = 0; tensor->src[i]; i++) {
        ggml_tensor * tensor_src = (ggml_tensor *) (uintptr_t) apir_decoder_use_inplace(dec, sizeof(ggml_tensor));
        tensor->src[i] = tensor_src;  // overwrite op->src[i] pointer with the actual location of the src tensor
    }

    return tensor;
}
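The decoder above fixes the wire layout that apir_serialize_graph() (implemented elsewhere in this patch) has to produce: n_nodes, the node ids, n_tensors, then the flat apir_rpc_tensor array. A sketch of an equivalent writer built from the helpers in these headers (illustration only; the real frontend serializes into a std::vector<uint8_t> rather than an encoder):

// Illustrative sketch -- mirrors the layout consumed by apir_decode_ggml_cgraph():
//   uint32_t n_nodes | uint64_t node_id[n_nodes] | uint32_t n_tensors | apir_rpc_tensor[n_tensors]
static void example_encode_cgraph(apir_encoder * enc,
                                  const uint64_t * node_ids, uint32_t n_nodes,
                                  const apir_rpc_tensor * tensors, uint32_t n_tensors) {
    apir_encode_uint32_t(enc, &n_nodes);
    apir_encode_uint64_t_array(enc, node_ids, n_nodes);

    apir_encode_uint32_t(enc, &n_tensors);
    for (uint32_t i = 0; i < n_tensors; i++) {
        apir_encode_rcp_tensor(enc, &tensors[i]);  // tensors are stored back to back
    }
}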
@ -0,0 +1,54 @@
#include "ggml.h"
#include "ggml-backend-impl.h"

#include <unordered_map>
#include <unordered_set>
#include <vector>
#include <cstdint>

// ggml_tensor is serialized into apir_rpc_tensor
struct apir_rpc_tensor {
    uint64_t id;
    uint32_t type;
    uint64_t buffer;
    uint32_t ne[GGML_MAX_DIMS];
    uint32_t nb[GGML_MAX_DIMS];
    uint32_t op;
    int32_t  op_params[GGML_MAX_OP_PARAMS / sizeof(int32_t)];
    int32_t  flags;
    uint64_t src[GGML_MAX_SRC];
    uint64_t view_src;
    uint64_t view_offs;
    uint64_t data;
    char     name[GGML_MAX_NAME];

    char padding[4];
};

/* frontend */

apir_rpc_tensor apir_serialize_tensor(const ggml_tensor * tensor);

void apir_serialize_graph(const ggml_cgraph * cgraph, std::vector<uint8_t> & output);

/* backend */

void apir_track_backend_buffer(ggml_backend_buffer_t buffer);
bool apir_untrack_backend_buffer(ggml_backend_buffer_t buffer);
std::unordered_set<ggml_backend_buffer_t> apir_get_track_backend_buffers();

void apir_add_tensor(ggml_tensor * tensor,
                     std::vector<apir_rpc_tensor> & tensors,
                     std::unordered_set<ggml_tensor *> & visited);

ggml_tensor * apir_deserialize_tensor(ggml_context * ctx, const apir_rpc_tensor * tensor);

ggml_tensor * apir_create_node(uint64_t id,
                               ggml_context * ctx,
                               const std::unordered_map<uint64_t, const apir_rpc_tensor *> & tensor_ptrs,
                               std::unordered_map<uint64_t, ggml_tensor *> & tensor_map);

ggml_cgraph * apir_deserialize_graph(uint32_t n_nodes,
                                     uint32_t n_tensors,
                                     const apir_rpc_tensor * tensors,
                                     const uint64_t * nodes);
@ -0,0 +1,98 @@
#include "ggml-remoting.h"

static ggml_backend_buffer_t ggml_backend_remoting_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft,
                                                                            size_t size) {
    virtgpu * gpu = BUFT_TO_GPU(buft);

    ggml_backend_remoting_buffer_context * context = (ggml_backend_remoting_buffer_context *) malloc(sizeof(*context));
    if (!context) {
        GGML_ABORT("Couldn't allocate the buffer context ...");
    }

    context->gpu = gpu;

    bool async__unused, host_buffer__unused, events__unused;
    bool buffer_from_host_ptr;
    apir_device_get_props(gpu, &async__unused, &host_buffer__unused, &buffer_from_host_ptr, &events__unused);

    if (buffer_from_host_ptr) {
        context->apir_context = apir_device_buffer_from_ptr(gpu, size, size);
        context->base = context->apir_context.shmem.mmap_ptr;
        context->is_from_ptr = true;
    } else {
        context->apir_context = apir_buffer_type_alloc_buffer(gpu, buft, size);
        context->is_from_ptr = false;
        context->base = NULL;
    }

    ggml_backend_buffer_t buffer =
        ggml_backend_buffer_init(buft, ggml_backend_remoting_buffer_interface, (void *) context, size);

    return buffer;
}

static const char * ggml_backend_remoting_buffer_type_get_name(ggml_backend_buffer_type_t buft) {
    virtgpu * gpu = BUFT_TO_GPU(buft);

    return apir_buffer_type_get_name(gpu, buft);
}

static size_t ggml_backend_remoting_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) {
    virtgpu * gpu = BUFT_TO_GPU(buft);

    static size_t align = 0;

    if (align == 0) {
        align = apir_buffer_type_get_alignment(gpu, buft);
    }

    return align;
}

static size_t ggml_backend_remoting_buffer_type_get_max_size(ggml_backend_buffer_type_t buft) {
    virtgpu * gpu = BUFT_TO_GPU(buft);

    static size_t max_size = 0;
    if (max_size == 0) {
        max_size = apir_buffer_type_get_max_size(gpu, buft);
    }

    return max_size;
}

static bool ggml_backend_remoting_buffer_type_is_host(ggml_backend_buffer_type_t buft) {
    virtgpu * gpu = BUFT_TO_GPU(buft);

    return apir_buffer_type_is_host(gpu, buft);
}

static size_t ggml_backend_remoting_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft,
                                                               const ggml_tensor * tensor) {
    virtgpu * gpu = BUFT_TO_GPU(buft);

    if (tensor->buffer == NULL
        || !tensor->buffer->context
        || !buft->device->iface.supports_buft(buft->device, tensor->buffer->buft)) {
        return ggml_nbytes(tensor);
    }

    return apir_buffer_type_get_alloc_size(gpu, buft, tensor);
}

const ggml_backend_buffer_type_i ggml_backend_remoting_buffer_type_interface = {
    /* .get_name       = */ ggml_backend_remoting_buffer_type_get_name,
    /* .alloc_buffer   = */ ggml_backend_remoting_buffer_type_alloc_buffer,
    /* .get_alignment  = */ ggml_backend_remoting_buffer_type_get_alignment,
    /* .get_max_size   = */ ggml_backend_remoting_buffer_type_get_max_size,
    /* .get_alloc_size = */ ggml_backend_remoting_buffer_type_get_alloc_size,
    /* .is_host        = */ NULL,
};

const ggml_backend_buffer_type_i ggml_backend_remoting_buffer_from_ptr_type_interface = {
    /* .get_name       = */ ggml_backend_remoting_buffer_type_get_name,
    /* .alloc_buffer   = */ NULL,
    /* .get_alignment  = */ ggml_backend_remoting_buffer_type_get_alignment,
    /* .get_max_size   = */ ggml_backend_remoting_buffer_type_get_max_size,
    /* .get_alloc_size = */ ggml_backend_remoting_buffer_type_get_alloc_size,
    /* .is_host        = */ NULL,
};
@ -0,0 +1,119 @@
#include "ggml-remoting.h"

#define BUFFER_TO_GPU(name) ((ggml_backend_remoting_buffer_context *) (name)->context)->gpu

static void * ggml_backend_remoting_buffer_get_base(ggml_backend_buffer_t buffer) {
    ggml_backend_remoting_buffer_context * context = (ggml_backend_remoting_buffer_context *) buffer->context;
    if (context->base) {
        return context->base;
    }

    context->base = apir_buffer_get_base(BUFFER_TO_GPU(buffer), BUFFER_TO_APIR_CONTEXT(buffer));

    return context->base;
}

static void ggml_backend_remoting_buffer_set_tensor(ggml_backend_buffer_t buffer,
                                                    ggml_tensor * tensor,
                                                    const void * data,
                                                    size_t offset,
                                                    size_t size) {
    virtgpu * gpu = BUFFER_TO_GPU(buffer);

    ggml_backend_remoting_buffer_context * context = BUFFER_TO_GGML_CONTEXT(buffer);
    if (context->is_from_ptr) {
        memcpy((char *) tensor->data + offset, data, size);
    } else {
        apir_buffer_set_tensor(gpu, BUFFER_TO_APIR_CONTEXT(buffer), tensor, data, offset, size);
    }

    return;
}

static void ggml_backend_remoting_buffer_get_tensor(ggml_backend_buffer_t buffer,
                                                    const ggml_tensor * tensor,
                                                    void * data,
                                                    size_t offset,
                                                    size_t size) {
    virtgpu * gpu = BUFFER_TO_GPU(buffer);
    ggml_backend_remoting_buffer_context * context = BUFFER_TO_GGML_CONTEXT(buffer);
    if (context->is_from_ptr) {
        memcpy(data, (const char *) tensor->data + offset, size);
    } else {
        apir_buffer_get_tensor(gpu, BUFFER_TO_APIR_CONTEXT(buffer), tensor, data, offset, size);
    }
}

static void ggml_backend_remoting_buffer_set_tensor_from_ptr(ggml_backend_buffer_t buffer,
                                                             ggml_tensor * tensor,
                                                             const void * data,
                                                             size_t offset,
                                                             size_t size) {
    UNUSED(buffer);

    memcpy((char *) tensor->data + offset, data, size);

    return;
}

static void ggml_backend_remoting_buffer_get_tensor_from_ptr(ggml_backend_buffer_t buffer,
                                                             const ggml_tensor * tensor,
                                                             void * data,
                                                             size_t offset,
                                                             size_t size) {
    UNUSED(buffer);

    memcpy(data, (const char *) tensor->data + offset, size);
}

static bool ggml_backend_remoting_buffer_cpy_tensor(ggml_backend_buffer_t buffer,
                                                    const ggml_tensor * src,
                                                    ggml_tensor * dst) {
    virtgpu * gpu = BUFFER_TO_GPU(buffer);

    bool ret = apir_buffer_cpy_tensor(gpu, BUFFER_TO_APIR_CONTEXT(buffer), src, dst);

    return ret;
}

static void ggml_backend_remoting_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
    virtgpu * gpu = BUFFER_TO_GPU(buffer);

    apir_buffer_clear(gpu, BUFFER_TO_APIR_CONTEXT(buffer), value);

    return;
}

static void ggml_backend_remoting_buffer_free_buffer(ggml_backend_buffer_t buffer) {
    virtgpu * gpu = BUFFER_TO_GPU(buffer);

    apir_buffer_free_buffer(gpu, BUFFER_TO_APIR_CONTEXT(buffer));

    ggml_backend_remoting_buffer_context * context = BUFFER_TO_GGML_CONTEXT(buffer);
    free(context);
    buffer->context = NULL;
}

const ggml_backend_buffer_i ggml_backend_remoting_buffer_interface = {
    /* .free_buffer   = */ ggml_backend_remoting_buffer_free_buffer,
    /* .get_base      = */ ggml_backend_remoting_buffer_get_base,
    /* .init_tensor   = */ NULL,
    /* .memset_tensor = */ NULL,
    /* .set_tensor    = */ ggml_backend_remoting_buffer_set_tensor,
    /* .get_tensor    = */ ggml_backend_remoting_buffer_get_tensor,
    /* .cpy_tensor    = */ ggml_backend_remoting_buffer_cpy_tensor,
    /* .clear         = */ ggml_backend_remoting_buffer_clear,
    /* .reset         = */ NULL,
};

const ggml_backend_buffer_i ggml_backend_remoting_buffer_from_ptr_interface = {
    /* .free_buffer   = */ ggml_backend_remoting_buffer_free_buffer,
    /* .get_base      = */ ggml_backend_remoting_buffer_get_base,
    /* .init_tensor   = */ NULL,
    /* .memset_tensor = */ NULL,
    /* .set_tensor    = */ ggml_backend_remoting_buffer_set_tensor_from_ptr,
    /* .get_tensor    = */ ggml_backend_remoting_buffer_get_tensor_from_ptr,
    /* .cpy_tensor    = */ ggml_backend_remoting_buffer_cpy_tensor,
    /* .clear         = */ ggml_backend_remoting_buffer_clear,
    /* .reset         = */ NULL,
};
@ -0,0 +1,144 @@
#include "ggml-remoting.h"

static const char * ggml_backend_remoting_device_get_name(ggml_backend_dev_t dev) {
    virtgpu * gpu = DEV_TO_GPU(dev);

    return apir_device_get_name(gpu);
}

static const char * ggml_backend_remoting_device_get_description(ggml_backend_dev_t dev) {
    virtgpu * gpu = DEV_TO_GPU(dev);

    return apir_device_get_description(gpu);
}

static enum ggml_backend_dev_type ggml_backend_remoting_device_get_type(ggml_backend_dev_t dev) {
    virtgpu * gpu = DEV_TO_GPU(dev);

    static enum ggml_backend_dev_type type;
    static bool has_type = false;
    if (!has_type) {
        has_type = true;
        type = (enum ggml_backend_dev_type) apir_device_get_type(gpu);
    }

    return type;
}

static void ggml_backend_remoting_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
    virtgpu * gpu = DEV_TO_GPU(dev);

    return apir_device_get_memory(gpu, free, total);
}

static bool ggml_backend_remoting_device_supports_op(ggml_backend_dev_t dev, const ggml_tensor * op) {
#if USE_ALWAYS_TRUE_SUPPORTS_OP == 1
    /* ggml-rpc cheats it like this */
    /* with the current implementation of serialize_tensor, the src/view aren't properly passed */
    UNUSED(dev);
    UNUSED(op);

    return true;
#else
    virtgpu * gpu = DEV_TO_GPU(dev);

    return apir_device_supports_op(gpu, op);
#endif
}

static bool ggml_backend_remoting_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
    bool supported = buft->device == dev;

    return supported;
}

static bool ggml_backend_remoting_device_offload_op(ggml_backend_dev_t dev, const ggml_tensor * op) {
    UNUSED(dev);
    UNUSED(op);

    return false;
}

static void ggml_backend_remoting_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) {
    props->name = ggml_backend_remoting_device_get_name(dev);
    props->description = ggml_backend_remoting_device_get_description(dev);
    props->type = ggml_backend_remoting_device_get_type(dev);
    ggml_backend_remoting_device_get_memory(dev, &props->memory_free, &props->memory_total);

    virtgpu * gpu = DEV_TO_GPU(dev);
    apir_device_get_props(gpu, &props->caps.async, &props->caps.host_buffer, &props->caps.buffer_from_host_ptr,
                          &props->caps.events);

    props->caps.buffer_from_host_ptr = false;
    props->caps.async = false;
    props->caps.events = false;
}

ggml_backend_buffer_type_t ggml_backend_remoting_device_get_buffer_type(ggml_backend_dev_t dev) {
    virtgpu * gpu = DEV_TO_GPU(dev);

    apir_buffer_type_host_handle_t ctx = apir_device_get_buffer_type(gpu);

    static ggml_backend_buffer_type buft{
        /* .iface   = */ ggml_backend_remoting_buffer_type_interface,
        /* .device  = */ dev,
        /* .context = */ (void *) ctx,
    };

    return &buft;
}

static ggml_backend_buffer_type_t ggml_backend_remoting_device_get_buffer_from_ptr_type(ggml_backend_dev_t dev) {
    virtgpu * gpu = DEV_TO_GPU(dev);

    apir_buffer_type_host_handle_t ctx = apir_device_get_buffer_type(gpu);

    static ggml_backend_buffer_type buft{
        /* .iface   = */ ggml_backend_remoting_buffer_from_ptr_type_interface,
        /* .device  = */ dev,
        /* .context = */ (void *) ctx,
    };

    return &buft;
}

static ggml_backend_buffer_t ggml_backend_remoting_device_buffer_from_ptr(ggml_backend_dev_t dev,
                                                                          void * ptr,
                                                                          size_t size,
                                                                          size_t max_tensor_size) {
    virtgpu * gpu = DEV_TO_GPU(dev);

    ggml_backend_remoting_buffer_context * context = (ggml_backend_remoting_buffer_context *) malloc(sizeof(*context));
    if (!context) {
        GGML_ABORT("Couldn't allocate the buffer context ...");
    }

    context->gpu = gpu;
    context->apir_context = apir_device_buffer_from_ptr(gpu, size, max_tensor_size);
    context->base = ptr;
    context->is_from_ptr = true;

    ggml_backend_buffer_t buffer =
        ggml_backend_buffer_init(ggml_backend_remoting_device_get_buffer_from_ptr_type(dev),
                                 ggml_backend_remoting_buffer_from_ptr_interface, (void *) context, size);

    return buffer;
}

const ggml_backend_device_i ggml_backend_remoting_device_interface = {
    /* .get_name             = */ ggml_backend_remoting_device_get_name,
    /* .get_description      = */ ggml_backend_remoting_device_get_description,
    /* .get_memory           = */ ggml_backend_remoting_device_get_memory,
    /* .get_type             = */ ggml_backend_remoting_device_get_type,
    /* .get_props            = */ ggml_backend_remoting_device_get_props,
    /* .init_backend         = */ ggml_backend_remoting_device_init,
    /* .get_buffer_type      = */ ggml_backend_remoting_device_get_buffer_type,
    /* .get_host_buffer_type = */ NULL,
    /* .buffer_from_host_ptr = */ ggml_backend_remoting_device_buffer_from_ptr,
    /* .supports_op          = */ ggml_backend_remoting_device_supports_op,
    /* .supports_buft        = */ ggml_backend_remoting_device_supports_buft,
    /* .offload_op           = */ ggml_backend_remoting_device_offload_op,
    /* .event_new            = */ NULL,
    /* .event_free           = */ NULL,
    /* .event_synchronize    = */ NULL,
};
@ -0,0 +1,137 @@
#include "ggml-remoting.h"
#include "ggml-virtgpu.h"

#include <iostream>
#include <mutex>

static virtgpu * apir_initialize() {
    static virtgpu * apir_gpu_instance = NULL;
    static bool apir_initialized = false;

    {
        static std::mutex mutex;
        std::lock_guard<std::mutex> lock(mutex);

        if (apir_initialized) {
            return apir_gpu_instance;
        }

        apir_gpu_instance = create_virtgpu();
        if (!apir_gpu_instance) {
            GGML_ABORT("failed to initialize the virtgpu");
        }

        apir_initialized = true;
    }

    return apir_gpu_instance;
}

static int ggml_backend_remoting_get_device_count() {
    virtgpu * gpu = apir_initialize();
    if (!gpu) {
        GGML_LOG_WARN("apir_initialize failed\n");
        return 0;
    }

    return apir_device_get_count(gpu);
}

static size_t ggml_backend_remoting_reg_get_device_count(ggml_backend_reg_t reg) {
    UNUSED(reg);

    return ggml_backend_remoting_get_device_count();
}

static std::vector<ggml_backend_dev_t> devices;

ggml_backend_dev_t ggml_backend_remoting_get_device(size_t device) {
    GGML_ASSERT(device < devices.size());
    return devices[device];
}

static void ggml_backend_remoting_reg_init_devices(ggml_backend_reg_t reg) {
    if (devices.size() > 0) {
        GGML_LOG_INFO("%s: already initialized\n", __func__);
        return;
    }

    virtgpu * gpu = apir_initialize();
    if (!gpu) {
        GGML_LOG_ERROR("apir_initialize failed\n");
        return;
    }

    static bool initialized = false;

    {
        static std::mutex mutex;
        std::lock_guard<std::mutex> lock(mutex);
        if (!initialized) {
            for (int i = 0; i < ggml_backend_remoting_get_device_count(); i++) {
                ggml_backend_remoting_device_context * ctx = new ggml_backend_remoting_device_context;
                char desc[256] = "API Remoting device";

                ctx->device = i;
                ctx->name = GGML_REMOTING_FRONTEND_NAME + std::to_string(i);
                ctx->description = desc;
                ctx->gpu = gpu;

                ggml_backend_dev_t dev = new ggml_backend_device{
                    /* .iface   = */ ggml_backend_remoting_device_interface,
                    /* .reg     = */ reg,
                    /* .context = */ ctx,
                };
                devices.push_back(dev);
            }
            initialized = true;
        }
    }
}

static ggml_backend_dev_t ggml_backend_remoting_reg_get_device(ggml_backend_reg_t reg, size_t device) {
    UNUSED(reg);

    return ggml_backend_remoting_get_device(device);
}

static const char * ggml_backend_remoting_reg_get_name(ggml_backend_reg_t reg) {
    UNUSED(reg);

    return GGML_REMOTING_FRONTEND_NAME;
}

static const ggml_backend_reg_i ggml_backend_remoting_reg_i = {
    /* .get_name         = */ ggml_backend_remoting_reg_get_name,
    /* .get_device_count = */ ggml_backend_remoting_reg_get_device_count,
    /* .get_device       = */ ggml_backend_remoting_reg_get_device,
    /* .get_proc_address = */ NULL,
};

ggml_backend_reg_t ggml_backend_virtgpu_reg() {
    virtgpu * gpu = apir_initialize();
    if (!gpu) {
        GGML_LOG_ERROR("virtgpu_apir_initialize failed\n");
        return NULL;
    }

    static ggml_backend_reg reg = {
        /* .api_version = */ GGML_BACKEND_API_VERSION,
        /* .iface       = */ ggml_backend_remoting_reg_i,
        /* .context     = */ gpu,
    };

    static bool initialized = false;
    if (initialized) {
        return &reg;
    }
    initialized = true;

    ggml_backend_remoting_reg_init_devices(&reg);

    GGML_LOG_INFO("%s: initialized\n", __func__);

    return &reg;
}

GGML_BACKEND_DL_IMPL(ggml_backend_virtgpu_reg)
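Once registered, the backend is reached through the regular ggml-backend registry; a minimal sketch of application-side usage (illustration only, error handling reduced to the bare minimum):

// Illustrative sketch -- not part of the patch.
static ggml_backend_t example_init_virtgpu_backend(void) {
    ggml_backend_reg_t reg = ggml_backend_virtgpu_reg();
    if (reg == NULL || ggml_backend_reg_dev_count(reg) == 0) {
        return NULL;  // no virtgpu device exposed by the host
    }

    ggml_backend_dev_t dev = ggml_backend_reg_dev_get(reg, 0);
    // dispatches to ggml_backend_remoting_device_init() defined above
    return ggml_backend_dev_init(dev, /* params = */ NULL);
}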
@ -0,0 +1,69 @@
#include "ggml-remoting.h"
#include "../../include/ggml-virtgpu.h"

static const char * ggml_backend_remoting_get_name(ggml_backend_t backend) {
    UNUSED(backend);

    return "API Remoting backend";
}

static void ggml_backend_remoting_free(ggml_backend_t backend) {
    delete backend;
}

static ggml_status ggml_backend_remoting_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
    virtgpu * gpu = DEV_TO_GPU(backend->device);

    return apir_backend_graph_compute(gpu, cgraph);
}

static void ggml_backend_remoting_graph_optimize(ggml_backend_t backend, ggml_cgraph * cgraph) {
    virtgpu * gpu = DEV_TO_GPU(backend->device);
#if true
    UNUSED(gpu);
    UNUSED(cgraph);
#else
    // not working yet

    apir_backend_graph_optimize(gpu, cgraph);
#endif
}

static ggml_backend_i ggml_backend_remoting_interface = {
    /* .get_name           = */ ggml_backend_remoting_get_name,
    /* .free               = */ ggml_backend_remoting_free,
    /* .set_tensor_async   = */ NULL,  // ggml_backend_remoting_set_tensor_async,
    /* .get_tensor_async   = */ NULL,  // ggml_backend_remoting_get_tensor_async,
    /* .cpy_tensor_async   = */ NULL,  // ggml_backend_remoting_cpy_tensor_async,
    /* .synchronize        = */ NULL,  // ggml_backend_remoting_synchronize,
    /* .graph_plan_create  = */ NULL,
    /* .graph_plan_free    = */ NULL,
    /* .graph_plan_update  = */ NULL,
    /* .graph_plan_compute = */ NULL,
    /* .graph_compute      = */ ggml_backend_remoting_graph_compute,
    /* .event_record       = */ NULL,
    /* .event_wait         = */ NULL,
    /* .graph_optimize     = */ ggml_backend_remoting_graph_optimize,
};

static ggml_guid_t ggml_backend_remoting_guid() {
    static ggml_guid guid = { 0xb8, 0xf7, 0x4f, 0x86, 0x14, 0x03, 0x86, 0x02,
                              0x91, 0xc8, 0xdd, 0xe9, 0x02, 0x3f, 0xc0, 0x2b };

    return &guid;
}

ggml_backend_t ggml_backend_remoting_device_init(ggml_backend_dev_t dev, const char * params) {
    UNUSED(params);

    ggml_backend_remoting_device_context * ctx = (ggml_backend_remoting_device_context *) dev->context;

    ggml_backend_t remoting_backend = new ggml_backend{
        /* .guid      = */ ggml_backend_remoting_guid(),
        /* .interface = */ ggml_backend_remoting_interface,
        /* .device    = */ ggml_backend_reg_dev_get(ggml_backend_virtgpu_reg(), ctx->device),
        /* .context   = */ ctx,
    };

    return remoting_backend;
}
@ -0,0 +1,68 @@
#pragma once

#include "ggml-backend-impl.h"
#include "ggml-backend.h"
#include "ggml-impl.h"
#include "virtgpu.h"

#include <memory>
#include <string>
#include <tuple>
#include <vector>

// USE_ALWAYS_TRUE_SUPPORTS_OP: 1 is fast, 0 avoids micro-benchmark crashes

#define USE_ALWAYS_TRUE_SUPPORTS_OP 1
#define USE_METAL_GUEST_SUPPORTS_OP 0

#define DEV_TO_GPU(name) ((ggml_backend_remoting_device_context *) (name)->context)->gpu

#define BUFFER_TO_GGML_CONTEXT(name) ((ggml_backend_remoting_buffer_context *) (name)->context)

#define BUFFER_TO_APIR_CONTEXT(name) &((ggml_backend_remoting_buffer_context *) (name)->context)->apir_context

#define BUFFER_TO_HOST_HANDLE(name) ((ggml_backend_remoting_buffer_context *) (name)->context)->apir_context.host_handle

#define GET_DEVICE_CONTEXT() (ggml_backend_remoting_device_context *) ggml_backend_remoting_get_device(0)->context

#define BUFT_TO_GPU(name) ((ggml_backend_remoting_device_context *) (name)->device->context)->gpu

struct ggml_backend_remoting_device_context {
    size_t device;
    std::string name;
    std::string description;

    std::vector<std::tuple<void *, size_t, virtgpu_shmem *>> shared_memory;

    virtgpu * gpu;
};

struct ggml_backend_remoting_buffer_context {
    apir_buffer_context_t apir_context;

    virtgpu * gpu;

    void * base;

    bool is_from_ptr;
};

extern const ggml_backend_buffer_type_i ggml_backend_remoting_buffer_type_interface;
extern const ggml_backend_device_i ggml_backend_remoting_device_interface;
extern const ggml_backend_buffer_i ggml_backend_remoting_buffer_interface;
extern const ggml_backend_buffer_type_i ggml_backend_remoting_buffer_from_ptr_type_interface;
extern const ggml_backend_buffer_i ggml_backend_remoting_buffer_from_ptr_interface;

ggml_backend_dev_t ggml_backend_remoting_get_device(size_t device);
ggml_backend_t ggml_backend_remoting_device_init(ggml_backend_dev_t dev, const char * params);
ggml_backend_buffer_type_t ggml_backend_remoting_device_get_buffer_type(ggml_backend_dev_t dev);

static inline apir_buffer_type_host_handle_t ggml_buffer_type_to_apir_handle(ggml_backend_buffer_type_t buft) {
    // in the backend, the buffer handle is the buffer pointer
    return (apir_buffer_type_host_handle_t) buft->context;
}

static inline apir_buffer_host_handle_t ggml_buffer_to_apir_handle(ggml_backend_buffer_t buffer) {
    if (!buffer->context) {
        GGML_ABORT("%s: no context available :/", __func__);
    }
    return BUFFER_TO_HOST_HANDLE(buffer);
}
@ -0,0 +1,168 @@
# YAML schema for GGML remoting API functions
# This defines the structure for generating the remoting layer code

# Configuration for the generated files
config:
  # Base path for the generated files
  base_path: "ggml/src"

  # Header files to update
  files:
    apir_backend_header: "ggml-virtgpu-apir/backend/shared/apir_backend.gen.h"
    backend_dispatched_header: "ggml-virtgpu-apir/backend/backend-dispatched.gen.h"
    virtgpu_forward_header: "ggml-virtgpu-apir/virtgpu-forward.gen.h"

# Simplified function definitions with grouping and metadata combined
functions:
  device:
    group_description: "device"
    functions:
      get_device_count:
        # No specific metadata - uses default void return and base params

      get_count:
        frontend_return: "int"

      get_name:
        frontend_return: "const char *"

      get_description:
        frontend_return: "const char *"

      get_type:
        frontend_return: "uint32_t"

      get_memory:
        frontend_return: "void"
        frontend_extra_params:
          - "size_t *free"
          - "size_t *total"

      supports_op:
        frontend_return: "bool"
        frontend_extra_params:
          - "const ggml_tensor *op"

      get_buffer_type:
        frontend_return: "apir_buffer_type_host_handle_t"

      get_props:
        frontend_return: "void"
        frontend_extra_params:
          - "bool *async"
          - "bool *host_buffer"
          - "bool *buffer_from_host_ptr"
          - "bool *events"

      buffer_from_ptr:
        frontend_return: "apir_buffer_context_t"
        frontend_extra_params:
          - "size_t size"
          - "size_t max_tensor_size"

  buffer_type:
    group_description: "buffer-type"
    functions:
      get_name:
        frontend_return: "const char *"
        frontend_extra_params:
          - "ggml_backend_buffer_type_t buft"

      get_alignment:
        frontend_return: "size_t"
        frontend_extra_params:
          - "ggml_backend_buffer_type_t buft"

      get_max_size:
        frontend_return: "size_t"
        frontend_extra_params:
          - "ggml_backend_buffer_type_t buft"

      is_host:
        frontend_return: "bool"
        frontend_extra_params:
          - "ggml_backend_buffer_type_t buft"

      alloc_buffer:
        frontend_return: "apir_buffer_context_t"
        frontend_extra_params:
          - "ggml_backend_buffer_type_t buffer_buft"
          - "size_t size"

      get_alloc_size:
        frontend_return: "size_t"
        frontend_extra_params:
          - "ggml_backend_buffer_type_t buft"
          - "const ggml_tensor *op"

  buffer:
    group_description: "buffer"
    functions:
      get_base:
        frontend_return: "void *"
        frontend_extra_params:
          - "apir_buffer_context_t *buffer_context"

      set_tensor:
        frontend_return: "void"
        frontend_extra_params:
          - "apir_buffer_context_t *buffer_context"
          - "ggml_tensor *tensor"
          - "const void *data"
          - "size_t offset"
          - "size_t size"

      get_tensor:
        frontend_return: "void"
        frontend_extra_params:
          - "apir_buffer_context_t *buffer_context"
          - "const ggml_tensor *tensor"
          - "void *data"
          - "size_t offset"
          - "size_t size"

      cpy_tensor:
        frontend_return: "bool"
        frontend_extra_params:
          - "apir_buffer_context_t *buffer_context"
          - "const ggml_tensor *src"
          - "const ggml_tensor *dst"

      clear:
        frontend_return: "void"
        frontend_extra_params:
          - "apir_buffer_context_t *buffer_context"
          - "uint8_t value"

      free_buffer:
        frontend_return: "void"
        frontend_extra_params:
          - "apir_buffer_context_t *buffer_context"

  backend:
    group_description: "backend"
    functions:
      graph_compute:
        frontend_return: "ggml_status"
        frontend_extra_params:
          - "ggml_cgraph *cgraph"

      graph_optimize:
        frontend_return: "ggml_cgraph *"
        frontend_extra_params:
          - "ggml_cgraph *cgraph"
        enabled: false

# Naming patterns used for code generation
naming_patterns:
  # How to generate enum names
  enum_prefix: "APIR_COMMAND_TYPE_"

  # How to generate backend function names
  backend_function_prefix: "backend_"

  # How to generate frontend function names
  frontend_function_prefix: "apir_"

  # Standard frontend first parameter
  frontend_base_param: "struct virtgpu *gpu"
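As an illustration of the naming patterns above, the generator script (further down in this patch) would emit roughly the following into apir_backend.gen.h and virtgpu-forward.gen.h for the get_alignment and graph_compute entries (sketch of generated output; exact layout depends on clang-format):

/* apir_backend.gen.h: enum_prefix + GROUP + FUNCTION */
APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALIGNMENT,
APIR_COMMAND_TYPE_BACKEND_GRAPH_COMPUTE,

/* virtgpu-forward.gen.h: frontend_base_param first, then frontend_extra_params */
size_t apir_buffer_type_get_alignment(struct virtgpu *gpu, ggml_backend_buffer_type_t buft);
ggml_status apir_backend_graph_compute(struct virtgpu *gpu, ggml_cgraph *cgraph);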
@ -0,0 +1,9 @@
#pragma once

#include <stdint.h>

struct virgl_renderer_capset_apir {
    uint32_t apir_version;
    uint32_t supports_blob_resources;
    uint32_t reserved[4];  // For future expansion
};
@ -0,0 +1,322 @@
#!/usr/bin/env python3
"""
# Generated by Claude AI

Script to completely regenerate the GGML remoting codebase from YAML configuration.

This script reads ggmlremoting_functions.yaml and regenerates all the header files and
implementation templates for the GGML remoting layer.

Usage:
    python regenerate_remoting.py

The script will:
1. Read ggmlremoting_functions.yaml configuration
2. Generate updated header files
3. Generate implementation templates in dedicated files
4. Show a summary of what was generated
"""

import yaml
|
||||
from typing import Dict, List, Any
|
||||
from pathlib import Path
|
||||
import os
|
||||
import subprocess
|
||||
import shutil
|
||||
import logging
|
||||
|
||||
NL = '\n' # can't have f"{'\n'}" in f-strings
|
||||
|
||||
|
||||
class RemotingCodebaseGenerator:
|
||||
def __init__(self, yaml_path: str = "ggmlremoting_functions.yaml"):
|
||||
"""Initialize the generator with the YAML configuration."""
|
||||
self.yaml_path = yaml_path
|
||||
|
||||
if not Path(yaml_path).exists():
|
||||
raise FileNotFoundError(f"Configuration file {yaml_path} not found")
|
||||
|
||||
with open(yaml_path, 'r') as f:
|
||||
self.config = yaml.safe_load(f)
|
||||
|
||||
self.functions = self.config['functions']
|
||||
self.naming_patterns = self.config['naming_patterns']
|
||||
self.config_data = self.config['config']
|
||||
|
||||
# Check if clang-format is available
|
||||
self.clang_format_available = self._check_clang_format_available()
|
||||
|
||||
def _check_clang_format_available(self) -> bool:
|
||||
"""Check if clang-format is available in the system PATH."""
|
||||
return shutil.which("clang-format") is not None
|
||||
|
||||
def _format_file_with_clang_format(self, file_path: Path) -> bool:
|
||||
"""Format a file with clang-format -i. Returns True if successful, False otherwise."""
|
||||
if not self.clang_format_available:
|
||||
return False
|
||||
|
||||
try:
|
||||
subprocess.run(
|
||||
["clang-format", "-i", str(file_path)],
|
||||
check=True,
|
||||
capture_output=True,
|
||||
text=True
|
||||
)
|
||||
return True
|
||||
except subprocess.CalledProcessError:
|
||||
logging.exception(f" ⚠️ clang-format failed for {file_path}")
|
||||
return False
|
||||
except Exception as e:
|
||||
logging.exception(f" ⚠️ Unexpected error formatting {file_path}: {e}")
|
||||
return False
|
||||
|
||||
def generate_enum_name(self, group_name: str, function_name: str) -> str:
|
||||
"""Generate the APIR_COMMAND_TYPE enum name for a function."""
|
||||
prefix = self.naming_patterns['enum_prefix']
|
||||
return f"{prefix}{group_name.upper()}_{function_name.upper()}"
|
||||
|
||||
def generate_backend_function_name(self, group_name: str, function_name: str) -> str:
|
||||
"""Generate the backend function name."""
|
||||
function_key = f"{group_name}_{function_name}"
|
||||
overrides = self.naming_patterns.get('backend_function_overrides', {})
|
||||
|
||||
if function_key in overrides:
|
||||
return overrides[function_key]
|
||||
|
||||
prefix = self.naming_patterns['backend_function_prefix']
|
||||
return f"{prefix}{group_name}_{function_name}"
|
||||
|
||||
def generate_frontend_function_name(self, group_name: str, function_name: str) -> str:
|
||||
"""Generate the frontend function name."""
|
||||
prefix = self.naming_patterns['frontend_function_prefix']
|
||||
return f"{prefix}{group_name}_{function_name}"
|
||||
|
||||
def get_enabled_functions(self) -> List[Dict[str, Any]]:
|
||||
"""Get all enabled functions with their metadata."""
|
||||
functions = []
|
||||
enum_value = 0
|
||||
|
||||
for group_name, group_data in self.functions.items():
|
||||
group_description = group_data['group_description']
|
||||
|
||||
for function_name, func_metadata in group_data['functions'].items():
|
||||
# Handle case where func_metadata is None or empty (functions with only comments)
|
||||
if func_metadata is None:
|
||||
func_metadata = {}
|
||||
|
||||
# Functions are enabled by default unless explicitly disabled
|
||||
if func_metadata.get('enabled', True):
|
||||
functions.append({
|
||||
'group_name': group_name,
|
||||
'function_name': function_name,
|
||||
'enum_name': self.generate_enum_name(group_name, function_name),
|
||||
'enum_value': enum_value,
|
||||
'backend_function': self.generate_backend_function_name(group_name, function_name),
|
||||
'frontend_function': self.generate_frontend_function_name(group_name, function_name),
|
||||
'frontend_return': func_metadata.get('frontend_return', 'void'),
|
||||
'frontend_extra_params': func_metadata.get('frontend_extra_params', []),
|
||||
'group_description': group_description,
|
||||
'newly_added': func_metadata.get('newly_added', False)
|
||||
})
|
||||
enum_value += 1
|
||||
|
||||
return functions
|
||||
|
||||
def generate_apir_backend_header(self) -> str:
|
||||
"""Generate the complete apir_backend.h file."""
|
||||
functions = self.get_enabled_functions()
|
||||
|
||||
# Generate the enum section
|
||||
enum_lines = ["typedef enum ApirBackendCommandType {"]
|
||||
current_group = None
|
||||
|
||||
for func in functions:
|
||||
# Add comment for new group
|
||||
if func['group_name'] != current_group:
|
||||
enum_lines.append("")
|
||||
enum_lines.append(f" /* {func['group_description']} */")
|
||||
current_group = func['group_name']
|
||||
|
||||
enum_lines.append(f" {func['enum_name']} = {func['enum_value']},")
|
||||
|
||||
# Add the count
|
||||
total_count = len(functions)
|
||||
enum_lines.append("\n // last command_type index + 1")
|
||||
enum_lines.append(f" APIR_BACKEND_DISPATCH_TABLE_COUNT = {total_count},")
|
||||
enum_lines.append("} ApirBackendCommandType;")
|
||||
|
||||
# Full header template
|
||||
header_content = NL.join(enum_lines) + "\n"
|
||||
|
||||
return header_content
|
||||
|
||||
def generate_backend_dispatched_header(self) -> str:
|
||||
"""Generate the complete backend-dispatched.h file."""
|
||||
functions = self.get_enabled_functions()
|
||||
|
||||
# Function declarations
|
||||
decl_lines = []
|
||||
current_group = None
|
||||
|
||||
for func in functions:
|
||||
if func['group_name'] != current_group:
|
||||
decl_lines.append(f"\n/* {func['group_description']} */")
|
||||
current_group = func['group_name']
|
||||
|
||||
signature = "uint32_t"
|
||||
params = "apir_encoder *enc, apir_decoder *dec, virgl_apir_context *ctx"
|
||||
decl_lines.append(f"{signature} {func['backend_function']}({params});")
|
||||
|
||||
# Switch cases
|
||||
switch_lines = []
|
||||
current_group = None
|
||||
|
||||
for func in functions:
|
||||
if func['group_name'] != current_group:
|
||||
switch_lines.append(f" /* {func['group_description']} */")
|
||||
current_group = func['group_name']
|
||||
|
||||
switch_lines.append(f" case {func['enum_name']}: return \"{func['backend_function']}\";")
|
||||
|
||||
# Dispatch table
|
||||
table_lines = []
|
||||
current_group = None
|
||||
|
||||
for func in functions:
|
||||
if func['group_name'] != current_group:
|
||||
table_lines.append(f"\n /* {func['group_description']} */")
|
||||
table_lines.append("")
|
||||
current_group = func['group_name']
|
||||
|
||||
table_lines.append(f" /* {func['enum_name']} = */ {func['backend_function']},")
|
||||
|
||||
header_content = f'''\
|
||||
#pragma once
|
||||
|
||||
{NL.join(decl_lines)}
|
||||
|
||||
static inline const char *backend_dispatch_command_name(ApirBackendCommandType type)
|
||||
{{
|
||||
switch (type) {{
|
||||
{NL.join(switch_lines)}
|
||||
|
||||
default: return "unknown";
|
||||
}}
|
||||
}}
|
||||
|
||||
extern "C" {{
|
||||
static const backend_dispatch_t apir_backend_dispatch_table[APIR_BACKEND_DISPATCH_TABLE_COUNT] = {{
|
||||
{NL.join(table_lines)}
|
||||
}};
|
||||
}}
|
||||
'''
|
||||
return header_content
|
||||
|
||||
def generate_virtgpu_forward_header(self) -> str:
|
||||
"""Generate the complete virtgpu-forward.gen.h file."""
|
||||
functions = self.get_enabled_functions()
|
||||
|
||||
decl_lines = []
|
||||
current_group = None
|
||||
|
||||
for func in functions:
|
||||
if func['group_name'] != current_group:
|
||||
decl_lines.append("")
|
||||
decl_lines.append(f"/* {func['group_description']} */")
|
||||
current_group = func['group_name']
|
||||
|
||||
# Build parameter list
|
||||
params = [self.naming_patterns['frontend_base_param']]
|
||||
params.extend(func['frontend_extra_params'])
|
||||
param_str = ', '.join(params)
|
||||
|
||||
decl_lines.append(f"{func['frontend_return']} {func['frontend_function']}({param_str});")
|
||||
|
||||
header_content = f'''\
|
||||
#pragma once
|
||||
{NL.join(decl_lines)}
|
||||
'''
|
||||
return header_content
|
||||
|
||||
def regenerate_codebase(self) -> None:
|
||||
"""Regenerate the entire remoting codebase."""
|
||||
logging.info("🔄 Regenerating GGML Remoting Codebase...")
|
||||
logging.info("=" * 50)
|
||||
|
||||
# Detect if we're running from frontend directory
|
||||
current_dir = os.getcwd()
|
||||
is_frontend_dir = current_dir.endswith('ggml-virtgpu')
|
||||
|
||||
if is_frontend_dir:
|
||||
# Running from ggml/src/ggml-virtgpu-apir
|
||||
logging.info("📍 Detected frontend directory execution")
|
||||
frontend_base = Path(".")
|
||||
else:
|
||||
# Running from project root (fallback to original behavior)
|
||||
logging.info("📍 Detected project root execution")
|
||||
base_path = self.config_data.get('base_path', 'ggml/src')
|
||||
frontend_base = Path(base_path) / "ggml-virtgpu"
|
||||
|
||||
# Compute final file paths
|
||||
backend_base = frontend_base / "backend"
|
||||
apir_backend_path = backend_base / "shared" / "apir_backend.gen.h"
|
||||
backend_dispatched_path = backend_base / "backend-dispatched.gen.h"
|
||||
virtgpu_forward_path = frontend_base / "virtgpu-forward.gen.h"
|
||||
|
||||
# Create output directories for each file
|
||||
apir_backend_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
backend_dispatched_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
virtgpu_forward_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Generate header files
|
||||
logging.info("📁 Generating header files...")
|
||||
|
||||
apir_backend_content = self.generate_apir_backend_header()
|
||||
apir_backend_path.write_text(apir_backend_content)
|
||||
logging.info(f" ✅ {apir_backend_path.resolve()}")
|
||||
|
||||
backend_dispatched_content = self.generate_backend_dispatched_header()
|
||||
backend_dispatched_path.write_text(backend_dispatched_content)
|
||||
logging.info(f" ✅ {backend_dispatched_path.resolve()}")
|
||||
|
||||
virtgpu_forward_content = self.generate_virtgpu_forward_header()
|
||||
virtgpu_forward_path.write_text(virtgpu_forward_content)
|
||||
logging.info(f" ✅ {virtgpu_forward_path.resolve()}")
|
||||
|
||||
# Format generated files with clang-format
|
||||
generated_files = [apir_backend_path, backend_dispatched_path, virtgpu_forward_path]
|
||||
|
||||
if not self.clang_format_available:
|
||||
logging.warning("\n⚠️clang-format not found in PATH. Generated files will not be formatted."
|
||||
" Install clang-format to enable automatic code formatting.")
|
||||
else:
|
||||
logging.info("\n🎨 Formatting files with clang-format...")
|
||||
for file_path in generated_files:
|
||||
if self._format_file_with_clang_format(file_path):
|
||||
logging.info(f" ✅ Formatted {file_path.name}")
|
||||
else:
|
||||
logging.warning(f" ❌ Failed to format {file_path.name}")
|
||||
|
||||
# Generate summary
|
||||
functions = self.get_enabled_functions()
|
||||
total_functions = len(functions)
|
||||
|
||||
logging.info("\n📊 Generation Summary:")
|
||||
logging.info("=" * 50)
|
||||
logging.info(f" Total functions: {total_functions}")
|
||||
logging.info(f" Function groups: {len(self.functions)}")
|
||||
logging.info(" Header files: 3")
|
||||
logging.info(f" Working directory: {current_dir}")
|
||||
|
||||
|
||||
def main():
|
||||
try:
|
||||
generator = RemotingCodebaseGenerator()
|
||||
generator.regenerate_codebase()
|
||||
except Exception as e:
|
||||
logging.exception(f"❌ Error: {e}")
|
||||
exit(1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
@ -0,0 +1,15 @@
|
|||
#include "backend/shared/apir_backend.h"
|
||||
#include "ggml-alloc.h"
|
||||
#include "ggml-impl.h"
|
||||
#include "ggml.h"
|
||||
#include "virtgpu-shm.h"
|
||||
#include "virtgpu-utils.h"
|
||||
|
||||
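// Guest-side state of a remote buffer: host_handle identifies the buffer on
// the host, shmem backs buffers created from a guest pointer, and
// buft_host_handle records the host buffer type it belongs to.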
struct apir_buffer_context_t {
|
||||
apir_buffer_host_handle_t host_handle;
|
||||
|
||||
struct virtgpu_shmem shmem;
|
||||
apir_buffer_type_host_handle_t buft_host_handle;
|
||||
};
|
||||
|
||||
#include "virtgpu-forward.gen.h"
|
||||
|
|
@ -0,0 +1,50 @@
|
|||
#include "virtgpu-forward-impl.h"
|
||||
|
||||
static long long current_time_ms() {
|
||||
timespec ts;
|
||||
clock_gettime(CLOCK_REALTIME, &ts);  // wall-clock time, returned in nanoseconds despite the _ms name; CLOCK_MONOTONIC would suit elapsed-time measurements better
|
||||
return (long long) ts.tv_sec * 1000000000LL + ts.tv_nsec;
|
||||
}
|
||||
|
||||
ggml_status apir_backend_graph_compute(virtgpu * gpu, ggml_cgraph * cgraph) {
|
||||
apir_encoder * encoder;
|
||||
apir_decoder * decoder;
|
||||
ApirForwardReturnCode ret;
|
||||
|
||||
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BACKEND_GRAPH_COMPUTE);
|
||||
|
||||
std::vector<uint8_t> cgraph_data;
|
||||
size_t cgraph_size = apir_serialize_ggml_cgraph(cgraph, cgraph_data);
|
||||
|
||||
virtgpu_shmem temp_shmem; // Local storage for large buffers
|
||||
virtgpu_shmem * shmem = &temp_shmem;
|
||||
|
||||
if (cgraph_size <= gpu->data_shmem.mmap_size) {
|
||||
// prefer the init-time allocated page, if large enough
|
||||
shmem = &gpu->data_shmem;
|
||||
} else if (virtgpu_shmem_create(gpu, cgraph_size, shmem)) {
|
||||
GGML_ABORT("Couldn't allocate the guest-host shared buffer");
|
||||
}
|
||||
|
||||
apir_encode_virtgpu_shmem_res_id(encoder, shmem->res_id);
|
||||
|
||||
apir_encode_size_t(encoder, &cgraph_size);
|
||||
|
||||
char * shmem_data = (char *) shmem->mmap_ptr;
|
||||
apir_encoder secondary_enc = apir_new_encoder(shmem_data, cgraph_size);
|
||||
|
||||
apir_encode_cgraph_data(&secondary_enc, cgraph_data);
|
||||
|
||||
REMOTE_CALL(gpu, encoder, decoder, ret);
|
||||
|
||||
ggml_status status = GGML_STATUS_ABORTED;
|
||||
apir_decode_ggml_status(decoder, &status);
|
||||
|
||||
remote_call_finish(gpu, encoder, decoder);
|
||||
|
||||
if (shmem != &gpu->data_shmem) {
|
||||
virtgpu_shmem_destroy(gpu, shmem);
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
|
@ -0,0 +1,125 @@
|
|||
#include "virtgpu-forward-impl.h"
|
||||
|
||||
const char * apir_buffer_type_get_name(virtgpu * gpu, ggml_backend_buffer_type_t buft) {
|
||||
apir_encoder * encoder;
|
||||
apir_decoder * decoder;
|
||||
ApirForwardReturnCode ret;
|
||||
|
||||
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_GET_NAME);
|
||||
|
||||
apir_encode_ggml_buffer_type(encoder, buft);
|
||||
|
||||
REMOTE_CALL(gpu, encoder, decoder, ret);
|
||||
|
||||
const size_t string_size = apir_decode_array_size_unchecked(decoder);
|
||||
char * string = (char *) apir_decoder_alloc_array(sizeof(char), string_size);
|
||||
if (!string) {
|
||||
GGML_LOG_ERROR("%s: Could not allocate the device name buffer\n", __func__);
|
||||
apir_decoder_set_fatal(decoder);
|
||||
}
|
||||
apir_decode_char_array(decoder, string, string_size);
|
||||
|
||||
remote_call_finish(gpu, encoder, decoder);
|
||||
|
||||
return string;
|
||||
}
|
||||
|
||||
size_t apir_buffer_type_get_alignment(virtgpu * gpu, ggml_backend_buffer_type_t buft) {
|
||||
apir_encoder * encoder;
|
||||
apir_decoder * decoder;
|
||||
ApirForwardReturnCode ret;
|
||||
|
||||
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALIGNMENT);
|
||||
|
||||
apir_encode_ggml_buffer_type(encoder, buft);
|
||||
|
||||
REMOTE_CALL(gpu, encoder, decoder, ret);
|
||||
|
||||
size_t alignment;
|
||||
apir_decode_size_t(decoder, &alignment);
|
||||
|
||||
remote_call_finish(gpu, encoder, decoder);
|
||||
|
||||
return alignment;
|
||||
}
|
||||
|
||||
size_t apir_buffer_type_get_max_size(virtgpu * gpu, ggml_backend_buffer_type_t buft) {
|
||||
apir_encoder * encoder;
|
||||
apir_decoder * decoder;
|
||||
ApirForwardReturnCode ret;
|
||||
|
||||
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_GET_MAX_SIZE);
|
||||
|
||||
apir_encode_ggml_buffer_type(encoder, buft);
|
||||
|
||||
REMOTE_CALL(gpu, encoder, decoder, ret);
|
||||
|
||||
size_t max_size;
|
||||
apir_decode_size_t(decoder, &max_size);
|
||||
|
||||
remote_call_finish(gpu, encoder, decoder);
|
||||
|
||||
return max_size;
|
||||
}
|
||||
|
||||
bool apir_buffer_type_is_host(virtgpu * gpu, ggml_backend_buffer_type_t buft) {
|
||||
apir_encoder * encoder;
|
||||
apir_decoder * decoder;
|
||||
ApirForwardReturnCode ret;
|
||||
|
||||
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST);
|
||||
|
||||
apir_encode_ggml_buffer_type(encoder, buft);
|
||||
|
||||
REMOTE_CALL(gpu, encoder, decoder, ret);
|
||||
|
||||
bool is_host;
|
||||
apir_decode_bool_t(decoder, &is_host);
|
||||
|
||||
remote_call_finish(gpu, encoder, decoder);
|
||||
|
||||
return is_host;
|
||||
}
|
||||
|
||||
apir_buffer_context_t apir_buffer_type_alloc_buffer(virtgpu * gpu, ggml_backend_buffer_type_t buft, size_t size) {
|
||||
apir_encoder * encoder;
|
||||
apir_decoder * decoder;
|
||||
ApirForwardReturnCode ret;
|
||||
|
||||
apir_buffer_context_t buffer_context;
|
||||
|
||||
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_ALLOC_BUFFER);
|
||||
|
||||
apir_encode_ggml_buffer_type(encoder, buft);
|
||||
|
||||
apir_encode_size_t(encoder, &size);
|
||||
|
||||
REMOTE_CALL(gpu, encoder, decoder, ret);
|
||||
|
||||
apir_decode_apir_buffer_host_handle_t(decoder, &buffer_context.host_handle);
|
||||
|
||||
remote_call_finish(gpu, encoder, decoder);
|
||||
|
||||
return buffer_context;
|
||||
}
|
||||
|
||||
size_t apir_buffer_type_get_alloc_size(virtgpu * gpu, ggml_backend_buffer_type_t buft, const ggml_tensor * op) {
|
||||
apir_encoder * encoder;
|
||||
apir_decoder * decoder;
|
||||
ApirForwardReturnCode ret;
|
||||
|
||||
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALLOC_SIZE);
|
||||
|
||||
apir_encode_ggml_buffer_type(encoder, buft);
|
||||
|
||||
apir_encode_ggml_tensor_inline(encoder, op);
|
||||
|
||||
REMOTE_CALL(gpu, encoder, decoder, ret);
|
||||
|
||||
size_t alloc_size;
|
||||
apir_decode_size_t(decoder, &alloc_size);
|
||||
|
||||
remote_call_finish(gpu, encoder, decoder);
|
||||
|
||||
return alloc_size;
|
||||
}
|
||||
|
|
@ -0,0 +1,157 @@
|
|||
#include "virtgpu-forward-impl.h"
|
||||
|
||||
void * apir_buffer_get_base(virtgpu * gpu, apir_buffer_context_t * buffer_context) {
|
||||
apir_encoder * encoder;
|
||||
apir_decoder * decoder;
|
||||
ApirForwardReturnCode ret;
|
||||
|
||||
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_GET_BASE);
|
||||
|
||||
apir_encode_apir_buffer_host_handle_t(encoder, &buffer_context->host_handle);
|
||||
|
||||
REMOTE_CALL(gpu, encoder, decoder, ret);
|
||||
|
||||
uintptr_t base;
|
||||
apir_decode_uintptr_t(decoder, &base);
|
||||
|
||||
remote_call_finish(gpu, encoder, decoder);
|
||||
|
||||
return (void *) base;
|
||||
}
|
||||
|
||||
void apir_buffer_set_tensor(virtgpu * gpu,
|
||||
apir_buffer_context_t * buffer_context,
|
||||
ggml_tensor * tensor,
|
||||
const void * data,
|
||||
size_t offset,
|
||||
size_t size) {
|
||||
apir_encoder * encoder;
|
||||
apir_decoder * decoder;
|
||||
ApirForwardReturnCode ret;
|
||||
|
||||
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_SET_TENSOR);
|
||||
|
||||
apir_encode_apir_buffer_host_handle_t(encoder, &buffer_context->host_handle);
|
||||
apir_encode_ggml_tensor(encoder, tensor);
|
||||
|
||||
virtgpu_shmem temp_shmem; // Local storage for large buffers
|
||||
virtgpu_shmem * shmem = &temp_shmem;
|
||||
|
||||
if (size <= gpu->data_shmem.mmap_size) {
|
||||
// prefer the init-time allocated page, if large enough
|
||||
shmem = &gpu->data_shmem;
|
||||
|
||||
} else if (virtgpu_shmem_create(gpu, size, shmem)) {
|
||||
GGML_ABORT("Couldn't allocate the guest-host shared buffer");
|
||||
}
|
||||
|
||||
memcpy(shmem->mmap_ptr, data, size);
|
||||
apir_encode_virtgpu_shmem_res_id(encoder, shmem->res_id);
|
||||
|
||||
apir_encode_size_t(encoder, &offset);
|
||||
apir_encode_size_t(encoder, &size);
|
||||
|
||||
REMOTE_CALL(gpu, encoder, decoder, ret);
|
||||
|
||||
remote_call_finish(gpu, encoder, decoder);
|
||||
|
||||
if (shmem != &gpu->data_shmem) {
|
||||
virtgpu_shmem_destroy(gpu, shmem);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void apir_buffer_get_tensor(virtgpu * gpu,
|
||||
apir_buffer_context_t * buffer_context,
|
||||
const ggml_tensor * tensor,
|
||||
void * data,
|
||||
size_t offset,
|
||||
size_t size) {
|
||||
apir_encoder * encoder;
|
||||
apir_decoder * decoder;
|
||||
ApirForwardReturnCode ret;
|
||||
|
||||
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_GET_TENSOR);
|
||||
|
||||
apir_encode_apir_buffer_host_handle_t(encoder, &buffer_context->host_handle);
|
||||
apir_encode_ggml_tensor(encoder, tensor);
|
||||
|
||||
virtgpu_shmem temp_shmem; // Local storage for large buffers
|
||||
virtgpu_shmem * shmem = &temp_shmem;
|
||||
|
||||
if (size <= gpu->data_shmem.mmap_size) {
|
||||
// prefer the init-time allocated page, if large enough
|
||||
shmem = &gpu->data_shmem;
|
||||
|
||||
} else if (virtgpu_shmem_create(gpu, size, shmem)) {
|
||||
GGML_ABORT("Couldn't allocate the guest-host shared buffer");
|
||||
}
|
||||
|
||||
apir_encode_virtgpu_shmem_res_id(encoder, shmem->res_id);
|
||||
apir_encode_size_t(encoder, &offset);
|
||||
apir_encode_size_t(encoder, &size);
|
||||
|
||||
REMOTE_CALL(gpu, encoder, decoder, ret);
|
||||
|
||||
memcpy(data, shmem->mmap_ptr, size);
|
||||
|
||||
remote_call_finish(gpu, encoder, decoder);
|
||||
|
||||
if (shmem != &gpu->data_shmem) {
|
||||
virtgpu_shmem_destroy(gpu, shmem);
|
||||
}
|
||||
}
|
||||
|
||||
bool apir_buffer_cpy_tensor(virtgpu * gpu,
|
||||
apir_buffer_context_t * buffer_context,
|
||||
const ggml_tensor * src,
|
||||
const ggml_tensor * dst) {
|
||||
apir_encoder * encoder;
|
||||
apir_decoder * decoder;
|
||||
ApirForwardReturnCode ret;
|
||||
|
||||
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_CPY_TENSOR);
|
||||
|
||||
apir_encode_apir_buffer_host_handle_t(encoder, &buffer_context->host_handle);
|
||||
apir_encode_ggml_tensor(encoder, src);
|
||||
apir_encode_ggml_tensor(encoder, dst);
|
||||
|
||||
REMOTE_CALL(gpu, encoder, decoder, ret);
|
||||
|
||||
bool ret_val;
|
||||
apir_decode_bool_t(decoder, &ret_val);
|
||||
|
||||
remote_call_finish(gpu, encoder, decoder);
|
||||
|
||||
return ret_val;
|
||||
}
|
||||
|
||||
void apir_buffer_clear(virtgpu * gpu, apir_buffer_context_t * buffer_context, uint8_t value) {
|
||||
apir_encoder * encoder;
|
||||
apir_decoder * decoder;
|
||||
ApirForwardReturnCode ret;
|
||||
|
||||
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_CLEAR);
|
||||
|
||||
apir_encode_apir_buffer_host_handle_t(encoder, &buffer_context->host_handle);
|
||||
apir_encode_uint8_t(encoder, &value);
|
||||
|
||||
REMOTE_CALL(gpu, encoder, decoder, ret);
|
||||
|
||||
remote_call_finish(gpu, encoder, decoder);
|
||||
}
|
||||
|
||||
void apir_buffer_free_buffer(virtgpu * gpu, apir_buffer_context_t * buffer_context) {
|
||||
apir_encoder * encoder;
|
||||
apir_decoder * decoder;
|
||||
ApirForwardReturnCode ret;
|
||||
|
||||
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_FREE_BUFFER);
|
||||
|
||||
apir_encode_apir_buffer_host_handle_t(encoder, &buffer_context->host_handle);
|
||||
|
||||
REMOTE_CALL(gpu, encoder, decoder, ret);
|
||||
|
||||
remote_call_finish(gpu, encoder, decoder);
|
||||
}
|
||||
|
|
@ -0,0 +1,200 @@
|
|||
#include "virtgpu-forward-impl.h"
|
||||
#include "virtgpu-shm.h"
|
||||
|
||||
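// The device count only needs to be queried from the host once; cache it.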
int apir_device_get_count(virtgpu * gpu) {
|
||||
static int32_t dev_count = -1;
|
||||
if (dev_count != -1) {
|
||||
return dev_count;
|
||||
}
|
||||
|
||||
apir_encoder * encoder;
|
||||
apir_decoder * decoder;
|
||||
ApirForwardReturnCode ret;
|
||||
|
||||
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_COUNT);
|
||||
REMOTE_CALL(gpu, encoder, decoder, ret);
|
||||
|
||||
apir_decode_int32_t(decoder, &dev_count);
|
||||
|
||||
remote_call_finish(gpu, encoder, decoder);
|
||||
|
||||
return dev_count;
|
||||
}
|
||||
|
||||
const char * apir_device_get_name(virtgpu * gpu) {
|
||||
static char * string = nullptr;
|
||||
if (string) {
|
||||
return string;
|
||||
}
|
||||
apir_encoder * encoder;
|
||||
apir_decoder * decoder;
|
||||
ApirForwardReturnCode ret;
|
||||
|
||||
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_NAME);
|
||||
REMOTE_CALL(gpu, encoder, decoder, ret);
|
||||
|
||||
const size_t string_size = apir_decode_array_size_unchecked(decoder);
|
||||
string = (char *) apir_decoder_alloc_array(sizeof(char), string_size);
|
||||
if (!string) {
|
||||
GGML_LOG_ERROR("%s: Could not allocate the device name buffer\n", __func__);
|
||||
return NULL;
|
||||
}
|
||||
apir_decode_char_array(decoder, string, string_size);
|
||||
|
||||
remote_call_finish(gpu, encoder, decoder);
|
||||
|
||||
return string;
|
||||
}
|
||||
|
||||
const char * apir_device_get_description(virtgpu * gpu) {
|
||||
apir_encoder * encoder;
|
||||
apir_decoder * decoder;
|
||||
ApirForwardReturnCode ret;
|
||||
|
||||
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_DESCRIPTION);
|
||||
|
||||
REMOTE_CALL(gpu, encoder, decoder, ret);
|
||||
|
||||
const size_t string_size = apir_decode_array_size_unchecked(decoder);
|
||||
char * string = (char *) apir_decoder_alloc_array(sizeof(char), string_size);
|
||||
if (!string) {
|
||||
GGML_LOG_ERROR("%s: Could not allocate the device description buffer\n", __func__);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
apir_decode_char_array(decoder, string, string_size);
|
||||
|
||||
remote_call_finish(gpu, encoder, decoder);
|
||||
|
||||
return string;
|
||||
}
|
||||
|
||||
uint32_t apir_device_get_type(virtgpu * gpu) {
|
||||
static uint32_t dev_type = 255;
|
||||
if (dev_type != 255) {
|
||||
return dev_type;
|
||||
}
|
||||
|
||||
apir_encoder * encoder;
|
||||
apir_decoder * decoder;
|
||||
ApirForwardReturnCode ret;
|
||||
|
||||
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_TYPE);
|
||||
|
||||
REMOTE_CALL(gpu, encoder, decoder, ret);
|
||||
|
||||
apir_decode_uint32_t(decoder, &dev_type);
|
||||
|
||||
remote_call_finish(gpu, encoder, decoder);
|
||||
|
||||
return dev_type;
|
||||
}
|
||||
|
||||
void apir_device_get_memory(virtgpu * gpu, size_t * free, size_t * total) {
|
||||
static size_t dev_free = 0;
|
||||
static size_t dev_total = 0;
|
||||
apir_encoder * encoder;
|
||||
apir_decoder * decoder;
|
||||
ApirForwardReturnCode ret;
|
||||
|
||||
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_MEMORY);
|
||||
|
||||
REMOTE_CALL(gpu, encoder, decoder, ret);
|
||||
|
||||
apir_decode_size_t(decoder, &dev_free);
|
||||
apir_decode_size_t(decoder, &dev_total);
|
||||
|
||||
*free = dev_free;
|
||||
*total = dev_total;
|
||||
|
||||
remote_call_finish(gpu, encoder, decoder);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
bool apir_device_supports_op(virtgpu * gpu, const ggml_tensor * op) {
|
||||
apir_encoder * encoder;
|
||||
apir_decoder * decoder;
|
||||
ApirForwardReturnCode ret;
|
||||
|
||||
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_SUPPORTS_OP);
|
||||
|
||||
apir_encode_ggml_tensor_inline(encoder, op);
|
||||
|
||||
REMOTE_CALL(gpu, encoder, decoder, ret);
|
||||
|
||||
bool supports_op;
|
||||
apir_decode_bool_t(decoder, &supports_op);
|
||||
|
||||
remote_call_finish(gpu, encoder, decoder);
|
||||
|
||||
return supports_op;
|
||||
}
|
||||
|
||||
apir_buffer_type_host_handle_t apir_device_get_buffer_type(virtgpu * gpu) {
|
||||
apir_encoder * encoder;
|
||||
apir_decoder * decoder;
|
||||
ApirForwardReturnCode ret;
|
||||
|
||||
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_BUFFER_TYPE);
|
||||
|
||||
REMOTE_CALL(gpu, encoder, decoder, ret);
|
||||
|
||||
apir_buffer_type_host_handle_t buft_handle;
|
||||
apir_decode_apir_buffer_type_host_handle_t(decoder, &buft_handle);
|
||||
|
||||
remote_call_finish(gpu, encoder, decoder);
|
||||
|
||||
return buft_handle;
|
||||
}
|
||||
|
||||
void apir_device_get_props(virtgpu * gpu,
|
||||
bool * async,
|
||||
bool * host_buffer,
|
||||
bool * buffer_from_host_ptr,
|
||||
bool * events) {
|
||||
apir_encoder * encoder;
|
||||
apir_decoder * decoder;
|
||||
ApirForwardReturnCode ret;
|
||||
|
||||
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_PROPS);
|
||||
|
||||
REMOTE_CALL(gpu, encoder, decoder, ret);
|
||||
|
||||
apir_decode_bool_t(decoder, async);
|
||||
apir_decode_bool_t(decoder, host_buffer);
|
||||
apir_decode_bool_t(decoder, buffer_from_host_ptr);
|
||||
apir_decode_bool_t(decoder, events);
|
||||
|
||||
remote_call_finish(gpu, encoder, decoder);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
apir_buffer_context_t apir_device_buffer_from_ptr(virtgpu * gpu, size_t size, size_t max_tensor_size) {
|
||||
apir_encoder * encoder;
|
||||
apir_decoder * decoder;
|
||||
ApirForwardReturnCode ret;
|
||||
|
||||
apir_buffer_context_t buffer_context;
|
||||
|
||||
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_BUFFER_FROM_PTR);
|
||||
|
||||
if (virtgpu_shmem_create(gpu, size, &buffer_context.shmem)) {
|
||||
GGML_ABORT("Couldn't allocate the guest-host shared buffer");
|
||||
}
|
||||
|
||||
apir_encode_virtgpu_shmem_res_id(encoder, buffer_context.shmem.res_id);
|
||||
|
||||
apir_encode_size_t(encoder, &size);
|
||||
apir_encode_size_t(encoder, &max_tensor_size);
|
||||
|
||||
REMOTE_CALL(gpu, encoder, decoder, ret);
|
||||
|
||||
apir_decode_apir_buffer_host_handle_t(decoder, &buffer_context.host_handle);
|
||||
buffer_context.buft_host_handle = apir_decode_apir_buffer_type_host_handle(decoder);
|
||||
|
||||
remote_call_finish(gpu, encoder, decoder);
|
||||
|
||||
return buffer_context;
|
||||
}
|
||||
|
|
@ -0,0 +1,29 @@
|
|||
#include "virtgpu.h"
|
||||
|
||||
#include "ggml-remoting.h"
|
||||
#include "backend/shared/apir_backend.h"
|
||||
#include "backend/shared/apir_cs_ggml.h"
|
||||
|
||||
#include "ggml-backend-impl.h"
|
||||
|
||||
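/* Helpers shared by all forwarded calls: REMOTE_CALL_PREPARE opens an encoder
 * for an APIR_COMMAND_TYPE_FORWARD command carrying the per-call id, and
 * REMOTE_CALL performs the hypercall, checks the returned code and rebases it
 * against APIR_FORWARD_BASE_INDEX.
 */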
#define REMOTE_CALL_PREPARE(gpu_dev_name, encoder_name, apir_command_type__) \
|
||||
do { \
|
||||
int32_t forward_flag = (int32_t) apir_command_type__; \
|
||||
encoder_name = remote_call_prepare(gpu_dev_name, APIR_COMMAND_TYPE_FORWARD, forward_flag); \
|
||||
if (!encoder_name) { \
|
||||
GGML_ABORT("%s: failed to prepare the remote call encoder", __func__); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define REMOTE_CALL(gpu_dev_name, encoder_name, decoder_name, ret_name) \
|
||||
do { \
|
||||
ret_name = (ApirForwardReturnCode) remote_call(gpu_dev_name, encoder_name, &decoder_name, 0, NULL); \
|
||||
if (!decoder_name) { \
|
||||
GGML_ABORT("%s: failed to kick the remote call", __func__); \
|
||||
} \
|
||||
if (ret_name < APIR_FORWARD_BASE_INDEX) { \
|
||||
GGML_ABORT("%s: failed to forward the API call: %s: code %d", __func__, \
|
||||
apir_forward_error(ret_name), ret_name); \
|
||||
} \
|
||||
ret_name = (ApirForwardReturnCode) (ret_name - APIR_FORWARD_BASE_INDEX); \
|
||||
} while (0)
|
||||
|
|
@ -0,0 +1,51 @@
|
|||
#pragma once
|
||||
|
||||
/* device */
|
||||
void apir_device_get_device_count(struct virtgpu * gpu);
|
||||
int apir_device_get_count(struct virtgpu * gpu);
|
||||
const char * apir_device_get_name(struct virtgpu * gpu);
|
||||
const char * apir_device_get_description(struct virtgpu * gpu);
|
||||
uint32_t apir_device_get_type(struct virtgpu * gpu);
|
||||
void apir_device_get_memory(struct virtgpu * gpu, size_t * free, size_t * total);
|
||||
bool apir_device_supports_op(struct virtgpu * gpu, const ggml_tensor * op);
|
||||
apir_buffer_type_host_handle_t apir_device_get_buffer_type(struct virtgpu * gpu);
|
||||
void apir_device_get_props(struct virtgpu * gpu,
|
||||
bool * async,
|
||||
bool * host_buffer,
|
||||
bool * buffer_from_host_ptr,
|
||||
bool * events);
|
||||
apir_buffer_context_t apir_device_buffer_from_ptr(struct virtgpu * gpu, size_t size, size_t max_tensor_size);
|
||||
|
||||
/* buffer-type */
|
||||
const char * apir_buffer_type_get_name(struct virtgpu * gpu, ggml_backend_buffer_type_t buft);
|
||||
size_t apir_buffer_type_get_alignment(struct virtgpu * gpu, ggml_backend_buffer_type_t buft);
|
||||
size_t apir_buffer_type_get_max_size(struct virtgpu * gpu, ggml_backend_buffer_type_t buft);
|
||||
bool apir_buffer_type_is_host(struct virtgpu * gpu, ggml_backend_buffer_type_t buft);
|
||||
apir_buffer_context_t apir_buffer_type_alloc_buffer(struct virtgpu * gpu,
|
||||
ggml_backend_buffer_type_t buffer_buft,
|
||||
size_t size);
|
||||
size_t apir_buffer_type_get_alloc_size(struct virtgpu * gpu, ggml_backend_buffer_type_t buft, const ggml_tensor * op);
|
||||
|
||||
/* buffer */
|
||||
void * apir_buffer_get_base(struct virtgpu * gpu, apir_buffer_context_t * buffer_context);
|
||||
void apir_buffer_set_tensor(struct virtgpu * gpu,
|
||||
apir_buffer_context_t * buffer_context,
|
||||
ggml_tensor * tensor,
|
||||
const void * data,
|
||||
size_t offset,
|
||||
size_t size);
|
||||
void apir_buffer_get_tensor(struct virtgpu * gpu,
|
||||
apir_buffer_context_t * buffer_context,
|
||||
const ggml_tensor * tensor,
|
||||
void * data,
|
||||
size_t offset,
|
||||
size_t size);
|
||||
bool apir_buffer_cpy_tensor(struct virtgpu * gpu,
|
||||
apir_buffer_context_t * buffer_context,
|
||||
const ggml_tensor * src,
|
||||
const ggml_tensor * dst);
|
||||
void apir_buffer_clear(struct virtgpu * gpu, apir_buffer_context_t * buffer_context, uint8_t value);
|
||||
void apir_buffer_free_buffer(struct virtgpu * gpu, apir_buffer_context_t * buffer_context);
|
||||
|
||||
/* backend */
|
||||
ggml_status apir_backend_graph_compute(struct virtgpu * gpu, ggml_cgraph * cgraph);
|
||||
|
|
@ -0,0 +1,99 @@
|
|||
#include "virtgpu-shm.h"
|
||||
|
||||
#include "virtgpu.h"
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
static uint32_t virtgpu_ioctl_resource_create_blob(virtgpu * gpu,
|
||||
uint32_t blob_mem,
|
||||
uint32_t blob_flags,
|
||||
size_t blob_size,
|
||||
uint64_t blob_id,
|
||||
uint32_t * res_id) {
|
||||
#ifdef SIMULATE_BO_SIZE_FIX
|
||||
blob_size = align64(blob_size, 4096);
|
||||
#endif
|
||||
|
||||
drm_virtgpu_resource_create_blob args = {
|
||||
.blob_mem = blob_mem,
|
||||
.blob_flags = blob_flags,
|
||||
.bo_handle = 0,
|
||||
.res_handle = 0,
|
||||
.size = blob_size,
|
||||
.pad = 0,
|
||||
.cmd_size = 0,
|
||||
.cmd = 0,
|
||||
.blob_id = blob_id,
|
||||
};
|
||||
|
||||
if (virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_RESOURCE_CREATE_BLOB, &args)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
*res_id = args.res_handle;
|
||||
return args.bo_handle;
|
||||
}
|
||||
|
||||
static void virtgpu_ioctl_gem_close(virtgpu * gpu, uint32_t gem_handle) {
|
||||
drm_gem_close args = {
|
||||
.handle = gem_handle,
|
||||
.pad = 0,
|
||||
};
|
||||
|
||||
const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_GEM_CLOSE, &args);
|
||||
assert(!ret);
|
||||
#ifdef NDEBUG
|
||||
UNUSED(ret);
|
||||
#endif
|
||||
}
|
||||
|
||||
static void * virtgpu_ioctl_map(virtgpu * gpu, uint32_t gem_handle, size_t size) {
|
||||
drm_virtgpu_map args = {
|
||||
.offset = 0,
|
||||
.handle = gem_handle,
|
||||
.pad = 0,
|
||||
};
|
||||
|
||||
if (virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_MAP, &args)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void * ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, gpu->fd, args.offset);
|
||||
if (ptr == MAP_FAILED) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
void virtgpu_shmem_destroy(virtgpu * gpu, virtgpu_shmem * shmem) {
|
||||
munmap(shmem->mmap_ptr, shmem->mmap_size);
|
||||
virtgpu_ioctl_gem_close(gpu, shmem->gem_handle);
|
||||
}
|
||||
|
||||
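/* Allocate a mappable HOST3D blob, map it into the guest address space and
 * fill *shmem with the resource id, size, pointer and GEM handle.
 * Returns 0 on success, non-zero on failure.
 */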
int virtgpu_shmem_create(virtgpu * gpu, size_t size, virtgpu_shmem * shmem) {
|
||||
size = align64(size, 16384);
|
||||
|
||||
uint32_t res_id;
|
||||
uint32_t gem_handle = virtgpu_ioctl_resource_create_blob(gpu, VIRTGPU_BLOB_MEM_HOST3D,
|
||||
VIRTGPU_BLOB_FLAG_USE_MAPPABLE, size, 0, &res_id);
|
||||
|
||||
if (!gem_handle) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
void * ptr = virtgpu_ioctl_map(gpu, gem_handle, size);
|
||||
if (!ptr) {
|
||||
virtgpu_ioctl_gem_close(gpu, gem_handle);
|
||||
GGML_LOG_ERROR("virtgpu_ioctl_map FAILED\n");
|
||||
exit(1);
|
||||
return 1;
|
||||
}
|
||||
|
||||
shmem->res_id = res_id;
|
||||
shmem->mmap_size = size;
|
||||
shmem->mmap_ptr = ptr;
|
||||
shmem->gem_handle = gem_handle;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -0,0 +1,23 @@
|
|||
#pragma once
|
||||
|
||||
#include "virtgpu-utils.h"
|
||||
|
||||
#include <sys/mman.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <cassert>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
struct virtgpu;
|
||||
|
||||
struct virtgpu_shmem {
|
||||
uint32_t res_id;
|
||||
size_t mmap_size;
|
||||
void * mmap_ptr;
|
||||
|
||||
uint32_t gem_handle;
|
||||
};
|
||||
|
||||
int virtgpu_shmem_create(virtgpu * gpu, size_t size, virtgpu_shmem * shmem);
|
||||
void virtgpu_shmem_destroy(virtgpu * gpu, virtgpu_shmem * shmem);
|
||||
|
|
@ -0,0 +1,179 @@
|
|||
#include "virtgpu-utils.h"
|
||||
|
||||
#include <malloc.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <cstring>
|
||||
|
||||
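/* Sparse-array nodes are allocated with 64-byte alignment so the low bits of a
 * node handle can carry the node's level in the tree.
 */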
#define NODE_ALLOC_ALIGN 64
|
||||
#define NODE_PTR_MASK (~((uintptr_t) NODE_ALLOC_ALIGN - 1))
|
||||
#define NODE_LEVEL_MASK ((uintptr_t) NODE_ALLOC_ALIGN - 1)
|
||||
#define NULL_NODE 0
|
||||
|
||||
#define os_malloc_aligned(_size, _align) _aligned_malloc(_size, _align)
|
||||
#define os_free_aligned(_ptr) free(_ptr)
|
||||
#define p_atomic_cmpxchg(v, old, _new) __sync_val_compare_and_swap((v), (old), (_new))
|
||||
|
||||
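/* floor(log2(n)) for n >= 1 (returns 0 for n == 0). */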
static inline uint64_t util_logbase2_64(uint64_t n) {
|
||||
#if defined(HAVE___BUILTIN_CLZLL)
|
||||
return ((sizeof(uint64_t) * 8 - 1) - __builtin_clzll(n | 1));
|
||||
#else
|
||||
uint64_t pos = 0ull;
|
||||
if (n >= 1ull << 32) {
|
||||
n >>= 32;
|
||||
pos += 32;
|
||||
}
|
||||
if (n >= 1ull << 16) {
|
||||
n >>= 16;
|
||||
pos += 16;
|
||||
}
|
||||
if (n >= 1ull << 8) {
|
||||
n >>= 8;
|
||||
pos += 8;
|
||||
}
|
||||
if (n >= 1ull << 4) {
|
||||
n >>= 4;
|
||||
pos += 4;
|
||||
}
|
||||
if (n >= 1ull << 2) {
|
||||
n >>= 2;
|
||||
pos += 2;
|
||||
}
|
||||
if (n >= 1ull << 1) {
|
||||
pos += 1;
|
||||
}
|
||||
return pos;
|
||||
#endif
|
||||
}
|
||||
|
||||
void util_sparse_array_init(util_sparse_array * arr, size_t elem_size, size_t node_size) {
|
||||
memset(arr, 0, sizeof(*arr));
|
||||
arr->elem_size = elem_size;
|
||||
arr->node_size_log2 = util_logbase2_64(node_size);
|
||||
assert(node_size >= 2 && node_size == (1ull << arr->node_size_log2));
|
||||
}
|
||||
|
||||
static inline void * os_malloc_aligned(size_t size, size_t alignment) {
|
||||
void * ptr;
|
||||
alignment = (alignment + sizeof(void *) - 1) & ~(sizeof(void *) - 1);
|
||||
if (posix_memalign(&ptr, alignment, size) != 0) {
|
||||
return NULL;
|
||||
}
|
||||
return ptr;
|
||||
}
|
||||
|
||||
static inline void * _util_sparse_array_node_data(uintptr_t handle) {
|
||||
return (void *) (handle & NODE_PTR_MASK);
|
||||
}
|
||||
|
||||
static inline unsigned _util_sparse_array_node_level(uintptr_t handle) {
|
||||
return handle & NODE_LEVEL_MASK;
|
||||
}
|
||||
|
||||
static inline void _util_sparse_array_node_finish(util_sparse_array * arr, uintptr_t node) {
|
||||
if (_util_sparse_array_node_level(node) > 0) {
|
||||
uintptr_t * children = (uintptr_t *) _util_sparse_array_node_data(node);
|
||||
size_t node_size = 1ull << arr->node_size_log2;
|
||||
for (size_t i = 0; i < node_size; i++) {
|
||||
if (children[i]) {
|
||||
_util_sparse_array_node_finish(arr, children[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
os_free_aligned(_util_sparse_array_node_data(node));
|
||||
}
|
||||
|
||||
static inline uintptr_t _util_sparse_array_node(void * data, unsigned level) {
|
||||
assert(data != NULL);
|
||||
assert(((uintptr_t) data & NODE_LEVEL_MASK) == 0);
|
||||
assert((level & NODE_PTR_MASK) == 0);
|
||||
return (uintptr_t) data | level;
|
||||
}
|
||||
|
||||
inline uintptr_t _util_sparse_array_node_alloc(util_sparse_array * arr, unsigned level) {
|
||||
size_t size;
|
||||
if (level == 0) {
|
||||
size = arr->elem_size << arr->node_size_log2;
|
||||
} else {
|
||||
size = sizeof(uintptr_t) << arr->node_size_log2;
|
||||
}
|
||||
|
||||
void * data = os_malloc_aligned(size, NODE_ALLOC_ALIGN);
|
||||
memset(data, 0, size);
|
||||
|
||||
return _util_sparse_array_node(data, level);
|
||||
}
|
||||
|
||||
static inline uintptr_t _util_sparse_array_set_or_free_node(uintptr_t * node_ptr, uintptr_t cmp_node, uintptr_t node) {
|
||||
uintptr_t prev_node = p_atomic_cmpxchg(node_ptr, cmp_node, node);
|
||||
|
||||
if (prev_node != cmp_node) {
|
||||
/* We lost the race. Free this one and return the one that was already
|
||||
* allocated.
|
||||
*/
|
||||
os_free_aligned(_util_sparse_array_node_data(node));
|
||||
return prev_node;
|
||||
} else {
|
||||
return node;
|
||||
}
|
||||
}
|
||||
|
||||
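/* Return a pointer to element idx, lazily (and lock-free) growing the tree of
 * nodes as needed.
 */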
void * util_sparse_array_get(util_sparse_array * arr, uint64_t idx) {
|
||||
const unsigned node_size_log2 = arr->node_size_log2;
|
||||
uintptr_t root = p_atomic_read(&arr->root);
|
||||
if (unlikely(!root)) {
|
||||
unsigned root_level = 0;
|
||||
uint64_t idx_iter = idx >> node_size_log2;
|
||||
while (idx_iter) {
|
||||
idx_iter >>= node_size_log2;
|
||||
root_level++;
|
||||
}
|
||||
uintptr_t new_root = _util_sparse_array_node_alloc(arr, root_level);
|
||||
root = _util_sparse_array_set_or_free_node(&arr->root, NULL_NODE, new_root);
|
||||
}
|
||||
|
||||
while (1) {
|
||||
unsigned root_level = _util_sparse_array_node_level(root);
|
||||
uint64_t root_idx = idx >> (root_level * node_size_log2);
|
||||
if (likely(root_idx < (1ull << node_size_log2))) {
|
||||
break;
|
||||
}
|
||||
|
||||
/* In this case, we have a root but its level is low enough that the
|
||||
* requested index is out-of-bounds.
|
||||
*/
|
||||
uintptr_t new_root = _util_sparse_array_node_alloc(arr, root_level + 1);
|
||||
|
||||
uintptr_t * new_root_children = (uintptr_t *) _util_sparse_array_node_data(new_root);
|
||||
new_root_children[0] = root;
|
||||
|
||||
/* We only add one at a time instead of the whole tree because it's
|
||||
* easier to ensure correctness of both the tree building and the
|
||||
* clean-up path. Because we're only adding one node we never have to
|
||||
* worry about trying to free multiple things without freeing the old
|
||||
* things.
|
||||
*/
|
||||
root = _util_sparse_array_set_or_free_node(&arr->root, root, new_root);
|
||||
}
|
||||
|
||||
void * node_data = _util_sparse_array_node_data(root);
|
||||
unsigned node_level = _util_sparse_array_node_level(root);
|
||||
while (node_level > 0) {
|
||||
uint64_t child_idx = (idx >> (node_level * node_size_log2)) & ((1ull << node_size_log2) - 1);
|
||||
|
||||
uintptr_t * children = (uintptr_t *) node_data;
|
||||
uintptr_t child = p_atomic_read(&children[child_idx]);
|
||||
|
||||
if (unlikely(!child)) {
|
||||
child = _util_sparse_array_node_alloc(arr, node_level - 1);
|
||||
child = _util_sparse_array_set_or_free_node(&children[child_idx], NULL_NODE, child);
|
||||
}
|
||||
|
||||
node_data = _util_sparse_array_node_data(child);
|
||||
node_level = _util_sparse_array_node_level(child);
|
||||
}
|
||||
|
||||
uint64_t elem_idx = idx & ((1ull << node_size_log2) - 1);
|
||||
return (void *) ((char *) node_data + (elem_idx * arr->elem_size));
|
||||
}
|
||||
|
|
@ -0,0 +1,86 @@
|
|||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
#include <cassert>
|
||||
#include <cerrno>
|
||||
#include <cstdarg>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <ctime>
|
||||
|
||||
#define unlikely(x) __builtin_expect(!!(x), 0)
|
||||
#define likely(x) __builtin_expect(!!(x), 1)
|
||||
|
||||
#ifndef UNUSED
|
||||
# define UNUSED(x) (void) (x)
|
||||
#endif
|
||||
|
||||
/** Checks if a value is a power of two. Does not handle zero. */
|
||||
#define IS_POT(v) (((v) & ((v) - 1)) == 0)
|
||||
|
||||
/** Checks if a value is a power of two. Zero is handled. */
|
||||
#define IS_POT_NONZERO(v) ((v) != 0 && IS_POT(v))
|
||||
|
||||
/** Align a value to a power of two */
|
||||
#define ALIGN_POT(x, pot_align) (((x) + (pot_align) - 1) & ~((pot_align) - 1))
|
||||
|
||||
#define p_atomic_read(_v) __atomic_load_n((_v), __ATOMIC_ACQUIRE)
|
||||
|
||||
static inline bool util_is_power_of_two_nonzero64(uint64_t v) {
|
||||
return IS_POT_NONZERO(v);
|
||||
}
|
||||
|
||||
static inline uint64_t align64(uint64_t value, uint64_t alignment) {
|
||||
assert(util_is_power_of_two_nonzero64(alignment));
|
||||
return ALIGN_POT(value, alignment);
|
||||
}
|
||||
|
||||
struct list_head {
|
||||
list_head * prev;
|
||||
list_head * next;
|
||||
};
|
||||
|
||||
struct util_sparse_array {
|
||||
size_t elem_size;
|
||||
unsigned node_size_log2;
|
||||
|
||||
uintptr_t root;
|
||||
};
|
||||
|
||||
void * util_sparse_array_get(util_sparse_array * arr, uint64_t idx);
|
||||
void util_sparse_array_init(util_sparse_array * arr, size_t elem_size, size_t node_size);
|
||||
|
||||
inline void os_time_sleep(int64_t usecs) {
|
||||
timespec time;
|
||||
time.tv_sec = usecs / 1000000;
|
||||
time.tv_nsec = (usecs % 1000000) * 1000;
|
||||
while (clock_nanosleep(CLOCK_MONOTONIC, 0, &time, &time) == EINTR)
|
||||
;
|
||||
}
|
||||
|
||||
struct timer_data {
|
||||
long long start;
|
||||
long long total;
|
||||
long long count;
|
||||
};
|
||||
|
||||
static inline void start_timer(timer_data * timer) {
|
||||
timespec ts;
|
||||
clock_gettime(CLOCK_MONOTONIC, &ts);
|
||||
timer->start = (long long) ts.tv_sec * 1000000000LL + ts.tv_nsec;
|
||||
}
|
||||
|
||||
// returns the duration in ns
|
||||
static inline long long stop_timer(timer_data * timer) {
|
||||
timespec ts;
|
||||
clock_gettime(CLOCK_MONOTONIC, &ts);
|
||||
long long timer_end = (long long) ts.tv_sec * 1000000000LL + ts.tv_nsec;
|
||||
|
||||
long long duration = (timer_end - timer->start);
|
||||
timer->total += duration;
|
||||
timer->count += 1;
|
||||
|
||||
return duration;
|
||||
}
|
||||
|
|
@ -0,0 +1,498 @@
|
|||
#include "virtgpu.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <cassert>
|
||||
#include <cerrno>
|
||||
#include <cstdlib>
|
||||
|
||||
static virt_gpu_result_t virtgpu_open_device(virtgpu * gpu, const drmDevicePtr dev);
|
||||
static virt_gpu_result_t virtgpu_open(virtgpu * gpu);
|
||||
|
||||
static virt_gpu_result_t virtgpu_init_capset(virtgpu * gpu);
|
||||
static virt_gpu_result_t virtgpu_init_context(virtgpu * gpu);
|
||||
|
||||
static int virtgpu_ioctl_context_init(virtgpu * gpu, virgl_renderer_capset capset_id);
|
||||
static int virtgpu_ioctl_get_caps(virtgpu * gpu,
|
||||
virgl_renderer_capset id,
|
||||
uint32_t version,
|
||||
void * capset,
|
||||
size_t capset_size);
|
||||
static uint64_t virtgpu_ioctl_getparam(virtgpu * gpu, uint64_t param);
|
||||
static void virtgpu_init_renderer_info(virtgpu * gpu);
|
||||
|
||||
static void log_call_duration(long long call_duration_ns, const char * name);
|
||||
|
||||
const uint64_t APIR_HANDSHAKE_MAX_WAIT_MS = 2 * 1000; // 2s
|
||||
const uint64_t APIR_LOADLIBRARY_MAX_WAIT_MS = 60 * 1000; // 60s
|
||||
|
||||
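/* Exchange the APIR protocol version with the host. Aborts if the reply magic
 * does not match; a major-version mismatch is reported as an error, a minor
 * mismatch only as a warning.
 */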
static int virtgpu_handshake(virtgpu * gpu) {
|
||||
apir_encoder * encoder;
|
||||
apir_decoder * decoder;
|
||||
|
||||
encoder = remote_call_prepare(gpu, APIR_COMMAND_TYPE_HANDSHAKE, 0);
|
||||
if (!encoder) {
|
||||
GGML_ABORT("%s: failed to prepare the remote call encoder", __func__);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* write handshake props */
|
||||
|
||||
uint32_t guest_major = APIR_PROTOCOL_MAJOR;
|
||||
uint32_t guest_minor = APIR_PROTOCOL_MINOR;
|
||||
apir_encode_uint32_t(encoder, &guest_major);
|
||||
apir_encode_uint32_t(encoder, &guest_minor);
|
||||
|
||||
/* *** */
|
||||
|
||||
uint32_t ret_magic;
|
||||
long long call_duration_ns;
|
||||
ret_magic = remote_call(gpu, encoder, &decoder, APIR_HANDSHAKE_MAX_WAIT_MS, &call_duration_ns);
|
||||
log_call_duration(call_duration_ns, "API Remoting handshake");
|
||||
|
||||
if (!decoder) {
|
||||
GGML_ABORT(
|
||||
"%s: failed to initiate the communication with the virglrenderer library. "
|
||||
"Most likely, the wrong virglrenderer library was loaded in the hypervisor.",
|
||||
__func__);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* read handshake return values */
|
||||
|
||||
uint32_t host_major;
|
||||
uint32_t host_minor;
|
||||
|
||||
if (ret_magic != APIR_HANDSHAKE_MAGIC) {
|
||||
GGML_ABORT("%s: handshake with the virglrenderer failed (code=%d | %s)", __func__, ret_magic,
|
||||
apir_backend_initialize_error(ret_magic));
|
||||
} else {
|
||||
apir_decode_uint32_t(decoder, &host_major);
|
||||
apir_decode_uint32_t(decoder, &host_minor);
|
||||
}
|
||||
|
||||
remote_call_finish(gpu, encoder, decoder);
|
||||
|
||||
if (ret_magic != APIR_HANDSHAKE_MAGIC) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
GGML_LOG_INFO("%s: Guest is running with %u.%u\n", __func__, guest_major, guest_minor);
|
||||
GGML_LOG_INFO("%s: Host is running with %u.%u\n", __func__, host_major, host_minor);
|
||||
|
||||
if (guest_major != host_major) {
|
||||
GGML_LOG_ERROR("Host major (%d) and guest major (%d) version differ\n", host_major, guest_major);
|
||||
} else if (guest_minor != host_minor) {
|
||||
GGML_LOG_WARN("Host minor (%d) and guest minor (%d) version differ\n", host_minor, guest_minor);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static ApirLoadLibraryReturnCode virtgpu_load_library(virtgpu * gpu) {
|
||||
apir_encoder * encoder;
|
||||
apir_decoder * decoder;
|
||||
ApirLoadLibraryReturnCode ret;
|
||||
|
||||
encoder = remote_call_prepare(gpu, APIR_COMMAND_TYPE_LOADLIBRARY, 0);
|
||||
if (!encoder) {
|
||||
GGML_ABORT("%s: hypercall error: failed to prepare the remote call encoder", __func__);
|
||||
return APIR_LOAD_LIBRARY_HYPERCALL_INITIALIZATION_ERROR;
|
||||
}
|
||||
|
||||
long long call_duration_ns;
|
||||
|
||||
ret = (ApirLoadLibraryReturnCode) remote_call(gpu, encoder, &decoder, APIR_LOADLIBRARY_MAX_WAIT_MS,
|
||||
&call_duration_ns);
|
||||
log_call_duration(call_duration_ns, "API Remoting LoadLibrary");
|
||||
|
||||
if (!decoder) {
|
||||
GGML_ABORT("%s: hypercall error: failed to kick the API remoting hypercall.\n", __func__);
|
||||
return APIR_LOAD_LIBRARY_HYPERCALL_INITIALIZATION_ERROR;
|
||||
}
|
||||
|
||||
remote_call_finish(gpu, encoder, decoder);
|
||||
|
||||
if (ret == APIR_LOAD_LIBRARY_SUCCESS) {
|
||||
GGML_LOG_INFO("%s: The API Remoting backend was successfully loaded and initialized\n", __func__);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
// something wrong happened, find out what.
|
||||
|
||||
if (ret < APIR_LOAD_LIBRARY_INIT_BASE_INDEX) {
|
||||
GGML_ABORT("%s: virglrenderer could not load the API Remoting backend library: %s (code %d)", __func__,
|
||||
apir_load_library_error(ret), ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
GGML_LOG_INFO("%s: virglrenderer successfully loaded the API Remoting backend library", __func__);
|
||||
|
||||
ApirLoadLibraryReturnCode apir_ret = (ApirLoadLibraryReturnCode) (ret - APIR_LOAD_LIBRARY_INIT_BASE_INDEX);
|
||||
|
||||
if (apir_ret < APIR_LOAD_LIBRARY_INIT_BASE_INDEX) {
|
||||
GGML_ABORT("%s: the API Remoting backend library couldn't load the backend library: apir code=%d | %s)",
|
||||
__func__, apir_ret, apir_load_library_error(apir_ret));
|
||||
} else {
|
||||
uint32_t lib_ret = apir_ret - APIR_LOAD_LIBRARY_INIT_BASE_INDEX;
|
||||
GGML_ABORT("%s: the API Remoting backend library initialize its backend library: apir code=%d)", __func__,
|
||||
lib_ret);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
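/* Open the virtio-gpu DRM render node, negotiate the capset and context,
 * allocate the reply and data shared-memory pages, then handshake with
 * virglrenderer and load the host-side backend library.
 */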
virtgpu * create_virtgpu() {
|
||||
virtgpu * gpu = new virtgpu();
|
||||
|
||||
gpu->use_apir_capset = getenv("GGML_REMOTING_USE_APIR_CAPSET") != nullptr;
|
||||
util_sparse_array_init(&gpu->shmem_array, sizeof(virtgpu_shmem), 1024);
|
||||
|
||||
if (virtgpu_open(gpu) != APIR_SUCCESS) {
|
||||
GGML_ABORT("%s: failed to open the virtgpu device", __func__);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (virtgpu_init_capset(gpu) != APIR_SUCCESS) {
|
||||
GGML_ABORT("%s: failed to initialize the GPU capset", __func__);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (virtgpu_init_context(gpu) != APIR_SUCCESS) {
|
||||
GGML_ABORT("%s: failed to initialize the GPU context", __func__);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (virtgpu_shmem_create(gpu, SHMEM_REPLY_SIZE, &gpu->reply_shmem)) {
|
||||
GGML_ABORT("%s: failed to create the shared reply memory pages", __func__);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (virtgpu_shmem_create(gpu, SHMEM_DATA_SIZE, &gpu->data_shmem)) {
|
||||
GGML_ABORT("%s: failed to create the shared data memory pages", __func__);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (virtgpu_handshake(gpu)) {
|
||||
GGML_ABORT("%s: failed to handshake with the virglrenderer library", __func__);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (virtgpu_load_library(gpu) != APIR_LOAD_LIBRARY_SUCCESS) {
|
||||
GGML_ABORT("%s: failed to load the backend library", __func__);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return gpu;
|
||||
}
|
||||
|
||||
static virt_gpu_result_t virtgpu_open(virtgpu * gpu) {
|
||||
drmDevicePtr devs[8];
|
||||
int count = drmGetDevices2(0, devs, ARRAY_SIZE(devs));
|
||||
if (count < 0) {
|
||||
GGML_LOG_ERROR("%s: failed to enumerate DRM devices\n", __func__);
|
||||
return APIR_ERROR_INITIALIZATION_FAILED;
|
||||
}
|
||||
|
||||
virt_gpu_result_t result = APIR_ERROR_INITIALIZATION_FAILED;
|
||||
for (int i = 0; i < count; i++) {
|
||||
result = virtgpu_open_device(gpu, devs[i]);
|
||||
if (result == APIR_SUCCESS) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
drmFreeDevices(devs, count);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static virt_gpu_result_t virtgpu_open_device(virtgpu * gpu, const drmDevicePtr dev) {
|
||||
const char * node_path = dev->nodes[DRM_NODE_RENDER];
|
||||
|
||||
int fd = open(node_path, O_RDWR | O_CLOEXEC);
|
||||
if (fd < 0) {
|
||||
GGML_ABORT("failed to open %s", node_path);
|
||||
return APIR_ERROR_INITIALIZATION_FAILED;
|
||||
}
|
||||
|
||||
drmVersionPtr version = drmGetVersion(fd);
|
||||
if (!version || strcmp(version->name, "virtio_gpu") || version->version_major != 0) {
|
||||
if (version) {
|
||||
GGML_ABORT("unknown DRM driver %s version %d", version->name, version->version_major);
|
||||
} else {
|
||||
GGML_ABORT("failed to get DRM driver version");
|
||||
}
|
||||
|
||||
if (version) {
|
||||
drmFreeVersion(version);
|
||||
}
|
||||
close(fd);
|
||||
return APIR_ERROR_INITIALIZATION_FAILED;
|
||||
}
|
||||
|
||||
gpu->fd = fd;
|
||||
|
||||
drmFreeVersion(version);
|
||||
|
||||
GGML_LOG_INFO("using DRM device %s\n", node_path);
|
||||
|
||||
return APIR_SUCCESS;
|
||||
}
|
||||
|
||||
static virt_gpu_result_t virtgpu_init_context(virtgpu * gpu) {
|
||||
assert(!gpu->capset.version);
|
||||
const int ret = virtgpu_ioctl_context_init(gpu, gpu->capset.id);
|
||||
if (ret) {
|
||||
GGML_LOG_INFO("failed to initialize context: %s\n", strerror(errno));
|
||||
return APIR_ERROR_INITIALIZATION_FAILED;
|
||||
}
|
||||
|
||||
return APIR_SUCCESS;
|
||||
}
|
||||
|
||||
static virt_gpu_result_t virtgpu_init_capset(virtgpu * gpu) {
|
||||
if (gpu->use_apir_capset) {
|
||||
GGML_LOG_INFO("Using the APIR capset\n");
|
||||
gpu->capset.id = VIRTGPU_DRM_CAPSET_APIR;
|
||||
} else {
|
||||
GGML_LOG_INFO("Using the Venus capset\n");
|
||||
gpu->capset.id = VIRTGPU_DRM_CAPSET_VENUS;
|
||||
}
|
||||
gpu->capset.version = 0;
|
||||
|
||||
int ret =
|
||||
virtgpu_ioctl_get_caps(gpu, gpu->capset.id, gpu->capset.version, &gpu->capset.data, sizeof(gpu->capset.data));
|
||||
|
||||
if (ret) {
|
||||
GGML_LOG_INFO("failed to get APIR v%d capset: %s\n", gpu->capset.version, strerror(errno));
|
||||
return APIR_ERROR_INITIALIZATION_FAILED;
|
||||
}
|
||||
|
||||
assert(gpu->capset.data.supports_blob_resources);
|
||||
|
||||
return APIR_SUCCESS;
|
||||
}
|
||||
|
||||
static int virtgpu_ioctl_context_init(virtgpu * gpu, virgl_renderer_capset capset_id) {
|
||||
drm_virtgpu_context_set_param ctx_set_params[3] = {
|
||||
{
|
||||
.param = VIRTGPU_CONTEXT_PARAM_CAPSET_ID,
|
||||
.value = capset_id,
|
||||
},
|
||||
{
|
||||
.param = VIRTGPU_CONTEXT_PARAM_NUM_RINGS,
|
||||
.value = 1,
|
||||
},
|
||||
{
|
||||
.param = VIRTGPU_CONTEXT_PARAM_POLL_RINGS_MASK,
|
||||
.value = 0, /* don't generate drm_events on fence signaling */
|
||||
},
|
||||
};
|
||||
|
||||
drm_virtgpu_context_init args = {
|
||||
.num_params = ARRAY_SIZE(ctx_set_params),
|
||||
.pad = 0,
|
||||
.ctx_set_params = (uintptr_t) &ctx_set_params,
|
||||
};
|
||||
|
||||
return virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_CONTEXT_INIT, &args);
|
||||
}
|
||||
|
||||
static int virtgpu_ioctl_get_caps(virtgpu * gpu,
|
||||
virgl_renderer_capset id,
|
||||
uint32_t version,
|
||||
void * capset,
|
||||
size_t capset_size) {
|
||||
drm_virtgpu_get_caps args = {
|
||||
.cap_set_id = id,
|
||||
.cap_set_ver = version,
|
||||
.addr = (uintptr_t) capset,
|
||||
.size = (__u32) capset_size,
|
||||
.pad = 0,
|
||||
};
|
||||
|
||||
return virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_GET_CAPS, &args);
|
||||
}
|
||||
|
||||
static uint64_t virtgpu_ioctl_getparam(virtgpu * gpu, uint64_t param) {
|
||||
/* val must be zeroed because kernel only writes the lower 32 bits */
|
||||
uint64_t val = 0;
|
||||
drm_virtgpu_getparam args = {
|
||||
.param = param,
|
||||
.value = (uintptr_t) &val,
|
||||
};
|
||||
|
||||
const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_GETPARAM, &args);
|
||||
return ret ? 0 : val;
|
||||
}
|
||||
|
||||
apir_encoder * remote_call_prepare(virtgpu * gpu, ApirCommandType apir_cmd_type, int32_t cmd_flags) {
|
||||
/*
|
||||
* Prepare the command encoder and its buffer
|
||||
*/
|
||||
|
||||
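// Note: the encoder buffer and state are static, so only one remote call can
// be in flight at a time.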
static char encoder_buffer[4096];
|
||||
|
||||
static apir_encoder enc;
|
||||
enc = {
|
||||
.cur = encoder_buffer,
|
||||
.start = encoder_buffer,
|
||||
.end = encoder_buffer + sizeof(encoder_buffer),
|
||||
.fatal = false,
|
||||
};
|
||||
|
||||
/*
|
||||
* Fill the command encoder with the common args:
|
||||
* - cmd_type (int32_t)
|
||||
* - cmd_flags (int32_t)
|
||||
* - reply res id (uint32_t)
|
||||
*/
|
||||
|
||||
int32_t cmd_type = apir_cmd_type;
|
||||
|
||||
// for testing during the hypervisor transition
|
||||
if (!gpu->use_apir_capset) {
|
||||
cmd_type += VENUS_COMMAND_TYPE_LENGTH;
|
||||
}
|
||||
apir_encode_int32_t(&enc, &cmd_type);
|
||||
apir_encode_int32_t(&enc, &cmd_flags);
|
||||
|
||||
uint32_t reply_res_id = gpu->reply_shmem.res_id;
|
||||
apir_encode_uint32_t(&enc, &reply_res_id);
|
||||
|
||||
return &enc;
|
||||
}
|
||||
|
||||
void remote_call_finish(virtgpu * gpu, apir_encoder * enc, apir_decoder * dec) {
|
||||
UNUSED(gpu);
|
||||
|
||||
if (!enc) {
|
||||
GGML_LOG_ERROR("Invalid (null) encoder\n");
|
||||
}
|
||||
|
||||
if (!dec) {
|
||||
GGML_LOG_ERROR("Invalid (null) decoder\n");
|
||||
}
|
||||
|
||||
if (apir_encoder_get_fatal(enc)) {
|
||||
GGML_LOG_ERROR("Failed to encode the output parameters.\n");
|
||||
}
|
||||
|
||||
if (apir_decoder_get_fatal(dec)) {
|
||||
GGML_LOG_ERROR("Failed to decode the input parameters.\n");
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t remote_call(virtgpu * gpu,
|
||||
apir_encoder * encoder,
|
||||
apir_decoder ** decoder,
|
||||
float max_wait_ms,
|
||||
long long * call_duration_ns) {
|
||||
/*
|
||||
* Prepare the reply notification pointer
|
||||
*/
|
||||
|
||||
volatile std::atomic_uint * atomic_reply_notif = (volatile std::atomic_uint *) gpu->reply_shmem.mmap_ptr;
|
||||
*atomic_reply_notif = 0;
|
||||
|
||||
/*
|
||||
* Trigger the execbuf ioctl
|
||||
*/
|
||||
|
||||
drm_virtgpu_execbuffer args = {
|
||||
.flags = VIRTGPU_EXECBUF_RING_IDX,
|
||||
.size = (uint32_t) (encoder->cur - encoder->start),
|
||||
.command = (uintptr_t) encoder->start,
|
||||
|
||||
.bo_handles = 0,
|
||||
.num_bo_handles = 0,
|
||||
|
||||
.fence_fd = 0,
|
||||
.ring_idx = 0,
|
||||
.syncobj_stride = 0,
|
||||
.num_in_syncobjs = 0,
|
||||
.num_out_syncobjs = 0,
|
||||
.in_syncobjs = 0,
|
||||
.out_syncobjs = 0,
|
||||
};
|
||||
|
||||
*decoder = NULL;
|
||||
|
||||
int ret = drmIoctl(gpu->fd, DRM_IOCTL_VIRTGPU_EXECBUFFER, &args);
|
||||
|
||||
if (ret != 0) {
|
||||
GGML_ABORT("%s: the virtgpu EXECBUFFER ioctl failed (%d)", __func__, ret);
|
||||
}
|
||||
|
||||
/*
|
||||
* Wait for the response notification
|
||||
*/
|
||||
timer_data wait_host_reply_timer = { 0, 0, 0 };
|
||||
|
||||
start_timer(&wait_host_reply_timer);
|
||||
|
||||
timespec ts_start, ts_end;
|
||||
clock_gettime(CLOCK_MONOTONIC, &ts_start);
|
||||
long long start_time = (long long) ts_start.tv_sec * 1000000000LL + ts_start.tv_nsec;
|
||||
|
||||
bool timedout = false;
|
||||
uint32_t notif_value = 0;
|
||||
while (true) {
|
||||
notif_value = std::atomic_load_explicit(atomic_reply_notif, std::memory_order_acquire);
|
||||
|
||||
if (notif_value != 0) {
|
||||
break;
|
||||
}
|
||||
|
||||
int64_t base_sleep_us = 15;
|
||||
|
||||
os_time_sleep(base_sleep_us);
|
||||
|
||||
if (max_wait_ms) {
|
||||
clock_gettime(CLOCK_MONOTONIC, &ts_end);
|
||||
long long end_time = (long long) ts_end.tv_sec * 1000000000LL + ts_end.tv_nsec;
|
||||
float duration_ms = (float) (end_time - start_time) / 1e6f;
|
||||
|
||||
if (duration_ms > max_wait_ms) {
|
||||
timedout = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (call_duration_ns) {
|
||||
*call_duration_ns = stop_timer(&wait_host_reply_timer);
|
||||
}
|
||||
|
||||
if (max_wait_ms && timedout) {
|
||||
GGML_LOG_ERROR("timed out waiting for the host answer...\n");
|
||||
return APIR_FORWARD_TIMEOUT;
|
||||
}
|
||||
|
||||
/*
|
||||
* Prepare the decoder
|
||||
*/
|
||||
static apir_decoder response_dec;
|
||||
response_dec.cur = (char *) gpu->reply_shmem.mmap_ptr + sizeof(*atomic_reply_notif);
|
||||
response_dec.end = (char *) gpu->reply_shmem.mmap_ptr + gpu->reply_shmem.mmap_size;
|
||||
*decoder = &response_dec;
|
||||
|
||||
// extract the actual return value from the notif flag
|
||||
uint32_t returned_value = notif_value - 1;
|
||||
return returned_value;
|
||||
}
|
||||
|
||||
static void log_call_duration(long long call_duration_ns, const char * name) {
|
||||
double call_duration_ms = (double) call_duration_ns / 1e6; // 1 millisecond = 1e6 nanoseconds
|
||||
double call_duration_s = (double) call_duration_ns / 1e9; // 1 second = 1e9 nanoseconds
|
||||
|
||||
if (call_duration_s > 1) {
|
||||
GGML_LOG_INFO("%s: waited %.2fs for the %s host reply...\n", __func__, call_duration_s, name);
|
||||
} else if (call_duration_ms > 1) {
|
||||
GGML_LOG_INFO("%s: waited %.2fms for the %s host reply...\n", __func__, call_duration_ms, name);
|
||||
} else {
|
||||
GGML_LOG_INFO("%s: waited %lldns for the %s host reply...\n", __func__, call_duration_ns, name);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,92 @@
|
|||
#pragma once
|
||||
|
||||
#include "virtgpu-utils.h"
|
||||
#include "virtgpu-shm.h"
|
||||
#include "virtgpu-apir.h"
|
||||
|
||||
#include "backend/shared/api_remoting.h"
|
||||
#include "backend/shared/apir_cs.h"
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/sysmacros.h>
|
||||
#include <threads.h>
|
||||
#include <xf86drm.h>
|
||||
|
||||
#include <cstring>
|
||||
|
||||
#define VIRGL_RENDERER_UNSTABLE_APIS 1
|
||||
#include "apir_hw.h"
|
||||
#include <drm/virtgpu_drm.h>
|
||||
#include "venus_hw.h"
|
||||
|
||||
#ifndef VIRTGPU_DRM_CAPSET_APIR
|
||||
// Will be defined in include/drm/virtgpu_drm.h when
|
||||
// https://gitlab.freedesktop.org/virgl/virglrenderer/-/merge_requests/1590/diffs
|
||||
// is merged
|
||||
#define VIRTGPU_DRM_CAPSET_APIR 10
|
||||
#endif
|
||||
|
||||
// Mesa/Virglrenderer Venus internal. Only necessary during the
|
||||
// Venus->APIR transition in Virglrenderer
|
||||
#define VENUS_COMMAND_TYPE_LENGTH 331
|
||||
|
||||
#ifndef VIRTGPU_DRM_CAPSET_VENUS // only available with Linux >= v6.16
|
||||
#define VIRTGPU_DRM_CAPSET_VENUS 4
|
||||
#endif
|
||||
|
||||
typedef uint32_t virgl_renderer_capset;
|
||||
|
||||
/* from src/virtio/vulkan/vn_renderer_virtgpu.c */
|
||||
#define VIRTGPU_PCI_VENDOR_ID 0x1af4
|
||||
#define VIRTGPU_PCI_DEVICE_ID 0x1050
|
||||
#define VIRTGPU_BLOB_MEM_GUEST_VRAM 0x0004
|
||||
#define VIRTGPU_PARAM_GUEST_VRAM 9
|
||||
|
||||
#define SHMEM_DATA_SIZE 0x1830000 // 24MiB
|
||||
#define SHMEM_REPLY_SIZE 0x4000
|
||||
|
||||
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
|
||||
|
||||
enum virt_gpu_result_t {
|
||||
APIR_SUCCESS = 0,
|
||||
APIR_ERROR_INITIALIZATION_FAILED = -1,
|
||||
};
|
||||
|
||||
#define PRINTFLIKE(f, a) __attribute__((format(__printf__, f, a)))
|
||||
|
||||
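/* Guest-side state of the virtio-gpu device: DRM fd, negotiated capset, and
 * the pre-allocated shared pages used for command replies and bulk data.
 */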
struct virtgpu {
|
||||
bool use_apir_capset;
|
||||
|
||||
int fd;
|
||||
|
||||
struct {
|
||||
virgl_renderer_capset id;
|
||||
uint32_t version;
|
||||
virgl_renderer_capset_apir data;
|
||||
} capset;
|
||||
|
||||
util_sparse_array shmem_array;
|
||||
|
||||
/* APIR communication pages */
|
||||
virtgpu_shmem reply_shmem;
|
||||
virtgpu_shmem data_shmem;
|
||||
};
|
||||
|
||||
static inline int virtgpu_ioctl(virtgpu * gpu, unsigned long request, void * args) {
|
||||
return drmIoctl(gpu->fd, request, args);
|
||||
}
|
||||
|
||||
virtgpu * create_virtgpu();
|
||||
|
||||
apir_encoder * remote_call_prepare(virtgpu * gpu, ApirCommandType apir_cmd_type, int32_t cmd_flags);
|
||||
|
||||
uint32_t remote_call(virtgpu * gpu,
|
||||
apir_encoder * enc,
|
||||
apir_decoder ** dec,
|
||||
float max_wait_ms,
|
||||
long long * call_duration_ns);
|
||||
|
||||
void remote_call_finish(virtgpu * gpu, apir_encoder * enc, apir_decoder * dec);
|
||||