51 lines
1.6 KiB
C++
51 lines
1.6 KiB
C++
#include "virtgpu-forward-impl.h"
|
|
|
|
/**
 * Returns the current wall-clock time in milliseconds since the Unix epoch.
 *
 * The value is read from CLOCK_REALTIME, so it can jump when the system clock
 * is adjusted. Use CLOCK_MONOTONIC instead if the result is meant to measure
 * elapsed time.
 *
 * @return milliseconds since the epoch, or -1 if the clock could not be read.
 */
static long long current_time_ms() {
    struct timespec ts;

    // Do not read ts if the call failed: it would be uninitialized.
    if (clock_gettime(CLOCK_REALTIME, &ts) != 0) {
        return -1;
    }

    // Whole seconds scaled to ms, plus the nanosecond remainder scaled down to ms.
    return (long long) ts.tv_sec * 1000LL + (long long) ts.tv_nsec / 1000000LL;
}
|
|
|
|
/**
 * Sends a compute graph through the remote-call channel and returns the
 * ggml_status decoded from the reply.
 *
 * The graph is serialized into a byte vector and written into a guest-host
 * shared buffer. The command stream itself only carries the buffer's resource
 * id and the payload size. The init-time buffer in gpu->data_shmem is reused
 * when the payload fits; otherwise a temporary buffer is created for this call
 * and destroyed before returning.
 *
 * @param gpu     virtgpu handle used for the remote call and shared buffers.
 * @param cgraph  graph to serialize and submit.
 * @return the status written by apir_decode_ggml_status; the local is seeded
 *         with GGML_STATUS_ABORTED before decoding.
 *
 * Aborts via GGML_ABORT if the temporary shared buffer cannot be created.
 */
ggml_status apir_backend_graph_compute(virtgpu * gpu, ggml_cgraph * cgraph) {
    apir_encoder *        encoder;
    apir_decoder *        decoder;
    ApirForwardReturnCode ret;  // handed to REMOTE_CALL; not examined afterwards

    REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BACKEND_GRAPH_COMPUTE);

    // Serialize the graph; the returned value is used as the payload size below.
    std::vector<uint8_t> serialized_graph;
    size_t               payload_size = apir_serialize_ggml_cgraph(cgraph, serialized_graph);

    // Prefer the init-time allocated buffer when the payload fits in it,
    // otherwise fall back to a buffer that lives only for this call.
    const bool      fits_in_data_shmem = payload_size <= gpu->data_shmem.mmap_size;
    virtgpu_shmem   scratch_shmem;
    virtgpu_shmem * shmem = fits_in_data_shmem ? &gpu->data_shmem : &scratch_shmem;

    if (!fits_in_data_shmem && virtgpu_shmem_create(gpu, payload_size, shmem)) {
        GGML_ABORT("Couldn't allocate the guest-host shared buffer");
    }

    // Command stream: which shared buffer holds the payload, and how large it is.
    apir_encode_virtgpu_shmem_res_id(encoder, shmem->res_id);
    apir_encode_size_t(encoder, &payload_size);

    // The serialized graph goes into the shared buffer through a second encoder.
    apir_encoder payload_encoder = apir_new_encoder((char *) shmem->mmap_ptr, payload_size);
    apir_encode_cgraph_data(&payload_encoder, serialized_graph);

    REMOTE_CALL(gpu, encoder, decoder, ret);

    ggml_status result = GGML_STATUS_ABORTED;
    apir_decode_ggml_status(decoder, &result);

    remote_call_finish(gpu, encoder, decoder);

    // Only the per-call buffer is released; the init-time buffer is left alone.
    if (!fits_in_data_shmem) {
        virtgpu_shmem_destroy(gpu, shmem);
    }

    return result;
}
|