#include "virtgpu-forward-impl.h"

/**
 * Returns the current wall-clock time in milliseconds.
 *
 * The value is read from CLOCK_REALTIME, so it can jump when the system clock
 * is adjusted. Switch to CLOCK_MONOTONIC if this is ever used to measure
 * elapsed intervals.
 *
 * @return seconds * 1000 + nanoseconds / 1'000'000 of the sampled timespec
 *         (0 if clock_gettime() leaves the zero-initialized timespec untouched).
 */
static long long current_time_ms() {
    timespec ts = {};  // zero-init so a failed clock_gettime() never yields garbage
    clock_gettime(CLOCK_REALTIME, &ts);
    // Convert to milliseconds, as the function name promises
    // (the previous formula produced nanoseconds).
    return (long long) ts.tv_sec * 1000LL + ts.tv_nsec / 1000000LL;
}

/**
 * Forwards a graph computation to the remote side.
 *
 * The graph is serialized, written into a guest-host shared memory region,
 * and the region's resource id plus the payload size are sent along with the
 * APIR_COMMAND_TYPE_BACKEND_GRAPH_COMPUTE command. The status decoded from the
 * reply is returned.
 *
 * @param gpu    virtgpu handle used for the remote call and for shared memory.
 * @param cgraph graph to serialize and send.
 * @return the ggml_status decoded from the reply; the variable starts out as
 *         GGML_STATUS_ABORTED, so that value is returned if the decoder does
 *         not overwrite it.
 *
 * Aborts (GGML_ABORT) if a dedicated shared buffer is required and cannot be
 * allocated.
 */
ggml_status apir_backend_graph_compute(virtgpu * gpu, ggml_cgraph * cgraph) {
    apir_encoder *        encoder;
    apir_decoder *        decoder;
    ApirForwardReturnCode ret;  // handed to REMOTE_CALL; not inspected here

    REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BACKEND_GRAPH_COMPUTE);

    // Serialized graph bytes. The element type must be spelled out: a bare
    // `std::vector` cannot be deduced without an initializer.
    // NOTE(review): uint8_t assumed to match apir_serialize_ggml_cgraph()'s
    // out-parameter - confirm against its declaration.
    std::vector<uint8_t> cgraph_data;
    size_t               cgraph_size = apir_serialize_ggml_cgraph(cgraph, cgraph_data);

    virtgpu_shmem   temp_shmem;  // Local storage for large buffers
    virtgpu_shmem * shmem = &temp_shmem;

    if (cgraph_size <= gpu->data_shmem.mmap_size) {
        // prefer the init-time allocated page, if large enough
        shmem = &gpu->data_shmem;
    } else if (virtgpu_shmem_create(gpu, cgraph_size, shmem)) {
        // a non-zero return from virtgpu_shmem_create() is treated as failure
        GGML_ABORT("Couldn't allocate the guest-host shared buffer");
    }

    // The command stream only carries the buffer's resource id and the payload size ...
    apir_encode_virtgpu_shmem_res_id(encoder, shmem->res_id);
    apir_encode_size_t(encoder, &cgraph_size);

    // ... while the serialized graph itself is written into the shared buffer
    // through a secondary encoder.
    char *       shmem_data    = (char *) shmem->mmap_ptr;
    apir_encoder secondary_enc = apir_new_encoder(shmem_data, cgraph_size);
    apir_encode_cgraph_data(&secondary_enc, cgraph_data);

    REMOTE_CALL(gpu, encoder, decoder, ret);

    ggml_status status = GGML_STATUS_ABORTED;
    apir_decode_ggml_status(decoder, &status);

    remote_call_finish(gpu, encoder, decoder);

    // Only release the buffer if it was allocated for this call; the
    // init-time data_shmem is owned by `gpu`.
    if (shmem != &gpu->data_shmem) {
        virtgpu_shmem_destroy(gpu, shmem);
    }

    return status;
}