move llama_context_device_memory declaration to llama-ext.h

This commit is contained in:
Ruben Ortlam 2026-04-02 11:39:07 +02:00
parent 7e10ec8ff2
commit 7666cacf28
3 changed files with 8 additions and 6 deletions

View File

@ -1547,12 +1547,6 @@ extern "C" {
// print a breakdown of per-device memory use via LLAMA_LOG:
LLAMA_API void llama_memory_breakdown_print(const struct llama_context * ctx);
// Returns the projected memory use (model + context + compute) in bytes
// for the given device within this context. Returns 0 if the device is not used.
LLAMA_API uint64_t llama_context_device_memory(
const struct llama_context * ctx,
ggml_backend_dev_t device);
//
// training
//

View File

@ -54,3 +54,9 @@ LLAMA_API void llama_quant_compute_types(
ggml_tensor ** tensors,
ggml_type * result_types,
size_t n_tensors);
// Returns the projected memory use (model + context + compute) in bytes
// for the given device within this context. Returns 0 if the device is not used.
LLAMA_API uint64_t llama_context_device_memory(
const struct llama_context * ctx,
ggml_backend_dev_t device);

View File

@ -7,6 +7,8 @@
#include <cpp-httplib/httplib.h> // TODO: remove this once we use HTTP client from download.h
#include <sheredom/subprocess.h>
#include "../../src/llama-ext.h"
#include <functional>
#include <algorithm>
#include <thread>