From 7666cacf28591c0179c499d1525121e7406b58e5 Mon Sep 17 00:00:00 2001
From: Ruben Ortlam
Date: Thu, 2 Apr 2026 11:39:07 +0200
Subject: [PATCH] move llama_context_device_memory function to llama-ext.h

---
 include/llama.h                | 6 ------
 src/llama-ext.h                | 6 ++++++
 tools/server/server-models.cpp | 2 ++
 3 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/include/llama.h b/include/llama.h
index de7c0670f5..a940f9d648 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -1547,12 +1547,6 @@ extern "C" {
     // print a breakdown of per-device memory use via LLAMA_LOG:
     LLAMA_API void llama_memory_breakdown_print(const struct llama_context * ctx);
 
-    // Returns the projected memory use (model + context + compute) in bytes
-    // for the given device within this context. Returns 0 if the device is not used.
-    LLAMA_API uint64_t llama_context_device_memory(
-            const struct llama_context * ctx,
-            ggml_backend_dev_t device);
-
     //
     // training
     //
diff --git a/src/llama-ext.h b/src/llama-ext.h
index 2ffb77934e..ee6ff27be3 100644
--- a/src/llama-ext.h
+++ b/src/llama-ext.h
@@ -54,3 +54,9 @@ LLAMA_API void llama_quant_compute_types(
         ggml_tensor ** tensors,
         ggml_type * result_types,
         size_t n_tensors);
+
+// Returns the projected memory use (model + context + compute) in bytes
+// for the given device within this context. Returns 0 if the device is not used.
+LLAMA_API uint64_t llama_context_device_memory(
+        const struct llama_context * ctx,
+        ggml_backend_dev_t device);
diff --git a/tools/server/server-models.cpp b/tools/server/server-models.cpp
index bb41f205a9..fe039b03ee 100644
--- a/tools/server/server-models.cpp
+++ b/tools/server/server-models.cpp
@@ -7,6 +7,8 @@
 #include // TODO: remove this once we use HTTP client from download.h
 #include
 
+#include "../../src/llama-ext.h"
+
 #include
 #include
 #include