From ac6d09c63cc1b02e3fe54d25e4a4a776abb5932a Mon Sep 17 00:00:00 2001
From: Daniel Bevenius <daniel.bevenius@gmail.com>
Date: Fri, 6 Feb 2026 07:57:47 +0100
Subject: [PATCH] move buffer_view to llama-impl.h

---
 src/llama-context.cpp |  6 +++---
 src/llama-context.h   | 11 +----------
 src/llama-impl.h      | 10 ++++++++++
 3 files changed, 14 insertions(+), 13 deletions(-)
diff --git a/src/llama-context.cpp b/src/llama-context.cpp
index 8748fb77b0..7b458e77dd 100644
--- a/src/llama-context.cpp
+++ b/src/llama-context.cpp
@@ -1363,7 +1363,7 @@ static std::map<llama_seq_id, uint32_t> build_seq_to_output_row(const llama_ubat
 
 static void copy_tensor_async_ints(
     const std::map<llama_seq_id, ggml_tensor*> & tensor_map,
-    const llama_context::buffer_view<llama_token> & sampled,
+    const buffer_view<llama_token> & sampled,
     const std::map<llama_seq_id, uint32_t> & seq_to_row,
     ggml_backend_sched_t sched) {
     if (!sampled.has_data()) {
@@ -1388,7 +1388,7 @@ static void copy_tensor_async_ints(
 
 static void copy_tensor_async_floats(
     const std::map<llama_seq_id, ggml_tensor*> & tensor_map,
-    const llama_context::buffer_view<float> & dst,
+    const buffer_view<float> & dst,
     size_t stride,
     std::vector<uint32_t> & counts,
     const std::map<llama_seq_id, uint32_t> & seq_to_row,
@@ -1419,7 +1419,7 @@ static void copy_tensor_async_floats(
 
 static void copy_tensor_async_candidates(
     const std::map<llama_seq_id, ggml_tensor*> & tensor_map,
-    const llama_context::buffer_view<llama_token> & dst,
+    const buffer_view<llama_token> & dst,
     size_t stride,
     std::vector<uint32_t> & counts,
     const std::map<llama_seq_id, uint32_t> & seq_to_row,
diff --git a/src/llama-context.h b/src/llama-context.h
index 978a109292..0dc05d8d9b 100644
--- a/src/llama-context.h
+++ b/src/llama-context.h
@@ -4,6 +4,7 @@
 #include "llama-cparams.h"
 #include "llama-graph.h"
 #include "llama-adapter.h"
+#include "llama-impl.h"
 
 #include "ggml-cpp.h"
 #include "ggml-opt.h"
@@ -238,16 +239,6 @@ public:
 
     bool set_sampler(llama_seq_id seq_id, llama_sampler * sampler);
 
-    template <typename T>
-    struct buffer_view {
-        T * data;
-        size_t size = 0;
-
-        bool has_data() const {
-            return data && size > 0;
-        }
-    };
-
 private:
     llm_graph_params graph_params(
                         llm_graph_result * res,
diff --git a/src/llama-impl.h b/src/llama-impl.h
index c3391e79f5..dfd9fee9f4 100644
--- a/src/llama-impl.h
+++ b/src/llama-impl.h
@@ -49,6 +49,16 @@ struct time_meas {
     int64_t & t_acc;
 };
 
+template <typename T>
+struct buffer_view { // (pointer, size) pair over a buffer of T; declares no destructor, so it never frees `data`
+    T * data = nullptr; // null by default so has_data() is well-defined on a default-constructed view
+    size_t size = 0;    // extent of the buffer (presumably an element count - confirm against callers)
+    // true only when the view points at something and its size is non-zero
+    bool has_data() const {
+        return data && size > 0;
+    }
+};
+
 void replace_all(std::string & s, const std::string & search, const std::string & replace);
 
 // TODO: rename to llama_format ?