// llama.cpp/src/llama-ext.h

#pragma once

#include "llama-context.h"
#include "ggml.h"
#include "stdint.h"

// Reserves a new compute graph using the given context.
// The returned graph stays valid only until llama_graph_reserve is called again.
// NOTE(review): n_tokens, n_seqs and n_outputs presumably size the reservation
// (token count, sequence count, output count) - confirm against the definition.
LLAMA_API struct ggml_cgraph * llama_graph_reserve(struct llama_context * ctx, uint32_t n_tokens, uint32_t n_seqs, uint32_t n_outputs);