llama.cpp/src/llama-ext.h

13 lines
337 B
C

#pragma once
#include "llama-context.h"
#include "ggml.h"
#include "stdint.h"
// Reserve a new compute graph. It is valid until the next call to llama_graph_reserve.
//
// Because a later call to llama_graph_reserve invalidates the previously returned
// graph, callers should not keep the pointer around across calls.
//
// NOTE(review): the parameter descriptions below are inferred from the names only;
// the definition is not visible from this header - confirm against the implementation.
//   ctx       - llama context the call operates on
//   n_tokens  - presumably the number of tokens the graph is sized for
//   n_seqs    - presumably the number of sequences the graph is sized for
//   n_outputs - presumably the number of outputs the graph is sized for
//
// NOTE(review): failure behaviour (e.g. whether NULL can be returned) and ownership
// of the returned graph are not stated here - verify against the definition.
LLAMA_API struct ggml_cgraph * llama_graph_reserve(
struct llama_context * ctx,
uint32_t n_tokens,
uint32_t n_seqs,
uint32_t n_outputs);