// C++ source file — 72 lines, 2.1 KiB
#include <cstdio>
|
|
#include <cstring>
|
|
|
|
#include "llama.h"
|
|
#include "get-model.h"
|
|
|
|
// Results recorded by pre_alloc_cb and inspected by main() after decoding.
// Kept as a plain aggregate so it can be brace-initialized in field order.
struct callback_state {
    // set to true every time pre_alloc_cb is entered
    bool called;

    // true when the backend read back for the reassigned node matched the
    // backend that pre_alloc_cb had just assigned to it
    bool reassign_ok;
};
|
|
|
|
// Pre-allocation callback installed through llama_context_params::cb_pre_alloc.
//
//   sched     - backend scheduler that the graph is about to be allocated on
//   gf        - compute graph whose first node gets reassigned
//   user_data - must point to a callback_state; it receives the results
//
// Marks the callback as called, then moves node 0 of the graph onto the last
// backend of the scheduler (expected to be the CPU backend) and records whether
// the scheduler reports that backend back for the node.
static void pre_alloc_cb(ggml_backend_sched_t sched, struct ggml_cgraph * gf, void * user_data) {
    callback_state & result = *static_cast<callback_state *>(user_data);
    result.called = true;

    const int backend_count = ggml_backend_sched_get_n_backends(sched);

    // only attempt the reassignment when there is a backend to target and the
    // graph has at least one node; otherwise reassign_ok is left untouched
    if (backend_count >= 1 && ggml_graph_n_nodes(gf) > 0) {
        struct ggml_tensor * first_node   = ggml_graph_node(gf, 0);
        ggml_backend_t       last_backend = ggml_backend_sched_get_backend(sched, backend_count - 1);

        ggml_backend_sched_set_tensor_backend(sched, first_node, last_backend);

        // read the assignment back to confirm that it took effect
        ggml_backend_t assigned = ggml_backend_sched_get_tensor_backend(sched, first_node);
        result.reassign_ok = (assigned == last_backend);
    }
}
|
|
|
|
int main(int argc, char ** argv) {
|
|
auto * model_path = get_model_or_exit(argc, argv);
|
|
|
|
llama_backend_init();
|
|
auto * model = llama_model_load_from_file(model_path, llama_model_default_params());
|
|
if (!model) {
|
|
fprintf(stderr, "FAIL: could not load model\n");
|
|
return 1;
|
|
}
|
|
|
|
callback_state state = { false, false };
|
|
|
|
auto params = llama_context_default_params();
|
|
params.n_ctx = 64;
|
|
params.n_batch = 1;
|
|
params.cb_pre_alloc = pre_alloc_cb;
|
|
params.cb_pre_alloc_user_data = &state;
|
|
|
|
auto * ctx = llama_init_from_model(model, params);
|
|
if (!ctx) {
|
|
fprintf(stderr, "FAIL: could not create context\n");
|
|
llama_model_free(model);
|
|
llama_backend_free();
|
|
return 1;
|
|
}
|
|
|
|
llama_token token = 0;
|
|
if (llama_decode(ctx, llama_batch_get_one(&token, 1)) != 0) {
|
|
fprintf(stderr, "FAIL: llama_decode failed\n");
|
|
llama_free(ctx);
|
|
llama_model_free(model);
|
|
llama_backend_free();
|
|
return 1;
|
|
}
|
|
|
|
fprintf(stderr, "called=%d reassign_ok=%d\n", state.called, state.reassign_ok);
|
|
|
|
int ret = (state.called && state.reassign_ok) ? 0 : 1;
|
|
|
|
llama_free(ctx);
|
|
llama_model_free(model);
|
|
llama_backend_free();
|
|
return ret;
|
|
}
|