diff --git a/tests/test-backend-sampler.cpp b/tests/test-backend-sampler.cpp
index ad73eae92a..7c33d0374c 100644
--- a/tests/test-backend-sampler.cpp
+++ b/tests/test-backend-sampler.cpp
@@ -266,7 +266,6 @@ struct test_model_context {
         if (model) {
             llama_model_free(model);
         }
-        llama_backend_free();
 
         ctx = nullptr;
         model = nullptr;
@@ -754,6 +753,9 @@ static void test_backend_dist_sampling(const char * model_path) {
     token = llama_get_sampled_token_ith(test_ctx.ctx, -1);
     printf("dist sampled id:%d, string:'%s'\n", token, test_ctx.token_to_piece(token, false).c_str());
     GGML_ASSERT(token >= 0 && token < test_ctx.n_vocab);
+
+    llama_sampler_free(backend_sampler_chain);
+    printf("backend dist sampling test PASSED\n");
 }
 
 static void test_backend_dist_sampling_and_cpu(const char * model_path) {
@@ -785,6 +787,11 @@ static void test_backend_dist_sampling_and_cpu(const char * model_path) {
     llama_token cpu_token = llama_sampler_sample(chain, test_ctx.ctx, batch_idx);
     printf("dist & cpu sampled id:%d, string:'%s'\n", cpu_token, test_ctx.token_to_piece(cpu_token, false).c_str());
     GGML_ASSERT(backend_token == cpu_token);
+
+    llama_sampler_free(backend_sampler_chain);
+    llama_sampler_free(chain);
+
+    printf("backend dist & cpu sampling test PASSED\n");
 }
 
 static void test_backend_logit_bias_sampling(const char * model_path) {
@@ -832,6 +839,8 @@ static void test_backend_logit_bias_sampling(const char * model_path) {
     const std::string backend_token_str = test_ctx.token_to_piece(backend_token, false);
     printf("logit bias sampled token = %d, string='%s'\n", backend_token, backend_token_str.c_str());
     GGML_ASSERT(backend_token == bias_token);
+
+    llama_sampler_free(backend_sampler_chain);
 }
 
 // This test verifies that it is possible to have two different backend sampler,
@@ -887,6 +896,9 @@ static void test_backend_mixed_sampling(const char * model_path) {
         GGML_ASSERT(llama_get_sampled_token_ith(test_ctx.ctx, batch_idx) == LLAMA_TOKEN_NULL);
     }
 
+    llama_sampler_free(sampler_chain_0);
+    llama_sampler_free(sampler_chain_1);
+
     printf("backend mixed sampling test PASSED\n");
 }
 
@@ -954,6 +966,12 @@ static void test_backend_set_sampler(const char * model_path) {
     llama_token new_backend_token = llama_get_sampled_token_ith(test_ctx.ctx, test_ctx.idx_for_seq(seq_id));
     const std::string new_backend_token_str = test_ctx.token_to_piece(new_backend_token, false);
     printf("dist sampled token = %d, string='%s'\n", new_backend_token, new_backend_token_str.c_str());
+
+    llama_sampler_free(backend_sampler_chain);
+    llama_sampler_free(chain);
+    llama_sampler_free(new_backend_sampler_chain);
+
+    printf("backend set sampler test PASSED\n");
 }
 
 static void test_backend_cpu_mixed_batch(const char * model_path) {
@@ -1032,7 +1050,7 @@ static void test_backend_cpu_mixed_batch(const char * model_path) {
     // Set a backend sampler so that we can verify that it can be reset
     {
         struct llama_sampler_chain_params chain_params = llama_sampler_chain_default_params();
-        struct llama_sampler * sampler_chain= llama_sampler_chain_init(chain_params);
+        struct llama_sampler * sampler_chain = llama_sampler_chain_init(chain_params);
         llama_sampler_chain_add(sampler_chain, llama_sampler_init_dist(88));
 
         llama_set_sampler(test_ctx.ctx, 0, sampler_chain);
@@ -1046,8 +1064,12 @@ static void test_backend_cpu_mixed_batch(const char * model_path) {
         const std::string token_str = test_ctx.token_to_piece(token, false);
         printf("re-added backend sampled token id=%d, string='%s'\n", token, token_str.c_str());
         GGML_ASSERT(token >= 0 && token < test_ctx.n_vocab);
+
+        llama_sampler_free(sampler_chain);
     }
 
+    llama_sampler_free(sampler_chain_0);
+
     printf("backend-cpu mixed batch test PASSED\n");
 }
 
@@ -1089,6 +1111,9 @@ static void test_backend_max_outputs(const char * model_path) {
     GGML_ASSERT(ret != 0 && "llama_decode should not succeed multiple outputs per sequence");
    printf("<<< test_max_outputs expected error end.\n");
     llama_batch_free(batch);
+
+    llama_sampler_free(backend_sampler_chain);
+    printf("backend max outputs test PASSED\n");
 }
 
 struct backend_test_case {