clean up MTP sampler typing after rebase

2025-12-21 17:53:27 -05:00 · 2025-12-21 17:53:27 -05:00 · d10a5a4a5b
parent a8dc54672c
commit d10a5a4a5b
5 changed files with 8 additions and 6 deletions
--- a/common/sampling.cpp
+++ b/common/sampling.cpp
@ -691,4 +691,4 @@ llama_token common_sampler_sample_speculative(struct common_sampler * gsmpl, str
    }

    return best_id;
-}
+}
--- a/common/sampling.h
+++ b/common/sampling.h
@ -115,3 +115,5 @@ struct common_sampler_deleter {
 };

 typedef std::unique_ptr<common_sampler, common_sampler_deleter> common_sampler_ptr;
+
+llama_token common_sampler_sample_speculative(struct common_sampler * gsmpl, struct llama_context * ctx, int idx);
--- a/common/speculative.cpp
+++ b/common/speculative.cpp
@ -361,8 +361,8 @@ llama_tokens common_speculative_gen_draft(
 }

 llama_tokens mtp_speculative_gen_draft(
-    struct common_sampler* smpl,
-    struct llama_context* ctx,
+    struct common_sampler * smpl,
+    struct llama_context * ctx,
    struct common_speculative_params params,
    llama_token id_last,
    int32_t n_past,
--- a/common/speculative.h
+++ b/common/speculative.h
@ -57,8 +57,8 @@ llama_tokens common_speculative_gen_draft(
 * @return std::vector<llama_token> The generated draft tokens.
 */
 llama_tokens mtp_speculative_gen_draft(
-    struct common_sampler* smpl,
-    struct llama_context* ctx,
+    struct common_sampler * smpl,
+    struct llama_context * ctx,
    struct common_speculative_params params,
    llama_token id_last,
    int32_t n_past,
--- a/tools/server/server-context.cpp
+++ b/tools/server/server-context.cpp
@ -2001,7 +2001,7 @@ struct server_context_impl {
                    llama_set_draft_input_hidden_state(ctx, llama_get_embeddings_ith(ctx, -1));

                    draft = mtp_speculative_gen_draft(
-                        slot.smpl, 
+                        slot.smpl.get(), 
                        ctx,
                        params_spec,
                        slot.sampled,