diff --git a/common/sampling.cpp b/common/sampling.cpp
index c33d58ae5e..27b2a082b0 100644
--- a/common/sampling.cpp
+++ b/common/sampling.cpp
@@ -691,4 +691,4 @@ llama_token common_sampler_sample_speculative(struct common_sampler * gsmpl, str
     }
 
     return best_id;
-}
\ No newline at end of file
+}
diff --git a/common/sampling.h b/common/sampling.h
index c7101032f2..90c2401c2f 100644
--- a/common/sampling.h
+++ b/common/sampling.h
@@ -115,3 +115,5 @@ struct common_sampler_deleter {
 };
 
 typedef std::unique_ptr<common_sampler, common_sampler_deleter> common_sampler_ptr;
+
+llama_token common_sampler_sample_speculative(struct common_sampler * gsmpl, struct llama_context * ctx, int idx); // defined in sampling.cpp; returns the selected token id. NOTE(review): idx is presumably the output index within ctx - confirm
diff --git a/common/speculative.cpp b/common/speculative.cpp
index 136f2c1b1a..548394bbe8 100644
--- a/common/speculative.cpp
+++ b/common/speculative.cpp
@@ -361,8 +361,8 @@ llama_tokens common_speculative_gen_draft(
 }
 
 llama_tokens mtp_speculative_gen_draft(
-    struct common_sampler* smpl,
-    struct llama_context* ctx,
+    struct common_sampler * smpl,
+    struct llama_context * ctx,
     struct common_speculative_params params,
     llama_token id_last,
     int32_t n_past,
diff --git a/common/speculative.h b/common/speculative.h
index a33c5a8b02..d22a752d3f 100644
--- a/common/speculative.h
+++ b/common/speculative.h
@@ -57,8 +57,8 @@ llama_tokens common_speculative_gen_draft(
  * @return std::vector<llama_token> The generated draft tokens.
  */
 llama_tokens mtp_speculative_gen_draft(
-    struct common_sampler* smpl,
-    struct llama_context* ctx,
+    struct common_sampler * smpl,
+    struct llama_context * ctx,
     struct common_speculative_params params,
     llama_token id_last,
     int32_t n_past,
diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp
index 925b9b805d..dca005da35 100644
--- a/tools/server/server-context.cpp
+++ b/tools/server/server-context.cpp
@@ -2001,7 +2001,7 @@ struct server_context_impl {
                     llama_set_draft_input_hidden_state(ctx, llama_get_embeddings_ith(ctx, -1));
 
                     draft = mtp_speculative_gen_draft(
-                        slot.smpl, 
+                        slot.smpl.get(), 
                         ctx,
                         params_spec,
                         slot.sampled,