clean up mtp sample typing after rebase
This commit is contained in:
parent
a8dc54672c
commit
d10a5a4a5b
|
|
@ -691,4 +691,4 @@ llama_token common_sampler_sample_speculative(struct common_sampler * gsmpl, str
|
||||||
}
|
}
|
||||||
|
|
||||||
return best_id;
|
return best_id;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -115,3 +115,5 @@ struct common_sampler_deleter {
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef std::unique_ptr<common_sampler, common_sampler_deleter> common_sampler_ptr;
|
typedef std::unique_ptr<common_sampler, common_sampler_deleter> common_sampler_ptr;
|
||||||
|
|
||||||
|
llama_token common_sampler_sample_speculative(struct common_sampler * gsmpl, struct llama_context * ctx, int idx);
|
||||||
|
|
|
||||||
|
|
@ -361,8 +361,8 @@ llama_tokens common_speculative_gen_draft(
|
||||||
}
|
}
|
||||||
|
|
||||||
llama_tokens mtp_speculative_gen_draft(
|
llama_tokens mtp_speculative_gen_draft(
|
||||||
struct common_sampler* smpl,
|
struct common_sampler * smpl,
|
||||||
struct llama_context* ctx,
|
struct llama_context * ctx,
|
||||||
struct common_speculative_params params,
|
struct common_speculative_params params,
|
||||||
llama_token id_last,
|
llama_token id_last,
|
||||||
int32_t n_past,
|
int32_t n_past,
|
||||||
|
|
|
||||||
|
|
@ -57,8 +57,8 @@ llama_tokens common_speculative_gen_draft(
|
||||||
* @return std::vector<llama_token> The generated draft tokens.
|
* @return std::vector<llama_token> The generated draft tokens.
|
||||||
*/
|
*/
|
||||||
llama_tokens mtp_speculative_gen_draft(
|
llama_tokens mtp_speculative_gen_draft(
|
||||||
struct common_sampler* smpl,
|
struct common_sampler * smpl,
|
||||||
struct llama_context* ctx,
|
struct llama_context * ctx,
|
||||||
struct common_speculative_params params,
|
struct common_speculative_params params,
|
||||||
llama_token id_last,
|
llama_token id_last,
|
||||||
int32_t n_past,
|
int32_t n_past,
|
||||||
|
|
|
||||||
|
|
@ -2001,7 +2001,7 @@ struct server_context_impl {
|
||||||
llama_set_draft_input_hidden_state(ctx, llama_get_embeddings_ith(ctx, -1));
|
llama_set_draft_input_hidden_state(ctx, llama_get_embeddings_ith(ctx, -1));
|
||||||
|
|
||||||
draft = mtp_speculative_gen_draft(
|
draft = mtp_speculative_gen_draft(
|
||||||
slot.smpl,
|
slot.smpl.get(),
|
||||||
ctx,
|
ctx,
|
||||||
params_spec,
|
params_spec,
|
||||||
slot.sampled,
|
slot.sampled,
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue