sampling : remove version from sampler chain

This commit removes the version field from the sampler chain and instead
uses the sampler pointer itself for change detection.
This commit is contained in:
Daniel Bevenius 2025-11-19 06:59:03 +01:00
parent 26be108be8
commit 0da7e7dccc
No known key found for this signature in database
5 changed files with 2 additions and 21 deletions

View File

@ -1235,7 +1235,6 @@ extern "C" {
// after removing a sampler, the chain will no longer own it, and it will not be freed when the chain is freed
LLAMA_API struct llama_sampler * llama_sampler_chain_remove( struct llama_sampler * chain, int32_t i);
LLAMA_API uint64_t llama_sampler_chain_get_version(const struct llama_sampler * chain);
// available samplers:

View File

@ -477,7 +477,7 @@ bool llm_graph_input_sampling::can_reuse(const llm_graph_params & params) {
}
for (const auto & [seq_id, sampler] : params.samplers) {
if (sampler_versions[seq_id] != llama_sampler_chain_get_version(sampler)) {
if (samplers[seq_id] != sampler) {
return false;
}
}

View File

@ -387,13 +387,7 @@ class llm_graph_input_sampling : public llm_graph_input_i {
public:
llm_graph_input_sampling(int32_t n_vocab, bool sorted,
std::unordered_map<llama_seq_id, llama_sampler*> samplers) :
n_vocab(n_vocab), sorted_value(sorted), samplers(samplers) {
sampler_versions.reserve(samplers.size());
for (const auto & [seq_id, sampler] : samplers) {
sampler_versions[seq_id] = llama_sampler_chain_get_version(sampler);
}
}
n_vocab(n_vocab), sorted_value(sorted), samplers(samplers) { }
virtual ~llm_graph_input_sampling() = default;
void set_input(const llama_ubatch * ubatch) override;
@ -404,8 +398,6 @@ public:
ggml_tensor * size = nullptr; // I32 [1]
ggml_tensor * sorted = nullptr; // I32 [1]
// Track sampler chain version for reuse
std::unordered_map<llama_seq_id, uint64_t> sampler_versions;
std::unordered_map<llama_seq_id, llama_sampler*> samplers;
};

View File

@ -639,7 +639,6 @@ struct llama_sampler * llama_sampler_chain_init(struct llama_sampler_chain_param
void llama_sampler_chain_add(struct llama_sampler * chain, struct llama_sampler * smpl) {
auto * p = (llama_sampler_chain *) chain->ctx;
p->samplers.push_back(smpl);
p->version++;
}
struct llama_sampler * llama_sampler_chain_get(const struct llama_sampler * chain, int32_t i) {
@ -661,7 +660,6 @@ struct llama_sampler * llama_sampler_chain_remove(struct llama_sampler * chain,
auto * result = p->samplers[i];
p->samplers.erase(p->samplers.begin() + i);
p->version++;
return result;
}
@ -672,11 +670,6 @@ int llama_sampler_chain_n(const struct llama_sampler * chain) {
return p->samplers.size();
}
uint64_t llama_sampler_chain_get_version(const struct llama_sampler * chain) {
const auto * p = (const llama_sampler_chain *) chain->ctx;
return p->version;
}
//
// samplers
//

View File

@ -21,9 +21,6 @@ struct llama_sampler_chain {
mutable int64_t t_sample_us;
mutable int32_t n_sample;
// simple version tracking for GPU sampling graph can_reuse
uint64_t version = 0;
};
struct llama_sampler * llama_sampler_init_dry_testing(