Compilation fixes

This commit is contained in:
Piotr Wilkin 2026-03-21 22:00:33 +01:00
parent 982cf3b6a2
commit c25aed1f5c
4 changed files with 24 additions and 25 deletions

View File

@@ -483,9 +483,6 @@ void common_perf_print(const struct llama_context * ctx, const struct common_sam
}
}
// forward declaration of internal function (defined in llama-sampler.cpp)
void llama_sampler_grammar_set_trigger_suppressed(struct llama_sampler * smpl, bool suppressed);
void common_sampler_set_grammar_trigger_suppressed(struct common_sampler * gsmpl, bool suppressed) {
if (!gsmpl || !gsmpl->grmr) {
return;

View File

@@ -1380,6 +1380,13 @@ extern "C" {
const llama_token * trigger_tokens,
size_t num_trigger_tokens);
/// @details Suppress or un-suppress trigger detection on a grammar sampler.
/// When suppressed, the grammar still buffers tokens but does not check for triggers.
/// Useful for suppressing grammar activation during reasoning/thinking blocks.
/// No-op if the sampler is not a grammar sampler.
LLAMA_API void llama_sampler_grammar_set_trigger_suppressed(
struct llama_sampler * smpl,
bool suppressed);
/// NOTE: Avoid using on the full vocabulary as searching for repeated tokens can become slow. For example, apply top-k or top-p sampling first.
LLAMA_API struct llama_sampler * llama_sampler_init_penalties(

View File

@@ -33,10 +33,6 @@ struct llama_sampler_chain {
mutable int32_t n_sample;
};
// set trigger_suppressed on a grammar sampler (e.g. to suppress triggers during reasoning)
// the sampler must have been created by llama_sampler_init_grammar* or this is a no-op
void llama_sampler_grammar_set_trigger_suppressed(struct llama_sampler * smpl, bool suppressed);
struct llama_sampler * llama_sampler_init_dry_testing(
int32_t context_size,
float dry_multiplier,

View File

@@ -1241,26 +1241,25 @@ private:
const auto & end_tag = slot.task->params.thinking_end_tag;
const auto & start_tag = slot.task->params.thinking_start_tag;
if (slot.in_reasoning) {
// check if the end tag just appeared
if (slot.generated_text.size() >= end_tag.size()) {
auto tail = std::string_view(slot.generated_text).substr(
slot.generated_text.size() - end_tag.size());
if (tail == end_tag) {
slot.in_reasoning = false;
common_sampler_set_grammar_trigger_suppressed(slot.smpl.get(), false);
SLT_DBG(slot, "reasoning ended, grammar triggers un-suppressed\n%s", "");
}
// check if the end tag just appeared at the end of generated_text
if (slot.generated_text.size() >= end_tag.size()
&& slot.generated_text.compare(
slot.generated_text.size() - end_tag.size(),
end_tag.size(), end_tag) == 0) {
slot.in_reasoning = false;
common_sampler_set_grammar_trigger_suppressed(slot.smpl.get(), false);
SLT_DBG(slot, "reasoning ended, grammar triggers un-suppressed\n%s", "");
}
} else {
// check if the start tag just appeared
if (!start_tag.empty() && slot.generated_text.size() >= start_tag.size()) {
auto tail = std::string_view(slot.generated_text).substr(
slot.generated_text.size() - start_tag.size());
if (tail == start_tag) {
slot.in_reasoning = true;
common_sampler_set_grammar_trigger_suppressed(slot.smpl.get(), true);
SLT_DBG(slot, "reasoning started, grammar triggers suppressed\n%s", "");
}
// check if the start tag just appeared at the end of generated_text
if (!start_tag.empty()
&& slot.generated_text.size() >= start_tag.size()
&& slot.generated_text.compare(
slot.generated_text.size() - start_tag.size(),
start_tag.size(), start_tag) == 0) {
slot.in_reasoning = true;
common_sampler_set_grammar_trigger_suppressed(slot.smpl.get(), true);
SLT_DBG(slot, "reasoning started, grammar triggers suppressed\n%s", "");
}
}
}