diff --git a/common/sampling.cpp b/common/sampling.cpp index 2f60be1943..526f036ff9 100644 --- a/common/sampling.cpp +++ b/common/sampling.cpp @@ -287,8 +287,8 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, st } } - // reasoning budget sampler - if (!params.reasoning_budget_start.empty() && !params.reasoning_budget_end.empty()) { + // reasoning budget sampler (skip when budget is unlimited unless a lazy grammar is active, which needs rbudget for thinking-block suppression) + if (!params.reasoning_budget_start.empty() && !params.reasoning_budget_end.empty() && (params.grammar_lazy || params.reasoning_budget_tokens >= 0)) { rbudget = common_reasoning_budget_init( vocab, params.reasoning_budget_start,