Compilation fixes
This commit is contained in:
parent
982cf3b6a2
commit
c25aed1f5c
|
|
@ -483,9 +483,6 @@ void common_perf_print(const struct llama_context * ctx, const struct common_sam
|
|||
}
|
||||
}
|
||||
|
||||
// forward declaration of internal function (defined in llama-sampler.cpp)
|
||||
void llama_sampler_grammar_set_trigger_suppressed(struct llama_sampler * smpl, bool suppressed);
|
||||
|
||||
void common_sampler_set_grammar_trigger_suppressed(struct common_sampler * gsmpl, bool suppressed) {
|
||||
if (!gsmpl || !gsmpl->grmr) {
|
||||
return;
|
||||
|
|
|
|||
|
|
@ -1380,6 +1380,13 @@ extern "C" {
|
|||
const llama_token * trigger_tokens,
|
||||
size_t num_trigger_tokens);
|
||||
|
||||
/// @details Suppress or un-suppress trigger detection on a grammar sampler.
|
||||
/// When suppressed, the grammar still buffers tokens but does not check for triggers.
|
||||
/// Useful for suppressing grammar activation during reasoning/thinking blocks.
|
||||
/// No-op if the sampler is not a grammar sampler.
/// @param smpl       sampler whose trigger detection is toggled
/// @param suppressed true to suppress trigger detection, false to un-suppress it
|
||||
LLAMA_API void llama_sampler_grammar_set_trigger_suppressed(
|
||||
struct llama_sampler * smpl,
|
||||
bool suppressed);
|
||||
|
||||
/// NOTE: Avoid using on the full vocabulary as searching for repeated tokens can become slow. For example, apply top-k or top-p sampling first.
|
||||
LLAMA_API struct llama_sampler * llama_sampler_init_penalties(
|
||||
|
|
|
|||
|
|
@ -33,10 +33,6 @@ struct llama_sampler_chain {
|
|||
mutable int32_t n_sample;
|
||||
};
|
||||
|
||||
// set trigger_suppressed on a grammar sampler (e.g. to suppress triggers during reasoning)
|
||||
// the sampler must have been created by llama_sampler_init_grammar* or this is a no-op
|
||||
void llama_sampler_grammar_set_trigger_suppressed(struct llama_sampler * smpl, bool suppressed);
|
||||
|
||||
struct llama_sampler * llama_sampler_init_dry_testing(
|
||||
int32_t context_size,
|
||||
float dry_multiplier,
|
||||
|
|
|
|||
|
|
@ -1241,26 +1241,25 @@ private:
|
|||
const auto & end_tag = slot.task->params.thinking_end_tag;
|
||||
const auto & start_tag = slot.task->params.thinking_start_tag;
|
||||
if (slot.in_reasoning) {
|
||||
// check if the end tag just appeared
|
||||
if (slot.generated_text.size() >= end_tag.size()) {
|
||||
auto tail = std::string_view(slot.generated_text).substr(
|
||||
slot.generated_text.size() - end_tag.size());
|
||||
if (tail == end_tag) {
|
||||
slot.in_reasoning = false;
|
||||
common_sampler_set_grammar_trigger_suppressed(slot.smpl.get(), false);
|
||||
SLT_DBG(slot, "reasoning ended, grammar triggers un-suppressed\n%s", "");
|
||||
}
|
||||
// check if the end tag just appeared at the end of generated_text
|
||||
if (slot.generated_text.size() >= end_tag.size()
|
||||
&& slot.generated_text.compare(
|
||||
slot.generated_text.size() - end_tag.size(),
|
||||
end_tag.size(), end_tag) == 0) {
|
||||
slot.in_reasoning = false;
|
||||
common_sampler_set_grammar_trigger_suppressed(slot.smpl.get(), false);
|
||||
SLT_DBG(slot, "reasoning ended, grammar triggers un-suppressed\n%s", "");
|
||||
}
|
||||
} else {
|
||||
// check if the start tag just appeared
|
||||
if (!start_tag.empty() && slot.generated_text.size() >= start_tag.size()) {
|
||||
auto tail = std::string_view(slot.generated_text).substr(
|
||||
slot.generated_text.size() - start_tag.size());
|
||||
if (tail == start_tag) {
|
||||
slot.in_reasoning = true;
|
||||
common_sampler_set_grammar_trigger_suppressed(slot.smpl.get(), true);
|
||||
SLT_DBG(slot, "reasoning started, grammar triggers suppressed\n%s", "");
|
||||
}
|
||||
// check if the start tag just appeared at the end of generated_text
|
||||
if (!start_tag.empty()
|
||||
&& slot.generated_text.size() >= start_tag.size()
|
||||
&& slot.generated_text.compare(
|
||||
slot.generated_text.size() - start_tag.size(),
|
||||
start_tag.size(), start_tag) == 0) {
|
||||
slot.in_reasoning = true;
|
||||
common_sampler_set_grammar_trigger_suppressed(slot.smpl.get(), true);
|
||||
SLT_DBG(slot, "reasoning started, grammar triggers suppressed\n%s", "");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue