From 8da07610f8f4a6b8bd2bad1970899525afd87ee1 Mon Sep 17 00:00:00 2001 From: Alde Rojas Date: Wed, 24 Dec 2025 01:16:49 -0600 Subject: [PATCH 1/6] grammar : add support for std::regex_search() with trigger patterns --- common/sampling.cpp | 18 ++++++------ src/llama-grammar.cpp | 67 ++++++++++++++++++++++++++++++++----------- src/llama-grammar.h | 8 ++++++ 3 files changed, 69 insertions(+), 24 deletions(-) diff --git a/common/sampling.cpp b/common/sampling.cpp index c66f935c65..68e36e8744 100644 --- a/common/sampling.cpp +++ b/common/sampling.cpp @@ -179,24 +179,30 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co #endif // LLAMA_USE_LLGUIDANCE } else { std::vector trigger_patterns; - std::vector patterns_anywhere; std::vector trigger_tokens; for (const auto & trigger : params.grammar_triggers) { switch (trigger.type) { case COMMON_GRAMMAR_TRIGGER_TYPE_WORD: { const auto & word = trigger.value; - patterns_anywhere.push_back(regex_escape(word)); + trigger_patterns.push_back(regex_escape(word)); break; } case COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN: { - patterns_anywhere.push_back(trigger.value); + trigger_patterns.push_back(trigger.value); break; } case COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL: { - trigger_patterns.push_back(trigger.value); + const auto & pattern = trigger.value; + std::string anchored = "^$"; + if (!pattern.empty()) { + anchored = (pattern.front() != '^' ? "^" : "") + + pattern + + (pattern.back() != '$' ? 
"$" : ""); + } + trigger_patterns.push_back(anchored); break; } case COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN: @@ -210,10 +216,6 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co } } - if (!patterns_anywhere.empty()) { - trigger_patterns.push_back("^[\\s\\S]*?(" + string_join(patterns_anywhere, "|") + ")[\\s\\S]*"); - } - std::vector trigger_patterns_c; trigger_patterns_c.reserve(trigger_patterns.size()); for (const auto & regex : trigger_patterns) { diff --git a/src/llama-grammar.cpp b/src/llama-grammar.cpp index 75d5d750c3..d62733b5d6 100644 --- a/src/llama-grammar.cpp +++ b/src/llama-grammar.cpp @@ -369,6 +369,54 @@ static void print_rule( fprintf(file, "\n"); } +// +// Regex utilities +// +static llama_grammar_trigger_pattern llama_grammar_trigger_pattern_compile(const std::string & pattern) { + llama_grammar_trigger_pattern_type type = LLAMA_GRAMMAR_TRIGGER_PATTERN_TYPE_SEARCH; + if (!pattern.empty() && pattern.front() == '^' && pattern.back() == '$') { + // If anchored on both ends, consider it a match + type = LLAMA_GRAMMAR_TRIGGER_PATTERN_TYPE_MATCH; + } + return {type, pattern, std::regex(pattern)}; +} + +size_t llama_grammar_trigger_pattern::find(const std::string & input) const { + auto find_start_pos = [](const std::smatch & match) { + // get from the first matched capturing group to the end of the string + size_t start = std::string::npos; + for (auto i = 1u; i < match.size(); i++) { + if (match.length(i) > 0) { + start = match.position(i); + break; + } + } + if (start == std::string::npos) { + start = match.position(0); + } + return start; + }; + + if (type == LLAMA_GRAMMAR_TRIGGER_PATTERN_TYPE_MATCH) { + // match against the entire input + std::smatch match; + if (std::regex_match(input, match, regex)) { + return find_start_pos(match); + } + } + + if (type == LLAMA_GRAMMAR_TRIGGER_PATTERN_TYPE_SEARCH) { + // search anywhere + std::smatch match; + if (std::regex_search(input, match, regex)) { + return find_start_pos(match); 
+ } + } + + return std::string::npos; +} + + // // implementation // @@ -1192,9 +1240,7 @@ struct llama_grammar * llama_grammar_init_impl( } for (size_t i = 0; i < num_trigger_patterns; i++) { GGML_ASSERT(trigger_patterns != nullptr); - auto & trigger = vec_trigger_patterns.emplace_back(); - trigger.pattern = trigger_patterns[i]; - trigger.regex = std::regex(trigger.pattern); + vec_trigger_patterns.emplace_back(llama_grammar_trigger_pattern_compile(trigger_patterns[i])); } // Important: vec_rules has to be moved here, not copied, because stacks contains @@ -1312,21 +1358,10 @@ void llama_grammar_accept_impl(struct llama_grammar & grammar, llama_token token grammar.trigger_buffer_positions.push_back(std::make_pair(token, position)); grammar.trigger_buffer += piece; - std::smatch match; for (const auto & trigger_pattern : grammar.trigger_patterns) { - if (std::regex_match(grammar.trigger_buffer, match, trigger_pattern.regex)) { + auto start = trigger_pattern.find(grammar.trigger_buffer); + if (start != std::string::npos) { grammar.awaiting_trigger = false; - // get from the first matched capturing group to the end of the string - size_t start = std::string::npos; - for (auto i = 1u; i < match.size(); i++) { - if (match.length(i) > 0) { - start = match.position(i); - break; - } - } - if (start == std::string::npos) { - start = match.position(0); - } // replay tokens that overlap with [start, end) for (const auto & [tok, tok_pos] : grammar.trigger_buffer_positions) { diff --git a/src/llama-grammar.h b/src/llama-grammar.h index a4c978ac11..2cd03bff1f 100644 --- a/src/llama-grammar.h +++ b/src/llama-grammar.h @@ -116,9 +116,17 @@ struct llama_grammar_parser { void print(FILE * file); }; +enum llama_grammar_trigger_pattern_type { + LLAMA_GRAMMAR_TRIGGER_PATTERN_TYPE_MATCH = 0, + LLAMA_GRAMMAR_TRIGGER_PATTERN_TYPE_SEARCH = 1, +}; + struct llama_grammar_trigger_pattern { + llama_grammar_trigger_pattern_type type; std::string pattern; std::regex regex; + + size_t find(const 
std::string & input) const; }; struct llama_grammar { From 6b757458da91dfaf60d4d4c083c86dd965415ce4 Mon Sep 17 00:00:00 2001 From: Alde Rojas Date: Wed, 24 Dec 2025 01:17:45 -0600 Subject: [PATCH 2/6] common : update hermes2 pro trigger to search instead of match --- common/chat.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/common/chat.cpp b/common/chat.cpp index 0a426f4478..bc0faf146c 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -2397,17 +2397,17 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat (inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call)); // Trigger on some common known "good bad" outputs (only from the start and with a json that's about a specific argument name to avoid false positives) data.grammar_triggers.push_back({ - COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL, + COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN, // If thinking_forced_open, then we capture the tag in the grammar, // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar) - std::string(data.thinking_forced_open ? "[\\s\\S]*?(\\s*)" : "(?:[\\s\\S]*?\\s*)?") + ( + std::string(data.thinking_forced_open ? "(\\s*)" : "") + ( "\\s*(" "(?:" "||||)?" 
"\\s*\\{\\s*\"name\"\\s*:\\s*\"(?:" + string_join(escaped_names, "|") + ")\"" ")" - ")[\\s\\S]*" + ")" ), }); data.preserved_tokens = { From 2a7bf2ede27edcf32bdff3801ab81daac224da38 Mon Sep 17 00:00:00 2001 From: Alde Rojas Date: Wed, 24 Dec 2025 01:39:41 -0600 Subject: [PATCH 3/6] common : use regex_search with anchoring for partial matching --- common/regex-partial.cpp | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/common/regex-partial.cpp b/common/regex-partial.cpp index 4bff6b6633..e667a209e9 100644 --- a/common/regex-partial.cpp +++ b/common/regex-partial.cpp @@ -27,7 +27,7 @@ common_regex_match common_regex::search(const std::string & input, size_t pos, b return res; } std::match_results srmatch; - if (std::regex_match(input.rbegin(), input.rend() - pos, srmatch, rx_reversed_partial)) { + if (std::regex_search(input.rbegin(), input.rend() - pos, srmatch, rx_reversed_partial, std::regex_constants::match_continuous)) { auto group = srmatch[1].str(); if (group.length() != 0) { auto it = srmatch[1].second.base(); @@ -55,18 +55,18 @@ common_regex_match common_regex::search(const std::string & input, size_t pos, b to see if a string ends with a partial regex match, but but it's not in std::regex yet. Instead, we'll the regex into a partial match regex operating as a full match on the reverse iterators of the input. 
- - /abcd/ -> (dcba|cba|ba|a).* -> ((?:(?:(?:(?:d)?c)?b)?a).* - - /a|b/ -> (a|b).* + - /abcd/ -> ^(dcba|cba|ba|a) -> ^((?:(?:(?:(?:d)?c)?b)?a) + - /a|b/ -> ^(a|b) - /a*?/ -> error, could match "" - - /a*b/ -> ((?:b)?a*+).* (final repetitions become eager) - - /.*?ab/ -> ((?:b)?a).* (merge .*) - - /a.*?b/ -> ((?:b)?.*?a).* (keep reluctant matches) - - /a(bc)d/ -> ((?:(?:d)?(?:(?:c)?b))?a).* - - /a(bc|de)/ -> ((?:(?:(?:e)?d)?|(?:(?:c)?b)?)?a).* - - /ab{2,4}c/ -> abbb?b?c -> ((?:(?:(?:(?:(?:c)?b)?b)?b?)?b?)?a).* + - /a*b/ -> ^((?:b)?a*+) (final repetitions become eager) + - /.*?ab/ -> ^((?:b)?a) (omit .*) + - /a.*?b/ -> ^((?:b)?.*?a) (keep reluctant matches) + - /a(bc)d/ -> ^((?:(?:d)?(?:(?:c)?b))?a) + - /a(bc|de)/ -> ^((?:(?:(?:e)?d)?|(?:(?:c)?b)?)?a) + - /ab{2,4}c/ -> ^cbbb?b?a -> ^((?:(?:(?:(?:(?:c)?b)?b)?b?)?b?)?a) - The regex will match a reversed string fully, and the end of the first (And only) capturing group will indicate the reversed start of the original partial pattern - (i.e. just where the final .* starts in the inverted pattern; all other groups are turned into non-capturing groups, and reluctant quantifiers are ignored) + The regex is anchored at the start of the reversed string (i.e. the end of the original input) and only needs to match a prefix of it; the end of the first (and only) capturing group will indicate the reversed start of the original partial pattern. + All other groups are turned into non-capturing groups, and reluctant quantifiers are ignored. */ std::string regex_to_reversed_partial_regex(const std::string & pattern) { auto it = pattern.begin(); @@ -177,7 +177,7 @@ std::string regex_to_reversed_partial_regex(const std::string & pattern) { } } - // /abcd/ -> (dcba|cba|ba|a).* -> ((?:(?:(?:d)?c)?b)?a).* + // /abcd/ -> ^(dcba|cba|ba|a) -> ^((?:(?:(?:d)?c)?b)?a) // if n(=4) parts, opening n-1(=3) non-capturing groups after the 1 capturing group // We'll do the outermost capturing group and final .* in the enclosing function. 
std::vector res_alts; @@ -200,5 +200,5 @@ std::string regex_to_reversed_partial_regex(const std::string & pattern) { throw std::runtime_error("Unmatched '(' in pattern"); } - return "(" + res + ")[\\s\\S]*"; + return "^(" + res + ")"; } From f9071240978bf9f38361f83dddf3ef4a97d43ad4 Mon Sep 17 00:00:00 2001 From: Alde Rojas Date: Wed, 24 Dec 2025 01:45:32 -0600 Subject: [PATCH 4/6] common : adjust regex partial tests to use new pattern --- tests/test-regex-partial.cpp | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/tests/test-regex-partial.cpp b/tests/test-regex-partial.cpp index ffad189786..70af6d75a1 100644 --- a/tests/test-regex-partial.cpp +++ b/tests/test-regex-partial.cpp @@ -232,52 +232,52 @@ static void test_regex_to_reversed_partial_regex() { printf("[%s]\n", __func__); assert_equals( - "((?:(?:c)?b)?a)[\\s\\S]*", + "^((?:(?:c)?b)?a)", regex_to_reversed_partial_regex("abc")); assert_equals( - "(a+)[\\s\\S]*", + "^(a+)", regex_to_reversed_partial_regex("a+")); assert_equals( - "(a*)[\\s\\S]*", + "^(a*)", regex_to_reversed_partial_regex("a*")); assert_equals( - "(a?)[\\s\\S]*", + "^(a?)", regex_to_reversed_partial_regex("a?")); assert_equals( - "([a-z])[\\s\\S]*", + "^([a-z])", regex_to_reversed_partial_regex("[a-z]")); assert_equals( - "((?:\\w+)?[a-z])[\\s\\S]*", + "^((?:\\w+)?[a-z])", regex_to_reversed_partial_regex("[a-z]\\w+")); assert_equals( - "((?:a|b))[\\s\\S]*", + "^((?:a|b))", regex_to_reversed_partial_regex("(?:a|b)")); assert_equals( - "((?:(?:(?:d)?c)?b)?a)[\\s\\S]*", + "^((?:(?:(?:d)?c)?b)?a)", regex_to_reversed_partial_regex("abcd")); assert_equals( - "((?:b)?a*)[\\s\\S]*", // TODO: ((?:b)?a*+).* ?? + "^((?:b)?a*)", // TODO: ((?:b)?a*+).* ?? 
regex_to_reversed_partial_regex("a*b")); assert_equals( - "((?:(?:b)?a)?.*)[\\s\\S]*", + "^((?:(?:b)?a)?.*)", regex_to_reversed_partial_regex(".*?ab")); assert_equals( - "((?:(?:b)?.*)?a)[\\s\\S]*", + "^((?:(?:b)?.*)?a)", regex_to_reversed_partial_regex("a.*?b")); assert_equals( - "((?:(?:d)?(?:(?:c)?b))?a)[\\s\\S]*", + "^((?:(?:d)?(?:(?:c)?b))?a)", regex_to_reversed_partial_regex("a(bc)d")); assert_equals( - "((?:(?:(?:c)?b|(?:e)?d))?a)[\\s\\S]*", + "^((?:(?:(?:c)?b|(?:e)?d))?a)", regex_to_reversed_partial_regex("a(bc|de)")); assert_equals( - "((?:(?:(?:(?:(?:c)?b?)?b?)?b)?b)?a)[\\s\\S]*", + "^((?:(?:(?:(?:(?:c)?b?)?b?)?b)?b)?a)", regex_to_reversed_partial_regex("ab{2,4}c")); } From 3bdcc4f77317fc26abc3e71b537ee122bf7afb69 Mon Sep 17 00:00:00 2001 From: Alde Rojas Date: Wed, 24 Dec 2025 02:19:09 -0600 Subject: [PATCH 5/6] grammar : check pattern directly instead of adding a type --- src/llama-grammar.cpp | 24 ++++++++---------------- src/llama-grammar.h | 6 ------ 2 files changed, 8 insertions(+), 22 deletions(-) diff --git a/src/llama-grammar.cpp b/src/llama-grammar.cpp index d62733b5d6..64ea2fd00a 100644 --- a/src/llama-grammar.cpp +++ b/src/llama-grammar.cpp @@ -372,14 +372,6 @@ static void print_rule( // // Regex utilities // -static llama_grammar_trigger_pattern llama_grammar_trigger_pattern_compile(const std::string & pattern) { - llama_grammar_trigger_pattern_type type = LLAMA_GRAMMAR_TRIGGER_PATTERN_TYPE_SEARCH; - if (!pattern.empty() && pattern.front() == '^' && pattern.back() == '$') { - // If anchored on both ends, consider it a match - type = LLAMA_GRAMMAR_TRIGGER_PATTERN_TYPE_MATCH; - } - return {type, pattern, std::regex(pattern)}; -} size_t llama_grammar_trigger_pattern::find(const std::string & input) const { auto find_start_pos = [](const std::smatch & match) { @@ -397,7 +389,7 @@ size_t llama_grammar_trigger_pattern::find(const std::string & input) const { return start; }; - if (type == LLAMA_GRAMMAR_TRIGGER_PATTERN_TYPE_MATCH) { + if 
(!pattern.empty() && pattern.front() == '^' && pattern.back() == '$') { // match against the entire input std::smatch match; if (std::regex_match(input, match, regex)) { @@ -405,12 +397,10 @@ size_t llama_grammar_trigger_pattern::find(const std::string & input) const { } } - if (type == LLAMA_GRAMMAR_TRIGGER_PATTERN_TYPE_SEARCH) { - // search anywhere - std::smatch match; - if (std::regex_search(input, match, regex)) { - return find_start_pos(match); - } + // search anywhere + std::smatch match; + if (std::regex_search(input, match, regex)) { + return find_start_pos(match); } return std::string::npos; @@ -1240,7 +1230,9 @@ struct llama_grammar * llama_grammar_init_impl( } for (size_t i = 0; i < num_trigger_patterns; i++) { GGML_ASSERT(trigger_patterns != nullptr); - vec_trigger_patterns.emplace_back(llama_grammar_trigger_pattern_compile(trigger_patterns[i])); + auto & trigger = vec_trigger_patterns.emplace_back(); + trigger.pattern = trigger_patterns[i]; + trigger.regex = std::regex(trigger.pattern); } // Important: vec_rules has to be moved here, not copied, because stacks contains diff --git a/src/llama-grammar.h b/src/llama-grammar.h index 2cd03bff1f..b5a0e588e9 100644 --- a/src/llama-grammar.h +++ b/src/llama-grammar.h @@ -116,13 +116,7 @@ struct llama_grammar_parser { void print(FILE * file); }; -enum llama_grammar_trigger_pattern_type { - LLAMA_GRAMMAR_TRIGGER_PATTERN_TYPE_MATCH = 0, - LLAMA_GRAMMAR_TRIGGER_PATTERN_TYPE_SEARCH = 1, -}; - struct llama_grammar_trigger_pattern { - llama_grammar_trigger_pattern_type type; std::string pattern; std::regex regex; From ff4d24ad1bd671f47a3c3b04d4c8bb1d78574742 Mon Sep 17 00:00:00 2001 From: Alde Rojas Date: Wed, 24 Dec 2025 07:36:07 -0600 Subject: [PATCH 6/6] common : adjust existing patterns to match new semantics --- common/chat.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/chat.cpp b/common/chat.cpp index bc0faf146c..b6b028ccbe 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ 
-2064,7 +2064,7 @@ static common_chat_params common_chat_params_init_gpt_oss(const common_chat_temp // Trigger on tool calls that appear in the commentary channel data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN, - "<\\|channel\\|>(commentary|analysis) to" + "<\\|channel\\|>(?:commentary|analysis) to" }); // Trigger tool calls that appear in the role section, either at the