From 17d8309fe0ef997087a060a03457e1f7353e7505 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Fri, 2 Jan 2026 17:42:31 +0100
Subject: [PATCH 1/5] grammar: add test case for nullable symbol loop

Add a test that reproduces the stack overflow (or OOM) triggered by the
grammar ( [x]* )*, found while adding GBNF support to ripgrep-edit.

llama-server reproducer:

curl \
  -X POST \
  -d '{
    "messages": [{ "role": "user", "content": "write yes" }],
    "grammar": "root ::= ( [x]* )*"
  }' \
  -H "Content-Type: application/json" \
  http://localhost:8811/v1/chat/completions

Not security related according to
https://github.com/ggml-org/llama.cpp/security#untrusted-environments-or-networks
---
 tests/test-grammar-integration.cpp | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/tests/test-grammar-integration.cpp b/tests/test-grammar-integration.cpp
index 7aa7e58a5c..84a680c670 100644
--- a/tests/test-grammar-integration.cpp
+++ b/tests/test-grammar-integration.cpp
@@ -784,6 +784,24 @@ static void test_quantifiers() {
             "0xFF 0x12 0xAB 0x00 0x00 0x00",
         }
     );
+    test_grammar(
+        "segfault",
+        // Grammar
+        R"""(
+            root ::= ( [x]* )*
+        )""",
+        // Passing strings
+        {
+		"",
+		"x",
+		"xx"
+        },
+        // Failing strings
+        {
+		"y",
+		"yy"
+        }
+    );
 }
 
 static void test_failure_missing_root() {

From e289f380bfdd52065b1119e5f29b2a09f1fafd17 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Fri, 2 Jan 2026 17:42:31 +0100
Subject: [PATCH 2/5] grammar: prevent stack overflow with nullable symbol loop
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix a potential stack overflow in llama_grammar_advance_stack that
could occur when processing grammars with nullable symbols that lead
to infinite derivations of empty strings. The fix introduces cycle
detection by tracking visited stacks to prevent infinite recursion.

rg-edit regexp: llama_grammar_advance_stack
rg-edit extra-args: -A20
rg-edit directive: """Rewrite: fix the following segfault:

[..]
⚫ Testing segfault. Grammar:
            root ::= ( [x]* )*

            root ::= ( [x]* )*

Segmentation fault         build/bin/test-grammar-integration"""

gptel-context Value:
(("~/devel/ai/llama.cpp/src/llama-grammar.cpp")
 ("~/devel/ai/llama.cpp/tests/test-grammar-integration.cpp")
 ("~/devel/ai/llama.cpp/grammars/./list.gbnf")
 ("~/devel/ai/llama.cpp/grammars/./json_arr.gbnf")
 ("~/devel/ai/llama.cpp/grammars/./json.gbnf")
 ("~/devel/ai/llama.cpp/grammars/./japanese.gbnf")
 ("~/devel/ai/llama.cpp/grammars/./english.gbnf")
 ("~/devel/ai/llama.cpp/grammars/./chess.gbnf")
 ("~/devel/ai/llama.cpp/grammars/./c.gbnf")
 ("~/devel/ai/llama.cpp/grammars/./arithmetic.gbnf")
 ("~/devel/ai/llama.cpp/grammars/./README.md"))
---
 src/llama-grammar.cpp | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/src/llama-grammar.cpp b/src/llama-grammar.cpp
index 64ea2fd00a..1af066e9f6 100644
--- a/src/llama-grammar.cpp
+++ b/src/llama-grammar.cpp
@@ -830,7 +830,13 @@ static bool llama_grammar_match_token(
 static void llama_grammar_advance_stack(
         const llama_grammar_rules  & rules,
         const llama_grammar_stack  & stack,
-              llama_grammar_stacks & new_stacks) {
+              llama_grammar_stacks & new_stacks,
+              llama_grammar_stacks & seen_stacks) {
+    if (std::find(seen_stacks.begin(), seen_stacks.end(), stack) != seen_stacks.end()) {
+        return;
+    }
+    seen_stacks.push_back(stack);
+
     if (stack.empty()) {
         if (std::find(new_stacks.begin(), new_stacks.end(), stack) == new_stacks.end()) {
             new_stacks.emplace_back(stack);
@@ -855,7 +861,7 @@ static void llama_grammar_advance_stack(
                     // if alternate is nonempty, add to stack
                     new_stack.push_back(subpos);
                 }
-                llama_grammar_advance_stack(rules, new_stack, new_stacks);
+                llama_grammar_advance_stack(rules, new_stack, new_stacks, seen_stacks);
                 while (!llama_grammar_is_end_of_sequence(subpos)) {
                     // scan to end of alternate def
                     subpos++;
@@ -989,7 +995,8 @@ static void llama_grammar_accept_chr(
         if (!llama_grammar_is_end_of_sequence(match.second)) {
             new_stack.push_back(match.second);
         }
-        llama_grammar_advance_stack(grammar.rules, new_stack, new_stacks);
+        llama_grammar_stacks seen_stacks;
+        llama_grammar_advance_stack(grammar.rules, new_stack, new_stacks, seen_stacks);
     }
 }
 
@@ -1065,7 +1072,8 @@ llama_grammar_candidates llama_grammar_reject_candidates_for_stack(
         stack_after.push_back(stack_pos_after);
     }
     llama_grammar_stacks next_stacks;
-    llama_grammar_advance_stack(rules, stack_after, next_stacks);
+    llama_grammar_stacks seen_stacks;
+    llama_grammar_advance_stack(rules, stack_after, next_stacks, seen_stacks);
 
     auto next_rejects = llama_grammar_reject_candidates(rules, next_stacks, next_candidates);
     for (const auto & tok : next_rejects) {
@@ -1116,7 +1124,8 @@ struct llama_grammar * llama_grammar_init_impl(
             // if alternate is nonempty, add to stack
             stack.push_back(pos);
         }
-        llama_grammar_advance_stack(vec_rules, stack, stacks);
+        llama_grammar_stacks seen_stacks;
+        llama_grammar_advance_stack(vec_rules, stack, stacks, seen_stacks);
         while (!llama_grammar_is_end_of_sequence(pos)) {
             // scan to end of alternate def
             pos++;
@@ -1209,7 +1218,8 @@ struct llama_grammar * llama_grammar_init_impl(
             // if alternate is nonempty, add to stack
             stack.push_back(pos);
         }
-        llama_grammar_advance_stack(vec_rules, stack, stacks);
+        llama_grammar_stacks seen_stacks;
+        llama_grammar_advance_stack(vec_rules, stack, stacks, seen_stacks);
         while (!llama_grammar_is_end_of_sequence(pos)) {
             // scan to end of alternate def
             pos++;
@@ -1428,7 +1438,8 @@ void llama_grammar_accept_token(struct llama_grammar & grammar, llama_token toke
                 if (!llama_grammar_is_end_of_sequence(pos + 1)) {
                     new_stack.push_back(pos + 1);
                 }
-                llama_grammar_advance_stack(grammar.rules, new_stack, stacks_new);
+                llama_grammar_stacks seen_stacks;
+                llama_grammar_advance_stack(grammar.rules, new_stack, stacks_new, seen_stacks);
             }
         } else {
             llama_grammar_stacks current_stacks = {stack};

From b689ff4779f9e5ca839bc9d1d61172d299b48762 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Fri, 2 Jan 2026 17:42:31 +0100
Subject: [PATCH 3/5] grammar: convert recursive llama_grammar_advance_stack to
 iterative
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

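This change converts llama_grammar_advance_stack from recursion to an
iterative loop driven by an explicit work list, which prevents deep
recursion and eliminates the risk of stack overflow.

The visited-stack tracking becomes local to the function, so the
seen_stacks parameter added by the previous patch is dropped again from
all callers. Pending stacks are now popped in LIFO order, so the
resulting stacks are produced in a different order; the expected stacks
in tests/test-llama-grammar.cpp are reordered accordingly.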

rg-edit regexp: llama_grammar_advance_stack
rg-edit extra-args: -A30
rg-edit directive: """Rewrite: fix the following segfault:

[..]
⚫ Testing segfault. Grammar:
            root ::= ( [x]* )*

            root ::= ( [x]* )*

Segmentation fault         build/bin/test-grammar-integration

convert from recursive to interactive"""

gptel-context Value:
(("~/devel/ai/llama.cpp/src/llama-grammar.cpp")
 ("~/devel/ai/llama.cpp/tests/test-grammar-integration.cpp")
 ("~/devel/ai/llama.cpp/grammars/./list.gbnf")
 ("~/devel/ai/llama.cpp/grammars/./json_arr.gbnf")
 ("~/devel/ai/llama.cpp/grammars/./json.gbnf")
 ("~/devel/ai/llama.cpp/grammars/./japanese.gbnf")
 ("~/devel/ai/llama.cpp/grammars/./english.gbnf")
 ("~/devel/ai/llama.cpp/grammars/./chess.gbnf")
 ("~/devel/ai/llama.cpp/grammars/./c.gbnf")
 ("~/devel/ai/llama.cpp/grammars/./arithmetic.gbnf")
 ("~/devel/ai/llama.cpp/grammars/./README.md"))
---
 src/llama-grammar.cpp        | 136 ++++++++++++++++++-----------------
 tests/test-llama-grammar.cpp |  74 ++++++++++---------
 2 files changed, 106 insertions(+), 104 deletions(-)

diff --git a/src/llama-grammar.cpp b/src/llama-grammar.cpp
index 1af066e9f6..ec8bb46527 100644
--- a/src/llama-grammar.cpp
+++ b/src/llama-grammar.cpp
@@ -830,66 +830,75 @@ static bool llama_grammar_match_token(
 static void llama_grammar_advance_stack(
         const llama_grammar_rules  & rules,
         const llama_grammar_stack  & stack,
-              llama_grammar_stacks & new_stacks,
-              llama_grammar_stacks & seen_stacks) {
-    if (std::find(seen_stacks.begin(), seen_stacks.end(), stack) != seen_stacks.end()) {
-        return;
-    }
-    seen_stacks.push_back(stack);
+              llama_grammar_stacks & new_stacks) {
+    std::vector<llama_grammar_stack> todo;
+    todo.push_back(stack);
 
-    if (stack.empty()) {
-        if (std::find(new_stacks.begin(), new_stacks.end(), stack) == new_stacks.end()) {
-            new_stacks.emplace_back(stack);
+    std::vector<llama_grammar_stack> seen;
+
+    while (!todo.empty()) {
+        llama_grammar_stack curr_stack = std::move(todo.back());
+        todo.pop_back();
+
+        if (std::find(seen.begin(), seen.end(), curr_stack) != seen.end()) {
+            continue;
         }
-        return;
-    }
+        seen.push_back(curr_stack);
 
-    const llama_grammar_element * pos = stack.back();
-
-    switch (pos->type) {
-        case LLAMA_GRETYPE_RULE_REF: {
-            const size_t                  rule_id = static_cast<size_t>(pos->value);
-            const llama_grammar_element * subpos  = rules[rule_id].data();
-            do {
-                // init new stack without the top (pos)
-                llama_grammar_stack new_stack(stack.begin(), stack.end() - 1);
-                if (!llama_grammar_is_end_of_sequence(pos + 1)) {
-                    // if this rule ref is followed by another element, add that to stack
-                    new_stack.push_back(pos + 1);
-                }
-                if (!llama_grammar_is_end_of_sequence(subpos)) {
-                    // if alternate is nonempty, add to stack
-                    new_stack.push_back(subpos);
-                }
-                llama_grammar_advance_stack(rules, new_stack, new_stacks, seen_stacks);
-                while (!llama_grammar_is_end_of_sequence(subpos)) {
-                    // scan to end of alternate def
-                    subpos++;
-                }
-                if (subpos->type == LLAMA_GRETYPE_ALT) {
-                    // there's another alternate def of this rule to process
-                    subpos++;
-                } else {
-                    break;
-                }
-            } while (true);
-            break;
-        }
-        case LLAMA_GRETYPE_CHAR:
-        case LLAMA_GRETYPE_CHAR_NOT:
-        case LLAMA_GRETYPE_CHAR_ANY:
-        case LLAMA_GRETYPE_TOKEN:
-        case LLAMA_GRETYPE_TOKEN_NOT:
-            if (std::find(new_stacks.begin(), new_stacks.end(), stack) == new_stacks.end()) {
-                // only add the stack if it's not a duplicate of one we already have
-                new_stacks.emplace_back(stack);
+        if (curr_stack.empty()) {
+            if (std::find(new_stacks.begin(), new_stacks.end(), curr_stack) == new_stacks.end()) {
+                new_stacks.emplace_back(std::move(curr_stack));
             }
-            break;
-        default:
-            // end of alternate (LLAMA_GRETYPE_END, LLAMA_GRETYPE_ALT) or middle of char range
-            // (LLAMA_GRETYPE_CHAR_ALT, LLAMA_GRETYPE_CHAR_RNG_UPPER); stack should never be left on
-            // those
-            GGML_ABORT("fatal error");
+            continue;
+        }
+
+        const llama_grammar_element * pos = curr_stack.back();
+
+        switch (pos->type) {
+            case LLAMA_GRETYPE_RULE_REF: {
+                const size_t                  rule_id = static_cast<size_t>(pos->value);
+                const llama_grammar_element * subpos  = rules[rule_id].data();
+                do {
+                    // init new stack without the top (pos)
+                    llama_grammar_stack next_stack(curr_stack.begin(), curr_stack.end() - 1);
+                    if (!llama_grammar_is_end_of_sequence(pos + 1)) {
+                        // if this rule ref is followed by another element, add that to stack
+                        next_stack.push_back(pos + 1);
+                    }
+                    if (!llama_grammar_is_end_of_sequence(subpos)) {
+                        // if alternate is nonempty, add to stack
+                        next_stack.push_back(subpos);
+                    }
+                    todo.push_back(std::move(next_stack));
+                    while (!llama_grammar_is_end_of_sequence(subpos)) {
+                        // scan to end of alternate def
+                        subpos++;
+                    }
+                    if (subpos->type == LLAMA_GRETYPE_ALT) {
+                        // there's another alternate def of this rule to process
+                        subpos++;
+                    } else {
+                        break;
+                    }
+                } while (true);
+                break;
+            }
+            case LLAMA_GRETYPE_CHAR:
+            case LLAMA_GRETYPE_CHAR_NOT:
+            case LLAMA_GRETYPE_CHAR_ANY:
+            case LLAMA_GRETYPE_TOKEN:
+            case LLAMA_GRETYPE_TOKEN_NOT:
+                if (std::find(new_stacks.begin(), new_stacks.end(), curr_stack) == new_stacks.end()) {
+                    // only add the stack if it's not a duplicate of one we already have
+                    new_stacks.emplace_back(std::move(curr_stack));
+                }
+                break;
+            default:
+                // end of alternate (LLAMA_GRETYPE_END, LLAMA_GRETYPE_ALT) or middle of char range
+                // (LLAMA_GRETYPE_CHAR_ALT, LLAMA_GRETYPE_CHAR_RNG_UPPER); stack should never be left on
+                // those
+                GGML_ABORT("fatal error");
+        }
     }
 }
 
@@ -995,8 +1004,7 @@ static void llama_grammar_accept_chr(
         if (!llama_grammar_is_end_of_sequence(match.second)) {
             new_stack.push_back(match.second);
         }
-        llama_grammar_stacks seen_stacks;
-        llama_grammar_advance_stack(grammar.rules, new_stack, new_stacks, seen_stacks);
+        llama_grammar_advance_stack(grammar.rules, new_stack, new_stacks);
     }
 }
 
@@ -1072,8 +1080,7 @@ llama_grammar_candidates llama_grammar_reject_candidates_for_stack(
         stack_after.push_back(stack_pos_after);
     }
     llama_grammar_stacks next_stacks;
-    llama_grammar_stacks seen_stacks;
-    llama_grammar_advance_stack(rules, stack_after, next_stacks, seen_stacks);
+    llama_grammar_advance_stack(rules, stack_after, next_stacks);
 
     auto next_rejects = llama_grammar_reject_candidates(rules, next_stacks, next_candidates);
     for (const auto & tok : next_rejects) {
@@ -1124,8 +1131,7 @@ struct llama_grammar * llama_grammar_init_impl(
             // if alternate is nonempty, add to stack
             stack.push_back(pos);
         }
-        llama_grammar_stacks seen_stacks;
-        llama_grammar_advance_stack(vec_rules, stack, stacks, seen_stacks);
+        llama_grammar_advance_stack(vec_rules, stack, stacks);
         while (!llama_grammar_is_end_of_sequence(pos)) {
             // scan to end of alternate def
             pos++;
@@ -1218,8 +1224,7 @@ struct llama_grammar * llama_grammar_init_impl(
             // if alternate is nonempty, add to stack
             stack.push_back(pos);
         }
-        llama_grammar_stacks seen_stacks;
-        llama_grammar_advance_stack(vec_rules, stack, stacks, seen_stacks);
+        llama_grammar_advance_stack(vec_rules, stack, stacks);
         while (!llama_grammar_is_end_of_sequence(pos)) {
             // scan to end of alternate def
             pos++;
@@ -1438,8 +1443,7 @@ void llama_grammar_accept_token(struct llama_grammar & grammar, llama_token toke
                 if (!llama_grammar_is_end_of_sequence(pos + 1)) {
                     new_stack.push_back(pos + 1);
                 }
-                llama_grammar_stacks seen_stacks;
-                llama_grammar_advance_stack(grammar.rules, new_stack, stacks_new, seen_stacks);
+                llama_grammar_advance_stack(grammar.rules, new_stack, stacks_new);
             }
         } else {
             llama_grammar_stacks current_stacks = {stack};
diff --git a/tests/test-llama-grammar.cpp b/tests/test-llama-grammar.cpp
index fd45d5ada8..25f432a2f5 100644
--- a/tests/test-llama-grammar.cpp
+++ b/tests/test-llama-grammar.cpp
@@ -123,25 +123,27 @@ int main()
 
     std::vector<std::vector<llama_grammar_element>> expected_stacks = {
         {
-            {LLAMA_GRETYPE_RULE_REF, 5},
+            {LLAMA_GRETYPE_CHAR, 61},
+            {LLAMA_GRETYPE_RULE_REF, 7},
+            {LLAMA_GRETYPE_CHAR, 40},
+        },
+        {
+            {LLAMA_GRETYPE_CHAR, 61},
+            {LLAMA_GRETYPE_RULE_REF, 7},
+            {LLAMA_GRETYPE_RULE_REF, 3},
+            {LLAMA_GRETYPE_CHAR, 48},
+        },
+        {
+            {LLAMA_GRETYPE_CHAR, 61},
+            {LLAMA_GRETYPE_RULE_REF, 7},
+            {LLAMA_GRETYPE_RULE_REF, 3},
+            {LLAMA_GRETYPE_CHAR, 48},
+        },
+        {
             {LLAMA_GRETYPE_CHAR, 61},
             {LLAMA_GRETYPE_RULE_REF, 7},
             {LLAMA_GRETYPE_CHAR, 97},
         },
-        {
-            {LLAMA_GRETYPE_RULE_REF, 5},
-            {LLAMA_GRETYPE_CHAR, 61},
-            {LLAMA_GRETYPE_RULE_REF, 7},
-            {LLAMA_GRETYPE_RULE_REF, 3},
-            {LLAMA_GRETYPE_CHAR, 48},
-        },
-        {
-            {LLAMA_GRETYPE_RULE_REF, 5},
-            {LLAMA_GRETYPE_CHAR, 61},
-            {LLAMA_GRETYPE_RULE_REF, 7},
-            {LLAMA_GRETYPE_RULE_REF, 3},
-            {LLAMA_GRETYPE_CHAR, 48},
-        },
         {
             {LLAMA_GRETYPE_RULE_REF, 5},
             {LLAMA_GRETYPE_CHAR, 61},
@@ -149,26 +151,24 @@ int main()
             {LLAMA_GRETYPE_CHAR, 40},
         },
         {
+            {LLAMA_GRETYPE_RULE_REF, 5},
+            {LLAMA_GRETYPE_CHAR, 61},
+            {LLAMA_GRETYPE_RULE_REF, 7},
+            {LLAMA_GRETYPE_RULE_REF, 3},
+            {LLAMA_GRETYPE_CHAR, 48},
+        },
+        {
+            {LLAMA_GRETYPE_RULE_REF, 5},
+            {LLAMA_GRETYPE_CHAR, 61},
+            {LLAMA_GRETYPE_RULE_REF, 7},
+            {LLAMA_GRETYPE_RULE_REF, 3},
+            {LLAMA_GRETYPE_CHAR, 48},
+        },
+        {
+            {LLAMA_GRETYPE_RULE_REF, 5},
             {LLAMA_GRETYPE_CHAR, 61},
             {LLAMA_GRETYPE_RULE_REF, 7},
             {LLAMA_GRETYPE_CHAR, 97},
-        },
-        {
-            {LLAMA_GRETYPE_CHAR, 61},
-            {LLAMA_GRETYPE_RULE_REF, 7},
-            {LLAMA_GRETYPE_RULE_REF, 3},
-            {LLAMA_GRETYPE_CHAR, 48},
-        },
-        {
-            {LLAMA_GRETYPE_CHAR, 61},
-            {LLAMA_GRETYPE_RULE_REF, 7},
-            {LLAMA_GRETYPE_RULE_REF, 3},
-            {LLAMA_GRETYPE_CHAR, 48},
-        },
-        {
-            {LLAMA_GRETYPE_CHAR, 61},
-            {LLAMA_GRETYPE_RULE_REF, 7},
-            {LLAMA_GRETYPE_CHAR, 40},
         }};
 
     auto index = 0;
@@ -195,9 +195,9 @@ int main()
     }
 
     std::vector<llama_grammar_candidate> next_candidates;
-    next_candidates.resize(24);
+    next_candidates.resize(23);
 
-    for (size_t i = 0; i < 24; ++i)
+    for (size_t i = 0; i < 23; ++i)
     {
         uint32_t *cp = new uint32_t[2]; // dynamically allocate memory for code_point
         cp[0] = 37 + i;
@@ -210,7 +210,6 @@ int main()
             {0, 37},
             {1, 38},
             {2, 39},
-            {3, 40},
             {4, 41},
             {5, 42},
             {6, 43},
@@ -268,6 +267,7 @@ int main()
             {0, 37},
             {1, 38},
             {2, 39},
+            {3, 40},
             {4, 41},
             {5, 42},
             {6, 43},
@@ -287,13 +287,11 @@ int main()
             {20, 57},
             {21, 58},
             {22, 59},
-            {23, 60},
         },
         {
             {0, 37},
             {1, 38},
             {2, 39},
-            {3, 40},
             {4, 41},
             {5, 42},
             {6, 43},
@@ -351,6 +349,7 @@ int main()
             {0, 37},
             {1, 38},
             {2, 39},
+            {3, 40},
             {4, 41},
             {5, 42},
             {6, 43},
@@ -370,7 +369,6 @@ int main()
             {20, 57},
             {21, 58},
             {22, 59},
-            {23, 60},
         },
     };
 

From c537f77a4dac9f31164e29f9b5cbda7fe6284ed1 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Fri, 2 Jan 2026 17:42:31 +0100
Subject: [PATCH 4/5] grammar: add test case for hang in repetition grammar
 processing

This commit adds a test case to the grammar integration tests that
triggers a hang when processing deeply nested bounded repetitions,
found while adding GBNF support to ripgrep-edit.

llama-server reproducer:

curl \
  -X POST \
  -d '{
    "messages": [{ "role": "user", "content": "write yes" }],
    "grammar": "root ::= (([^x]*){0,99}){0,99}"
  }' \
  -H "Content-Type: application/json" \
  http://localhost:8811/v1/chat/completions

Not security related according to
https://github.com/ggml-org/llama.cpp/security#untrusted-environments-or-networks
---
 tests/test-grammar-integration.cpp | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/tests/test-grammar-integration.cpp b/tests/test-grammar-integration.cpp
index 84a680c670..74bf41eb7e 100644
--- a/tests/test-grammar-integration.cpp
+++ b/tests/test-grammar-integration.cpp
@@ -802,6 +802,19 @@ static void test_quantifiers() {
 		"yy"
         }
     );
+    test_grammar(
+        "hang",
+        // Grammar
+        R"""(
+            root ::= (((((([^x]*){0,99}){0,99}){0,99}){0,99}){0,99}){0,99}
+        )""",
+        // Passing strings
+        {
+        },
+        // Failing strings
+        {
+        }
+    );
 }
 
 static void test_failure_missing_root() {

From 43d9e59d0a10f226640060c2a1e2fb7d6eb3fc2d Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Fri, 2 Jan 2026 17:42:31 +0100
Subject: [PATCH 5/5] grammar: add repetition threshold check

The change adds a check against MAX_REPETITION_THRESHOLD to avoid
excessive rule expansion during grammar parsing. When parsing
repetition patterns like {m,n}, the parser now calculates the
potential number of rules that would be generated and throws an error
if the product of the rules produced by the repeated sub-expression and
the new repetition count reaches or exceeds the threshold.

The "hang" integration test added by the previous patch is removed and
replaced by a verify_failure case in tests/test-grammar-parser.cpp,
which checks that deeply nested repetition patterns that would
otherwise cause hangs are rejected at parse time.
---
 src/llama-grammar.cpp              | 22 ++++++++++++++++++++++
 tests/test-grammar-integration.cpp | 13 -------------
 tests/test-grammar-parser.cpp      |  4 ++++
 3 files changed, 26 insertions(+), 13 deletions(-)

diff --git a/src/llama-grammar.cpp b/src/llama-grammar.cpp
index ec8bb46527..53250caeb5 100644
--- a/src/llama-grammar.cpp
+++ b/src/llama-grammar.cpp
@@ -454,6 +454,7 @@ const char * llama_grammar_parser::parse_sequence(
         bool               is_nested) {
     size_t last_sym_start = rule.size();
     const char * pos = src;
+    uint64_t n_prev_rules = 1;
 
     // use UINT64_MAX as the empty value because we aligned to the proper uint64_t type so -1 can't be used
     // (though it's technically the same as -1 now)
@@ -481,6 +482,18 @@ const char * llama_grammar_parser::parse_sequence(
         //            S'     ::= S |
 
         llama_grammar_rule prev_rule(rule.begin() + last_sym_start, rule.end());
+        // Calculate the total number of rules that will be generated by this repetition
+        uint64_t total_rules = 1; // Start with 1 for the original rule
+        if (!no_max && max_times > 0) {
+            total_rules = max_times;
+        } else if (min_times > 0) {
+            total_rules = min_times;
+        }
+
+	if (n_prev_rules * total_rules >= MAX_REPETITION_THRESHOLD) {
+            throw std::runtime_error("number of rules that are going to be repeated multiplied by the new repetition exceeds sane defaults, please reduce the number of repetitions or rule complexity");
+        }
+
         if (min_times == 0) {
             rule.resize(last_sym_start);
         } else {
@@ -508,12 +521,15 @@ const char * llama_grammar_parser::parse_sequence(
         if (n_opt > 0) {
             rule.push_back({LLAMA_GRETYPE_RULE_REF, last_rec_rule_id});
         }
+        n_prev_rules *= total_rules;
+        GGML_ASSERT(n_prev_rules >= 1);
     };
 
     while (*pos) {
         if (*pos == '"') { // literal string
             pos++;
             last_sym_start = rule.size();
+            n_prev_rules = 1;
             while (*pos != '"') {
                 if (!*pos) {
                     throw std::runtime_error("unexpected end of input");
@@ -531,6 +547,7 @@ const char * llama_grammar_parser::parse_sequence(
                 start_type = LLAMA_GRETYPE_CHAR_NOT;
             }
             last_sym_start = rule.size();
+            n_prev_rules = 1;
             while (*pos != ']') {
                 if (!*pos) {
                     throw std::runtime_error("unexpected end of input");
@@ -561,6 +578,7 @@ const char * llama_grammar_parser::parse_sequence(
             auto token_pair = parse_token(vocab, pos);
             const char * token_end  = token_pair.second;
             last_sym_start = rule.size();
+            n_prev_rules = 1;
             rule.push_back({type, token_pair.first});
             pos = parse_space(token_end, is_nested);
         } else if (is_word_char(*pos)) { // rule reference
@@ -568,12 +586,15 @@ const char * llama_grammar_parser::parse_sequence(
             uint32_t ref_rule_id = get_symbol_id(pos, name_end - pos);
             pos = parse_space(name_end, is_nested);
             last_sym_start = rule.size();
+            n_prev_rules = 1;
             rule.push_back({LLAMA_GRETYPE_RULE_REF, ref_rule_id});
         } else if (*pos == '(') { // grouping
             // parse nested alternates into synthesized rule
             pos = parse_space(pos + 1, true);
+            uint32_t n_rules_before = symbol_ids.size();
             uint32_t sub_rule_id = generate_symbol_id(rule_name);
             pos = parse_alternates(pos, rule_name, sub_rule_id, true);
+            n_prev_rules = std::max(1u, (uint32_t)symbol_ids.size() - n_rules_before);
             last_sym_start = rule.size();
             // output reference to synthesized rule
             rule.push_back({LLAMA_GRETYPE_RULE_REF, sub_rule_id});
@@ -583,6 +604,7 @@ const char * llama_grammar_parser::parse_sequence(
             pos = parse_space(pos + 1, is_nested);
         } else if (*pos == '.') { // any char
             last_sym_start = rule.size();
+            n_prev_rules = 1;
             rule.push_back({LLAMA_GRETYPE_CHAR_ANY, 0});
             pos = parse_space(pos + 1, is_nested);
         } else if (*pos == '*') {
diff --git a/tests/test-grammar-integration.cpp b/tests/test-grammar-integration.cpp
index 74bf41eb7e..84a680c670 100644
--- a/tests/test-grammar-integration.cpp
+++ b/tests/test-grammar-integration.cpp
@@ -802,19 +802,6 @@ static void test_quantifiers() {
 		"yy"
         }
     );
-    test_grammar(
-        "hang",
-        // Grammar
-        R"""(
-            root ::= (((((([^x]*){0,99}){0,99}){0,99}){0,99}){0,99}){0,99}
-        )""",
-        // Passing strings
-        {
-        },
-        // Failing strings
-        {
-        }
-    );
 }
 
 static void test_failure_missing_root() {
diff --git a/tests/test-grammar-parser.cpp b/tests/test-grammar-parser.cpp
index 03ae78ff73..6abc43461b 100644
--- a/tests/test-grammar-parser.cpp
+++ b/tests/test-grammar-parser.cpp
@@ -145,6 +145,10 @@ int main()
         root ::= "a"{,}"
     )""");
 
+    verify_failure(R"""(
+        root ::= (((((([^x]*){0,99}){0,99}){0,99}){0,99}){0,99}){0,99}
+    )""");
+
     verify_failure(R"""(
         root ::= "a"{,10}"
     )""");