fix: fail closed when grammar parsing fails (#19051)
When response_format: json_schema is requested but grammar parsing fails (e.g., because of an invalid regex pattern), the server now returns an HTTP 400 error instead of silently generating unconstrained output. Previously, common_sampler_init() would return a valid sampler even when grammar parsing failed (leaving grmr as nullptr). This caused the server to continue with unconstrained generation, violating the structured output contract. Now, if a grammar was requested but parsing failed, common_sampler_init() returns nullptr, triggering the existing error handling path. Fixes #19051
This commit is contained in:
parent
25f40ca65f
commit
76363cd0af
|
|
@ -191,6 +191,11 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, st
|
||||||
if (params.grammar.compare(0, 11, "%llguidance") == 0) {
|
if (params.grammar.compare(0, 11, "%llguidance") == 0) {
|
||||||
#ifdef LLAMA_USE_LLGUIDANCE
|
#ifdef LLAMA_USE_LLGUIDANCE
|
||||||
grmr = llama_sampler_init_llg(vocab, "lark", params.grammar.c_str());
|
grmr = llama_sampler_init_llg(vocab, "lark", params.grammar.c_str());
|
||||||
|
// fail closed: if llguidance grammar parsing failed, return nullptr
|
||||||
|
if (grmr == nullptr) {
|
||||||
|
llama_sampler_free(chain);
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
#else
|
#else
|
||||||
GGML_ABORT("llguidance (cmake -DLLAMA_LLGUIDANCE=ON) is not enabled");
|
GGML_ABORT("llguidance (cmake -DLLAMA_LLGUIDANCE=ON) is not enabled");
|
||||||
#endif // LLAMA_USE_LLGUIDANCE
|
#endif // LLAMA_USE_LLGUIDANCE
|
||||||
|
|
@ -247,6 +252,12 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, st
|
||||||
} else {
|
} else {
|
||||||
grmr = llama_sampler_init_grammar(vocab, params.grammar.c_str(), "root");
|
grmr = llama_sampler_init_grammar(vocab, params.grammar.c_str(), "root");
|
||||||
}
|
}
|
||||||
|
// fail closed: if grammar was requested but parsing failed, return nullptr
|
||||||
|
// this ensures the server returns an error instead of generating unconstrained output
|
||||||
|
if (grmr == nullptr) {
|
||||||
|
llama_sampler_free(chain);
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue