common: support parsing FunctionGemma

This commit is contained in:
Hans 2026-01-23 22:52:14 +08:00
parent a14b960bc7
commit e950e56a6b
No known key found for this signature in database
3 changed files with 80 additions and 0 deletions

View File

@ -1515,6 +1515,39 @@ static void common_chat_parse_exaone_moe(common_chat_msg_parser & builder) {
}
}
// Parses model output that uses the FunctionGemma tool-call syntax:
//
//   <start_function_call>call:NAME{ARGS}<end_function_call>
//
// NAME is everything between "call:" and the first '{'. ARGS is a JSON value
// (optionally surrounded by whitespace) that must be a JSON object.
//
// When tool-call parsing is disabled in the syntax options, the remaining
// input is added as plain content. Otherwise every call found is registered
// on the builder and whatever follows the last call is added as content.
//
// Throws common_chat_msg_partial_exception when a call is not terminated by
// "}<end_function_call>", when ARGS is not an object, or when the builder
// rejects the tool call.
static void common_chat_parse_function_gemma(common_chat_msg_parser & builder) {
    // Tool calls are not wanted: everything that is left is content.
    if (!builder.syntax().parse_tool_calls) {
        builder.add_content(builder.consume_rest());
        return;
    }

    static const common_regex call_open_regex(regex_escape("<start_function_call>call:"));
    static const common_regex call_close_regex(regex_escape("}<end_function_call>"));
    static const common_regex name_regex("[^{]*");

    // Handle one tool call per iteration until no further opening marker is found.
    for (;;) {
        // The prelude flag asks the builder to keep the text before the marker as content.
        auto call_match = builder.try_find_regex(call_open_regex, std::string::npos, /* add_prelude_to_content= */ true);
        if (!call_match) {
            break;
        }
        builder.move_to(call_match->groups[0].end);

        // Function name: all characters up to (not including) the first '{'.
        const auto name_match = builder.consume_regex(name_regex);
        const auto tool_name  = builder.str(name_match.groups[0]);

        // The JSON arguments sit inside an extra pair of braces; the closing
        // brace is part of the terminator checked below.
        builder.consume_literal("{");
        builder.consume_spaces();
        const auto call_args = builder.consume_json();
        builder.consume_spaces();

        if (!builder.try_consume_regex(call_close_regex)) {
            throw common_chat_msg_partial_exception("incomplete tool call");
        }

        const bool args_are_object = call_args.json.is_object();
        if (!args_are_object) {
            throw common_chat_msg_partial_exception("arguments must be an object");
        }

        // No call id is supplied (empty string); arguments are passed as serialized JSON.
        const bool accepted = builder.add_tool_call(tool_name, "", call_args.json.dump());
        if (!accepted) {
            throw common_chat_msg_partial_exception("incomplete tool call");
        }
    }

    // Anything after the last tool call is regular content.
    builder.add_content(builder.consume_rest());
}
static void common_chat_parse_content_only(common_chat_msg_parser & builder) {
builder.try_parse_reasoning("<think>", "</think>");
builder.add_content(builder.consume_rest());
@ -1605,6 +1638,9 @@ static void common_chat_parse(common_chat_msg_parser & builder) {
case COMMON_CHAT_FORMAT_EXAONE_MOE:
common_chat_parse_exaone_moe(builder);
break;
case COMMON_CHAT_FORMAT_FUNCTION_GEMMA:
common_chat_parse_function_gemma(builder);
break;
default:
throw std::runtime_error(std::string("Unsupported format: ") + common_chat_format_name(builder.syntax().format));
}

View File

@ -699,6 +699,7 @@ const char * common_chat_format_name(common_chat_format format) {
case COMMON_CHAT_FORMAT_XIAOMI_MIMO: return "Xiaomi MiMo";
case COMMON_CHAT_FORMAT_SOLAR_OPEN: return "Solar Open";
case COMMON_CHAT_FORMAT_EXAONE_MOE: return "EXAONE MoE";
case COMMON_CHAT_FORMAT_FUNCTION_GEMMA: return "FunctionGemma";
case COMMON_CHAT_FORMAT_PEG_SIMPLE: return "peg-simple";
case COMMON_CHAT_FORMAT_PEG_NATIVE: return "peg-native";
case COMMON_CHAT_FORMAT_PEG_CONSTRUCTED: return "peg-constructed";
@ -2720,6 +2721,43 @@ static common_chat_params common_chat_params_init_seed_oss(
return data;
}
// Builds the chat parameters for templates that use the FunctionGemma
// tool-call syntax.
//
// The prompt is always rendered from the template. Without tools (tools is
// not a non-empty array) the result is marked as content-only. With tools,
// a grammar is generated with one rule per tool of the form
//
//   "<start_function_call>call:NAME{" <args schema rule> "}<end_function_call>"
//
// and a root rule that is the alternation of those per-tool rules. The
// grammar is lazy unless the tool choice is "required", in which case it is
// applied from the start.
static common_chat_params common_chat_params_init_function_gemma(const common_chat_template & tmpl, const struct templates_params & inputs) {
    common_chat_params data;
    data.prompt = apply(tmpl, inputs);

    const bool has_tools = inputs.tools.is_array() && !inputs.tools.empty();
    if (!has_tools) {
        // Nothing to call: treat the output as plain content.
        data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
        return data;
    }

    data.format       = COMMON_CHAT_FORMAT_FUNCTION_GEMMA;
    data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
    data.grammar      = build_grammar([&](const common_grammar_builder & builder) {
        std::vector<std::string> call_rules;

        foreach_function(inputs.tools, [&](const json & tool) {
            const auto & fn = tool.at("function");
            const std::string fn_name = fn.at("name");

            // Work on a copy of the schema: resolve_refs takes it by mutable reference.
            json schema = fn.at("parameters");
            builder.resolve_refs(schema);

            // Rule matching the arguments, wrapped in the FunctionGemma call envelope.
            const std::string args_rule = builder.add_schema(fn_name + "-args", schema);
            const std::string call_body =
                "\"<start_function_call>call:" + fn_name + "{\" " +
                args_rule +
                " \"}<end_function_call>\"";

            call_rules.push_back(builder.add_rule(fn_name + "-call", call_body));
        });

        // The opening marker is registered as the grammar's word trigger.
        data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<start_function_call>" });
        data.preserved_tokens = {
            "<start_function_call>", "<end_function_call>",
        };

        // Root: any one of the per-tool call rules.
        builder.add_rule("root", string_join(call_rules, " | "));
    });

    return data;
}
// various workarounds for known issues with certain templates or model behaviors
// TODO @ngxson : improve this (how?)
namespace workaround {
@ -2998,6 +3036,11 @@ static common_chat_params common_chat_templates_apply_jinja(
return common_chat_params_init_apriel_1_5(tmpl, params);
}
// FunctionGemma format detection
if (src.find("<start_function_call>") != std::string::npos) {
return common_chat_params_init_function_gemma(tmpl, params);
}
// Use generic handler when mixing tools + JSON schema.
// TODO: support that mix in handlers below.
if ((params.tools.is_array() && params.json_schema.is_object())) {

View File

@ -133,6 +133,7 @@ enum common_chat_format {
COMMON_CHAT_FORMAT_XIAOMI_MIMO,
COMMON_CHAT_FORMAT_SOLAR_OPEN,
COMMON_CHAT_FORMAT_EXAONE_MOE,
COMMON_CHAT_FORMAT_FUNCTION_GEMMA,
// These are intended to be parsed by the PEG parser
COMMON_CHAT_FORMAT_PEG_SIMPLE,