common : add nemotron nano 3 support
This commit is contained in:
parent
8b6bb3df27
commit
7274e8ed53
144
common/chat.cpp
144
common/chat.cpp
|
|
@ -711,6 +711,25 @@ static void foreach_function(const json & tools, const std::function<void(const
|
|||
}
|
||||
}
|
||||
|
||||
static void foreach_parameter(const json & function, const std::function<void(const std::string &, const json &, bool)> & fn) {
|
||||
if (!function.contains("parameters") || !function.at("parameters").is_object()) {
|
||||
return;
|
||||
}
|
||||
const auto & params = function.at("parameters");
|
||||
if (!params.contains("properties") || !params.at("properties").is_object()) {
|
||||
return;
|
||||
}
|
||||
const auto & props = params.at("properties");
|
||||
std::set<std::string> required;
|
||||
if (params.contains("required") && params.at("required").is_array()) {
|
||||
params.at("required").get_to(required);
|
||||
}
|
||||
for (const auto & [name, prop] : props.items()) {
|
||||
bool is_required = (required.find(name) != required.end());
|
||||
fn(name, prop, is_required);
|
||||
}
|
||||
}
|
||||
|
||||
static std::string apply(
|
||||
const common_chat_template & tmpl,
|
||||
const struct templates_params & inputs,
|
||||
|
|
@ -1409,6 +1428,127 @@ static common_chat_params common_chat_params_init_nemotron_v2(const common_chat_
|
|||
return data;
|
||||
}
|
||||
|
||||
// Builds the chat parameters for templates routed to the Nemotron v3 handler.
//
// Steps performed:
//   1. Render the prompt with `apply(tmpl, inputs)` and select the
//      PEG-constructed chat format.
//   2. If the rendered prompt ends with "<think>\n", either close the block
//      right away (thinking disabled) or record that thinking is forced open.
//   3. Build a PEG parser made of an optional reasoning prefix followed by
//      exactly one of: a JSON-schema constrained response, tool calls, or
//      free-form content.
//   4. For the response-format and tool-call cases, also derive a grammar from
//      the parser and register "<tool_call>" as the grammar trigger word.
//
// Returns the populated `common_chat_params`. JSON accessors used here
// (`at("function")`, `at("name")`, `at("parameters")`) throw when a tool
// definition lacks the corresponding member.
static common_chat_params common_chat_params_init_nemotron_v3(const common_chat_template & tmpl, const struct templates_params & inputs) {
    common_chat_params data;

    data.prompt = apply(tmpl, inputs);
    data.format = COMMON_CHAT_FORMAT_PEG_CONSTRUCTED;

    // Handle thinking tags appropriately based on inputs.enable_thinking
    if (string_ends_with(data.prompt, "<think>\n")) {
        if (!inputs.enable_thinking) {
            // The template already opened a think block; close it so the model answers directly.
            data.prompt += "</think>";
        } else {
            data.thinking_forced_open = true;
        }
    }

    data.preserved_tokens = {
        "<think>",
        "</think>",
        "<tool_call>",
        "</tool_call>",
    };

    auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
    auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
    // Cleared by the parser builder below when only free-form content is expected.
    auto include_grammar = true;

    auto parser = build_chat_peg_constructed_parser([&](auto & p) {
        // Reasoning prefix: matches nothing unless thinking is enabled and extraction was requested.
        auto reasoning = p.eps();
        if (inputs.enable_thinking && extract_reasoning) {
            // Reasoning text runs up to "</think>", or to the end of input if the tag never arrives.
            auto reasoning_content = p.reasoning(p.until("</think>")) + ("</think>" | p.end());
            if (data.thinking_forced_open) {
                // "<think>" is already part of the prompt, so the output starts inside the block.
                reasoning = reasoning_content;
            } else {
                reasoning = p.optional("<think>" + reasoning_content);
            }
        }

        // Response format parser
        if (inputs.json_schema.is_object() && !inputs.json_schema.empty()) {
            return reasoning << p.content(p.schema(p.json(), "response-format", inputs.json_schema));
        }

        // Tool call parser
        if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE) {
            auto tool_choice = p.choice();
            foreach_function(inputs.tools, [&](const json & tool) {
                const auto & function = tool.at("function");
                std::string name = function.at("name");
                // Copied on purpose: resolve_refs() receives this copy rather than the caller's tool definition.
                auto parameters = function.at("parameters");

                auto schema_info = common_schema_info();
                schema_info.resolve_refs(parameters);

                auto tool_open = "<function=" + p.tool_name(p.literal(name)) + ">\n";
                auto tool_close = p.literal("</function>\n");
                auto args = p.sequence();
                // Raw string argument: everything up to the next closing/opening tag on a new line.
                auto arg_string = p.rule("xml-arg-string", p.until_one_of({
                    "\n</parameter>",
                    "\n<parameter=",
                    "\n</function>"
                }));

                // Arguments are appended to `args` in the order foreach_parameter visits the properties.
                foreach_parameter(function, [&](const auto & param_name, const json & param_schema, bool is_required) {
                    auto rule_name = "tool-" + name + "-arg-" + param_name;

                    auto arg_open = "<parameter=" + p.tool_arg_name(p.literal(param_name)) + ">\n";
                    auto arg_close = p.literal("</parameter>\n");
                    auto arg_value = p.eps();

                    if (schema_info.resolves_to_string(param_schema)) {
                        // String-typed parameters are taken verbatim, followed by a newline.
                        arg_value = p.tool_arg_string_value(arg_string) + "\n";
                    } else {
                        // Everything else must be JSON matching the parameter's schema.
                        arg_value = p.tool_arg_json_value(p.schema(p.json(), rule_name + "-schema", param_schema));
                    }

                    // Model may or may not close with </parameter>
                    auto arg_rule = p.rule(rule_name, p.tool_arg_open(arg_open) + arg_value + p.optional(p.tool_arg_close(arg_close)));
                    // Each argument appears at most once; required ones exactly once.
                    args += p.repeat(arg_rule, /* min = */ is_required ? 1 : 0, /* max = */ 1);
                });

                tool_choice |= p.rule("tool-" + name, p.tool_open(tool_open) + args + p.tool_close(tool_close));
            });

            auto min_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED ? 1 : 0;
            // NOTE(review): -1 is presumably "no upper bound" for p.repeat() - confirm against the parser API.
            auto max_calls = inputs.parallel_tool_calls ? -1 : 1;
            auto tool_call = p.rule("tool-call", "<tool_call>\n" + tool_choice + "</tool_call>" + p.space());
            auto tool_calls = p.trigger_rule("tool-call-root", p.repeat(tool_call, /* min = */ min_calls, /* max = */ max_calls));

            return reasoning << p.content(p.until("<tool_call>")) << tool_calls;
        }

        // Content only parser
        include_grammar = false;
        return reasoning << p.content(p.rest());
    });

    data.parser = parser.save();

    if (include_grammar) {
        // Lazy grammar only when the model is free to decide whether to call a tool.
        data.grammar_lazy = has_tools && inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO;

        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
            // Resolve schema references of every tool before emitting the parser's grammar.
            foreach_function(inputs.tools, [&](const json & tool) {
                const auto & function = tool.at("function");
                auto schema = function.at("parameters");
                builder.resolve_refs(schema);
            });
            parser.build_grammar(builder, data.grammar_lazy);
        });

        data.grammar_triggers = {
            {COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<tool_call>"}
        };
    }

    return data;
}
|
||||
|
||||
|
||||
static common_chat_params common_chat_params_init_apertus(const common_chat_template & tmpl, const struct templates_params & inputs) {
|
||||
common_chat_params data;
|
||||
|
||||
|
|
@ -2534,6 +2674,10 @@ static common_chat_params common_chat_templates_apply_jinja(
|
|||
src.find("<function=") != std::string::npos &&
|
||||
src.find("<parameters>") != std::string::npos &&
|
||||
src.find("<parameter=") != std::string::npos) {
|
||||
// Nemotron 3 Nano 30B A3B
|
||||
if (src.find("<think>") != std::string::npos) {
|
||||
return common_chat_params_init_nemotron_v3(tmpl, params);
|
||||
}
|
||||
return common_chat_params_init_qwen3_coder_xml(tmpl, params);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue