diff --git a/common/chat.cpp b/common/chat.cpp index 065506a4d1..d2ff3eab9c 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -2547,8 +2547,6 @@ static common_chat_params common_chat_params_init_solar_open(const common_chat_t } } - auto enable_thinking = prompt.rfind("<|think|><|end|>") == std::string::npos; - data.prompt = prompt; data.format = COMMON_CHAT_FORMAT_PEG_NATIVE; data.preserved_tokens = { @@ -2570,6 +2568,9 @@ static common_chat_params common_chat_params_init_solar_open(const common_chat_t auto lit_end = p.atomic(p.literal("<|end|>")); auto parser_until_end = p.until("<|end|>"); + // prefix <- "<|begin|>assistant"? + auto parser_prefix = p.optional(lit_assistant_begin); + // reasoning <- "<|think|>" (!"<|end|>" .)* auto parser_reasoning = p.rule("reasoning", lit_think + p.reasoning(parser_until_end)); @@ -2581,8 +2582,7 @@ static common_chat_params common_chat_params_init_solar_open(const common_chat_t // wrapped <- "<|end|><|begin|>assistant" item-choice auto wrap_choice = [&](const std::vector & items) { auto choice = p.choice(items); - auto first = enable_thinking ? choice : lit_assistant_begin + choice; - return first + p.zero_or_more(lit_end + lit_assistant_begin + choice); + return choice + p.zero_or_more(lit_end + lit_assistant_begin + choice); }; // wrap_seq(items) <- item[0] "<|end|><|begin|>assistant" item[1] ... @@ -2590,7 +2590,7 @@ static common_chat_params common_chat_params_init_solar_open(const common_chat_t auto seq = p.sequence(); for (auto i = 0u; i < items.size(); i++) { if (i == 0) { - seq += enable_thinking ? items[i] : lit_assistant_begin + items[i]; + seq += items[i]; continue; } seq += lit_end + lit_assistant_begin + items[i]; @@ -2601,7 +2601,7 @@ static common_chat_params common_chat_params_init_solar_open(const common_chat_t // Response format parser if (inputs.json_schema.is_object() && !inputs.json_schema.empty()) { auto parser_response_format = lit_content + p.content(p.schema(p.json(), "response-format", inputs.json_schema)); - return p.choice({ + return parser_prefix + p.choice({ wrap_seq({parser_reasoning, parser_response_format}), wrap_seq({parser_response_format}) }); @@ -2648,19 +2648,20 @@ static common_chat_params common_chat_params_init_solar_open(const common_chat_t if (min_calls == 1) { // If required, then try any combination of the reasoning, content, and tool call - p.choice({ + return parser_prefix + p.choice({ wrap_seq({parser_reasoning, parser_content, parser_tool_calls}), + wrap_seq({parser_reasoning, parser_tool_calls}), wrap_seq({parser_content, parser_tool_calls}), wrap_seq({parser_tool_calls}) }); } - return wrap_choice({parser_reasoning, parser_content, parser_tool_calls}); + return parser_prefix + wrap_choice({parser_reasoning, parser_content, parser_tool_calls}); } // Content only parser include_grammar = false; - return wrap_choice({parser_reasoning, parser_content}); + return parser_prefix + wrap_choice({parser_reasoning, parser_content}); }); data.parser = parser.save();