Get rid of some crazy formatting

This commit is contained in:
Piotr Wilkin 2026-02-11 22:53:02 +01:00
parent 18054b4e44
commit f8b0b75a00
1 changed files with 85 additions and 111 deletions

View File

@ -20,8 +20,7 @@ static std::vector<std::function<void(const common_chat_template & tmpl, diff_an
{ // Old reasoning Qwen templates - they don't really display reasoning content, but we still want to { // Old reasoning Qwen templates - they don't really display reasoning content, but we still want to
// support reasoning on them // support reasoning on them
[](const common_chat_template & tmpl, diff_analysis_result & analysis) -> void { [](const common_chat_template & tmpl, diff_analysis_result & analysis) -> void {
if (tmpl.src.find("content.split('</think>')") != std::string::npos && if (tmpl.src.find("content.split('</think>')") != std::string::npos && analysis.reasoning == reasoning_mode::NONE) {
analysis.reasoning == reasoning_mode::NONE) {
analysis.reasoning = reasoning_mode::FORCED_OPEN; analysis.reasoning = reasoning_mode::FORCED_OPEN;
analysis.markers.reasoning_start = "<think>"; analysis.markers.reasoning_start = "<think>";
analysis.markers.reasoning_end = "</think>"; analysis.markers.reasoning_end = "</think>";
@ -50,7 +49,8 @@ static std::vector<std::function<void(const common_chat_template & tmpl, diff_an
// Cohere Command R+ - content wrapped in <|CHATBOT_TOKEN|>...<|END_OF_TURN_TOKEN|> // Cohere Command R+ - content wrapped in <|CHATBOT_TOKEN|>...<|END_OF_TURN_TOKEN|>
[](const common_chat_template & tmpl, diff_analysis_result & analysis) -> void { [](const common_chat_template & tmpl, diff_analysis_result & analysis) -> void {
if (tmpl.src.find("<|CHATBOT_TOKEN|>") != std::string::npos && if (tmpl.src.find("<|CHATBOT_TOKEN|>") != std::string::npos &&
tmpl.src.find("<|END_OF_TURN_TOKEN|>") != std::string::npos && analysis.markers.content_start.empty()) { tmpl.src.find("<|END_OF_TURN_TOKEN|>") != std::string::npos &&
analysis.markers.content_start.empty()) {
analysis.content = content_mode::ALWAYS_WRAPPED; analysis.content = content_mode::ALWAYS_WRAPPED;
analysis.markers.content_start = "<|CHATBOT_TOKEN|>"; analysis.markers.content_start = "<|CHATBOT_TOKEN|>";
analysis.markers.content_end = "<|END_OF_TURN_TOKEN|>"; analysis.markers.content_end = "<|END_OF_TURN_TOKEN|>";
@ -82,8 +82,7 @@ static std::vector<std::function<void(const common_chat_template & tmpl, diff_an
}, },
// DeepSeek-R1-Distill-Qwen // DeepSeek-R1-Distill-Qwen
[](const common_chat_template & tmpl, diff_analysis_result & analysis) -> void { [](const common_chat_template & tmpl, diff_analysis_result & analysis) -> void {
if (tmpl.src.find( if (tmpl.src.find("{{'<Assistant><tool▁calls▁begin><tool▁call▁begin>' + tool['type'] + '<tool▁sep>'") !=
"{{'<Assistant><tool▁calls▁begin><tool▁call▁begin>' + tool['type'] + '<tool▁sep>'") !=
std::string::npos) { std::string::npos) {
analysis.markers.tool_section_start = "<tool▁calls▁begin>"; analysis.markers.tool_section_start = "<tool▁calls▁begin>";
analysis.markers.tool_section_end = "<tool▁calls▁end>"; analysis.markers.tool_section_end = "<tool▁calls▁end>";
@ -126,40 +125,16 @@ static json build_tool_call(const std::string & name, const json & args, const s
} }
static json first_tool_call_zero_args = build_tool_call("foofoo", json::object(), "call00001"); static json first_tool_call_zero_args = build_tool_call("foofoo", json::object(), "call00001");
static json first_tool_call_one_arg = build_tool_call("foofoo", static json first_tool_call_one_arg = build_tool_call("foofoo", json{ "first", "XXXX" }, "call00001");
json{ static json first_tool_call_one_arg_other_val = build_tool_call("foofoo",json{{ "first", "YYYY" }}, "call00001");
{ "first", "XXXX" } static json first_tool_call_other_arg = build_tool_call("foofoo",json{ { "second", "YYYY" }}, "call00001");
},
"call00001"); static json first_tool_call =
static json first_tool_call_one_arg_other_val = build_tool_call("foofoo", build_tool_call("foofoo", json{{ "first", "XXXX" }, { "second", "YYYY" }}, "call00001");
json{ static json second_tool_call =
{ "first", "YYYY" } build_tool_call("barbar", json{ { "first", "XXXX" }, { "second", "YYYY" }}, "call00002");
}, static json first_tool_call_alt_id =
"call00001"); build_tool_call("foofoo", json{{ "first", "XXXX" }, { "second", "YYYY" }}, "call99999");
static json first_tool_call_other_arg = build_tool_call("foofoo",
json{
{ "second", "YYYY" }
},
"call00001");
static json first_tool_call = build_tool_call("foofoo",
json{
{ "first", "XXXX" },
{ "second", "YYYY" }
},
"call00001");
static json second_tool_call = build_tool_call("barbar",
json{
{ "first", "XXXX" },
{ "second", "YYYY" }
},
"call00002");
// Tool call variants with different IDs for call_id detection
static json first_tool_call_alt_id = build_tool_call("foofoo",
json{
{ "first", "XXXX" },
{ "second", "YYYY" }
},
"call99999");
std::string differential_analyzer::apply_template(const common_chat_template & tmpl, const template_params & params) { std::string differential_analyzer::apply_template(const common_chat_template & tmpl, const template_params & params) {
templates_params tmpl_params; templates_params tmpl_params;
@ -685,8 +660,7 @@ void differential_analyzer::analyze_tool_call_format_json_native(const std::stri
int json_end = clean_haystack.find_last_of('}'); int json_end = clean_haystack.find_last_of('}');
std::string cut = clean_haystack.substr(json_start, json_end - json_start + 1); std::string cut = clean_haystack.substr(json_start, json_end - json_start + 1);
json call_struct = json::parse(cut); json call_struct = json::parse(cut);
auto register_field = [&](const std::string & prefix, auto register_field = [&](const std::string & prefix, const nlohmann::detail::iteration_proxy_value<json::iterator> & subel) {
const nlohmann::detail::iteration_proxy_value<json::iterator> & subel) {
if (subel.value().is_string() && std::string(subel.value()).find("call0000") != std::string::npos) { if (subel.value().is_string() && std::string(subel.value()).find("call0000") != std::string::npos) {
result.id_field = !prefix.empty() ? prefix + "." + subel.key() : subel.key(); result.id_field = !prefix.empty() ? prefix + "." + subel.key() : subel.key();
} else if (subel.value().is_string() && std::string(subel.value()) == fun_name_needle) { } else if (subel.value().is_string() && std::string(subel.value()) == fun_name_needle) {