#include "chat-auto-parser-helpers.h" #include "chat-diff-analyzer.h" #include "chat-peg-parser.h" #include "chat.h" #include "peg-parser.h" #include "testing.h" #include #include #include #include static void test_calculate_diff_split_basic(testing & t); static void test_calculate_diff_split_identical(testing & t); static void test_calculate_diff_split_common_prefix(testing & t); static void test_calculate_diff_split_common_suffix(testing & t); static void test_calculate_diff_split_common_both(testing & t); static void test_calculate_diff_split_empty_cases(testing & t); static void test_calculate_diff_split_no_common(testing & t); static void test_calculate_diff_split_single_char(testing & t); static void test_calculate_diff_split_overlaps(testing & t); static void test_calculate_diff_split_tag_boundaries(testing & t); static void test_calculate_diff_split(testing & t); static void test_until_common_prefix_basic(testing & t); static void test_until_common_prefix(testing & t); static void test_after_common_suffix_basic(testing & t); static void test_after_common_suffix(testing & t); static void test_analyze_tool_call_pure_json(testing & t); static void test_analyze_tool_call_function_name_markers(testing & t); static void test_analyze_tool_call_full_markers(testing & t); static void test_analyze_tool_call_edge_cases(testing & t); static void test_compare_variants_basic(testing & t); static void test_compare_variants_messages_modifier(testing & t); static void test_compare_variants_tools_modifier(testing & t); static void test_compare_variants_both_modifiers(testing & t); static void test_compare_variants_template_failure(testing & t); static void test_compare_variants_identity(testing & t); static void test_compare_variants(testing & t); // Seed-OSS template tool calling analysis tests static void test_seed_oss_tool_analysis(testing & t); static void test_seed_oss_tool_presence(testing & t); static void test_seed_oss_call_count(testing & t); static void test_seed_oss_function_names(testing & t); static void test_seed_oss_argument_count(testing & t); static void test_seed_oss_args_presence(testing & t); static void test_seed_oss_tool_with_reasoning(testing & t); // Nemotron template analysis tests static void test_nemotron_analysis(testing & t); static void test_nemotron_reasoning_detection(testing & t); static void test_nemotron_tool_format(testing & t); // CohereForAI template analysis tests static void test_cohere_reasoning_detection(testing & t); static void test_cohere_tool_format(testing & t); static void test_cohere_analysis(testing & t); // Marker separation static void test_marker_separation(testing & t); // standard_json_tools format tests static void test_standard_json_tools_formats(testing & t); static void test_standard_json_tools_openai(testing & t); static void test_standard_json_tools_cohere(testing & t); static void test_standard_json_tools_function_key(testing & t); // normalize_quotes_to_json tests static void test_normalize_quotes_to_json(testing & t); static void test_normalize_quotes_with_embedded_quotes(testing & t); // TAG_WITH_TAGGED argument parsing tests static void test_tagged_args_with_embedded_quotes(testing & t); int main(int argc, char * argv[]) { testing t(std::cout); t.verbose = true; // usage: test-chat-auto-parser-helpers [filter_regex] if (argc > 1) { t.set_filter(argv[1]); } t.test("diff_split", test_calculate_diff_split); t.test("common_prefix", test_until_common_prefix); t.test("common_suffix", test_after_common_suffix); t.test("compare_variants", test_compare_variants); t.test("segments", test_marker_separation); t.test("seed_oss_diffs", test_seed_oss_tool_analysis); t.test("cohere", test_cohere_analysis); t.test("nemotron", test_nemotron_analysis); t.test("standard_json_tools", test_standard_json_tools_formats); t.test("normalize_quotes_to_json", test_normalize_quotes_to_json); t.test("tagged_args_embedded_quotes", test_tagged_args_with_embedded_quotes); return t.summary(); } static void test_marker_separation(testing & t) { auto single_square_marker = segmentize_markers("pre_marker[marker]post_marker"); auto single_diag_marker = segmentize_markers("pre_markerpost_marker"); auto paired_markers = segmentize_markers("world"); auto double_different_markers = segmentize_markers("[hello][world]"); auto in_between = segmentize_markers("imdabada[hey]"); t.test("single_square_marker", [&] (testing & t) { t.assert_equal("first is text", segment_type::TEXT, single_square_marker[0].type); t.assert_equal("second is marker", segment_type::MARKER, single_square_marker[1].type); t.assert_equal("last is text", segment_type::TEXT, single_square_marker[2].type); t.assert_equal("first is 'pre_marker'", "pre_marker", single_square_marker[0].value); t.assert_equal("second is '[marker]'", "[marker]", single_square_marker[1].value); t.assert_equal("last is 'post_marker'", "post_marker", single_square_marker[2].value); }); t.test("single_diagonal_marker", [&] (testing & t) { t.assert_equal("first is text", segment_type::TEXT, single_diag_marker[0].type); t.assert_equal("second is marker", segment_type::MARKER, single_diag_marker[1].type); t.assert_equal("last is text", segment_type::TEXT, single_diag_marker[2].type); t.assert_equal("first is 'pre_marker'", "pre_marker", single_diag_marker[0].value); t.assert_equal("second is ''", "", single_diag_marker[1].value); t.assert_equal("last is 'post_marker'", "post_marker", single_diag_marker[2].value); }); t.test("paired_markers", [&] (testing & t) { t.assert_equal("first is marker", segment_type::MARKER, paired_markers[0].type); t.assert_equal("second is text", segment_type::TEXT, paired_markers[1].type); t.assert_equal("third is marker", segment_type::MARKER, paired_markers[2].type); t.assert_equal("first is ''", "", paired_markers[0].value); t.assert_equal("second is 'world'", "world", paired_markers[1].value); t.assert_equal("third is ''", "", paired_markers[2].value); }); t.test("double_different_markers", [&] (testing & t) { t.assert_equal("first is marker", segment_type::MARKER, double_different_markers[0].type); t.assert_equal("second is marker", segment_type::MARKER, double_different_markers[1].type); t.assert_equal("third is marker", segment_type::MARKER, double_different_markers[2].type); t.assert_equal("fourth is marker", segment_type::MARKER, double_different_markers[3].type); t.assert_equal("first is ''", "", double_different_markers[0].value); t.assert_equal("second is '[hello]'", "[hello]", double_different_markers[1].value); t.assert_equal("third is ''", "", double_different_markers[2].value); t.assert_equal("fourth is '[world]'", "[world]", double_different_markers[3].value); }); t.test("in_between", [&] (testing & t) { t.assert_equal("first is text", segment_type::TEXT, in_between[0].type); t.assert_equal("second is marker", segment_type::MARKER, in_between[1].type); t.assert_equal("third is text", segment_type::TEXT, in_between[2].type); t.assert_equal("fourth is marker", segment_type::MARKER, in_between[3].type); t.assert_equal("fifth is text", segment_type::TEXT, in_between[4].type); t.assert_equal("sixth is marker", segment_type::MARKER, in_between[5].type); t.assert_equal("first is 'im'", "im", in_between[0].value); t.assert_equal("second is ''", "", in_between[1].value); t.assert_equal("third is 'daba'", "daba", in_between[2].value); t.assert_equal("fourth is ''", "", in_between[3].value); t.assert_equal("fifth is 'da'", "da", in_between[4].value); t.assert_equal("sixth is '[hey]'", "[hey]", in_between[5].value); }); } static void test_calculate_diff_split(testing & t) { t.test("calculate_diff_split basic", test_calculate_diff_split_basic); t.test("calculate_diff_split identical", test_calculate_diff_split_identical); t.test("calculate_diff_split common prefix", test_calculate_diff_split_common_prefix); t.test("calculate_diff_split common suffix", test_calculate_diff_split_common_suffix); t.test("calculate_diff_split common both", test_calculate_diff_split_common_both); t.test("calculate_diff_split empty cases", test_calculate_diff_split_empty_cases); t.test("calculate_diff_split no common", test_calculate_diff_split_no_common); t.test("calculate_diff_split single char", test_calculate_diff_split_single_char); t.test("calculate_diff_split overlaps", test_calculate_diff_split_overlaps); t.test("calculate_diff_split tag boundaries", test_calculate_diff_split_tag_boundaries); } static void test_calculate_diff_split_basic(testing & t) { diff_split result = calculate_diff_split("hello world", "hello test"); t.assert_equal("prefix should be 'hello '", "hello ", result.prefix); t.assert_equal("left should be 'world'", "world", result.left); t.assert_equal("right should be 'test'", "test", result.right); t.assert_equal("suffix should be empty", "", result.suffix); result = calculate_diff_split("abc", "xyz"); t.assert_equal("prefix should be empty", "", result.prefix); t.assert_equal("left should be 'abc'", "abc", result.left); t.assert_equal("right should be 'xyz'", "xyz", result.right); t.assert_equal("suffix should be empty", "", result.suffix); result = calculate_diff_split("prefixA suffix", "prefixB suffix"); t.assert_equal("prefix should be 'prefix'", "prefix", result.prefix); t.assert_equal("left should be 'A'", "A", result.left); t.assert_equal("right should be 'B'", "B", result.right); t.assert_equal("suffix should be ' suffix'", " suffix", result.suffix); } static void test_calculate_diff_split_identical(testing & t) { diff_split result = calculate_diff_split("hello", "hello"); t.assert_equal("prefix should be 'hello'", "hello", result.prefix); t.assert_equal("left should be empty", "", result.left); t.assert_equal("right should be empty", "", result.right); t.assert_equal("suffix should be empty", "", result.suffix); result = calculate_diff_split("", ""); t.assert_equal("prefix should be empty", "", result.prefix); t.assert_equal("left should be empty", "", result.left); t.assert_equal("right should be empty", "", result.right); t.assert_equal("suffix should be empty", "", result.suffix); result = calculate_diff_split("a", "a"); t.assert_equal("prefix should be 'a'", "a", result.prefix); t.assert_equal("left should be empty", "", result.left); t.assert_equal("right should be empty", "", result.right); t.assert_equal("suffix should be empty", "", result.suffix); } static void test_calculate_diff_split_common_prefix(testing & t) { diff_split result = calculate_diff_split("abcdef", "abcxyz"); t.assert_equal("prefix should be 'abc'", "abc", result.prefix); t.assert_equal("left should be 'def'", "def", result.left); t.assert_equal("right should be 'xyz'", "xyz", result.right); t.assert_equal("suffix should be empty", "", result.suffix); result = calculate_diff_split("same", "sameagain"); t.assert_equal("prefix should be 'same'", "same", result.prefix); t.assert_equal("left should be empty", "", result.left); t.assert_equal("right should be 'again'", "again", result.right); t.assert_equal("suffix should be empty", "", result.suffix); result = calculate_diff_split("test", "testing"); t.assert_equal("prefix should be 'test'", "test", result.prefix); t.assert_equal("left should be empty", "", result.left); t.assert_equal("right should be 'ing'", "ing", result.right); t.assert_equal("suffix should be empty", "", result.suffix); } static void test_calculate_diff_split_common_suffix(testing & t) { diff_split result = calculate_diff_split("123end", "456end"); t.assert_equal("prefix should be empty", "", result.prefix); t.assert_equal("left should be '123'", "123", result.left); t.assert_equal("right should be '456'", "456", result.right); t.assert_equal("suffix should be 'end'", "end", result.suffix); result = calculate_diff_split("start", "end"); t.assert_equal("prefix should be empty", "", result.prefix); t.assert_equal("left should be 'start'", "start", result.left); t.assert_equal("right should be 'end'", "end", result.right); t.assert_equal("suffix should be empty", "", result.suffix); result = calculate_diff_split("abcsuffix", "xyzsuffix"); t.assert_equal("prefix should be empty", "", result.prefix); t.assert_equal("left should be 'abc'", "abc", result.left); t.assert_equal("right should be 'xyz'", "xyz", result.right); t.assert_equal("suffix should be 'suffix'", "suffix", result.suffix); } static void test_calculate_diff_split_common_both(testing & t) { diff_split result = calculate_diff_split("helloXworld", "helloYworld"); t.assert_equal("prefix should be 'hello'", "hello", result.prefix); t.assert_equal("left should be 'X'", "X", result.left); t.assert_equal("right should be 'Y'", "Y", result.right); t.assert_equal("suffix should be 'world'", "world", result.suffix); result = calculate_diff_split("ABCmiddleXYZ", "ABCdifferentXYZ"); t.assert_equal("prefix should be 'ABC'", "ABC", result.prefix); t.assert_equal("left should be 'middle'", "middle", result.left); t.assert_equal("right should be 'different'", "different", result.right); t.assert_equal("suffix should be 'XYZ'", "XYZ", result.suffix); result = calculate_diff_split("startAend", "startBend"); t.assert_equal("prefix should be 'start'", "start", result.prefix); t.assert_equal("left should be 'A'", "A", result.left); t.assert_equal("right should be 'B'", "B", result.right); t.assert_equal("suffix should be 'end'", "end", result.suffix); // Edge case: common prefix and suffix overlap result = calculate_diff_split("aa", "ab"); t.assert_equal("prefix should be 'a'", "a", result.prefix); t.assert_equal("left should be 'a'", "a", result.left); t.assert_equal("right should be 'b'", "b", result.right); t.assert_equal("suffix should be empty", "", result.suffix); } static void test_calculate_diff_split_empty_cases(testing & t) { // Empty left, non-empty right diff_split result = calculate_diff_split("", "hello"); t.assert_equal("prefix should be empty", "", result.prefix); t.assert_equal("left should be empty", "", result.left); t.assert_equal("right should be 'hello'", "hello", result.right); t.assert_equal("suffix should be empty", "", result.suffix); // Non-empty left, empty right result = calculate_diff_split("hello", ""); t.assert_equal("prefix should be empty", "", result.prefix); t.assert_equal("left should be 'hello'", "hello", result.left); t.assert_equal("right should be empty", "", result.right); t.assert_equal("suffix should be empty", "", result.suffix); // Both empty result = calculate_diff_split("", ""); t.assert_equal("prefix should be empty", "", result.prefix); t.assert_equal("left should be empty", "", result.left); t.assert_equal("right should be empty", "", result.right); t.assert_equal("suffix should be empty", "", result.suffix); // Left single char, empty right result = calculate_diff_split("a", ""); t.assert_equal("prefix should be empty", "", result.prefix); t.assert_equal("left should be 'a'", "a", result.left); t.assert_equal("right should be empty", "", result.right); t.assert_equal("suffix should be empty", "", result.suffix); // Empty left, right single char result = calculate_diff_split("", "a"); t.assert_equal("prefix should be empty", "", result.prefix); t.assert_equal("left should be empty", "", result.left); t.assert_equal("right should be 'a'", "a", result.right); t.assert_equal("suffix should be empty", "", result.suffix); } static void test_calculate_diff_split_no_common(testing & t) { diff_split result = calculate_diff_split("abc", "xyz"); t.assert_equal("prefix should be empty", "", result.prefix); t.assert_equal("left should be 'abc'", "abc", result.left); t.assert_equal("right should be 'xyz'", "xyz", result.right); t.assert_equal("suffix should be empty", "", result.suffix); result = calculate_diff_split("left", "right"); // The algorithm finds "t" as a common suffix since both strings end with 't' // This is the algorithm's actual behavior - it finds maximal common suffix t.assert_equal("prefix should be empty", "", result.prefix); t.assert_equal("left should be 'lef'", "lef", result.left); t.assert_equal("right should be 'righ'", "righ", result.right); t.assert_equal("suffix should be 't'", "t", result.suffix); result = calculate_diff_split("123", "456"); t.assert_equal("prefix should be empty", "", result.prefix); t.assert_equal("left should be '123'", "123", result.left); t.assert_equal("right should be '456'", "456", result.right); t.assert_equal("suffix should be empty", "", result.suffix); } static void test_calculate_diff_split_single_char(testing & t) { diff_split result = calculate_diff_split("a", "b"); t.assert_equal("prefix should be empty", "", result.prefix); t.assert_equal("left should be 'a'", "a", result.left); t.assert_equal("right should be 'b'", "b", result.right); t.assert_equal("suffix should be empty", "", result.suffix); result = calculate_diff_split("a", "a"); t.assert_equal("prefix should be 'a'", "a", result.prefix); t.assert_equal("left should be empty", "", result.left); t.assert_equal("right should be empty", "", result.right); t.assert_equal("suffix should be empty", "", result.suffix); result = calculate_diff_split("a", "ab"); t.assert_equal("prefix should be 'a'", "a", result.prefix); t.assert_equal("left should be empty", "", result.left); t.assert_equal("right should be 'b'", "b", result.right); t.assert_equal("suffix should be empty", "", result.suffix); result = calculate_diff_split("ab", "a"); t.assert_equal("prefix should be 'a'", "a", result.prefix); t.assert_equal("left should be 'b'", "b", result.left); t.assert_equal("right should be empty", "", result.right); t.assert_equal("suffix should be empty", "", result.suffix); } static void test_calculate_diff_split_overlaps(testing & t) { // One string is substring of another diff_split result = calculate_diff_split("test", "testing"); t.assert_equal("prefix should be 'test'", "test", result.prefix); t.assert_equal("left should be empty", "", result.left); t.assert_equal("right should be 'ing'", "ing", result.right); t.assert_equal("suffix should be empty", "", result.suffix); result = calculate_diff_split("testing", "test"); t.assert_equal("prefix should be 'test'", "test", result.prefix); t.assert_equal("left should be 'ing'", "ing", result.left); t.assert_equal("right should be empty", "", result.right); t.assert_equal("suffix should be empty", "", result.suffix); // Similar strings with one extra char at start result = calculate_diff_split("Xtest", "Ytest"); // The algorithm finds "test" as a common suffix since both strings end with "test" // This is the algorithm's actual behavior - it finds maximal common suffix t.assert_equal("prefix should be empty", "", result.prefix); t.assert_equal("left should be 'X'", "X", result.left); t.assert_equal("right should be 'Y'", "Y", result.right); t.assert_equal("suffix should be 'test'", "test", result.suffix); // Similar strings with one extra char at end result = calculate_diff_split("testX", "testY"); t.assert_equal("prefix should be 'test'", "test", result.prefix); t.assert_equal("left should be 'X'", "X", result.left); t.assert_equal("right should be 'Y'", "Y", result.right); t.assert_equal("suffix should be empty", "", result.suffix); // Strings that are reverses result = calculate_diff_split("abc", "cba"); t.assert_equal("prefix should be empty", "", result.prefix); t.assert_equal("left should be 'abc'", "abc", result.left); t.assert_equal("right should be 'cba'", "cba", result.right); t.assert_equal("suffix should be empty", "", result.suffix); } static void test_calculate_diff_split_tag_boundaries(testing & t) { // Test with unclosed XML tags diff_split result = calculate_diff_split("testcontent"); // The fix_tag_boundaries should move incomplete tags appropriately t.assert_true("prefix should start with 'test'", result.prefix.find("test") == 0); t.assert_true("should handle tag boundaries", result.left != "" || result.right != "" || result.suffix != ""); // Test with unclosed brackets result = calculate_diff_split("test[", "test]value"); t.assert_true("should handle bracket boundaries", result.left != "" || result.right != "" || result.suffix != ""); // Test with partial tags on both sides result = calculate_diff_split("prefix", "prefixsuffix"); // fix_tag_boundaries moves the incomplete '<' from prefix to left/right t.assert_equal("prefix should be 'prefix'", "prefix", result.prefix); t.assert_equal("left should be ''", "", result.left); t.assert_equal("right should be 'suffix'", "suffix", result.right); t.assert_equal("suffix should be empty", "", result.suffix); // Test with complex nested tags result = calculate_diff_split("prefix
content
", "prefix
different
"); // Algorithm finds "ent" as a common suffix because both strings end with it // This is the actual algorithm behavior, though not semantically ideal t.assert_equal("prefix should be 'prefix
'", "prefix
", result.prefix); t.assert_equal("left should be 'cont'", "cont", result.left); t.assert_equal("right should be 'differ'", "differ", result.right); t.assert_equal("suffix should be 'ent
'", "ent
", result.suffix); // Test with unclosed angle bracket result = calculate_diff_split("Hello ", "Hello test"); t.assert_equal("prefix should be 'Hello '", "Hello ", result.prefix); t.assert_true("left should contain ''", result.left.find("") != std::string::npos); t.assert_equal("right should be 'test'", "test", result.right); t.assert_equal("suffix should be empty", "", result.suffix); // Test with unclosed square bracket result = calculate_diff_split("test [array]", "test other"); t.assert_equal("prefix should be 'test '", "test ", result.prefix); t.assert_true("left should contain '[array]'", result.left.find("[array]") != std::string::npos); t.assert_equal("right should be 'other'", "other", result.right); t.assert_equal("suffix should be empty", "", result.suffix); // Test empty prefix and suffix with tags result = calculate_diff_split("left", "righ"); t.assert_equal("prefix should be ''", "", result.prefix); t.assert_equal("left should be 'left'", "left", result.left); t.assert_equal("right should be 'righ'", "righ", result.right); t.assert_equal("suffix should be ''", "", result.suffix); { // real case from template tests, simplified std::string left = "PREFIXSure"; std::string right = "PREFIXLemme thinkSure"; result = calculate_diff_split(left, right); t.assert_equal("prefix should be PREFIX", "PREFIX", result.prefix); t.assert_equal("suffix should be Sure", "Sure", result.suffix); t.assert_equal("left should be empty", "", result.left); t.assert_equal("right should be Lemme think", "Lemme think", result.right); } { // Real case: special tokens with |> boundary issue // The suffix starts with |> which should be moved to complete <|END_RESPONSE and <|END_ACTION std::string prefix = "SOME_PREFIX"; std::string suffix = "|><|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>"; std::string left_diff = "<|START_RESPONSE|>Let me help you.<|END_RESPONSE"; std::string right_diff = "<|START_THINKING|><|END_THINKING|><|START_ACTION|>[\n" " {\"tool_call_id\": \"0\", \"tool_name\": \"test_function_name\", " "\"parameters\": {\"param1\": \"value1\", \"param2\": \"value2\"}}\n" "]<|END_ACTION"; std::string left = prefix + left_diff + suffix; std::string right = prefix + right_diff + suffix; result = calculate_diff_split(left, right); t.assert_equal("special token prefix", prefix, result.prefix); // The |> should be moved from suffix to complete the tokens t.assert_equal("special token left", "<|START_RESPONSE|>Let me help you.<|END_RESPONSE|>", result.left); t.assert_true("special token right ends with |>", result.right.find("<|END_ACTION|>") != std::string::npos); t.assert_equal("special token suffix", "<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>", result.suffix); } } static void test_until_common_prefix(testing & t) { t.test("until_common_prefix basic", test_until_common_prefix_basic); } static void test_until_common_prefix_basic(testing & t) { // Test case from the user request std::string result = until_common_prefix("", "", ""); t.assert_equal("untilCommonPrefix should return ''", "", result); // Additional test cases to ensure robustness // Test with different common prefix lengths result = until_common_prefix("prefixsuffix", "different", "other"); t.assert_equal("should return 'prefix'", "prefix", result); // Test when common prefix is at the start result = until_common_prefix("rest", "left", "right"); t.assert_equal("should return empty string when common prefix at start", "", result); // Test when there's no common prefix result = until_common_prefix("something", "left", "right"); t.assert_equal("should return empty string when no common prefix", "", result); // Test with empty strings result = until_common_prefix("test", "", "right"); t.assert_equal("should return empty string when left is empty", "", result); // Test with longer common prefix result = until_common_prefix("abcXYZrest", "left", "right"); t.assert_equal("should return 'abcXYZ'", "abcXYZ", result); } static void test_after_common_suffix(testing & t) { t.test("after_common_suffix basic", test_after_common_suffix_basic); } static void test_after_common_suffix_basic(testing & t) { // Test case from the user request std::string result = after_common_suffix("100", "100", "535"); t.assert_equal("afterCommonSuffix should return ''", "", result); // Test when common suffix is at the end result = after_common_suffix("rest", "left", "right"); t.assert_equal("should return empty string when common suffix at end", "", result); // Test with empty strings result = after_common_suffix("test", "left", ""); t.assert_equal("should return empty string when right is empty", "", result); // Test case with XML-like structure similar to the main example result = after_common_suffix("value", "value", "different"); t.assert_equal("should return ''", "", result); // Test with longer common suffix appearing at the end of full result = after_common_suffix("prefixrest", "prefixleft", "prefixright"); t.assert_equal("should return '' when common suffix is at end of full", "", result); // Test with common suffix appearing in middle but not at end result = after_common_suffix("content", "value", "other"); t.assert_equal("should return '' when common suffix appears before end", "", result); // Test with multi-character common suffix at the very end of full result = after_common_suffix("startend", "prefixleft", "prefixright"); t.assert_equal("should return '' when common suffix is at end of full", "", result); } static void test_compare_variants(testing & t) { t.test("compare_variants basic", test_compare_variants_basic); t.test("compare_variants messages modifier", test_compare_variants_messages_modifier); t.test("compare_variants tools modifier", test_compare_variants_tools_modifier); t.test("compare_variants both modifiers", test_compare_variants_both_modifiers); t.test("compare_variants template failure", test_compare_variants_template_failure); t.test("compare_variants identity", test_compare_variants_identity); } static void test_compare_variants_basic(testing & t) { // Create a simple template that just echoes messages common_chat_template tmpl("{{ messages[0]['content'] }}", "", ""); template_params params; params.messages = json::array({ json {{"role", "user"}, {"content", "Hello"}} }); auto modifier = [](template_params & p) { p.messages[0]["content"] = "World"; }; auto result = differential_analyzer::compare_variants(tmpl, params, modifier); t.assert_true("result should have value", result.has_value()); // The template might not output anything if messages is empty or format is different // Check that we get a valid result t.assert_true("prefix or left should have content", !result->diff.prefix.empty() || !result->diff.left.empty()); } static void test_compare_variants_messages_modifier(testing & t) { // Test with messages modifier only common_chat_template tmpl("{% for message in messages %}{{ message['role'] }}:{{ message['content'] }}{% endfor %}", "", ""); template_params params; params.messages = json::array({ json {{"role", "user"}, {"content", "A"}} }); auto modifier = [](template_params & p) { p.messages[0]["content"] = "B"; }; std::optional result = differential_analyzer::compare_variants(tmpl, params, modifier); t.assert_true("result should have value", result.has_value()); t.assert_equal("left should be 'A'", "A", result->diff.left); t.assert_equal("right should be 'B'", "B", result->diff.right); } static void test_compare_variants_tools_modifier(testing & t) { // Test with tools modifier only common_chat_template tmpl( "{% for tool in tools %}{{ tool['name'] }}{% endfor %}", "", ""); template_params params; params.tools = json::array({ json {{"name", "foo"}} }); auto modifier = [](template_params & p) { p.tools[0]["name"] = "bar"; }; auto result = differential_analyzer::compare_variants(tmpl, params, modifier); t.assert_true("result should have value", result.has_value()); t.assert_equal("left should be 'foo'", "foo", result->diff.left); t.assert_equal("right should be 'bar'", "bar", result->diff.right); } static void test_compare_variants_both_modifiers(testing & t) { // Test with both messages and tools modifiers using the for loop approach common_chat_template tmpl( "{% for message in messages %}{{ message['role'] }}:{{ message['content'] }}{% endfor %}", "", ""); template_params params; params.messages = json::array({ json {{"role", "user"}, {"content", "A"}} }); auto modifier = [](template_params & p) { p.messages[0]["content"] = "B"; p.messages[0]["role"] = "newuser"; }; auto result = differential_analyzer::compare_variants(tmpl, params, modifier); t.assert_true("result should have value", result.has_value()); t.assert_equal("left should be 'user:A'", "user:A", result->diff.left); t.assert_equal("right should be 'newuser:B'", "newuser:B", result->diff.right); } static void test_compare_variants_template_failure(testing & t) { // Test with template that causes failure during application (not construction) // We use a valid template syntax but one that will fail during application common_chat_template tmpl("{{ messages[0]['nonexistent_field'] }}", "", ""); template_params params; params.messages = json::array({ json {{"role", "user"}, {"content", "Hello"}} }); auto modifier = [](template_params & p) { p.messages[0]["content"] = "World"; }; auto result = differential_analyzer::compare_variants(tmpl, params, modifier); t.assert_true("result should be nullopt on template failure", !result.has_value()); } static void test_compare_variants_identity(testing & t) { // Test with identity modifier (no change) common_chat_template tmpl("{{ messages[0]['content'] }}", "", ""); template_params params; params.messages = json::array({ json {{"role", "user"}, {"content", "Hello"}} }); // No modifier - should use identity auto result = differential_analyzer::compare_variants(tmpl, params, nullptr); t.assert_true("result should have value", result.has_value()); t.assert_equal("prefix should be 'Hello'", "Hello", result->diff.prefix); t.assert_equal("left should be empty", "", result->diff.left); t.assert_equal("right should be empty", "", result->diff.right); t.assert_equal("suffix should be empty", "", result->diff.suffix); } // ============================================================================ // Seed-OSS Template Tool Calling Analysis Tests // ============================================================================ static void test_seed_oss_tool_analysis(testing & t) { t.test("Seed-OSS tool presence", test_seed_oss_tool_presence); t.test("Seed-OSS call count", test_seed_oss_call_count); t.test("Seed-OSS function names", test_seed_oss_function_names); t.test("Seed-OSS argument count", test_seed_oss_argument_count); t.test("Seed-OSS args presence", test_seed_oss_args_presence); t.test("Seed-OSS tool with reasoning", test_seed_oss_tool_with_reasoning); } // Helper to load Seed-OSS template static common_chat_template load_seed_oss_template(testing & t) { std::string template_path = "models/templates/ByteDance-Seed-OSS.jinja"; std::ifstream fin(template_path, std::ios::binary); std::ostringstream buf; if (fin.is_open()) { buf << fin.rdbuf(); } std::string template_source = buf.str(); common_chat_template tmpl(template_source, "", ""); t.assert_true("Seed-OSS template loaded successfully", template_source.length() > 0); return tmpl; } // Helper to build tool call JSON static json build_tool_call(const std::string & name, const json & args, const std::string & id = "call_001") { return json{ {"id", id}, {"type", "function"}, {"function", json{ {"name", name}, {"arguments", args} }} }; } // Helper to build tools definition static json build_tools_definition() { json parameters_schema = json::object(); parameters_schema["type"] = "object"; parameters_schema["properties"] = json::object(); parameters_schema["properties"]["param1"] = json::object({ {"type", "string"}, {"description", "First parameter"} }); parameters_schema["properties"]["param2"] = json::object({ {"type", "string"}, {"description", "Second parameter"} }); parameters_schema["required"] = json::array({"param1", "param2"}); return json::array({ json{ {"type", "function"}, {"function", json{ {"name", "test_function_name"}, {"description", "A test function for debugging"}, {"parameters", parameters_schema} }} } }); } // T1: Compare with/without tool call (user, assistant) static void test_seed_oss_tool_presence(testing & t) { common_chat_template tmpl = load_seed_oss_template(t); json assistant_no_tools = json{ {"role", "assistant"}, {"content", "Let me help you."} }; json assistant_with_tools = json{ {"role", "assistant"}, {"content", nullptr}, {"tool_calls", json::array({ build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}})) })} }; json user_msg = json{ {"role", "user"}, {"content", "Hello, please help me."} }; template_params params_no_tools; params_no_tools.messages = json::array({user_msg, assistant_no_tools}); params_no_tools.tools = build_tools_definition(); params_no_tools.add_generation_prompt = false; params_no_tools.enable_thinking = true; template_params params_with_tools; params_with_tools.messages = json::array({user_msg, assistant_with_tools}); params_with_tools.tools = build_tools_definition(); params_with_tools.add_generation_prompt = false; params_with_tools.enable_thinking = true; auto result = differential_analyzer::compare_variants(tmpl, params_no_tools, [&](template_params & p) { p.messages = params_with_tools.messages; }); t.assert_true("T1 result should have value", result.has_value()); const auto & diff = result->diff; t.assert_true("T1 prefix should contain system", diff.prefix.find("system") != std::string::npos); t.assert_true("T1 prefix should contain user", diff.prefix.find("user") != std::string::npos); t.assert_true("T1 prefix should contain assistant", diff.prefix.find("assistant") != std::string::npos); // Left should be the assistant content without tool t.assert_equal("T1 left should contain 'Let me help you.'", "Let me help you.", diff.left); // Right should contain the tool call markers t.assert_true("T1 right should contain tool_call begin", diff.right.find("") != std::string::npos); t.assert_true("T1 right should contain function tag", diff.right.find("") != std::string::npos); t.assert_true("T1 right should contain parameter=param1", diff.right.find("") != std::string::npos); t.assert_true("T1 right should contain parameter=param2", diff.right.find("") != std::string::npos); t.assert_true("T1 right should contain value1", diff.right.find("value1") != std::string::npos); t.assert_true("T1 right should contain value2", diff.right.find("value2") != std::string::npos); t.assert_true("T1 right should contain tool_call end", diff.right.find("") != std::string::npos); // Suffix should be the eos token t.assert_equal("T1 suffix should be ''", "", diff.suffix); } // T2: Compare one vs two tool calls static void test_seed_oss_call_count(testing & t) { common_chat_template tmpl = load_seed_oss_template(t); json assistant_one_call = json{ {"role", "assistant"}, {"content", nullptr}, {"tool_calls", json::array({ build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}})) })} }; json assistant_two_calls = json{ {"role", "assistant"}, {"content", nullptr}, {"tool_calls", json::array({ build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}})), build_tool_call("test_function_name", json::object({{"param1", "value3"}, {"param2", "value4"}}), "call_002") })} }; json user_msg = json{ {"role", "user"}, {"content", "Hello, please help me."} }; template_params params_one; params_one.messages = json::array({user_msg, assistant_one_call}); params_one.tools = build_tools_definition(); params_one.add_generation_prompt = false; params_one.enable_thinking = true; auto result = differential_analyzer::compare_variants(tmpl, params_one, [&](template_params & p) { p.messages = json::array({user_msg, assistant_two_calls}); }); t.assert_true("T2 result should have value", result.has_value()); const auto & diff = result->diff; // Prefix should include the first tool call t.assert_true("T2 prefix should contain first tool_call begin", diff.prefix.find("") != std::string::npos); t.assert_true("T2 prefix should contain first function", diff.prefix.find("") != std::string::npos); t.assert_true("T2 prefix should contain value1", diff.prefix.find("value1") != std::string::npos); t.assert_true("T2 prefix should contain value2", diff.prefix.find("value2") != std::string::npos); t.assert_true("T2 prefix should contain first tool_call end", diff.prefix.find("") != std::string::npos); // Left should be empty (no second tool call in variant A) t.assert_equal("T2 left should be empty", "", diff.left); // Right should contain the second tool call t.assert_true("T2 right should contain second tool_call begin", diff.right.find("") != std::string::npos); t.assert_true("T2 right should contain second function", diff.right.find("") != std::string::npos); t.assert_true("T2 right should contain value3", diff.right.find("value3") != std::string::npos); t.assert_true("T2 right should contain value4", diff.right.find("value4") != std::string::npos); t.assert_true("T2 right should contain second tool_call end", diff.right.find("") != std::string::npos); // Suffix should be the eos token t.assert_equal("T2 suffix should be ''", "", diff.suffix); } // T3: Compare different function names static void test_seed_oss_function_names(testing & t) { common_chat_template tmpl = load_seed_oss_template(t); // Build tools with two different function names json parameters_schema = json::object(); parameters_schema["type"] = "object"; parameters_schema["properties"] = json::object(); parameters_schema["properties"]["arg1"] = json::object({ {"type", "string"}, {"description", "Argument 1"} }); parameters_schema["required"] = json::array({"arg1"}); json tools = json::array({ json{ {"type", "function"}, {"function", json{ {"name", "func_alpha"}, {"description", "First function"}, {"parameters", parameters_schema} }} }, json{ {"type", "function"}, {"function", json{ {"name", "func_beta"}, {"description", "Second function"}, {"parameters", parameters_schema} }} } }); json assistant_func_alpha = json{ {"role", "assistant"}, {"content", nullptr}, {"tool_calls", json::array({ build_tool_call("func_alpha", json::object({{"arg1", "test_value"}})) })} }; json assistant_func_beta = json{ {"role", "assistant"}, {"content", nullptr}, {"tool_calls", json::array({ build_tool_call("func_beta", json::object({{"arg1", "test_value"}})) })} }; json user_msg = json{ {"role", "user"}, {"content", "Hello"} }; template_params params_alpha; params_alpha.messages = json::array({user_msg, assistant_func_alpha}); params_alpha.tools = tools; params_alpha.add_generation_prompt = false; params_alpha.enable_thinking = true; auto result = differential_analyzer::compare_variants(tmpl, params_alpha, [&](template_params & p) { p.messages = json::array({user_msg, assistant_func_beta}); }); t.assert_true("T3 result should have value", result.has_value()); const auto & diff = result->diff; bool func_alpha_in_left = diff.left.find("func_alpha") != std::string::npos; bool func_alpha_in_prefix = diff.prefix.find("func_alpha") != std::string::npos; bool func_beta_in_right = diff.right.find("func_beta") != std::string::npos; bool func_beta_in_prefix = diff.prefix.find("func_beta") != std::string::npos; bool func_beta_in_suffix = diff.suffix.find("func_beta") != std::string::npos; // Left should contain func_alpha (or be in prefix) t.assert_true("T3 left should contain func_alpha (or prefix)", func_alpha_in_left || func_alpha_in_prefix); // Right should contain func_beta t.assert_true("T3 right should contain func_beta", func_beta_in_right || func_beta_in_prefix || func_beta_in_suffix); // Both should have the same parameter value (in common parts, not in diffs) // Since both have same args, test_value will be in prefix/suffix t.assert_true("T3 diff should contain test_value (in prefix or suffix)", diff.prefix.find("test_value") != std::string::npos || diff.suffix.find("test_value") != std::string::npos); } // T4: Compare different argument counts (zero, one, two parameters) static void test_seed_oss_argument_count(testing & t) { common_chat_template tmpl = load_seed_oss_template(t); // Build tools with 0, 1, or 2 required parameters json params_2_required = json::object(); params_2_required["type"] = "object"; params_2_required["properties"] = json::object(); params_2_required["properties"]["arg1"] = json::object({ {"type", "string"}, {"description", "Argument 1"} }); params_2_required["properties"]["arg2"] = json::object({ {"type", "string"}, {"description", "Argument 2"} }); params_2_required["required"] = json::array({"arg1", "arg2"}); json params_1_required = json::object(); params_1_required["type"] = "object"; params_1_required["properties"] = json::object(); params_1_required["properties"]["arg1"] = json::object({ {"type", "string"}, {"description", "Argument 1"} }); params_1_required["required"] = json::array({"arg1"}); json tools = json::array({ json{ {"type", "function"}, {"function", json{ {"name", "test_func"}, {"description", "Test function"}, {"parameters", params_2_required} }} } }); // Test: zero args vs one arg json assistant_zero_args = json{ {"role", "assistant"}, {"content", nullptr}, {"tool_calls", json::array({ build_tool_call("test_func", json::object()) })} }; json assistant_one_arg = json{ {"role", "assistant"}, {"content", nullptr}, {"tool_calls", json::array({ build_tool_call("test_func", json::object({{"arg1", "value1"}})) })} }; json assistant_two_args = json{ {"role", "assistant"}, {"content", nullptr}, {"tool_calls", json::array({ build_tool_call("test_func", json::object({{"arg1", "value1"}, {"arg2", "value2"}})) })} }; json user_msg = json{ {"role", "user"}, {"content", "Hello"} }; // Test zero vs one template_params params_zero; params_zero.messages = json::array({user_msg, assistant_zero_args}); params_zero.tools = tools; params_zero.add_generation_prompt = false; params_zero.enable_thinking = true; auto result_zero_one = differential_analyzer::compare_variants(tmpl, params_zero, [&](template_params & p) { p.messages = json::array({user_msg, assistant_one_arg}); }); t.assert_true("T4 zero vs one result should have value", result_zero_one.has_value()); t.assert_true("T4 zero vs one left should be empty or minimal", result_zero_one->diff.left.empty() || result_zero_one->diff.left == ""); t.assert_true("T4 zero vs one right should contain arg1", result_zero_one->diff.right.find("arg1") != std::string::npos); // Test one vs two template_params params_one; params_one.messages = json::array({user_msg, assistant_one_arg}); params_one.tools = tools; params_one.add_generation_prompt = false; params_one.enable_thinking = true; auto result_one_two = differential_analyzer::compare_variants(tmpl, params_one, [&](template_params & p) { p.messages = json::array({user_msg, assistant_two_args}); }); t.assert_true("T4 one vs two result should have value", result_one_two.has_value()); const auto & diff4 = result_one_two->diff; t.assert_true("T4 one vs two left should contain arg1 (or prefix)", diff4.left.find("arg1") != std::string::npos || diff4.prefix.find("arg1") != std::string::npos); t.assert_true("T4 one vs two right should contain arg1 (or prefix)", diff4.right.find("arg1") != std::string::npos || diff4.prefix.find("arg1") != std::string::npos); t.assert_true("T4 one vs two right should contain arg2 (or prefix/suffix)", diff4.right.find("arg2") != std::string::npos || diff4.prefix.find("arg2") != std::string::npos || diff4.suffix.find("arg2") != std::string::npos); } // T5: Compare different argument values static void test_seed_oss_args_presence(testing & t) { common_chat_template tmpl = load_seed_oss_template(t); json assistant_same_arg = json{ {"role", "assistant"}, {"content", nullptr}, {"tool_calls", json::array({ build_tool_call("test_function_name", json::object({{"param1", "value1"}})) })} }; json assistant_other_arg = json{ {"role", "assistant"}, {"content", nullptr}, {"tool_calls", json::array({ build_tool_call("test_function_name", json::object({{"param2", "value2"}})) })} }; json assistant_both_args = json{ {"role", "assistant"}, {"content", nullptr}, {"tool_calls", json::array({ build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}})) })} }; json user_msg = json{ {"role", "user"}, {"content", "Hello"} }; template_params params_same; params_same.messages = json::array({user_msg, assistant_same_arg}); params_same.tools = build_tools_definition(); params_same.add_generation_prompt = false; params_same.enable_thinking = true; // Test same arg vs other arg auto result_same_other = differential_analyzer::compare_variants(tmpl, params_same, [&](template_params & p) { p.messages = json::array({user_msg, assistant_other_arg}); }); t.assert_true("T5 same vs other result should have value", result_same_other.has_value()); const auto & diff5a = result_same_other->diff; t.assert_true("T5 same vs other left should contain param1 (or prefix/suffix)", diff5a.left.find("param1") != std::string::npos || diff5a.prefix.find("param1") != std::string::npos || diff5a.suffix.find("param1") != std::string::npos); t.assert_true("T5 same vs other left should contain value1 (or prefix/suffix)", diff5a.left.find("value1") != std::string::npos || diff5a.prefix.find("value1") != std::string::npos); t.assert_true("T5 same vs other right should contain param2 (or prefix/suffix)", diff5a.right.find("param2") != std::string::npos || diff5a.prefix.find("param2") != std::string::npos || diff5a.suffix.find("param2") != std::string::npos); t.assert_true("T5 same vs other right should contain value2 (or prefix/suffix)", diff5a.right.find("value2") != std::string::npos || diff5a.prefix.find("value2") != std::string::npos || diff5a.suffix.find("value2") != std::string::npos); // Test same arg vs both args auto result_same_both = differential_analyzer::compare_variants(tmpl, params_same, [&](template_params & p) { p.messages = json::array({user_msg, assistant_both_args}); }); t.assert_true("T5 same vs both result should have value", result_same_both.has_value()); const auto & diff5b = result_same_both->diff; t.assert_true("T5 same vs both left should contain param1 (or prefix/suffix)", diff5b.left.find("param1") != std::string::npos || diff5b.prefix.find("param1") != std::string::npos || diff5b.suffix.find("param1") != std::string::npos); t.assert_true("T5 same vs both right should contain param1 (or prefix/suffix)", diff5b.right.find("param1") != std::string::npos || diff5b.prefix.find("param1") != std::string::npos || diff5b.suffix.find("param1") != std::string::npos); t.assert_true("T5 same vs both right should contain param2 (or prefix/suffix)", diff5b.right.find("param2") != std::string::npos || diff5b.prefix.find("param2") != std::string::npos || diff5b.suffix.find("param2") != std::string::npos); } // T6: Tool call with vs without reasoning_content static void test_seed_oss_tool_with_reasoning(testing & t) { common_chat_template tmpl = load_seed_oss_template(t); json assistant_tool_only = json{ {"role", "assistant"}, {"content", nullptr}, {"tool_calls", json::array({ build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}})) })} }; json assistant_tool_with_reasoning = json{ {"role", "assistant"}, {"content", nullptr}, {"tool_calls", json::array({ build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}})) })}, {"reasoning_content", "I need to call the tool first."} }; json user_msg = json{ {"role", "user"}, {"content", "Hello, please help me."} }; template_params params_tool_only; params_tool_only.messages = json::array({user_msg, assistant_tool_only}); params_tool_only.tools = build_tools_definition(); params_tool_only.add_generation_prompt = false; params_tool_only.enable_thinking = true; auto result = differential_analyzer::compare_variants(tmpl, params_tool_only, [&](template_params & p) { p.messages = json::array({user_msg, assistant_tool_with_reasoning}); }); t.assert_true("T6 result should have value", result.has_value()); const auto & diff = result->diff; // Left should be empty (no reasoning in variant A) t.assert_equal("T6 left should be empty", "", diff.left); // Right should contain the thinking token with reasoning content t.assert_true("T6 right should contain think begin", diff.right.find("") != std::string::npos); t.assert_true("T6 right should contain reasoning content", diff.right.find("I need to call the tool first.") != std::string::npos); t.assert_true("T6 right should contain think end", diff.right.find("") != std::string::npos); // Prefix should contain the assistant role t.assert_true("T6 prefix should contain assistant", diff.prefix.find("assistant") != std::string::npos); // Suffix should contain the tool call t.assert_true("T6 suffix should contain tool_call begin", diff.suffix.find("") != std::string::npos); t.assert_true("T6 suffix should contain function name", diff.suffix.find("test_function_name") != std::string::npos); t.assert_true("T6 suffix should contain eos", diff.suffix.find("") != std::string::npos); } static common_chat_template load_template(testing & t, const std::string & template_path) { std::ifstream fin(template_path, std::ios::binary); std::ostringstream buf; if (fin.is_open()) { buf << fin.rdbuf(); } std::string template_source = buf.str(); common_chat_template tmpl(template_source, "", ""); t.assert_true("Nemotron template loaded successfully", template_source.length() > 0); return tmpl; } // ============================================================================ // Nemotron Template Analysis Tests // ============================================================================ static common_chat_template load_nemotron_template(testing & t) { return load_template(t, "models/templates/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.jinja"); } static void test_nemotron_analysis(testing & t) { t.test("Nemotron reasoning detection", test_nemotron_reasoning_detection); t.test("Nemotron tool format", test_nemotron_tool_format); } static void test_nemotron_reasoning_detection(testing & t) { common_chat_template tmpl = load_nemotron_template(t); // Test the comparison manually to see what's happening json user_msg = json{ { "role", "user" }, { "content", "Hello" } }; json assistant_no_reasoning = json{ { "role", "assistant" }, { "content", "I can help." } }; json assistant_with_reasoning = json{ { "role", "assistant" }, { "content", "I can help." }, { "reasoning_content", "Let me think about this." } }; template_params params; params.messages = json::array({ user_msg, assistant_no_reasoning }); params.add_generation_prompt = false; params.enable_thinking = true; // Run differential analysis auto analysis = differential_analyzer::analyze(tmpl); // Check reasoning markers t.assert_equal("reasoning_start should be ''", "", analysis.markers.reasoning_start); t.assert_equal("reasoning_end should be ''", "", analysis.markers.reasoning_end); // Check reasoning mode detection // Nemotron uses forced closed reasoning with add_generation_prompt t.assert_equal("reasoning should be FORCED_CLOSED", reasoning_mode::FORCED_CLOSED, analysis.reasoning); // Make sure reasoning markers don't spill over to content markers t.assert_equal("content start should be empty", "", analysis.markers.content_start); t.assert_equal("content end should be empty", "", analysis.markers.content_end); t.assert_equal("content should be PLAIN", content_mode::PLAIN, analysis.content); } static void test_nemotron_tool_format(testing & t) { common_chat_template tmpl = load_nemotron_template(t); // Run differential analysis auto analysis = differential_analyzer::analyze(tmpl); // Check tool markers - Nemotron uses per-call wrapping (each call individually wrapped) t.assert_equal("tool_section_start should be empty (per-call format)", "", analysis.markers.tool_section_start); t.assert_equal("tool_section_end should be empty (per-call format)", "", analysis.markers.tool_section_end); t.assert_equal("per_call_start should be '\\n'", "\n", analysis.markers.per_call_start); t.assert_equal("per_call_end should be ''", "", analysis.markers.per_call_end); t.assert_true("should support parallel calls", analysis.supports_parallel_calls); // Check function markers t.assert_equal("func_name_prefix should be '\\n'", ">\n", analysis.markers.func_name_suffix); t.assert_equal("func_close should be ''", "", analysis.markers.func_close); // Check argument markers (note: markers retain trailing newlines for proper parsing) t.assert_equal("arg_name_prefix should be '\\n'", ">\n", analysis.markers.arg_name_suffix); t.assert_equal("arg_value_suffix should be '\\n'", "\n", analysis.markers.arg_value_suffix); // Check format classification t.assert_true("tool format should be TAG_WITH_TAGGED", analysis.tools == tool_format::TAG_WITH_TAGGED); // Verify tool support t.assert_true("should support tools", analysis.supports_tools); } static common_chat_template load_cohere_template(testing & t) { return load_template(t, "models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja"); } static void test_cohere_analysis(testing & t) { t.test("Cohere reasoning detection", test_cohere_reasoning_detection); t.test("Cohere tool format", test_cohere_tool_format); } static void test_cohere_reasoning_detection(testing & t) { common_chat_template tmpl = load_cohere_template(t); // Run differential analysis auto analysis = differential_analyzer::analyze(tmpl); // Check reasoning markers - Cohere uses special token format t.assert_equal("reasoning_start should be '<|START_THINKING|>'", "<|START_THINKING|>", analysis.markers.reasoning_start); t.assert_equal("reasoning_end should be '<|END_THINKING|>'", "<|END_THINKING|>", analysis.markers.reasoning_end); // Check reasoning mode - Cohere only shows reasoning with tool calls (TOOLS_ONLY) t.assert_equal("reasoning should be TOOLS_ONLY", reasoning_mode::TOOLS_ONLY, analysis.reasoning); // Check content markers - Cohere wraps all content with START/END_RESPONSE t.assert_equal("content_start should be '<|START_RESPONSE|>'", "<|START_RESPONSE|>", analysis.markers.content_start); t.assert_equal("content_end should be '<|END_RESPONSE|>'", "<|END_RESPONSE|>", analysis.markers.content_end); // Content is always wrapped (both with and without tools) t.assert_equal("content should be ALWAYS_WRAPPED", content_mode::ALWAYS_WRAPPED, analysis.content); } static void test_cohere_tool_format(testing & t) { common_chat_template tmpl = load_cohere_template(t); // Run differential analysis auto analysis = differential_analyzer::analyze(tmpl); // Check tool section markers - Cohere uses ACTION markers t.assert_equal("tool_section_start should be '<|START_ACTION|>'", "<|START_ACTION|>", analysis.markers.tool_section_start); t.assert_equal("tool_section_end should be '<|END_ACTION|>'", "<|END_ACTION|>", analysis.markers.tool_section_end); // JSON_NATIVE format has no per-call markers t.assert_equal("per_call_start should be empty", "", analysis.markers.per_call_start); t.assert_equal("per_call_end should be empty", "", analysis.markers.per_call_end); // JSON_NATIVE format has empty function markers (no XML-style markers) t.assert_equal("func_name_prefix should be empty", "", analysis.markers.func_name_prefix); t.assert_equal("func_name_suffix should be empty", "", analysis.markers.func_name_suffix); t.assert_equal("func_close should be empty", "", analysis.markers.func_close); // JSON_NATIVE format has empty args markers t.assert_equal("args_start should be empty", "", analysis.markers.args_start); t.assert_equal("args_end should be empty", "", analysis.markers.args_end); // JSON_NATIVE format has empty argument markers t.assert_equal("arg_name_prefix should be empty", "", analysis.markers.arg_name_prefix); t.assert_equal("arg_name_suffix should be empty", "", analysis.markers.arg_name_suffix); t.assert_equal("arg_value_prefix should be empty", "", analysis.markers.arg_value_prefix); t.assert_equal("arg_value_suffix should be empty", "", analysis.markers.arg_value_suffix); t.assert_equal("arg_separator should be empty", "", analysis.markers.arg_separator); // Check JSON field names - Cohere uses non-standard names t.assert_equal("name_field should be 'tool_name'", "tool_name", analysis.name_field); t.assert_equal("args_field should be 'parameters'", "parameters", analysis.args_field); // This isn't a real tool call id field, i.e. with the OpenAI tool call ID format t.assert_equal("id_field should be 'tool_call_id'", "", analysis.id_field); // Check format classification t.assert_equal("tool format should be JSON_NATIVE", tool_format::JSON_NATIVE, analysis.tools); // Check flags t.assert_true("should support tools", analysis.supports_tools); t.assert_true("should support parallel calls", analysis.supports_parallel_calls); t.assert_true("should not require nonnull content", !analysis.requires_nonnull_content); t.assert_true("tools_array_wrapped should be true", analysis.tools_array_wrapped); } // ============================================================================ // standard_json_tools Format Tests // ============================================================================ // Helper to build tools definition for tests static json build_test_tools() { json parameters_schema = json::object(); parameters_schema["type"] = "object"; parameters_schema["properties"] = json::object(); parameters_schema["properties"]["location"] = json::object({ {"type", "string"}, {"description", "The city and state"} }); parameters_schema["properties"]["unit"] = json::object({ {"type", "string"}, {"description", "Temperature unit"}, {"enum", json::array({"celsius", "fahrenheit"})} }); parameters_schema["required"] = json::array({"location"}); return json::array({ json{ {"type", "function"}, {"function", json{ {"name", "get_current_weather"}, {"description", "Get the current weather in a given location"}, {"parameters", parameters_schema} }} } }); } static void test_standard_json_tools_formats(testing & t) { t.test("OpenAI format", test_standard_json_tools_openai); t.test("Cohere format", test_standard_json_tools_cohere); t.test("function-as-key format", test_standard_json_tools_function_key); } // Test 1: OpenAI Standard Format // {"id": "call_abc", "function": {"name": "get_weather", "arguments": {"location": "NYC"}}} static void test_standard_json_tools_openai(testing & t) { json tools = build_test_tools(); auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) { auto tool_call = p.standard_json_tools( "", "", tools, /* parallel */ true, /* force */ false, /* name_key */ "function.name", /* args_key */ "function.arguments", /* array_wrapped */ false, /* function_is_key */ false, /* call_id_key */ "id", /* gen_call_id_key */ "", /* parameters_order */ {} ); return p.content(p.until("")) + p.optional(tool_call) + p.end(); }); std::string input = "Let me check the weather." "" R"({"id": "call_abc123", "function": {"name": "get_current_weather", "arguments": {"location": "NYC"}}})" ""; common_peg_parse_context ctx(input, false); auto result = parser.parse(ctx); t.assert_true("parse success", result.success()); common_chat_msg msg; auto mapper = common_chat_peg_unified_mapper(msg); mapper.from_ast(ctx.ast, result); t.assert_equal("tool calls count", 1u, msg.tool_calls.size()); if (!msg.tool_calls.empty()) { t.assert_equal("tool name", "get_current_weather", msg.tool_calls[0].name); t.assert_equal("tool id", "call_abc123", msg.tool_calls[0].id); } t.assert_true("content present", msg.content.find("Let me check the weather") != std::string::npos); } // Test 2: Cohere Format // {"tool_call_id": 0, "tool_name": "get_weather", "parameters": {"location": "NYC"}} static void test_standard_json_tools_cohere(testing & t) { json tools = build_test_tools(); auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) { auto tool_call = p.standard_json_tools( "<|START_ACTION|>[", "]<|END_ACTION|>", tools, /* parallel */ true, /* force */ false, /* name_key */ "tool_name", /* args_key */ "parameters", /* array_wrapped */ false, // Brackets are part of section markers /* function_is_key */ false, /* call_id_key */ "", /* gen_call_id_key */ "tool_call_id", /* parameters_order */ {"tool_call_id", "tool_name", "parameters"} ); return p.content(p.until("<|START_ACTION|>")) + p.optional(tool_call) + p.end(); }); std::string input = "Let me search for that." "<|START_ACTION|>[" R"({"tool_call_id": 0, "tool_name": "get_current_weather", "parameters": {"location": "NYC", "unit": "celsius"}})" "]<|END_ACTION|>"; common_peg_parse_context ctx(input, false); auto result = parser.parse(ctx); t.assert_true("parse success", result.success()); common_chat_msg msg; auto mapper = common_chat_peg_unified_mapper(msg); mapper.from_ast(ctx.ast, result); t.assert_equal("tool calls count", 1u, msg.tool_calls.size()); if (!msg.tool_calls.empty()) { t.assert_equal("tool name", "get_current_weather", msg.tool_calls[0].name); t.assert_equal("tool id", "0", msg.tool_calls[0].id); } t.assert_true("content present", msg.content.find("Let me search") != std::string::npos); } // Test 3: Function-as-Key Format // {"get_current_weather": {"id": "call-0001", "args": {"location": "NYC"}}} static void test_standard_json_tools_function_key(testing & t) { json tools = build_test_tools(); auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) { auto tool_call = p.standard_json_tools( "[", "]", tools, /* parallel */ true, /* force */ false, /* name_key */ "", // Name is the key itself /* args_key */ "args", /* array_wrapped */ false, /* function_is_key */ true, /* call_id_key */ "id", /* gen_call_id_key */ "", /* parameters_order */ {} ); return p.content(p.until("")) + p.optional(tool_call) + p.end(); }); std::string input = "I'll call the weather function." "[" R"({"get_current_weather": {"id": "call-0001", "args": {"location": "NYC", "unit": "celsius"}}})" "]"; common_peg_parse_context ctx(input, false); auto result = parser.parse(ctx); t.assert_true("parse success", result.success()); common_chat_msg msg; auto mapper = common_chat_peg_unified_mapper(msg); mapper.from_ast(ctx.ast, result); t.assert_equal("tool calls count", 1u, msg.tool_calls.size()); if (!msg.tool_calls.empty()) { t.assert_equal("tool name", "get_current_weather", msg.tool_calls[0].name); t.assert_equal("tool id", "call-0001", msg.tool_calls[0].id); } t.assert_true("content present", msg.content.find("I'll call the weather") != std::string::npos); } // ============================================================================ // normalize_quotes_to_json Tests // ============================================================================ // Copy of the function for isolated testing (original is static in chat-peg-parser.cpp) static std::string normalize_quotes_to_json(const std::string & input) { std::string result; result.reserve(input.size() + 16); bool in_single_quoted = false; bool in_double_quoted = false; for (size_t i = 0; i < input.size(); ++i) { char c = input[i]; if (c == '\\' && i + 1 < input.size()) { char next = input[i + 1]; if (in_single_quoted) { if (next == '\'') { result += '\''; ++i; continue; } if (next == '"') { result += "\\\""; ++i; continue; } result += c; result += next; ++i; continue; } if (in_double_quoted) { result += c; result += next; ++i; continue; } result += c; continue; } if (c == '"') { if (in_single_quoted) { result += "\\\""; } else { in_double_quoted = !in_double_quoted; result += c; } } else if (c == '\'') { if (in_double_quoted) { result += c; } else if (in_single_quoted) { in_single_quoted = false; result += '"'; } else { in_single_quoted = true; result += '"'; } } else { result += c; } } return result; } static void test_normalize_quotes_to_json(testing & t) { t.test("basic single to double quotes", [](testing & t) { std::string input = "{'key': 'value'}"; std::string expected = "{\"key\": \"value\"}"; std::string result = normalize_quotes_to_json(input); t.assert_equal("basic conversion", expected, result); }); t.test("escaped single quote inside single-quoted string", [](testing & t) { std::string input = "{'code': 'print(\\'hello\\')'}"; std::string expected = "{\"code\": \"print('hello')\"}"; std::string result = normalize_quotes_to_json(input); t.assert_equal("escaped single quote", expected, result); }); t.test("double quote inside single-quoted string", [](testing & t) { std::string input = "{'msg': 'He said \"hi\"'}"; std::string expected = "{\"msg\": \"He said \\\"hi\\\"\"}"; std::string result = normalize_quotes_to_json(input); t.assert_equal("double quote escaping", expected, result); }); t.test("nested backslash escapes", [](testing & t) { std::string input = "{'path': 'C:\\\\Users\\\\test'}"; std::string expected = "{\"path\": \"C:\\\\Users\\\\test\"}"; std::string result = normalize_quotes_to_json(input); t.assert_equal("backslash escaping", expected, result); }); t.test("newline escapes", [](testing & t) { std::string input = "{'text': 'line1\\nline2'}"; std::string expected = "{\"text\": \"line1\\nline2\"}"; std::string result = normalize_quotes_to_json(input); t.assert_equal("newline escaping", expected, result); }); t.test("mixed quotes", [](testing & t) { std::string input = "{\"already_double\": 'single_value'}"; std::string expected = "{\"already_double\": \"single_value\"}"; std::string result = normalize_quotes_to_json(input); t.assert_equal("mixed quotes", expected, result); }); t.test("embedded quotes - the test case", test_normalize_quotes_with_embedded_quotes); } // Test case that mirrors the Seed-OSS failing test scenario static void test_normalize_quotes_with_embedded_quotes(testing & t) { // This is similar to the Seed-OSS template test case // The input has embedded double quotes like "14" and "bar" inside string values std::string input = "{'filename': 'foo.cpp', 'oldString': 'def foo(arg = \"14\"):\\n return arg + \"bar\"\\n', 'newString': 'def foo(arg = \"15\"):\\n pass\\n'}"; // Expected: Python single quotes -> JSON double quotes, internal double quotes escaped std::string expected = "{\"filename\": \"foo.cpp\", \"oldString\": \"def foo(arg = \\\"14\\\"):\\n return arg + \\\"bar\\\"\\n\", \"newString\": \"def foo(arg = \\\"15\\\"):\\n pass\\n\"}"; std::string result = normalize_quotes_to_json(input); t.assert_equal("normalize quotes with embedded double quotes", expected, result); // Also verify the result is valid JSON try { json parsed = json::parse(result); t.assert_true("result is valid JSON", true); t.assert_equal("filename field", "foo.cpp", parsed["filename"].get()); t.assert_true("oldString contains embedded quotes", parsed["oldString"].get().find("\"14\"") != std::string::npos); t.assert_true("newString contains embedded quotes", parsed["newString"].get().find("\"15\"") != std::string::npos); } catch (const std::exception & e) { t.assert_true(std::string("JSON parse failed: ") + e.what(), false); } } // ============================================================================ // TAG_WITH_TAGGED Argument Parsing Tests // ============================================================================ // Build tools definition for edit function static json build_edit_tool() { json parameters_schema = json::object(); parameters_schema["type"] = "object"; parameters_schema["properties"] = json::object(); parameters_schema["properties"]["filename"] = json::object({ {"type", "string"}, {"description", "Path of file to edit"} }); parameters_schema["properties"]["oldString"] = json::object({ {"type", "string"}, {"description", "String to replace"} }); parameters_schema["properties"]["newString"] = json::object({ {"type", "string"}, {"description", "New (replacement) value"} }); parameters_schema["required"] = json::array({"filename", "oldString", "newString"}); return json::array({ json{ {"type", "function"}, {"function", json{ {"name", "edit"}, {"description", "Edit a file"}, {"parameters", parameters_schema} }} } }); } // Test that reproduces the Seed-OSS template issue with embedded quotes static void test_tagged_args_with_embedded_quotes(testing & t) { json tools = build_edit_tool(); // Build a parser for TAG_WITH_TAGGED format like Seed-OSS/Nemotron auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) { // Build tool choice for the edit function auto tool_choice = p.choice(); for (const auto & tool_def : tools) { if (!tool_def.contains("function")) continue; const auto & function = tool_def.at("function"); std::string name = function.at("name"); const auto & params = function.at("parameters"); if (!params.contains("properties") || !params.at("properties").is_object()) continue; const auto & properties = params.at("properties"); // Build argument parsers std::vector arg_parsers; for (const auto & [param_name, param_schema] : properties.items()) { auto arg = p.tool_arg( p.tool_arg_open(p.literal("")) + p.space() + p.tool_arg_string_value(p.until("")) + p.space() + p.tool_arg_close(p.literal("")) ); arg_parsers.push_back(p.optional(p.rule("arg-" + param_name, arg))); } // Build arg sequence with space() between common_peg_parser args_seq = p.eps(); for (size_t i = 0; i < arg_parsers.size(); i++) { if (i > 0) { args_seq = args_seq + p.space(); } args_seq = args_seq + arg_parsers[i]; } auto func_parser = p.tool_open(p.literal("")) + p.space() + args_seq + p.space() + p.tool_close(p.literal("")); tool_choice |= p.rule("tool-" + name, p.tool(func_parser)); } auto tool_section = p.literal("") + p.space() + tool_choice + p.space() + p.literal(""); return p.content(p.until("")) + p.optional(tool_section) + p.end(); }); // The exact input from the failing test std::string input = "\n" "\n" "\n" "foo.cpp\n" "\n" "" "def foo(arg = \"14\"):\n" " return arg + \"bar\"\n" "\n" "\n" "" "def foo(arg = \"15\"):\n" " pass\n" "\n" "\n" "\n" ""; common_peg_parse_context ctx(input, false); auto result = parser.parse(ctx); t.assert_true("parse success", result.success()); common_chat_msg msg; auto mapper = common_chat_peg_unified_mapper(msg); mapper.from_ast(ctx.ast, result); t.assert_equal("tool calls count", 1u, msg.tool_calls.size()); if (!msg.tool_calls.empty()) { t.assert_equal("tool name", "edit", msg.tool_calls[0].name); // Parse the arguments as JSON to verify they're valid std::string args = msg.tool_calls[0].arguments; try { json parsed = json::parse(args); t.assert_true("arguments is valid JSON", true); // Verify each field has proper value t.assert_equal("filename", "foo.cpp", parsed.value("filename", "")); std::string oldString = parsed.value("oldString", ""); t.assert_true("oldString contains embedded quotes", oldString.find("\"14\"") != std::string::npos); t.assert_true("oldString contains bar with quotes", oldString.find("\"bar\"") != std::string::npos); std::string newString = parsed.value("newString", ""); t.assert_true("newString contains embedded quotes", newString.find("\"15\"") != std::string::npos); } catch (const std::exception & e) { t.assert_true(std::string("arguments should be valid JSON: ") + e.what(), false); } } }