// llama.cpp/tests/test-chat-auto-parser.cpp
// (source listing metadata: 1846 lines, 81 KiB, C++)

#include "chat-auto-parser-helpers.h"
#include "chat-diff-analyzer.h"
#include "chat-peg-parser.h"
#include "chat.h"
#include "peg-parser.h"
#include "testing.h"
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
// Forward declarations so that main() and the group runner functions can refer
// to the individual tests before their definitions appear.

// calculate_diff_split tests (test_calculate_diff_split runs the individual cases)
static void test_calculate_diff_split_basic(testing & t);
static void test_calculate_diff_split_identical(testing & t);
static void test_calculate_diff_split_common_prefix(testing & t);
static void test_calculate_diff_split_common_suffix(testing & t);
static void test_calculate_diff_split_common_both(testing & t);
static void test_calculate_diff_split_empty_cases(testing & t);
static void test_calculate_diff_split_no_common(testing & t);
static void test_calculate_diff_split_single_char(testing & t);
static void test_calculate_diff_split_overlaps(testing & t);
static void test_calculate_diff_split_tag_boundaries(testing & t);
static void test_calculate_diff_split(testing & t);
// until_common_prefix / after_common_suffix tests
static void test_until_common_prefix_basic(testing & t);
static void test_until_common_prefix(testing & t);
static void test_after_common_suffix_basic(testing & t);
static void test_after_common_suffix(testing & t);
// analyze_tool_call tests
// NOTE(review): main() does not register these directly in the visible code — confirm they are still used
static void test_analyze_tool_call_pure_json(testing & t);
static void test_analyze_tool_call_function_name_markers(testing & t);
static void test_analyze_tool_call_full_markers(testing & t);
static void test_analyze_tool_call_edge_cases(testing & t);
// compare_variants tests (test_compare_variants runs the individual cases)
static void test_compare_variants_basic(testing & t);
static void test_compare_variants_messages_modifier(testing & t);
static void test_compare_variants_tools_modifier(testing & t);
static void test_compare_variants_both_modifiers(testing & t);
static void test_compare_variants_template_failure(testing & t);
static void test_compare_variants_identity(testing & t);
static void test_compare_variants(testing & t);
// Seed-OSS template tool calling analysis tests
static void test_seed_oss_tool_analysis(testing & t);
static void test_seed_oss_tool_presence(testing & t);
static void test_seed_oss_call_count(testing & t);
static void test_seed_oss_function_names(testing & t);
static void test_seed_oss_argument_count(testing & t);
static void test_seed_oss_args_presence(testing & t);
static void test_seed_oss_tool_with_reasoning(testing & t);
// Nemotron template analysis tests
static void test_nemotron_analysis(testing & t);
static void test_nemotron_reasoning_detection(testing & t);
static void test_nemotron_tool_format(testing & t);
// CohereForAI template analysis tests
static void test_cohere_reasoning_detection(testing & t);
static void test_cohere_tool_format(testing & t);
static void test_cohere_analysis(testing & t);
// Marker separation
static void test_marker_separation(testing & t);
// standard_json_tools format tests
static void test_standard_json_tools_formats(testing & t);
static void test_standard_json_tools_openai(testing & t);
static void test_standard_json_tools_cohere(testing & t);
static void test_standard_json_tools_function_key(testing & t);
// normalize_quotes_to_json tests
static void test_normalize_quotes_to_json(testing & t);
static void test_normalize_quotes_with_embedded_quotes(testing & t);
// TAG_WITH_TAGGED argument parsing tests
static void test_tagged_args_with_embedded_quotes(testing & t);
int main(int argc, char * argv[]) {
testing t(std::cout);
t.verbose = true;
// usage: test-chat-auto-parser-helpers [filter_regex]
if (argc > 1) {
t.set_filter(argv[1]);
}
t.test("diff_split", test_calculate_diff_split);
t.test("common_prefix", test_until_common_prefix);
t.test("common_suffix", test_after_common_suffix);
t.test("compare_variants", test_compare_variants);
t.test("segments", test_marker_separation);
t.test("seed_oss_diffs", test_seed_oss_tool_analysis);
t.test("cohere", test_cohere_analysis);
t.test("nemotron", test_nemotron_analysis);
t.test("standard_json_tools", test_standard_json_tools_formats);
t.test("normalize_quotes_to_json", test_normalize_quotes_to_json);
t.test("tagged_args_embedded_quotes", test_tagged_args_with_embedded_quotes);
return t.summary();
}
// Verifies how segmentize_markers() splits a string into text and marker
// segments for <...> and [...] delimited markers.
//
// Each sub-test first checks that enough segments were produced and bails out
// otherwise: the segment lists are indexed with operator[], so a shorter
// result would be an out-of-bounds access rather than a clean test failure.
static void test_marker_separation(testing & t) {
    auto single_square_marker     = segmentize_markers("pre_marker[marker]post_marker");
    auto single_diag_marker       = segmentize_markers("pre_marker<marker>post_marker");
    auto paired_markers           = segmentize_markers("<hello>world</hello>");
    auto double_different_markers = segmentize_markers("<hello>[hello]<world>[world]");
    auto in_between               = segmentize_markers("im<blue>daba<dee>da[hey]");

    t.test("single_square_marker", [&] (testing & t) {
        const bool enough = single_square_marker.size() >= 3;
        t.assert_true("has at least 3 segments", enough);
        if (!enough) {
            return;
        }
        t.assert_equal("first is text", segment_type::TEXT, single_square_marker[0].type);
        t.assert_equal("second is marker", segment_type::MARKER, single_square_marker[1].type);
        t.assert_equal("last is text", segment_type::TEXT, single_square_marker[2].type);
        t.assert_equal("first is 'pre_marker'", "pre_marker", single_square_marker[0].value);
        t.assert_equal("second is '[marker]'", "[marker]", single_square_marker[1].value);
        t.assert_equal("last is 'post_marker'", "post_marker", single_square_marker[2].value);
    });

    t.test("single_diagonal_marker", [&] (testing & t) {
        const bool enough = single_diag_marker.size() >= 3;
        t.assert_true("has at least 3 segments", enough);
        if (!enough) {
            return;
        }
        t.assert_equal("first is text", segment_type::TEXT, single_diag_marker[0].type);
        t.assert_equal("second is marker", segment_type::MARKER, single_diag_marker[1].type);
        t.assert_equal("last is text", segment_type::TEXT, single_diag_marker[2].type);
        t.assert_equal("first is 'pre_marker'", "pre_marker", single_diag_marker[0].value);
        t.assert_equal("second is '<marker>'", "<marker>", single_diag_marker[1].value);
        t.assert_equal("last is 'post_marker'", "post_marker", single_diag_marker[2].value);
    });

    t.test("paired_markers", [&] (testing & t) {
        const bool enough = paired_markers.size() >= 3;
        t.assert_true("has at least 3 segments", enough);
        if (!enough) {
            return;
        }
        t.assert_equal("first is marker", segment_type::MARKER, paired_markers[0].type);
        t.assert_equal("second is text", segment_type::TEXT, paired_markers[1].type);
        t.assert_equal("third is marker", segment_type::MARKER, paired_markers[2].type);
        t.assert_equal("first is '<hello>'", "<hello>", paired_markers[0].value);
        t.assert_equal("second is 'world'", "world", paired_markers[1].value);
        t.assert_equal("third is '</hello>'", "</hello>", paired_markers[2].value);
    });

    t.test("double_different_markers", [&] (testing & t) {
        const bool enough = double_different_markers.size() >= 4;
        t.assert_true("has at least 4 segments", enough);
        if (!enough) {
            return;
        }
        t.assert_equal("first is marker", segment_type::MARKER, double_different_markers[0].type);
        t.assert_equal("second is marker", segment_type::MARKER, double_different_markers[1].type);
        t.assert_equal("third is marker", segment_type::MARKER, double_different_markers[2].type);
        t.assert_equal("fourth is marker", segment_type::MARKER, double_different_markers[3].type);
        t.assert_equal("first is '<hello>'", "<hello>", double_different_markers[0].value);
        t.assert_equal("second is '[hello]'", "[hello]", double_different_markers[1].value);
        t.assert_equal("third is '<world>'", "<world>", double_different_markers[2].value);
        t.assert_equal("fourth is '[world]'", "[world]", double_different_markers[3].value);
    });

    t.test("in_between", [&] (testing & t) {
        const bool enough = in_between.size() >= 6;
        t.assert_true("has at least 6 segments", enough);
        if (!enough) {
            return;
        }
        t.assert_equal("first is text", segment_type::TEXT, in_between[0].type);
        t.assert_equal("second is marker", segment_type::MARKER, in_between[1].type);
        t.assert_equal("third is text", segment_type::TEXT, in_between[2].type);
        t.assert_equal("fourth is marker", segment_type::MARKER, in_between[3].type);
        t.assert_equal("fifth is text", segment_type::TEXT, in_between[4].type);
        t.assert_equal("sixth is marker", segment_type::MARKER, in_between[5].type);
        t.assert_equal("first is 'im'", "im", in_between[0].value);
        t.assert_equal("second is '<blue>'", "<blue>", in_between[1].value);
        t.assert_equal("third is 'daba'", "daba", in_between[2].value);
        t.assert_equal("fourth is '<dee>'", "<dee>", in_between[3].value);
        t.assert_equal("fifth is 'da'", "da", in_between[4].value);
        t.assert_equal("sixth is '[hey]'", "[hey]", in_between[5].value);
    });
}
// Group runner: executes every calculate_diff_split sub-test in a fixed order.
static void test_calculate_diff_split(testing & t) {
    struct sub_test {
        const char * label;
        void (*run)(testing &);
    };
    const sub_test sub_tests[] = {
        { "calculate_diff_split basic",          test_calculate_diff_split_basic          },
        { "calculate_diff_split identical",      test_calculate_diff_split_identical      },
        { "calculate_diff_split common prefix",  test_calculate_diff_split_common_prefix  },
        { "calculate_diff_split common suffix",  test_calculate_diff_split_common_suffix  },
        { "calculate_diff_split common both",    test_calculate_diff_split_common_both    },
        { "calculate_diff_split empty cases",    test_calculate_diff_split_empty_cases    },
        { "calculate_diff_split no common",      test_calculate_diff_split_no_common      },
        { "calculate_diff_split single char",    test_calculate_diff_split_single_char    },
        { "calculate_diff_split overlaps",       test_calculate_diff_split_overlaps       },
        { "calculate_diff_split tag boundaries", test_calculate_diff_split_tag_boundaries },
    };
    for (const sub_test & entry : sub_tests) {
        t.test(entry.label, entry.run);
    }
}
// Basic prefix/left/right/suffix decomposition on three simple input pairs.
static void test_calculate_diff_split_basic(testing & t) {
    // Inputs share only their leading text.
    {
        diff_split split = calculate_diff_split("hello world", "hello test");
        t.assert_equal("prefix should be 'hello '", "hello ", split.prefix);
        t.assert_equal("left should be 'world'", "world", split.left);
        t.assert_equal("right should be 'test'", "test", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }
    // Inputs share nothing.
    {
        diff_split split = calculate_diff_split("abc", "xyz");
        t.assert_equal("prefix should be empty", "", split.prefix);
        t.assert_equal("left should be 'abc'", "abc", split.left);
        t.assert_equal("right should be 'xyz'", "xyz", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }
    // Inputs share both leading and trailing text.
    {
        diff_split split = calculate_diff_split("prefixA suffix", "prefixB suffix");
        t.assert_equal("prefix should be 'prefix'", "prefix", split.prefix);
        t.assert_equal("left should be 'A'", "A", split.left);
        t.assert_equal("right should be 'B'", "B", split.right);
        t.assert_equal("suffix should be ' suffix'", " suffix", split.suffix);
    }
}
// Identical inputs: the whole string is expected in `prefix`, everything else empty.
static void test_calculate_diff_split_identical(testing & t) {
    // Multi-character identical strings.
    {
        diff_split split = calculate_diff_split("hello", "hello");
        t.assert_equal("prefix should be 'hello'", "hello", split.prefix);
        t.assert_equal("left should be empty", "", split.left);
        t.assert_equal("right should be empty", "", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }
    // Two empty strings.
    {
        diff_split split = calculate_diff_split("", "");
        t.assert_equal("prefix should be empty", "", split.prefix);
        t.assert_equal("left should be empty", "", split.left);
        t.assert_equal("right should be empty", "", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }
    // Single-character identical strings.
    {
        diff_split split = calculate_diff_split("a", "a");
        t.assert_equal("prefix should be 'a'", "a", split.prefix);
        t.assert_equal("left should be empty", "", split.left);
        t.assert_equal("right should be empty", "", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }
}
// Inputs that share leading text only; the suffix is expected to stay empty.
static void test_calculate_diff_split_common_prefix(testing & t) {
    // Same length, different tails.
    {
        diff_split split = calculate_diff_split("abcdef", "abcxyz");
        t.assert_equal("prefix should be 'abc'", "abc", split.prefix);
        t.assert_equal("left should be 'def'", "def", split.left);
        t.assert_equal("right should be 'xyz'", "xyz", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }
    // Left input is a strict prefix of the right input.
    {
        diff_split split = calculate_diff_split("same", "sameagain");
        t.assert_equal("prefix should be 'same'", "same", split.prefix);
        t.assert_equal("left should be empty", "", split.left);
        t.assert_equal("right should be 'again'", "again", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }
    // Another strict-prefix pair.
    {
        diff_split split = calculate_diff_split("test", "testing");
        t.assert_equal("prefix should be 'test'", "test", split.prefix);
        t.assert_equal("left should be empty", "", split.left);
        t.assert_equal("right should be 'ing'", "ing", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }
}
// Inputs that differ at the start; checks what ends up in `suffix`.
static void test_calculate_diff_split_common_suffix(testing & t) {
    // Shared trailing "end".
    {
        diff_split split = calculate_diff_split("123end", "456end");
        t.assert_equal("prefix should be empty", "", split.prefix);
        t.assert_equal("left should be '123'", "123", split.left);
        t.assert_equal("right should be '456'", "456", split.right);
        t.assert_equal("suffix should be 'end'", "end", split.suffix);
    }
    // No shared trailing text at all.
    {
        diff_split split = calculate_diff_split("start", "end");
        t.assert_equal("prefix should be empty", "", split.prefix);
        t.assert_equal("left should be 'start'", "start", split.left);
        t.assert_equal("right should be 'end'", "end", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }
    // Longer shared trailing text.
    {
        diff_split split = calculate_diff_split("abcsuffix", "xyzsuffix");
        t.assert_equal("prefix should be empty", "", split.prefix);
        t.assert_equal("left should be 'abc'", "abc", split.left);
        t.assert_equal("right should be 'xyz'", "xyz", split.right);
        t.assert_equal("suffix should be 'suffix'", "suffix", split.suffix);
    }
}
// Inputs that share text on both ends, plus one short two-character pair.
static void test_calculate_diff_split_common_both(testing & t) {
    // Single differing character in the middle.
    {
        diff_split split = calculate_diff_split("helloXworld", "helloYworld");
        t.assert_equal("prefix should be 'hello'", "hello", split.prefix);
        t.assert_equal("left should be 'X'", "X", split.left);
        t.assert_equal("right should be 'Y'", "Y", split.right);
        t.assert_equal("suffix should be 'world'", "world", split.suffix);
    }
    // Differing middle parts of unequal length.
    {
        diff_split split = calculate_diff_split("ABCmiddleXYZ", "ABCdifferentXYZ");
        t.assert_equal("prefix should be 'ABC'", "ABC", split.prefix);
        t.assert_equal("left should be 'middle'", "middle", split.left);
        t.assert_equal("right should be 'different'", "different", split.right);
        t.assert_equal("suffix should be 'XYZ'", "XYZ", split.suffix);
    }
    // Word-like prefix and suffix around a single differing character.
    {
        diff_split split = calculate_diff_split("startAend", "startBend");
        t.assert_equal("prefix should be 'start'", "start", split.prefix);
        t.assert_equal("left should be 'A'", "A", split.left);
        t.assert_equal("right should be 'B'", "B", split.right);
        t.assert_equal("suffix should be 'end'", "end", split.suffix);
    }
    // Two-character inputs that differ only in their last character.
    {
        diff_split split = calculate_diff_split("aa", "ab");
        t.assert_equal("prefix should be 'a'", "a", split.prefix);
        t.assert_equal("left should be 'a'", "a", split.left);
        t.assert_equal("right should be 'b'", "b", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }
}
// Combinations where one or both inputs are empty.
static void test_calculate_diff_split_empty_cases(testing & t) {
    // left = "", right = "hello"
    {
        diff_split split = calculate_diff_split("", "hello");
        t.assert_equal("prefix should be empty", "", split.prefix);
        t.assert_equal("left should be empty", "", split.left);
        t.assert_equal("right should be 'hello'", "hello", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }
    // left = "hello", right = ""
    {
        diff_split split = calculate_diff_split("hello", "");
        t.assert_equal("prefix should be empty", "", split.prefix);
        t.assert_equal("left should be 'hello'", "hello", split.left);
        t.assert_equal("right should be empty", "", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }
    // left = "", right = ""
    {
        diff_split split = calculate_diff_split("", "");
        t.assert_equal("prefix should be empty", "", split.prefix);
        t.assert_equal("left should be empty", "", split.left);
        t.assert_equal("right should be empty", "", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }
    // left = "a", right = ""
    {
        diff_split split = calculate_diff_split("a", "");
        t.assert_equal("prefix should be empty", "", split.prefix);
        t.assert_equal("left should be 'a'", "a", split.left);
        t.assert_equal("right should be empty", "", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }
    // left = "", right = "a"
    {
        diff_split split = calculate_diff_split("", "a");
        t.assert_equal("prefix should be empty", "", split.prefix);
        t.assert_equal("left should be empty", "", split.left);
        t.assert_equal("right should be 'a'", "a", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }
}
// Inputs with (almost) nothing in common.
static void test_calculate_diff_split_no_common(testing & t) {
    // Completely disjoint letters.
    {
        diff_split split = calculate_diff_split("abc", "xyz");
        t.assert_equal("prefix should be empty", "", split.prefix);
        t.assert_equal("left should be 'abc'", "abc", split.left);
        t.assert_equal("right should be 'xyz'", "xyz", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }
    // "left" and "right" both end in 't', so a one-character common suffix
    // is expected here even though the words look unrelated.
    {
        diff_split split = calculate_diff_split("left", "right");
        t.assert_equal("prefix should be empty", "", split.prefix);
        t.assert_equal("left should be 'lef'", "lef", split.left);
        t.assert_equal("right should be 'righ'", "righ", split.right);
        t.assert_equal("suffix should be 't'", "t", split.suffix);
    }
    // Completely disjoint digits.
    {
        diff_split split = calculate_diff_split("123", "456");
        t.assert_equal("prefix should be empty", "", split.prefix);
        t.assert_equal("left should be '123'", "123", split.left);
        t.assert_equal("right should be '456'", "456", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }
}
// One- and two-character inputs.
static void test_calculate_diff_split_single_char(testing & t) {
    // Different single characters.
    {
        diff_split split = calculate_diff_split("a", "b");
        t.assert_equal("prefix should be empty", "", split.prefix);
        t.assert_equal("left should be 'a'", "a", split.left);
        t.assert_equal("right should be 'b'", "b", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }
    // Same single character.
    {
        diff_split split = calculate_diff_split("a", "a");
        t.assert_equal("prefix should be 'a'", "a", split.prefix);
        t.assert_equal("left should be empty", "", split.left);
        t.assert_equal("right should be empty", "", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }
    // Right input extends the left one by one character.
    {
        diff_split split = calculate_diff_split("a", "ab");
        t.assert_equal("prefix should be 'a'", "a", split.prefix);
        t.assert_equal("left should be empty", "", split.left);
        t.assert_equal("right should be 'b'", "b", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }
    // Left input extends the right one by one character.
    {
        diff_split split = calculate_diff_split("ab", "a");
        t.assert_equal("prefix should be 'a'", "a", split.prefix);
        t.assert_equal("left should be 'b'", "b", split.left);
        t.assert_equal("right should be empty", "", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }
}
// Inputs where one string contains the other, or where they differ by one character.
static void test_calculate_diff_split_overlaps(testing & t) {
    // Left input is a leading substring of the right input.
    {
        diff_split split = calculate_diff_split("test", "testing");
        t.assert_equal("prefix should be 'test'", "test", split.prefix);
        t.assert_equal("left should be empty", "", split.left);
        t.assert_equal("right should be 'ing'", "ing", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }
    // Same pair with the arguments swapped.
    {
        diff_split split = calculate_diff_split("testing", "test");
        t.assert_equal("prefix should be 'test'", "test", split.prefix);
        t.assert_equal("left should be 'ing'", "ing", split.left);
        t.assert_equal("right should be empty", "", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }
    // Differing first character: the shared "test" tail is expected as the suffix.
    {
        diff_split split = calculate_diff_split("Xtest", "Ytest");
        t.assert_equal("prefix should be empty", "", split.prefix);
        t.assert_equal("left should be 'X'", "X", split.left);
        t.assert_equal("right should be 'Y'", "Y", split.right);
        t.assert_equal("suffix should be 'test'", "test", split.suffix);
    }
    // Differing last character: the shared "test" head is expected as the prefix.
    {
        diff_split split = calculate_diff_split("testX", "testY");
        t.assert_equal("prefix should be 'test'", "test", split.prefix);
        t.assert_equal("left should be 'X'", "X", split.left);
        t.assert_equal("right should be 'Y'", "Y", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }
    // One input is the reverse of the other.
    {
        diff_split split = calculate_diff_split("abc", "cba");
        t.assert_equal("prefix should be empty", "", split.prefix);
        t.assert_equal("left should be 'abc'", "abc", split.left);
        t.assert_equal("right should be 'cba'", "cba", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }
}
// Inputs containing <...> / [...] style markers. The expectations encode that a
// marker is not cut in half by the prefix/suffix boundaries of the split.
static void test_calculate_diff_split_tag_boundaries(testing & t) {
    // Unterminated '<' on the left input, stray '>' on the right input.
    {
        diff_split split = calculate_diff_split("test<tag", "test>content");
        const bool starts_with_test = split.prefix.rfind("test", 0) == 0;
        const bool has_remainder    = !split.left.empty() || !split.right.empty() || !split.suffix.empty();
        t.assert_true("prefix should start with 'test'", starts_with_test);
        t.assert_true("should handle tag boundaries", has_remainder);
    }
    // Same idea with square brackets.
    {
        diff_split split = calculate_diff_split("test[", "test]value");
        const bool has_remainder = !split.left.empty() || !split.right.empty() || !split.suffix.empty();
        t.assert_true("should handle bracket boundaries", has_remainder);
    }
    // Opening tag versus closing tag: the shared '<' is expected to stay with
    // the differing parts instead of being absorbed into the prefix.
    {
        diff_split split = calculate_diff_split("prefix<tag>", "prefix</tag>suffix");
        t.assert_equal("prefix should be 'prefix'", "prefix", split.prefix);
        t.assert_equal("left should be '<tag>'", "<tag>", split.left);
        t.assert_equal("right should be '</tag>suffix'", "</tag>suffix", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }
    // Tag-wrapped content: "content" and "different" both end in "ent", so the
    // expected suffix includes those letters in front of "</div>".
    {
        diff_split split = calculate_diff_split("prefix<div>content</div>", "prefix<div>different</div>");
        t.assert_equal("prefix should be 'prefix<div>'", "prefix<div>", split.prefix);
        t.assert_equal("left should be 'cont'", "cont", split.left);
        t.assert_equal("right should be 'differ'", "differ", split.right);
        t.assert_equal("suffix should be 'ent</div>'", "ent</div>", split.suffix);
    }
    // Angle-bracket marker on the left input only.
    {
        diff_split split = calculate_diff_split("Hello <world>", "Hello test");
        t.assert_equal("prefix should be 'Hello '", "Hello ", split.prefix);
        t.assert_true("left should contain '<world>'", split.left.find("<world>") != std::string::npos);
        t.assert_equal("right should be 'test'", "test", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }
    // Square-bracket marker on the left input only.
    {
        diff_split split = calculate_diff_split("test [array]", "test other");
        t.assert_equal("prefix should be 'test '", "test ", split.prefix);
        t.assert_true("left should contain '[array]'", split.left.find("[array]") != std::string::npos);
        t.assert_equal("right should be 'other'", "other", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }
    // Whole tags as prefix and suffix around differing text.
    {
        diff_split split = calculate_diff_split("<tag>left</tag>", "<tag>righ</tag>");
        t.assert_equal("prefix should be '<tag>'", "<tag>", split.prefix);
        t.assert_equal("left should be 'left'", "left", split.left);
        t.assert_equal("right should be 'righ'", "righ", split.right);
        t.assert_equal("suffix should be '</tag>'", "</tag>", split.suffix);
    }
    // Simplified real case from the template tests: a reasoning block is
    // present on the right-hand side only.
    {
        std::string without_thinking = "PREFIX</think>Sure";
        std::string with_thinking    = "PREFIX<think>Lemme think</think>Sure";
        diff_split  split            = calculate_diff_split(without_thinking, with_thinking);
        t.assert_equal("prefix should be PREFIX", "PREFIX", split.prefix);
        t.assert_equal("suffix should be </think>Sure", "</think>Sure", split.suffix);
        t.assert_equal("left should be empty", "", split.left);
        t.assert_equal("right should be <think>Lemme think", "<think>Lemme think", split.right);
    }
    // Real case with <|...|> special tokens: both differing parts end in an
    // unterminated token and the shared tail begins with "|>". The expected
    // split gives that "|>" back to the differing parts so that
    // <|END_RESPONSE|> and <|END_ACTION|> stay whole.
    {
        std::string shared_head = "SOME_PREFIX";
        std::string shared_tail = "|><|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>";
        std::string left_body   = "<|START_RESPONSE|>Let me help you.<|END_RESPONSE";
        std::string right_body  =
            "<|START_THINKING|><|END_THINKING|><|START_ACTION|>[\n"
            " {\"tool_call_id\": \"0\", \"tool_name\": \"test_function_name\", "
            "\"parameters\": {\"param1\": \"value1\", \"param2\": \"value2\"}}\n"
            "]<|END_ACTION";
        std::string left_full   = shared_head + left_body + shared_tail;
        std::string right_full  = shared_head + right_body + shared_tail;

        diff_split split = calculate_diff_split(left_full, right_full);
        t.assert_equal("special token prefix", shared_head, split.prefix);
        t.assert_equal("special token left", "<|START_RESPONSE|>Let me help you.<|END_RESPONSE|>", split.left);
        t.assert_true("special token right ends with |>", split.right.find("<|END_ACTION|>") != std::string::npos);
        t.assert_equal("special token suffix", "<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",
                       split.suffix);
    }
}
// Group runner for the until_common_prefix tests (currently a single case).
static void test_until_common_prefix(testing & t) {
    t.test("until_common_prefix basic", [](testing & sub) {
        test_until_common_prefix_basic(sub);
    });
}
// Exercises until_common_prefix(full, left, right). Per the expectations below,
// the result is the part of `full` that precedes the text `left` and `right`
// start with in common, or an empty string when there is none.
static void test_until_common_prefix_basic(testing & t) {
    // Motivating example: function tag followed by differing argument tags.
    {
        std::string head = until_common_prefix("<function name=foo><arg name=bar>", "<arg name=bar>", "<arg name=baz>");
        t.assert_equal("untilCommonPrefix should return '<function name=foo>'", "<function name=foo>", head);
    }
    // Shared "<test>" marker located in the middle of `full`.
    {
        std::string head = until_common_prefix("prefix<test>suffix", "<test>different", "<test>other");
        t.assert_equal("should return 'prefix'", "prefix", head);
    }
    // Shared marker located at the very start of `full`.
    {
        std::string head = until_common_prefix("<common>rest", "<common>left", "<common>right");
        t.assert_equal("should return empty string when common prefix at start", "", head);
    }
    // `left` and `right` have no common prefix.
    {
        std::string head = until_common_prefix("something", "left", "right");
        t.assert_equal("should return empty string when no common prefix", "", head);
    }
    // Empty `left`.
    {
        std::string head = until_common_prefix("test", "", "right");
        t.assert_equal("should return empty string when left is empty", "", head);
    }
    // Longer shared marker.
    {
        std::string head = until_common_prefix("abcXYZ<shared_prefix>rest", "<shared_prefix>left", "<shared_prefix>right");
        t.assert_equal("should return 'abcXYZ'", "abcXYZ", head);
    }
}
// Group runner for the after_common_suffix tests (currently a single case).
static void test_after_common_suffix(testing & t) {
    t.test("after_common_suffix basic", [](testing & sub) {
        test_after_common_suffix_basic(sub);
    });
}
// Exercises after_common_suffix(full, left, right). Per the expectations below,
// the result is the part of `full` that follows the text `left` and `right`
// end with in common, or an empty string when there is nothing after it.
static void test_after_common_suffix_basic(testing & t) {
    // Motivating example: the closing function tag follows the shared "</arg>".
    {
        std::string tail = after_common_suffix("<function name=foo><arg name=bar>100</arg></function>",
                                               "<arg name=bar>100</arg>",
                                               "<arg name=baz>535</arg>");
        t.assert_equal("afterCommonSuffix should return '</function>'", "</function>", tail);
    }
    // Shared suffix sits at the very end of `full`.
    {
        std::string tail = after_common_suffix("rest<common>", "left<common>", "right<common>");
        t.assert_equal("should return empty string when common suffix at end", "", tail);
    }
    // Empty `right`.
    {
        std::string tail = after_common_suffix("test", "left", "");
        t.assert_equal("should return empty string when right is empty", "", tail);
    }
    // XML-like nesting similar to the motivating example.
    {
        std::string tail = after_common_suffix("<outer><inner>value</inner></outer>",
                                               "<inner>value</inner>",
                                               "<inner>different</inner>");
        t.assert_equal("should return '</outer>'", "</outer>", tail);
    }
    // Longer shared suffix that ends `full`.
    {
        std::string tail = after_common_suffix("prefix<shared>rest</shared>", "prefix<shared>left</shared>", "prefix<shared>right</shared>");
        t.assert_equal("should return '' when common suffix is at end of full", "", tail);
    }
    // Shared suffix found inside `full` with extra text after it.
    {
        std::string tail = after_common_suffix("<tag>content</tag><extra>", "<tag>value</tag>", "<tag>other</tag>");
        t.assert_equal("should return '<extra>' when common suffix appears before end", "<extra>", tail);
    }
    // Multi-character shared suffix at the very end of `full`.
    {
        std::string tail = after_common_suffix("start<middle>end</middle>", "prefix<middle>left</middle>", "prefix<middle>right</middle>");
        t.assert_equal("should return '' when common suffix </middle> is at end of full", "", tail);
    }
}
// Group runner: executes every compare_variants sub-test in a fixed order.
static void test_compare_variants(testing & t) {
    struct sub_test {
        const char * label;
        void (*run)(testing &);
    };
    const sub_test sub_tests[] = {
        { "compare_variants basic",             test_compare_variants_basic             },
        { "compare_variants messages modifier", test_compare_variants_messages_modifier },
        { "compare_variants tools modifier",    test_compare_variants_tools_modifier    },
        { "compare_variants both modifiers",    test_compare_variants_both_modifiers    },
        { "compare_variants template failure",  test_compare_variants_template_failure  },
        { "compare_variants identity",          test_compare_variants_identity          },
    };
    for (const sub_test & entry : sub_tests) {
        t.test(entry.label, entry.run);
    }
}
// Smoke test for differential_analyzer::compare_variants: renders a template
// that prints the first message's content, once with the original params and
// once after the modifier replaced that content, and expects a non-empty diff.
static void test_compare_variants_basic(testing & t) {
    // Template that outputs only the content of the first message
    common_chat_template tmpl("{{ messages[0]['content'] }}", "", "");
    template_params params;
    params.messages = json::array({
        json {{"role", "user"}, {"content", "Hello"}}
    });
    // Variant B: same conversation with different content
    auto modifier = [](template_params & p) {
        p.messages[0]["content"] = "World";
    };
    auto result = differential_analyzer::compare_variants(tmpl, params, modifier);
    t.assert_true("result should have value", result.has_value());
    if (!result.has_value()) {
        // Already reported above; dereferencing an empty optional below would be undefined behavior
        return;
    }
    // Only require that some output was produced; the exact split is covered by other tests
    t.assert_true("prefix or left should have content", !result->diff.prefix.empty() || !result->diff.left.empty());
}
// compare_variants with a modifier that changes only the message content:
// the diff is expected to isolate the old and new content ("A" vs "B").
static void test_compare_variants_messages_modifier(testing & t) {
    // Template that prints "<role>:<content>" for every message
    common_chat_template tmpl("{% for message in messages %}{{ message['role'] }}:{{ message['content'] }}{% endfor %}", "", "");
    template_params params;
    params.messages = json::array({
        json {{"role", "user"}, {"content", "A"}}
    });
    auto modifier = [](template_params & p) {
        p.messages[0]["content"] = "B";
    };
    std::optional<compare_variants_result> result = differential_analyzer::compare_variants(tmpl, params, modifier);
    t.assert_true("result should have value", result.has_value());
    if (!result.has_value()) {
        // Already reported above; dereferencing an empty optional below would be undefined behavior
        return;
    }
    t.assert_equal("left should be 'A'", "A", result->diff.left);
    t.assert_equal("right should be 'B'", "B", result->diff.right);
}
// compare_variants with a modifier that changes only the tool list:
// the diff is expected to isolate the old and new tool name ("foo" vs "bar").
static void test_compare_variants_tools_modifier(testing & t) {
    // Template that prints the name of every tool
    common_chat_template tmpl(
        "{% for tool in tools %}{{ tool['name'] }}{% endfor %}", "", "");
    template_params params;
    params.tools = json::array({
        json {{"name", "foo"}}
    });
    auto modifier = [](template_params & p) {
        p.tools[0]["name"] = "bar";
    };
    auto result = differential_analyzer::compare_variants(tmpl, params, modifier);
    t.assert_true("result should have value", result.has_value());
    if (!result.has_value()) {
        // Already reported above; dereferencing an empty optional below would be undefined behavior
        return;
    }
    t.assert_equal("left should be 'foo'", "foo", result->diff.left);
    t.assert_equal("right should be 'bar'", "bar", result->diff.right);
}
// compare_variants with a modifier that changes two fields of the same message
// (role and content); no tools are involved. Because the rendered strings
// share no leading or trailing text, the whole output is expected in left/right.
static void test_compare_variants_both_modifiers(testing & t) {
    // Template that prints "<role>:<content>" for every message
    common_chat_template tmpl(
        "{% for message in messages %}{{ message['role'] }}:{{ message['content'] }}{% endfor %}", "", "");
    template_params params;
    params.messages = json::array({
        json {{"role", "user"}, {"content", "A"}}
    });
    auto modifier = [](template_params & p) {
        p.messages[0]["content"] = "B";
        p.messages[0]["role"] = "newuser";
    };
    auto result = differential_analyzer::compare_variants(tmpl, params, modifier);
    t.assert_true("result should have value", result.has_value());
    if (!result.has_value()) {
        // Already reported above; dereferencing an empty optional below would be undefined behavior
        return;
    }
    t.assert_equal("left should be 'user:A'", "user:A", result->diff.left);
    t.assert_equal("right should be 'newuser:B'", "newuser:B", result->diff.right);
}
// compare_variants is expected to yield an empty optional when the template
// cannot be applied. The template below is syntactically valid but reads a
// message field that the supplied message does not contain.
static void test_compare_variants_template_failure(testing & t) {
    common_chat_template failing_tmpl("{{ messages[0]['nonexistent_field'] }}", "", "");

    template_params inputs;
    inputs.messages = json::array({
        json {{"role", "user"}, {"content", "Hello"}}
    });

    auto change_content = [](template_params & variant) {
        variant.messages[0]["content"] = "World";
    };

    auto outcome = differential_analyzer::compare_variants(failing_tmpl, inputs, change_content);
    const bool is_empty = !outcome.has_value();
    t.assert_true("result should be nullopt on template failure", is_empty);
}
// compare_variants with no modifier (nullptr): both variants render the same
// text, so the whole output is expected in `prefix` and the rest stays empty.
static void test_compare_variants_identity(testing & t) {
    // Template that outputs only the content of the first message
    common_chat_template tmpl("{{ messages[0]['content'] }}", "", "");
    template_params params;
    params.messages = json::array({
        json {{"role", "user"}, {"content", "Hello"}}
    });
    // No modifier - should use identity
    auto result = differential_analyzer::compare_variants(tmpl, params, nullptr);
    t.assert_true("result should have value", result.has_value());
    if (!result.has_value()) {
        // Already reported above; dereferencing an empty optional below would be undefined behavior
        return;
    }
    t.assert_equal("prefix should be 'Hello'", "Hello", result->diff.prefix);
    t.assert_equal("left should be empty", "", result->diff.left);
    t.assert_equal("right should be empty", "", result->diff.right);
    t.assert_equal("suffix should be empty", "", result->diff.suffix);
}
// ============================================================================
// Seed-OSS Template Tool Calling Analysis Tests
// ============================================================================
// Runs each Seed-OSS tool-calling analysis test as a named sub-test, in a fixed order.
static void test_seed_oss_tool_analysis(testing & t) {
    struct sub_test {
        const char * label;
        void (*run)(testing &);
    };

    const sub_test sub_tests[] = {
        { "Seed-OSS tool presence",       test_seed_oss_tool_presence       },
        { "Seed-OSS call count",          test_seed_oss_call_count          },
        { "Seed-OSS function names",      test_seed_oss_function_names      },
        { "Seed-OSS argument count",      test_seed_oss_argument_count      },
        { "Seed-OSS args presence",       test_seed_oss_args_presence       },
        { "Seed-OSS tool with reasoning", test_seed_oss_tool_with_reasoning },
    };

    for (const auto & entry : sub_tests) {
        t.test(entry.label, entry.run);
    }
}
// Reads the Seed-OSS chat template from its path relative to the working directory and
// wraps it in a common_chat_template. An unreadable or empty file is recorded as a
// failed assertion on `t`; the template object is still built and returned.
static common_chat_template load_seed_oss_template(testing & t) {
    const std::string path = "models/templates/ByteDance-Seed-OSS.jinja";

    std::ostringstream contents;
    std::ifstream      input(path, std::ios::binary);
    if (input.is_open()) {
        contents << input.rdbuf();
    }

    std::string source = contents.str();
    common_chat_template tmpl(source, "", "");

    const bool loaded = source.length() > 0;
    t.assert_true("Seed-OSS template loaded successfully", loaded);
    return tmpl;
}
// Builds one tool-call object with "id", "type" ("function") and a nested "function"
// object carrying the function name and its arguments.
static json build_tool_call(const std::string & name, const json & args, const std::string & id = "call_001") {
    json function = json::object();
    function["name"]      = name;
    function["arguments"] = args;

    json call = json::object();
    call["id"]       = id;
    call["type"]     = "function";
    call["function"] = function;
    return call;
}
// Builds a tools array holding a single function, "test_function_name", that takes two
// required string parameters, "param1" and "param2".
static json build_tools_definition() {
    // Both parameters have the same shape and differ only in their description.
    const auto string_param = [](const char * description) {
        json prop = json::object();
        prop["type"]        = "string";
        prop["description"] = description;
        return prop;
    };

    json properties = json::object();
    properties["param1"] = string_param("First parameter");
    properties["param2"] = string_param("Second parameter");

    json schema = json::object();
    schema["type"]       = "object";
    schema["properties"] = properties;
    schema["required"]   = json::array({"param1", "param2"});

    json function = json::object();
    function["name"]        = "test_function_name";
    function["description"] = "A test function for debugging";
    function["parameters"]  = schema;

    json tool = json::object();
    tool["type"]     = "function";
    tool["function"] = function;

    return json::array({ tool });
}
// T1: Compare an assistant turn without a tool call against one with a tool call
// (conversation: user, assistant). Only the messages differ between the two variants.
static void test_seed_oss_tool_presence(testing & t) {
    common_chat_template tmpl = load_seed_oss_template(t);

    json assistant_no_tools = json{
        {"role", "assistant"},
        {"content", "Let me help you."}
    };
    json assistant_with_tools = json{
        {"role", "assistant"},
        {"content", nullptr},
        {"tool_calls", json::array({
            build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}}))
        })}
    };
    json user_msg = json{
        {"role", "user"},
        {"content", "Hello, please help me."}
    };

    template_params params_no_tools;
    params_no_tools.messages              = json::array({user_msg, assistant_no_tools});
    params_no_tools.tools                 = build_tools_definition();
    params_no_tools.add_generation_prompt = false;
    params_no_tools.enable_thinking       = true;

    // Variant B reuses every setting of variant A and replaces only the messages.
    auto result = differential_analyzer::compare_variants(tmpl, params_no_tools,
        [&](template_params & p) {
            p.messages = json::array({user_msg, assistant_with_tools});
        });

    t.assert_true("T1 result should have value", result.has_value());
    if (!result.has_value()) {
        // The failure is already recorded above; an empty result must not be dereferenced.
        return;
    }
    const auto & diff = result->diff;

    t.assert_true("T1 prefix should contain system", diff.prefix.find("system") != std::string::npos);
    t.assert_true("T1 prefix should contain user", diff.prefix.find("user") != std::string::npos);
    t.assert_true("T1 prefix should contain assistant", diff.prefix.find("assistant") != std::string::npos);

    // Left should be the assistant content without tool
    t.assert_equal("T1 left should contain 'Let me help you.'", "Let me help you.", diff.left);

    // Right should contain the tool call markers
    t.assert_true("T1 right should contain tool_call begin", diff.right.find("<seed:tool_call>") != std::string::npos);
    t.assert_true("T1 right should contain function tag", diff.right.find("<function=test_function_name>") != std::string::npos);
    t.assert_true("T1 right should contain parameter=param1", diff.right.find("<parameter=param1>") != std::string::npos);
    t.assert_true("T1 right should contain parameter=param2", diff.right.find("<parameter=param2>") != std::string::npos);
    t.assert_true("T1 right should contain value1", diff.right.find("value1") != std::string::npos);
    t.assert_true("T1 right should contain value2", diff.right.find("value2") != std::string::npos);
    t.assert_true("T1 right should contain tool_call end", diff.right.find("</seed:tool_call>") != std::string::npos);

    // Suffix should be the eos token
    t.assert_equal("T1 suffix should be '<seed:eos>'", "<seed:eos>", diff.suffix);
}
// T2: Compare an assistant turn with one tool call against one with two tool calls.
static void test_seed_oss_call_count(testing & t) {
    common_chat_template tmpl = load_seed_oss_template(t);

    json assistant_one_call = json{
        {"role", "assistant"},
        {"content", nullptr},
        {"tool_calls", json::array({
            build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}}))
        })}
    };
    json assistant_two_calls = json{
        {"role", "assistant"},
        {"content", nullptr},
        {"tool_calls", json::array({
            build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}})),
            build_tool_call("test_function_name", json::object({{"param1", "value3"}, {"param2", "value4"}}), "call_002")
        })}
    };
    json user_msg = json{
        {"role", "user"},
        {"content", "Hello, please help me."}
    };

    template_params params_one;
    params_one.messages              = json::array({user_msg, assistant_one_call});
    params_one.tools                 = build_tools_definition();
    params_one.add_generation_prompt = false;
    params_one.enable_thinking       = true;

    auto result = differential_analyzer::compare_variants(tmpl, params_one,
        [&](template_params & p) {
            p.messages = json::array({user_msg, assistant_two_calls});
        });

    t.assert_true("T2 result should have value", result.has_value());
    if (!result.has_value()) {
        // The failure is already recorded above; an empty result must not be dereferenced.
        return;
    }
    const auto & diff = result->diff;

    // Prefix should include the first tool call
    t.assert_true("T2 prefix should contain first tool_call begin", diff.prefix.find("<seed:tool_call>") != std::string::npos);
    t.assert_true("T2 prefix should contain first function", diff.prefix.find("<function=test_function_name>") != std::string::npos);
    t.assert_true("T2 prefix should contain value1", diff.prefix.find("value1") != std::string::npos);
    t.assert_true("T2 prefix should contain value2", diff.prefix.find("value2") != std::string::npos);
    t.assert_true("T2 prefix should contain first tool_call end", diff.prefix.find("</seed:tool_call>") != std::string::npos);

    // Left should be empty (no second tool call in variant A)
    t.assert_equal("T2 left should be empty", "", diff.left);

    // Right should contain the second tool call
    t.assert_true("T2 right should contain second tool_call begin", diff.right.find("<seed:tool_call>") != std::string::npos);
    t.assert_true("T2 right should contain second function", diff.right.find("<function=test_function_name>") != std::string::npos);
    t.assert_true("T2 right should contain value3", diff.right.find("value3") != std::string::npos);
    t.assert_true("T2 right should contain value4", diff.right.find("value4") != std::string::npos);
    t.assert_true("T2 right should contain second tool_call end", diff.right.find("</seed:tool_call>") != std::string::npos);

    // Suffix should be the eos token
    t.assert_equal("T2 suffix should be '<seed:eos>'", "<seed:eos>", diff.suffix);
}
// T3: Compare two tool calls that differ only in the function name (func_alpha vs func_beta).
static void test_seed_oss_function_names(testing & t) {
    common_chat_template tmpl = load_seed_oss_template(t);

    // Build tools with two different function names sharing one parameter schema
    json parameters_schema = json::object();
    parameters_schema["type"] = "object";
    parameters_schema["properties"] = json::object();
    parameters_schema["properties"]["arg1"] = json::object({
        {"type", "string"},
        {"description", "Argument 1"}
    });
    parameters_schema["required"] = json::array({"arg1"});

    json tools = json::array({
        json{
            {"type", "function"},
            {"function", json{
                {"name", "func_alpha"},
                {"description", "First function"},
                {"parameters", parameters_schema}
            }}
        },
        json{
            {"type", "function"},
            {"function", json{
                {"name", "func_beta"},
                {"description", "Second function"},
                {"parameters", parameters_schema}
            }}
        }
    });

    json assistant_func_alpha = json{
        {"role", "assistant"},
        {"content", nullptr},
        {"tool_calls", json::array({
            build_tool_call("func_alpha", json::object({{"arg1", "test_value"}}))
        })}
    };
    json assistant_func_beta = json{
        {"role", "assistant"},
        {"content", nullptr},
        {"tool_calls", json::array({
            build_tool_call("func_beta", json::object({{"arg1", "test_value"}}))
        })}
    };
    json user_msg = json{
        {"role", "user"},
        {"content", "Hello"}
    };

    template_params params_alpha;
    params_alpha.messages              = json::array({user_msg, assistant_func_alpha});
    params_alpha.tools                 = tools;
    params_alpha.add_generation_prompt = false;
    params_alpha.enable_thinking       = true;

    auto result = differential_analyzer::compare_variants(tmpl, params_alpha,
        [&](template_params & p) {
            p.messages = json::array({user_msg, assistant_func_beta});
        });

    t.assert_true("T3 result should have value", result.has_value());
    if (!result.has_value()) {
        // The failure is already recorded above; an empty result must not be dereferenced.
        return;
    }
    const auto & diff = result->diff;

    bool func_alpha_in_left   = diff.left.find("func_alpha") != std::string::npos;
    bool func_alpha_in_prefix = diff.prefix.find("func_alpha") != std::string::npos;
    bool func_beta_in_right   = diff.right.find("func_beta") != std::string::npos;
    bool func_beta_in_prefix  = diff.prefix.find("func_beta") != std::string::npos;
    bool func_beta_in_suffix  = diff.suffix.find("func_beta") != std::string::npos;

    // Left should contain func_alpha (or be in prefix)
    t.assert_true("T3 left should contain func_alpha (or prefix)", func_alpha_in_left || func_alpha_in_prefix);

    // Right should contain func_beta
    t.assert_true("T3 right should contain func_beta", func_beta_in_right || func_beta_in_prefix || func_beta_in_suffix);

    // Both should have the same parameter value (in common parts, not in diffs)
    // Since both have same args, test_value will be in prefix/suffix
    t.assert_true("T3 diff should contain test_value (in prefix or suffix)",
        diff.prefix.find("test_value") != std::string::npos || diff.suffix.find("test_value") != std::string::npos);
}
// T4: Compare tool calls carrying different numbers of arguments (zero vs one, one vs two).
static void test_seed_oss_argument_count(testing & t) {
    common_chat_template tmpl = load_seed_oss_template(t);

    // Tool definition declaring two required string parameters; the calls below
    // supply zero, one or two of them.
    json params_2_required = json::object();
    params_2_required["type"] = "object";
    params_2_required["properties"] = json::object();
    params_2_required["properties"]["arg1"] = json::object({
        {"type", "string"},
        {"description", "Argument 1"}
    });
    params_2_required["properties"]["arg2"] = json::object({
        {"type", "string"},
        {"description", "Argument 2"}
    });
    params_2_required["required"] = json::array({"arg1", "arg2"});

    json tools = json::array({
        json{
            {"type", "function"},
            {"function", json{
                {"name", "test_func"},
                {"description", "Test function"},
                {"parameters", params_2_required}
            }}
        }
    });

    json assistant_zero_args = json{
        {"role", "assistant"},
        {"content", nullptr},
        {"tool_calls", json::array({
            build_tool_call("test_func", json::object())
        })}
    };
    json assistant_one_arg = json{
        {"role", "assistant"},
        {"content", nullptr},
        {"tool_calls", json::array({
            build_tool_call("test_func", json::object({{"arg1", "value1"}}))
        })}
    };
    json assistant_two_args = json{
        {"role", "assistant"},
        {"content", nullptr},
        {"tool_calls", json::array({
            build_tool_call("test_func", json::object({{"arg1", "value1"}, {"arg2", "value2"}}))
        })}
    };
    json user_msg = json{
        {"role", "user"},
        {"content", "Hello"}
    };

    // Test zero vs one
    template_params params_zero;
    params_zero.messages              = json::array({user_msg, assistant_zero_args});
    params_zero.tools                 = tools;
    params_zero.add_generation_prompt = false;
    params_zero.enable_thinking       = true;

    auto result_zero_one = differential_analyzer::compare_variants(tmpl, params_zero,
        [&](template_params & p) {
            p.messages = json::array({user_msg, assistant_one_arg});
        });

    t.assert_true("T4 zero vs one result should have value", result_zero_one.has_value());
    // Only inspect the diff when there is one; the second comparison below still runs either way.
    if (result_zero_one.has_value()) {
        const auto & diff_zero_one = result_zero_one->diff;
        t.assert_true("T4 zero vs one left should be empty or minimal", diff_zero_one.left.empty());
        t.assert_true("T4 zero vs one right should contain arg1", diff_zero_one.right.find("arg1") != std::string::npos);
    }

    // Test one vs two
    template_params params_one;
    params_one.messages              = json::array({user_msg, assistant_one_arg});
    params_one.tools                 = tools;
    params_one.add_generation_prompt = false;
    params_one.enable_thinking       = true;

    auto result_one_two = differential_analyzer::compare_variants(tmpl, params_one,
        [&](template_params & p) {
            p.messages = json::array({user_msg, assistant_two_args});
        });

    t.assert_true("T4 one vs two result should have value", result_one_two.has_value());
    if (!result_one_two.has_value()) {
        // The failure is already recorded above; an empty result must not be dereferenced.
        return;
    }
    const auto & diff4 = result_one_two->diff;

    t.assert_true("T4 one vs two left should contain arg1 (or prefix)",
        diff4.left.find("arg1") != std::string::npos || diff4.prefix.find("arg1") != std::string::npos);
    t.assert_true("T4 one vs two right should contain arg1 (or prefix)",
        diff4.right.find("arg1") != std::string::npos || diff4.prefix.find("arg1") != std::string::npos);
    t.assert_true("T4 one vs two right should contain arg2 (or prefix/suffix)",
        diff4.right.find("arg2") != std::string::npos || diff4.prefix.find("arg2") != std::string::npos || diff4.suffix.find("arg2") != std::string::npos);
}
// T5: Compare tool calls that differ in which arguments are present
// (param1 only vs param2 only, and param1 only vs both).
static void test_seed_oss_args_presence(testing & t) {
    common_chat_template tmpl = load_seed_oss_template(t);

    json assistant_same_arg = json{
        {"role", "assistant"},
        {"content", nullptr},
        {"tool_calls", json::array({
            build_tool_call("test_function_name", json::object({{"param1", "value1"}}))
        })}
    };
    json assistant_other_arg = json{
        {"role", "assistant"},
        {"content", nullptr},
        {"tool_calls", json::array({
            build_tool_call("test_function_name", json::object({{"param2", "value2"}}))
        })}
    };
    json assistant_both_args = json{
        {"role", "assistant"},
        {"content", nullptr},
        {"tool_calls", json::array({
            build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}}))
        })}
    };
    json user_msg = json{
        {"role", "user"},
        {"content", "Hello"}
    };

    template_params params_same;
    params_same.messages              = json::array({user_msg, assistant_same_arg});
    params_same.tools                 = build_tools_definition();
    params_same.add_generation_prompt = false;
    params_same.enable_thinking       = true;

    // Test same arg vs other arg
    auto result_same_other = differential_analyzer::compare_variants(tmpl, params_same,
        [&](template_params & p) {
            p.messages = json::array({user_msg, assistant_other_arg});
        });

    t.assert_true("T5 same vs other result should have value", result_same_other.has_value());
    // Only inspect the diff when there is one; the second comparison below still runs either way.
    if (result_same_other.has_value()) {
        const auto & diff5a = result_same_other->diff;
        t.assert_true("T5 same vs other left should contain param1 (or prefix/suffix)",
            diff5a.left.find("param1") != std::string::npos || diff5a.prefix.find("param1") != std::string::npos || diff5a.suffix.find("param1") != std::string::npos);
        t.assert_true("T5 same vs other left should contain value1 (or prefix/suffix)",
            diff5a.left.find("value1") != std::string::npos || diff5a.prefix.find("value1") != std::string::npos);
        t.assert_true("T5 same vs other right should contain param2 (or prefix/suffix)",
            diff5a.right.find("param2") != std::string::npos || diff5a.prefix.find("param2") != std::string::npos || diff5a.suffix.find("param2") != std::string::npos);
        t.assert_true("T5 same vs other right should contain value2 (or prefix/suffix)",
            diff5a.right.find("value2") != std::string::npos || diff5a.prefix.find("value2") != std::string::npos || diff5a.suffix.find("value2") != std::string::npos);
    }

    // Test same arg vs both args
    auto result_same_both = differential_analyzer::compare_variants(tmpl, params_same,
        [&](template_params & p) {
            p.messages = json::array({user_msg, assistant_both_args});
        });

    t.assert_true("T5 same vs both result should have value", result_same_both.has_value());
    if (!result_same_both.has_value()) {
        // The failure is already recorded above; an empty result must not be dereferenced.
        return;
    }
    const auto & diff5b = result_same_both->diff;

    t.assert_true("T5 same vs both left should contain param1 (or prefix/suffix)",
        diff5b.left.find("param1") != std::string::npos || diff5b.prefix.find("param1") != std::string::npos || diff5b.suffix.find("param1") != std::string::npos);
    t.assert_true("T5 same vs both right should contain param1 (or prefix/suffix)",
        diff5b.right.find("param1") != std::string::npos || diff5b.prefix.find("param1") != std::string::npos || diff5b.suffix.find("param1") != std::string::npos);
    t.assert_true("T5 same vs both right should contain param2 (or prefix/suffix)",
        diff5b.right.find("param2") != std::string::npos || diff5b.prefix.find("param2") != std::string::npos || diff5b.suffix.find("param2") != std::string::npos);
}
// T6: Compare the same tool call with and without reasoning_content on the assistant turn.
static void test_seed_oss_tool_with_reasoning(testing & t) {
    common_chat_template tmpl = load_seed_oss_template(t);

    json assistant_tool_only = json{
        {"role", "assistant"},
        {"content", nullptr},
        {"tool_calls", json::array({
            build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}}))
        })}
    };
    json assistant_tool_with_reasoning = json{
        {"role", "assistant"},
        {"content", nullptr},
        {"tool_calls", json::array({
            build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}}))
        })},
        {"reasoning_content", "I need to call the tool first."}
    };
    json user_msg = json{
        {"role", "user"},
        {"content", "Hello, please help me."}
    };

    template_params params_tool_only;
    params_tool_only.messages              = json::array({user_msg, assistant_tool_only});
    params_tool_only.tools                 = build_tools_definition();
    params_tool_only.add_generation_prompt = false;
    params_tool_only.enable_thinking       = true;

    auto result = differential_analyzer::compare_variants(tmpl, params_tool_only,
        [&](template_params & p) {
            p.messages = json::array({user_msg, assistant_tool_with_reasoning});
        });

    t.assert_true("T6 result should have value", result.has_value());
    if (!result.has_value()) {
        // The failure is already recorded above; an empty result must not be dereferenced.
        return;
    }
    const auto & diff = result->diff;

    // Left should be empty (no reasoning in variant A)
    t.assert_equal("T6 left should be empty", "", diff.left);

    // Right should contain the thinking token with reasoning content
    t.assert_true("T6 right should contain think begin", diff.right.find("<seed:think>") != std::string::npos);
    t.assert_true("T6 right should contain reasoning content", diff.right.find("I need to call the tool first.") != std::string::npos);
    t.assert_true("T6 right should contain think end", diff.right.find("</seed:think>") != std::string::npos);

    // Prefix should contain the assistant role
    t.assert_true("T6 prefix should contain assistant", diff.prefix.find("assistant") != std::string::npos);

    // Suffix should contain the tool call
    t.assert_true("T6 suffix should contain tool_call begin", diff.suffix.find("<seed:tool_call>") != std::string::npos);
    t.assert_true("T6 suffix should contain function name", diff.suffix.find("test_function_name") != std::string::npos);
    t.assert_true("T6 suffix should contain eos", diff.suffix.find("<seed:eos>") != std::string::npos);
}
// Reads the chat template file at `template_path` and wraps its contents in a
// common_chat_template. If the file cannot be opened or is empty, a failed assertion is
// recorded on `t`; the template is still constructed (from an empty source) and returned.
static common_chat_template load_template(testing & t, const std::string & template_path) {
    std::ifstream fin(template_path, std::ios::binary);
    std::ostringstream buf;
    if (fin.is_open()) {
        buf << fin.rdbuf();
    }
    std::string template_source = buf.str();
    common_chat_template tmpl(template_source, "", "");
    // This loader is shared by several model templates, so the message names the file
    // instead of one specific model.
    t.assert_true("template loaded successfully: " + template_path, !template_source.empty());
    return tmpl;
}
// ============================================================================
// Nemotron Template Analysis Tests
// ============================================================================
// Loads the Nemotron chat template through the shared load_template helper.
static common_chat_template load_nemotron_template(testing & t) {
    const std::string nemotron_path = "models/templates/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.jinja";
    return load_template(t, nemotron_path);
}
// Runs the Nemotron template analysis tests as named sub-tests, in a fixed order.
static void test_nemotron_analysis(testing & t) {
    struct sub_test {
        const char * label;
        void (*run)(testing &);
    };

    const sub_test sub_tests[] = {
        { "Nemotron reasoning detection", test_nemotron_reasoning_detection },
        { "Nemotron tool format",         test_nemotron_tool_format         },
    };

    for (const auto & entry : sub_tests) {
        t.test(entry.label, entry.run);
    }
}
// Checks the reasoning markers, reasoning mode and content mode that the differential
// analyzer detects for the Nemotron template.
static void test_nemotron_reasoning_detection(testing & t) {
    common_chat_template tmpl = load_nemotron_template(t);

    // Run differential analysis; analyze() is given only the template, so no
    // conversation is prepared here.
    auto analysis = differential_analyzer::analyze(tmpl);

    // Check reasoning markers
    t.assert_equal("reasoning_start should be '<think>'", "<think>", analysis.markers.reasoning_start);
    t.assert_equal("reasoning_end should be '</think>'", "</think>", analysis.markers.reasoning_end);

    // Check reasoning mode detection
    // Nemotron uses forced closed reasoning with add_generation_prompt
    t.assert_equal("reasoning should be FORCED_CLOSED", reasoning_mode::FORCED_CLOSED, analysis.reasoning);

    // Make sure reasoning markers don't spill over to content markers
    t.assert_equal("content start should be empty", "", analysis.markers.content_start);
    t.assert_equal("content end should be empty", "", analysis.markers.content_end);
    t.assert_equal("content should be PLAIN", content_mode::PLAIN, analysis.content);
}
// Checks the tool-call markers, format classification and capability flags that the
// differential analyzer detects for the Nemotron template.
static void test_nemotron_tool_format(testing & t) {
    common_chat_template tmpl = load_nemotron_template(t);

    auto analysis = differential_analyzer::analyze(tmpl);
    const auto & markers = analysis.markers;

    // Section / per-call wrapping: each call is wrapped on its own, so there is no section marker
    t.assert_equal("tool_section_start should be empty (per-call format)", "", markers.tool_section_start);
    t.assert_equal("tool_section_end should be empty (per-call format)", "", markers.tool_section_end);
    t.assert_equal("per_call_start should be '<tool_call>\\n'", "<tool_call>\n", markers.per_call_start);
    t.assert_equal("per_call_end should be '</tool_call>'", "</tool_call>", markers.per_call_end);
    t.assert_true("should support parallel calls", analysis.supports_parallel_calls);

    // Function markers
    t.assert_equal("func_name_prefix should be '<function='", "<function=", markers.func_name_prefix);
    t.assert_equal("func_name_suffix should be '>\\n'", ">\n", markers.func_name_suffix);
    t.assert_equal("func_close should be '</function>\\n'", "</function>\n", markers.func_close);

    // Argument markers (trailing newlines are part of the expected marker text)
    t.assert_equal("arg_name_prefix should be '<parameter='", "<parameter=", markers.arg_name_prefix);
    t.assert_equal("arg_name_suffix should be '>\\n'", ">\n", markers.arg_name_suffix);
    t.assert_equal("arg_value_suffix should be '</parameter>\\n'", "</parameter>\n", markers.arg_value_suffix);

    // Format classification
    const bool is_tag_with_tagged = analysis.tools == tool_format::TAG_WITH_TAGGED;
    t.assert_true("tool format should be TAG_WITH_TAGGED", is_tag_with_tagged);

    // Tool support flag
    t.assert_true("should support tools", analysis.supports_tools);
}
// Loads the Cohere Command-R7B tool-use chat template through the shared load_template helper.
static common_chat_template load_cohere_template(testing & t) {
    const std::string cohere_path = "models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja";
    return load_template(t, cohere_path);
}
// Runs the Cohere template analysis tests as named sub-tests, in a fixed order.
static void test_cohere_analysis(testing & t) {
    struct sub_test {
        const char * label;
        void (*run)(testing &);
    };

    const sub_test sub_tests[] = {
        { "Cohere reasoning detection", test_cohere_reasoning_detection },
        { "Cohere tool format",         test_cohere_tool_format         },
    };

    for (const auto & entry : sub_tests) {
        t.test(entry.label, entry.run);
    }
}
// Checks the reasoning markers, reasoning mode, content markers and content mode that the
// differential analyzer detects for the Cohere template.
static void test_cohere_reasoning_detection(testing & t) {
    common_chat_template tmpl = load_cohere_template(t);

    auto analysis = differential_analyzer::analyze(tmpl);
    const auto & markers = analysis.markers;

    // Reasoning is delimited by special START/END_THINKING tokens
    t.assert_equal("reasoning_start should be '<|START_THINKING|>'", "<|START_THINKING|>", markers.reasoning_start);
    t.assert_equal("reasoning_end should be '<|END_THINKING|>'", "<|END_THINKING|>", markers.reasoning_end);

    // Reasoning is expected to be classified as appearing only together with tool calls
    t.assert_equal("reasoning should be TOOLS_ONLY", reasoning_mode::TOOLS_ONLY, analysis.reasoning);

    // Content is wrapped in START/END_RESPONSE tokens
    t.assert_equal("content_start should be '<|START_RESPONSE|>'", "<|START_RESPONSE|>", markers.content_start);
    t.assert_equal("content_end should be '<|END_RESPONSE|>'", "<|END_RESPONSE|>", markers.content_end);

    // The wrapping is expected both with and without tools
    t.assert_equal("content should be ALWAYS_WRAPPED", content_mode::ALWAYS_WRAPPED, analysis.content);
}
// Checks the tool-call markers, JSON field names, format classification and capability
// flags that the differential analyzer detects for the Cohere template.
static void test_cohere_tool_format(testing & t) {
    common_chat_template tmpl = load_cohere_template(t);

    // Run differential analysis
    auto analysis = differential_analyzer::analyze(tmpl);

    // Check tool section markers - Cohere uses ACTION markers
    t.assert_equal("tool_section_start should be '<|START_ACTION|>'", "<|START_ACTION|>", analysis.markers.tool_section_start);
    t.assert_equal("tool_section_end should be '<|END_ACTION|>'", "<|END_ACTION|>", analysis.markers.tool_section_end);

    // JSON_NATIVE format has no per-call markers
    t.assert_equal("per_call_start should be empty", "", analysis.markers.per_call_start);
    t.assert_equal("per_call_end should be empty", "", analysis.markers.per_call_end);

    // JSON_NATIVE format has empty function markers (no XML-style markers)
    t.assert_equal("func_name_prefix should be empty", "", analysis.markers.func_name_prefix);
    t.assert_equal("func_name_suffix should be empty", "", analysis.markers.func_name_suffix);
    t.assert_equal("func_close should be empty", "", analysis.markers.func_close);

    // JSON_NATIVE format has empty args markers
    t.assert_equal("args_start should be empty", "", analysis.markers.args_start);
    t.assert_equal("args_end should be empty", "", analysis.markers.args_end);

    // JSON_NATIVE format has empty argument markers
    t.assert_equal("arg_name_prefix should be empty", "", analysis.markers.arg_name_prefix);
    t.assert_equal("arg_name_suffix should be empty", "", analysis.markers.arg_name_suffix);
    t.assert_equal("arg_value_prefix should be empty", "", analysis.markers.arg_value_prefix);
    t.assert_equal("arg_value_suffix should be empty", "", analysis.markers.arg_value_suffix);
    t.assert_equal("arg_separator should be empty", "", analysis.markers.arg_separator);

    // Check JSON field names - Cohere uses non-standard names
    t.assert_equal("name_field should be 'tool_name'", "tool_name", analysis.name_field);
    t.assert_equal("args_field should be 'parameters'", "parameters", analysis.args_field);
    // Cohere's 'tool_call_id' isn't a real tool call id field, i.e. with the OpenAI tool call
    // ID format, so the detected id_field is expected to stay empty
    t.assert_equal("id_field should be empty", "", analysis.id_field);

    // Check format classification
    t.assert_equal("tool format should be JSON_NATIVE", tool_format::JSON_NATIVE, analysis.tools);

    // Check flags
    t.assert_true("should support tools", analysis.supports_tools);
    t.assert_true("should support parallel calls", analysis.supports_parallel_calls);
    t.assert_true("should not require nonnull content", !analysis.requires_nonnull_content);
    t.assert_true("tools_array_wrapped should be true", analysis.tools_array_wrapped);
}
// ============================================================================
// standard_json_tools Format Tests
// ============================================================================
// Builds the tools array used by the standard_json_tools tests: one function,
// "get_current_weather", with a required "location" string and an optional "unit"
// string restricted to "celsius" or "fahrenheit".
static json build_test_tools() {
    json location = json::object();
    location["type"]        = "string";
    location["description"] = "The city and state";

    json unit = json::object();
    unit["type"]        = "string";
    unit["description"] = "Temperature unit";
    unit["enum"]        = json::array({"celsius", "fahrenheit"});

    json properties = json::object();
    properties["location"] = location;
    properties["unit"]     = unit;

    json schema = json::object();
    schema["type"]       = "object";
    schema["properties"] = properties;
    schema["required"]   = json::array({"location"});

    json function = json::object();
    function["name"]        = "get_current_weather";
    function["description"] = "Get the current weather in a given location";
    function["parameters"]  = schema;

    json tool = json::object();
    tool["type"]     = "function";
    tool["function"] = function;

    return json::array({ tool });
}
// Runs the standard_json_tools parsing tests, one per supported JSON tool-call layout.
static void test_standard_json_tools_formats(testing & t) {
    struct sub_test {
        const char * label;
        void (*run)(testing &);
    };

    const sub_test sub_tests[] = {
        { "OpenAI format",          test_standard_json_tools_openai       },
        { "Cohere format",          test_standard_json_tools_cohere       },
        { "function-as-key format", test_standard_json_tools_function_key },
    };

    for (const auto & entry : sub_tests) {
        t.test(entry.label, entry.run);
    }
}
// Test 1: OpenAI standard format - name and arguments are nested under "function":
// {"id": "call_abc", "function": {"name": "get_weather", "arguments": {"location": "NYC"}}}
static void test_standard_json_tools_openai(testing & t) {
    json tools = build_test_tools();

    // Grammar: free content up to the opening marker, an optional tool-call section, end of input.
    auto parser = build_chat_peg_unified_parser([&tools](common_chat_peg_unified_builder & builder) {
        auto tool_section = builder.standard_json_tools(
            "<tool_call>", "</tool_call>", tools,
            /* parallel */ true,
            /* force */ false,
            /* name_key */ "function.name",
            /* args_key */ "function.arguments",
            /* array_wrapped */ false,
            /* function_is_key */ false,
            /* call_id_key */ "id",
            /* gen_call_id_key */ "",
            /* parameters_order */ {}
        );
        return builder.content(builder.until("<tool_call>")) + builder.optional(tool_section) + builder.end();
    });

    std::string input = "Let me check the weather.";
    input += "<tool_call>";
    input += R"({"id": "call_abc123", "function": {"name": "get_current_weather", "arguments": {"location": "NYC"}}})";
    input += "</tool_call>";

    common_peg_parse_context ctx(input, false);
    auto parsed = parser.parse(ctx);
    t.assert_true("parse success", parsed.success());

    common_chat_msg message;
    auto mapper = common_chat_peg_unified_mapper(message);
    mapper.from_ast(ctx.ast, parsed);

    t.assert_equal("tool calls count", 1u, message.tool_calls.size());
    if (!message.tool_calls.empty()) {
        const auto & call = message.tool_calls[0];
        t.assert_equal("tool name", "get_current_weather", call.name);
        t.assert_equal("tool id", "call_abc123", call.id);
    }

    const bool has_content = message.content.find("Let me check the weather") != std::string::npos;
    t.assert_true("content present", has_content);
}
// Test 2: Cohere format - flat object with non-standard key names:
// {"tool_call_id": 0, "tool_name": "get_weather", "parameters": {"location": "NYC"}}
static void test_standard_json_tools_cohere(testing & t) {
    json tools = build_test_tools();

    // Grammar: free content up to the opening marker, an optional tool-call section, end of input.
    auto parser = build_chat_peg_unified_parser([&tools](common_chat_peg_unified_builder & builder) {
        auto tool_section = builder.standard_json_tools(
            "<|START_ACTION|>[", "]<|END_ACTION|>", tools,
            /* parallel */ true,
            /* force */ false,
            /* name_key */ "tool_name",
            /* args_key */ "parameters",
            /* array_wrapped */ false, // Brackets are part of section markers
            /* function_is_key */ false,
            /* call_id_key */ "",
            /* gen_call_id_key */ "tool_call_id",
            /* parameters_order */ {"tool_call_id", "tool_name", "parameters"}
        );
        return builder.content(builder.until("<|START_ACTION|>")) + builder.optional(tool_section) + builder.end();
    });

    std::string input = "Let me search for that.";
    input += "<|START_ACTION|>[";
    input += R"({"tool_call_id": 0, "tool_name": "get_current_weather", "parameters": {"location": "NYC", "unit": "celsius"}})";
    input += "]<|END_ACTION|>";

    common_peg_parse_context ctx(input, false);
    auto parsed = parser.parse(ctx);
    t.assert_true("parse success", parsed.success());

    common_chat_msg message;
    auto mapper = common_chat_peg_unified_mapper(message);
    mapper.from_ast(ctx.ast, parsed);

    t.assert_equal("tool calls count", 1u, message.tool_calls.size());
    if (!message.tool_calls.empty()) {
        const auto & call = message.tool_calls[0];
        t.assert_equal("tool name", "get_current_weather", call.name);
        t.assert_equal("tool id", "0", call.id);
    }

    const bool has_content = message.content.find("Let me search") != std::string::npos;
    t.assert_true("content present", has_content);
}
// Test 3: Function-as-Key Format
// The function name is the object key; id and arguments are nested under it:
// {"get_current_weather": {"id": "call-0001", "args": {"location": "NYC"}}}
static void test_standard_json_tools_function_key(testing & t) {
    json tools = build_test_tools();

    auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) {
        auto calls_section = p.standard_json_tools(
            "<tool_calls>[",
            "]</tool_calls>",
            tools,
            /* parallel */         true,
            /* force */            false,
            /* name_key */         "",  // Name is the key itself
            /* args_key */         "args",
            /* array_wrapped */    false,
            /* function_is_key */  true,
            /* call_id_key */      "id",
            /* gen_call_id_key */  "",
            /* parameters_order */ {}
        );
        // Leading free text, then an optional tool-call section, then end of input.
        return p.content(p.until("<tool_calls>")) + p.optional(calls_section) + p.end();
    });

    std::string input =
        "I'll call the weather function."
        "<tool_calls>["
        R"({"get_current_weather": {"id": "call-0001", "args": {"location": "NYC", "unit": "celsius"}}})"
        "]</tool_calls>";

    common_peg_parse_context ctx(input, false);
    auto outcome = parser.parse(ctx);
    t.assert_true("parse success", outcome.success());

    // Map the parsed AST onto a chat message and inspect what was extracted.
    common_chat_msg msg;
    auto mapper = common_chat_peg_unified_mapper(msg);
    mapper.from_ast(ctx.ast, outcome);

    t.assert_equal("tool calls count", 1u, msg.tool_calls.size());
    const bool has_call = !msg.tool_calls.empty();
    if (has_call) {
        const auto & call = msg.tool_calls[0];
        t.assert_equal("tool name", "get_current_weather", call.name);
        t.assert_equal("tool id", "call-0001", call.id);
    }

    const bool content_found = msg.content.find("I'll call the weather") != std::string::npos;
    t.assert_true("content present", content_found);
}
// ============================================================================
// normalize_quotes_to_json Tests
// ============================================================================
// Copy of the function for isolated testing (original is static in chat-peg-parser.cpp)
//
// Rewrites single-quoted string delimiters as double quotes so that a
// Python-style literal reads as JSON:
//   - inside a single-quoted string: \' becomes ', a bare " becomes \",
//     and any other backslash pair is copied through unchanged
//   - inside a double-quoted string: backslash pairs and ' are copied unchanged
//   - outside any string: a backslash is copied on its own and the character
//     after it is then processed normally
//   - a backslash that is the last character of the input is copied as-is
static std::string normalize_quotes_to_json(const std::string & input) {
    enum class quoting { outside, single_quoted, double_quoted };

    std::string out;
    out.reserve(input.size() + 16);

    quoting      state = quoting::outside;
    const size_t len   = input.size();

    for (size_t pos = 0; pos < len; ++pos) {
        const char ch       = input[pos];
        const bool has_next = pos + 1 < len;

        if (ch == '\\' && has_next) {
            if (state == quoting::outside) {
                // Not inside a string: emit only the backslash; the next
                // character goes through the regular handling below.
                out += ch;
                continue;
            }
            // Inside a string: consume the escaped character together with the backslash.
            const char escaped = input[++pos];
            if (state == quoting::single_quoted && escaped == '\'') {
                // An escaped single quote needs no escaping once the string is double-quoted.
                out += '\'';
            } else {
                out += ch;
                out += escaped;
            }
            continue;
        }

        switch (ch) {
            case '"':
                if (state == quoting::single_quoted) {
                    // A bare double quote inside a single-quoted string must be escaped.
                    out += "\\\"";
                } else {
                    state = (state == quoting::double_quoted) ? quoting::outside : quoting::double_quoted;
                    out += ch;
                }
                break;
            case '\'':
                if (state == quoting::double_quoted) {
                    out += ch;
                } else {
                    // Opening or closing delimiter of a single-quoted string.
                    state = (state == quoting::single_quoted) ? quoting::outside : quoting::single_quoted;
                    out += '"';
                }
                break;
            default:
                out += ch;
                break;
        }
    }

    return out;
}
// Runs normalize_quotes_to_json over a set of single-quoted object literals
// and compares each result with the expected double-quoted form.
static void test_normalize_quotes_to_json(testing & t) {
    t.test("basic single to double quotes", [](testing & sub) {
        std::string python_like = "{'key': 'value'}";
        std::string json_like   = "{\"key\": \"value\"}";
        sub.assert_equal("basic conversion", json_like, normalize_quotes_to_json(python_like));
    });

    // \' inside a single-quoted string should come out as a plain '.
    t.test("escaped single quote inside single-quoted string", [](testing & sub) {
        std::string python_like = "{'code': 'print(\\'hello\\')'}";
        std::string json_like   = "{\"code\": \"print('hello')\"}";
        sub.assert_equal("escaped single quote", json_like, normalize_quotes_to_json(python_like));
    });

    // A bare " inside a single-quoted string should come out as \".
    t.test("double quote inside single-quoted string", [](testing & sub) {
        std::string python_like = "{'msg': 'He said \"hi\"'}";
        std::string json_like   = "{\"msg\": \"He said \\\"hi\\\"\"}";
        sub.assert_equal("double quote escaping", json_like, normalize_quotes_to_json(python_like));
    });

    // Escaped backslashes should pass through untouched.
    t.test("nested backslash escapes", [](testing & sub) {
        std::string python_like = "{'path': 'C:\\\\Users\\\\test'}";
        std::string json_like   = "{\"path\": \"C:\\\\Users\\\\test\"}";
        sub.assert_equal("backslash escaping", json_like, normalize_quotes_to_json(python_like));
    });

    // A \n escape sequence should pass through untouched.
    t.test("newline escapes", [](testing & sub) {
        std::string python_like = "{'text': 'line1\\nline2'}";
        std::string json_like   = "{\"text\": \"line1\\nline2\"}";
        sub.assert_equal("newline escaping", json_like, normalize_quotes_to_json(python_like));
    });

    // Strings that are already double-quoted should be left alone.
    t.test("mixed quotes", [](testing & sub) {
        std::string python_like = "{\"already_double\": 'single_value'}";
        std::string json_like   = "{\"already_double\": \"single_value\"}";
        sub.assert_equal("mixed quotes", json_like, normalize_quotes_to_json(python_like));
    });

    t.test("embedded quotes - the test case", test_normalize_quotes_with_embedded_quotes);
}
// Test case that mirrors the Seed-OSS failing test scenario:
// single-quoted values that themselves contain double quotes such as "14" and "bar".
static void test_normalize_quotes_with_embedded_quotes(testing & t) {
    // Single-quoted literal with double quotes embedded in the string values.
    std::string python_like = "{'filename': 'foo.cpp', 'oldString': 'def foo(arg = \"14\"):\\n return arg + \"bar\"\\n', 'newString': 'def foo(arg = \"15\"):\\n pass\\n'}";
    // Delimiters become double quotes; the embedded double quotes become \".
    std::string json_like = "{\"filename\": \"foo.cpp\", \"oldString\": \"def foo(arg = \\\"14\\\"):\\n return arg + \\\"bar\\\"\\n\", \"newString\": \"def foo(arg = \\\"15\\\"):\\n pass\\n\"}";

    std::string normalized = normalize_quotes_to_json(python_like);
    t.assert_equal("normalize quotes with embedded double quotes", json_like, normalized);

    // The normalized text must also be accepted by the JSON parser; any
    // exception from parsing or field access is reported as a failed assertion.
    try {
        json doc = json::parse(normalized);
        t.assert_true("result is valid JSON", true);

        t.assert_equal("filename field", "foo.cpp", doc["filename"].get<std::string>());

        const bool old_has_quotes = doc["oldString"].get<std::string>().find("\"14\"") != std::string::npos;
        t.assert_true("oldString contains embedded quotes", old_has_quotes);

        const bool new_has_quotes = doc["newString"].get<std::string>().find("\"15\"") != std::string::npos;
        t.assert_true("newString contains embedded quotes", new_has_quotes);
    } catch (const std::exception & e) {
        t.assert_true(std::string("JSON parse failed: ") + e.what(), false);
    }
}
// ============================================================================
// TAG_WITH_TAGGED Argument Parsing Tests
// ============================================================================
// Build the tools array for the "edit" function: a single function tool with
// three required string parameters (filename, oldString, newString).
static json build_edit_tool() {
    // Every parameter has the same shape: a string type plus a description.
    const auto string_param = [](const char * description) {
        return json::object({
            {"type", "string"},
            {"description", description}
        });
    };

    // NOTE(review): keys are inserted in the order filename, oldString, newString.
    // test_tagged_args_with_embedded_quotes sequences its argument parsers from
    // properties.items(), so this order may matter if json preserves insertion
    // order - confirm before reordering.
    json properties = json::object();
    properties["filename"]  = string_param("Path of file to edit");
    properties["oldString"] = string_param("String to replace");
    properties["newString"] = string_param("New (replacement) value");

    json schema = json::object();
    schema["type"]       = "object";
    schema["properties"] = properties;
    schema["required"]   = json::array({"filename", "oldString", "newString"});

    json function_def = json::object({
        {"name", "edit"},
        {"description", "Edit a file"},
        {"parameters", schema}
    });

    json tool = json::object({
        {"type", "function"},
        {"function", function_def}
    });

    json tool_list = json::array();
    tool_list.push_back(tool);
    return tool_list;
}
// Test that reproduces the Seed-OSS template issue with embedded quotes:
// tagged string arguments containing double quotes must still end up as
// valid JSON in the extracted tool call arguments.
static void test_tagged_args_with_embedded_quotes(testing & t) {
    json tools = build_edit_tool();

    // Hand-built parser for the TAG_WITH_TAGGED format like Seed-OSS/Nemotron:
    //   <seed:tool_call><function=NAME><parameter=KEY>VALUE</parameter>...</function></seed:tool_call>
    auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) {
        auto tool_choice = p.choice();

        for (const auto & tool_def : tools) {
            if (!tool_def.contains("function")) {
                continue;
            }
            const auto & function = tool_def.at("function");
            std::string  name     = function.at("name");
            const auto & params   = function.at("parameters");

            const bool has_properties = params.contains("properties") && params.at("properties").is_object();
            if (!has_properties) {
                continue;
            }
            const auto & properties = params.at("properties");

            // One optional parser per declared parameter:
            //   <parameter=KEY> VALUE </parameter>
            std::vector<common_peg_parser> arg_parsers;
            for (const auto & [param_name, param_schema] : properties.items()) {
                auto open_tag = p.tool_arg_open(
                    p.literal("<parameter=") + p.tool_arg_name(p.literal(param_name)) + p.literal(">"));
                auto value     = p.tool_arg_string_value(p.until("</parameter>"));
                auto close_tag = p.tool_arg_close(p.literal("</parameter>"));

                auto arg = p.tool_arg(open_tag + p.space() + value + p.space() + close_tag);
                arg_parsers.push_back(p.optional(p.rule("arg-" + param_name, arg)));
            }

            // Chain the argument parsers, with space() between consecutive ones.
            common_peg_parser args_seq = p.eps();
            bool first_arg = true;
            for (const auto & arg_parser : arg_parsers) {
                if (!first_arg) {
                    args_seq = args_seq + p.space();
                }
                args_seq  = args_seq + arg_parser;
                first_arg = false;
            }

            auto func_parser =
                p.tool_open(p.literal("<function=") + p.tool_name(p.literal(name)) + p.literal(">")) +
                p.space() + args_seq + p.space() +
                p.tool_close(p.literal("</function>"));

            tool_choice |= p.rule("tool-" + name, p.tool(func_parser));
        }

        auto tool_section =
            p.literal("<seed:tool_call>") + p.space() +
            tool_choice +
            p.space() + p.literal("</seed:tool_call>");

        // Leading free text, then an optional tool-call section, then end of input.
        return p.content(p.until("<seed:tool_call>")) + p.optional(tool_section) + p.end();
    });

    // The exact input from the failing test
    std::string input =
        "<seed:tool_call>\n"
        "<function=edit>\n"
        "<parameter=filename>\n"
        "foo.cpp\n"
        "</parameter>\n"
        "<parameter=oldString>"
        "def foo(arg = \"14\"):\n"
        " return arg + \"bar\"\n"
        "\n"
        "</parameter>\n"
        "<parameter=newString>"
        "def foo(arg = \"15\"):\n"
        " pass\n"
        "\n"
        "</parameter>\n"
        "</function>\n"
        "</seed:tool_call>";

    common_peg_parse_context ctx(input, false);
    auto outcome = parser.parse(ctx);
    t.assert_true("parse success", outcome.success());

    // Map the parsed AST onto a chat message and inspect what was extracted.
    common_chat_msg msg;
    auto mapper = common_chat_peg_unified_mapper(msg);
    mapper.from_ast(ctx.ast, outcome);

    t.assert_equal("tool calls count", 1u, msg.tool_calls.size());
    if (msg.tool_calls.empty()) {
        return;
    }

    const auto & call = msg.tool_calls[0];
    t.assert_equal("tool name", "edit", call.name);

    // The arguments string must parse as JSON; any exception is reported as a
    // failed assertion rather than propagated.
    std::string args = call.arguments;
    try {
        json doc = json::parse(args);
        t.assert_true("arguments is valid JSON", true);

        // Each field must carry its value, with the embedded quotes intact.
        t.assert_equal("filename", "foo.cpp", doc.value("filename", ""));

        std::string old_text = doc.value("oldString", "");
        const bool old_has_14 = old_text.find("\"14\"") != std::string::npos;
        t.assert_true("oldString contains embedded quotes", old_has_14);
        const bool old_has_bar = old_text.find("\"bar\"") != std::string::npos;
        t.assert_true("oldString contains bar with quotes", old_has_bar);

        std::string new_text = doc.value("newString", "");
        const bool new_has_15 = new_text.find("\"15\"") != std::string::npos;
        t.assert_true("newString contains embedded quotes", new_has_15);
    } catch (const std::exception & e) {
        t.assert_true(std::string("arguments should be valid JSON: ") + e.what(), false);
    }
}