1846 lines
81 KiB
C++
1846 lines
81 KiB
C++
#include "chat-auto-parser-helpers.h"
|
|
#include "chat-diff-analyzer.h"
|
|
#include "chat-peg-parser.h"
|
|
#include "chat.h"
|
|
#include "peg-parser.h"
|
|
#include "testing.h"
|
|
|
|
#include <fstream>
|
|
#include <iostream>
|
|
#include <sstream>
|
|
#include <string>
|
|
|
|
static void test_calculate_diff_split_basic(testing & t);
|
|
static void test_calculate_diff_split_identical(testing & t);
|
|
static void test_calculate_diff_split_common_prefix(testing & t);
|
|
static void test_calculate_diff_split_common_suffix(testing & t);
|
|
static void test_calculate_diff_split_common_both(testing & t);
|
|
static void test_calculate_diff_split_empty_cases(testing & t);
|
|
static void test_calculate_diff_split_no_common(testing & t);
|
|
static void test_calculate_diff_split_single_char(testing & t);
|
|
static void test_calculate_diff_split_overlaps(testing & t);
|
|
static void test_calculate_diff_split_tag_boundaries(testing & t);
|
|
static void test_calculate_diff_split(testing & t);
|
|
|
|
static void test_until_common_prefix_basic(testing & t);
|
|
static void test_until_common_prefix(testing & t);
|
|
|
|
static void test_after_common_suffix_basic(testing & t);
|
|
static void test_after_common_suffix(testing & t);
|
|
|
|
static void test_analyze_tool_call_pure_json(testing & t);
|
|
static void test_analyze_tool_call_function_name_markers(testing & t);
|
|
static void test_analyze_tool_call_full_markers(testing & t);
|
|
static void test_analyze_tool_call_edge_cases(testing & t);
|
|
|
|
static void test_compare_variants_basic(testing & t);
|
|
static void test_compare_variants_messages_modifier(testing & t);
|
|
static void test_compare_variants_tools_modifier(testing & t);
|
|
static void test_compare_variants_both_modifiers(testing & t);
|
|
static void test_compare_variants_template_failure(testing & t);
|
|
static void test_compare_variants_identity(testing & t);
|
|
static void test_compare_variants(testing & t);
|
|
|
|
// Seed-OSS template tool calling analysis tests
|
|
static void test_seed_oss_tool_analysis(testing & t);
|
|
static void test_seed_oss_tool_presence(testing & t);
|
|
static void test_seed_oss_call_count(testing & t);
|
|
static void test_seed_oss_function_names(testing & t);
|
|
static void test_seed_oss_argument_count(testing & t);
|
|
static void test_seed_oss_args_presence(testing & t);
|
|
static void test_seed_oss_tool_with_reasoning(testing & t);
|
|
|
|
// Nemotron template analysis tests
|
|
static void test_nemotron_analysis(testing & t);
|
|
static void test_nemotron_reasoning_detection(testing & t);
|
|
static void test_nemotron_tool_format(testing & t);
|
|
|
|
// CohereForAI template analysis tests
|
|
static void test_cohere_reasoning_detection(testing & t);
|
|
static void test_cohere_tool_format(testing & t);
|
|
static void test_cohere_analysis(testing & t);
|
|
|
|
// Marker separation
|
|
static void test_marker_separation(testing & t);
|
|
|
|
// standard_json_tools format tests
|
|
static void test_standard_json_tools_formats(testing & t);
|
|
static void test_standard_json_tools_openai(testing & t);
|
|
static void test_standard_json_tools_cohere(testing & t);
|
|
static void test_standard_json_tools_function_key(testing & t);
|
|
|
|
// normalize_quotes_to_json tests
|
|
static void test_normalize_quotes_to_json(testing & t);
|
|
static void test_normalize_quotes_with_embedded_quotes(testing & t);
|
|
|
|
// TAG_WITH_TAGGED argument parsing tests
|
|
static void test_tagged_args_with_embedded_quotes(testing & t);
|
|
|
|
int main(int argc, char * argv[]) {
|
|
testing t(std::cout);
|
|
t.verbose = true;
|
|
|
|
// usage: test-chat-auto-parser-helpers [filter_regex]
|
|
|
|
if (argc > 1) {
|
|
t.set_filter(argv[1]);
|
|
}
|
|
|
|
t.test("diff_split", test_calculate_diff_split);
|
|
t.test("common_prefix", test_until_common_prefix);
|
|
t.test("common_suffix", test_after_common_suffix);
|
|
t.test("compare_variants", test_compare_variants);
|
|
t.test("segments", test_marker_separation);
|
|
t.test("seed_oss_diffs", test_seed_oss_tool_analysis);
|
|
t.test("cohere", test_cohere_analysis);
|
|
t.test("nemotron", test_nemotron_analysis);
|
|
t.test("standard_json_tools", test_standard_json_tools_formats);
|
|
t.test("normalize_quotes_to_json", test_normalize_quotes_to_json);
|
|
t.test("tagged_args_embedded_quotes", test_tagged_args_with_embedded_quotes);
|
|
|
|
return t.summary();
|
|
}
|
|
|
|
// Checks how segmentize_markers() splits a string into TEXT and MARKER
// segments. The expectations below treat both `<...>` and `[...]` spans as
// markers and everything between them as text.
//
// Every sub-test first verifies that enough segments came back before it
// indexes into the result, so a regression that yields fewer segments is
// reported as a failed assertion instead of an out-of-bounds read.
static void test_marker_separation(testing & t) {
    auto single_square_marker     = segmentize_markers("pre_marker[marker]post_marker");
    auto single_diag_marker       = segmentize_markers("pre_marker<marker>post_marker");
    auto paired_markers           = segmentize_markers("<hello>world</hello>");
    auto double_different_markers = segmentize_markers("<hello>[hello]<world>[world]");
    auto in_between               = segmentize_markers("im<blue>daba<dee>da[hey]");

    // Records an assertion on the segment count and tells the caller whether
    // indexing [0, wanted) is safe. Only a lower bound is checked, so the
    // guard adds no expectation beyond what the indexed assertions need.
    // NOTE(review): assumes the returned container exposes size() - confirm
    // against the segmentize_markers declaration.
    const auto require_segments = [](testing & t, const auto & segments, std::size_t wanted) {
        const bool enough = segments.size() >= wanted;
        t.assert_true("should have at least " + std::to_string(wanted) + " segments", enough);
        return enough;
    };

    t.test("single_square_marker", [&] (testing & t) {
        if (!require_segments(t, single_square_marker, 3)) {
            return;
        }
        t.assert_equal("first is text", segment_type::TEXT, single_square_marker[0].type);
        t.assert_equal("second is marker", segment_type::MARKER, single_square_marker[1].type);
        t.assert_equal("last is text", segment_type::TEXT, single_square_marker[2].type);

        t.assert_equal("first is 'pre_marker'", "pre_marker", single_square_marker[0].value);
        t.assert_equal("second is '[marker]'", "[marker]", single_square_marker[1].value);
        t.assert_equal("last is 'post_marker'", "post_marker", single_square_marker[2].value);
    });

    t.test("single_diagonal_marker", [&] (testing & t) {
        if (!require_segments(t, single_diag_marker, 3)) {
            return;
        }
        t.assert_equal("first is text", segment_type::TEXT, single_diag_marker[0].type);
        t.assert_equal("second is marker", segment_type::MARKER, single_diag_marker[1].type);
        t.assert_equal("last is text", segment_type::TEXT, single_diag_marker[2].type);

        t.assert_equal("first is 'pre_marker'", "pre_marker", single_diag_marker[0].value);
        t.assert_equal("second is '<marker>'", "<marker>", single_diag_marker[1].value);
        t.assert_equal("last is 'post_marker'", "post_marker", single_diag_marker[2].value);
    });

    t.test("paired_markers", [&] (testing & t) {
        if (!require_segments(t, paired_markers, 3)) {
            return;
        }
        t.assert_equal("first is marker", segment_type::MARKER, paired_markers[0].type);
        t.assert_equal("second is text", segment_type::TEXT, paired_markers[1].type);
        t.assert_equal("third is marker", segment_type::MARKER, paired_markers[2].type);

        t.assert_equal("first is '<hello>'", "<hello>", paired_markers[0].value);
        t.assert_equal("second is 'world'", "world", paired_markers[1].value);
        t.assert_equal("third is '</hello>'", "</hello>", paired_markers[2].value);
    });

    t.test("double_different_markers", [&] (testing & t) {
        if (!require_segments(t, double_different_markers, 4)) {
            return;
        }
        t.assert_equal("first is marker", segment_type::MARKER, double_different_markers[0].type);
        t.assert_equal("second is marker", segment_type::MARKER, double_different_markers[1].type);
        t.assert_equal("third is marker", segment_type::MARKER, double_different_markers[2].type);
        t.assert_equal("fourth is marker", segment_type::MARKER, double_different_markers[3].type);

        t.assert_equal("first is '<hello>'", "<hello>", double_different_markers[0].value);
        t.assert_equal("second is '[hello]'", "[hello]", double_different_markers[1].value);
        t.assert_equal("third is '<world>'", "<world>", double_different_markers[2].value);
        t.assert_equal("fourth is '[world]'", "[world]", double_different_markers[3].value);
    });

    t.test("in_between", [&] (testing & t) {
        if (!require_segments(t, in_between, 6)) {
            return;
        }
        t.assert_equal("first is text", segment_type::TEXT, in_between[0].type);
        t.assert_equal("second is marker", segment_type::MARKER, in_between[1].type);
        t.assert_equal("third is text", segment_type::TEXT, in_between[2].type);
        t.assert_equal("fourth is marker", segment_type::MARKER, in_between[3].type);
        t.assert_equal("fifth is text", segment_type::TEXT, in_between[4].type);
        t.assert_equal("sixth is marker", segment_type::MARKER, in_between[5].type);

        t.assert_equal("first is 'im'", "im", in_between[0].value);
        t.assert_equal("second is '<blue>'", "<blue>", in_between[1].value);
        t.assert_equal("third is 'daba'", "daba", in_between[2].value);
        t.assert_equal("fourth is '<dee>'", "<dee>", in_between[3].value);
        t.assert_equal("fifth is 'da'", "da", in_between[4].value);
        t.assert_equal("sixth is '[hey]'", "[hey]", in_between[5].value);
    });
}
|
|
|
|
// Group runner: registers every calculate_diff_split sub-test with the
// harness, in the order listed.
static void test_calculate_diff_split(testing & t) {
    struct sub_test {
        const char * name;
        void (*run)(testing &);
    };

    const sub_test sub_tests[] = {
        { "calculate_diff_split basic",          test_calculate_diff_split_basic },
        { "calculate_diff_split identical",      test_calculate_diff_split_identical },
        { "calculate_diff_split common prefix",  test_calculate_diff_split_common_prefix },
        { "calculate_diff_split common suffix",  test_calculate_diff_split_common_suffix },
        { "calculate_diff_split common both",    test_calculate_diff_split_common_both },
        { "calculate_diff_split empty cases",    test_calculate_diff_split_empty_cases },
        { "calculate_diff_split no common",      test_calculate_diff_split_no_common },
        { "calculate_diff_split single char",    test_calculate_diff_split_single_char },
        { "calculate_diff_split overlaps",       test_calculate_diff_split_overlaps },
        { "calculate_diff_split tag boundaries", test_calculate_diff_split_tag_boundaries },
    };

    for (const sub_test & entry : sub_tests) {
        t.test(entry.name, entry.run);
    }
}
|
|
|
|
// Smoke test for calculate_diff_split(): one input pair sharing only a
// prefix, one sharing nothing, and one sharing both a prefix and a suffix.
static void test_calculate_diff_split_basic(testing & t) {
    {
        // Shared leading "hello ", different tails.
        const diff_split split = calculate_diff_split("hello world", "hello test");
        t.assert_equal("prefix should be 'hello '", "hello ", split.prefix);
        t.assert_equal("left should be 'world'", "world", split.left);
        t.assert_equal("right should be 'test'", "test", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }

    {
        // Nothing in common at either end.
        const diff_split split = calculate_diff_split("abc", "xyz");
        t.assert_equal("prefix should be empty", "", split.prefix);
        t.assert_equal("left should be 'abc'", "abc", split.left);
        t.assert_equal("right should be 'xyz'", "xyz", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }

    {
        // One differing character between a shared head and a shared tail.
        const diff_split split = calculate_diff_split("prefixA suffix", "prefixB suffix");
        t.assert_equal("prefix should be 'prefix'", "prefix", split.prefix);
        t.assert_equal("left should be 'A'", "A", split.left);
        t.assert_equal("right should be 'B'", "B", split.right);
        t.assert_equal("suffix should be ' suffix'", " suffix", split.suffix);
    }
}
|
|
|
|
// Identical inputs: the whole string is expected in `prefix`, with `left`,
// `right` and `suffix` all empty.
static void test_calculate_diff_split_identical(testing & t) {
    {
        const diff_split split = calculate_diff_split("hello", "hello");
        t.assert_equal("prefix should be 'hello'", "hello", split.prefix);
        t.assert_equal("left should be empty", "", split.left);
        t.assert_equal("right should be empty", "", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }

    {
        // Two empty strings are identical as well.
        const diff_split split = calculate_diff_split("", "");
        t.assert_equal("prefix should be empty", "", split.prefix);
        t.assert_equal("left should be empty", "", split.left);
        t.assert_equal("right should be empty", "", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }

    {
        const diff_split split = calculate_diff_split("a", "a");
        t.assert_equal("prefix should be 'a'", "a", split.prefix);
        t.assert_equal("left should be empty", "", split.left);
        t.assert_equal("right should be empty", "", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }
}
|
|
|
|
// Inputs that share a leading run only; the suffix is expected to stay empty.
static void test_calculate_diff_split_common_prefix(testing & t) {
    {
        const diff_split split = calculate_diff_split("abcdef", "abcxyz");
        t.assert_equal("prefix should be 'abc'", "abc", split.prefix);
        t.assert_equal("left should be 'def'", "def", split.left);
        t.assert_equal("right should be 'xyz'", "xyz", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }

    {
        // The left input is entirely a prefix of the right one.
        const diff_split split = calculate_diff_split("same", "sameagain");
        t.assert_equal("prefix should be 'same'", "same", split.prefix);
        t.assert_equal("left should be empty", "", split.left);
        t.assert_equal("right should be 'again'", "again", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }

    {
        const diff_split split = calculate_diff_split("test", "testing");
        t.assert_equal("prefix should be 'test'", "test", split.prefix);
        t.assert_equal("left should be empty", "", split.left);
        t.assert_equal("right should be 'ing'", "ing", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }
}
|
|
|
|
// Inputs with no shared head; two of the three pairs share a trailing run
// that is expected to land in `suffix`.
static void test_calculate_diff_split_common_suffix(testing & t) {
    {
        const diff_split split = calculate_diff_split("123end", "456end");
        t.assert_equal("prefix should be empty", "", split.prefix);
        t.assert_equal("left should be '123'", "123", split.left);
        t.assert_equal("right should be '456'", "456", split.right);
        t.assert_equal("suffix should be 'end'", "end", split.suffix);
    }

    {
        // Neither the first nor the last characters match here.
        const diff_split split = calculate_diff_split("start", "end");
        t.assert_equal("prefix should be empty", "", split.prefix);
        t.assert_equal("left should be 'start'", "start", split.left);
        t.assert_equal("right should be 'end'", "end", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }

    {
        const diff_split split = calculate_diff_split("abcsuffix", "xyzsuffix");
        t.assert_equal("prefix should be empty", "", split.prefix);
        t.assert_equal("left should be 'abc'", "abc", split.left);
        t.assert_equal("right should be 'xyz'", "xyz", split.right);
        t.assert_equal("suffix should be 'suffix'", "suffix", split.suffix);
    }
}
|
|
|
|
// Inputs that agree at both ends and differ only in the middle, plus a
// two-character pair that shares only its first character.
static void test_calculate_diff_split_common_both(testing & t) {
    {
        const diff_split split = calculate_diff_split("helloXworld", "helloYworld");
        t.assert_equal("prefix should be 'hello'", "hello", split.prefix);
        t.assert_equal("left should be 'X'", "X", split.left);
        t.assert_equal("right should be 'Y'", "Y", split.right);
        t.assert_equal("suffix should be 'world'", "world", split.suffix);
    }

    {
        const diff_split split = calculate_diff_split("ABCmiddleXYZ", "ABCdifferentXYZ");
        t.assert_equal("prefix should be 'ABC'", "ABC", split.prefix);
        t.assert_equal("left should be 'middle'", "middle", split.left);
        t.assert_equal("right should be 'different'", "different", split.right);
        t.assert_equal("suffix should be 'XYZ'", "XYZ", split.suffix);
    }

    {
        const diff_split split = calculate_diff_split("startAend", "startBend");
        t.assert_equal("prefix should be 'start'", "start", split.prefix);
        t.assert_equal("left should be 'A'", "A", split.left);
        t.assert_equal("right should be 'B'", "B", split.right);
        t.assert_equal("suffix should be 'end'", "end", split.suffix);
    }

    {
        // Edge case: inputs so short that prefix and suffix matching work on
        // the same characters.
        const diff_split split = calculate_diff_split("aa", "ab");
        t.assert_equal("prefix should be 'a'", "a", split.prefix);
        t.assert_equal("left should be 'a'", "a", split.left);
        t.assert_equal("right should be 'b'", "b", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }
}
|
|
|
|
// Empty-string handling: whichever side is non-empty is expected to come back
// verbatim in `left` / `right`, with `prefix` and `suffix` empty.
static void test_calculate_diff_split_empty_cases(testing & t) {
    {
        // "" vs. non-empty
        const diff_split split = calculate_diff_split("", "hello");
        t.assert_equal("prefix should be empty", "", split.prefix);
        t.assert_equal("left should be empty", "", split.left);
        t.assert_equal("right should be 'hello'", "hello", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }

    {
        // non-empty vs. ""
        const diff_split split = calculate_diff_split("hello", "");
        t.assert_equal("prefix should be empty", "", split.prefix);
        t.assert_equal("left should be 'hello'", "hello", split.left);
        t.assert_equal("right should be empty", "", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }

    {
        // "" vs. ""
        const diff_split split = calculate_diff_split("", "");
        t.assert_equal("prefix should be empty", "", split.prefix);
        t.assert_equal("left should be empty", "", split.left);
        t.assert_equal("right should be empty", "", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }

    {
        // single character vs. ""
        const diff_split split = calculate_diff_split("a", "");
        t.assert_equal("prefix should be empty", "", split.prefix);
        t.assert_equal("left should be 'a'", "a", split.left);
        t.assert_equal("right should be empty", "", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }

    {
        // "" vs. single character
        const diff_split split = calculate_diff_split("", "a");
        t.assert_equal("prefix should be empty", "", split.prefix);
        t.assert_equal("left should be empty", "", split.left);
        t.assert_equal("right should be 'a'", "a", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }
}
|
|
|
|
// Inputs that look unrelated. The "left"/"right" pair shows that a single
// shared trailing character is still reported as a common suffix.
static void test_calculate_diff_split_no_common(testing & t) {
    {
        const diff_split split = calculate_diff_split("abc", "xyz");
        t.assert_equal("prefix should be empty", "", split.prefix);
        t.assert_equal("left should be 'abc'", "abc", split.left);
        t.assert_equal("right should be 'xyz'", "xyz", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }

    {
        // Both words end in 't', so the expected suffix is "t" rather than
        // empty.
        const diff_split split = calculate_diff_split("left", "right");
        t.assert_equal("prefix should be empty", "", split.prefix);
        t.assert_equal("left should be 'lef'", "lef", split.left);
        t.assert_equal("right should be 'righ'", "righ", split.right);
        t.assert_equal("suffix should be 't'", "t", split.suffix);
    }

    {
        const diff_split split = calculate_diff_split("123", "456");
        t.assert_equal("prefix should be empty", "", split.prefix);
        t.assert_equal("left should be '123'", "123", split.left);
        t.assert_equal("right should be '456'", "456", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }
}
|
|
|
|
// One- and two-character inputs: different, equal, and one input extending
// the other by a single character (in either direction).
static void test_calculate_diff_split_single_char(testing & t) {
    {
        const diff_split split = calculate_diff_split("a", "b");
        t.assert_equal("prefix should be empty", "", split.prefix);
        t.assert_equal("left should be 'a'", "a", split.left);
        t.assert_equal("right should be 'b'", "b", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }

    {
        const diff_split split = calculate_diff_split("a", "a");
        t.assert_equal("prefix should be 'a'", "a", split.prefix);
        t.assert_equal("left should be empty", "", split.left);
        t.assert_equal("right should be empty", "", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }

    {
        const diff_split split = calculate_diff_split("a", "ab");
        t.assert_equal("prefix should be 'a'", "a", split.prefix);
        t.assert_equal("left should be empty", "", split.left);
        t.assert_equal("right should be 'b'", "b", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }

    {
        const diff_split split = calculate_diff_split("ab", "a");
        t.assert_equal("prefix should be 'a'", "a", split.prefix);
        t.assert_equal("left should be 'b'", "b", split.left);
        t.assert_equal("right should be empty", "", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }
}
|
|
|
|
// Overlapping inputs: one string contained in the other, strings differing in
// a single leading or trailing character, and a string against its reverse.
static void test_calculate_diff_split_overlaps(testing & t) {
    {
        // Left is a leading substring of right.
        const diff_split split = calculate_diff_split("test", "testing");
        t.assert_equal("prefix should be 'test'", "test", split.prefix);
        t.assert_equal("left should be empty", "", split.left);
        t.assert_equal("right should be 'ing'", "ing", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }

    {
        // Same pair with the sides swapped.
        const diff_split split = calculate_diff_split("testing", "test");
        t.assert_equal("prefix should be 'test'", "test", split.prefix);
        t.assert_equal("left should be 'ing'", "ing", split.left);
        t.assert_equal("right should be empty", "", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }

    {
        // Only the first character differs: the shared "test" tail is
        // expected in the suffix.
        const diff_split split = calculate_diff_split("Xtest", "Ytest");
        t.assert_equal("prefix should be empty", "", split.prefix);
        t.assert_equal("left should be 'X'", "X", split.left);
        t.assert_equal("right should be 'Y'", "Y", split.right);
        t.assert_equal("suffix should be 'test'", "test", split.suffix);
    }

    {
        // Only the last character differs.
        const diff_split split = calculate_diff_split("testX", "testY");
        t.assert_equal("prefix should be 'test'", "test", split.prefix);
        t.assert_equal("left should be 'X'", "X", split.left);
        t.assert_equal("right should be 'Y'", "Y", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }

    {
        // A string and its reverse: no common prefix or suffix is expected.
        const diff_split split = calculate_diff_split("abc", "cba");
        t.assert_equal("prefix should be empty", "", split.prefix);
        t.assert_equal("left should be 'abc'", "abc", split.left);
        t.assert_equal("right should be 'cba'", "cba", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }
}
|
|
|
|
// calculate_diff_split() on inputs containing `<...>` / `[...]` markers.
// These cases pin down where the split falls when the character-level common
// prefix or suffix would otherwise cut through the middle of a marker.
static void test_calculate_diff_split_tag_boundaries(testing & t) {
    {
        // Left ends inside an unclosed '<' tag.
        const diff_split split = calculate_diff_split("test<tag", "test>content");
        t.assert_true("prefix should start with 'test'", split.prefix.rfind("test", 0) == 0);
        t.assert_true("should handle tag boundaries",
                      !(split.left.empty() && split.right.empty() && split.suffix.empty()));
    }

    {
        // Left ends with an unclosed '['.
        const diff_split split = calculate_diff_split("test[", "test]value");
        t.assert_true("should handle bracket boundaries",
                      !(split.left.empty() && split.right.empty() && split.suffix.empty()));
    }

    {
        // Both sides continue with '<' after the shared text. The '<' is
        // expected to stay with each side's tag instead of joining the prefix.
        const diff_split split = calculate_diff_split("prefix<tag>", "prefix</tag>suffix");
        t.assert_equal("prefix should be 'prefix'", "prefix", split.prefix);
        t.assert_equal("left should be '<tag>'", "<tag>", split.left);
        t.assert_equal("right should be '</tag>suffix'", "</tag>suffix", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }

    {
        // Same tags around different text. The expected suffix is
        // "ent</div>": the text bodies share their last three characters, so
        // the split is purely textual, not semantic.
        const diff_split split = calculate_diff_split("prefix<div>content</div>", "prefix<div>different</div>");
        t.assert_equal("prefix should be 'prefix<div>'", "prefix<div>", split.prefix);
        t.assert_equal("left should be 'cont'", "cont", split.left);
        t.assert_equal("right should be 'differ'", "differ", split.right);
        t.assert_equal("suffix should be 'ent</div>'", "ent</div>", split.suffix);
    }

    {
        // An angle-bracket marker that exists on the left side only.
        const diff_split split = calculate_diff_split("Hello <world>", "Hello test");
        t.assert_equal("prefix should be 'Hello '", "Hello ", split.prefix);
        t.assert_true("left should contain '<world>'", split.left.find("<world>") != std::string::npos);
        t.assert_equal("right should be 'test'", "test", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }

    {
        // A square-bracket marker that exists on the left side only.
        const diff_split split = calculate_diff_split("test [array]", "test other");
        t.assert_equal("prefix should be 'test '", "test ", split.prefix);
        t.assert_true("left should contain '[array]'", split.left.find("[array]") != std::string::npos);
        t.assert_equal("right should be 'other'", "other", split.right);
        t.assert_equal("suffix should be empty", "", split.suffix);
    }

    {
        // Whole tags form the prefix and the suffix; only the text differs.
        const diff_split split = calculate_diff_split("<tag>left</tag>", "<tag>righ</tag>");
        t.assert_equal("prefix should be '<tag>'", "<tag>", split.prefix);
        t.assert_equal("left should be 'left'", "left", split.left);
        t.assert_equal("right should be 'righ'", "righ", split.right);
        t.assert_equal("suffix should be '</tag>'", "</tag>", split.suffix);
    }

    {
        // Real case from the template tests, simplified: the right side adds
        // a reasoning block in front of a closing tag both sides share.
        const std::string without_reasoning = "PREFIX</think>Sure";
        const std::string with_reasoning    = "PREFIX<think>Lemme think</think>Sure";
        const diff_split split = calculate_diff_split(without_reasoning, with_reasoning);
        t.assert_equal("prefix should be PREFIX", "PREFIX", split.prefix);
        t.assert_equal("suffix should be </think>Sure", "</think>Sure", split.suffix);
        t.assert_equal("left should be empty", "", split.left);
        t.assert_equal("right should be <think>Lemme think", "<think>Lemme think", split.right);
    }

    {
        // Real case: special tokens of the form <|NAME|>. Both differing
        // parts stop right before "|>", so that "|>" is the start of the
        // character-level common suffix. It is expected to be moved out of
        // the suffix so that <|END_RESPONSE|> and <|END_ACTION|> come out
        // complete.
        const std::string shared_head = "SOME_PREFIX";
        const std::string shared_tail = "|><|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>";
        const std::string response_part = "<|START_RESPONSE|>Let me help you.<|END_RESPONSE";
        const std::string action_part =
            "<|START_THINKING|><|END_THINKING|><|START_ACTION|>[\n"
            " {\"tool_call_id\": \"0\", \"tool_name\": \"test_function_name\", "
            "\"parameters\": {\"param1\": \"value1\", \"param2\": \"value2\"}}\n"
            "]<|END_ACTION";

        const std::string lhs = shared_head + response_part + shared_tail;
        const std::string rhs = shared_head + action_part + shared_tail;
        const diff_split split = calculate_diff_split(lhs, rhs);

        t.assert_equal("special token prefix", shared_head, split.prefix);
        t.assert_equal("special token left", "<|START_RESPONSE|>Let me help you.<|END_RESPONSE|>", split.left);
        t.assert_true("special token right ends with |>", split.right.find("<|END_ACTION|>") != std::string::npos);
        t.assert_equal("special token suffix", "<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",
                       split.suffix);
    }
}
|
|
|
|
// Group runner for the until_common_prefix tests; it currently registers a
// single sub-test.
static void test_until_common_prefix(testing & t) {
    void (*const basic_case)(testing &) = test_until_common_prefix_basic;
    t.test("until_common_prefix basic", basic_case);
}
|
|
|
|
// until_common_prefix(full, left, right): the cases below expect the part of
// `full` that precedes the prefix shared by `left` and `right`, and an empty
// string when there is no such shared prefix.
static void test_until_common_prefix_basic(testing & t) {
    // Motivating example: two argument tags that differ only in the name value.
    const std::string before_arg =
        until_common_prefix("<function name=foo><arg name=bar>", "<arg name=bar>", "<arg name=baz>");
    t.assert_equal("untilCommonPrefix should return '<function name=foo>'", "<function name=foo>", before_arg);

    // Shared prefix of a different length.
    const std::string before_test = until_common_prefix("prefix<test>suffix", "<test>different", "<test>other");
    t.assert_equal("should return 'prefix'", "prefix", before_test);

    // `full` itself starts with the shared prefix: nothing precedes it.
    const std::string at_start = until_common_prefix("<common>rest", "<common>left", "<common>right");
    t.assert_equal("should return empty string when common prefix at start", "", at_start);

    // `left` and `right` share no prefix.
    const std::string nothing_shared = until_common_prefix("something", "left", "right");
    t.assert_equal("should return empty string when no common prefix", "", nothing_shared);

    // Empty `left`.
    const std::string empty_left = until_common_prefix("test", "", "right");
    t.assert_equal("should return empty string when left is empty", "", empty_left);

    // Longer shared prefix.
    const std::string before_shared =
        until_common_prefix("abcXYZ<shared_prefix>rest", "<shared_prefix>left", "<shared_prefix>right");
    t.assert_equal("should return 'abcXYZ'", "abcXYZ", before_shared);
}
|
|
|
|
// Group runner for the after_common_suffix tests; it currently registers a
// single sub-test.
static void test_after_common_suffix(testing & t) {
    void (*const basic_case)(testing &) = test_after_common_suffix_basic;
    t.test("after_common_suffix basic", basic_case);
}
|
|
|
|
// after_common_suffix(full, left, right): the cases below expect the part of
// `full` that follows the suffix shared by `left` and `right`, and an empty
// string when that suffix sits at the very end of `full` or when `right` is
// empty.
static void test_after_common_suffix_basic(testing & t) {
    // Motivating example: two argument elements sharing the closing </arg>.
    const std::string after_arg = after_common_suffix("<function name=foo><arg name=bar>100</arg></function>",
                                                      "<arg name=bar>100</arg>",
                                                      "<arg name=baz>535</arg>");
    t.assert_equal("afterCommonSuffix should return '</function>'", "</function>", after_arg);

    // Shared suffix is the tail of `full`.
    const std::string at_end = after_common_suffix("rest<common>", "left<common>", "right<common>");
    t.assert_equal("should return empty string when common suffix at end", "", at_end);

    // Empty `right`.
    const std::string empty_right = after_common_suffix("test", "left", "");
    t.assert_equal("should return empty string when right is empty", "", empty_right);

    // XML-like nesting, same shape as the first case.
    const std::string after_inner = after_common_suffix("<outer><inner>value</inner></outer>",
                                                        "<inner>value</inner>",
                                                        "<inner>different</inner>");
    t.assert_equal("should return '</outer>'", "</outer>", after_inner);

    // Longer shared suffix that ends `full`.
    const std::string long_at_end =
        after_common_suffix("prefix<shared>rest</shared>", "prefix<shared>left</shared>", "prefix<shared>right</shared>");
    t.assert_equal("should return '' when common suffix is at end of full", "", long_at_end);

    // Shared suffix occurs inside `full`, followed by more text.
    const std::string trailing_extra =
        after_common_suffix("<tag>content</tag><extra>", "<tag>value</tag>", "<tag>other</tag>");
    t.assert_equal("should return '<extra>' when common suffix appears before end", "<extra>", trailing_extra);

    // Multi-character shared suffix that ends `full`.
    const std::string middle_at_end =
        after_common_suffix("start<middle>end</middle>", "prefix<middle>left</middle>", "prefix<middle>right</middle>");
    t.assert_equal("should return '' when common suffix </middle> is at end of full", "", middle_at_end);
}
|
|
|
|
// Group runner: registers every compare_variants sub-test with the harness,
// in the order listed.
static void test_compare_variants(testing & t) {
    struct sub_test {
        const char * name;
        void (*run)(testing &);
    };

    const sub_test sub_tests[] = {
        { "compare_variants basic",              test_compare_variants_basic },
        { "compare_variants messages modifier",  test_compare_variants_messages_modifier },
        { "compare_variants tools modifier",     test_compare_variants_tools_modifier },
        { "compare_variants both modifiers",     test_compare_variants_both_modifiers },
        { "compare_variants template failure",   test_compare_variants_template_failure },
        { "compare_variants identity",           test_compare_variants_identity },
    };

    for (const sub_test & entry : sub_tests) {
        t.test(entry.name, entry.run);
    }
}
|
|
|
|
// Basic compare_variants() check: a template that prints the first message's
// content, base params with content "Hello", and a modifier that changes the
// content to "World". Expects a result whose diff has a non-empty prefix or
// a non-empty left part.
static void test_compare_variants_basic(testing & t) {
    // Create a simple template that just echoes messages
    common_chat_template tmpl("{{ messages[0]['content'] }}", "", "");

    template_params params;
    params.messages = json::array({
        json {{"role", "user"}, {"content", "Hello"}}
    });

    auto modifier = [](template_params & p) {
        p.messages[0]["content"] = "World";
    };

    auto result = differential_analyzer::compare_variants(tmpl, params, modifier);

    t.assert_true("result should have value", result.has_value());
    if (!result.has_value()) {
        // The failure is already recorded; dereferencing an empty optional
        // below would be undefined behaviour.
        return;
    }

    // The template might not output anything if messages is empty or format is different
    // Check that we get a valid result
    t.assert_true("prefix or left should have content", !result->diff.prefix.empty() || !result->diff.left.empty());
}
|
|
|
|
// compare_variants() with a modifier that only touches `messages`: the
// template prints "role:content" for each message and the modifier changes
// the content from "A" to "B". Expects the diff to isolate exactly "A"
// (left) and "B" (right).
static void test_compare_variants_messages_modifier(testing & t) {
    // Test with messages modifier only
    common_chat_template tmpl("{% for message in messages %}{{ message['role'] }}:{{ message['content'] }}{% endfor %}", "", "");

    template_params params;
    params.messages = json::array({
        json {{"role", "user"}, {"content", "A"}}
    });

    auto modifier = [](template_params & p) {
        p.messages[0]["content"] = "B";
    };

    std::optional<compare_variants_result> result = differential_analyzer::compare_variants(tmpl, params, modifier);

    t.assert_true("result should have value", result.has_value());
    if (!result.has_value()) {
        // The failure is already recorded; dereferencing an empty optional
        // below would be undefined behaviour.
        return;
    }

    t.assert_equal("left should be 'A'", "A", result->diff.left);
    t.assert_equal("right should be 'B'", "B", result->diff.right);
}
|
|
|
|
// compare_variants() with a modifier that only touches `tools`: the template
// prints each tool's name and the modifier renames the single tool from "foo"
// to "bar". Expects the diff to isolate "foo" (left) and "bar" (right).
static void test_compare_variants_tools_modifier(testing & t) {
    // Test with tools modifier only
    common_chat_template tmpl(
        "{% for tool in tools %}{{ tool['name'] }}{% endfor %}", "", "");

    template_params params;
    params.tools = json::array({
        json {{"name", "foo"}}
    });

    auto modifier = [](template_params & p) {
        p.tools[0]["name"] = "bar";
    };

    auto result = differential_analyzer::compare_variants(tmpl, params, modifier);

    t.assert_true("result should have value", result.has_value());
    if (!result.has_value()) {
        // The failure is already recorded; dereferencing an empty optional
        // below would be undefined behaviour.
        return;
    }

    t.assert_equal("left should be 'foo'", "foo", result->diff.left);
    t.assert_equal("right should be 'bar'", "bar", result->diff.right);
}
|
|
|
|
// Checks a modifier that changes two fields at once. Note that, despite the
// test's name, only the message is modified here (its role and its content);
// params.tools is never set or touched.
static void test_compare_variants_both_modifiers(testing & t) {
    // Template renders "<role>:<content>" for every message.
    common_chat_template tmpl(
        "{% for message in messages %}{{ message['role'] }}:{{ message['content'] }}{% endfor %}", "", "");

    template_params params;
    params.messages = json::array({
        json {{"role", "user"}, {"content", "A"}}
    });

    // Variant B changes both the role and the content, so the two renderings
    // ("user:A" vs "newuser:B") share neither a prefix nor a suffix.
    auto modifier = [](template_params & p) {
        p.messages[0]["content"] = "B";
        p.messages[0]["role"] = "newuser";
    };

    auto result = differential_analyzer::compare_variants(tmpl, params, modifier);

    t.assert_true("result should have value", result.has_value());
    if (!result.has_value()) {
        // Avoid dereferencing an empty optional; the failure is already recorded.
        return;
    }

    t.assert_equal("left should be 'user:A'", "user:A", result->diff.left);
    t.assert_equal("right should be 'newuser:B'", "newuser:B", result->diff.right);
}
|
|
|
|
// Expects compare_variants() to return an empty optional for a template that
// is syntactically valid but reads a message field the input does not have.
static void test_compare_variants_template_failure(testing & t) {
    // The failure is meant to happen while applying the template, not while
    // constructing it: the syntax is fine, the referenced field is missing.
    common_chat_template failing_tmpl("{{ messages[0]['nonexistent_field'] }}", "", "");

    json first_message = json {{"role", "user"}, {"content", "Hello"}};

    template_params base_params;
    base_params.messages = json::array({ first_message });

    auto result = differential_analyzer::compare_variants(
        failing_tmpl, base_params,
        [](template_params & variant) {
            variant.messages[0]["content"] = "World";
        });

    const bool got_nullopt = !result.has_value();
    t.assert_true("result should be nullopt on template failure", got_nullopt);
}
|
|
|
|
// Checks compare_variants() when no modifier is supplied (nullptr): both
// variants are expected to render identically, so the whole output lands in
// the common prefix and left/right/suffix stay empty.
static void test_compare_variants_identity(testing & t) {
    // Minimal template: renders only the content of the first message.
    common_chat_template tmpl("{{ messages[0]['content'] }}", "", "");

    template_params params;
    params.messages = json::array({
        json {{"role", "user"}, {"content", "Hello"}}
    });

    // No modifier - should use identity
    auto result = differential_analyzer::compare_variants(tmpl, params, nullptr);

    t.assert_true("result should have value", result.has_value());
    if (!result.has_value()) {
        // Avoid dereferencing an empty optional; the failure is already recorded.
        return;
    }

    t.assert_equal("prefix should be 'Hello'", "Hello", result->diff.prefix);
    t.assert_equal("left should be empty", "", result->diff.left);
    t.assert_equal("right should be empty", "", result->diff.right);
    t.assert_equal("suffix should be empty", "", result->diff.suffix);
}
|
|
|
|
// ============================================================================
|
|
// Seed-OSS Template Tool Calling Analysis Tests
|
|
// ============================================================================
|
|
|
|
// Registers every Seed-OSS tool-calling sub-test with the harness, in order.
static void test_seed_oss_tool_analysis(testing & t) {
    struct sub_test {
        const char * label;
        void (*run)(testing &);
    };

    static const sub_test sub_tests[] = {
        { "Seed-OSS tool presence",       test_seed_oss_tool_presence       },
        { "Seed-OSS call count",          test_seed_oss_call_count          },
        { "Seed-OSS function names",      test_seed_oss_function_names      },
        { "Seed-OSS argument count",      test_seed_oss_argument_count      },
        { "Seed-OSS args presence",       test_seed_oss_args_presence       },
        { "Seed-OSS tool with reasoning", test_seed_oss_tool_with_reasoning },
    };

    for (const sub_test & entry : sub_tests) {
        t.test(entry.label, entry.run);
    }
}
|
|
|
|
// Helper to load Seed-OSS template
|
|
// Reads the Seed-OSS Jinja template from its fixed relative path and wraps it
// in a common_chat_template. Records an assertion that the file had content;
// an unreadable file leaves the source text empty.
static common_chat_template load_seed_oss_template(testing & t) {
    std::ostringstream contents;
    {
        std::ifstream file("models/templates/ByteDance-Seed-OSS.jinja", std::ios::binary);
        if (file.is_open()) {
            contents << file.rdbuf();
        }
    }

    std::string source = contents.str();

    // The template object is built before the assertion, as in every caller's
    // expectation that a template is always returned.
    common_chat_template tmpl(source, "", "");
    t.assert_true("Seed-OSS template loaded successfully", !source.empty());
    return tmpl;
}
|
|
|
|
// Helper to build tool call JSON
|
|
// Builds one tool-call JSON object of the form
//   { "id": id, "type": "function", "function": { "name": name, "arguments": args } }
// `id` defaults to "call_001".
static json build_tool_call(const std::string & name, const json & args, const std::string & id = "call_001") {
    json function_part = json::object();
    function_part["name"]      = name;
    function_part["arguments"] = args;

    json call = json::object();
    call["id"]       = id;
    call["type"]     = "function";
    call["function"] = function_part;
    return call;
}
|
|
|
|
// Helper to build tools definition
|
|
// Builds a tools array holding a single function tool, "test_function_name",
// whose schema declares two required string parameters: param1 and param2.
static json build_tools_definition() {
    // Every property in this schema is a string with a description.
    const auto string_property = [](const char * description) {
        json property = json::object();
        property["type"]        = "string";
        property["description"] = description;
        return property;
    };

    json properties = json::object();
    properties["param1"] = string_property("First parameter");
    properties["param2"] = string_property("Second parameter");

    json schema = json::object();
    schema["type"]       = "object";
    schema["properties"] = properties;
    schema["required"]   = json::array({"param1", "param2"});

    json function_def = json::object();
    function_def["name"]        = "test_function_name";
    function_def["description"] = "A test function for debugging";
    function_def["parameters"]  = schema;

    json tool = json::object();
    tool["type"]     = "function";
    tool["function"] = function_def;

    json tools = json::array();
    tools.push_back(tool);
    return tools;
}
|
|
|
|
// T1: Compare with/without tool call (user, assistant)
|
|
// T1: renders the Seed-OSS template for a (user, assistant) exchange twice,
// once with a plain-text assistant reply and once with an assistant tool call,
// and checks how the difference is split into prefix/left/right/suffix.
static void test_seed_oss_tool_presence(testing & t) {
    common_chat_template tmpl = load_seed_oss_template(t);

    json assistant_no_tools = json{
        {"role", "assistant"},
        {"content", "Let me help you."}
    };

    json assistant_with_tools = json{
        {"role", "assistant"},
        {"content", nullptr},
        {"tool_calls", json::array({
            build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}}))
        })}
    };

    json user_msg = json{
        {"role", "user"},
        {"content", "Hello, please help me."}
    };

    // Variant A: assistant answers with text only.
    template_params params_no_tools;
    params_no_tools.messages = json::array({user_msg, assistant_no_tools});
    params_no_tools.tools = build_tools_definition();
    params_no_tools.add_generation_prompt = false;
    params_no_tools.enable_thinking = true;

    // Variant B differs only in the message list, so only that is prepared.
    const json messages_with_tools = json::array({user_msg, assistant_with_tools});

    auto result = differential_analyzer::compare_variants(tmpl, params_no_tools,
        [&](template_params & p) {
            p.messages = messages_with_tools;
        });

    t.assert_true("T1 result should have value", result.has_value());
    if (!result.has_value()) {
        // Avoid dereferencing an empty optional; the failure is already recorded.
        return;
    }

    const auto & diff = result->diff;
    const auto contains = [](const auto & haystack, const char * needle) {
        return haystack.find(needle) != std::string::npos;
    };

    t.assert_true("T1 prefix should contain system", contains(diff.prefix, "system"));
    t.assert_true("T1 prefix should contain user", contains(diff.prefix, "user"));
    t.assert_true("T1 prefix should contain assistant", contains(diff.prefix, "assistant"));

    // Left should be the assistant content without tool
    t.assert_equal("T1 left should contain 'Let me help you.'", "Let me help you.", diff.left);

    // Right should contain the tool call markers
    t.assert_true("T1 right should contain tool_call begin", contains(diff.right, "<seed:tool_call>"));
    t.assert_true("T1 right should contain function tag", contains(diff.right, "<function=test_function_name>"));
    t.assert_true("T1 right should contain parameter=param1", contains(diff.right, "<parameter=param1>"));
    t.assert_true("T1 right should contain parameter=param2", contains(diff.right, "<parameter=param2>"));
    t.assert_true("T1 right should contain value1", contains(diff.right, "value1"));
    t.assert_true("T1 right should contain value2", contains(diff.right, "value2"));
    t.assert_true("T1 right should contain tool_call end", contains(diff.right, "</seed:tool_call>"));

    // Suffix should be the eos token
    t.assert_equal("T1 suffix should be '<seed:eos>'", "<seed:eos>", diff.suffix);
}
|
|
|
|
// T2: Compare one vs two tool calls
|
|
// T2: renders the Seed-OSS template with one assistant tool call vs. two, and
// checks that the first call is common (prefix) while the second call shows up
// only on the right-hand side of the diff.
static void test_seed_oss_call_count(testing & t) {
    common_chat_template tmpl = load_seed_oss_template(t);

    json assistant_one_call = json{
        {"role", "assistant"},
        {"content", nullptr},
        {"tool_calls", json::array({
            build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}}))
        })}
    };

    // Same first call, plus a second call with distinct values and id.
    json assistant_two_calls = json{
        {"role", "assistant"},
        {"content", nullptr},
        {"tool_calls", json::array({
            build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}})),
            build_tool_call("test_function_name", json::object({{"param1", "value3"}, {"param2", "value4"}}), "call_002")
        })}
    };

    json user_msg = json{
        {"role", "user"},
        {"content", "Hello, please help me."}
    };

    template_params params_one;
    params_one.messages = json::array({user_msg, assistant_one_call});
    params_one.tools = build_tools_definition();
    params_one.add_generation_prompt = false;
    params_one.enable_thinking = true;

    auto result = differential_analyzer::compare_variants(tmpl, params_one,
        [&](template_params & p) {
            p.messages = json::array({user_msg, assistant_two_calls});
        });

    t.assert_true("T2 result should have value", result.has_value());
    if (!result.has_value()) {
        // Avoid dereferencing an empty optional; the failure is already recorded.
        return;
    }

    const auto & diff = result->diff;
    const auto contains = [](const auto & haystack, const char * needle) {
        return haystack.find(needle) != std::string::npos;
    };

    // Prefix should include the first tool call
    t.assert_true("T2 prefix should contain first tool_call begin", contains(diff.prefix, "<seed:tool_call>"));
    t.assert_true("T2 prefix should contain first function", contains(diff.prefix, "<function=test_function_name>"));
    t.assert_true("T2 prefix should contain value1", contains(diff.prefix, "value1"));
    t.assert_true("T2 prefix should contain value2", contains(diff.prefix, "value2"));
    t.assert_true("T2 prefix should contain first tool_call end", contains(diff.prefix, "</seed:tool_call>"));

    // Left should be empty (no second tool call in variant A)
    t.assert_equal("T2 left should be empty", "", diff.left);

    // Right should contain the second tool call
    t.assert_true("T2 right should contain second tool_call begin", contains(diff.right, "<seed:tool_call>"));
    t.assert_true("T2 right should contain second function", contains(diff.right, "<function=test_function_name>"));
    t.assert_true("T2 right should contain value3", contains(diff.right, "value3"));
    t.assert_true("T2 right should contain value4", contains(diff.right, "value4"));
    t.assert_true("T2 right should contain second tool_call end", contains(diff.right, "</seed:tool_call>"));

    // Suffix should be the eos token
    t.assert_equal("T2 suffix should be '<seed:eos>'", "<seed:eos>", diff.suffix);
}
|
|
|
|
// T3: Compare different function names
|
|
// T3: renders the Seed-OSS template with a call to func_alpha vs. a call to
// func_beta (same argument) and checks where the names and the shared argument
// value end up in the diff.
static void test_seed_oss_function_names(testing & t) {
    common_chat_template tmpl = load_seed_oss_template(t);

    // Build tools with two different function names sharing one schema.
    json parameters_schema = json::object();
    parameters_schema["type"] = "object";
    parameters_schema["properties"] = json::object();
    parameters_schema["properties"]["arg1"] = json::object({
        {"type", "string"},
        {"description", "Argument 1"}
    });
    parameters_schema["required"] = json::array({"arg1"});

    json tools = json::array({
        json{
            {"type", "function"},
            {"function", json{
                {"name", "func_alpha"},
                {"description", "First function"},
                {"parameters", parameters_schema}
            }}
        },
        json{
            {"type", "function"},
            {"function", json{
                {"name", "func_beta"},
                {"description", "Second function"},
                {"parameters", parameters_schema}
            }}
        }
    });

    json assistant_func_alpha = json{
        {"role", "assistant"},
        {"content", nullptr},
        {"tool_calls", json::array({
            build_tool_call("func_alpha", json::object({{"arg1", "test_value"}}))
        })}
    };

    json assistant_func_beta = json{
        {"role", "assistant"},
        {"content", nullptr},
        {"tool_calls", json::array({
            build_tool_call("func_beta", json::object({{"arg1", "test_value"}}))
        })}
    };

    json user_msg = json{
        {"role", "user"},
        {"content", "Hello"}
    };

    template_params params_alpha;
    params_alpha.messages = json::array({user_msg, assistant_func_alpha});
    params_alpha.tools = tools;
    params_alpha.add_generation_prompt = false;
    params_alpha.enable_thinking = true;

    auto result = differential_analyzer::compare_variants(tmpl, params_alpha,
        [&](template_params & p) {
            p.messages = json::array({user_msg, assistant_func_beta});
        });

    t.assert_true("T3 result should have value", result.has_value());
    if (!result.has_value()) {
        // Avoid dereferencing an empty optional; the failure is already recorded.
        return;
    }

    const auto & diff = result->diff;

    bool func_alpha_in_left = diff.left.find("func_alpha") != std::string::npos;
    bool func_alpha_in_prefix = diff.prefix.find("func_alpha") != std::string::npos;
    bool func_beta_in_right = diff.right.find("func_beta") != std::string::npos;
    bool func_beta_in_prefix = diff.prefix.find("func_beta") != std::string::npos;
    bool func_beta_in_suffix = diff.suffix.find("func_beta") != std::string::npos;

    // Left should contain func_alpha (or be in prefix)
    t.assert_true("T3 left should contain func_alpha (or prefix)", func_alpha_in_left || func_alpha_in_prefix);

    // func_beta is accepted in the right part or in either common part.
    t.assert_true("T3 right should contain func_beta", func_beta_in_right || func_beta_in_prefix || func_beta_in_suffix);

    // Both variants pass the same argument value, so it is expected in a
    // common part (prefix or suffix) rather than in left/right.
    t.assert_true("T3 diff should contain test_value (in prefix or suffix)",
        diff.prefix.find("test_value") != std::string::npos || diff.suffix.find("test_value") != std::string::npos);
}
|
|
|
|
// T4: Compare different argument counts (zero, one, two parameters)
|
|
// T4: renders the Seed-OSS template with tool calls that pass zero, one, or
// two arguments and checks the diffs "zero vs one" and "one vs two".
static void test_seed_oss_argument_count(testing & t) {
    common_chat_template tmpl = load_seed_oss_template(t);

    // The tool schema always declares two required string parameters; what
    // varies between variants is how many arguments each call supplies.
    json params_2_required = json::object();
    params_2_required["type"] = "object";
    params_2_required["properties"] = json::object();
    params_2_required["properties"]["arg1"] = json::object({
        {"type", "string"},
        {"description", "Argument 1"}
    });
    params_2_required["properties"]["arg2"] = json::object({
        {"type", "string"},
        {"description", "Argument 2"}
    });
    params_2_required["required"] = json::array({"arg1", "arg2"});

    json tools = json::array({
        json{
            {"type", "function"},
            {"function", json{
                {"name", "test_func"},
                {"description", "Test function"},
                {"parameters", params_2_required}
            }}
        }
    });

    json assistant_zero_args = json{
        {"role", "assistant"},
        {"content", nullptr},
        {"tool_calls", json::array({
            build_tool_call("test_func", json::object())
        })}
    };

    json assistant_one_arg = json{
        {"role", "assistant"},
        {"content", nullptr},
        {"tool_calls", json::array({
            build_tool_call("test_func", json::object({{"arg1", "value1"}}))
        })}
    };

    json assistant_two_args = json{
        {"role", "assistant"},
        {"content", nullptr},
        {"tool_calls", json::array({
            build_tool_call("test_func", json::object({{"arg1", "value1"}, {"arg2", "value2"}}))
        })}
    };

    json user_msg = json{
        {"role", "user"},
        {"content", "Hello"}
    };

    // Test zero vs one
    template_params params_zero;
    params_zero.messages = json::array({user_msg, assistant_zero_args});
    params_zero.tools = tools;
    params_zero.add_generation_prompt = false;
    params_zero.enable_thinking = true;

    auto result_zero_one = differential_analyzer::compare_variants(tmpl, params_zero,
        [&](template_params & p) {
            p.messages = json::array({user_msg, assistant_one_arg});
        });

    t.assert_true("T4 zero vs one result should have value", result_zero_one.has_value());
    // Guarded so an empty optional is never dereferenced; the second
    // comparison below still runs either way.
    if (result_zero_one.has_value()) {
        t.assert_true("T4 zero vs one left should be empty or minimal", result_zero_one->diff.left.empty());
        t.assert_true("T4 zero vs one right should contain arg1", result_zero_one->diff.right.find("arg1") != std::string::npos);
    }

    // Test one vs two
    template_params params_one;
    params_one.messages = json::array({user_msg, assistant_one_arg});
    params_one.tools = tools;
    params_one.add_generation_prompt = false;
    params_one.enable_thinking = true;

    auto result_one_two = differential_analyzer::compare_variants(tmpl, params_one,
        [&](template_params & p) {
            p.messages = json::array({user_msg, assistant_two_args});
        });

    t.assert_true("T4 one vs two result should have value", result_one_two.has_value());
    if (!result_one_two.has_value()) {
        // Avoid dereferencing an empty optional; the failure is already recorded.
        return;
    }

    const auto & diff4 = result_one_two->diff;
    t.assert_true("T4 one vs two left should contain arg1 (or prefix)",
        diff4.left.find("arg1") != std::string::npos || diff4.prefix.find("arg1") != std::string::npos);
    t.assert_true("T4 one vs two right should contain arg1 (or prefix)",
        diff4.right.find("arg1") != std::string::npos || diff4.prefix.find("arg1") != std::string::npos);
    t.assert_true("T4 one vs two right should contain arg2 (or prefix/suffix)",
        diff4.right.find("arg2") != std::string::npos || diff4.prefix.find("arg2") != std::string::npos || diff4.suffix.find("arg2") != std::string::npos);
}
|
|
|
|
// T5: Compare which arguments are present (param1 only vs. param2 only vs. both)
|
|
// T5: renders the Seed-OSS template with tool calls that pass only param1,
// only param2, or both, and checks the diffs "param1 vs param2" and
// "param1 vs both".
static void test_seed_oss_args_presence(testing & t) {
    common_chat_template tmpl = load_seed_oss_template(t);

    json assistant_same_arg = json{
        {"role", "assistant"},
        {"content", nullptr},
        {"tool_calls", json::array({
            build_tool_call("test_function_name", json::object({{"param1", "value1"}}))
        })}
    };

    json assistant_other_arg = json{
        {"role", "assistant"},
        {"content", nullptr},
        {"tool_calls", json::array({
            build_tool_call("test_function_name", json::object({{"param2", "value2"}}))
        })}
    };

    json assistant_both_args = json{
        {"role", "assistant"},
        {"content", nullptr},
        {"tool_calls", json::array({
            build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}}))
        })}
    };

    json user_msg = json{
        {"role", "user"},
        {"content", "Hello"}
    };

    // Baseline (variant A) for both comparisons: the call passes only param1.
    template_params params_same;
    params_same.messages = json::array({user_msg, assistant_same_arg});
    params_same.tools = build_tools_definition();
    params_same.add_generation_prompt = false;
    params_same.enable_thinking = true;

    const auto contains = [](const auto & haystack, const char * needle) {
        return haystack.find(needle) != std::string::npos;
    };

    // Test same arg vs other arg
    auto result_same_other = differential_analyzer::compare_variants(tmpl, params_same,
        [&](template_params & p) {
            p.messages = json::array({user_msg, assistant_other_arg});
        });

    t.assert_true("T5 same vs other result should have value", result_same_other.has_value());
    // Guarded so an empty optional is never dereferenced; the second
    // comparison below still runs either way.
    if (result_same_other.has_value()) {
        const auto & diff5a = result_same_other->diff;
        t.assert_true("T5 same vs other left should contain param1 (or prefix/suffix)",
            contains(diff5a.left, "param1") || contains(diff5a.prefix, "param1") || contains(diff5a.suffix, "param1"));
        // Only left and prefix are accepted for value1; the message says so.
        t.assert_true("T5 same vs other left should contain value1 (or prefix)",
            contains(diff5a.left, "value1") || contains(diff5a.prefix, "value1"));
        t.assert_true("T5 same vs other right should contain param2 (or prefix/suffix)",
            contains(diff5a.right, "param2") || contains(diff5a.prefix, "param2") || contains(diff5a.suffix, "param2"));
        t.assert_true("T5 same vs other right should contain value2 (or prefix/suffix)",
            contains(diff5a.right, "value2") || contains(diff5a.prefix, "value2") || contains(diff5a.suffix, "value2"));
    }

    // Test same arg vs both args
    auto result_same_both = differential_analyzer::compare_variants(tmpl, params_same,
        [&](template_params & p) {
            p.messages = json::array({user_msg, assistant_both_args});
        });

    t.assert_true("T5 same vs both result should have value", result_same_both.has_value());
    if (!result_same_both.has_value()) {
        // Avoid dereferencing an empty optional; the failure is already recorded.
        return;
    }

    const auto & diff5b = result_same_both->diff;
    t.assert_true("T5 same vs both left should contain param1 (or prefix/suffix)",
        contains(diff5b.left, "param1") || contains(diff5b.prefix, "param1") || contains(diff5b.suffix, "param1"));
    t.assert_true("T5 same vs both right should contain param1 (or prefix/suffix)",
        contains(diff5b.right, "param1") || contains(diff5b.prefix, "param1") || contains(diff5b.suffix, "param1"));
    t.assert_true("T5 same vs both right should contain param2 (or prefix/suffix)",
        contains(diff5b.right, "param2") || contains(diff5b.prefix, "param2") || contains(diff5b.suffix, "param2"));
}
|
|
|
|
// T6: Tool call with vs without reasoning_content
|
|
// T6: renders the Seed-OSS template for the same tool call with and without
// "reasoning_content" on the assistant message, and checks that the reasoning
// block is the only difference and sits before the tool call.
static void test_seed_oss_tool_with_reasoning(testing & t) {
    common_chat_template tmpl = load_seed_oss_template(t);

    json assistant_tool_only = json{
        {"role", "assistant"},
        {"content", nullptr},
        {"tool_calls", json::array({
            build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}}))
        })}
    };

    // Identical call, plus a reasoning_content field.
    json assistant_tool_with_reasoning = json{
        {"role", "assistant"},
        {"content", nullptr},
        {"tool_calls", json::array({
            build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}}))
        })},
        {"reasoning_content", "I need to call the tool first."}
    };

    json user_msg = json{
        {"role", "user"},
        {"content", "Hello, please help me."}
    };

    template_params params_tool_only;
    params_tool_only.messages = json::array({user_msg, assistant_tool_only});
    params_tool_only.tools = build_tools_definition();
    params_tool_only.add_generation_prompt = false;
    params_tool_only.enable_thinking = true;

    auto result = differential_analyzer::compare_variants(tmpl, params_tool_only,
        [&](template_params & p) {
            p.messages = json::array({user_msg, assistant_tool_with_reasoning});
        });

    t.assert_true("T6 result should have value", result.has_value());
    if (!result.has_value()) {
        // Avoid dereferencing an empty optional; the failure is already recorded.
        return;
    }

    const auto & diff = result->diff;
    const auto contains = [](const auto & haystack, const char * needle) {
        return haystack.find(needle) != std::string::npos;
    };

    // Left should be empty (no reasoning in variant A)
    t.assert_equal("T6 left should be empty", "", diff.left);

    // Right should contain the thinking token with reasoning content
    t.assert_true("T6 right should contain think begin", contains(diff.right, "<seed:think>"));
    t.assert_true("T6 right should contain reasoning content", contains(diff.right, "I need to call the tool first."));
    t.assert_true("T6 right should contain think end", contains(diff.right, "</seed:think>"));

    // Prefix should contain the assistant role
    t.assert_true("T6 prefix should contain assistant", contains(diff.prefix, "assistant"));

    // Suffix should contain the tool call
    t.assert_true("T6 suffix should contain tool_call begin", contains(diff.suffix, "<seed:tool_call>"));
    t.assert_true("T6 suffix should contain function name", contains(diff.suffix, "test_function_name"));
    t.assert_true("T6 suffix should contain eos", contains(diff.suffix, "<seed:eos>"));
}
|
|
|
|
// Reads the Jinja template at `template_path` and wraps it in a
// common_chat_template. Records an assertion that the file had content; an
// unreadable file leaves the source text empty. The assertion message names
// the path because this helper is shared by several template families.
static common_chat_template load_template(testing & t, const std::string & template_path) {
    std::ifstream fin(template_path, std::ios::binary);
    std::ostringstream buf;
    if (fin.is_open()) {
        buf << fin.rdbuf();
    }
    std::string template_source = buf.str();
    common_chat_template tmpl(template_source, "", "");
    t.assert_true("template loaded successfully: " + template_path, !template_source.empty());
    return tmpl;
}
|
|
|
|
// ============================================================================
|
|
// Nemotron Template Analysis Tests
|
|
// ============================================================================
|
|
// Loads the Nemotron chat template from its fixed relative path.
static common_chat_template load_nemotron_template(testing & t) {
    static const char * const nemotron_template_path =
        "models/templates/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.jinja";
    return load_template(t, nemotron_template_path);
}
|
|
|
|
// Registers every Nemotron template sub-test with the harness, in order.
static void test_nemotron_analysis(testing & t) {
    struct sub_test {
        const char * label;
        void (*run)(testing &);
    };

    static const sub_test sub_tests[] = {
        { "Nemotron reasoning detection", test_nemotron_reasoning_detection },
        { "Nemotron tool format",         test_nemotron_tool_format         },
    };

    for (const sub_test & entry : sub_tests) {
        t.test(entry.label, entry.run);
    }
}
|
|
|
|
// Runs differential_analyzer::analyze() on the Nemotron template and checks
// the detected reasoning markers, reasoning mode, and content mode.
static void test_nemotron_reasoning_detection(testing & t) {
    common_chat_template tmpl = load_nemotron_template(t);

    // Run differential analysis
    auto analysis = differential_analyzer::analyze(tmpl);

    // Check reasoning markers
    t.assert_equal("reasoning_start should be '<think>'", "<think>", analysis.markers.reasoning_start);
    t.assert_equal("reasoning_end should be '</think>'", "</think>", analysis.markers.reasoning_end);

    // Check reasoning mode detection
    // Nemotron uses forced closed reasoning with add_generation_prompt
    t.assert_equal("reasoning should be FORCED_CLOSED", reasoning_mode::FORCED_CLOSED, analysis.reasoning);

    // Make sure reasoning markers don't spill over to content markers
    t.assert_equal("content start should be empty", "", analysis.markers.content_start);
    t.assert_equal("content end should be empty", "", analysis.markers.content_end);

    t.assert_equal("content should be PLAIN", content_mode::PLAIN, analysis.content);
}
|
|
|
|
// Runs differential_analyzer::analyze() on the Nemotron template and checks
// the detected tool-call markers, format classification, and support flags.
static void test_nemotron_tool_format(testing & t) {
    common_chat_template tmpl = load_nemotron_template(t);

    // Run differential analysis
    auto analysis = differential_analyzer::analyze(tmpl);

    // Check tool markers - Nemotron uses per-call wrapping (each call individually wrapped)
    t.assert_equal("tool_section_start should be empty (per-call format)", "", analysis.markers.tool_section_start);
    t.assert_equal("tool_section_end should be empty (per-call format)", "", analysis.markers.tool_section_end);
    t.assert_equal("per_call_start should be '<tool_call>\\n'", "<tool_call>\n", analysis.markers.per_call_start);
    t.assert_equal("per_call_end should be '</tool_call>'", "</tool_call>", analysis.markers.per_call_end);
    t.assert_true("should support parallel calls", analysis.supports_parallel_calls);

    // Check function markers
    t.assert_equal("func_name_prefix should be '<function='", "<function=", analysis.markers.func_name_prefix);
    t.assert_equal("func_name_suffix should be '>\\n'", ">\n", analysis.markers.func_name_suffix);
    t.assert_equal("func_close should be '</function>\\n'", "</function>\n", analysis.markers.func_close);

    // Check argument markers (note: markers retain trailing newlines for proper parsing)
    t.assert_equal("arg_name_prefix should be '<parameter='", "<parameter=", analysis.markers.arg_name_prefix);
    t.assert_equal("arg_name_suffix should be '>\\n'", ">\n", analysis.markers.arg_name_suffix);
    t.assert_equal("arg_value_suffix should be '</parameter>\\n'", "</parameter>\n", analysis.markers.arg_value_suffix);

    // Check format classification. assert_equal (expected, actual) is used, as
    // in the Cohere tool-format test, instead of assert_true on a comparison.
    t.assert_equal("tool format should be TAG_WITH_TAGGED", tool_format::TAG_WITH_TAGGED, analysis.tools);

    // Verify tool support
    t.assert_true("should support tools", analysis.supports_tools);
}
|
|
|
|
// Loads the Cohere Command-R7B tool-use chat template from its fixed relative path.
static common_chat_template load_cohere_template(testing & t) {
    static const char * const cohere_template_path =
        "models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja";
    return load_template(t, cohere_template_path);
}
|
|
|
|
// Registers every Cohere template sub-test with the harness, in order.
static void test_cohere_analysis(testing & t) {
    struct sub_test {
        const char * label;
        void (*run)(testing &);
    };

    static const sub_test sub_tests[] = {
        { "Cohere reasoning detection", test_cohere_reasoning_detection },
        { "Cohere tool format",         test_cohere_tool_format         },
    };

    for (const sub_test & entry : sub_tests) {
        t.test(entry.label, entry.run);
    }
}
|
|
|
|
// Runs differential_analyzer::analyze() on the Cohere template and checks the
// detected reasoning markers, reasoning mode, content markers, and content mode.
static void test_cohere_reasoning_detection(testing & t) {
    common_chat_template cohere_tmpl = load_cohere_template(t);

    auto analysis = differential_analyzer::analyze(cohere_tmpl);
    const auto & markers = analysis.markers;

    // Reasoning is delimited by Cohere's special thinking tokens.
    t.assert_equal("reasoning_start should be '<|START_THINKING|>'", "<|START_THINKING|>", markers.reasoning_start);
    t.assert_equal("reasoning_end should be '<|END_THINKING|>'", "<|END_THINKING|>", markers.reasoning_end);

    // Expected mode: reasoning appears only together with tool calls.
    t.assert_equal("reasoning should be TOOLS_ONLY", reasoning_mode::TOOLS_ONLY, analysis.reasoning);

    // Content is expected to be wrapped in START/END_RESPONSE tokens...
    t.assert_equal("content_start should be '<|START_RESPONSE|>'", "<|START_RESPONSE|>", markers.content_start);
    t.assert_equal("content_end should be '<|END_RESPONSE|>'", "<|END_RESPONSE|>", markers.content_end);

    // ...and wrapped in every case, with or without tools.
    t.assert_equal("content should be ALWAYS_WRAPPED", content_mode::ALWAYS_WRAPPED, analysis.content);
}
|
|
|
|
// Runs differential_analyzer::analyze() on the Cohere template and checks the
// detected tool-call markers, JSON field names, format classification, and flags.
static void test_cohere_tool_format(testing & t) {
    common_chat_template tmpl = load_cohere_template(t);

    // Run differential analysis
    auto analysis = differential_analyzer::analyze(tmpl);

    // Check tool section markers - Cohere uses ACTION markers
    t.assert_equal("tool_section_start should be '<|START_ACTION|>'", "<|START_ACTION|>", analysis.markers.tool_section_start);
    t.assert_equal("tool_section_end should be '<|END_ACTION|>'", "<|END_ACTION|>", analysis.markers.tool_section_end);

    // JSON_NATIVE format has no per-call markers
    t.assert_equal("per_call_start should be empty", "", analysis.markers.per_call_start);
    t.assert_equal("per_call_end should be empty", "", analysis.markers.per_call_end);

    // JSON_NATIVE format has empty function markers (no XML-style markers)
    t.assert_equal("func_name_prefix should be empty", "", analysis.markers.func_name_prefix);
    t.assert_equal("func_name_suffix should be empty", "", analysis.markers.func_name_suffix);
    t.assert_equal("func_close should be empty", "", analysis.markers.func_close);

    // JSON_NATIVE format has empty args markers
    t.assert_equal("args_start should be empty", "", analysis.markers.args_start);
    t.assert_equal("args_end should be empty", "", analysis.markers.args_end);

    // JSON_NATIVE format has empty argument markers
    t.assert_equal("arg_name_prefix should be empty", "", analysis.markers.arg_name_prefix);
    t.assert_equal("arg_name_suffix should be empty", "", analysis.markers.arg_name_suffix);
    t.assert_equal("arg_value_prefix should be empty", "", analysis.markers.arg_value_prefix);
    t.assert_equal("arg_value_suffix should be empty", "", analysis.markers.arg_value_suffix);
    t.assert_equal("arg_separator should be empty", "", analysis.markers.arg_separator);

    // Check JSON field names - Cohere uses non-standard names
    t.assert_equal("name_field should be 'tool_name'", "tool_name", analysis.name_field);
    t.assert_equal("args_field should be 'parameters'", "parameters", analysis.args_field);
    // The template's tool_call_id is not a real tool call id field (i.e. not
    // in the OpenAI tool call ID format), so no id field is expected.
    t.assert_equal("id_field should be empty", "", analysis.id_field);

    // Check format classification
    t.assert_equal("tool format should be JSON_NATIVE", tool_format::JSON_NATIVE, analysis.tools);

    // Check flags
    t.assert_true("should support tools", analysis.supports_tools);
    t.assert_true("should support parallel calls", analysis.supports_parallel_calls);
    t.assert_true("should not require nonnull content", !analysis.requires_nonnull_content);
    t.assert_true("tools_array_wrapped should be true", analysis.tools_array_wrapped);
}
|
|
|
|
// ============================================================================
|
|
// standard_json_tools Format Tests
|
|
// ============================================================================
|
|
|
|
// Helper to build tools definition for tests
|
|
static json build_test_tools() {
|
|
json parameters_schema = json::object();
|
|
parameters_schema["type"] = "object";
|
|
parameters_schema["properties"] = json::object();
|
|
parameters_schema["properties"]["location"] = json::object({
|
|
{"type", "string"},
|
|
{"description", "The city and state"}
|
|
});
|
|
parameters_schema["properties"]["unit"] = json::object({
|
|
{"type", "string"},
|
|
{"description", "Temperature unit"},
|
|
{"enum", json::array({"celsius", "fahrenheit"})}
|
|
});
|
|
parameters_schema["required"] = json::array({"location"});
|
|
|
|
return json::array({
|
|
json{
|
|
{"type", "function"},
|
|
{"function", json{
|
|
{"name", "get_current_weather"},
|
|
{"description", "Get the current weather in a given location"},
|
|
{"parameters", parameters_schema}
|
|
}}
|
|
}
|
|
});
|
|
}
|
|
|
|
// Registers one named sub-test per JSON tool-call layout handled by
// standard_json_tools, in a fixed order.
static void test_standard_json_tools_formats(testing & t) {
    struct format_case {
        const char * label;
        void (*run)(testing &);
    };

    const format_case cases[] = {
        { "OpenAI format",          test_standard_json_tools_openai       },
        { "Cohere format",          test_standard_json_tools_cohere       },
        { "function-as-key format", test_standard_json_tools_function_key },
    };

    for (const format_case & fc : cases) {
        t.test(fc.label, fc.run);
    }
}
|
|
|
|
// Test 1: OpenAI Standard Format
|
|
// {"id": "call_abc", "function": {"name": "get_weather", "arguments": {"location": "NYC"}}}
|
|
static void test_standard_json_tools_openai(testing & t) {
|
|
json tools = build_test_tools();
|
|
|
|
auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) {
|
|
auto tool_call = p.standard_json_tools(
|
|
"<tool_call>", "</tool_call>", tools,
|
|
/* parallel */ true,
|
|
/* force */ false,
|
|
/* name_key */ "function.name",
|
|
/* args_key */ "function.arguments",
|
|
/* array_wrapped */ false,
|
|
/* function_is_key */ false,
|
|
/* call_id_key */ "id",
|
|
/* gen_call_id_key */ "",
|
|
/* parameters_order */ {}
|
|
);
|
|
return p.content(p.until("<tool_call>")) + p.optional(tool_call) + p.end();
|
|
});
|
|
|
|
std::string input =
|
|
"Let me check the weather."
|
|
"<tool_call>"
|
|
R"({"id": "call_abc123", "function": {"name": "get_current_weather", "arguments": {"location": "NYC"}}})"
|
|
"</tool_call>";
|
|
|
|
common_peg_parse_context ctx(input, false);
|
|
auto result = parser.parse(ctx);
|
|
|
|
t.assert_true("parse success", result.success());
|
|
|
|
common_chat_msg msg;
|
|
auto mapper = common_chat_peg_unified_mapper(msg);
|
|
mapper.from_ast(ctx.ast, result);
|
|
|
|
t.assert_equal("tool calls count", 1u, msg.tool_calls.size());
|
|
if (!msg.tool_calls.empty()) {
|
|
t.assert_equal("tool name", "get_current_weather", msg.tool_calls[0].name);
|
|
t.assert_equal("tool id", "call_abc123", msg.tool_calls[0].id);
|
|
}
|
|
t.assert_true("content present", msg.content.find("Let me check the weather") != std::string::npos);
|
|
}
|
|
|
|
// Test 2: Cohere Format
|
|
// {"tool_call_id": 0, "tool_name": "get_weather", "parameters": {"location": "NYC"}}
|
|
static void test_standard_json_tools_cohere(testing & t) {
|
|
json tools = build_test_tools();
|
|
|
|
auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) {
|
|
auto tool_call = p.standard_json_tools(
|
|
"<|START_ACTION|>[", "]<|END_ACTION|>", tools,
|
|
/* parallel */ true,
|
|
/* force */ false,
|
|
/* name_key */ "tool_name",
|
|
/* args_key */ "parameters",
|
|
/* array_wrapped */ false, // Brackets are part of section markers
|
|
/* function_is_key */ false,
|
|
/* call_id_key */ "",
|
|
/* gen_call_id_key */ "tool_call_id",
|
|
/* parameters_order */ {"tool_call_id", "tool_name", "parameters"}
|
|
);
|
|
return p.content(p.until("<|START_ACTION|>")) + p.optional(tool_call) + p.end();
|
|
});
|
|
|
|
std::string input =
|
|
"Let me search for that."
|
|
"<|START_ACTION|>["
|
|
R"({"tool_call_id": 0, "tool_name": "get_current_weather", "parameters": {"location": "NYC", "unit": "celsius"}})"
|
|
"]<|END_ACTION|>";
|
|
|
|
common_peg_parse_context ctx(input, false);
|
|
auto result = parser.parse(ctx);
|
|
|
|
t.assert_true("parse success", result.success());
|
|
|
|
common_chat_msg msg;
|
|
auto mapper = common_chat_peg_unified_mapper(msg);
|
|
mapper.from_ast(ctx.ast, result);
|
|
|
|
t.assert_equal("tool calls count", 1u, msg.tool_calls.size());
|
|
if (!msg.tool_calls.empty()) {
|
|
t.assert_equal("tool name", "get_current_weather", msg.tool_calls[0].name);
|
|
t.assert_equal("tool id", "0", msg.tool_calls[0].id);
|
|
}
|
|
t.assert_true("content present", msg.content.find("Let me search") != std::string::npos);
|
|
}
|
|
|
|
// Test 3: Function-as-Key Format
|
|
// {"get_current_weather": {"id": "call-0001", "args": {"location": "NYC"}}}
|
|
static void test_standard_json_tools_function_key(testing & t) {
|
|
json tools = build_test_tools();
|
|
|
|
auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) {
|
|
auto tool_call = p.standard_json_tools(
|
|
"<tool_calls>[", "]</tool_calls>", tools,
|
|
/* parallel */ true,
|
|
/* force */ false,
|
|
/* name_key */ "", // Name is the key itself
|
|
/* args_key */ "args",
|
|
/* array_wrapped */ false,
|
|
/* function_is_key */ true,
|
|
/* call_id_key */ "id",
|
|
/* gen_call_id_key */ "",
|
|
/* parameters_order */ {}
|
|
);
|
|
return p.content(p.until("<tool_calls>")) + p.optional(tool_call) + p.end();
|
|
});
|
|
|
|
std::string input =
|
|
"I'll call the weather function."
|
|
"<tool_calls>["
|
|
R"({"get_current_weather": {"id": "call-0001", "args": {"location": "NYC", "unit": "celsius"}}})"
|
|
"]</tool_calls>";
|
|
|
|
common_peg_parse_context ctx(input, false);
|
|
auto result = parser.parse(ctx);
|
|
|
|
t.assert_true("parse success", result.success());
|
|
|
|
common_chat_msg msg;
|
|
auto mapper = common_chat_peg_unified_mapper(msg);
|
|
mapper.from_ast(ctx.ast, result);
|
|
|
|
t.assert_equal("tool calls count", 1u, msg.tool_calls.size());
|
|
if (!msg.tool_calls.empty()) {
|
|
t.assert_equal("tool name", "get_current_weather", msg.tool_calls[0].name);
|
|
t.assert_equal("tool id", "call-0001", msg.tool_calls[0].id);
|
|
}
|
|
t.assert_true("content present", msg.content.find("I'll call the weather") != std::string::npos);
|
|
}
|
|
|
|
// ============================================================================
|
|
// normalize_quotes_to_json Tests
|
|
// ============================================================================
|
|
|
|
// Copy of the function for isolated testing (original is static in chat-peg-parser.cpp)
//
// Rewrites single-quoted strings as double-quoted ones:
//   - a ' that opens or closes a single-quoted string becomes "
//   - inside a single-quoted string, \' becomes ' and a " (bare or already
//     backslash-escaped) is emitted as \"
//   - any other backslash pair inside a string is copied through unchanged
//   - double-quoted strings are copied verbatim, including any ' they contain
//   - a backslash outside of any string is copied alone; the character after
//     it is then processed normally
static std::string normalize_quotes_to_json(const std::string & input) {
    // The two quote kinds are mutually exclusive, so a single state suffices.
    enum class quote_state { outside, in_single, in_double };

    std::string out;
    out.reserve(input.size() + 16);

    quote_state   state = quote_state::outside;
    const size_t  len   = input.size();
    size_t        pos   = 0;

    while (pos < len) {
        const char ch = input[pos++];  // pos now indexes the following character

        // Backslash with at least one character after it.
        if (ch == '\\' && pos < len) {
            if (state == quote_state::outside) {
                out += ch;  // do not consume the following character
                continue;
            }

            const char escaped = input[pos++];
            if (state == quote_state::in_single && escaped == '\'') {
                out += '\'';  // \' is not an escape in the output; drop the backslash
            } else if (state == quote_state::in_single && escaped == '"') {
                out += "\\\"";
            } else {
                out += ch;
                out += escaped;
            }
            continue;
        }

        switch (ch) {
            case '"':
                if (state == quote_state::in_single) {
                    out += "\\\"";  // bare " inside '...' must be escaped in the output
                } else {
                    state = (state == quote_state::in_double) ? quote_state::outside
                                                              : quote_state::in_double;
                    out += ch;
                }
                break;

            case '\'':
                if (state == quote_state::in_double) {
                    out += ch;  // apostrophe inside "..." is ordinary text
                } else {
                    state = (state == quote_state::in_single) ? quote_state::outside
                                                              : quote_state::in_single;
                    out += '"';
                }
                break;

            default:
                out += ch;
                break;
        }
    }

    return out;
}
|
|
|
|
// Runs normalize_quotes_to_json over a table of input/expected pairs (one
// named sub-test each, in table order) and then the larger embedded-quotes
// scenario.
static void test_normalize_quotes_to_json(testing & t) {
    struct quote_case {
        const char * test_name;     // name passed to t.test
        const char * assert_label;  // message passed to assert_equal
        const char * input;
        const char * expected;
    };

    // Static storage so the entries outlive this call regardless of how the
    // test runner handles the callbacks.
    static const quote_case cases[] = {
        { "basic single to double quotes", "basic conversion",
          "{'key': 'value'}",
          "{\"key\": \"value\"}" },
        { "escaped single quote inside single-quoted string", "escaped single quote",
          "{'code': 'print(\\'hello\\')'}",
          "{\"code\": \"print('hello')\"}" },
        { "double quote inside single-quoted string", "double quote escaping",
          "{'msg': 'He said \"hi\"'}",
          "{\"msg\": \"He said \\\"hi\\\"\"}" },
        { "nested backslash escapes", "backslash escaping",
          "{'path': 'C:\\\\Users\\\\test'}",
          "{\"path\": \"C:\\\\Users\\\\test\"}" },
        { "newline escapes", "newline escaping",
          "{'text': 'line1\\nline2'}",
          "{\"text\": \"line1\\nline2\"}" },
        { "mixed quotes", "mixed quotes",
          "{\"already_double\": 'single_value'}",
          "{\"already_double\": \"single_value\"}" },
    };

    for (const quote_case & entry : cases) {
        t.test(entry.test_name, [qc = &entry](testing & t) {
            std::string input    = qc->input;
            std::string expected = qc->expected;
            std::string result   = normalize_quotes_to_json(input);
            t.assert_equal(qc->assert_label, expected, result);
        });
    }

    t.test("embedded quotes - the test case", test_normalize_quotes_with_embedded_quotes);
}
|
|
|
|
// Test case that mirrors the Seed-OSS failing test scenario
|
|
static void test_normalize_quotes_with_embedded_quotes(testing & t) {
|
|
// This is similar to the Seed-OSS template test case
|
|
// The input has embedded double quotes like "14" and "bar" inside string values
|
|
std::string input = "{'filename': 'foo.cpp', 'oldString': 'def foo(arg = \"14\"):\\n return arg + \"bar\"\\n', 'newString': 'def foo(arg = \"15\"):\\n pass\\n'}";
|
|
|
|
// Expected: Python single quotes -> JSON double quotes, internal double quotes escaped
|
|
std::string expected = "{\"filename\": \"foo.cpp\", \"oldString\": \"def foo(arg = \\\"14\\\"):\\n return arg + \\\"bar\\\"\\n\", \"newString\": \"def foo(arg = \\\"15\\\"):\\n pass\\n\"}";
|
|
|
|
std::string result = normalize_quotes_to_json(input);
|
|
|
|
t.assert_equal("normalize quotes with embedded double quotes", expected, result);
|
|
|
|
// Also verify the result is valid JSON
|
|
try {
|
|
json parsed = json::parse(result);
|
|
t.assert_true("result is valid JSON", true);
|
|
t.assert_equal("filename field", "foo.cpp", parsed["filename"].get<std::string>());
|
|
t.assert_true("oldString contains embedded quotes",
|
|
parsed["oldString"].get<std::string>().find("\"14\"") != std::string::npos);
|
|
t.assert_true("newString contains embedded quotes",
|
|
parsed["newString"].get<std::string>().find("\"15\"") != std::string::npos);
|
|
} catch (const std::exception & e) {
|
|
t.assert_true(std::string("JSON parse failed: ") + e.what(), false);
|
|
}
|
|
}
|
|
|
|
// ============================================================================
|
|
// TAG_WITH_TAGGED Argument Parsing Tests
|
|
// ============================================================================
|
|
|
|
// Build tools definition for edit function
|
|
static json build_edit_tool() {
|
|
json parameters_schema = json::object();
|
|
parameters_schema["type"] = "object";
|
|
parameters_schema["properties"] = json::object();
|
|
parameters_schema["properties"]["filename"] = json::object({
|
|
{"type", "string"},
|
|
{"description", "Path of file to edit"}
|
|
});
|
|
parameters_schema["properties"]["oldString"] = json::object({
|
|
{"type", "string"},
|
|
{"description", "String to replace"}
|
|
});
|
|
parameters_schema["properties"]["newString"] = json::object({
|
|
{"type", "string"},
|
|
{"description", "New (replacement) value"}
|
|
});
|
|
parameters_schema["required"] = json::array({"filename", "oldString", "newString"});
|
|
|
|
return json::array({
|
|
json{
|
|
{"type", "function"},
|
|
{"function", json{
|
|
{"name", "edit"},
|
|
{"description", "Edit a file"},
|
|
{"parameters", parameters_schema}
|
|
}}
|
|
}
|
|
});
|
|
}
|
|
|
|
// Test that reproduces the Seed-OSS template issue with embedded quotes
|
|
static void test_tagged_args_with_embedded_quotes(testing & t) {
|
|
json tools = build_edit_tool();
|
|
|
|
// Build a parser for TAG_WITH_TAGGED format like Seed-OSS/Nemotron
|
|
auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) {
|
|
// Build tool choice for the edit function
|
|
auto tool_choice = p.choice();
|
|
|
|
for (const auto & tool_def : tools) {
|
|
if (!tool_def.contains("function")) continue;
|
|
const auto & function = tool_def.at("function");
|
|
std::string name = function.at("name");
|
|
const auto & params = function.at("parameters");
|
|
|
|
if (!params.contains("properties") || !params.at("properties").is_object()) continue;
|
|
|
|
const auto & properties = params.at("properties");
|
|
|
|
// Build argument parsers
|
|
std::vector<common_peg_parser> arg_parsers;
|
|
for (const auto & [param_name, param_schema] : properties.items()) {
|
|
auto arg = p.tool_arg(
|
|
p.tool_arg_open(p.literal("<parameter=") + p.tool_arg_name(p.literal(param_name)) + p.literal(">")) +
|
|
p.space() +
|
|
p.tool_arg_string_value(p.until("</parameter>")) +
|
|
p.space() +
|
|
p.tool_arg_close(p.literal("</parameter>"))
|
|
);
|
|
arg_parsers.push_back(p.optional(p.rule("arg-" + param_name, arg)));
|
|
}
|
|
|
|
// Build arg sequence with space() between
|
|
common_peg_parser args_seq = p.eps();
|
|
for (size_t i = 0; i < arg_parsers.size(); i++) {
|
|
if (i > 0) {
|
|
args_seq = args_seq + p.space();
|
|
}
|
|
args_seq = args_seq + arg_parsers[i];
|
|
}
|
|
|
|
auto func_parser =
|
|
p.tool_open(p.literal("<function=") + p.tool_name(p.literal(name)) + p.literal(">")) +
|
|
p.space() + args_seq + p.space() +
|
|
p.tool_close(p.literal("</function>"));
|
|
|
|
tool_choice |= p.rule("tool-" + name, p.tool(func_parser));
|
|
}
|
|
|
|
auto tool_section =
|
|
p.literal("<seed:tool_call>") + p.space() +
|
|
tool_choice +
|
|
p.space() + p.literal("</seed:tool_call>");
|
|
|
|
return p.content(p.until("<seed:tool_call>")) + p.optional(tool_section) + p.end();
|
|
});
|
|
|
|
// The exact input from the failing test
|
|
std::string input =
|
|
"<seed:tool_call>\n"
|
|
"<function=edit>\n"
|
|
"<parameter=filename>\n"
|
|
"foo.cpp\n"
|
|
"</parameter>\n"
|
|
"<parameter=oldString>"
|
|
"def foo(arg = \"14\"):\n"
|
|
" return arg + \"bar\"\n"
|
|
"\n"
|
|
"</parameter>\n"
|
|
"<parameter=newString>"
|
|
"def foo(arg = \"15\"):\n"
|
|
" pass\n"
|
|
"\n"
|
|
"</parameter>\n"
|
|
"</function>\n"
|
|
"</seed:tool_call>";
|
|
|
|
common_peg_parse_context ctx(input, false);
|
|
auto result = parser.parse(ctx);
|
|
|
|
t.assert_true("parse success", result.success());
|
|
|
|
common_chat_msg msg;
|
|
auto mapper = common_chat_peg_unified_mapper(msg);
|
|
mapper.from_ast(ctx.ast, result);
|
|
|
|
t.assert_equal("tool calls count", 1u, msg.tool_calls.size());
|
|
|
|
if (!msg.tool_calls.empty()) {
|
|
t.assert_equal("tool name", "edit", msg.tool_calls[0].name);
|
|
|
|
// Parse the arguments as JSON to verify they're valid
|
|
std::string args = msg.tool_calls[0].arguments;
|
|
|
|
try {
|
|
json parsed = json::parse(args);
|
|
t.assert_true("arguments is valid JSON", true);
|
|
|
|
// Verify each field has proper value
|
|
t.assert_equal("filename", "foo.cpp", parsed.value("filename", ""));
|
|
|
|
std::string oldString = parsed.value("oldString", "");
|
|
t.assert_true("oldString contains embedded quotes",
|
|
oldString.find("\"14\"") != std::string::npos);
|
|
t.assert_true("oldString contains bar with quotes",
|
|
oldString.find("\"bar\"") != std::string::npos);
|
|
|
|
std::string newString = parsed.value("newString", "");
|
|
t.assert_true("newString contains embedded quotes",
|
|
newString.find("\"15\"") != std::string::npos);
|
|
|
|
} catch (const std::exception & e) {
|
|
t.assert_true(std::string("arguments should be valid JSON: ") + e.what(), false);
|
|
}
|
|
}
|
|
}
|
|
|