jinja: string repetition, tojson(ensure_ascii=true), int/float identity builtins

- runtime.cpp: binary_expression::execute_impl handles `str * int` and
  `int * str` by appending the string `repeat` times; a repeat count <= 0
  yields an empty string value.
- value.cpp: tojson no longer throws for ensure_ascii=true. The serialized
  JSON is post-processed so that every non-ASCII UTF-8 sequence inside a
  string literal becomes a lowercase-hex \uXXXX escape (a surrogate pair for
  code points above 0xFFFF, \ufffd for a sequence that fails to parse), while
  text outside string literals and existing backslash escapes are copied
  through unchanged. sort_keys=true still throws.
- value.cpp: adds an "int" builtin on integer values and a "float" builtin on
  float values; each returns the value's own as_int() / as_float().
- test-jinja.cpp: tests for all of the above, including invalid UTF-8 input.

diff --git a/common/jinja/runtime.cpp b/common/jinja/runtime.cpp
index 5b51427aa0..f81d98d954 100644
--- a/common/jinja/runtime.cpp
+++ b/common/jinja/runtime.cpp
@@ -251,6 +251,23 @@ value binary_expression::execute_impl(context & ctx) {
         return res;
     }
 
+    // Python-style string repetition
+    // TODO: support array/tuple repetition (e.g., [1, 2] * 3 → [1, 2, 1, 2, 1, 2])
+    if (op.value == "*" &&
+        ((is_val<value_string>(left_val) && is_val<value_int>(right_val)) ||
+         (is_val<value_int>(left_val) && is_val<value_string>(right_val)))) {
+        const auto & str = is_val<value_string>(left_val) ? left_val->as_string() : right_val->as_string();
+        const int64_t repeat = is_val<value_int>(right_val) ? right_val->as_int() : left_val->as_int();
+        auto res = mk_val<value_string>();
+        if (repeat <= 0) {
+            return res;
+        }
+        for (int64_t i = 0; i < repeat; ++i) {
+            res->val_str = res->val_str.append(str);
+        }
+        return res;
+    }
+
     // String membership
     if (is_val<value_string>(left_val) && is_val<value_string>(right_val)) {
         // case: "a" in "abc"
diff --git a/common/jinja/value.cpp b/common/jinja/value.cpp
index 7dc1d65407..8e86a715f5 100644
--- a/common/jinja/value.cpp
+++ b/common/jinja/value.cpp
@@ -1,4 +1,5 @@
 #include "runtime.h"
+#include "unicode.h"
 #include "value.h"
 
 // for converting from JSON to jinja values
@@ -154,6 +155,83 @@ static value test_compare_fn(const func_args & args) {
     return mk_val<value_bool>(value_compare(args.get_pos(0), args.get_pos(1), op));
 }
 
+static void append_codepoint_as_ascii_json_escape(std::string & out, uint32_t codepoint) {
+    auto append_u16 = [&out](uint32_t value) {
+        char buf[8];
+        snprintf(buf, sizeof(buf), "\\u%04x", static_cast<unsigned int>(value));
+        out += buf;
+    };
+
+    if (codepoint <= 0xFFFF) {
+        append_u16(codepoint);
+        return;
+    }
+
+    codepoint -= 0x10000;
+    append_u16(0xD800 + ((codepoint >> 10) & 0x3FF));
+    append_u16(0xDC00 + (codepoint & 0x3FF));
+}
+
+static std::string json_ensure_ascii_preserving_format(const std::string & json_str) {
+    std::string output;
+    output.reserve(json_str.size());
+
+    bool in_string = false;
+    bool escaped = false;
+
+    for (size_t pos = 0; pos < json_str.size();) {
+        const char ch = json_str[pos];
+        if (!in_string) {
+            output.push_back(ch);
+            if (ch == '"') {
+                in_string = true;
+            }
+            ++pos;
+            continue;
+        }
+
+        if (escaped) {
+            output.push_back(ch);
+            escaped = false;
+            ++pos;
+            continue;
+        }
+
+        if (ch == '\\') {
+            output.push_back(ch);
+            escaped = true;
+            ++pos;
+            continue;
+        }
+
+        if (ch == '"') {
+            output.push_back(ch);
+            in_string = false;
+            ++pos;
+            continue;
+        }
+
+        const unsigned char uch = static_cast<unsigned char>(ch);
+        if (uch < 0x80) {
+            output.push_back(ch);
+            ++pos;
+            continue;
+        }
+
+        auto parsed = common_parse_utf8_codepoint(json_str, pos);
+        if (parsed.status != utf8_parse_result::SUCCESS) {
+            output += "\\ufffd";
+            ++pos;
+            continue;
+        }
+
+        append_codepoint_as_ascii_json_escape(output, parsed.codepoint);
+        pos += parsed.bytes_consumed;
+    }
+
+    return output;
+}
+
 static value tojson(const func_args & args) {
     args.ensure_count(1, 5);
     value val_ascii = args.get_kwarg_or_pos("ensure_ascii", 1);
@@ -169,16 +247,17 @@ static value tojson(const func_args & args) {
     if (is_val<value_int>(val_indent)) {
         indent = static_cast<int>(val_indent->as_int());
     }
-    if (val_ascii->as_bool()) { // undefined == false
-        throw not_implemented_exception("tojson ensure_ascii=true not implemented");
-    }
     if (val_sort->as_bool()) { // undefined == false
         throw not_implemented_exception("tojson sort_keys=true not implemented");
     }
+    const bool ensure_ascii = val_ascii->as_bool(); // undefined == false
     auto separators = (is_val<value_array>(val_separators) ? val_separators : mk_val<value_array>())->as_array();
     std::string item_sep = separators.size() > 0 ? separators[0]->as_string().str() : (indent < 0 ? ", " : ",");
     std::string key_sep = separators.size() > 1 ? separators[1]->as_string().str() : ": ";
     std::string json_str = value_to_json(args.get_pos(0), indent, item_sep, key_sep);
+    if (ensure_ascii) {
+        json_str = json_ensure_ascii_preserving_format(json_str);
+    }
     return mk_val<value_string>(json_str);
 }
 
@@ -460,6 +539,10 @@ const func_builtins & value_int_t::get_builtins() const {
             int64_t val = args.get_pos(0)->as_int();
             return mk_val<value_int>(val < 0 ? -val : val);
         }},
+        {"int", [](const func_args & args) -> value {
+            args.ensure_vals<value_int>();
+            return mk_val<value_int>(args.get_pos(0)->as_int());
+        }},
         {"float", [](const func_args & args) -> value {
             args.ensure_vals<value_int>();
             double val = static_cast<double>(args.get_pos(0)->as_int());
@@ -486,6 +569,10 @@ const func_builtins & value_float_t::get_builtins() const {
             int64_t val = static_cast<int64_t>(args.get_pos(0)->as_float());
             return mk_val<value_int>(val);
         }},
+        {"float", [](const func_args & args) -> value {
+            args.ensure_vals<value_float>();
+            return mk_val<value_float>(args.get_pos(0)->as_float());
+        }},
         {"safe", tojson},
         {"string", tojson},
         {"tojson", tojson},
diff --git a/tests/test-jinja.cpp b/tests/test-jinja.cpp
index ce3008f4c7..b5ee53461e 100644
--- a/tests/test-jinja.cpp
+++ b/tests/test-jinja.cpp
@@ -447,6 +447,18 @@ static void test_expressions(testing & t) {
         "hello world"
     );
 
+    test_template(t, "string repetition",
+        "{{ 'ab' * 3 }}",
+        json::object(),
+        "ababab"
+    );
+
+    test_template(t, "reversed string repetition",
+        "{{ 3 * 'ab' }}",
+        json::object(),
+        "ababab"
+    );
+
     test_template(t, "ternary",
         "{{ 'yes' if cond else 'no' }}",
         {{"cond", true}},
@@ -693,6 +705,33 @@ static void test_filters(testing & t) {
         "\"\\u2713\""
     );
 
+    test_template(t, "tojson ensure_ascii=true nested object",
+        "{{ data|tojson(ensure_ascii=true) }}",
+        {{"data", {
+            {"text", "\u2713"},
+            {"items", json::array({"é", {{"snowman", "☃"}}})}
+        }}},
+        "{\"text\": \"\\u2713\", \"items\": [\"\\u00e9\", {\"snowman\": \"\\u2603\"}]}"
+    );
+
+    test_template(t, "tojson ensure_ascii=true indent=2",
+        "{{ data|tojson(ensure_ascii=true, indent=2) }}",
+        {{"data", {
+            {"text", "\u2713"},
+            {"nested", {{"accent", "é"}}}
+        }}},
+        "{\n  \"text\": \"\\u2713\",\n  \"nested\": {\n    \"accent\": \"\\u00e9\"\n  }\n}"
+    );
+
+    test_template(t, "tojson ensure_ascii=true preserves existing escapes",
+        "{{ data|tojson(ensure_ascii=true) }}",
+        {{"data", {
+            {"emoji", "😀"},
+            {"line", "a\nb"}
+        }}},
+        "{\"emoji\": \"\\ud83d\\ude00\", \"line\": \"a\\nb\"}"
+    );
+
     test_template(t, "tojson sort_keys=true",
         "{{ data|tojson(sort_keys=true) }}",
         {{"data", {{"b", 2}, {"a", 1}}}},
@@ -771,6 +810,12 @@ static void test_filters(testing & t) {
         "hello"
     );
 
+    test_template(t, "int filter on integer is identity",
+        "{{ value|int }}",
+        {{"value", 7}},
+        "7"
+    );
+
     test_template(t, "none to string",
         "{{ x|string }}",
         {{"x", nullptr}},
@@ -2458,4 +2503,12 @@ static void test_fuzzing(testing & t) {
             t.assert_true("builtin " + type_name + "." + fn_name + " #" + std::to_string(i), fuzz_test_template(tmpl, vars));
         }
     });
+
+    t.test("tojson ensure_ascii=true with invalid utf-8", [&](testing & t) {
+        t.assert_true("invalid utf-8 does not crash",
+            fuzz_test_template(
+                "{{ data|tojson(ensure_ascii=true) }}",
+                {{"data", std::string("hello\xfe\xffworld")}}
+            ));
+    });
 }