From 56ca124850d7c462d31bdb9289481a063f0d7a3a Mon Sep 17 00:00:00 2001 From: Piotr Wilkin Date: Wed, 11 Feb 2026 22:41:58 +0100 Subject: [PATCH] Document helpers --- common/chat-auto-parser-helpers.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/common/chat-auto-parser-helpers.h b/common/chat-auto-parser-helpers.h index 53d3454566..47e7a2a3d8 100644 --- a/common/chat-auto-parser-helpers.h +++ b/common/chat-auto-parser-helpers.h @@ -10,16 +10,47 @@ std::string trim_trailing_newlines(const std::string & str); // calculate a diff split (longest common prefix, longest common suffix excluding prefix, // mismatched part on the left, mismatched part on the right) between two strings +// account for markers - align prefix and suffix endings so that they end on markers +// * eg.: +// calculate_diff_split("
", "

Something

") -> +// { "prefix": "" (not: "<"), "suffix": "", "left": "
", "right": "

Something

" } +// calculate_diff_split("Something", "") -> +// { "prefix": "", "suffix": "", "left": "Something", "right": "" } diff_split calculate_diff_split(const std::string & left, const std::string & right); // Returns the prefix of `full` up until the first occurrence of the common prefix of `left` and `right` +// Returns empty string if there's no common prefix +// * eg.: +// until_common_prefix("really want a FUNCTION call", "FUNCTION alpha", "FUNCTION beta") -> "really want a " +// until_common_prefix("", "", "") -> "" +// until_common_prefix("some text", "1234", "abcd") -> "" +// until_common_prefix("one arg two args three args four", "argument alpha", "argument beta") -> "one "" std::string until_common_prefix(const std::string & full, const std::string & left, const std::string & right); // Returns the suffix of `full` after the last occurrence of the common suffix of `left` and `right` +// Returns empty string if there's no common suffix +// Mirror function of `until_common_prefix` +// * eg.: +// after_common_suffix("really want a FUNCTION call", "first FUNCTION", "second FUNCTION") -> " call" +// after_common_suffix("one arg two-args three args four", "alpha-args", "beta-args") -> " three args four" std::string after_common_suffix(const std::string & full, const std::string & left, const std::string & right); // Segmentize text into markers and non-marker fragments +// * eg.: +// segmentize_markers("The site title
Here's some content
" -> +// [ (MARKER, ""), (MARKER, ""), (MARKER, ""), (TEXT, "The site title"), (MARKER, ""), +// (MARKER, ""), (MARKER, "
"), (TEXT, "Here's some "), (MARKER, ""), (TEXT, "content"), (MARKER, ""), +// (MARKER, "
"), (MARKER, ""), (MARKER, "") +// ] +// segmentize_markers("<|tool_call|>[args]{ are here }[/args]<|tool_call_end|>") -> +// [ (MARKER, "<|tool_call|>"), (MARKER, "[args]"), (TEXT, "{ are here }"), (MARKER, "[/args]"), (MARKER, "<|tool_call_end|>") ] std::vector segmentize_markers(const std::string & text); // Prune whitespace-only segments from a vector of segments +// * eg.: +// segmentize_markers("\n\n\n \n\n\n") -> +// X = [ (MARKER, ""), (TEXT, "\n"), (MARKER, ""), (TEXT, "\n"), (MARKER, ""), (TEXT, "\n \n"), +// (MARKER, ""), (TEXT, "\n"), (MARKER, ""), (TEXT, "\n"), (MARKER, "") ] +// prune_whitespace_segments(X) -> [ (MARKER, ""), (MARKER, ""), (MARKER, ""), (MARKER, ""), +// (MARKER, ""), (MARKER, "") ] std::vector prune_whitespace_segments(const std::vector & segments);