no more std::regex
This commit is contained in:
parent
d34efd9626
commit
a10fbc77a3
|
|
@ -4,7 +4,6 @@
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <regex>
|
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
#include <cctype>
|
#include <cctype>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
|
|
@ -23,7 +22,7 @@ static void trim_template_markers_inplace(std::string & s) {
|
||||||
// i = head ; j = tail (i <= j)
|
// i = head ; j = tail (i <= j)
|
||||||
size_t j = 0; // Write pointer
|
size_t j = 0; // Write pointer
|
||||||
const size_t len = s.length();
|
const size_t len = s.length();
|
||||||
|
|
||||||
for (size_t i = 0; i < len; ) {
|
for (size_t i = 0; i < len; ) {
|
||||||
bool handled = false;
|
bool handled = false;
|
||||||
|
|
||||||
|
|
@ -75,6 +74,32 @@ static void trim_template_markers_inplace(std::string & s) {
|
||||||
s.resize(j);
|
s.resize(j);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void trim_newline_after_tag_inplace(std::string & s) {
|
||||||
|
// i = head ; j = tail (i <= j)
|
||||||
|
size_t j = 0; // Write pointer
|
||||||
|
const size_t len = s.length();
|
||||||
|
|
||||||
|
for (size_t i = 0; i < len; ) {
|
||||||
|
s[j++] = s[i++];
|
||||||
|
|
||||||
|
if (i < len && (s[j-1] == '}' || s[j-1] == '%' || s[j-1] == '#' || s[j-1] == '-')) {
|
||||||
|
if (s[i] == '}') {
|
||||||
|
// We have a potential tag closer like %} or -} or #} or }}
|
||||||
|
// Now check if the next character is a newline
|
||||||
|
if (i + 1 < len && s[i + 1] == '\n') {
|
||||||
|
// Skip the } and the following \n
|
||||||
|
++i; // skip the }
|
||||||
|
++i; // skip the \n
|
||||||
|
// Do not advance j, we effectively removed the \n
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
s.resize(j);
|
||||||
|
}
|
||||||
|
|
||||||
std::string lexer::preprocess(const std::string & template_str, const preprocess_options & options) const {
|
std::string lexer::preprocess(const std::string & template_str, const preprocess_options & options) const {
|
||||||
std::string result = template_str;
|
std::string result = template_str;
|
||||||
// According to https://jinja.palletsprojects.com/en/3.0.x/templates/#whitespace-control
|
// According to https://jinja.palletsprojects.com/en/3.0.x/templates/#whitespace-control
|
||||||
|
|
@ -97,7 +122,8 @@ std::string lexer::preprocess(const std::string & template_str, const preprocess
|
||||||
if (options.trim_blocks) {
|
if (options.trim_blocks) {
|
||||||
// If an application configures Jinja to trim_blocks, the first newline after
|
// If an application configures Jinja to trim_blocks, the first newline after
|
||||||
// a template tag is removed automatically (like in PHP).
|
// a template tag is removed automatically (like in PHP).
|
||||||
result = std::regex_replace(result, std::regex(R"(([#%-]\})\n)"), "$1");
|
// Equivalent JS code: template.replace(/^[ \t]*({[#%-])/gm, "$1")
|
||||||
|
trim_newline_after_tag_inplace(result);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Handle whitespace control with - in tags
|
// Handle whitespace control with - in tags
|
||||||
|
|
@ -105,8 +131,8 @@ std::string lexer::preprocess(const std::string & template_str, const preprocess
|
||||||
|
|
||||||
// Handle custom transformers-specific `generation` tag
|
// Handle custom transformers-specific `generation` tag
|
||||||
// See https://github.com/huggingface/transformers/pull/30650 for more information.
|
// See https://github.com/huggingface/transformers/pull/30650 for more information.
|
||||||
result = std::regex_replace(result, std::regex(R"(\{%\s*generation\s*%\})"), "");
|
// result = std::regex_replace(result, std::regex(R"(\{%\s*generation\s*%\})"), "");
|
||||||
result = std::regex_replace(result, std::regex(R"(\{%\s*endgeneration\s*%\})"), "");
|
// result = std::regex_replace(result, std::regex(R"(\{%\s*endgeneration\s*%\})"), "");
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue