#pragma once

/**
 * Provides a simple direct 1-level only config file logic
 *
 * ## File format
 *
 * It can consist of multiple config groups.
 * * the group name needs to start at the beginning of the line.
 * Each group can in turn contain multiple config fields (key:value pairs) wrt that group.
 * * the group fields need to have 1 or more space at the beginning of line.
 *
 * ## Supported data types
 *
 * The fields can have values belonging to any one of the below types
 * * strings - enclosed in double quotes
 *             this is also the fallback catch all type, but dont rely on this behaviour.
 * * int - using decimal number system
 * * float - needs to have a decimal point and or e/E
 *           if decimal point is used, there should be at least one decimal number on its either side
 * * bool - either true or false
 *
 * It tries to provide a crude expanded form of array wrt any of the above supported types.
 * For this one needs to define keys using the pattern TheKeyName-0, TheKeyName-1, ....
 *
 * ## Additional notes
 *
 * NativeCharSize encoded char refers to chars which fit within the size of char type in a given
 * type of c++ string or base bitsize of a encoding standard, like 1 byte in case of std::string,
 * utf-8, ...
 * * example english alphabets in utf-8 encoding space are 1byte chars, in its variable length
 *   encoding space.
 *
 * MultiNativeCharSize encoded char refers to chars which occupy multiple base-char-bit-size of
 * a c++ string type or char encoding standard.
 * * example indian scripts alphabets in utf-8 encoding space occupy multiple bytes in its variable
 *   length encoding space.
 *
 * Sane variable length encoding - refers to encoding where the values of NativeCharSized chars of
 * a char encoding space cant overlap with values in NativeCharSize subparts of MultiNativeCharSized
 * chars of the same char encoding standard.
 * * utf-8 shows this behaviour
 * * chances are utf-16 and utf-32 also show this behaviour (need to cross check once)
 */

// NOTE(review): the original include targets were lost in extraction; this list
// covers every standard-library name the file uses.
#include <algorithm>
#include <cctype>
#include <clocale>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cwchar>
#include <format>
#include <fstream>
#include <iostream>
#include <map>
#include <regex>
#include <sstream>
#include <stdexcept>
#include <string>
#include <variant>
#include <vector>


#define SC_DEBUG
#define SC_TEST_PRG
#ifdef SC_TEST_PRG
#define LINFO_LN(FMT, ...) fprintf(stdout, FMT"\n", __VA_ARGS__)
#define LDBUG_LN(FMT, ...) fprintf(stderr, FMT"\n", __VA_ARGS__)
#define LERRR_LN(FMT, ...) fprintf(stderr, FMT"\n", __VA_ARGS__)
#define LWARN_LN(FMT, ...) fprintf(stderr, FMT"\n", __VA_ARGS__)
#else
#include "log.h"
#define LINFO_LN LOG_TEELN
#define LDBUG_LN LOGLN
#define LERRR_LN LOG_TEELN
#define LWARN_LN LOG_TEELN
#endif


#undef SC_STR_OVERSMART
#ifdef SC_STR_OVERSMART
#define str_trim str_trim_oversmart
#else
#define str_trim str_trim_dumb
#endif


// **** **** **** String related helpers **** **** **** //


// Convert a wide string into a multibyte string, using the current C locale.
// Returns what the second std::wcsrtombs call returns (number of bytes written).
// If the source cannot be converted, std::wcsrtombs reports (size_t)-1 and the
// resize below then throws std::length_error.
// Marked inline because this header defines it (one-definition rule).
inline size_t wcs_to_mbs(std::string &sDest, const std::wstring &wSrc) {
    std::mbstate_t mbState = std::mbstate_t();
    const wchar_t *wSrcP = wSrc.c_str();
    // First pass with a null destination only computes the required length.
    auto reqLen = std::wcsrtombs(nullptr, &wSrcP, 0, &mbState);
    sDest.resize(reqLen);
    return std::wcsrtombs(sDest.data(), &wSrcP, sDest.length(), &mbState);
}

// Convert a multibyte string into a wide string, using the current C locale.
// Returns what the second std::mbsrtowcs call returns (number of wide chars written).
// If the source is not a valid multibyte sequence, std::mbsrtowcs reports (size_t)-1
// and the resize below then throws std::length_error.
inline size_t mbs_to_wcs(std::wstring &wDest, const std::string &sSrc) {
    std::mbstate_t mbState = std::mbstate_t();
    const char *sSrcP = sSrc.c_str();
    // First pass with a null destination only computes the required length.
    auto reqLen = std::mbsrtowcs(nullptr, &sSrcP, 0, &mbState);
    wDest.resize(reqLen);
    return std::mbsrtowcs(wDest.data(), &sSrcP, wDest.length(), &mbState);
}

// Print every code unit of sIn to std::cout as hex, prefixed by msgTag.
// Supports strings whose char type is 1, 2 or 4 bytes wide; throws
// std::runtime_error for any other width.
template <typename TString>
void dumphex_string(const TString &sIn, const std::string &msgTag){
    constexpr auto cSize = sizeof(typename TString::value_type);
    std::cout << msgTag << "[ ";
    for(auto c: sIn) {
        if constexpr (cSize == 1) {
            std::cout << std::format("{:02x}, ", static_cast<uint8_t>(c));
        } else if constexpr (cSize == 2) {
            std::cout << std::format("{:04x}, ", static_cast<uint16_t>(c));
        } else if constexpr (cSize == 4) {
            std::cout << std::format("{:08x}, ", static_cast<uint32_t>(c));
        } else {
            throw std::runtime_error( std::format("ERRR:{}:Unsupported char type with size [{}]", __func__, cSize) );
        }
    }
    std::cout << " ]" << std::endl;
}

// Remove chars from begin and end of the passed string, provided the char
// belongs to one of the chars in trimChars.
//
// NOTE: This will work perfectly provided the string being trimmed as well as
// chars being trimmed are made up of NativeCharSize chars from same encoded space.
// For utf-8, this means the ascii equivalent 1byteSized chars of utf8 and not
// variable length MultiNativeCharSize (ie multibyte in case of utf-8) ones.
// NOTE: It will also work, if at least either end of string as well as trimChars
// have NativeCharSize chars from their encoding space, rather than variable
// length MultiNativeCharSize based chars if any.
//
// NOTE: Given the way UTF-8 char encoding is designed, where NativeCharSize 1byte
// encoded chars are fully unique and dont overlap with any bytes from any of the
// variable length MultiNativeCharSize encoded chars in the utf-8 space, so as long as
// the trimChars belong to NativeCharSize chars subset, the logic should work, even
// if string has a mixture of NativeCharSize and MultiNativeCharSize encoded chars.
// Chances are utf-16 and utf-32 also have similar characteristics wrt their
// NativeCharSize encoded chars (ie fully encoded within single 16bit and 32bit value
// respectively), and so equivalent semantic applies to them also.
//
// ALERT: Given that this simple minded logic, works at individual NativeCharSize level
// only, If trimChars involve variable length MultiNativeCharSize encoded chars, then
// * because different NativeCharSize subparts (bytes in case of utf-8) from different
//   MultiNativeCharSize trim chars when clubbed together can map to some other new char
//   in a variable length encoded char space, if there is that new char at either end
//   of the string, it may get trimmed, because of the possibility of mix up mentioned.
// * given that different variable length MultiNativeCharSize encoded chars may have
//   some common NativeCharSize subparts (bytes in case of utf-8) between them, if one
//   of these chars is at either end of the string and another char is in trimChars,
//   then string may get partially trimmed.
//
template <typename TString>
TString str_trim_dumb(TString sin, const TString &trimChars=" \t\n") {
#ifdef SC_DEBUG
    dumphex_string(sin, "DBUG:TrimDumb:Str:");
    dumphex_string(trimChars, "DBUG:TrimDumb:Tim:");
#endif
    // If every char is a trim char, find_last_not_of gives npos and npos+1 wraps
    // to 0, so the whole string is erased.
    sin.erase(sin.find_last_not_of(trimChars)+1);
    sin.erase(0, sin.find_first_not_of(trimChars));
    return sin;
}

// Remove chars from begin and end of the passed string, provided the char belongs
// to one of the chars in trimChars.
// NOTE: Internally converts to wchar/wstring to try and support proper trimming,
// wrt possibly more languages, to some extent. IE even if the passed string
// contains multibyte encoded characters in it in utf-8 space (ie MultiNativeCharSize),
// it may get converted to NativeCharSize chars in the expanded wchar_t encoding space,
// thus leading to fixed NativeCharSize driven logic itself handling things sufficiently.
// Look at str_trim_dumb comments for additional aspects.
inline std::string str_trim_oversmart(std::string sIn, const std::string &trimChars=" \t\n") {
    std::wstring wIn;
    mbs_to_wcs(wIn, sIn);
    std::wstring wTrimChars;
    mbs_to_wcs(wTrimChars, trimChars);
    auto wOut = str_trim_dumb(wIn, wTrimChars);
    std::string sOut;
    wcs_to_mbs(sOut, wOut);
    return sOut;
}

// Remove atmost 1 char at the begin and 1 char at the end of the passed string,
// provided the char belongs to one of the chars in trimChars.
//
// NOTE: Chars being trimmed (ie in trimChars) needs to be part of NativeCharSize
// subset of the string's encoded char space, to avoid mix up when working with
// strings which can be utf-8/utf-16/utf-32/sane-variable-length encoded strings.
//
// NOTE:UTF8: This will work provided the string being trimmed as well the chars
// being trimmed are made up of 1byte encoded chars in case of utf8 encoding space.
// If the string being trimmed includes multibyte (ie MultiNativeCharSize) encoded
// characters at the end, then trimming can mess things up, if you have multibyte
// encoded utf-8 chars in the trimChars set.
//
// Currently given that SimpCfg only uses this with NativeCharSize chars in the
// trimChars and most of the platforms are likely to be using utf-8 based char
// space (which is a relatively sane variable length char encoding from this
// logics perspective), so not providing oversmart variant.
//
template <typename TString>
TString str_trim_single(TString sin, const TString& trimChars=" \t\n") {
    if (sin.empty()) return sin;
    if (trimChars.find(sin.front()) != TString::npos) {
        sin.erase(0, 1);
    }
    if (sin.empty()) return sin;
    if (trimChars.find(sin.back()) != TString::npos) {
        sin.pop_back();
    }
    return sin;
}

// This works for NativeCharSize encoded chars, including in utf8 encoding space.
// This wont work for multibyte encoded chars.
//
// std::tolower requires its argument to be representable as unsigned char (or EOF),
// so 1-byte code units are passed through unsigned char (plain char may be negative
// for utf-8 lead/continuation bytes), and wider code units outside 0..0xFF are
// returned unchanged.
template <typename TString>
TString str_tolower(const TString &sin) {
    TString sout;
    sout.resize(sin.size());
    std::transform(sin.begin(), sin.end(), sout.begin(), [](auto c) {
        using CharT = decltype(c);
        if constexpr (sizeof(CharT) == 1) {
            return static_cast<CharT>(std::tolower(static_cast<unsigned char>(c)));
        } else {
            const auto code = static_cast<unsigned long long>(c);
            return (code <= 0xFFu) ? static_cast<CharT>(std::tolower(static_cast<int>(code))) : c;
        }
    });
#ifdef SC_DEBUG_VERBOSE
    dumphex_string(sin, std::format("DBUG:{}:in:", __func__));
    dumphex_string(sout, std::format("DBUG:{}:out:", __func__));
#endif
    return sout;
}

// Debug helper: log both strings with their lengths, then walk their common prefix
// length position by position.
// NOTE(review): the loop body (and its log format) was lost when this file was
// extracted; it is reconstructed here as "report the positions that differ".
// Confirm against the upstream source.
inline void str_compare_dump(const std::string &s1, const std::string &s2) {
    LDBUG_LN("DBUG:%s:%s:Len:%zu", __func__, s1.c_str(), s1.length());
    LDBUG_LN("DBUG:%s:%s:Len:%zu", __func__, s2.c_str(), s2.length());
    const size_t minLen = std::min(s1.length(), s2.length());
    for(size_t i=0; i<minLen; i++) {
        if (s1[i] != s2[i]) {
            LDBUG_LN("DBUG:%s:%zu:%c:%c", __func__, i, s1[i], s2[i]);
        }
    }
}


// Convert any value that supports operator<< on a stream into a string.
template<typename TypeWithStrSupp>
std::string str(TypeWithStrSupp value) {
    std::stringstream ss;
    ss << value;
    return ss.str();
}

// Convert a vector into a string of the form "[ a, b, c ]".
template<typename TypeWithStrSupp>
std::string str(std::vector<TypeWithStrSupp> values) {
    std::stringstream ss;
    ss << "[ ";
    int cnt = 0;
    for(const auto &value: values) {
        cnt += 1;
        if (cnt != 1) ss << ", ";
        ss << value;
    }
    ss << " ]";
    return ss.str();
}


// **** **** **** SimpCfg related helpers **** **** **** //


// The value types a config field can hold.
// NOTE(review): the alternative list was lost in extraction; it is reconstructed from
// the set_string/set_bool/set_int64/set_double accessors below. Confirm the order
// against the upstream source.
typedef std::variant<std::string, bool, int64_t, double> SimpCfgData;

// Holds config data as group -> key -> value, where value is one of SimpCfgData's types.
class SimpCfg {

private:
    std::map<std::string, std::map<std::string, SimpCfgData>> mapV = {};
    std::regex rInt {R"(^[-+]?\d+$)"};
    std::regex rFloat {R"(^[-+]?\d+(?:\.\d+)?(?:[eE][-+]?\d+)?$)"};

    // Look up group/key without modifying the map; nullptr when either is missing.
    const SimpCfgData *find_entry(const std::string &group, const std::string &key) const {
        const auto groupIt = mapV.find(group);
        if (groupIt == mapV.end()) {
            return nullptr;
        }
        const auto keyIt = groupIt->second.find(key);
        if (keyIt == groupIt->second.end()) {
            return nullptr;
        }
        return &keyIt->second;
    }

public:

    // Stream whichever alternative the variant currently holds into a string
    // (so a bool comes out the way operator<< prints it by default).
    std::string to_str(const SimpCfgData &value) const {
        auto visitor = [](const auto &held) {
            std::stringstream ss;
            ss << held;
            return ss.str();
        };
        return std::visit(visitor, value);
    }

    // Store value under group/key, creating the group if needed, and log it.
    // callerName is only used to tag the debug log line.
    template<typename SupportedDataType>
    void set_value(const std::string &group, const std::string &key, const SupportedDataType &value, const std::string &callerName="") {
        auto &gm = mapV[group];
        gm[key] = value;
        std::stringstream ss;
        ss << value;
        LDBUG_LN("DBUG:SC:%s_%s:%s:%s:%s", __func__, callerName.c_str(), group.c_str(), key.c_str(), ss.str().c_str());
    }

    void set_string(const std::string &group, const std::string &key, const std::string &value) {
        set_value(group, key, value, __func__);
    }

    void set_bool(const std::string &group, const std::string &key, bool value) {
        set_value(group, key, value, __func__);
    }

    // Textual variant: anything that is not (case-insensitively) "true" is stored as false.
    void set_bool(const std::string &group, const std::string &key, const std::string &value) {
        std::string sValue = str_tolower(value);
        bool bValue = (sValue == "true");
        //LDBUG_LN("DBUG:%s:%s:%s:%d", __func__, value.c_str(), sValue.c_str(), bValue);
        set_bool(group, key, bValue);
    }

    void set_int64(const std::string &group, const std::string &key, int64_t value) {
        set_value(group, key, value, __func__);
    }

    // Textual variant. Parsed as base 10, matching the documented "decimal number
    // system" (base 0 would silently read a value such as 010 as octal).
    void set_int64(const std::string &group, const std::string &key, const std::string &value) {
        auto ivalue = strtoll(value.c_str(), nullptr, 10);
        set_int64(group, key, ivalue);
    }

    void set_double(const std::string &group, const std::string &key, double value) {
        set_value(group, key, value, __func__);
    }

    // Textual variant, parsed with strtod.
    void set_double(const std::string &group, const std::string &key, const std::string &value) {
        auto dvalue = strtod(value.c_str(), nullptr);
        set_double(group, key, dvalue);
    }

    // Log every key/value of the given group; an empty group name means all groups.
    // Groups that are filtered out get a "Skipping..." line.
    void dump(const std::string &group) const {
        for (const auto &[groupName, fields]: mapV) {
            if (!group.empty() && (groupName != group)) {
                LINFO_LN("INFO:SC:%s:%s:Skipping...", __func__, groupName.c_str());
                continue;
            }
            for(const auto &[key, value]: fields) {
                LINFO_LN("DBUG:SC:%s:%s:Iterate:%s:%s", __func__, groupName.c_str(), key.c_str(), to_str(value).c_str());
            }
        }
    }

    // Return the value stored under group/key, or defaultValue when it is missing.
    // Throws std::bad_variant_access if the stored value is not a SupportedDataType.
    // Does not modify the config: a lookup of an unknown group no longer creates it.
    template<typename SupportedDataType>
    SupportedDataType get_value(const std::string &group, const std::string &key, const SupportedDataType &defaultValue, const std::string &callerName="") const {
        const SimpCfgData *found = find_entry(group, key);
        if (found == nullptr) {
            std::stringstream ss;
            ss << defaultValue;
            LWARN_LN("DBUG:SC:%s_%s:%s:%s:%s[default]", __func__, callerName.c_str(), group.c_str(), key.c_str(), ss.str().c_str());
            return defaultValue;
        }
        LDBUG_LN("DBUG:SC:%s_%s:%s:%s:%s", __func__, callerName.c_str(), group.c_str(), key.c_str(), to_str(*found).c_str());
        return std::get<SupportedDataType>(*found);
    }

    std::string get_string(const std::string &group, const std::string &key, const std::string &defaultValue) const {
        return get_value(group, key, defaultValue, __func__);
    }

    bool get_bool(const std::string &group, const std::string &key, bool defaultValue) const {
        return get_value(group, key, defaultValue, __func__);
    }

    int64_t get_int64(const std::string &group, const std::string &key, int64_t defaultValue) const {
        return get_value(group, key, defaultValue, __func__);
    }

    double get_double(const std::string &group, const std::string &key, double defaultValue) const {
        return get_value(group, key, defaultValue, __func__);
    }

    // Collect the values stored under key-0, key-1, ... (stopping at the first missing
    // index) into a vector; returns defaultValue when key-0 does not exist.
    // Throws std::bad_variant_access if any element is not a SupportedDataType.
    template<typename SupportedDataType>
    std::vector<SupportedDataType> get_vector(const std::string &group, const std::string &key, const std::vector<SupportedDataType> &defaultValue, const std::string &callerName="") const {
        std::vector<SupportedDataType> array;
        for (int i = 0; ; i += 1) {
            const std::string arrayKey = key + "-" + std::to_string(i);
            const SimpCfgData *found = find_entry(group, arrayKey);
            if (found == nullptr) {
                break;
            }
            array.push_back(std::get<SupportedDataType>(*found));
        }
        if (array.empty()) {
            LWARN_LN("DBUG:SC:%s_%s:%s:%s:%s[default]", __func__, callerName.c_str(), group.c_str(), key.c_str(), str(defaultValue).c_str());
            return defaultValue;
        }
        LDBUG_LN("DBUG:SC:%s_%s:%s:%s:%s", __func__, callerName.c_str(), group.c_str(), key.c_str(), str(array).c_str());
        return array;
    }

    // Remember the current C locale in sSavedLocale and switch to en_US.UTF-8.
    // std::setlocale returns a null pointer when the request cannot be honoured, so
    // the results are checked before being used as strings.
    static void locale_prepare(std::string &sSavedLocale) {
        const char *prevLocale = std::setlocale(LC_ALL, nullptr);
        sSavedLocale = (prevLocale != nullptr) ? prevLocale : "C";
        const char *sUpdatedLocale = std::setlocale(LC_ALL, "en_US.UTF-8");
        LDBUG_LN("DBUG:%s:Locale:Prev:%s:Cur:%s", __func__, sSavedLocale.c_str(), (sUpdatedLocale != nullptr) ? sUpdatedLocale : "(null)");
    }

    // Switch the C locale back to the one saved by locale_prepare.
    static void locale_restore(const std::string &sSavedLocale) {
        const char *sCurLocale = std::setlocale(LC_ALL, sSavedLocale.c_str());
        LDBUG_LN("DBUG:%s:Locale:Requested:%s:Got:%s", __func__, sSavedLocale.c_str(), (sCurLocale != nullptr) ? sCurLocale : "(null)");
    }

    // Load the config file fname into this object (adding to / overwriting what is
    // already stored).
    //
    // * empty lines, whitespace-only lines and lines starting with # are skipped
    // * a line starting with a non-space char names the current group
    // * a line starting with a space char is a key:value field of the current group;
    //   the value's type is decided in the order bool, int, float, string.
    //
    // Throws std::runtime_error if the file cannot be opened or a field line has no
    // key, no ':' or nothing after the ':'.
    void load(const std::string &fname) {
        std::ifstream f {fname};
        if (!f) {
            LERRR_LN("ERRR:SC:%s:%s:failed to load...", __func__, fname.c_str());
            throw std::runtime_error { "ERRR:SimpCfg:File not found" };
        } else {
            LDBUG_LN("DBUG:SC:%s:%s", __func__, fname.c_str());
        }
        std::string group;
        int iLine = 0;
        std::string curL;
        // Loop on the read itself, so a failed read ends the loop instead of being
        // processed as one more (empty) line.
        while(std::getline(f, curL)) {
            iLine += 1;
            if (curL.empty()) {
                continue;
            }
            if (curL[0] == '#') {
                continue;
            }
            // isspace needs an unsigned char value; plain char may be negative for utf-8 bytes.
            bool bGroup = !std::isspace(static_cast<unsigned char>(curL[0]));
            curL = str_trim(curL);
            if (curL.empty()) {
                // The line had only trimmable whitespace on it.
                continue;
            }
            if (bGroup) {
                curL = str_trim_single(curL, {"\""});
                group = curL;
                LDBUG_LN("DBUG:SC:%s:group:%s", __func__, group.c_str());
                continue;
            }
            auto dPos = curL.find(':');
            if (dPos == std::string::npos) {
                LERRR_LN("ERRR:SC:%s:%d:invalid key value line:%s", __func__, iLine, curL.c_str());
                throw std::runtime_error { "ERRR:SimpCfg:Invalid key value line" };
            }
            // dEnd counts the ':' plus whatever follows it; < 2 means nothing after ':'.
            auto dEnd = curL.length() - dPos;
            if ((dPos == 0) || (dEnd < 2)) {
                LERRR_LN("ERRR:SC:%s:%d:invalid key value line:%s", __func__, iLine, curL.c_str());
                throw std::runtime_error { "ERRR:SimpCfg:Invalid key value line" };
            }
            std::string key = curL.substr(0, dPos);
            key = str_trim(key);
            key = str_trim_single(key, {"\""});
            std::string value = curL.substr(dPos+1);
            value = str_trim(value);
            value = str_trim(value, {","});
            auto valueLower = str_tolower(value);
            if ((valueLower == "true") || (valueLower == "false")) {
                set_bool(group, key, value);
            } else if (std::regex_match(value, rInt)) {
                set_int64(group, key, value);
            } else if (std::regex_match(value, rFloat)) {
                set_double(group, key, value);
            } else {
                if (!value.empty() && (value.front() != '"')) {
                    LWARN_LN("WARN:SC:%s:%d:%s:k:%s:v:%s:is this string?", __func__, iLine, group.c_str(), key.c_str(), value.c_str());
                }
                value = str_trim_single(value, {"\""});
                set_string(group, key, value);
            }
        }
    }

};


#ifdef SC_TEST_PRG


// Print each std::string test sample, then each of its chars (bytes) with its hex value.
void check_string() {
    std::vector<std::string> vStandard = { "123", "1अ3" };
    std::cout << "**** string **** " << vStandard.size() << std::endl;
    for(const auto &sCur: vStandard) {
        std::cout << std::format("string: [{}] len[{}] size[{}]", sCur, sCur.length(), sCur.size()) << std::endl;
        int i = 0;
        for(auto c: sCur) {
            std::cout << std::format("string:{}:pos:{}:char:{}[0x{:x}]\n", sCur, i, c, static_cast<uint8_t>(c));
            i += 1;
        }
    }
}

// Same as check_string, but for std::u8string samples.
void check_u8string() {
    std::vector<std::u8string> vU8s = { u8"123", u8"1अ3" };
    std::cout << "**** u8string **** " << vU8s.size() << std::endl;
    for(const auto &sCur: vU8s) {
        // Copied into a std::string so that it can be handed to std::format / std::cout.
        std::string sCurx (sCur.begin(), sCur.end());
        std::cout << std::format("u8string: [{}] len[{}] size[{}]", sCurx, sCur.length(), sCur.size()) << std::endl;
        int i = 0;
        for(auto c: sCur) {
            //std::cout << c << std::endl;
            std::cout << std::format("u8string:{}:pos:{}:char:{}[0x{:x}]\n", sCurx, i, static_cast<unsigned char>(c), static_cast<unsigned char>(c));
            i += 1;
        }
    }
}

// Same as check_string, but for std::wstring samples printed through std::wcout.
void check_wstring_wcout() {
    std::wcout.imbue(std::locale("en_US.UTF-8"));
    std::vector<std::wstring> vWide = { L"123", L"1अ3" };
    std::cout << "**** wstring wcout **** " << vWide.size() << std::endl;
    for(const auto &sCur: vWide) {
        std::wcout << sCur << std::endl;
        std::wcout << std::format(L"wstring: [{}] len[{}] size[{}]", sCur, sCur.length(), sCur.size()) << std::endl;
        int i = 0;
        for(auto c: sCur) {
            std::wcout << std::format(L"wstring:{}:pos:{}:char:{}[0x{:x}]\n", sCur, i, c, c);
            i += 1;
        }
    }
}

// Same as check_string, but for std::wstring samples converted to multibyte strings
// and printed through std::cout.
void check_wstring_cout() {
    std::vector<std::wstring> vWide = { L"123", L"1अ3" };
    std::cout << "**** wstring cout **** " << vWide.size() << std::endl;
    for(const auto &sCur: vWide) {
        std::string sCury;
        wcs_to_mbs(sCury, sCur);
        std::cout << std::format("wstring: [{}] len[{}] size[{}]", sCury, sCur.length(), sCur.size()) << std::endl;
        int i = 0;
        for(auto c: sCur) {
            std::wstringstream wsc;
            wsc << c;
            std::string ssc;
            wcs_to_mbs(ssc, wsc.str());
            std::cout << std::format("wstring:{}:pos:{}:char:{}[0x{:x}]\n", sCury, i, ssc, static_cast<uint32_t>(c));
            i += 1;
        }
    }
}

// Run the dumb and oversmart trim helpers over strings containing non-english chars.
void check_nonenglish() {
    std::vector<std::string> vTest1 = { "\n\tAഅअಅ\n\t", "\n\tAഅअಅ " };
    for (const auto &sTest: vTest1) {
        std::string sGotDumb = str_trim_dumb(sTest, {" \n\t"});
        std::string sGotOSmart = str_trim_oversmart(sTest, {" \n\t"});
        std::cout << std::format("{}: Test1[{}] Dumb[{}] OverSmart[{}]", __func__, sTest, sGotDumb, sGotOSmart) << std::endl;
    }
    std::vector<std::string> vTest2 = { "\n\t this र remove 0s at end 000 ", "\n\tthis र remove 0s and अs at end 000रअ0अ ", "\n\tthis र remove 0s and अs at end 000रअ0\xa4अ "};
    for (const auto &sTest: vTest2) {
        std::string sGotDumb = str_trim_dumb(sTest, {" \n\t0अ"});
        std::cout << std::format("{}: Test2[{}] Dumb[{}]", __func__, sTest, sGotDumb) << std::endl;
    }
    // This partly invalid utf8 string will mess up str_trim_dumb "\n\tthis र remove 0s and अs at end 000रअ0\xa4अ "
    // but will trigger a exception with oversmart.
    // std::vector<std::string> vTest3 = { "\n\t this र remove 0s at end 000 ", "\n\tthis र remove 0s and अs at end 000रअ0अ ", "\n\tthis र remove 0s and अs at end 000रअ0\xa4अ "};
    std::vector<std::string> vTest3 = { "\n\t this र remove 0s at end 000 ", "\n\tthis र remove 0s and अs at end 000रअ0अ ", "\n\tthis र remove 0s and अs at end 000रअ0\xe0\xa4\x30अ "}; // \xe0\xa4
    for (const auto &sTest: vTest3) {
        std::string sGotOSmart = str_trim_oversmart(sTest, {" \n\t0अ"});
        std::cout << std::format("{}: Test3[{}] OverSmart[{}]", __func__, sTest, sGotOSmart) << std::endl;
    }
}

// Run all the string checks with the C locale switched to en_US.UTF-8.
void check_strings() {
    std::string sSavedLocale;
    SimpCfg::locale_prepare(sSavedLocale);
    check_string();
    check_u8string();
    //check_wstring_wcout();
    check_wstring_cout();
    check_nonenglish();
    SimpCfg::locale_restore(sSavedLocale);
}

// Test driver: runs the string checks, loads the config file named on the command
// line and exercises the getters/setters.
int main(int argc, char **argv) {
    if (argc != 2) {
        LERRR_LN("USAGE:%s simp.cfg", argv[0]);
        exit(1);
    }

    check_strings();

    std::string fname {argv[1]};
    SimpCfg sc;
    sc.load(fname);
    sc.dump("");

    sc.get_bool("testme", "key101b", false);
    sc.get_string("testme", "key101s", "Not found");
    sc.get_int64("testme", "key101i", 123456);
    sc.get_double("testme", "key101d", 123456.789);

    sc.set_bool("testme", "key201b", true);
    sc.set_string("testme", "key201s", "hello world");
    sc.set_int64("testme", "key201i", 987654);
    sc.set_double("testme", "key201d", 9988.7766);

    sc.dump("testme");
    sc.get_bool("testme", "key201b", false);
    sc.get_string("testme", "key201s", "Not found");
    sc.get_int64("testme", "key201i", 123456);
    sc.get_double("testme", "key201d", 123456.789);

    sc.get_string("mistral", "system-prefix", "Not found");
    sc.get_string("\"mistral\"", "\"system-prefix\"", "Not found");

    // The element type cannot be deduced from a braced list, so it is given explicitly.
    sc.get_vector<int64_t>("testme", "keyA100", {1, 2, 3});
    sc.get_vector<std::string>("testme", "keyA100", { "A", "അ", "अ", "ಅ" });
    sc.set_int64("testme", "keyA300-0", 330);
    sc.set_int64("testme", "keyA300-1", 331);
    sc.set_int64("testme", "keyA300-2", 332);
    sc.set_string("testme", "keyA301-0", "India");
    sc.set_value<std::string>("testme", "keyA301-1", "World");
    sc.set_string("testme", "keyA301-2", "AkashaGanga");
    sc.get_vector<int64_t>("testme", "keyA300", {1, 2, 3});
    sc.get_vector<std::string>("testme", "keyA301", { "yes 1", "No 2", "very well 3" });
    return 0;
}
#endif