From 07fbe19f1fbcfa09abca7cccc62eaf82c1567b7e Mon Sep 17 00:00:00 2001 From: Xuan-Son Nguyen Date: Tue, 6 Jan 2026 17:51:08 +0100 Subject: [PATCH] arg: use CSV escape style for multiple-value args (#18643) * arg: use CSV escape style for multiple-value args * add test --- common/arg.cpp | 107 ++++++++++++++++++++++++-------------- tests/test-arg-parser.cpp | 9 ++++ 2 files changed, 76 insertions(+), 40 deletions(-) diff --git a/common/arg.cpp b/common/arg.cpp index b52b3e70b7..c3610d262b 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -854,6 +854,54 @@ bool common_arg_utils::is_autoy(const std::string & value) { return value == "auto" || value == "-1"; } +// Simple CSV parser that handles quoted fields and escaped quotes +// example: +// input: value1,"value, with, commas","value with ""escaped"" quotes",value4 +// output: [value1] [value, with, commas] [value with "escaped" quotes] [value4] +static std::vector parse_csv_row(const std::string& input) { + std::vector fields; + std::string field; + bool in_quotes = false; + + for (size_t i = 0; i < input.length(); ++i) { + char ch = input[i]; + + if (ch == '"') { + if (!in_quotes) { + // start of quoted field (only valid if at beginning of field) + if (!field.empty()) { + // quote appeared in middle of unquoted field, treat as literal + field += '"'; + } else { + in_quotes = true; // start + } + } else { + if (i + 1 < input.length() && input[i + 1] == '"') { + // escaped quote: "" + field += '"'; + ++i; // skip the next quote + } else { + in_quotes = false; // end + } + } + } else if (ch == ',') { + if (in_quotes) { + field += ','; + } else { + fields.push_back(std::move(field)); + field.clear(); + } + } else { + field += ch; + } + } + + // Add the last field + fields.push_back(std::move(field)); + + return fields; +} + common_params_context common_params_parser_init(common_params & params, llama_example ex, void(*print_usage)(int, char **)) { // per-example default params // we define here to make sure it's included in llama-gen-docs @@ -1250,7 +1298,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex {"--in-file"}, "FNAME", "an input file (use comma-separated values to specify multiple files)", [](common_params & params, const std::string & value) { - for (const auto & item : string_split(value, ',')) { + for (const auto & item : parse_csv_row(value)) { std::ifstream file(item); if (!file) { throw std::runtime_error(string_format("error: failed to open file '%s'\n", item.c_str())); @@ -2002,7 +2050,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex {"--image", "--audio"}, "FILE", "path to an image or audio file. use with multimodal models, use comma-separated values for multiple files\n", [](common_params & params, const std::string & value) { - for (const auto & item : string_split(value, ',')) { + for (const auto & item : parse_csv_row(value)) { params.image.emplace_back(item); } } @@ -2259,37 +2307,12 @@ common_params_context common_params_parser_init(common_params & params, llama_ex )); add_opt(common_arg( {"--override-kv"}, "KEY=TYPE:VALUE,...", - "advanced option to override model metadata by key. to specify multiple overrides, either use comma-separated or repeat this argument.\n" + "advanced option to override model metadata by key. to specify multiple overrides, either use comma-separated values.\n" "types: int, float, bool, str. example: --override-kv tokenizer.ggml.add_bos_token=bool:false,tokenizer.ggml.add_eos_token=bool:false", [](common_params & params, const std::string & value) { - std::vector kv_overrides; - - std::string current; - bool escaping = false; - - for (const char c : value) { - if (escaping) { - current.push_back(c); - escaping = false; - } else if (c == '\\') { - escaping = true; - } else if (c == ',') { - kv_overrides.push_back(current); - current.clear(); - } else { - current.push_back(c); - } - } - - if (escaping) { - current.push_back('\\'); - } - - kv_overrides.push_back(current); - - for (const auto & kv_override : kv_overrides) { - if (!string_parse_kv_override(kv_override.c_str(), params.kv_overrides)) { - throw std::runtime_error(string_format("error: Invalid type for KV override: %s\n", kv_override.c_str())); + for (const auto & item : parse_csv_row(value)) { + if (!string_parse_kv_override(item.c_str(), params.kv_overrides)) { + throw std::runtime_error(string_format("error: Invalid type for KV override: %s\n", item.c_str())); } } } @@ -2306,7 +2329,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex {"--lora"}, "FNAME", "path to LoRA adapter (use comma-separated values to load multiple adapters)", [](common_params & params, const std::string & value) { - for (const auto & item : string_split(value, ',')) { + for (const auto & item : parse_csv_row(value)) { params.lora_adapters.push_back({ item, 1.0, "", "", nullptr }); } } @@ -2317,7 +2340,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex "path to LoRA adapter with user defined scaling (format: FNAME:SCALE,...)\n" "note: use comma-separated values", [](common_params & params, const std::string & value) { - for (const auto & item : string_split(value, ',')) { + for (const auto & item : parse_csv_row(value)) { auto parts = string_split(item, ':'); if (parts.size() != 2) { throw std::invalid_argument("lora-scaled format: FNAME:SCALE"); @@ -2331,7 +2354,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex {"--control-vector"}, "FNAME", "add a control vector\nnote: use comma-separated values to add multiple control vectors", [](common_params & params, const std::string & value) { - for (const auto & item : string_split(value, ',')) { + for (const auto & item : parse_csv_row(value)) { params.control_vectors.push_back({ 1.0f, item, }); } } @@ -2341,7 +2364,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex "add a control vector with user defined scaling SCALE\n" "note: use comma-separated values (format: FNAME:SCALE,...)", [](common_params & params, const std::string & value) { - for (const auto & item : string_split(value, ',')) { + for (const auto & item : parse_csv_row(value)) { auto parts = string_split(item, ':'); if (parts.size() != 2) { throw std::invalid_argument("control-vector-scaled format: FNAME:SCALE"); @@ -2439,7 +2462,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex {"--context-file"}, "FNAME", "file to load context from (use comma-separated values to specify multiple files)", [](common_params & params, const std::string & value) { - for (const auto & item : string_split(value, ',')) { + for (const auto & item : parse_csv_row(value)) { std::ifstream file(item, std::ios::binary); if (!file) { throw std::runtime_error(string_format("error: failed to open file '%s'\n", item.c_str())); @@ -2675,9 +2698,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_RERANKING")); add_opt(common_arg( {"--api-key"}, "KEY", - "API key to use for authentication (default: none)", + "API key to use for authentication, multiple keys can be provided as a comma-separated list (default: none)", [](common_params & params, const std::string & value) { - params.api_keys.push_back(value); + for (const auto & key : parse_csv_row(value)) { + if (!key.empty()) { + params.api_keys.push_back(key); + } + } } ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_API_KEY")); add_opt(common_arg( @@ -2691,7 +2718,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex std::string key; while (std::getline(key_file, key)) { if (!key.empty()) { - params.api_keys.push_back(key); + params.api_keys.push_back(key); } } key_file.close(); @@ -2713,7 +2740,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_SSL_CERT_FILE")); add_opt(common_arg( {"--chat-template-kwargs"}, "STRING", - string_format("sets additional params for the json template parser"), + "sets additional params for the json template parser, must be a valid json object string, e.g. '{\"key1\":\"value1\",\"key2\":\"value2\"}'", [](common_params & params, const std::string & value) { auto parsed = json::parse(value); for (const auto & item : parsed.items()) { diff --git a/tests/test-arg-parser.cpp b/tests/test-arg-parser.cpp index 1bbb745e78..e995974a2e 100644 --- a/tests/test-arg-parser.cpp +++ b/tests/test-arg-parser.cpp @@ -127,6 +127,15 @@ int main(void) { assert(true == common_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_SPECULATIVE)); assert(params.speculative.n_max == 123); + // multi-value args (CSV) + argv = {"binary_name", "--lora", "file1.gguf,\"file2,2.gguf\",\"file3\"\"3\"\".gguf\",file4\".gguf"}; + assert(true == common_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON)); + assert(params.lora_adapters.size() == 4); + assert(params.lora_adapters[0].path == "file1.gguf"); + assert(params.lora_adapters[1].path == "file2,2.gguf"); + assert(params.lora_adapters[2].path == "file3\"3\".gguf"); + assert(params.lora_adapters[3].path == "file4\".gguf"); + // skip this part on windows, because setenv is not supported #ifdef _WIN32 printf("test-arg-parser: skip on windows build\n");