common: improve verbosity level definitions (#17630)
* common: improve verbosity level definitions * string_format * update autogen docs
This commit is contained in:
parent
cd3c118908
commit
7733409734
|
|
@ -2674,7 +2674,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
||||||
).set_env("LLAMA_OFFLINE"));
|
).set_env("LLAMA_OFFLINE"));
|
||||||
add_opt(common_arg(
|
add_opt(common_arg(
|
||||||
{"-lv", "--verbosity", "--log-verbosity"}, "N",
|
{"-lv", "--verbosity", "--log-verbosity"}, "N",
|
||||||
"Set the verbosity threshold. Messages with a higher verbosity will be ignored.",
|
string_format("Set the verbosity threshold. Messages with a higher verbosity will be ignored. Values:\n"
|
||||||
|
" - 0: generic output\n"
|
||||||
|
" - 1: error\n"
|
||||||
|
" - 2: warning\n"
|
||||||
|
" - 3: info\n"
|
||||||
|
" - 4: debug\n"
|
||||||
|
"(default: %d)\n", params.verbosity),
|
||||||
[](common_params & params, int value) {
|
[](common_params & params, int value) {
|
||||||
params.verbosity = value;
|
params.verbosity = value;
|
||||||
common_log_set_verbosity_thold(value);
|
common_log_set_verbosity_thold(value);
|
||||||
|
|
|
||||||
|
|
@ -369,7 +369,7 @@ struct common_params {
|
||||||
|
|
||||||
std::vector<common_control_vector_load_info> control_vectors; // control vector with user defined scale
|
std::vector<common_control_vector_load_info> control_vectors; // control vector with user defined scale
|
||||||
|
|
||||||
int32_t verbosity = 0;
|
int32_t verbosity = 3; // LOG_LEVEL_INFO
|
||||||
int32_t control_vector_layer_start = -1; // layer range for control vector
|
int32_t control_vector_layer_start = -1; // layer range for control vector
|
||||||
int32_t control_vector_layer_end = -1; // layer range for control vector
|
int32_t control_vector_layer_end = -1; // layer range for control vector
|
||||||
bool offline = false;
|
bool offline = false;
|
||||||
|
|
|
||||||
|
|
@ -430,7 +430,7 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string &
|
||||||
curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
|
curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
|
||||||
curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L);
|
curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L);
|
||||||
curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
|
curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
|
||||||
curl_easy_setopt(curl.get(), CURLOPT_VERBOSE, 1L);
|
curl_easy_setopt(curl.get(), CURLOPT_VERBOSE, 0L);
|
||||||
typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * ptr, size_t size, size_t nmemb, void * data);
|
typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * ptr, size_t size, size_t nmemb, void * data);
|
||||||
auto write_callback = [](void * ptr, size_t size, size_t nmemb, void * data) -> size_t {
|
auto write_callback = [](void * ptr, size_t size, size_t nmemb, void * data) -> size_t {
|
||||||
auto data_vec = static_cast<std::vector<char> *>(data);
|
auto data_vec = static_cast<std::vector<char> *>(data);
|
||||||
|
|
|
||||||
|
|
@ -443,8 +443,22 @@ void common_log_set_timestamps(struct common_log * log, bool timestamps) {
|
||||||
log->set_timestamps(timestamps);
|
log->set_timestamps(timestamps);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Translate a ggml log level into the LOG_LEVEL_* verbosity value that the default log callback compares against common_log_verbosity_thold. DEBUG/INFO/WARN/ERROR map to their LOG_LEVEL_* counterparts, CONT is treated like INFO, and NONE or any unrecognized level maps to LOG_LEVEL_OUTPUT.
static int common_get_verbosity(enum ggml_log_level level) {
|
||||||
|
switch (level) {
|
||||||
|
case GGML_LOG_LEVEL_DEBUG: return LOG_LEVEL_DEBUG;
|
||||||
|
case GGML_LOG_LEVEL_INFO: return LOG_LEVEL_INFO;
|
||||||
|
case GGML_LOG_LEVEL_WARN: return LOG_LEVEL_WARN;
|
||||||
|
case GGML_LOG_LEVEL_ERROR: return LOG_LEVEL_ERROR;
|
||||||
|
case GGML_LOG_LEVEL_CONT: return LOG_LEVEL_INFO; // same as INFO
|
||||||
|
case GGML_LOG_LEVEL_NONE:
|
||||||
|
default:
|
||||||
|
return LOG_LEVEL_OUTPUT;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void common_log_default_callback(enum ggml_log_level level, const char * text, void * /*user_data*/) {
|
void common_log_default_callback(enum ggml_log_level level, const char * text, void * /*user_data*/) {
|
||||||
if (LOG_DEFAULT_LLAMA <= common_log_verbosity_thold) {
|
auto verbosity = common_get_verbosity(level);
|
||||||
|
if (verbosity <= common_log_verbosity_thold) {
|
||||||
common_log_add(common_log_main(), level, "%s", text);
|
common_log_add(common_log_main(), level, "%s", text);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
29
common/log.h
29
common/log.h
|
|
@ -21,8 +21,14 @@
|
||||||
# define LOG_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
|
# define LOG_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define LOG_DEFAULT_DEBUG 1
|
#define LOG_LEVEL_DEBUG 4
|
||||||
#define LOG_DEFAULT_LLAMA 0
|
#define LOG_LEVEL_INFO 3
|
||||||
|
#define LOG_LEVEL_WARN 2
|
||||||
|
#define LOG_LEVEL_ERROR 1
|
||||||
|
#define LOG_LEVEL_OUTPUT 0 // output data from tools
|
||||||
|
|
||||||
|
#define LOG_DEFAULT_DEBUG LOG_LEVEL_DEBUG
|
||||||
|
#define LOG_DEFAULT_LLAMA LOG_LEVEL_INFO
|
||||||
|
|
||||||
enum log_colors {
|
enum log_colors {
|
||||||
LOG_COLORS_AUTO = -1,
|
LOG_COLORS_AUTO = -1,
|
||||||
|
|
@ -67,10 +73,11 @@ void common_log_add(struct common_log * log, enum ggml_log_level level, const ch
|
||||||
// 0.00.090.578 I llm_load_tensors: offloading 32 repeating layers to GPU
|
// 0.00.090.578 I llm_load_tensors: offloading 32 repeating layers to GPU
|
||||||
// 0.00.090.579 I llm_load_tensors: offloading non-repeating layers to GPU
|
// 0.00.090.579 I llm_load_tensors: offloading non-repeating layers to GPU
|
||||||
//
|
//
|
||||||
// I - info (stdout, V = 0)
|
|
||||||
// W - warning (stderr, V = 0)
|
|
||||||
// E - error (stderr, V = 0)
|
|
||||||
// D - debug (stderr, V = LOG_DEFAULT_DEBUG)
|
// D - debug (stderr, V = LOG_DEFAULT_DEBUG)
|
||||||
|
// I - info (stdout, V = LOG_LEVEL_INFO)
|
||||||
|
// W - warning (stderr, V = LOG_LEVEL_WARN)
|
||||||
|
// E - error (stderr, V = LOG_LEVEL_ERROR)
|
||||||
|
// O - output (stdout, V = LOG_LEVEL_OUTPUT)
|
||||||
//
|
//
|
||||||
|
|
||||||
void common_log_set_file (struct common_log * log, const char * file); // not thread-safe
|
void common_log_set_file (struct common_log * log, const char * file); // not thread-safe
|
||||||
|
|
@ -95,14 +102,14 @@ void common_log_set_timestamps(struct common_log * log, bool timestamps); // w
|
||||||
} \
|
} \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#define LOG(...) LOG_TMPL(GGML_LOG_LEVEL_NONE, 0, __VA_ARGS__)
|
#define LOG(...) LOG_TMPL(GGML_LOG_LEVEL_NONE, LOG_LEVEL_OUTPUT, __VA_ARGS__)
|
||||||
#define LOGV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_NONE, verbosity, __VA_ARGS__)
|
#define LOGV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_NONE, verbosity, __VA_ARGS__)
|
||||||
|
|
||||||
#define LOG_INF(...) LOG_TMPL(GGML_LOG_LEVEL_INFO, 0, __VA_ARGS__)
|
#define LOG_DBG(...) LOG_TMPL(GGML_LOG_LEVEL_DEBUG, LOG_LEVEL_DEBUG, __VA_ARGS__)
|
||||||
#define LOG_WRN(...) LOG_TMPL(GGML_LOG_LEVEL_WARN, 0, __VA_ARGS__)
|
#define LOG_INF(...) LOG_TMPL(GGML_LOG_LEVEL_INFO, LOG_LEVEL_INFO, __VA_ARGS__)
|
||||||
#define LOG_ERR(...) LOG_TMPL(GGML_LOG_LEVEL_ERROR, 0, __VA_ARGS__)
|
#define LOG_WRN(...) LOG_TMPL(GGML_LOG_LEVEL_WARN, LOG_LEVEL_WARN, __VA_ARGS__)
|
||||||
#define LOG_DBG(...) LOG_TMPL(GGML_LOG_LEVEL_DEBUG, LOG_DEFAULT_DEBUG, __VA_ARGS__)
|
#define LOG_ERR(...) LOG_TMPL(GGML_LOG_LEVEL_ERROR, LOG_LEVEL_ERROR, __VA_ARGS__)
|
||||||
#define LOG_CNT(...) LOG_TMPL(GGML_LOG_LEVEL_CONT, 0, __VA_ARGS__)
|
#define LOG_CNT(...) LOG_TMPL(GGML_LOG_LEVEL_CONT, LOG_LEVEL_INFO, __VA_ARGS__) // same as INFO
|
||||||
|
|
||||||
#define LOG_INFV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_INFO, verbosity, __VA_ARGS__)
|
#define LOG_INFV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_INFO, verbosity, __VA_ARGS__)
|
||||||
#define LOG_WRNV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_WARN, verbosity, __VA_ARGS__)
|
#define LOG_WRNV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_WARN, verbosity, __VA_ARGS__)
|
||||||
|
|
|
||||||
|
|
@ -52,7 +52,7 @@ The project is under active development, and we are [looking for feedback and co
|
||||||
| `-ub, --ubatch-size N` | physical maximum batch size (default: 512)<br/>(env: LLAMA_ARG_UBATCH) |
|
| `-ub, --ubatch-size N` | physical maximum batch size (default: 512)<br/>(env: LLAMA_ARG_UBATCH) |
|
||||||
| `--keep N` | number of tokens to keep from the initial prompt (default: 0, -1 = all) |
|
| `--keep N` | number of tokens to keep from the initial prompt (default: 0, -1 = all) |
|
||||||
| `--swa-full` | use full-size SWA cache (default: false)<br/>[(more info)](https://github.com/ggml-org/llama.cpp/pull/13194#issuecomment-2868343055)<br/>(env: LLAMA_ARG_SWA_FULL) |
|
| `--swa-full` | use full-size SWA cache (default: false)<br/>[(more info)](https://github.com/ggml-org/llama.cpp/pull/13194#issuecomment-2868343055)<br/>(env: LLAMA_ARG_SWA_FULL) |
|
||||||
| `--kv-unified, -kvu` | use single unified KV buffer for the KV cache of all sequences (default: false)<br/>[(more info)](https://github.com/ggml-org/llama.cpp/pull/14363)<br/>(env: LLAMA_ARG_KV_SPLIT) |
|
| `--kv-unified, -kvu` | use single unified KV buffer for the KV cache of all sequences (default: false)<br/>[(more info)](https://github.com/ggml-org/llama.cpp/pull/14363)<br/>(env: LLAMA_ARG_KV_UNIFIED) |
|
||||||
| `-fa, --flash-attn [on\|off\|auto]` | set Flash Attention use ('on', 'off', or 'auto', default: 'auto')<br/>(env: LLAMA_ARG_FLASH_ATTN) |
|
| `-fa, --flash-attn [on\|off\|auto]` | set Flash Attention use ('on', 'off', or 'auto', default: 'auto')<br/>(env: LLAMA_ARG_FLASH_ATTN) |
|
||||||
| `--no-perf` | disable internal libllama performance timings (default: false)<br/>(env: LLAMA_ARG_NO_PERF) |
|
| `--no-perf` | disable internal libllama performance timings (default: false)<br/>(env: LLAMA_ARG_NO_PERF) |
|
||||||
| `-e, --escape` | process escapes sequences (\n, \r, \t, \', \", \\) (default: true) |
|
| `-e, --escape` | process escapes sequences (\n, \r, \t, \', \", \\) (default: true) |
|
||||||
|
|
@ -103,11 +103,11 @@ The project is under active development, and we are [looking for feedback and co
|
||||||
| `-hffv, --hf-file-v FILE` | Hugging Face model file for the vocoder model (default: unused)<br/>(env: LLAMA_ARG_HF_FILE_V) |
|
| `-hffv, --hf-file-v FILE` | Hugging Face model file for the vocoder model (default: unused)<br/>(env: LLAMA_ARG_HF_FILE_V) |
|
||||||
| `-hft, --hf-token TOKEN` | Hugging Face access token (default: value from HF_TOKEN environment variable)<br/>(env: HF_TOKEN) |
|
| `-hft, --hf-token TOKEN` | Hugging Face access token (default: value from HF_TOKEN environment variable)<br/>(env: HF_TOKEN) |
|
||||||
| `--log-disable` | Log disable |
|
| `--log-disable` | Log disable |
|
||||||
| `--log-file FNAME` | Log to file |
|
| `--log-file FNAME` | Log to file<br/>(env: LLAMA_LOG_FILE) |
|
||||||
| `--log-colors [on\|off\|auto]` | Set colored logging ('on', 'off', or 'auto', default: 'auto')<br/>'auto' enables colors when output is to a terminal<br/>(env: LLAMA_LOG_COLORS) |
|
| `--log-colors [on\|off\|auto]` | Set colored logging ('on', 'off', or 'auto', default: 'auto')<br/>'auto' enables colors when output is to a terminal<br/>(env: LLAMA_LOG_COLORS) |
|
||||||
| `-v, --verbose, --log-verbose` | Set verbosity level to infinity (i.e. log all messages, useful for debugging) |
|
| `-v, --verbose, --log-verbose` | Set verbosity level to infinity (i.e. log all messages, useful for debugging) |
|
||||||
| `--offline` | Offline mode: forces use of cache, prevents network access<br/>(env: LLAMA_OFFLINE) |
|
| `--offline` | Offline mode: forces use of cache, prevents network access<br/>(env: LLAMA_OFFLINE) |
|
||||||
| `-lv, --verbosity, --log-verbosity N` | Set the verbosity threshold. Messages with a higher verbosity will be ignored.<br/>(env: LLAMA_LOG_VERBOSITY) |
|
| `-lv, --verbosity, --log-verbosity N` | Set the verbosity threshold. Messages with a higher verbosity will be ignored. Values:<br/> - 0: generic output<br/> - 1: error<br/> - 2: warning<br/> - 3: info<br/> - 4: debug<br/>(default: 3)<br/><br/>(env: LLAMA_LOG_VERBOSITY) |
|
||||||
| `--log-prefix` | Enable prefix in log messages<br/>(env: LLAMA_LOG_PREFIX) |
|
| `--log-prefix` | Enable prefix in log messages<br/>(env: LLAMA_LOG_PREFIX) |
|
||||||
| `--log-timestamps` | Enable timestamps in log messages<br/>(env: LLAMA_LOG_TIMESTAMPS) |
|
| `--log-timestamps` | Enable timestamps in log messages<br/>(env: LLAMA_LOG_TIMESTAMPS) |
|
||||||
| `-ctkd, --cache-type-k-draft TYPE` | KV cache data type for K for the draft model<br/>allowed values: f32, f16, bf16, q8_0, q4_0, q4_1, iq4_nl, q5_0, q5_1<br/>(default: f16)<br/>(env: LLAMA_ARG_CACHE_TYPE_K_DRAFT) |
|
| `-ctkd, --cache-type-k-draft TYPE` | KV cache data type for K for the draft model<br/>allowed values: f32, f16, bf16, q8_0, q4_0, q4_1, iq4_nl, q5_0, q5_1<br/>(default: f16)<br/>(env: LLAMA_ARG_CACHE_TYPE_K_DRAFT) |
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue