diff --git a/common/arg.cpp b/common/arg.cpp index 32d8e69f96..5bab9abc77 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -1079,7 +1079,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex [](common_params & params) { params.verbose_prompt = true; } - )); + ).set_examples({LLAMA_EXAMPLE_COMPLETION, LLAMA_EXAMPLE_CLI, LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_RETRIEVAL})); add_opt(common_arg( {"--display-prompt"}, {"--no-display-prompt"}, diff --git a/tools/server/README.md b/tools/server/README.md index cb53678416..f99103a584 100644 --- a/tools/server/README.md +++ b/tools/server/README.md @@ -36,7 +36,6 @@ For the full list of features, please refer to [server's changelog](https://gith | `--license` | show source code license and dependencies | | `-cl, --cache-list` | show list of models in cache | | `--completion-bash` | print source-able bash completion script for llama.cpp | -| `--verbose-prompt` | print a verbose prompt before generation (default: false) | | `-t, --threads N` | number of CPU threads to use during generation (default: -1)
(env: LLAMA_ARG_THREADS) | | `-tb, --threads-batch N` | number of threads to use during batch and prompt processing (default: same as --threads) | | `-C, --cpu-mask M` | CPU affinity mask: arbitrarily long hex. Complements cpu-range (default: "") |