diff --git a/tools/llama-bench/README.md b/tools/llama-bench/README.md index c837bb6d26..d7b9d0ce46 100644 --- a/tools/llama-bench/README.md +++ b/tools/llama-bench/README.md @@ -51,7 +51,7 @@ test parameters: --poll <0...100> (default: 50) -ngl, --n-gpu-layers (default: 99) -ncmoe, --n-cpu-moe (default: 0) - -sm, --split-mode (default: layer) + -sm, --split-mode (default: layer) -mg, --main-gpu (default: 0) -nkvo, --no-kv-offload <0|1> (default: 0) -fa, --flash-attn <0|1> (default: 0) diff --git a/tools/llama-bench/llama-bench.cpp b/tools/llama-bench/llama-bench.cpp index 7da6c3957c..5b66c834fd 100644 --- a/tools/llama-bench/llama-bench.cpp +++ b/tools/llama-bench/llama-bench.cpp @@ -259,6 +259,8 @@ static const char * split_mode_str(llama_split_mode mode) { return "layer"; case LLAMA_SPLIT_MODE_ROW: return "row"; + case LLAMA_SPLIT_MODE_GROUP: + return "group"; default: GGML_ABORT("invalid split mode"); } @@ -440,8 +442,8 @@ static void print_usage(int /* argc */, char ** argv) { join(cmd_params_defaults.n_gpu_layers, ",").c_str()); printf(" -ncmoe, --n-cpu-moe (default: %s)\n", join(cmd_params_defaults.n_cpu_moe, ",").c_str()); - printf(" -sm, --split-mode (default: %s)\n", - join(transform_to_str(cmd_params_defaults.split_mode, split_mode_str), ",").c_str()); + printf(" -sm, --split-mode (default: %s)\n", + join(transform_to_str(cmd_params_defaults.split_mode, split_mode_str), ",").c_str()); printf(" -mg, --main-gpu (default: %s)\n", join(cmd_params_defaults.main_gpu, ",").c_str()); printf(" -nkvo, --no-kv-offload <0|1> (default: %s)\n", @@ -723,6 +725,8 @@ static cmd_params parse_cmd_params(int argc, char ** argv) { mode = LLAMA_SPLIT_MODE_LAYER; } else if (m == "row") { mode = LLAMA_SPLIT_MODE_ROW; + } else if (m == "group") { + mode = LLAMA_SPLIT_MODE_GROUP; } else { invalid_param = true; break;