cann: optimize multi-stream execution

- Use parse_bool() for GGML_CANN_MULTI_STREAM environment variable
  parsing, consistent with other env var handling
- Only synchronize dependent streams instead of all streams when
  a node has multiple dependencies, reducing sync overhead
- Performance improvement: ~9% faster prompt processing on 0.5B model
  (1838 t/s vs 1688 t/s with ACL graph disabled)
This commit is contained in:
hipudding 2026-02-03 07:37:18 +00:00
parent 906bfed0ca
commit 4951a4ff7a
1 changed files with 9 additions and 15 deletions

View File

@ -2229,10 +2229,7 @@ static void evaluate_and_capture_cann_graph(ggml_backend_cann_context * cann_ctx
static bool opt_fusion = parse_bool(get_env_as_lowercase("GGML_CANN_OPERATOR_FUSION").value_or(""));
// Check if multi-stream execution is enabled
static bool multi_stream_enabled = [] {
const char * env = getenv("GGML_CANN_MULTI_STREAM");
return env != nullptr && (strcmp(env, "1") == 0 || strcmp(env, "true") == 0);
}();
static bool multi_stream_enabled = parse_bool(get_env_as_lowercase("GGML_CANN_MULTI_STREAM").value_or(""));
if (!use_cann_graph || cann_graph_capture_required) {
if (multi_stream_enabled) {
@ -2336,15 +2333,15 @@ static void evaluate_and_capture_cann_graph(ggml_backend_cann_context * cann_ctx
// Single dependency - execute on the same stream to avoid sync overhead
exec_stream = *dependent_streams.begin();
} else {
// Multiple dependencies - sync all to stream 0 and execute there
sync_all_to_stream(0);
exec_stream = 0;
current_stream = 1;
// Multiple dependencies - pick the first dependent stream and wait for others
exec_stream = *dependent_streams.begin();
}
// If we depend on a different stream, wait for it
if (dependent_streams.size() == 1 && *dependent_streams.begin() != exec_stream) {
wait_for_stream(*dependent_streams.begin(), exec_stream);
// Wait for all dependent streams (except the exec_stream itself)
for (int dep_stream : dependent_streams) {
if (dep_stream != exec_stream) {
wait_for_stream(dep_stream, exec_stream);
}
}
// Execute the node on the chosen stream
@ -2442,10 +2439,7 @@ static enum ggml_status ggml_backend_cann_graph_compute(ggml_backend_t backend,
bool use_cann_graph = true;
// Check if multi-stream execution is enabled (must check before using use_cann_graph)
static bool multi_stream_enabled = [] {
const char * env = getenv("GGML_CANN_MULTI_STREAM");
return env != nullptr && (strcmp(env, "1") == 0 || strcmp(env, "true") == 0);
}();
static bool multi_stream_enabled = parse_bool(get_env_as_lowercase("GGML_CANN_MULTI_STREAM").value_or(""));
// Multi-stream mode is incompatible with ACL graph capture/execution
if (multi_stream_enabled) {