cann: optimize multi-stream execution
- Use parse_bool() for GGML_CANN_MULTI_STREAM environment variable parsing, consistent with other env var handling
- Only synchronize dependent streams instead of all streams when a node has multiple dependencies, reducing sync overhead
- Performance improvement: ~9% faster prompt processing on a 0.5B model (1838 t/s vs 1688 t/s with ACL graph disabled)
This commit is contained in:
parent
906bfed0ca
commit
4951a4ff7a
|
|
@ -2229,10 +2229,7 @@ static void evaluate_and_capture_cann_graph(ggml_backend_cann_context * cann_ctx
|
||||||
static bool opt_fusion = parse_bool(get_env_as_lowercase("GGML_CANN_OPERATOR_FUSION").value_or(""));
|
static bool opt_fusion = parse_bool(get_env_as_lowercase("GGML_CANN_OPERATOR_FUSION").value_or(""));
|
||||||
|
|
||||||
// Check if multi-stream execution is enabled
|
// Check if multi-stream execution is enabled
|
||||||
static bool multi_stream_enabled = [] {
|
static bool multi_stream_enabled = parse_bool(get_env_as_lowercase("GGML_CANN_MULTI_STREAM").value_or(""));
|
||||||
const char * env = getenv("GGML_CANN_MULTI_STREAM");
|
|
||||||
return env != nullptr && (strcmp(env, "1") == 0 || strcmp(env, "true") == 0);
|
|
||||||
}();
|
|
||||||
|
|
||||||
if (!use_cann_graph || cann_graph_capture_required) {
|
if (!use_cann_graph || cann_graph_capture_required) {
|
||||||
if (multi_stream_enabled) {
|
if (multi_stream_enabled) {
|
||||||
|
|
@ -2336,15 +2333,15 @@ static void evaluate_and_capture_cann_graph(ggml_backend_cann_context * cann_ctx
|
||||||
// Single dependency - execute on the same stream to avoid sync overhead
|
// Single dependency - execute on the same stream to avoid sync overhead
|
||||||
exec_stream = *dependent_streams.begin();
|
exec_stream = *dependent_streams.begin();
|
||||||
} else {
|
} else {
|
||||||
// Multiple dependencies - sync all to stream 0 and execute there
|
// Multiple dependencies - pick the first dependent stream and wait for others
|
||||||
sync_all_to_stream(0);
|
exec_stream = *dependent_streams.begin();
|
||||||
exec_stream = 0;
|
|
||||||
current_stream = 1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// If we depend on a different stream, wait for it
|
// Wait for all dependent streams (except the exec_stream itself)
|
||||||
if (dependent_streams.size() == 1 && *dependent_streams.begin() != exec_stream) {
|
for (int dep_stream : dependent_streams) {
|
||||||
wait_for_stream(*dependent_streams.begin(), exec_stream);
|
if (dep_stream != exec_stream) {
|
||||||
|
wait_for_stream(dep_stream, exec_stream);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Execute the node on the chosen stream
|
// Execute the node on the chosen stream
|
||||||
|
|
@ -2442,10 +2439,7 @@ static enum ggml_status ggml_backend_cann_graph_compute(ggml_backend_t backend,
|
||||||
bool use_cann_graph = true;
|
bool use_cann_graph = true;
|
||||||
|
|
||||||
// Check if multi-stream execution is enabled (must check before using use_cann_graph)
|
// Check if multi-stream execution is enabled (must check before using use_cann_graph)
|
||||||
static bool multi_stream_enabled = [] {
|
static bool multi_stream_enabled = parse_bool(get_env_as_lowercase("GGML_CANN_MULTI_STREAM").value_or(""));
|
||||||
const char * env = getenv("GGML_CANN_MULTI_STREAM");
|
|
||||||
return env != nullptr && (strcmp(env, "1") == 0 || strcmp(env, "true") == 0);
|
|
||||||
}();
|
|
||||||
|
|
||||||
// Multi-stream mode is incompatible with ACL graph capture/execution
|
// Multi-stream mode is incompatible with ACL graph capture/execution
|
||||||
if (multi_stream_enabled) {
|
if (multi_stream_enabled) {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue