fix mul_mat_id stats, add throughput stat, add envvar trigger, add concurrent mode fix
This commit is contained in:
parent
64e438465f
commit
9da7e4e74f
|
|
@ -251,11 +251,17 @@ int main(int argc, char ** argv) {
|
|||
if (params.profiling_output.empty()) {
|
||||
ggml_backend_sched_print_profiling(sched);
|
||||
} else {
|
||||
int ret = ggml_backend_sched_export_profiling_json(sched, params.profiling_output.c_str());
|
||||
if (ret == 0) {
|
||||
LOG("\nProfiling data exported to: %s\n", params.profiling_output.c_str());
|
||||
const std::string & path = params.profiling_output;
|
||||
int ret;
|
||||
if (path.size() >= 4 && path.compare(path.size() - 4, 4, ".txt") == 0) {
|
||||
ret = ggml_backend_sched_export_profiling_text(sched, path.c_str());
|
||||
} else {
|
||||
LOG_ERR("\nFailed to export profiling data to: %s\n", params.profiling_output.c_str());
|
||||
ret = ggml_backend_sched_export_profiling_json(sched, path.c_str());
|
||||
}
|
||||
if (ret == 0) {
|
||||
LOG("\nProfiling data exported to: %s\n", path.c_str());
|
||||
} else {
|
||||
LOG_ERR("\nFailed to export profiling data to: %s\n", path.c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ extern "C" {
|
|||
void * profiling_context;
|
||||
|
||||
// callback for recording a profile record from C code (set by backend when profiling)
|
||||
// params: context, type, name, split_id, start_ns, end_ns, bytes, extra, ne_src0[4], ne_src1[4]
|
||||
// params: context, type, name, split_id, start_ns, end_ns, bytes, extra, ne_src0[4], ne_src1[4], ne_src2[4]
|
||||
void (*profiling_record_fn)(void * context,
|
||||
int type,
|
||||
const char * name,
|
||||
|
|
@ -38,7 +38,8 @@ extern "C" {
|
|||
uint64_t bytes,
|
||||
const char * extra,
|
||||
const int64_t ne_src0[4],
|
||||
const int64_t ne_src1[4]);
|
||||
const int64_t ne_src1[4],
|
||||
const int64_t ne_src2[4]);
|
||||
};
|
||||
|
||||
// numa strategies
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@ typedef struct ggml_profile_record {
|
|||
const char * extra; // fusion name for fused ops, or NULL
|
||||
int64_t ne_src0[4]; // src[0] tensor dimensions (e.g. weight matrix for MUL_MAT)
|
||||
int64_t ne_src1[4]; // src[1] tensor dimensions (e.g. input matrix for MUL_MAT)
|
||||
int64_t ne_src2[4]; // src[2] tensor dimensions (e.g. ids for MUL_MAT_ID)
|
||||
} ggml_profile_record;
|
||||
|
||||
// Backend profiler interface - each backend optionally implements this
|
||||
|
|
@ -99,6 +100,13 @@ GGML_API int ggml_backend_sched_export_profiling_json(ggml_backend_sched_t sched
|
|||
// Export profiling data as JSON to a FILE pointer
|
||||
GGML_API int ggml_backend_sched_write_profiling_json(ggml_backend_sched_t sched, FILE * fp);
|
||||
|
||||
// Export profiling data as plain text statistics to a file
|
||||
// Returns 0 on success, -1 on error
|
||||
GGML_API int ggml_backend_sched_export_profiling_text(ggml_backend_sched_t sched, const char * filepath);
|
||||
|
||||
// Export profiling data as plain text statistics to a FILE pointer
|
||||
GGML_API int ggml_backend_sched_write_profiling_text(ggml_backend_sched_t sched, FILE * fp);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@
|
|||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#ifdef __APPLE__
|
||||
|
|
@ -749,8 +750,17 @@ struct ggml_backend_sched {
|
|||
|
||||
// profiling
|
||||
bool profiling_enabled;
|
||||
std::vector<ggml_profile_record> copy_records; // copy events recorded by the scheduler
|
||||
std::vector<ggml_profile_record> profiling_records; // merged records from all sources
|
||||
std::string profiling_env_path; // GGML_PROFILE env var value (for auto-export on free)
|
||||
std::vector<ggml_profile_record> copy_records; // copy events recorded by the scheduler
|
||||
std::vector<ggml_profile_record> profiling_records; // merged records from all sources
|
||||
|
||||
// Cached backend metadata for safe access during auto-export (backends may be freed first)
|
||||
struct backend_meta {
|
||||
std::string name;
|
||||
std::string device;
|
||||
int device_type;
|
||||
};
|
||||
std::vector<backend_meta> profiling_backend_meta;
|
||||
};
|
||||
|
||||
#define hash_id(tensor) ggml_hash_find_or_insert(&sched->hash_set, tensor)
|
||||
|
|
@ -1515,7 +1525,7 @@ static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t s
|
|||
}
|
||||
|
||||
sched->copy_records.push_back({ GGML_PROFILE_EVENT_COPY, copy_dir, split_backend_id, split_id,
|
||||
copy_start, copy_end, ggml_nbytes(input), NULL, {0}, {0} });
|
||||
copy_start, copy_end, ggml_nbytes(input), NULL, {0}, {0}, {0} });
|
||||
} else {
|
||||
ggml_backend_tensor_copy(input, input_cpy);
|
||||
}
|
||||
|
|
@ -1635,7 +1645,7 @@ static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t s
|
|||
|
||||
sched->copy_records.push_back({ GGML_PROFILE_EVENT_COPY, copy_dir, split_backend_id,
|
||||
split_id, moe_copy_start, moe_copy_end,
|
||||
(uint64_t) total_copied_bytes, NULL, {0}, {0} });
|
||||
(uint64_t) total_copied_bytes, NULL, {0}, {0}, {0} });
|
||||
}
|
||||
} else {
|
||||
// try async copy, but if not possible, we can still use a sync copy without synchronizing the dst backend, since we handle the synchronization here with multiple copies and events
|
||||
|
|
@ -1671,7 +1681,7 @@ static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t s
|
|||
}
|
||||
|
||||
sched->copy_records.push_back({ GGML_PROFILE_EVENT_COPY, copy_dir, split_backend_id,
|
||||
split_id, copy_start, copy_end, ggml_nbytes(input), NULL, {0}, {0} });
|
||||
split_id, copy_start, copy_end, ggml_nbytes(input), NULL, {0}, {0}, {0} });
|
||||
} else {
|
||||
ggml_backend_tensor_copy(input, input_cpy);
|
||||
}
|
||||
|
|
@ -1691,7 +1701,7 @@ static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t s
|
|||
}
|
||||
|
||||
sched->copy_records.push_back({ GGML_PROFILE_EVENT_COPY, copy_dir, split_backend_id,
|
||||
split_id, copy_start, copy_end, ggml_nbytes(input), NULL, {0}, {0} });
|
||||
split_id, copy_start, copy_end, ggml_nbytes(input), NULL, {0}, {0}, {0} });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1837,7 +1847,24 @@ ggml_backend_sched_t ggml_backend_sched_new(
|
|||
|
||||
sched->galloc = ggml_gallocr_new_n(sched->bufts, n_backends);
|
||||
sched->op_offload = op_offload;
|
||||
sched->profiling_enabled = (getenv("GGML_PROFILE") != NULL);
|
||||
const char * profile_env = getenv("GGML_PROFILE");
|
||||
if (profile_env != NULL) {
|
||||
sched->profiling_enabled = true;
|
||||
sched->profiling_env_path = profile_env;
|
||||
}
|
||||
|
||||
// Cache backend metadata for safe access during auto-export
|
||||
for (int b = 0; b < n_backends; b++) {
|
||||
ggml_backend_sched::backend_meta meta;
|
||||
meta.name = ggml_backend_name(backends[b]);
|
||||
meta.device = "unknown";
|
||||
meta.device_type = 0;
|
||||
if (backends[b]->device != NULL) {
|
||||
meta.device = ggml_backend_dev_name(backends[b]->device);
|
||||
meta.device_type = (int) ggml_backend_dev_type(backends[b]->device);
|
||||
}
|
||||
sched->profiling_backend_meta.push_back(std::move(meta));
|
||||
}
|
||||
|
||||
ggml_backend_sched_reset(sched);
|
||||
|
||||
|
|
@ -1848,6 +1875,33 @@ void ggml_backend_sched_free(ggml_backend_sched_t sched) {
|
|||
if (sched == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Auto-export profiling data if enabled via GGML_PROFILE env var
|
||||
// GGML_PROFILE=1 or GGML_PROFILE="" → print to stdout
|
||||
// GGML_PROFILE=file.json → export JSON
|
||||
// GGML_PROFILE=file.txt → export text
|
||||
if (!sched->profiling_records.empty() && getenv("GGML_PROFILE") != NULL) {
|
||||
const std::string & path = sched->profiling_env_path;
|
||||
if (path.empty() || path == "1" || path == "stdout") {
|
||||
ggml_backend_sched_print_profiling(sched);
|
||||
} else if (path.size() >= 4 && path.compare(path.size() - 4, 4, ".txt") == 0) {
|
||||
int ret = ggml_backend_sched_export_profiling_text(sched, path.c_str());
|
||||
if (ret == 0) {
|
||||
GGML_LOG_INFO("[profiler] Data exported to: %s\n", path.c_str());
|
||||
} else {
|
||||
GGML_LOG_ERROR("[profiler] Failed to export data to: %s\n", path.c_str());
|
||||
}
|
||||
} else {
|
||||
// Default to JSON for any other path (including .json)
|
||||
int ret = ggml_backend_sched_export_profiling_json(sched, path.c_str());
|
||||
if (ret == 0) {
|
||||
GGML_LOG_INFO("[profiler] Data exported to: %s\n", path.c_str());
|
||||
} else {
|
||||
GGML_LOG_ERROR("[profiler] Failed to export data to: %s\n", path.c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (int b = 0; b < sched->n_backends; b++) {
|
||||
for (int c = 0; c < sched->n_copies; c++) {
|
||||
ggml_backend_event_free(sched->events[b][c]);
|
||||
|
|
@ -2518,18 +2572,16 @@ void ggml_backend_sched_print_profiling(ggml_backend_sched_t sched) {
|
|||
double min_us = (double) s.min_ns / 1000.0;
|
||||
double max_us = (double) s.max_ns / 1000.0;
|
||||
|
||||
if (s.type == GGML_PROFILE_EVENT_COPY) {
|
||||
GGML_LOG_INFO(
|
||||
" [%s] backend %d %-28s %7.1f%% count=%-6d total=%8.2f ms avg=%8.2f us min=%8.2f us max=%8.2f us",
|
||||
type_str[s.type], s.backend_id, s.name, pct, s.count, (double) s.total_ns / 1e6, avg_us, min_us, max_us);
|
||||
if (s.total_bytes > 0 && s.total_ns > 0) {
|
||||
double bw_gbps = (double) s.total_bytes / (double) s.total_ns;
|
||||
GGML_LOG_INFO(
|
||||
" [%s] backend %d %-28s %7.1f%% count=%-6d total=%8.2f ms avg=%8.2f us min=%8.2f us max=%8.2f us "
|
||||
" %8.2f GB/s",
|
||||
type_str[s.type], s.backend_id, s.name, pct, s.count, (double) s.total_ns / 1e6, avg_us, min_us, max_us,
|
||||
bw_gbps);
|
||||
} else {
|
||||
GGML_LOG_INFO(
|
||||
" [%s] backend %d %-28s %7.1f%% count=%-6d total=%8.2f ms avg=%8.2f us min=%8.2f us max=%8.2f us",
|
||||
type_str[s.type], s.backend_id, s.name, pct, s.count, (double) s.total_ns / 1e6, avg_us, min_us,
|
||||
max_us);
|
||||
if (bw_gbps >= 1000.0) {
|
||||
GGML_LOG_INFO(" %6.2f TB/s", bw_gbps / 1000.0);
|
||||
} else {
|
||||
GGML_LOG_INFO(" %6.2f GB/s", bw_gbps);
|
||||
}
|
||||
}
|
||||
// Print representative tensor shape (first record's ne)
|
||||
if (s.representative_ne[0] > 0 || s.representative_ne[1] > 0) {
|
||||
|
|
@ -2565,16 +2617,22 @@ int ggml_backend_sched_write_profiling_json(ggml_backend_sched_t sched, FILE * f
|
|||
fprintf(fp, " \"total_records\": %d,\n", (int) sched->profiling_records.size());
|
||||
fprintf(fp, " \"total_ns\": %llu,\n", (unsigned long long) total_ns);
|
||||
|
||||
// Backend metadata
|
||||
// Backend metadata (use cached data if available, fall back to live pointers)
|
||||
fprintf(fp, " \"backends\": [\n");
|
||||
for (int b = 0; b < sched->n_backends; b++) {
|
||||
ggml_backend_t backend = sched->backends[b];
|
||||
const char * name = ggml_backend_name(backend);
|
||||
const char * dev_name = "unknown";
|
||||
int dev_type = 0;
|
||||
if (backend->device != NULL) {
|
||||
dev_name = ggml_backend_dev_name(backend->device);
|
||||
dev_type = (int) ggml_backend_dev_type(backend->device);
|
||||
const char * name = "unknown";
|
||||
const char * dev_name = "unknown";
|
||||
int dev_type = 0;
|
||||
if (b < (int) sched->profiling_backend_meta.size()) {
|
||||
name = sched->profiling_backend_meta[b].name.c_str();
|
||||
dev_name = sched->profiling_backend_meta[b].device.c_str();
|
||||
dev_type = sched->profiling_backend_meta[b].device_type;
|
||||
} else if (sched->backends[b] != NULL) {
|
||||
name = ggml_backend_name(sched->backends[b]);
|
||||
if (sched->backends[b]->device != NULL) {
|
||||
dev_name = ggml_backend_dev_name(sched->backends[b]->device);
|
||||
dev_type = (int) ggml_backend_dev_type(sched->backends[b]->device);
|
||||
}
|
||||
}
|
||||
fprintf(fp, " {\"id\": %d, \"name\": \"%s\", \"device\": \"%s\", \"device_type\": %d}%s\n", b, name,
|
||||
dev_name, dev_type, (b < sched->n_backends - 1) ? "," : "");
|
||||
|
|
@ -2600,11 +2658,13 @@ int ggml_backend_sched_write_profiling_json(ggml_backend_sched_t sched, FILE * f
|
|||
fprintf(fp, "null");
|
||||
}
|
||||
|
||||
// Tensor dimensions (both source tensors)
|
||||
// Tensor dimensions (all source tensors)
|
||||
fprintf(fp, ", \"ne_src0\": [%lld, %lld, %lld, %lld]", (long long) rec.ne_src0[0], (long long) rec.ne_src0[1],
|
||||
(long long) rec.ne_src0[2], (long long) rec.ne_src0[3]);
|
||||
fprintf(fp, ", \"ne_src1\": [%lld, %lld, %lld, %lld]", (long long) rec.ne_src1[0], (long long) rec.ne_src1[1],
|
||||
(long long) rec.ne_src1[2], (long long) rec.ne_src1[3]);
|
||||
fprintf(fp, ", \"ne_src2\": [%lld, %lld, %lld, %lld]", (long long) rec.ne_src2[0], (long long) rec.ne_src2[1],
|
||||
(long long) rec.ne_src2[2], (long long) rec.ne_src2[3]);
|
||||
|
||||
fprintf(fp, "}%s\n", (i < (int) sched->profiling_records.size() - 1) ? "," : "");
|
||||
}
|
||||
|
|
@ -2630,3 +2690,254 @@ int ggml_backend_sched_export_profiling_json(ggml_backend_sched_t sched, const c
|
|||
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Helper: format ne dimensions as string, e.g. "[4096, 4096, 1]"
|
||||
static void fmt_ne(char * buf, size_t bufsize, const int64_t ne[4]) {
|
||||
if (ne[0] == 0 && ne[1] == 0 && ne[2] == 0 && ne[3] == 0) {
|
||||
buf[0] = '\0';
|
||||
return;
|
||||
}
|
||||
int ndims = 4;
|
||||
while (ndims > 1 && ne[ndims - 1] <= 1) {
|
||||
ndims--;
|
||||
}
|
||||
int pos = snprintf(buf, bufsize, "[");
|
||||
for (int i = 0; i < ndims && pos < (int) bufsize - 1; i++) {
|
||||
pos += snprintf(buf + pos, bufsize - pos, "%s%lld", i > 0 ? ", " : "", (long long) ne[i]);
|
||||
}
|
||||
snprintf(buf + pos, bufsize - pos, "]");
|
||||
}
|
||||
|
||||
// Helper: format bandwidth as string
|
||||
static void fmt_bandwidth(char * buf, size_t bufsize, uint64_t bytes, uint64_t ns) {
|
||||
if (ns == 0 || bytes == 0) {
|
||||
buf[0] = '\0';
|
||||
return;
|
||||
}
|
||||
double bw_gbps = (double) bytes / (double) ns;
|
||||
if (bw_gbps >= 1000.0) {
|
||||
snprintf(buf, bufsize, "%.2f TB/s", bw_gbps / 1000.0);
|
||||
} else {
|
||||
snprintf(buf, bufsize, "%.2f GB/s", bw_gbps);
|
||||
}
|
||||
}
|
||||
|
||||
int ggml_backend_sched_write_profiling_text(ggml_backend_sched_t sched, FILE * fp) {
|
||||
GGML_ASSERT(sched);
|
||||
GGML_ASSERT(fp != NULL);
|
||||
|
||||
if (sched->profiling_records.empty()) {
|
||||
fprintf(fp, "No profiling data available.\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Aggregate by (name, type, backend_id)
|
||||
struct op_stats {
|
||||
const char * name;
|
||||
enum ggml_profile_event_type type;
|
||||
int backend_id;
|
||||
uint64_t total_ns;
|
||||
uint64_t min_ns;
|
||||
uint64_t max_ns;
|
||||
int count;
|
||||
uint64_t total_bytes;
|
||||
int64_t representative_ne_src0[4];
|
||||
int64_t representative_ne_src1[4];
|
||||
int64_t representative_ne_src2[4];
|
||||
};
|
||||
|
||||
std::vector<op_stats> stats;
|
||||
for (const auto & rec : sched->profiling_records) {
|
||||
uint64_t dur = (rec.end_ns > rec.start_ns) ? (rec.end_ns - rec.start_ns) : 0;
|
||||
bool found = false;
|
||||
for (auto & s : stats) {
|
||||
if (s.type == rec.type && s.backend_id == rec.backend_id && strcmp(s.name, rec.name) == 0) {
|
||||
s.total_ns += dur;
|
||||
s.min_ns = std::min(s.min_ns, dur);
|
||||
s.max_ns = std::max(s.max_ns, dur);
|
||||
s.count++;
|
||||
s.total_bytes += rec.bytes;
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
op_stats s = {};
|
||||
s.name = rec.name;
|
||||
s.type = rec.type;
|
||||
s.backend_id = rec.backend_id;
|
||||
s.total_ns = dur;
|
||||
s.min_ns = dur;
|
||||
s.max_ns = dur;
|
||||
s.count = 1;
|
||||
s.total_bytes = rec.bytes;
|
||||
memcpy(s.representative_ne_src0, rec.ne_src0, sizeof(s.representative_ne_src0));
|
||||
memcpy(s.representative_ne_src1, rec.ne_src1, sizeof(s.representative_ne_src1));
|
||||
memcpy(s.representative_ne_src2, rec.ne_src2, sizeof(s.representative_ne_src2));
|
||||
stats.push_back(s);
|
||||
}
|
||||
}
|
||||
|
||||
std::sort(stats.begin(), stats.end(),
|
||||
[](const op_stats & a, const op_stats & b) { return a.total_ns > b.total_ns; });
|
||||
|
||||
uint64_t grand_total = 0;
|
||||
for (const auto & s : stats) {
|
||||
grand_total += s.total_ns;
|
||||
}
|
||||
|
||||
// --- Section 1: Overall summary ---
|
||||
fprintf(fp, "=== Profiling Summary ===\n");
|
||||
fprintf(fp, "Total time: %.2f ms\n", (double) grand_total / 1e6);
|
||||
fprintf(fp, "Total records: %d\n", (int) sched->profiling_records.size());
|
||||
fprintf(fp, "Unique ops: %d\n\n", (int) stats.size());
|
||||
|
||||
// --- Section 2: Per-backend breakdown ---
|
||||
fprintf(fp, "=== Per-Backend Summary ===\n");
|
||||
{
|
||||
struct backend_stats {
|
||||
int backend_id;
|
||||
int op_count;
|
||||
int copy_count;
|
||||
uint64_t op_ns;
|
||||
uint64_t copy_ns;
|
||||
uint64_t op_bytes;
|
||||
uint64_t copy_bytes;
|
||||
};
|
||||
std::vector<backend_stats> bstats;
|
||||
for (const auto & s : stats) {
|
||||
bool found = false;
|
||||
for (auto & bs : bstats) {
|
||||
if (bs.backend_id == s.backend_id) {
|
||||
if (s.type == GGML_PROFILE_EVENT_OP) {
|
||||
bs.op_count += s.count;
|
||||
bs.op_ns += s.total_ns;
|
||||
bs.op_bytes += s.total_bytes;
|
||||
} else {
|
||||
bs.copy_count += s.count;
|
||||
bs.copy_ns += s.total_ns;
|
||||
bs.copy_bytes += s.total_bytes;
|
||||
}
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
backend_stats bs = {};
|
||||
bs.backend_id = s.backend_id;
|
||||
if (s.type == GGML_PROFILE_EVENT_OP) {
|
||||
bs.op_count = s.count;
|
||||
bs.op_ns = s.total_ns;
|
||||
bs.op_bytes = s.total_bytes;
|
||||
} else {
|
||||
bs.copy_count = s.count;
|
||||
bs.copy_ns = s.total_ns;
|
||||
bs.copy_bytes = s.total_bytes;
|
||||
}
|
||||
bstats.push_back(bs);
|
||||
}
|
||||
}
|
||||
std::sort(bstats.begin(), bstats.end(),
|
||||
[](const backend_stats & a, const backend_stats & b) {
|
||||
return (a.op_ns + a.copy_ns) > (b.op_ns + b.copy_ns);
|
||||
});
|
||||
|
||||
for (const auto & bs : bstats) {
|
||||
uint64_t total = bs.op_ns + bs.copy_ns;
|
||||
double pct = grand_total > 0 ? 100.0 * (double) total / (double) grand_total : 0;
|
||||
|
||||
const char * bname = "unknown";
|
||||
if (bs.backend_id >= 0 && bs.backend_id < (int) sched->profiling_backend_meta.size()) {
|
||||
bname = sched->profiling_backend_meta[bs.backend_id].name.c_str();
|
||||
} else if (bs.backend_id >= 0 && bs.backend_id < sched->n_backends && sched->backends[bs.backend_id] != NULL) {
|
||||
bname = ggml_backend_name(sched->backends[bs.backend_id]);
|
||||
}
|
||||
|
||||
fprintf(fp, " Backend %d (%s): %.2f ms (%.1f%%)\n", bs.backend_id, bname, (double) total / 1e6, pct);
|
||||
if (bs.op_count > 0) {
|
||||
char bw_buf[32];
|
||||
fmt_bandwidth(bw_buf, sizeof(bw_buf), bs.op_bytes, bs.op_ns);
|
||||
fprintf(fp, " OPs: %d calls, %.2f ms", bs.op_count, (double) bs.op_ns / 1e6);
|
||||
if (bw_buf[0]) {
|
||||
fprintf(fp, ", %s", bw_buf);
|
||||
}
|
||||
fprintf(fp, "\n");
|
||||
}
|
||||
if (bs.copy_count > 0) {
|
||||
char bw_buf[32];
|
||||
fmt_bandwidth(bw_buf, sizeof(bw_buf), bs.copy_bytes, bs.copy_ns);
|
||||
fprintf(fp, " COPYs: %d calls, %.2f ms", bs.copy_count, (double) bs.copy_ns / 1e6);
|
||||
if (bw_buf[0]) {
|
||||
fprintf(fp, ", %s", bw_buf);
|
||||
}
|
||||
fprintf(fp, "\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
fprintf(fp, "\n");
|
||||
|
||||
// --- Section 3: Detailed operation table ---
|
||||
fprintf(fp, "=== Operations (sorted by total time) ===\n");
|
||||
fprintf(fp, "%-5s %4s %-28s %7s %6s %10s %10s %10s %10s %12s %s\n",
|
||||
"TYPE", "BKND", "Operation", "%Time", "Count", "Total(ms)", "Avg(us)", "Min(us)", "Max(us)", "Bandwidth", "Tensors");
|
||||
fprintf(fp, "%-5s %4s %-28s %7s %6s %10s %10s %10s %10s %12s %s\n",
|
||||
"-----", "----", "----------------------------", "-------", "------",
|
||||
"----------", "----------", "----------", "----------", "------------", "-------");
|
||||
|
||||
const char * type_str[] = { "OP", "COPY" };
|
||||
for (const auto & s : stats) {
|
||||
double pct = grand_total > 0 ? 100.0 * (double) s.total_ns / (double) grand_total : 0;
|
||||
double avg_us = (double) s.total_ns / (double) s.count / 1000.0;
|
||||
double min_us = (double) s.min_ns / 1000.0;
|
||||
double max_us = (double) s.max_ns / 1000.0;
|
||||
|
||||
char bw_buf[32] = "";
|
||||
fmt_bandwidth(bw_buf, sizeof(bw_buf), s.total_bytes, s.total_ns);
|
||||
|
||||
char ne0_buf[64];
|
||||
char ne1_buf[64];
|
||||
char ne2_buf[64];
|
||||
fmt_ne(ne0_buf, sizeof(ne0_buf), s.representative_ne_src0);
|
||||
fmt_ne(ne1_buf, sizeof(ne1_buf), s.representative_ne_src1);
|
||||
fmt_ne(ne2_buf, sizeof(ne2_buf), s.representative_ne_src2);
|
||||
|
||||
// Build tensor shapes string
|
||||
char tensors_buf[256] = "";
|
||||
int tpos = 0;
|
||||
if (ne0_buf[0]) {
|
||||
tpos += snprintf(tensors_buf + tpos, sizeof(tensors_buf) - tpos, "%s", ne0_buf);
|
||||
}
|
||||
if (ne1_buf[0]) {
|
||||
tpos += snprintf(tensors_buf + tpos, sizeof(tensors_buf) - tpos, " x %s", ne1_buf);
|
||||
}
|
||||
if (ne2_buf[0]) {
|
||||
tpos += snprintf(tensors_buf + tpos, sizeof(tensors_buf) - tpos, " x %s", ne2_buf);
|
||||
}
|
||||
|
||||
fprintf(fp, "%-5s %4d %-28s %6.1f%% %6d %10.2f %10.2f %10.2f %10.2f %12s %s\n",
|
||||
type_str[s.type], s.backend_id, s.name, pct, s.count,
|
||||
(double) s.total_ns / 1e6, avg_us, min_us, max_us,
|
||||
bw_buf, tensors_buf);
|
||||
}
|
||||
|
||||
fprintf(fp, "\nTotal: %.2f ms (%d records, %d unique ops)\n", (double) grand_total / 1e6,
|
||||
(int) sched->profiling_records.size(), (int) stats.size());
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ggml_backend_sched_export_profiling_text(ggml_backend_sched_t sched, const char * filepath) {
|
||||
GGML_ASSERT(sched);
|
||||
GGML_ASSERT(filepath != NULL);
|
||||
|
||||
FILE * fp = fopen(filepath, "w");
|
||||
if (fp == NULL) {
|
||||
GGML_LOG_ERROR("%s: failed to open %s for writing\n", __func__, filepath);
|
||||
return -1;
|
||||
}
|
||||
|
||||
int ret = ggml_backend_sched_write_profiling_text(sched, fp);
|
||||
fclose(fp);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -276,6 +276,7 @@ static enum ggml_status ggml_backend_blas_graph_compute(ggml_backend_t backend,
|
|||
rec.extra = NULL;
|
||||
if (node->src[0]) { memcpy(rec.ne_src0, node->src[0]->ne, sizeof(rec.ne_src0)); } else { memset(rec.ne_src0, 0, sizeof(rec.ne_src0)); }
|
||||
if (node->src[1]) { memcpy(rec.ne_src1, node->src[1]->ne, sizeof(rec.ne_src1)); } else { memset(rec.ne_src1, 0, sizeof(rec.ne_src1)); }
|
||||
if (node->op == GGML_OP_MUL_MAT_ID && node->src[2]) { memcpy(rec.ne_src2, node->src[2]->ne, sizeof(rec.ne_src2)); } else { memset(rec.ne_src2, 0, sizeof(rec.ne_src2)); }
|
||||
ctx->profiling_records.push_back(rec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3009,9 +3009,10 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
|
|||
static const int64_t zero_ne[4] = {0, 0, 0, 0};
|
||||
const int64_t * src0_ne = node->src[0] ? node->src[0]->ne : zero_ne;
|
||||
const int64_t * src1_ne = node->src[1] ? node->src[1]->ne : zero_ne;
|
||||
const int64_t * src2_ne = (node->op == GGML_OP_MUL_MAT_ID && node->src[2]) ? node->src[2]->ne : zero_ne;
|
||||
cplan->profiling_record_fn(cplan->profiling_context, 0 /* GGML_PROFILE_EVENT_OP */,
|
||||
ggml_op_name(node->op), -1, t_start, t_end, ggml_nbytes(node), NULL,
|
||||
src0_ne, src1_ne);
|
||||
src0_ne, src1_ne, src2_ne);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -183,7 +183,8 @@ static void ggml_cpu_profiler_record_callback(void * context,
|
|||
uint64_t bytes,
|
||||
const char * extra,
|
||||
const int64_t ne_src0[4],
|
||||
const int64_t ne_src1[4]) {
|
||||
const int64_t ne_src1[4],
|
||||
const int64_t ne_src2[4]) {
|
||||
auto * cpu_ctx = (ggml_backend_cpu_context *) context;
|
||||
ggml_profile_record rec;
|
||||
rec.type = (enum ggml_profile_event_type) type;
|
||||
|
|
@ -204,6 +205,11 @@ static void ggml_cpu_profiler_record_callback(void * context,
|
|||
} else {
|
||||
memset(rec.ne_src1, 0, sizeof(rec.ne_src1));
|
||||
}
|
||||
if (ne_src2) {
|
||||
memcpy(rec.ne_src2, ne_src2, sizeof(rec.ne_src2));
|
||||
} else {
|
||||
memset(rec.ne_src2, 0, sizeof(rec.ne_src2));
|
||||
}
|
||||
cpu_ctx->profiling_records.push_back(rec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -138,7 +138,7 @@ struct ggml_cuda_profiler_state {
|
|||
}
|
||||
|
||||
void record_end(const char * name, int backend_id, int split_id, uint64_t bytes, const char * extra,
|
||||
const int64_t ne_src0[4], const int64_t ne_src1[4]) {
|
||||
const int64_t ne_src0[4], const int64_t ne_src1[4], const int64_t ne_src2[4]) {
|
||||
cudaEvent_t ev;
|
||||
(void) cudaEventCreate(&ev);
|
||||
(void) cudaEventRecord(ev, stream);
|
||||
|
|
@ -156,6 +156,7 @@ struct ggml_cuda_profiler_state {
|
|||
rec.extra = extra;
|
||||
if (ne_src0) { memcpy(rec.ne_src0, ne_src0, sizeof(rec.ne_src0)); } else { memset(rec.ne_src0, 0, sizeof(rec.ne_src0)); }
|
||||
if (ne_src1) { memcpy(rec.ne_src1, ne_src1, sizeof(rec.ne_src1)); } else { memset(rec.ne_src1, 0, sizeof(rec.ne_src1)); }
|
||||
if (ne_src2) { memcpy(rec.ne_src2, ne_src2, sizeof(rec.ne_src2)); } else { memset(rec.ne_src2, 0, sizeof(rec.ne_src2)); }
|
||||
records.push_back(rec);
|
||||
}
|
||||
|
||||
|
|
@ -4137,7 +4138,8 @@ static void ggml_cuda_graph_evaluate_and_capture(ggml_backend_cuda_context * cud
|
|||
ggml_nbytes(node),
|
||||
nullptr,
|
||||
node->src[0] ? node->src[0]->ne : nullptr,
|
||||
node->src[1] ? node->src[1]->ne : nullptr
|
||||
node->src[1] ? node->src[1]->ne : nullptr,
|
||||
(node->op == GGML_OP_MUL_MAT_ID && node->src[2]) ? node->src[2]->ne : nullptr
|
||||
);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -14682,6 +14682,7 @@ static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml_cg
|
|||
static const int64_t zero_ne[4] = {0, 0, 0, 0};
|
||||
const int64_t * src0_ne = node->src[0] ? node->src[0]->ne : zero_ne;
|
||||
const int64_t * src1_ne = node->src[1] ? node->src[1]->ne : zero_ne;
|
||||
const int64_t * src2_ne = (node->op == GGML_OP_MUL_MAT_ID && node->src[2]) ? node->src[2]->ne : zero_ne;
|
||||
uint64_t cpu_ts = (i < (int)ctx->profiler_state->cpu_timestamps.size())
|
||||
? ctx->profiler_state->cpu_timestamps[i] : 0;
|
||||
|
||||
|
|
@ -14696,6 +14697,7 @@ static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml_cg
|
|||
rec.extra = name; // fusion name or NULL
|
||||
memcpy(rec.ne_src0, src0_ne, sizeof(rec.ne_src0));
|
||||
memcpy(rec.ne_src1, src1_ne, sizeof(rec.ne_src1));
|
||||
memcpy(rec.ne_src2, src2_ne, sizeof(rec.ne_src2));
|
||||
ctx->profiler_state->records.push_back(rec);
|
||||
}
|
||||
}
|
||||
|
|
@ -14724,27 +14726,31 @@ static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml_cg
|
|||
if (has_profiler && !nodes.empty()) {
|
||||
uint64_t cpu_ts = (i < (int)ctx->profiler_state->cpu_timestamps.size())
|
||||
? ctx->profiler_state->cpu_timestamps[i] : 0;
|
||||
// In concurrent mode, distribute duration evenly across ops in group
|
||||
uint64_t per_op_ns = duration_ns / nodes.size();
|
||||
for (size_t j = 0; j < nodes.size(); j++) {
|
||||
auto * node = nodes[j];
|
||||
static const int64_t zero_ne[4] = {0, 0, 0, 0};
|
||||
const int64_t * src0_ne = node->src[0] ? node->src[0]->ne : zero_ne;
|
||||
const int64_t * src1_ne = node->src[1] ? node->src[1]->ne : zero_ne;
|
||||
// In concurrent mode, report the group as a single combined operation
|
||||
auto * node = nodes[0];
|
||||
static const int64_t zero_ne[4] = {0, 0, 0, 0};
|
||||
const int64_t * src0_ne = node->src[0] ? node->src[0]->ne : zero_ne;
|
||||
const int64_t * src1_ne = node->src[1] ? node->src[1]->ne : zero_ne;
|
||||
const int64_t * src2_ne = (node->op == GGML_OP_MUL_MAT_ID && node->src[2]) ? node->src[2]->ne : zero_ne;
|
||||
|
||||
ggml_profile_record rec;
|
||||
rec.type = GGML_PROFILE_EVENT_OP;
|
||||
rec.name = ggml_op_name(node->op);
|
||||
rec.backend_id = -1;
|
||||
rec.split_id = ctx->profiler_state->split_id;
|
||||
rec.start_ns = cpu_ts + j * per_op_ns;
|
||||
rec.end_ns = cpu_ts + (j + 1) * per_op_ns;
|
||||
rec.bytes = ggml_nbytes(node);
|
||||
rec.extra = names[j];
|
||||
memcpy(rec.ne_src0, src0_ne, sizeof(rec.ne_src0));
|
||||
memcpy(rec.ne_src1, src1_ne, sizeof(rec.ne_src1));
|
||||
ctx->profiler_state->records.push_back(rec);
|
||||
uint64_t total_bytes = 0;
|
||||
for (size_t j = 0; j < nodes.size(); j++) {
|
||||
total_bytes += ggml_nbytes(nodes[j]);
|
||||
}
|
||||
|
||||
ggml_profile_record rec;
|
||||
rec.type = GGML_PROFILE_EVENT_OP;
|
||||
rec.name = ggml_op_name(node->op);
|
||||
rec.backend_id = -1;
|
||||
rec.split_id = ctx->profiler_state->split_id;
|
||||
rec.start_ns = cpu_ts;
|
||||
rec.end_ns = cpu_ts + duration_ns;
|
||||
rec.bytes = total_bytes;
|
||||
rec.extra = names[0]; // fusion name of first op, or NULL
|
||||
memcpy(rec.ne_src0, src0_ne, sizeof(rec.ne_src0));
|
||||
memcpy(rec.ne_src1, src1_ne, sizeof(rec.ne_src1));
|
||||
memcpy(rec.ne_src2, src2_ne, sizeof(rec.ne_src2));
|
||||
ctx->profiler_state->records.push_back(rec);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -651,11 +651,17 @@ int main(int argc, char ** argv) {
|
|||
if (params.profiling_output.empty()) {
|
||||
ggml_backend_sched_print_profiling(sched);
|
||||
} else {
|
||||
int ret = ggml_backend_sched_export_profiling_json(sched, params.profiling_output.c_str());
|
||||
if (ret == 0) {
|
||||
console::log("\nProfiling data exported to: %s\n", params.profiling_output.c_str());
|
||||
const std::string & path = params.profiling_output;
|
||||
int ret;
|
||||
if (path.size() >= 4 && path.compare(path.size() - 4, 4, ".txt") == 0) {
|
||||
ret = ggml_backend_sched_export_profiling_text(sched, path.c_str());
|
||||
} else {
|
||||
console::error("\nFailed to export profiling data to: %s\n", params.profiling_output.c_str());
|
||||
ret = ggml_backend_sched_export_profiling_json(sched, path.c_str());
|
||||
}
|
||||
if (ret == 0) {
|
||||
console::log("\nProfiling data exported to: %s\n", path.c_str());
|
||||
} else {
|
||||
console::error("\nFailed to export profiling data to: %s\n", path.c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -998,11 +998,17 @@ int main(int argc, char ** argv) {
|
|||
if (params.profiling_output.empty()) {
|
||||
ggml_backend_sched_print_profiling(sched);
|
||||
} else {
|
||||
int ret = ggml_backend_sched_export_profiling_json(sched, params.profiling_output.c_str());
|
||||
if (ret == 0) {
|
||||
LOG("\nProfiling data exported to: %s\n", params.profiling_output.c_str());
|
||||
const std::string & path = params.profiling_output;
|
||||
int ret;
|
||||
if (path.size() >= 4 && path.compare(path.size() - 4, 4, ".txt") == 0) {
|
||||
ret = ggml_backend_sched_export_profiling_text(sched, path.c_str());
|
||||
} else {
|
||||
LOG_ERR("\nFailed to export profiling data to: %s\n", params.profiling_output.c_str());
|
||||
ret = ggml_backend_sched_export_profiling_json(sched, path.c_str());
|
||||
}
|
||||
if (ret == 0) {
|
||||
LOG("\nProfiling data exported to: %s\n", path.c_str());
|
||||
} else {
|
||||
LOG_ERR("\nFailed to export profiling data to: %s\n", path.c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@ class ProfileRecord:
|
|||
extra: Optional[str]
|
||||
ne_src0: list[int] = field(default_factory=lambda: [0, 0, 0, 0])
|
||||
ne_src1: list[int] = field(default_factory=lambda: [0, 0, 0, 0])
|
||||
ne_src2: list[int] = field(default_factory=lambda: [0, 0, 0, 0])
|
||||
|
||||
@property
|
||||
def type_name(self) -> str:
|
||||
|
|
@ -48,7 +49,7 @@ class ProfileRecord:
|
|||
|
||||
@property
|
||||
def bandwidth_gbps(self) -> float:
|
||||
"""Bandwidth in GB/s (only meaningful for copy events)."""
|
||||
"""Bandwidth in GB/s."""
|
||||
if self.duration_ns == 0 or self.bytes == 0:
|
||||
return 0.0
|
||||
return self.bytes / self.duration_ns
|
||||
|
|
@ -62,12 +63,12 @@ class ProfileRecord:
|
|||
|
||||
@property
|
||||
def shape_str(self) -> str:
|
||||
"""Human-readable tensor shapes, e.g. '[4096, 4096] x [4096, 1]'."""
|
||||
"""Human-readable tensor shapes, e.g. '[4096, 4096] x [4096, 1] x [8, 1]'."""
|
||||
s0 = self._fmt_ne(self.ne_src0)
|
||||
s1 = self._fmt_ne(self.ne_src1)
|
||||
if s0 and s1:
|
||||
return s0 + " x " + s1
|
||||
return s0 or s1
|
||||
s2 = self._fmt_ne(self.ne_src2)
|
||||
parts = [s for s in (s0, s1, s2) if s]
|
||||
return " x ".join(parts)
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
return {
|
||||
|
|
@ -81,6 +82,7 @@ class ProfileRecord:
|
|||
"extra": self.extra,
|
||||
"ne_src0": self.ne_src0,
|
||||
"ne_src1": self.ne_src1,
|
||||
"ne_src2": self.ne_src2,
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -160,6 +162,7 @@ class ProfileData:
|
|||
# Support both old "ne" format and new "ne_src0"/"ne_src1" format
|
||||
ne_src0 = _pad_ne(r.get("ne_src0", r.get("ne", [0, 0, 0, 0])))
|
||||
ne_src1 = _pad_ne(r.get("ne_src1", [0, 0, 0, 0]))
|
||||
ne_src2 = _pad_ne(r.get("ne_src2", [0, 0, 0, 0]))
|
||||
records.append(ProfileRecord(
|
||||
type=r.get("type", 0),
|
||||
name=r.get("name", "unknown"),
|
||||
|
|
@ -171,6 +174,7 @@ class ProfileData:
|
|||
extra=r.get("extra"),
|
||||
ne_src0=ne_src0,
|
||||
ne_src1=ne_src1,
|
||||
ne_src2=ne_src2,
|
||||
))
|
||||
|
||||
backends_raw = data.get("backends", [])
|
||||
|
|
@ -267,9 +271,9 @@ class ProfileData:
|
|||
return
|
||||
|
||||
print(f" {'TYPE':<5} {'BKND':>4} {'Operation':<28} {'%Time':>7} {'Count':>6} "
|
||||
f"{'Total':>10} {'Avg':>10} {'Min':>10} {'Max':>10} {'Bytes':>10}")
|
||||
f"{'Total':>10} {'Avg':>10} {'Min':>10} {'Max':>10} {'Bandwidth':>12}")
|
||||
print(f" {'':->5} {'':->4} {'':->28} {'':->7} {'':->6} "
|
||||
f"{'(ms)':>10} {'(us)':>10} {'(us)':>10} {'(us)':>10} {'':->10}")
|
||||
f"{'(ms)':>10} {'(us)':>10} {'(us)':>10} {'(us)':>10} {'':->12}")
|
||||
|
||||
for s in stats:
|
||||
pct = 100.0 * s.total_ns / self.total_ns if self.total_ns > 0 else 0
|
||||
|
|
@ -278,15 +282,14 @@ class ProfileData:
|
|||
f"{s.count:>6} {s.total_ms:>10.2f} {s.avg_us:>10.2f} "
|
||||
f"{s.min_us:>10.2f} {s.max_us:>10.2f}")
|
||||
|
||||
if s.total_bytes > 0:
|
||||
if s.total_bytes > 0 and s.total_ns > 0:
|
||||
bw = s.bandwidth_gbps
|
||||
bytes_str = f"{s.total_bytes / 1e6:.1f} MB"
|
||||
if s.event_type == COPY_EVENT:
|
||||
line += f" {bw:>8.2f} GB/s"
|
||||
if bw >= 1000.0:
|
||||
line += f" {bw / 1000.0:>9.2f} TB/s"
|
||||
else:
|
||||
line += f" {bytes_str:>10}"
|
||||
line += f" {bw:>9.2f} GB/s"
|
||||
else:
|
||||
line += f" {'':>10}"
|
||||
line += f" {'':>12}"
|
||||
|
||||
# Tensor shape from longest call
|
||||
shape_dims = [n for n in s.representative_ne if n > 0]
|
||||
|
|
@ -807,19 +810,19 @@ function buildStats(){
|
|||
var ops={};
|
||||
for(var i=0;i<EVENTS.length;i++){
|
||||
var ev=EVENTS[i];
|
||||
if(!ops[ev.n])ops[ev.n]={name:ev.n,d:0,count:0,min:Infinity,max:0,backends:{}};
|
||||
if(!ops[ev.n])ops[ev.n]={name:ev.n,d:0,count:0,min:Infinity,max:0,bytes:0,backends:{}};
|
||||
var op=ops[ev.n];
|
||||
op.d+=ev.d;op.count++;
|
||||
op.d+=ev.d;op.count++;op.bytes+=(ev.b||0);
|
||||
if(ev.d<op.min)op.min=ev.d;if(ev.d>op.max)op.max=ev.d;
|
||||
var bk=String(ev.bid);
|
||||
if(!op.backends[bk])op.backends[bk]={bid:ev.bid,d:0,count:0,min:Infinity,max:0,shapes:{}};
|
||||
if(!op.backends[bk])op.backends[bk]={bid:ev.bid,d:0,count:0,min:Infinity,max:0,bytes:0,shapes:{}};
|
||||
var b=op.backends[bk];
|
||||
b.d+=ev.d;b.count++;
|
||||
b.d+=ev.d;b.count++;b.bytes+=(ev.b||0);
|
||||
if(ev.d<b.min)b.min=ev.d;if(ev.d>b.max)b.max=ev.d;
|
||||
var sh=ev.s||'\u2014';
|
||||
if(!b.shapes[sh])b.shapes[sh]={d:0,count:0,min:Infinity,max:0};
|
||||
if(!b.shapes[sh])b.shapes[sh]={d:0,count:0,min:Infinity,max:0,bytes:0};
|
||||
var s=b.shapes[sh];
|
||||
s.d+=ev.d;s.count++;
|
||||
s.d+=ev.d;s.count++;s.bytes+=(ev.b||0);
|
||||
if(ev.d<s.min)s.min=ev.d;if(ev.d>s.max)s.max=ev.d;
|
||||
}
|
||||
var sorted=[];for(var k in ops)sorted.push(ops[k]);
|
||||
|
|
@ -832,7 +835,7 @@ function buildStats(){
|
|||
var opId=rid++;
|
||||
var bkeys=[];for(var bk in op.backends)bkeys.push(bk);
|
||||
bkeys.sort(function(a,b){return op.backends[b].d-op.backends[a].d;});
|
||||
rows.push({id:opId,p:-1,lv:0,name:op.name,d:op.d,count:op.count,
|
||||
rows.push({id:opId,p:-1,lv:0,name:op.name,d:op.d,count:op.count,bytes:op.bytes,
|
||||
min:op.min,max:op.max,pct:op.d/TOTAL_US*100,ch:bkeys.length>0});
|
||||
for(var bi=0;bi<bkeys.length;bi++){
|
||||
var bdata=op.backends[bkeys[bi]];
|
||||
|
|
@ -840,25 +843,33 @@ function buildStats(){
|
|||
var bname=BACKENDS[bdata.bid]||('B'+bdata.bid);
|
||||
var skeys=[];for(var sk in bdata.shapes)skeys.push(sk);
|
||||
skeys.sort(function(a,b){return bdata.shapes[b].d-bdata.shapes[a].d;});
|
||||
rows.push({id:bId,p:opId,lv:1,name:bname,d:bdata.d,count:bdata.count,
|
||||
rows.push({id:bId,p:opId,lv:1,name:bname,d:bdata.d,count:bdata.count,bytes:bdata.bytes,
|
||||
min:bdata.min,max:bdata.max,pct:bdata.d/TOTAL_US*100,ch:skeys.length>0});
|
||||
for(var si=0;si<skeys.length;si++){
|
||||
var sdata=bdata.shapes[skeys[si]];
|
||||
var sId=rid++;
|
||||
rows.push({id:sId,p:bId,lv:2,name:skeys[si],d:sdata.d,count:sdata.count,
|
||||
rows.push({id:sId,p:bId,lv:2,name:skeys[si],d:sdata.d,count:sdata.count,bytes:sdata.bytes,
|
||||
min:sdata.min,max:sdata.max,pct:sdata.d/TOTAL_US*100,ch:false});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Render
|
||||
var h='<table><thead><tr><th style="width:30%">Operation</th>'
|
||||
+'<th class="r" style="width:12%">% Time</th>'
|
||||
+'<th class="r" style="width:12%">Total</th>'
|
||||
+'<th class="r" style="width:10%">Count</th>'
|
||||
+'<th class="r" style="width:12%">Avg</th>'
|
||||
+'<th class="r" style="width:12%">Min</th>'
|
||||
+'<th class="r" style="width:12%">Max</th>'
|
||||
function fmtBW(bytes,d_us){
|
||||
if(!bytes||!d_us)return'\u2014';
|
||||
var gbps=bytes/(d_us*1000);
|
||||
if(gbps>=1000)return(gbps/1000).toFixed(2)+' TB/s';
|
||||
if(gbps>=1)return gbps.toFixed(2)+' GB/s';
|
||||
return(gbps*1000).toFixed(1)+' MB/s';
|
||||
}
|
||||
var h='<table><thead><tr><th style="width:26%">Operation</th>'
|
||||
+'<th class="r" style="width:10%">% Time</th>'
|
||||
+'<th class="r" style="width:10%">Total</th>'
|
||||
+'<th class="r" style="width:8%">Count</th>'
|
||||
+'<th class="r" style="width:10%">Avg</th>'
|
||||
+'<th class="r" style="width:10%">Min</th>'
|
||||
+'<th class="r" style="width:10%">Max</th>'
|
||||
+'<th class="r" style="width:10%">Bandwidth</th>'
|
||||
+'</tr></thead><tbody>';
|
||||
|
||||
for(var ri=0;ri<rows.length;ri++){
|
||||
|
|
@ -883,6 +894,7 @@ function buildStats(){
|
|||
h+='<td class="r">'+fmtT(r.d/r.count)+'</td>';
|
||||
h+='<td class="r">'+fmtT(r.min)+'</td>';
|
||||
h+='<td class="r">'+fmtT(r.max)+'</td>';
|
||||
h+='<td class="r">'+fmtBW(r.bytes,r.d)+'</td>';
|
||||
h+='</tr>';
|
||||
}
|
||||
h+='</tbody></table>';
|
||||
|
|
|
|||
Loading…
Reference in New Issue