Converge implementation with export-graph-ops
This commit is contained in:
parent
65ab03a3a4
commit
bd6f8008a9
|
|
@ -2693,6 +2693,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|||
}
|
||||
).set_examples({LLAMA_EXAMPLE_IMATRIX, LLAMA_EXAMPLE_CVECTOR_GENERATOR, LLAMA_EXAMPLE_EXPORT_LORA, LLAMA_EXAMPLE_TTS, LLAMA_EXAMPLE_FINETUNE,
|
||||
LLAMA_EXAMPLE_RESULTS, LLAMA_EXAMPLE_EXPORT_GRAPH_OPS}));
|
||||
add_opt(common_arg(
|
||||
{"--with-backends"},
|
||||
"export graph ops with backend assignments (default: CPU only)",
|
||||
[](common_params & params) {
|
||||
params.with_backends = true;
|
||||
}
|
||||
).set_examples({LLAMA_EXAMPLE_EXPORT_GRAPH_OPS}));
|
||||
add_opt(common_arg(
|
||||
{"-ofreq", "--output-frequency"}, "N",
|
||||
string_format("output the imatrix every N iterations (default: %d)", params.n_out_freq),
|
||||
|
|
|
|||
|
|
@ -438,6 +438,7 @@ struct common_params {
|
|||
int32_t main_gpu = 0; // the GPU that is used for scratch and small tensors
|
||||
float tensor_split[128] = {0}; // how split tensors should be distributed across GPUs
|
||||
bool fit_params = true; // whether to fit unset model/context parameters to free device memory
|
||||
bool with_backends = false; // export graph ops with backend assignments
|
||||
int32_t fit_params_min_ctx = 4096; // minimum context size to set when trying to reduce memory use
|
||||
|
||||
// margin per device in bytes for fitting parameters to free memory:
|
||||
|
|
|
|||
|
|
@ -1526,7 +1526,7 @@ static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t s
|
|||
|
||||
sched->copy_records.push_back({ GGML_PROFILE_EVENT_COPY, copy_dir, split_backend_id, split_id,
|
||||
copy_start, copy_end, ggml_nbytes(input), input->name,
|
||||
{input->ne[0], input->ne[1], input->ne[2], input->ne[3]}, {0}, {0} });
|
||||
{input->ne[0], input->ne[1], input->ne[2], input->ne[3]}, {0}, {0}, -1, -1, -1, -1 });
|
||||
} else {
|
||||
ggml_backend_tensor_copy(input, input_cpy);
|
||||
}
|
||||
|
|
@ -1647,7 +1647,7 @@ static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t s
|
|||
sched->copy_records.push_back({ GGML_PROFILE_EVENT_COPY, copy_dir, split_backend_id,
|
||||
split_id, moe_copy_start, moe_copy_end,
|
||||
(uint64_t) total_copied_bytes, input->name,
|
||||
{input->ne[0], input->ne[1], input->ne[2], input->ne[3]}, {0}, {0} });
|
||||
{input->ne[0], input->ne[1], input->ne[2], input->ne[3]}, {0}, {0}, -1, -1, -1, -1 });
|
||||
}
|
||||
} else {
|
||||
// try async copy, but if not possible, we can still use a sync copy without synchronizing the dst backend, since we handle the synchronization here with multiple copies and events
|
||||
|
|
@ -1684,7 +1684,7 @@ static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t s
|
|||
|
||||
sched->copy_records.push_back({ GGML_PROFILE_EVENT_COPY, copy_dir, split_backend_id,
|
||||
split_id, copy_start, copy_end, ggml_nbytes(input), input->name,
|
||||
{input->ne[0], input->ne[1], input->ne[2], input->ne[3]}, {0}, {0} });
|
||||
{input->ne[0], input->ne[1], input->ne[2], input->ne[3]}, {0}, {0}, -1, -1, -1, -1 });
|
||||
} else {
|
||||
ggml_backend_tensor_copy(input, input_cpy);
|
||||
}
|
||||
|
|
@ -1705,7 +1705,7 @@ static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t s
|
|||
|
||||
sched->copy_records.push_back({ GGML_PROFILE_EVENT_COPY, copy_dir, split_backend_id,
|
||||
split_id, copy_start, copy_end, ggml_nbytes(input), input->name,
|
||||
{input->ne[0], input->ne[1], input->ne[2], input->ne[3]}, {0}, {0} });
|
||||
{input->ne[0], input->ne[1], input->ne[2], input->ne[3]}, {0}, {0}, -1, -1, -1, -1 });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,7 +5,6 @@
|
|||
#include "../src/llama-ext.h"
|
||||
#include "ggml.h"
|
||||
#include "gguf-model-data.h"
|
||||
#include "gguf.h"
|
||||
#include "ggml-backend.h"
|
||||
#include "download.h"
|
||||
|
||||
|
|
@ -14,7 +13,6 @@
|
|||
#include <set>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <random>
|
||||
|
||||
// Noop because weights are not needed
|
||||
static void set_tensor_data(struct ggml_tensor * tensor, void * userdata) {
|
||||
|
|
@ -55,6 +53,7 @@ struct test_object {
|
|||
std::vector<int32_t> op_params;
|
||||
std::vector<input_tensor> sources;
|
||||
std::string name;
|
||||
std::string backend_name;
|
||||
|
||||
void serialize(std::ostream& out) const {
|
||||
out << op << ' ' << type << ' ';
|
||||
|
|
@ -78,16 +77,21 @@ struct test_object {
|
|||
out << '-';
|
||||
}
|
||||
|
||||
if (!backend_name.empty()) {
|
||||
out << ' ' << backend_name;
|
||||
}
|
||||
|
||||
out << '\n';
|
||||
}
|
||||
|
||||
bool operator<(const test_object &b) const {
|
||||
return std::tie(op, type, ne, op_params, sources) <
|
||||
std::tie(b.op, b.type, b.ne, b.op_params, b.sources);
|
||||
return std::tie(op, type, ne, op_params, sources, backend_name) <
|
||||
std::tie(b.op, b.type, b.ne, b.op_params, b.sources, b.backend_name);
|
||||
}
|
||||
};
|
||||
|
||||
static void extract_graph_ops(ggml_cgraph * cgraph, const char * label, std::set<test_object> & tests) {
|
||||
static void extract_graph_ops(ggml_cgraph * cgraph, const char * label, std::set<test_object> & tests,
|
||||
ggml_backend_sched_t sched = nullptr) {
|
||||
int n_nodes = ggml_graph_n_nodes(cgraph);
|
||||
int n_skipped = 0;
|
||||
int n_before = (int) tests.size();
|
||||
|
|
@ -117,6 +121,14 @@ static void extract_graph_ops(ggml_cgraph * cgraph, const char * label, std::set
|
|||
}
|
||||
|
||||
test.name = node->name;
|
||||
|
||||
if (sched) {
|
||||
ggml_backend_t backend = ggml_backend_sched_get_tensor_backend(sched, node);
|
||||
if (backend) {
|
||||
test.backend_name = ggml_backend_name(backend);
|
||||
}
|
||||
}
|
||||
|
||||
tests.insert(test);
|
||||
}
|
||||
|
||||
|
|
@ -135,11 +147,12 @@ int main(int argc, char ** argv) {
|
|||
return 1;
|
||||
}
|
||||
|
||||
// Load CPU-only
|
||||
ggml_backend_dev_t cpu_device = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
|
||||
params.devices = { cpu_device, nullptr };
|
||||
params.fit_params = false;
|
||||
params.n_gpu_layers = 0;
|
||||
if (!params.with_backends) {
|
||||
ggml_backend_dev_t cpu_device = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
|
||||
params.devices = { cpu_device, nullptr };
|
||||
params.fit_params = false;
|
||||
params.n_gpu_layers = 0;
|
||||
}
|
||||
|
||||
params.warmup = false;
|
||||
|
||||
|
|
@ -195,19 +208,21 @@ int main(int argc, char ** argv) {
|
|||
|
||||
std::set<test_object> tests;
|
||||
|
||||
ggml_backend_sched_t sched = params.with_backends ? llama_context_get_sched(ctx) : nullptr;
|
||||
|
||||
auto * gf_pp = llama_graph_reserve(ctx, n_tokens, n_seqs, n_tokens);
|
||||
if (!gf_pp) {
|
||||
LOG_ERR("failed to reserve prompt processing graph\n");
|
||||
return 1;
|
||||
}
|
||||
extract_graph_ops(gf_pp, "pp", tests);
|
||||
extract_graph_ops(gf_pp, "pp", tests, sched);
|
||||
|
||||
auto * gf_tg = llama_graph_reserve(ctx, n_seqs, n_seqs, n_seqs);
|
||||
if (!gf_tg) {
|
||||
LOG_ERR("failed to reserve token generation graph\n");
|
||||
return 1;
|
||||
}
|
||||
extract_graph_ops(gf_tg, "tg", tests);
|
||||
extract_graph_ops(gf_tg, "tg", tests, sched);
|
||||
|
||||
LOG_INF("%d unique ops total\n", (int) tests.size());
|
||||
|
||||
|
|
|
|||
|
|
@ -20,8 +20,6 @@
|
|||
#include <ggml-backend.h>
|
||||
#include <ggml-cpp.h>
|
||||
|
||||
#include <nlohmann/json.hpp>
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cfloat>
|
||||
|
|
@ -8962,7 +8960,7 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() {
|
|||
return test_cases;
|
||||
}
|
||||
|
||||
static std::vector<std::unique_ptr<test_case>> make_test_cases_from_file(const char * path) {
|
||||
static std::vector<std::unique_ptr<test_case>> make_test_cases_from_file(const char * path, const char * backend_name = nullptr) {
|
||||
std::ifstream f(path);
|
||||
|
||||
if (!f.is_open()) {
|
||||
|
|
@ -9020,20 +9018,25 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_from_file(const c
|
|||
name = "";
|
||||
}
|
||||
|
||||
std::string file_backend;
|
||||
if (iss >> file_backend) {
|
||||
if (file_backend.length() == 1 && file_backend[0] == '-') {
|
||||
file_backend = "";
|
||||
}
|
||||
}
|
||||
|
||||
if (backend_name != nullptr && !file_backend.empty() && file_backend != backend_name) {
|
||||
continue;
|
||||
}
|
||||
|
||||
test_cases.emplace_back(new test_generic_op(op, type, ne, op_params, sources, std::move(name)));
|
||||
}
|
||||
|
||||
return test_cases;
|
||||
}
|
||||
|
||||
struct profile_test_plan;
|
||||
|
||||
static profile_test_plan make_test_plan_from_profile(
|
||||
const char * profile_path, int top_n);
|
||||
|
||||
static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op_names_filter, const char * params_filter,
|
||||
printer * output_printer, const char * test_file_path,
|
||||
std::vector<std::unique_ptr<test_case>> profile_test_cases = {}) {
|
||||
printer * output_printer, const char * test_file_path) {
|
||||
auto filter_test_cases = [](std::vector<std::unique_ptr<test_case>> & test_cases, const char * params_filter) {
|
||||
if (params_filter == nullptr) {
|
||||
return;
|
||||
|
|
@ -9054,22 +9057,18 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
|
|||
std::vector<std::unique_ptr<test_case>> test_cases;
|
||||
|
||||
if (test_file_path == nullptr) {
|
||||
if (!profile_test_cases.empty()) {
|
||||
test_cases = std::move(profile_test_cases);
|
||||
} else {
|
||||
switch (mode) {
|
||||
case MODE_TEST:
|
||||
case MODE_GRAD:
|
||||
case MODE_SUPPORT:
|
||||
test_cases = make_test_cases_eval();
|
||||
break;
|
||||
case MODE_PERF:
|
||||
test_cases = make_test_cases_perf();
|
||||
break;
|
||||
}
|
||||
switch (mode) {
|
||||
case MODE_TEST:
|
||||
case MODE_GRAD:
|
||||
case MODE_SUPPORT:
|
||||
test_cases = make_test_cases_eval();
|
||||
break;
|
||||
case MODE_PERF:
|
||||
test_cases = make_test_cases_perf();
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
test_cases = make_test_cases_from_file(test_file_path);
|
||||
test_cases = make_test_cases_from_file(test_file_path, ggml_backend_name(backend));
|
||||
}
|
||||
|
||||
filter_test_cases(test_cases, params_filter);
|
||||
|
|
@ -9252,7 +9251,7 @@ static void show_test_coverage() {
|
|||
|
||||
static void usage(char ** argv) {
|
||||
printf("Usage: %s [mode] [-o <op,..>] [-b <backend>] [-p <params regex>] [--output <console|sql|csv>] [--list-ops]", argv[0]);
|
||||
printf(" [--show-coverage] [--test-file <path>] [--from-profile <path>] [--top-n <N>]\n");
|
||||
printf(" [--show-coverage] [--test-file <path>]\n");
|
||||
printf(" valid modes:\n");
|
||||
printf(" - test (default, compare with CPU backend for correctness)\n");
|
||||
printf(" - grad (compare gradients from backpropagation with method of finite differences)\n");
|
||||
|
|
@ -9263,436 +9262,31 @@ static void usage(char ** argv) {
|
|||
printf(" --output specifies output format (default: console, options: console, sql, csv)\n");
|
||||
printf(" --list-ops lists all available GGML operations\n");
|
||||
printf(" --show-coverage shows test coverage\n");
|
||||
printf(" --test-file reads test operators from a test file generated by llama-export-graph-ops\n");
|
||||
}
|
||||
|
||||
// ##############################
|
||||
// ## Profiler-based perf ##
|
||||
// ##############################
|
||||
|
||||
// Translate an op name taken from a profile record into the matching ggml_op.
//
// Only the names listed in the table below are recognised. Any other name
// returns GGML_OP_COUNT, which serves as the "no match" sentinel here.
static ggml_op profile_name_to_op(const std::string & name) {
    struct name_op_pair {
        const char * label;
        ggml_op      op;
    };

    static const name_op_pair known_ops[] = {
        { "ADD",            GGML_OP_ADD            },
        { "ADD1",           GGML_OP_ADD1           },
        { "ARGSORT",        GGML_OP_ARGSORT        },
        { "CLAMP",          GGML_OP_CLAMP          },
        { "CONCAT",         GGML_OP_CONCAT         },
        { "CONT",           GGML_OP_CONT           },
        { "CPY",            GGML_OP_CPY            },
        { "DIV",            GGML_OP_DIV            },
        { "FLASH_ATTN_EXT", GGML_OP_FLASH_ATTN_EXT },
        { "GET_ROWS",       GGML_OP_GET_ROWS       },
        { "GET_ROWS_BACK",  GGML_OP_GET_ROWS_BACK  },
        { "GLU",            GGML_OP_GLU            },
        { "IM2COL_BACK",    GGML_OP_IM2COL_BACK    },
        { "MUL",            GGML_OP_MUL            },
        { "MUL_MAT",        GGML_OP_MUL_MAT        },
        { "MUL_MAT_ID",     GGML_OP_MUL_MAT_ID     },
        { "OUT_PROD",       GGML_OP_OUT_PROD       },
        { "POOL_2D",        GGML_OP_POOL_2D        },
        { "RMS_NORM",       GGML_OP_RMS_NORM       },
        { "SCALE",          GGML_OP_SCALE          },
        { "SET_ROWS",       GGML_OP_SET_ROWS       },
        { "SQR",            GGML_OP_SQR            },
        { "SSM_CONV",       GGML_OP_SSM_CONV       },
        { "SSM_SCAN",       GGML_OP_SSM_SCAN       },
        { "SUM_ROWS",       GGML_OP_SUM_ROWS       },
        { "UNARY",          GGML_OP_UNARY          },
        { "SOFT_MAX",       GGML_OP_SOFT_MAX       },
    };

    // The table is small, so a straight scan is enough.
    for (const name_op_pair & entry : known_ops) {
        if (name == entry.label) {
            return entry.op;
        }
    }
    return GGML_OP_COUNT;
}
|
||||
|
||||
// Derive the output extents of `op` from the extents of up to three sources.
//
//   ne0, ne1, ne2 - extents (4 entries each) of src0, src1 and src2
//   ne_out        - receives the 4 output extents; always zeroed first
//
// Returns true when `op` is one of the ops handled below. For any other op
// it returns false and leaves ne_out all zero.
static bool compute_output_ne(ggml_op op,
                              const int64_t ne0[4], const int64_t ne1[4], const int64_t ne2[4],
                              int64_t ne_out[4]) {
    for (int d = 0; d < 4; d++) {
        ne_out[d] = 0;
    }

    // Larger of the src0 / src1 extents along dimension d.
    const auto widest = [&](int d) { return std::max(ne0[d], ne1[d]); };

    switch (op) {
        case GGML_OP_MUL_MAT:
            ne_out[0] = ne0[1];
            ne_out[1] = ne1[1];
            ne_out[2] = widest(2);
            ne_out[3] = widest(3);
            break;

        case GGML_OP_MUL_MAT_ID:
            ne_out[0] = ne0[1];
            ne_out[1] = ne2[0];
            ne_out[2] = ne1[2];
            ne_out[3] = 1;
            break;

        // Per-dimension maximum of src0 and src1.
        case GGML_OP_ADD:
        case GGML_OP_MUL:
        case GGML_OP_DIV:
        case GGML_OP_SCALE:
            for (int d = 0; d < 4; d++) {
                ne_out[d] = widest(d);
            }
            break;

        // Output extents are a plain copy of src0's extents.
        case GGML_OP_ADD1:
        case GGML_OP_SQR:
        case GGML_OP_UNARY:
        case GGML_OP_SSM_SCAN:
        case GGML_OP_SOFT_MAX:
        case GGML_OP_RMS_NORM:
        case GGML_OP_SET_ROWS:
        case GGML_OP_ARGSORT:
        case GGML_OP_CLAMP:
        case GGML_OP_CPY:
        case GGML_OP_POOL_2D:
        case GGML_OP_SSM_CONV:
        case GGML_OP_IM2COL_BACK:
            for (int d = 0; d < 4; d++) {
                ne_out[d] = ne0[d];
            }
            break;

        case GGML_OP_FLASH_ATTN_EXT:
            // NOTE(review): dimensions 0 and 1 both take ne1[1] - confirm this
            // is the intended result shape and not a copy/paste slip.
            ne_out[0] = ne1[1];
            ne_out[1] = ne1[1];
            ne_out[2] = ne0[2];
            ne_out[3] = ne0[3];
            break;

        case GGML_OP_GET_ROWS:
        case GGML_OP_GET_ROWS_BACK:
            ne_out[0] = ne0[0];
            ne_out[1] = ne1[1];
            ne_out[2] = ne1[2];
            ne_out[3] = ne1[3];
            break;

        case GGML_OP_OUT_PROD:
            ne_out[0] = ne0[0];
            ne_out[1] = ne1[0];
            ne_out[2] = widest(2);
            ne_out[3] = widest(3);
            break;

        case GGML_OP_CONCAT:
            // The sources are joined along dimension 0 only.
            ne_out[0] = ne0[0] + ne1[0];
            ne_out[1] = widest(1);
            ne_out[2] = widest(2);
            ne_out[3] = widest(3);
            break;

        default:
            return false;
    }

    return true;
}
|
||||
|
||||
// Read up to four tensor extents from a JSON array.
//
// Always returns a vector of exactly four entries. Entries that are missing
// (the input is not an array, or has fewer than four elements) stay 0, and
// elements beyond the fourth are ignored.
//
// Elements that are not JSON numbers are also left at 0 instead of being
// converted: get<int64_t>() throws nlohmann::json::type_error on such values,
// and the profile loader only handles parse errors, so one malformed record
// would otherwise abort the whole run.
static std::vector<int64_t> json_get_ne(const nlohmann::json & arr) {
    std::vector<int64_t> ne(4, 0);
    if (!arr.is_array()) {
        return ne;
    }

    const size_t n_dims = std::min(arr.size(), ne.size());
    for (size_t i = 0; i < n_dims; i++) {
        const auto & dim = arr[i];
        if (dim.is_number()) {
            ne[i] = dim.get<int64_t>();
        }
    }
    return ne;
}
|
||||
|
||||
// True when every extent in `ne` is 0 (an empty vector also counts as zero).
static bool ne_is_zero(const std::vector<int64_t> & ne) {
    return std::all_of(ne.begin(), ne.end(), [](int64_t extent) { return extent == 0; });
}
|
||||
|
||||
// Identity of one distinct op invocation found in a profile: the op name,
// the backend it ran on, the source types, the sub-op and the source extents.
// Records with equal keys are aggregated together.
struct profile_op_key {
    std::string name;
    int backend_id;
    int type_src0;
    int type_src1;
    int type_src2;
    int sub_op;
    std::vector<int64_t> ne_src0;
    std::vector<int64_t> ne_src1;
    std::vector<int64_t> ne_src2;

    // Two keys are equal only when every field matches.
    bool operator==(const profile_op_key & o) const {
        if (name != o.name || backend_id != o.backend_id) {
            return false;
        }
        if (type_src0 != o.type_src0 || type_src1 != o.type_src1 || type_src2 != o.type_src2) {
            return false;
        }
        if (sub_op != o.sub_op) {
            return false;
        }
        return ne_src0 == o.ne_src0 && ne_src1 == o.ne_src1 && ne_src2 == o.ne_src2;
    }
};
|
||||
|
||||
struct profile_op_key_hash {
|
||||
size_t operator()(const profile_op_key & k) const {
|
||||
size_t h = std::hash<std::string>{}(k.name);
|
||||
h ^= std::hash<int>{}(k.backend_id) + 0x9e3779b9 + (h << 6) + (h >> 2);
|
||||
h ^= std::hash<int>{}(k.type_src0) + 0x9e3779b9 + (h << 6) + (h >> 2);
|
||||
h ^= std::hash<int>{}(k.type_src1) + 0x9e3779b9 + (h << 6) + (h >> 2);
|
||||
h ^= std::hash<int>{}(k.type_src2) + 0x9e3779b9 + (h << 6) + (h >> 2);
|
||||
h ^= std::hash<int>{}(k.sub_op) + 0x9e3779b9 + (h << 6) + (h >> 2);
|
||||
for (auto v : k.ne_src0) { h ^= std::hash<int64_t>{}(v) + 0x9e3779b9 + (h << 6) + (h >> 2); }
|
||||
for (auto v : k.ne_src1) { h ^= std::hash<int64_t>{}(v) + 0x9e3779b9 + (h << 6) + (h >> 2); }
|
||||
for (auto v : k.ne_src2) { h ^= std::hash<int64_t>{}(v) + 0x9e3779b9 + (h << 6) + (h >> 2); }
|
||||
return h;
|
||||
}
|
||||
};
|
||||
|
||||
struct profile_op_agg {
|
||||
profile_op_key key;
|
||||
uint64_t total_ns;
|
||||
int64_t count;
|
||||
double max_ns;
|
||||
};
|
||||
|
||||
struct profile_test_plan {
|
||||
struct backend_plan {
|
||||
int backend_id;
|
||||
std::string backend_name;
|
||||
std::vector<std::unique_ptr<test_case>> test_cases;
|
||||
std::vector<profile_op_agg> aggs;
|
||||
};
|
||||
std::vector<backend_plan> backends;
|
||||
};
|
||||
|
||||
static profile_test_plan make_test_plan_from_profile(
|
||||
const char * profile_path, int top_n) {
|
||||
using json = nlohmann::json;
|
||||
profile_test_plan plan;
|
||||
|
||||
std::ifstream f(profile_path);
|
||||
if (!f.is_open()) {
|
||||
fprintf(stderr, "Error: cannot open profile file: %s\n", profile_path);
|
||||
return plan;
|
||||
}
|
||||
|
||||
json root;
|
||||
try {
|
||||
root = json::parse(f);
|
||||
} catch (const json::parse_error & e) {
|
||||
fprintf(stderr, "Error: failed to parse profile JSON: %s\n", e.what());
|
||||
return plan;
|
||||
}
|
||||
|
||||
if (!root.contains("records") || !root["records"].is_array()) {
|
||||
fprintf(stderr, "Error: no 'records' array found in profile\n");
|
||||
return plan;
|
||||
}
|
||||
|
||||
std::unordered_map<int, std::string> backend_names;
|
||||
if (root.contains("backends") && root["backends"].is_array()) {
|
||||
for (const auto & be : root["backends"]) {
|
||||
int id = be.value("id", -1);
|
||||
std::string name = be.value("name", "");
|
||||
if (id >= 0 && !name.empty()) {
|
||||
backend_names[id] = name;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const auto & records = root["records"];
|
||||
std::unordered_map<profile_op_key, profile_op_agg, profile_op_key_hash> aggs;
|
||||
|
||||
for (const auto & rec : records) {
|
||||
int rec_type = rec.value("type", -1);
|
||||
if (rec_type != 0) continue;
|
||||
|
||||
std::string name = rec.value("name", "");
|
||||
ggml_op op = profile_name_to_op(name);
|
||||
if (op == GGML_OP_COUNT) continue;
|
||||
|
||||
profile_op_key key;
|
||||
key.name = name;
|
||||
key.backend_id = rec.value("backend_id", 0);
|
||||
key.type_src0 = rec.value("type_src0", -1);
|
||||
key.type_src1 = rec.value("type_src1", -1);
|
||||
key.type_src2 = rec.value("type_src2", -1);
|
||||
key.sub_op = rec.value("sub_op", -1);
|
||||
key.ne_src0 = json_get_ne(rec.value("ne_src0", json::array()));
|
||||
key.ne_src1 = json_get_ne(rec.value("ne_src1", json::array()));
|
||||
key.ne_src2 = json_get_ne(rec.value("ne_src2", json::array()));
|
||||
|
||||
uint64_t dur = rec.value("duration_ns", (uint64_t)0);
|
||||
|
||||
auto & agg = aggs[key];
|
||||
agg.key = key;
|
||||
agg.total_ns += dur;
|
||||
agg.count++;
|
||||
agg.max_ns = std::max(agg.max_ns, (double)dur);
|
||||
}
|
||||
|
||||
std::vector<profile_op_agg> sorted;
|
||||
sorted.reserve(aggs.size());
|
||||
for (auto & [_, agg] : aggs) {
|
||||
sorted.push_back(std::move(agg));
|
||||
}
|
||||
std::sort(sorted.begin(), sorted.end(),
|
||||
[](const profile_op_agg & a, const profile_op_agg & b) {
|
||||
return a.total_ns > b.total_ns;
|
||||
});
|
||||
|
||||
uint64_t global_max_ns = sorted.empty() ? 1 : sorted[0].total_ns;
|
||||
|
||||
int n = std::min(top_n, (int)sorted.size());
|
||||
if (n == 0) {
|
||||
fprintf(stderr, "Warning: no matching OP records found in profile\n");
|
||||
return plan;
|
||||
}
|
||||
|
||||
auto make_src = [](int type_id, const std::vector<int64_t> & ne) -> input_tensor {
|
||||
input_tensor src;
|
||||
src.type = (type_id >= 0) ? (ggml_type)type_id : GGML_TYPE_F32;
|
||||
for (int d = 0; d < 4; d++) {
|
||||
src.ne[d] = d < (int)ne.size() ? ne[d] : 0;
|
||||
src.nb[d] = 0;
|
||||
}
|
||||
return src;
|
||||
};
|
||||
|
||||
auto make_test_from_agg = [&](const profile_op_agg & agg) -> std::unique_ptr<test_case> {
|
||||
ggml_op op = profile_name_to_op(agg.key.name);
|
||||
|
||||
std::vector<input_tensor> sources;
|
||||
if (!ne_is_zero(agg.key.ne_src0)) {
|
||||
sources.push_back(make_src(agg.key.type_src0, agg.key.ne_src0));
|
||||
}
|
||||
if (!ne_is_zero(agg.key.ne_src1)) {
|
||||
sources.push_back(make_src(agg.key.type_src1, agg.key.ne_src1));
|
||||
}
|
||||
if (op == GGML_OP_MUL_MAT_ID) {
|
||||
if (!ne_is_zero(agg.key.ne_src2)) {
|
||||
sources.push_back(make_src(agg.key.type_src2, agg.key.ne_src2));
|
||||
} else if (sources.size() >= 2) {
|
||||
input_tensor src;
|
||||
src.type = GGML_TYPE_I32;
|
||||
src.ne[0] = sources[1].ne[1];
|
||||
src.ne[1] = 1;
|
||||
src.ne[2] = 1;
|
||||
src.ne[3] = 1;
|
||||
src.nb[0] = src.nb[1] = src.nb[2] = src.nb[3] = 0;
|
||||
sources.push_back(src);
|
||||
}
|
||||
}
|
||||
|
||||
int64_t ne0[4] = {0}, ne1[4] = {0}, ne2[4] = {0};
|
||||
if (sources.size() > 0) { for (int d = 0; d < 4; d++) ne0[d] = sources[0].ne[d]; }
|
||||
if (sources.size() > 1) { for (int d = 0; d < 4; d++) ne1[d] = sources[1].ne[d]; }
|
||||
if (sources.size() > 2) { for (int d = 0; d < 4; d++) ne2[d] = sources[2].ne[d]; }
|
||||
|
||||
int64_t ne_out[4] = {0, 0, 0, 0};
|
||||
if (!compute_output_ne(op, ne0, ne1, ne2, ne_out)) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
ggml_type out_type = GGML_TYPE_F32;
|
||||
std::array<int32_t, GGML_MAX_OP_PARAMS / sizeof(int32_t)> op_params{};
|
||||
op_params.fill(0);
|
||||
|
||||
if (op == GGML_OP_MUL_MAT_ID && sources.size() >= 2) {
|
||||
op_params[0] = (int32_t)sources[1].ne[1];
|
||||
} else if ((op == GGML_OP_UNARY || op == GGML_OP_GLU) && agg.key.sub_op >= 0) {
|
||||
op_params[0] = (int32_t)agg.key.sub_op;
|
||||
}
|
||||
|
||||
std::array<int64_t, 4> out_ne;
|
||||
for (int d = 0; d < 4; d++) out_ne[d] = ne_out[d];
|
||||
|
||||
return std::unique_ptr<test_case>(new test_generic_op(op, out_type, out_ne, op_params, sources,
|
||||
agg.key.name + " [from profile]"));
|
||||
};
|
||||
|
||||
printf(" Loaded %d profiler ops, running top %d:\n", (int)sorted.size(), n);
|
||||
for (int i = 0; i < n; i++) {
|
||||
const auto & agg = sorted[i];
|
||||
double pct = 100.0 * agg.total_ns / global_max_ns;
|
||||
int bid = agg.key.backend_id;
|
||||
std::string bname = backend_names.count(bid) ? backend_names[bid] : std::to_string(bid);
|
||||
printf(" #%d: %s @ %s %ldx %.3fms total (%.1f%% of top)\n",
|
||||
i + 1, agg.key.name.c_str(), bname.c_str(), agg.count,
|
||||
agg.total_ns / 1e6, pct);
|
||||
if (!ne_is_zero(agg.key.ne_src0)) {
|
||||
const char * tn = agg.key.type_src0 >= 0 ? ggml_type_name((ggml_type)agg.key.type_src0) : "?";
|
||||
printf(" src0: [%lld, %lld, %lld, %lld] (%s)\n",
|
||||
(long long)agg.key.ne_src0[0], (long long)agg.key.ne_src0[1],
|
||||
(long long)agg.key.ne_src0[2], (long long)agg.key.ne_src0[3], tn);
|
||||
}
|
||||
if (!ne_is_zero(agg.key.ne_src1)) {
|
||||
const char * tn = agg.key.type_src1 >= 0 ? ggml_type_name((ggml_type)agg.key.type_src1) : "?";
|
||||
printf(" src1: [%lld, %lld, %lld, %lld] (%s)\n",
|
||||
(long long)agg.key.ne_src1[0], (long long)agg.key.ne_src1[1],
|
||||
(long long)agg.key.ne_src1[2], (long long)agg.key.ne_src1[3], tn);
|
||||
}
|
||||
if (!ne_is_zero(agg.key.ne_src2)) {
|
||||
const char * tn = agg.key.type_src2 >= 0 ? ggml_type_name((ggml_type)agg.key.type_src2) : "?";
|
||||
printf(" src2: [%lld, %lld, %lld, %lld] (%s)\n",
|
||||
(long long)agg.key.ne_src2[0], (long long)agg.key.ne_src2[1],
|
||||
(long long)agg.key.ne_src2[2], (long long)agg.key.ne_src2[3], tn);
|
||||
}
|
||||
|
||||
auto tc = make_test_from_agg(agg);
|
||||
if (!tc) continue;
|
||||
|
||||
int bid2 = agg.key.backend_id;
|
||||
std::string bname2 = backend_names.count(bid2) ? backend_names[bid2] : std::to_string(bid2);
|
||||
|
||||
profile_test_plan::backend_plan * bp = nullptr;
|
||||
for (auto & b : plan.backends) {
|
||||
if (b.backend_id == bid2 && b.backend_name == bname2) {
|
||||
bp = &b;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!bp) {
|
||||
plan.backends.push_back({bid2, bname2, {}, {}});
|
||||
bp = &plan.backends.back();
|
||||
}
|
||||
bp->test_cases.push_back(std::move(tc));
|
||||
bp->aggs.push_back(agg);
|
||||
}
|
||||
|
||||
return plan;
|
||||
printf(" --test-file reads test operators from a test file generated by llama-export-graph-ops or the profiler\n");
|
||||
}
|
||||
|
||||
int main(int argc, char ** argv) {
|
||||
test_mode mode = MODE_TEST;
|
||||
bool mode_explicit = false;
|
||||
output_formats output_format = CONSOLE;
|
||||
const char * op_names_filter = nullptr;
|
||||
const char * backend_filter = nullptr;
|
||||
const char * params_filter = nullptr;
|
||||
const char * test_file_path = nullptr;
|
||||
const char * profile_path = nullptr;
|
||||
int profile_top_n = 10;
|
||||
|
||||
for (int i = 1; i < argc; i++) {
|
||||
if (strcmp(argv[i], "test") == 0) {
|
||||
mode = MODE_TEST;
|
||||
mode_explicit = true;
|
||||
} else if (strcmp(argv[i], "perf") == 0) {
|
||||
mode = MODE_PERF;
|
||||
mode_explicit = true;
|
||||
} else if (strcmp(argv[i], "grad") == 0) {
|
||||
mode = MODE_GRAD;
|
||||
mode_explicit = true;
|
||||
} else if (strcmp(argv[i], "support") == 0) {
|
||||
mode = MODE_SUPPORT;
|
||||
mode_explicit = true;
|
||||
} else if (strcmp(argv[i], "-o") == 0) {
|
||||
if (i + 1 < argc) {
|
||||
op_names_filter = argv[++i];
|
||||
|
|
@ -9737,34 +9331,19 @@ int main(int argc, char ** argv) {
|
|||
usage(argv);
|
||||
return 1;
|
||||
}
|
||||
} else if (strcmp(argv[i], "--from-profile") == 0) {
|
||||
if (i + 1 < argc) {
|
||||
profile_path = argv[++i];
|
||||
} else {
|
||||
usage(argv);
|
||||
return 1;
|
||||
}
|
||||
} else if (strcmp(argv[i], "--top-n") == 0) {
|
||||
if (i + 1 < argc) {
|
||||
profile_top_n = atoi(argv[++i]);
|
||||
if (profile_top_n <= 0) profile_top_n = 10;
|
||||
} else {
|
||||
usage(argv);
|
||||
return 1;
|
||||
}
|
||||
} else {
|
||||
usage(argv);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
// load and enumerate backends
|
||||
ggml_backend_load_all();
|
||||
|
||||
if (profile_path != nullptr) {
|
||||
if (test_file_path != nullptr && !mode_explicit) {
|
||||
mode = MODE_PERF;
|
||||
}
|
||||
|
||||
// load and enumerate backends
|
||||
ggml_backend_load_all();
|
||||
|
||||
// Create printer for output format
|
||||
std::unique_ptr<printer> output_printer = create_printer(output_format);
|
||||
if (output_printer) {
|
||||
|
|
@ -9773,83 +9352,6 @@ int main(int argc, char ** argv) {
|
|||
|
||||
output_printer->print_testing_start(testing_start_info(ggml_backend_dev_count()));
|
||||
|
||||
if (profile_path != nullptr) {
|
||||
profile_test_plan plan = make_test_plan_from_profile(profile_path, profile_top_n);
|
||||
|
||||
size_t n_ok = 0;
|
||||
size_t total = plan.backends.size();
|
||||
|
||||
for (size_t bi = 0; bi < plan.backends.size(); bi++) {
|
||||
auto & bp = plan.backends[bi];
|
||||
|
||||
ggml_backend_dev_t dev = nullptr;
|
||||
for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
|
||||
ggml_backend_dev_t d = ggml_backend_dev_get(i);
|
||||
if (strcmp(ggml_backend_dev_name(d), bp.backend_name.c_str()) == 0) {
|
||||
dev = d;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (dev == nullptr) {
|
||||
fprintf(stderr, "Warning: backend '%s' from profile not found, skipping\n", bp.backend_name.c_str());
|
||||
n_ok++;
|
||||
output_printer->print_backend_init(
|
||||
backend_init_info(bi, total, bp.backend_name.c_str(), true, "Not found"));
|
||||
continue;
|
||||
}
|
||||
|
||||
if (backend_filter != NULL && strcmp(backend_filter, bp.backend_name.c_str()) != 0) {
|
||||
output_printer->print_backend_init(
|
||||
backend_init_info(bi, total, bp.backend_name.c_str(), true, "Skipping"));
|
||||
n_ok++;
|
||||
continue;
|
||||
}
|
||||
|
||||
ggml_backend_t backend = ggml_backend_dev_init(dev, NULL);
|
||||
GGML_ASSERT(backend != NULL);
|
||||
|
||||
ggml_backend_reg_t reg = ggml_backend_dev_backend_reg(dev);
|
||||
auto ggml_backend_set_n_threads_fn = (ggml_backend_set_n_threads_t) ggml_backend_reg_get_proc_address(reg, "ggml_backend_set_n_threads");
|
||||
if (ggml_backend_set_n_threads_fn) {
|
||||
ggml_backend_set_n_threads_fn(backend, N_THREADS);
|
||||
}
|
||||
|
||||
size_t free, total_mem;
|
||||
ggml_backend_dev_memory(dev, &free, &total_mem);
|
||||
output_printer->print_backend_init(backend_init_info(bi, plan.backends.size(), bp.backend_name.c_str(),
|
||||
false, "", ggml_backend_dev_description(dev),
|
||||
total_mem / 1024 / 1024, free / 1024 / 1024, true));
|
||||
|
||||
std::vector<std::unique_ptr<test_case>> cases;
|
||||
for (auto & tc : bp.test_cases) {
|
||||
cases.push_back(std::move(tc));
|
||||
}
|
||||
|
||||
bool ok = test_backend(backend, MODE_PERF, op_names_filter, params_filter,
|
||||
output_printer.get(), nullptr, std::move(cases));
|
||||
|
||||
if (ok) {
|
||||
n_ok++;
|
||||
}
|
||||
output_printer->print_backend_status(
|
||||
backend_status_info(ggml_backend_name(backend), ok ? test_status_t::OK : test_status_t::FAIL));
|
||||
|
||||
ggml_backend_free(backend);
|
||||
}
|
||||
|
||||
ggml_quantize_free();
|
||||
|
||||
if (output_printer) {
|
||||
output_printer->print_footer();
|
||||
}
|
||||
|
||||
output_printer->print_overall_summary(
|
||||
overall_summary_info(n_ok, total, n_ok == total));
|
||||
|
||||
return n_ok != total;
|
||||
}
|
||||
|
||||
size_t n_ok = 0;
|
||||
|
||||
for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
|
||||
|
|
@ -9862,7 +9364,7 @@ int main(int argc, char ** argv) {
|
|||
continue;
|
||||
}
|
||||
|
||||
if (backend_filter == NULL &&
|
||||
if (backend_filter == NULL && test_file_path == NULL &&
|
||||
ggml_backend_dev_type(dev) == GGML_BACKEND_DEVICE_TYPE_CPU && mode != MODE_GRAD) {
|
||||
output_printer->print_backend_init(backend_init_info(
|
||||
i, ggml_backend_dev_count(), ggml_backend_dev_name(dev), true, "Skipping CPU backend"));
|
||||
|
|
|
|||
|
|
@ -49,6 +49,107 @@ GGML_GLU_OP_NAMES = {
|
|||
4: "GEGLU_QUICK", 5: "SWIGLU_OAI",
|
||||
}
|
||||
|
||||
# Numeric op id -> op name. The id of each name is its position in the table
# below (0-based), so entries must never be reordered.
# NOTE(review): presumably mirrors the order of `enum ggml_op` in ggml.h - confirm
# whenever ops are added or removed upstream.
GGML_OP_NAMES = dict(enumerate((
    "NONE", "DUP", "ADD", "ADD_ID", "ADD1",                                  # 0-4
    "ACC", "SUB", "MUL", "DIV", "SQR",                                       # 5-9
    "SQRT", "LOG", "SIN", "COS", "SUM",                                      # 10-14
    "SUM_ROWS", "CUMSUM", "MEAN", "ARGMAX", "COUNT_EQUAL",                   # 15-19
    "REPEAT", "REPEAT_BACK", "CONCAT", "SILU_BACK", "NORM",                  # 20-24
    "RMS_NORM", "RMS_NORM_BACK", "GROUP_NORM", "L2_NORM", "MUL_MAT",         # 25-29
    "MUL_MAT_ID", "OUT_PROD", "SCALE", "SET", "CPY",                         # 30-34
    "CONT", "RESHAPE", "VIEW", "PERMUTE", "TRANSPOSE",                       # 35-39
    "GET_ROWS", "GET_ROWS_BACK", "SET_ROWS", "DIAG", "DIAG_MASK_INF",        # 40-44
    "DIAG_MASK_ZERO", "SOFT_MAX", "SOFT_MAX_BACK", "ROPE", "ROPE_BACK",      # 45-49
    "CLAMP", "CONV_TRANSPOSE_1D", "IM2COL", "IM2COL_BACK", "IM2COL_3D",      # 50-54
    "CONV_2D", "CONV_3D", "CONV_2D_DW", "CONV_TRANSPOSE_2D", "POOL_1D",      # 55-59
    "POOL_2D", "POOL_2D_BACK", "UPSCALE", "PAD", "PAD_REFLECT_1D",           # 60-64
    "ROLL", "ARANGE", "TIMESTEP_EMBEDDING", "ARGSORT", "TOP_K",              # 65-69
    "LEAKY_RELU", "TRI", "FILL", "FLASH_ATTN_EXT", "FLASH_ATTN_BACK",        # 70-74
    "SSM_CONV", "SSM_SCAN", "WIN_PART", "WIN_UNPART", "GET_REL_POS",         # 75-79
    "ADD_REL_POS", "RWKV_WKV6", "GATED_LINEAR_ATTN", "RWKV_WKV7", "SOLVE_TRI",  # 80-84
    "GATED_DELTA_NET", "UNARY", "MAP_CUSTOM1", "MAP_CUSTOM2", "MAP_CUSTOM3", # 85-89
    "CUSTOM", "CROSS_ENTROPY_LOSS", "CROSS_ENTROPY_LOSS_BACK",               # 90-92
    "OPT_STEP_ADAMW", "OPT_STEP_SGD",                                        # 93-94
    "GLU", "COUNT",                                                          # 95-96
)))
|
||||
|
||||
# Reverse lookup tables: textual name -> numeric id, derived from the
# id -> name dictionaries so the two directions cannot drift apart.
GGML_TYPE_NAMES_TO_ID = {type_name: type_id for type_id, type_name in GGML_TYPE_NAMES.items()}

GGML_OP_NAMES_TO_ID = {op_name: op_num for op_num, op_name in GGML_OP_NAMES.items()}
|
||||
|
||||
|
||||
_EXPORT_SKIP_OPS = frozenset({
|
||||
33, # SET
|
||||
34, # CPY
|
||||
35, # CONT
|
||||
36, # RESHAPE
|
||||
37, # VIEW
|
||||
38, # PERMUTE
|
||||
39, # TRANSPOSE
|
||||
41, # GET_ROWS_BACK
|
||||
42, # SET_ROWS
|
||||
43, # DIAG
|
||||
44, # DIAG_MASK_INF
|
||||
45, # DIAG_MASK_ZERO
|
||||
47, # SOFT_MAX_BACK
|
||||
49, # ROPE_BACK
|
||||
51, # CONV_TRANSPOSE_1D
|
||||
52, # IM2COL
|
||||
53, # IM2COL_BACK
|
||||
54, # IM2COL_3D
|
||||
58, # CONV_TRANSPOSE_2D
|
||||
61, # POOL_2D_BACK
|
||||
63, # PAD
|
||||
64, # PAD_REFLECT_1D
|
||||
65, # ROLL
|
||||
66, # ARANGE
|
||||
70, # LEAKY_RELU (covered by UNARY)
|
||||
71, # TRI
|
||||
72, # FILL
|
||||
77, # WIN_PART
|
||||
78, # WIN_UNPART
|
||||
92, # CROSS_ENTROPY_LOSS_BACK
|
||||
93, # OPT_STEP_ADAMW
|
||||
94, # OPT_STEP_SGD
|
||||
96, # COUNT
|
||||
})
|
||||
|
||||
|
||||
def _compute_output_ne(op_id: int, ne0: list, ne1: list, ne2: list) -> list | None:
|
||||
if op_id == 29: # MUL_MAT
|
||||
return [ne0[1], ne1[1], max(ne0[2], ne1[2]), max(ne0[3], ne1[3])]
|
||||
if op_id == 30: # MUL_MAT_ID
|
||||
return [ne0[1], ne2[0], ne1[2], 1]
|
||||
if op_id in (2, 7, 8, 32): # ADD, MUL, DIV, SCALE
|
||||
return [max(ne0[i], ne1[i]) for i in range(4)]
|
||||
if op_id == 4: # ADD1
|
||||
return list(ne0)
|
||||
if op_id in (9, 86): # SQR, UNARY
|
||||
return list(ne0)
|
||||
if op_id in (46, 25): # SOFT_MAX, RMS_NORM
|
||||
return list(ne0)
|
||||
if op_id == 73: # FLASH_ATTN_EXT
|
||||
return [ne1[1], ne1[1], ne0[2], ne0[3]]
|
||||
if op_id == 40: # GET_ROWS
|
||||
return [ne0[0], ne1[1], ne1[2], ne1[3]]
|
||||
if op_id == 41: # GET_ROWS_BACK
|
||||
return [ne0[0], ne1[1], ne1[2], ne1[3]]
|
||||
if op_id == 42: # SET_ROWS
|
||||
return list(ne0)
|
||||
if op_id == 31: # OUT_PROD
|
||||
return [ne0[0], ne1[0], max(ne0[2], ne1[2]), max(ne0[3], ne1[3])]
|
||||
if op_id == 22: # CONCAT
|
||||
return [ne0[0] + ne1[0], max(ne0[1], ne1[1]),
|
||||
max(ne0[2], ne1[2]), max(ne0[3], ne1[3])]
|
||||
if op_id in (34, 35, 50, 60, 53, 68): # CPY, CONT, CLAMP, POOL_2D, IM2COL_BACK, ARGSORT
|
||||
return list(ne0)
|
||||
return None
|
||||
|
||||
|
||||
@dataclass
|
||||
class ProfileRecord:
|
||||
|
|
@ -464,6 +565,92 @@ class ProfileData:
|
|||
print(f"Chrome trace exported to: {filepath}")
|
||||
print(f"Open chrome://tracing in Chrome/Edge and load this file.")
|
||||
|
||||
def export_graph_ops(self, filepath: str | Path) -> None:
    """Write every distinct profiled op to ``filepath``, one text line per op.

    The file is meant to be consumed by ``test-backend-ops --test-file``
    (export-graph-ops format). Each line is a space-separated list of:
    op id, a literal ``0``, the four output dims, the op-param count followed
    by the params, the source count followed by nine fields per source
    (type, four dims, four stride slots written as ``0``), the op name and,
    when known, the backend/device name.

    Records are skipped when they are not op events, when their name is not a
    known op, when the op is in ``_EXPORT_SKIP_OPS`` or when no output shape
    can be derived. Duplicate (op, shapes, params, sources, backend)
    combinations are written only once, in first-seen order.
    """
    def has_extent(shape) -> bool:
        # A source counts as present when any of its dims is non-zero.
        return any(dim != 0 for dim in shape)

    backends: dict[int, dict] = {
        entry["id"]: entry for entry in self.metadata.get("backends", [])
    }
    emitted: set[tuple] = set()
    out_lines: list[str] = []

    for rec in self.records:
        if rec.type != OP_EVENT:
            continue

        op_id = GGML_OP_NAMES_TO_ID.get(rec.name, -1)
        if op_id < 0 or op_id in _EXPORT_SKIP_OPS:
            continue

        shapes = (rec.ne_src0, rec.ne_src1, rec.ne_src2)
        # Negative type ids mean "not recorded"; they are exported as type 0.
        dtypes = tuple(
            t if t >= 0 else 0
            for t in (rec.type_src0, rec.type_src1, rec.type_src2)
        )

        # Collect the sources that are present, in slot order. Absent slots
        # are dropped, so later sources shift down.
        srcs: list[tuple[int, list, list]] = []
        for slot in (0, 1):
            if has_extent(shapes[slot]):
                srcs.append((dtypes[slot], shapes[slot], [0, 0, 0, 0]))

        if has_extent(shapes[2]):
            srcs.append((dtypes[2], shapes[2], [0, 0, 0, 0]))
        elif op_id == 30 and len(srcs) >= 2:
            # MUL_MAT_ID needs a rows/ids tensor; synthesize one when the
            # profile did not record it.
            srcs.append((24, [srcs[1][1][1], 1, 1, 1], [0, 0, 0, 0]))  # I32

        padded = [src[1] for src in srcs]
        while len(padded) < 3:
            padded.append([0, 0, 0, 0])

        ne_out = _compute_output_ne(op_id, padded[0], padded[1], padded[2])
        if ne_out is None:
            continue

        if op_id == 30 and len(srcs) >= 2:  # MUL_MAT_ID
            op_params: list[int] = [srcs[1][1][1]]
        elif op_id in (86, 95) and rec.sub_op >= 0:  # UNARY, GLU
            op_params = [rec.sub_op]
        else:
            op_params = []

        # Prefer the device name; fall back to the backend name when the
        # device is missing, empty or "unknown".
        bname = ""
        if rec.backend_id in backends:
            info = backends[rec.backend_id]
            bname = info.get("device", "")
            if not bname or bname == "unknown":
                bname = info.get("name", "")

        signature = (
            op_id,
            tuple(ne_out),
            tuple(op_params),
            tuple((src[0], tuple(src[1])) for src in srcs),
            bname,
        )
        if signature in emitted:
            continue
        emitted.add(signature)

        fields: list = [op_id, 0, ne_out[0], ne_out[1], ne_out[2], ne_out[3]]
        fields.append(len(op_params))
        fields.extend(op_params)
        fields.append(len(srcs))
        for src_type, src_ne, src_nb in srcs:
            fields.extend((
                src_type,
                src_ne[0], src_ne[1], src_ne[2], src_ne[3],
                src_nb[0], src_nb[1], src_nb[2], src_nb[3],
            ))
        fields.append(rec.name if rec.name else "-")
        if bname:
            fields.append(bname)

        out_lines.append(" ".join(f"{field}" for field in fields) + "\n")

    with open(filepath, "w") as f:
        f.writelines(out_lines)

    print(f"Exported {len(out_lines)} unique ops to: {filepath}")
|
||||
|
||||
def export_html_viewer(self, filepath: str | Path, max_records: int = 0) -> None:
|
||||
"""Export a self-contained interactive HTML timeline viewer using Canvas."""
|
||||
import json as json_mod
|
||||
|
|
@ -1007,6 +1194,7 @@ Examples:
|
|||
python -m tools.profiler.profiler profile.json
|
||||
python -m tools.profiler.profiler profile.json --chrome-trace trace.json
|
||||
python -m tools.profiler.profiler profile.json --top-ops 20
|
||||
python -m tools.profiler.profiler profile.json --export-ops ops.txt
|
||||
""",
|
||||
)
|
||||
parser.add_argument("profile", help="Path to profiler JSON file")
|
||||
|
|
@ -1014,6 +1202,8 @@ Examples:
|
|||
help="Export as Chrome Trace Event format")
|
||||
parser.add_argument("--html-viewer", metavar="FILE",
|
||||
help="Export as interactive HTML timeline viewer")
|
||||
parser.add_argument("--export-ops", metavar="FILE",
|
||||
help="Export ops in export-graph-ops format (for test-backend-ops --test-file)")
|
||||
parser.add_argument("--html-max-records", type=int, default=0,
|
||||
help="Max records in HTML viewer (0=unlimited, set to downsample for huge traces)")
|
||||
parser.add_argument("--top-ops", type=int, default=0,
|
||||
|
|
@ -1033,6 +1223,9 @@ Examples:
|
|||
if args.html_viewer:
|
||||
data.export_html_viewer(args.html_viewer, max_records=args.html_max_records)
|
||||
|
||||
if args.export_ops:
|
||||
data.export_graph_ops(args.export_ops)
|
||||
|
||||
if args.top_ops > 0:
|
||||
print(f"\nTop {args.top_ops} operations by total time:\n")
|
||||
for s in data.top_operations(args.top_ops):
|
||||
|
|
@ -1055,7 +1248,7 @@ Examples:
|
|||
f"{s.count:>6} calls {s.total_bytes / 1e6:.1f} MB")
|
||||
print()
|
||||
|
||||
if args.top_ops == 0 and args.top_kernels == 0 and not args.inefficiency and not args.chrome_trace and not args.html_viewer:
|
||||
if args.top_ops == 0 and args.top_kernels == 0 and not args.inefficiency and not args.chrome_trace and not args.html_viewer and not args.export_ops:
|
||||
data.summary()
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue