opencl: fix warnings and clean up profiling (#16688)
* opencl: remove unused headers, fix warnings
* opencl: clean up profiling, only keep kernel time
This commit is contained in:
parent
fb349848f3
commit
6ea37f5739
|
|
@ -15,13 +15,12 @@
|
||||||
|
|
||||||
#include <CL/cl.h>
|
#include <CL/cl.h>
|
||||||
|
|
||||||
|
#include <inttypes.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
#include <cstddef>
|
#include <cstddef>
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <atomic>
|
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <limits>
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
|
|
@ -533,25 +532,17 @@ struct ggml_backend_opencl_context {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Dump a csv
|
// Dump a csv
|
||||||
float total_kernel_time = 0;
|
fprintf(fperf, "op name, kernel name, exec duration (ms), global size, local size, output size\n");
|
||||||
fprintf(fperf, "op name, kernel name, queued duration (ms), submit duration(ms), exec duration (ms), complete duration (ms), total duration (ms), global size, local size, output size\n");
|
|
||||||
for (const ProfilingInfo & info : profiling_info) {
|
for (const ProfilingInfo & info : profiling_info) {
|
||||||
total_kernel_time += info.cmd_duration_ns/1.e6f;
|
fprintf(fperf, "%s,%s,%f,%zux%zux%zu,%zux%zux%zu,%zux%zux%zux%zu\n",
|
||||||
fprintf(fperf, "%s,%s,%f,%f,%f,%f,%f,%zux%zux%zu,%zux%zux%zu,%zux%zux%zux%zu\n",
|
|
||||||
info.op_name.c_str(), info.kernel_name.c_str(),
|
info.op_name.c_str(), info.kernel_name.c_str(),
|
||||||
info.cmd_queued_duration_ns/1.e6f,
|
|
||||||
info.cmd_submit_duration_ns/1.e6f,
|
|
||||||
info.cmd_duration_ns/1.e6f,
|
info.cmd_duration_ns/1.e6f,
|
||||||
info.cmd_complete_duration_ns/1.e6f,
|
|
||||||
info.cmd_total_duration_ns/1.e6f,
|
|
||||||
info.global_size[0], info.global_size[1], info.global_size[2],
|
info.global_size[0], info.global_size[1], info.global_size[2],
|
||||||
info.local_size[0], info.local_size[1], info.local_size[2],
|
info.local_size[0], info.local_size[1], info.local_size[2],
|
||||||
info.output_size[0], info.output_size[1], info.output_size[2], info.output_size[3]);
|
info.output_size[0], info.output_size[1], info.output_size[2], info.output_size[3]);
|
||||||
}
|
}
|
||||||
fclose(fperf);
|
fclose(fperf);
|
||||||
|
|
||||||
GGML_LOG_INFO("ggml_opencl: total kernel time: %f\n", total_kernel_time);
|
|
||||||
|
|
||||||
// Dump a simple chrome trace
|
// Dump a simple chrome trace
|
||||||
FILE* ftrace = fopen("cl_trace.json", "w");
|
FILE* ftrace = fopen("cl_trace.json", "w");
|
||||||
if (!ftrace) {
|
if (!ftrace) {
|
||||||
|
|
@ -561,14 +552,14 @@ struct ggml_backend_opencl_context {
|
||||||
|
|
||||||
fprintf(ftrace, "[\n");
|
fprintf(ftrace, "[\n");
|
||||||
for (const ProfilingInfo & info : profiling_info) {
|
for (const ProfilingInfo & info : profiling_info) {
|
||||||
fprintf(ftrace, "{\"name\": \"%s\", \"cat\": \"OpenCL\", \"ph\": \"B\", \"ts\": %llu, \"pid\": \"\", \"tid\": \"Host\"},\n",
|
fprintf(ftrace, "{\"name\": \"%s\", \"cat\": \"OpenCL\", \"ph\": \"B\", \"ts\": %" PRIu64 ", \"pid\": \"\", \"tid\": \"Host\"},\n",
|
||||||
info.kernel_name.c_str(), info.cmd_queued/1000);
|
info.kernel_name.c_str(), info.cmd_queued/1000);
|
||||||
fprintf(ftrace, "{\"name\": \"%s\", \"cat\": \"OpenCL\", \"ph\": \"E\", \"ts\": %llu, \"pid\": \"\", \"tid\": \"Host\"},\n",
|
fprintf(ftrace, "{\"name\": \"%s\", \"cat\": \"OpenCL\", \"ph\": \"E\", \"ts\": %" PRIu64 ", \"pid\": \"\", \"tid\": \"Host\"},\n",
|
||||||
info.kernel_name.c_str(), info.cmd_submit/1000);
|
info.kernel_name.c_str(), info.cmd_submit/1000);
|
||||||
|
|
||||||
fprintf(ftrace, "{\"name\": \"%s\", \"cat\": \"OpenCL\", \"ph\": \"B\", \"ts\": %llu, \"pid\": \"\", \"tid\": \"Device\"},\n",
|
fprintf(ftrace, "{\"name\": \"%s\", \"cat\": \"OpenCL\", \"ph\": \"B\", \"ts\": %" PRIu64 ", \"pid\": \"\", \"tid\": \"Device\"},\n",
|
||||||
info.kernel_name.c_str(), info.cmd_start/1000);
|
info.kernel_name.c_str(), info.cmd_start/1000);
|
||||||
fprintf(ftrace, "{\"name\": \"%s\", \"cat\": \"OpenCL\", \"ph\": \"E\", \"ts\": %llu, \"pid\": \"\", \"tid\": \"Device\"},\n",
|
fprintf(ftrace, "{\"name\": \"%s\", \"cat\": \"OpenCL\", \"ph\": \"E\", \"ts\": %" PRIu64 ", \"pid\": \"\", \"tid\": \"Device\"},\n",
|
||||||
info.kernel_name.c_str(), info.cmd_end/1000);
|
info.kernel_name.c_str(), info.cmd_end/1000);
|
||||||
}
|
}
|
||||||
fclose(ftrace);
|
fclose(ftrace);
|
||||||
|
|
@ -7652,6 +7643,8 @@ static void ggml_cl_mul_mat_id(ggml_backend_t backend, const ggml_tensor * src0,
|
||||||
const cl_ulong nb21 = src2->nb[1];
|
const cl_ulong nb21 = src2->nb[1];
|
||||||
const cl_ulong nb20 = src2->nb[0];
|
const cl_ulong nb20 = src2->nb[0];
|
||||||
|
|
||||||
|
UNUSED(nb20);
|
||||||
|
|
||||||
const int ne0 = dst->ne[0];
|
const int ne0 = dst->ne[0];
|
||||||
const int ne1 = dst->ne[1];
|
const int ne1 = dst->ne[1];
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue