opencl: fix warnings and clean up profiling (#16688)

* opencl: remove unused headers, fix warnings

* opencl: clean up profiling, only keep kernel time
This commit is contained in:
lhez 2025-10-20 22:26:17 -07:00 committed by GitHub
parent fb349848f3
commit 6ea37f5739
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 9 additions and 16 deletions

View File

@@ -15,13 +15,12 @@
#include <CL/cl.h> #include <CL/cl.h>
#include <inttypes.h>
#include <string.h> #include <string.h>
#include <cstddef> #include <cstddef>
#include <cstdint> #include <cstdint>
#include <atomic>
#include <fstream> #include <fstream>
#include <limits>
#include <vector> #include <vector>
#include <string> #include <string>
#include <cmath> #include <cmath>
@@ -533,25 +532,17 @@ struct ggml_backend_opencl_context {
} }
// Dump a csv // Dump a csv
float total_kernel_time = 0; fprintf(fperf, "op name, kernel name, exec duration (ms), global size, local size, output size\n");
fprintf(fperf, "op name, kernel name, queued duration (ms), submit duration(ms), exec duration (ms), complete duration (ms), total duration (ms), global size, local size, output size\n");
for (const ProfilingInfo & info : profiling_info) { for (const ProfilingInfo & info : profiling_info) {
total_kernel_time += info.cmd_duration_ns/1.e6f; fprintf(fperf, "%s,%s,%f,%zux%zux%zu,%zux%zux%zu,%zux%zux%zux%zu\n",
fprintf(fperf, "%s,%s,%f,%f,%f,%f,%f,%zux%zux%zu,%zux%zux%zu,%zux%zux%zux%zu\n",
info.op_name.c_str(), info.kernel_name.c_str(), info.op_name.c_str(), info.kernel_name.c_str(),
info.cmd_queued_duration_ns/1.e6f,
info.cmd_submit_duration_ns/1.e6f,
info.cmd_duration_ns/1.e6f, info.cmd_duration_ns/1.e6f,
info.cmd_complete_duration_ns/1.e6f,
info.cmd_total_duration_ns/1.e6f,
info.global_size[0], info.global_size[1], info.global_size[2], info.global_size[0], info.global_size[1], info.global_size[2],
info.local_size[0], info.local_size[1], info.local_size[2], info.local_size[0], info.local_size[1], info.local_size[2],
info.output_size[0], info.output_size[1], info.output_size[2], info.output_size[3]); info.output_size[0], info.output_size[1], info.output_size[2], info.output_size[3]);
} }
fclose(fperf); fclose(fperf);
GGML_LOG_INFO("ggml_opencl: total kernel time: %f\n", total_kernel_time);
// Dump a simple chrome trace // Dump a simple chrome trace
FILE* ftrace = fopen("cl_trace.json", "w"); FILE* ftrace = fopen("cl_trace.json", "w");
if (!ftrace) { if (!ftrace) {
@@ -561,14 +552,14 @@ struct ggml_backend_opencl_context {
fprintf(ftrace, "[\n"); fprintf(ftrace, "[\n");
for (const ProfilingInfo & info : profiling_info) { for (const ProfilingInfo & info : profiling_info) {
fprintf(ftrace, "{\"name\": \"%s\", \"cat\": \"OpenCL\", \"ph\": \"B\", \"ts\": %llu, \"pid\": \"\", \"tid\": \"Host\"},\n", fprintf(ftrace, "{\"name\": \"%s\", \"cat\": \"OpenCL\", \"ph\": \"B\", \"ts\": %" PRIu64 ", \"pid\": \"\", \"tid\": \"Host\"},\n",
info.kernel_name.c_str(), info.cmd_queued/1000); info.kernel_name.c_str(), info.cmd_queued/1000);
fprintf(ftrace, "{\"name\": \"%s\", \"cat\": \"OpenCL\", \"ph\": \"E\", \"ts\": %llu, \"pid\": \"\", \"tid\": \"Host\"},\n", fprintf(ftrace, "{\"name\": \"%s\", \"cat\": \"OpenCL\", \"ph\": \"E\", \"ts\": %" PRIu64 ", \"pid\": \"\", \"tid\": \"Host\"},\n",
info.kernel_name.c_str(), info.cmd_submit/1000); info.kernel_name.c_str(), info.cmd_submit/1000);
fprintf(ftrace, "{\"name\": \"%s\", \"cat\": \"OpenCL\", \"ph\": \"B\", \"ts\": %llu, \"pid\": \"\", \"tid\": \"Device\"},\n", fprintf(ftrace, "{\"name\": \"%s\", \"cat\": \"OpenCL\", \"ph\": \"B\", \"ts\": %" PRIu64 ", \"pid\": \"\", \"tid\": \"Device\"},\n",
info.kernel_name.c_str(), info.cmd_start/1000); info.kernel_name.c_str(), info.cmd_start/1000);
fprintf(ftrace, "{\"name\": \"%s\", \"cat\": \"OpenCL\", \"ph\": \"E\", \"ts\": %llu, \"pid\": \"\", \"tid\": \"Device\"},\n", fprintf(ftrace, "{\"name\": \"%s\", \"cat\": \"OpenCL\", \"ph\": \"E\", \"ts\": %" PRIu64 ", \"pid\": \"\", \"tid\": \"Device\"},\n",
info.kernel_name.c_str(), info.cmd_end/1000); info.kernel_name.c_str(), info.cmd_end/1000);
} }
fclose(ftrace); fclose(ftrace);
@@ -7652,6 +7643,8 @@ static void ggml_cl_mul_mat_id(ggml_backend_t backend, const ggml_tensor * src0,
const cl_ulong nb21 = src2->nb[1]; const cl_ulong nb21 = src2->nb[1];
const cl_ulong nb20 = src2->nb[0]; const cl_ulong nb20 = src2->nb[0];
UNUSED(nb20);
const int ne0 = dst->ne[0]; const int ne0 = dst->ne[0];
const int ne1 = dst->ne[1]; const int ne1 = dst->ne[1];