diff --git a/mindspore/lite/include/ms_tensor.h b/mindspore/lite/include/ms_tensor.h index f3706af0955..ac0ee269b70 100644 --- a/mindspore/lite/include/ms_tensor.h +++ b/mindspore/lite/include/ms_tensor.h @@ -100,6 +100,10 @@ struct CallBackParam { std::string node_type; /**< node type argument */ }; +struct GPUCallBackParam : CallBackParam { + double execute_time{-1.f}; +}; + /// \brief KernelCallBack defined the function pointer for callBack. using KernelCallBack = std::function inputs, std::vector outputs, const CallBackParam &opInfo)>; diff --git a/mindspore/lite/src/runtime/gpu/opencl/opencl_executor.cc b/mindspore/lite/src/runtime/gpu/opencl/opencl_executor.cc index 70c4447554b..b07ddf1335e 100644 --- a/mindspore/lite/src/runtime/gpu/opencl/opencl_executor.cc +++ b/mindspore/lite/src/runtime/gpu/opencl/opencl_executor.cc @@ -32,15 +32,18 @@ int OpenCLExecutor::RunOrTune(const std::vector &inputs, const std::ve const KernelCallBack &before, const KernelCallBack &after, bool is_tune) { int ret{RET_OK}; auto opencl_runtime_ins = ocl_runtime.GetInstance(); + if (before != nullptr && after != nullptr) { + opencl_runtime_ins->SetProfiling(true); + } auto profiling_tmp = opencl_runtime_ins->isProfiling(); if (is_tune) { opencl_runtime_ins->SetProfiling(true); } for (auto *kernel : kernels) { MS_ASSERT(kernel); - CallBackParam callbackParam; + GPUCallBackParam callbackParam; callbackParam.node_name = kernel->name(); - + callbackParam.node_type = kernel->type_str(); if (before != nullptr) { if (!before(TensorVectorCast(kernel->in_tensors()), TensorVectorCast(kernel->out_tensors()), callbackParam)) { MS_LOG(ERROR) << "run kernel before_callback failed, name: " << kernel->name(); @@ -70,9 +73,12 @@ int OpenCLExecutor::RunOrTune(const std::vector &inputs, const std::ve MS_LOG(ERROR) << "run kernel failed, name: " << kernel->name(); return ret; } - if (profiling_tmp) + if (profiling_tmp) { + auto execute_time = op_kernel->GetProfilingTimeMs(); MS_LOG(INFO) << "OpenCl kernel " << kernel->name() << "(" << kernel->type_str() << ") execute time is: " << op_kernel->GetProfilingTimeMs() << "ms"; + callbackParam.execute_time = execute_time; + } } ret = kernel->PostProcess(); if (ret != RET_OK) { diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/winograd.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/winograd.cc index 96508fe0ccc..86dacd00862 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/winograd.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/winograd.cc @@ -198,12 +198,29 @@ int WinogradOpenCLKernel::Run() { ocl_runtime_->RunKernel(kernel_4x4to36_, global_4x4to36_, local_4x4to36_, nullptr, &event_); MS_LOG(DEBUG) << "winograd kernel1 Running!"; - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &kernel2_event_); MS_LOG(DEBUG) << "winograd kernel2 Running!"; ocl_runtime_->SetKernelArg(kernel_36to4x4_, 1, out_tensors_.front()->data_c()); - ocl_runtime_->RunKernel(kernel_36to4x4_, global_36to4x4_, local_36to4x4_, nullptr, &event_); + ocl_runtime_->RunKernel(kernel_36to4x4_, global_36to4x4_, local_36to4x4_, nullptr, &kernel3_event_); return RET_OK; } +double WinogradOpenCLKernel::GetProfilingTimeMs() { + if (!ocl_runtime_->isProfiling()) { + return MAX_PROFILING_TIME_MILLI_SECOND; + } + cl_ulong time_start; + cl_ulong time_end; + event_.getProfilingInfo(CL_PROFILING_COMMAND_START, &time_start); + event_.getProfilingInfo(CL_PROFILING_COMMAND_END, &time_end); + cl_ulong time_ns = time_end - time_start; + kernel2_event_.getProfilingInfo(CL_PROFILING_COMMAND_START, &time_start); + kernel2_event_.getProfilingInfo(CL_PROFILING_COMMAND_END, &time_end); + time_ns += time_end - time_start; + kernel3_event_.getProfilingInfo(CL_PROFILING_COMMAND_START, &time_start); + kernel3_event_.getProfilingInfo(CL_PROFILING_COMMAND_END, &time_end); + time_ns += time_end - time_start; + return static_cast(time_ns) * 1e-6; +} } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/winograd.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/winograd.h index edff463cbba..cd7b88d5370 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/winograd.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/winograd.h @@ -39,6 +39,7 @@ class WinogradOpenCLKernel : public Conv2DOpenCLKernel { std::vector GenerateTuningParam() override { return {}; } int Tune() override { return RET_OK; } + double GetProfilingTimeMs() override; private: void BuildKernel() override; @@ -47,8 +48,10 @@ class WinogradOpenCLKernel : public Conv2DOpenCLKernel { cl::Kernel kernel_4x4to36_; cl::Kernel kernel_36to4x4_; + cl::Event kernel2_event_; cl::NDRange global_4x4to36_, local_4x4to36_; cl::NDRange global_36to4x4_, local_36to4x4_; + cl::Event kernel3_event_; void *winograd_mem0_{nullptr}; void *winograd_mem1_{nullptr}; }; diff --git a/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h b/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h index 8d10f280166..c0a0320725b 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h +++ b/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h @@ -195,7 +195,7 @@ class OpenCLKernel : public LiteKernel { lite::opencl::MemType GetMemType() { return out_mem_type_; } void SetMemType(lite::opencl::MemType mem_type) { out_mem_type_ = mem_type; } OpParameter *GetParameter() { return op_parameter_; } - double GetProfilingTimeMs(); + virtual double GetProfilingTimeMs(); int DequantWeight(); void FreeDequantedWeight(); virtual int InferShape(); diff --git a/mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.cc b/mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.cc index befb436edb4..d8bddb295cc 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.cc @@ -18,6 +18,7 @@ #include #include #include +#include #include "src/runtime/gpu/opencl/opencl_executor.h" #include "src/runtime/kernel/opencl/utils.h" #include "src/runtime/kernel/opencl/kernel/to_format.h" @@ -467,4 +468,33 @@ int OpenCLSubGraph::Run() { } return RET_OK; } + +int OpenCLSubGraph::Run(const KernelCallBack &before, const KernelCallBack &after) { + if (executor_ == nullptr) { + MS_LOG(ERROR) << "executor is nullptr"; + return RET_ERROR; + } + int ret; + for (auto &tensor : in_tensors_) { + MS_ASSERT(tensor); + if (tensor->data_c() == nullptr) { + MS_LOG(ERROR) << "OpenCL subgraph input tensor data is null"; + return RET_ERROR; + } + ret = allocator_->UnmapBuffer(tensor->data_c()); + if (ret != RET_OK) { + return ret; + } + } + + ret = executor_->Run(in_tensors_, out_tensors_, nodes_, allocator_, before, after); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Run opencl executor failed: " << ret; + return ret; + } + if (!ocl_runtime_->SyncCommandQueue()) { + return RET_ERROR; + } + return RET_OK; +} } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.h b/mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.h index 953e3e4ba91..8bdb24c00cf 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.h +++ b/mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.h @@ -46,7 +46,7 @@ class OpenCLSubGraph : public SubGraphKernel { int ReSize() override; int ReSize(bool interrupt); int Run() override; - int Run(const KernelCallBack &before, const KernelCallBack &after) override { return this->Run(); }; + int Run(const KernelCallBack &before, const KernelCallBack &after) override; int InsertOpsPass(); bool IsSubGraphInferShapeDone(); diff --git a/mindspore/lite/tools/benchmark/benchmark.cc b/mindspore/lite/tools/benchmark/benchmark.cc index c0c445459e4..a9d1454d3d1 100644 --- a/mindspore/lite/tools/benchmark/benchmark.cc +++ b/mindspore/lite/tools/benchmark/benchmark.cc @@ -568,10 +568,8 @@ int Benchmark::RunBenchmark() { } auto &cpu_device_ctx = context->device_list_[0]; - if (flags_->cpu_bind_mode_ == MID_CPU) { - cpu_device_ctx.device_info_.cpu_device_info_.cpu_bind_mode_ = MID_CPU; - } else if (flags_->cpu_bind_mode_ == HIGHER_CPU) { - cpu_device_ctx.device_info_.cpu_device_info_.cpu_bind_mode_ = HIGHER_CPU; + if (flags_->cpu_bind_mode_ == MID_CPU || flags_->cpu_bind_mode_ == HIGHER_CPU) { + cpu_device_ctx.device_info_.cpu_device_info_.cpu_bind_mode_ = CpuBindMode(flags_->cpu_bind_mode_); } else { cpu_device_ctx.device_info_.cpu_device_info_.cpu_bind_mode_ = NO_BIND; } @@ -611,9 +609,8 @@ int Benchmark::RunBenchmark() { return ret; } } - if (model != nullptr) { - model->Free(); - } + if (model != nullptr) model->Free(); + ms_inputs_ = session_->GetInputs(); auto end_prepare_time = GetTimeUs(); MS_LOG(INFO) << "PrepareTime = " << (end_prepare_time - start_prepare_time) / 1000 << " ms"; @@ -682,147 +679,161 @@ void BenchmarkFlags::InitResizeDimsList() { } } -int Benchmark::InitCallbackParameter() { - if (flags_->time_profiling_) { - // before callback - before_call_back_ = [&](const std::vector &before_inputs, - const std::vector &before_outputs, - const CallBackParam &callParam) { - if (before_inputs.empty()) { - MS_LOG(INFO) << "The num of beforeInputs is empty"; - } - if (before_outputs.empty()) { - MS_LOG(INFO) << "The num of beforeOutputs is empty"; - } - if (op_times_by_type_.find(callParam.node_type) == op_times_by_type_.end()) { - op_times_by_type_.insert(std::make_pair(callParam.node_type, std::make_pair(0, 0.0f))); - } - if (op_times_by_name_.find(callParam.node_name) == op_times_by_name_.end()) { - op_times_by_name_.insert(std::make_pair(callParam.node_name, std::make_pair(0, 0.0f))); - } - - op_call_times_total_++; - op_begin_ = GetTimeUs(); - return true; - }; - - // after callback - after_call_back_ = [&](const std::vector &after_inputs, - const std::vector &after_outputs, - const CallBackParam &call_param) { - uint64_t opEnd = GetTimeUs(); - - if (after_inputs.empty()) { - MS_LOG(INFO) << "The num of after inputs is empty"; - } - if (after_outputs.empty()) { - MS_LOG(INFO) << "The num of after outputs is empty"; - } - - float cost = static_cast(opEnd - op_begin_) / 1000.0f; - op_cost_total_ += cost; - op_times_by_type_[call_param.node_type].first++; - op_times_by_type_[call_param.node_type].second += cost; - op_times_by_name_[call_param.node_name].first++; - op_times_by_name_[call_param.node_name].second += cost; - return true; - }; - } else if (flags_->perf_profiling_) { -#ifndef ENABLE_ARM64 - MS_LOG(ERROR) << "Only support perf_profiling on arm64."; - return RET_ERROR; -#else - struct perf_event_attr pe, pe2; - memset(&pe, 0, sizeof(struct perf_event_attr)); - memset(&pe2, 0, sizeof(struct perf_event_attr)); - pe.type = PERF_TYPE_HARDWARE; - pe2.type = PERF_TYPE_HARDWARE; - pe.size = sizeof(struct perf_event_attr); - pe2.size = sizeof(struct perf_event_attr); - pe.disabled = 1; - pe2.disabled = 1; - pe.exclude_kernel = 1; // don't count kernel - pe2.exclude_kernel = 1; // don't count kernel - pe.exclude_hv = 1; // don't count hypervisor - pe2.exclude_hv = 1; // don't count hypervisor - pe.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID; - pe2.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID; - if (flags_->perf_event_ == "CACHE") { - pe.config = PERF_COUNT_HW_CACHE_REFERENCES; - pe2.config = PERF_COUNT_HW_CACHE_MISSES; - } else if (flags_->perf_event_ == "STALL") { - pe.config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND; - pe2.config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND; - } else { - pe.config = PERF_COUNT_HW_CPU_CYCLES; - pe2.config = PERF_COUNT_HW_INSTRUCTIONS; +int Benchmark::InitTimeProfilingCallbackParameter() { + // before callback + before_call_back_ = [&](const std::vector &before_inputs, + const std::vector &before_outputs, + const CallBackParam &callParam) { + if (before_inputs.empty()) { + MS_LOG(INFO) << "The num of beforeInputs is empty"; } - perf_fd = syscall(__NR_perf_event_open, pe, 0, -1, -1, 0); - if (perf_fd == -1) { - MS_LOG(ERROR) << "Failed to open perf event " << pe.config; - return RET_ERROR; + if (before_outputs.empty()) { + MS_LOG(INFO) << "The num of beforeOutputs is empty"; } - perf_fd2 = syscall(__NR_perf_event_open, pe2, 0, -1, perf_fd, 0); - if (perf_fd2 == -1) { - MS_LOG(ERROR) << "Failed to open perf event " << pe2.config; - return RET_ERROR; + if (op_times_by_type_.find(callParam.node_type) == op_times_by_type_.end()) { + op_times_by_type_.insert(std::make_pair(callParam.node_type, std::make_pair(0, 0.0f))); + } + if (op_times_by_name_.find(callParam.node_name) == op_times_by_name_.end()) { + op_times_by_name_.insert(std::make_pair(callParam.node_name, std::make_pair(0, 0.0f))); } - struct PerfCount zero; - zero.value[0] = 0; - zero.value[1] = 0; - // before callback - before_call_back_ = [&](const std::vector &before_inputs, - const std::vector &before_outputs, - const CallBackParam &callParam) { - if (before_inputs.empty()) { - MS_LOG(INFO) << "The num of beforeInputs is empty"; - } - if (before_outputs.empty()) { - MS_LOG(INFO) << "The num of beforeOutputs is empty"; - } - if (op_perf_by_type_.find(callParam.node_type) == op_perf_by_type_.end()) { - op_perf_by_type_.insert(std::make_pair(callParam.node_type, std::make_pair(0, zero))); - } - if (op_perf_by_name_.find(callParam.node_name) == op_perf_by_name_.end()) { - op_perf_by_name_.insert(std::make_pair(callParam.node_name, std::make_pair(0, zero))); - } - op_call_times_total_++; - ioctl(perf_fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP); - ioctl(perf_fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP); - return true; - }; + op_call_times_total_++; + op_begin_ = GetTimeUs(); + return true; + }; - // after callback - after_call_back_ = [&](const std::vector &after_inputs, - const std::vector &after_outputs, - const CallBackParam &call_param) { - struct PerfResult res; - ioctl(perf_fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP); - read(perf_fd, &res, sizeof(struct PerfResult)); + // after callback + after_call_back_ = [&](const std::vector &after_inputs, + const std::vector &after_outputs, + const CallBackParam &call_param) { + uint64_t opEnd = GetTimeUs(); - if (after_inputs.empty()) { - MS_LOG(INFO) << "The num of after inputs is empty"; - } - if (after_outputs.empty()) { - MS_LOG(INFO) << "The num of after outputs is empty"; - } - float cost1 = static_cast(res.values[0].value); - float cost2 = static_cast(res.values[1].value); - op_cost_total_ += cost1; - op_cost2_total_ += cost2; - op_perf_by_type_[call_param.node_type].first++; - op_perf_by_type_[call_param.node_type].second.value[0] += cost1; - op_perf_by_type_[call_param.node_type].second.value[1] += cost2; - op_perf_by_name_[call_param.node_name].first++; - op_perf_by_name_[call_param.node_name].second.value[0] += cost1; - op_perf_by_name_[call_param.node_name].second.value[1] += cost2; - return true; - }; -#endif - } + if (after_inputs.empty()) { + MS_LOG(INFO) << "The num of after inputs is empty"; + } + if (after_outputs.empty()) { + MS_LOG(INFO) << "The num of after outputs is empty"; + } + + float cost = static_cast(opEnd - op_begin_) / 1000.0f; + if (flags_->device_ == "GPU") { + auto gpu_param = reinterpret_cast(call_param); + cost = static_cast(gpu_param.execute_time); + } + op_cost_total_ += cost; + op_times_by_type_[call_param.node_type].first++; + op_times_by_type_[call_param.node_type].second += cost; + op_times_by_name_[call_param.node_name].first++; + op_times_by_name_[call_param.node_name].second += cost; + return true; + }; return RET_OK; } +int Benchmark::InitPerfProfilingCallbackParameter() { +#ifndef ENABLE_ARM64 + MS_LOG(ERROR) << "Only support perf_profiling on arm64."; + return RET_ERROR; +#else + struct perf_event_attr pe, pe2; + memset(&pe, 0, sizeof(struct perf_event_attr)); + memset(&pe2, 0, sizeof(struct perf_event_attr)); + pe.type = PERF_TYPE_HARDWARE; + pe2.type = PERF_TYPE_HARDWARE; + pe.size = sizeof(struct perf_event_attr); + pe2.size = sizeof(struct perf_event_attr); + pe.disabled = 1; + pe2.disabled = 1; + pe.exclude_kernel = 1; // don't count kernel + pe2.exclude_kernel = 1; // don't count kernel + pe.exclude_hv = 1; // don't count hypervisor + pe2.exclude_hv = 1; // don't count hypervisor + pe.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID; + pe2.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID; + if (flags_->perf_event_ == "CACHE") { + pe.config = PERF_COUNT_HW_CACHE_REFERENCES; + pe2.config = PERF_COUNT_HW_CACHE_MISSES; + } else if (flags_->perf_event_ == "STALL") { + pe.config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND; + pe2.config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND; + } else { + pe.config = PERF_COUNT_HW_CPU_CYCLES; + pe2.config = PERF_COUNT_HW_INSTRUCTIONS; + } + perf_fd = syscall(__NR_perf_event_open, pe, 0, -1, -1, 0); + if (perf_fd == -1) { + MS_LOG(ERROR) << "Failed to open perf event " << pe.config; + return RET_ERROR; + } + perf_fd2 = syscall(__NR_perf_event_open, pe2, 0, -1, perf_fd, 0); + if (perf_fd2 == -1) { + MS_LOG(ERROR) << "Failed to open perf event " << pe2.config; + return RET_ERROR; + } + struct PerfCount zero; + zero.value[0] = 0; + zero.value[1] = 0; + // before callback + before_call_back_ = [&](const std::vector &before_inputs, + const std::vector &before_outputs, + const CallBackParam &callParam) { + if (before_inputs.empty()) { + MS_LOG(INFO) << "The num of beforeInputs is empty"; + } + if (before_outputs.empty()) { + MS_LOG(INFO) << "The num of beforeOutputs is empty"; + } + if (op_perf_by_type_.find(callParam.node_type) == op_perf_by_type_.end()) { + op_perf_by_type_.insert(std::make_pair(callParam.node_type, std::make_pair(0, zero))); + } + if (op_perf_by_name_.find(callParam.node_name) == op_perf_by_name_.end()) { + op_perf_by_name_.insert(std::make_pair(callParam.node_name, std::make_pair(0, zero))); + } + + op_call_times_total_++; + ioctl(perf_fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP); + ioctl(perf_fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP); + return true; + }; + + // after callback + after_call_back_ = [&](const std::vector &after_inputs, + const std::vector &after_outputs, + const CallBackParam &call_param) { + struct PerfResult res; + ioctl(perf_fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP); + read(perf_fd, &res, sizeof(struct PerfResult)); + + if (after_inputs.empty()) { + MS_LOG(INFO) << "The num of after inputs is empty"; + } + if (after_outputs.empty()) { + MS_LOG(INFO) << "The num of after outputs is empty"; + } + float cost1 = static_cast(res.values[0].value); + float cost2 = static_cast(res.values[1].value); + op_cost_total_ += cost1; + op_cost2_total_ += cost2; + op_perf_by_type_[call_param.node_type].first++; + op_perf_by_type_[call_param.node_type].second.value[0] += cost1; + op_perf_by_type_[call_param.node_type].second.value[1] += cost2; + op_perf_by_name_[call_param.node_name].first++; + op_perf_by_name_[call_param.node_name].second.value[0] += cost1; + op_perf_by_name_[call_param.node_name].second.value[1] += cost2; + return true; + }; +#endif + return RET_OK; +} + +int Benchmark::InitCallbackParameter() { + int ret = RET_OK; + if (flags_->time_profiling_) { + ret = InitTimeProfilingCallbackParameter(); + } else if (flags_->perf_profiling_) { + ret = InitPerfProfilingCallbackParameter(); + } + return ret; +} int Benchmark::Init() { if (this->flags_ == nullptr) { @@ -859,13 +870,10 @@ int Benchmark::Init() { std::cerr << "numThreads:" << this->flags_->num_threads_ << " must be greater than 0" << std::endl; return RET_ERROR; } - - if (this->flags_->cpu_bind_mode_ == 2) { - MS_LOG(INFO) << "cpuBindMode = MID_CPU"; - std::cout << "cpuBindMode = MID_CPU" << std::endl; - } else if (this->flags_->cpu_bind_mode_ == 1) { - MS_LOG(INFO) << "cpuBindMode = HIGHER_CPU"; - std::cout << "cpuBindMode = HIGHER_CPU" << std::endl; + static std::vector CPU_BIND_MODE_MAP = {"NO_BIND", "HIGHER_CPU", "MID_CPU"}; + if (this->flags_->cpu_bind_mode_ >= 1) { + MS_LOG(INFO) << "cpuBindMode = " << CPU_BIND_MODE_MAP[this->flags_->cpu_bind_mode_]; + std::cout << "cpuBindMode = " << CPU_BIND_MODE_MAP[this->flags_->cpu_bind_mode_] << std::endl; } else { MS_LOG(INFO) << "cpuBindMode = NO_BIND"; std::cout << "cpuBindMode = NO_BIND" << std::endl; diff --git a/mindspore/lite/tools/benchmark/benchmark.h b/mindspore/lite/tools/benchmark/benchmark.h index df298b1e2d9..c62c973d66d 100644 --- a/mindspore/lite/tools/benchmark/benchmark.h +++ b/mindspore/lite/tools/benchmark/benchmark.h @@ -163,6 +163,8 @@ class MS_API Benchmark { int *total_size); int InitCallbackParameter(); + int InitTimeProfilingCallbackParameter(); + int InitPerfProfilingCallbackParameter(); int PrintResult(const std::vector &title, const std::map> &result);