!13485 [MS][LITE][GPU]GPU support print profiling info

From: @chenzupeng
Reviewed-by: @ddwsky,@zhanghaibo5
Signed-off-by: @ddwsky
This commit is contained in:
mindspore-ci-bot 2021-03-18 10:28:47 +08:00 committed by Gitee
commit 4b319bbac7
9 changed files with 224 additions and 154 deletions

View File

@ -100,6 +100,10 @@ struct CallBackParam {
std::string node_type; /**< node type argument */
};
/// \brief GPUCallBackParam extends CallBackParam with GPU profiling data
/// reported by the OpenCL executor.
struct GPUCallBackParam : CallBackParam {
  // Device-side kernel execution time in milliseconds.
  // -1.0 is the sentinel for "not measured" (OpenCL profiling disabled).
  // Use a double literal; the original float literal -1.f was silently
  // converted to double.
  double execute_time{-1.0};
};
/// \brief KernelCallBack defined the function pointer for callBack.
using KernelCallBack = std::function<bool(std::vector<tensor::MSTensor *> inputs,
std::vector<tensor::MSTensor *> outputs, const CallBackParam &opInfo)>;

View File

@ -32,15 +32,18 @@ int OpenCLExecutor::RunOrTune(const std::vector<Tensor *> &inputs, const std::ve
const KernelCallBack &before, const KernelCallBack &after, bool is_tune) {
int ret{RET_OK};
auto opencl_runtime_ins = ocl_runtime.GetInstance();
if (before != nullptr && after != nullptr) {
opencl_runtime_ins->SetProfiling(true);
}
auto profiling_tmp = opencl_runtime_ins->isProfiling();
if (is_tune) {
opencl_runtime_ins->SetProfiling(true);
}
for (auto *kernel : kernels) {
MS_ASSERT(kernel);
CallBackParam callbackParam;
GPUCallBackParam callbackParam;
callbackParam.node_name = kernel->name();
callbackParam.node_type = kernel->type_str();
if (before != nullptr) {
if (!before(TensorVectorCast(kernel->in_tensors()), TensorVectorCast(kernel->out_tensors()), callbackParam)) {
MS_LOG(ERROR) << "run kernel before_callback failed, name: " << kernel->name();
@ -70,9 +73,12 @@ int OpenCLExecutor::RunOrTune(const std::vector<Tensor *> &inputs, const std::ve
MS_LOG(ERROR) << "run kernel failed, name: " << kernel->name();
return ret;
}
if (profiling_tmp)
if (profiling_tmp) {
auto execute_time = op_kernel->GetProfilingTimeMs();
MS_LOG(INFO) << "OpenCl kernel " << kernel->name() << "(" << kernel->type_str()
<< ") execute time is: " << op_kernel->GetProfilingTimeMs() << "ms";
callbackParam.execute_time = execute_time;
}
}
ret = kernel->PostProcess();
if (ret != RET_OK) {

View File

@ -198,12 +198,29 @@ int WinogradOpenCLKernel::Run() {
ocl_runtime_->RunKernel(kernel_4x4to36_, global_4x4to36_, local_4x4to36_, nullptr, &event_);
MS_LOG(DEBUG) << "winograd kernel1 Running!";
ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &kernel2_event_);
MS_LOG(DEBUG) << "winograd kernel2 Running!";
ocl_runtime_->SetKernelArg(kernel_36to4x4_, 1, out_tensors_.front()->data_c());
ocl_runtime_->RunKernel(kernel_36to4x4_, global_36to4x4_, local_36to4x4_, nullptr, &event_);
ocl_runtime_->RunKernel(kernel_36to4x4_, global_36to4x4_, local_36to4x4_, nullptr, &kernel3_event_);
return RET_OK;
}
double WinogradOpenCLKernel::GetProfilingTimeMs() {
if (!ocl_runtime_->isProfiling()) {
return MAX_PROFILING_TIME_MILLI_SECOND;
}
cl_ulong time_start;
cl_ulong time_end;
event_.getProfilingInfo(CL_PROFILING_COMMAND_START, &time_start);
event_.getProfilingInfo(CL_PROFILING_COMMAND_END, &time_end);
cl_ulong time_ns = time_end - time_start;
kernel2_event_.getProfilingInfo(CL_PROFILING_COMMAND_START, &time_start);
kernel2_event_.getProfilingInfo(CL_PROFILING_COMMAND_END, &time_end);
time_ns += time_end - time_start;
kernel3_event_.getProfilingInfo(CL_PROFILING_COMMAND_START, &time_start);
kernel3_event_.getProfilingInfo(CL_PROFILING_COMMAND_END, &time_end);
time_ns += time_end - time_start;
return static_cast<double>(time_ns) * 1e-6;
}
} // namespace mindspore::kernel

View File

@ -39,6 +39,7 @@ class WinogradOpenCLKernel : public Conv2DOpenCLKernel {
std::vector<BaseTuningParameter> GenerateTuningParam() override { return {}; }
int Tune() override { return RET_OK; }
double GetProfilingTimeMs() override;
private:
void BuildKernel() override;
@ -47,8 +48,10 @@ class WinogradOpenCLKernel : public Conv2DOpenCLKernel {
cl::Kernel kernel_4x4to36_;
cl::Kernel kernel_36to4x4_;
cl::Event kernel2_event_;
cl::NDRange global_4x4to36_, local_4x4to36_;
cl::NDRange global_36to4x4_, local_36to4x4_;
cl::Event kernel3_event_;
void *winograd_mem0_{nullptr};
void *winograd_mem1_{nullptr};
};

View File

@ -195,7 +195,7 @@ class OpenCLKernel : public LiteKernel {
lite::opencl::MemType GetMemType() { return out_mem_type_; }
void SetMemType(lite::opencl::MemType mem_type) { out_mem_type_ = mem_type; }
OpParameter *GetParameter() { return op_parameter_; }
double GetProfilingTimeMs();
virtual double GetProfilingTimeMs();
int DequantWeight();
void FreeDequantedWeight();
virtual int InferShape();

View File

@ -18,6 +18,7 @@
#include <set>
#include <map>
#include <string>
#include <utility>
#include "src/runtime/gpu/opencl/opencl_executor.h"
#include "src/runtime/kernel/opencl/utils.h"
#include "src/runtime/kernel/opencl/kernel/to_format.h"
@ -467,4 +468,33 @@ int OpenCLSubGraph::Run() {
}
return RET_OK;
}
int OpenCLSubGraph::Run(const KernelCallBack &before, const KernelCallBack &after) {
  // Run the OpenCL subgraph with per-node before/after callbacks (used by
  // the benchmark tool to collect per-op profiling info).
  if (executor_ == nullptr) {
    MS_LOG(ERROR) << "executor is nullptr";
    return RET_ERROR;
  }
  // Input buffers may still be mapped for host access; unmap them so the
  // GPU kernels can read the data.
  for (auto &tensor : in_tensors_) {
    MS_ASSERT(tensor);
    if (tensor->data_c() == nullptr) {
      MS_LOG(ERROR) << "OpenCL subgraph input tensor data is null";
      return RET_ERROR;
    }
    // Declare at first use instead of one uninitialized `int ret;` shared
    // across the whole function.
    int unmap_ret = allocator_->UnmapBuffer(tensor->data_c());
    if (unmap_ret != RET_OK) {
      return unmap_ret;
    }
  }
  int ret = executor_->Run(in_tensors_, out_tensors_, nodes_, allocator_, before, after);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Run opencl executor failed: " << ret;
    return ret;
  }
  // Block until all queued GPU work has completed before returning, so the
  // outputs are valid for the caller.
  if (!ocl_runtime_->SyncCommandQueue()) {
    return RET_ERROR;
  }
  return RET_OK;
}
} // namespace mindspore::kernel

View File

@ -46,7 +46,7 @@ class OpenCLSubGraph : public SubGraphKernel {
int ReSize() override;
int ReSize(bool interrupt);
int Run() override;
int Run(const KernelCallBack &before, const KernelCallBack &after) override { return this->Run(); };
int Run(const KernelCallBack &before, const KernelCallBack &after) override;
int InsertOpsPass();
bool IsSubGraphInferShapeDone();

View File

@ -568,10 +568,8 @@ int Benchmark::RunBenchmark() {
}
auto &cpu_device_ctx = context->device_list_[0];
if (flags_->cpu_bind_mode_ == MID_CPU) {
cpu_device_ctx.device_info_.cpu_device_info_.cpu_bind_mode_ = MID_CPU;
} else if (flags_->cpu_bind_mode_ == HIGHER_CPU) {
cpu_device_ctx.device_info_.cpu_device_info_.cpu_bind_mode_ = HIGHER_CPU;
if (flags_->cpu_bind_mode_ == MID_CPU || flags_->cpu_bind_mode_ == HIGHER_CPU) {
cpu_device_ctx.device_info_.cpu_device_info_.cpu_bind_mode_ = CpuBindMode(flags_->cpu_bind_mode_);
} else {
cpu_device_ctx.device_info_.cpu_device_info_.cpu_bind_mode_ = NO_BIND;
}
@ -611,9 +609,8 @@ int Benchmark::RunBenchmark() {
return ret;
}
}
if (model != nullptr) {
model->Free();
}
if (model != nullptr) model->Free();
ms_inputs_ = session_->GetInputs();
auto end_prepare_time = GetTimeUs();
MS_LOG(INFO) << "PrepareTime = " << (end_prepare_time - start_prepare_time) / 1000 << " ms";
@ -682,147 +679,161 @@ void BenchmarkFlags::InitResizeDimsList() {
}
}
int Benchmark::InitCallbackParameter() {
if (flags_->time_profiling_) {
// before callback
before_call_back_ = [&](const std::vector<mindspore::tensor::MSTensor *> &before_inputs,
const std::vector<mindspore::tensor::MSTensor *> &before_outputs,
const CallBackParam &callParam) {
if (before_inputs.empty()) {
MS_LOG(INFO) << "The num of beforeInputs is empty";
}
if (before_outputs.empty()) {
MS_LOG(INFO) << "The num of beforeOutputs is empty";
}
if (op_times_by_type_.find(callParam.node_type) == op_times_by_type_.end()) {
op_times_by_type_.insert(std::make_pair(callParam.node_type, std::make_pair(0, 0.0f)));
}
if (op_times_by_name_.find(callParam.node_name) == op_times_by_name_.end()) {
op_times_by_name_.insert(std::make_pair(callParam.node_name, std::make_pair(0, 0.0f)));
}
op_call_times_total_++;
op_begin_ = GetTimeUs();
return true;
};
// after callback
after_call_back_ = [&](const std::vector<mindspore::tensor::MSTensor *> &after_inputs,
const std::vector<mindspore::tensor::MSTensor *> &after_outputs,
const CallBackParam &call_param) {
uint64_t opEnd = GetTimeUs();
if (after_inputs.empty()) {
MS_LOG(INFO) << "The num of after inputs is empty";
}
if (after_outputs.empty()) {
MS_LOG(INFO) << "The num of after outputs is empty";
}
float cost = static_cast<float>(opEnd - op_begin_) / 1000.0f;
op_cost_total_ += cost;
op_times_by_type_[call_param.node_type].first++;
op_times_by_type_[call_param.node_type].second += cost;
op_times_by_name_[call_param.node_name].first++;
op_times_by_name_[call_param.node_name].second += cost;
return true;
};
} else if (flags_->perf_profiling_) {
#ifndef ENABLE_ARM64
MS_LOG(ERROR) << "Only support perf_profiling on arm64.";
return RET_ERROR;
#else
struct perf_event_attr pe, pe2;
memset(&pe, 0, sizeof(struct perf_event_attr));
memset(&pe2, 0, sizeof(struct perf_event_attr));
pe.type = PERF_TYPE_HARDWARE;
pe2.type = PERF_TYPE_HARDWARE;
pe.size = sizeof(struct perf_event_attr);
pe2.size = sizeof(struct perf_event_attr);
pe.disabled = 1;
pe2.disabled = 1;
pe.exclude_kernel = 1; // don't count kernel
pe2.exclude_kernel = 1; // don't count kernel
pe.exclude_hv = 1; // don't count hypervisor
pe2.exclude_hv = 1; // don't count hypervisor
pe.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
pe2.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
if (flags_->perf_event_ == "CACHE") {
pe.config = PERF_COUNT_HW_CACHE_REFERENCES;
pe2.config = PERF_COUNT_HW_CACHE_MISSES;
} else if (flags_->perf_event_ == "STALL") {
pe.config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND;
pe2.config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND;
} else {
pe.config = PERF_COUNT_HW_CPU_CYCLES;
pe2.config = PERF_COUNT_HW_INSTRUCTIONS;
int Benchmark::InitTimeProfilingCallbackParameter() {
// before callback
before_call_back_ = [&](const std::vector<mindspore::tensor::MSTensor *> &before_inputs,
const std::vector<mindspore::tensor::MSTensor *> &before_outputs,
const CallBackParam &callParam) {
if (before_inputs.empty()) {
MS_LOG(INFO) << "The num of beforeInputs is empty";
}
perf_fd = syscall(__NR_perf_event_open, pe, 0, -1, -1, 0);
if (perf_fd == -1) {
MS_LOG(ERROR) << "Failed to open perf event " << pe.config;
return RET_ERROR;
if (before_outputs.empty()) {
MS_LOG(INFO) << "The num of beforeOutputs is empty";
}
perf_fd2 = syscall(__NR_perf_event_open, pe2, 0, -1, perf_fd, 0);
if (perf_fd2 == -1) {
MS_LOG(ERROR) << "Failed to open perf event " << pe2.config;
return RET_ERROR;
if (op_times_by_type_.find(callParam.node_type) == op_times_by_type_.end()) {
op_times_by_type_.insert(std::make_pair(callParam.node_type, std::make_pair(0, 0.0f)));
}
if (op_times_by_name_.find(callParam.node_name) == op_times_by_name_.end()) {
op_times_by_name_.insert(std::make_pair(callParam.node_name, std::make_pair(0, 0.0f)));
}
struct PerfCount zero;
zero.value[0] = 0;
zero.value[1] = 0;
// before callback
before_call_back_ = [&](const std::vector<mindspore::tensor::MSTensor *> &before_inputs,
const std::vector<mindspore::tensor::MSTensor *> &before_outputs,
const CallBackParam &callParam) {
if (before_inputs.empty()) {
MS_LOG(INFO) << "The num of beforeInputs is empty";
}
if (before_outputs.empty()) {
MS_LOG(INFO) << "The num of beforeOutputs is empty";
}
if (op_perf_by_type_.find(callParam.node_type) == op_perf_by_type_.end()) {
op_perf_by_type_.insert(std::make_pair(callParam.node_type, std::make_pair(0, zero)));
}
if (op_perf_by_name_.find(callParam.node_name) == op_perf_by_name_.end()) {
op_perf_by_name_.insert(std::make_pair(callParam.node_name, std::make_pair(0, zero)));
}
op_call_times_total_++;
ioctl(perf_fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP);
ioctl(perf_fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP);
return true;
};
op_call_times_total_++;
op_begin_ = GetTimeUs();
return true;
};
// after callback
after_call_back_ = [&](const std::vector<mindspore::tensor::MSTensor *> &after_inputs,
const std::vector<mindspore::tensor::MSTensor *> &after_outputs,
const CallBackParam &call_param) {
struct PerfResult res;
ioctl(perf_fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP);
read(perf_fd, &res, sizeof(struct PerfResult));
// after callback
after_call_back_ = [&](const std::vector<mindspore::tensor::MSTensor *> &after_inputs,
const std::vector<mindspore::tensor::MSTensor *> &after_outputs,
const CallBackParam &call_param) {
uint64_t opEnd = GetTimeUs();
if (after_inputs.empty()) {
MS_LOG(INFO) << "The num of after inputs is empty";
}
if (after_outputs.empty()) {
MS_LOG(INFO) << "The num of after outputs is empty";
}
float cost1 = static_cast<float>(res.values[0].value);
float cost2 = static_cast<float>(res.values[1].value);
op_cost_total_ += cost1;
op_cost2_total_ += cost2;
op_perf_by_type_[call_param.node_type].first++;
op_perf_by_type_[call_param.node_type].second.value[0] += cost1;
op_perf_by_type_[call_param.node_type].second.value[1] += cost2;
op_perf_by_name_[call_param.node_name].first++;
op_perf_by_name_[call_param.node_name].second.value[0] += cost1;
op_perf_by_name_[call_param.node_name].second.value[1] += cost2;
return true;
};
#endif
}
if (after_inputs.empty()) {
MS_LOG(INFO) << "The num of after inputs is empty";
}
if (after_outputs.empty()) {
MS_LOG(INFO) << "The num of after outputs is empty";
}
float cost = static_cast<float>(opEnd - op_begin_) / 1000.0f;
if (flags_->device_ == "GPU") {
auto gpu_param = reinterpret_cast<const GPUCallBackParam &>(call_param);
cost = static_cast<float>(gpu_param.execute_time);
}
op_cost_total_ += cost;
op_times_by_type_[call_param.node_type].first++;
op_times_by_type_[call_param.node_type].second += cost;
op_times_by_name_[call_param.node_name].first++;
op_times_by_name_[call_param.node_name].second += cost;
return true;
};
return RET_OK;
}
int Benchmark::InitPerfProfilingCallbackParameter() {
// Install before/after kernel callbacks that read two hardware PMU counters
// per op via Linux perf_event_open. Only supported on arm64 builds.
#ifndef ENABLE_ARM64
MS_LOG(ERROR) << "Only support perf_profiling on arm64.";
return RET_ERROR;
#else
// pe is the perf event group leader; pe2 joins the same group below so both
// counters are reset/enabled/disabled/read together as one unit.
struct perf_event_attr pe, pe2;
memset(&pe, 0, sizeof(struct perf_event_attr));
memset(&pe2, 0, sizeof(struct perf_event_attr));
pe.type = PERF_TYPE_HARDWARE;
pe2.type = PERF_TYPE_HARDWARE;
pe.size = sizeof(struct perf_event_attr);
pe2.size = sizeof(struct perf_event_attr);
// Counters start disabled; counting is switched on per-op in the before
// callback and off again in the after callback.
pe.disabled = 1;
pe2.disabled = 1;
pe.exclude_kernel = 1; // don't count kernel
pe2.exclude_kernel = 1; // don't count kernel
pe.exclude_hv = 1; // don't count hypervisor
pe2.exclude_hv = 1; // don't count hypervisor
// Group read format: one read() returns both counter values plus IDs.
pe.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
pe2.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
// Select the counter pair from the --perfEvent flag; the default pair is
// CPU cycles + retired instructions.
if (flags_->perf_event_ == "CACHE") {
pe.config = PERF_COUNT_HW_CACHE_REFERENCES;
pe2.config = PERF_COUNT_HW_CACHE_MISSES;
} else if (flags_->perf_event_ == "STALL") {
pe.config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND;
pe2.config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND;
} else {
pe.config = PERF_COUNT_HW_CPU_CYCLES;
pe2.config = PERF_COUNT_HW_INSTRUCTIONS;
}
// NOTE(review): perf_event_open(2) takes a *pointer* to perf_event_attr;
// passing the struct by value to syscall() here looks wrong — confirm this
// works on the target ABI or should be &pe / &pe2.
perf_fd = syscall(__NR_perf_event_open, pe, 0, -1, -1, 0);
if (perf_fd == -1) {
MS_LOG(ERROR) << "Failed to open perf event " << pe.config;
return RET_ERROR;
}
// group_fd = perf_fd makes the second counter a member of pe's group.
perf_fd2 = syscall(__NR_perf_event_open, pe2, 0, -1, perf_fd, 0);
if (perf_fd2 == -1) {
MS_LOG(ERROR) << "Failed to open perf event " << pe2.config;
return RET_ERROR;
}
// Zero-initialized counter pair used when first inserting an op into the
// per-type / per-name accumulation maps.
struct PerfCount zero;
zero.value[0] = 0;
zero.value[1] = 0;
// before callback
before_call_back_ = [&](const std::vector<mindspore::tensor::MSTensor *> &before_inputs,
const std::vector<mindspore::tensor::MSTensor *> &before_outputs,
const CallBackParam &callParam) {
if (before_inputs.empty()) {
MS_LOG(INFO) << "The num of beforeInputs is empty";
}
if (before_outputs.empty()) {
MS_LOG(INFO) << "The num of beforeOutputs is empty";
}
// Ensure accumulation entries exist for this op's type and name.
if (op_perf_by_type_.find(callParam.node_type) == op_perf_by_type_.end()) {
op_perf_by_type_.insert(std::make_pair(callParam.node_type, std::make_pair(0, zero)));
}
if (op_perf_by_name_.find(callParam.node_name) == op_perf_by_name_.end()) {
op_perf_by_name_.insert(std::make_pair(callParam.node_name, std::make_pair(0, zero)));
}
op_call_times_total_++;
// Reset then enable the whole counter group just before the op runs.
ioctl(perf_fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP);
ioctl(perf_fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP);
return true;
};
// after callback
after_call_back_ = [&](const std::vector<mindspore::tensor::MSTensor *> &after_inputs,
const std::vector<mindspore::tensor::MSTensor *> &after_outputs,
const CallBackParam &call_param) {
// Stop counting first so the read reflects only this op's execution.
struct PerfResult res;
ioctl(perf_fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP);
read(perf_fd, &res, sizeof(struct PerfResult));
if (after_inputs.empty()) {
MS_LOG(INFO) << "The num of after inputs is empty";
}
if (after_outputs.empty()) {
MS_LOG(INFO) << "The num of after outputs is empty";
}
// Accumulate both counter values into totals and per-type / per-name maps.
float cost1 = static_cast<float>(res.values[0].value);
float cost2 = static_cast<float>(res.values[1].value);
op_cost_total_ += cost1;
op_cost2_total_ += cost2;
op_perf_by_type_[call_param.node_type].first++;
op_perf_by_type_[call_param.node_type].second.value[0] += cost1;
op_perf_by_type_[call_param.node_type].second.value[1] += cost2;
op_perf_by_name_[call_param.node_name].first++;
op_perf_by_name_[call_param.node_name].second.value[0] += cost1;
op_perf_by_name_[call_param.node_name].second.value[1] += cost2;
return true;
};
#endif
return RET_OK;
}
int Benchmark::InitCallbackParameter() {
  // Dispatch to the profiling mode selected on the command line; when
  // neither flag is set, no callbacks are installed and this is a no-op.
  if (flags_->time_profiling_) {
    return InitTimeProfilingCallbackParameter();
  }
  if (flags_->perf_profiling_) {
    return InitPerfProfilingCallbackParameter();
  }
  return RET_OK;
}
int Benchmark::Init() {
if (this->flags_ == nullptr) {
@ -859,13 +870,10 @@ int Benchmark::Init() {
std::cerr << "numThreads:" << this->flags_->num_threads_ << " must be greater than 0" << std::endl;
return RET_ERROR;
}
if (this->flags_->cpu_bind_mode_ == 2) {
MS_LOG(INFO) << "cpuBindMode = MID_CPU";
std::cout << "cpuBindMode = MID_CPU" << std::endl;
} else if (this->flags_->cpu_bind_mode_ == 1) {
MS_LOG(INFO) << "cpuBindMode = HIGHER_CPU";
std::cout << "cpuBindMode = HIGHER_CPU" << std::endl;
static std::vector<std::string> CPU_BIND_MODE_MAP = {"NO_BIND", "HIGHER_CPU", "MID_CPU"};
if (this->flags_->cpu_bind_mode_ >= 1) {
MS_LOG(INFO) << "cpuBindMode = " << CPU_BIND_MODE_MAP[this->flags_->cpu_bind_mode_];
std::cout << "cpuBindMode = " << CPU_BIND_MODE_MAP[this->flags_->cpu_bind_mode_] << std::endl;
} else {
MS_LOG(INFO) << "cpuBindMode = NO_BIND";
std::cout << "cpuBindMode = NO_BIND" << std::endl;

View File

@ -163,6 +163,8 @@ class MS_API Benchmark {
int *total_size);
int InitCallbackParameter();
int InitTimeProfilingCallbackParameter();
int InitPerfProfilingCallbackParameter();
int PrintResult(const std::vector<std::string> &title, const std::map<std::string, std::pair<int, float>> &result);