!26022 [MS][LITE][Develop] add c api benchmark

Merge pull request !26022 from sunsuodong/add_c_api_1.3_b
2021-11-11 01:40:38 +00:00 · 2021-11-11 01:40:38 +00:00 · 050a166e78
parent 71a75387a4 3358ed610b
commit 050a166e78
13 changed files with 638 additions and 103 deletions
--- a/mindspore/lite/src/c_api/context_c.cc
+++ b/mindspore/lite/src/c_api/context_c.cc
@ -134,7 +134,7 @@ MSDeviceInfoHandle MSDeviceInfoCreate(MSDeviceType device_type) {
  } else if (device_type == kMSDeviceTypeKirinNPU) {
    impl = new (std::nothrow) mindspore::KirinNPUDeviceInfo();
  } else {
-    MS_LOG(ERROR) << "Unsupported Feature.";
+    MS_LOG(ERROR) << "Unsupported Feature. device_type: " << device_type;
    return nullptr;
  }
  if (impl == nullptr) {
@ -227,7 +227,7 @@ bool MSDeviceInfoGetEnableFP16(const MSDeviceInfoHandle device_info) {
    auto impl = static_cast<mindspore::GPUDeviceInfo *>(device_info);
    return impl->GetEnableFP16();
  } else {
-    MS_LOG(ERROR) << "Unsupported Feature.";
+    MS_LOG(ERROR) << "Unsupported Feature. device_type: " << device_type;
    return false;
  }
 }
--- a/mindspore/lite/src/c_api/model_c.cc
+++ b/mindspore/lite/src/c_api/model_c.cc
@ -95,7 +95,7 @@ Status ModelC::Resize(const std::vector<MSTensor::Impl *> &inputs, const std::ve
  for (size_t i = 0; i < input_num; i++) {
    auto input = inputs[i];
    if (input == nullptr || input->lite_tensor() == nullptr) {
-      MS_LOG(ERROR) << "Input tensor " << input->Name() << " is null.";
+      MS_LOG(ERROR) << "Input tensor is null.";
      return kLiteInputTensorError;
    }
    inner_input.push_back(input->lite_tensor());
--- a/mindspore/lite/tools/benchmark/CMakeLists.txt
+++ b/mindspore/lite/tools/benchmark/CMakeLists.txt
@ -13,6 +13,7 @@ add_executable(benchmark
        ${CMAKE_CURRENT_SOURCE_DIR}/benchmark_base.cc
        ${CMAKE_CURRENT_SOURCE_DIR}/benchmark.cc
        ${CMAKE_CURRENT_SOURCE_DIR}/benchmark_unified_api.cc
+        ${CMAKE_CURRENT_SOURCE_DIR}/benchmark_c_api.cc
        ${COMMON_SRC})

 add_dependencies(benchmark fbs_src)
--- a/mindspore/lite/tools/benchmark/benchmark.cc
+++ b/mindspore/lite/tools/benchmark/benchmark.cc
@ -108,42 +108,13 @@ int Benchmark::ReadInputFile() {
  return RET_OK;
 }

-int Benchmark::ReadTensorData(std::ifstream &in_file_stream, const std::string &tensor_name,
-                              const std::vector<size_t> &dims) {
-  std::string line;
-  getline(in_file_stream, line);
-  std::stringstream line_stream(line);
-  if (this->benchmark_data_.find(tensor_name) != this->benchmark_data_.end()) {
-    return RET_OK;
-  }
-  tensor::MSTensor *tensor = GetTensorByNameOrShape(tensor_name, dims);
+TypeId Benchmark::GetDataTypeByNameOrShape(const std::string &tensor_name, const std::vector<size_t> &dims) {
+  auto tensor = GetTensorByNameOrShape(tensor_name, dims);
  if (tensor == nullptr) {
    MS_LOG(ERROR) << "Get tensor failed, tensor name: " << tensor_name;
-    return RET_ERROR;
+    return kTypeUnknown;
  }
-  std::vector<float> data;
-  std::vector<std::string> strings_data;
-  size_t shape_size = std::accumulate(dims.begin(), dims.end(), 1, std::multiplies<size_t>());
-  if (tensor->data_type() == kObjectTypeString) {
-    strings_data.push_back(line);
-    for (size_t i = 1; i < shape_size; i++) {
-      getline(in_file_stream, line);
-      strings_data.push_back(line);
-    }
-  } else {
-    for (size_t i = 0; i < shape_size; i++) {
-      float tmp_data;
-      line_stream >> tmp_data;
-      data.push_back(tmp_data);
-    }
-  }
-  auto *check_tensor = new (std::nothrow) CheckTensor(dims, data, strings_data);
-  if (check_tensor == nullptr) {
-    MS_LOG(ERROR) << "New CheckTensor failed, tensor name: " << tensor_name;
-    return RET_ERROR;
-  }
-  this->benchmark_data_.insert(std::make_pair(tensor_name, check_tensor));
-  return RET_OK;
+  return tensor->data_type();
 }

 void Benchmark::InitContext(const std::shared_ptr<Context> &context) {
@ -184,7 +155,8 @@ int Benchmark::CompareOutput() {
    }
    int ret;
    if (tensor->data_type() == kObjectTypeString) {
-      ret = CompareStringData(node_or_tensor_name, tensor);
+      std::vector<std::string> output_strings = MSTensorToStrings(tensor);
+      ret = CompareStringData(node_or_tensor_name, output_strings);
    } else {
      ret = CompareDataGetTotalBiasAndSize(node_or_tensor_name, tensor, &total_bias, &total_size);
    }
@ -524,13 +496,6 @@ int Benchmark::RunBenchmark() {
  }
  if (!flags_->benchmark_data_file_.empty()) {
    status = MarkAccuracy();
-    for (auto &data : benchmark_data_) {
-      data.second->shape.clear();
-      data.second->data.clear();
-      delete data.second;
-      data.second = nullptr;
-    }
-    benchmark_data_.clear();
    if (status != 0) {
      MS_LOG(ERROR) << "Run MarkAccuracy error: " << status;
      std::cout << "Run MarkAccuracy error: " << status << std::endl;
--- a/mindspore/lite/tools/benchmark/benchmark.h
+++ b/mindspore/lite/tools/benchmark/benchmark.h
@ -53,11 +53,10 @@ class MS_API Benchmark : public BenchmarkBase {

  int ReadInputFile() override;

-  int ReadTensorData(std::ifstream &in_file_stream, const std::string &tensor_name,
-                     const std::vector<size_t> &dims) override;
-
  void InitContext(const std::shared_ptr<Context> &context);

+  TypeId GetDataTypeByNameOrShape(const std::string &tensor_name, const std::vector<size_t> &dims) override;
+
  int CompareOutput() override;

  tensor::MSTensor *GetTensorByNameOrShape(const std::string &node_or_tensor_name, const std::vector<size_t> &dims);
--- a/mindspore/lite/tools/benchmark/benchmark_base.cc
+++ b/mindspore/lite/tools/benchmark/benchmark_base.cc
@ -173,11 +173,48 @@ int BenchmarkBase::ReadCalibData() {
  return RET_OK;
 }

-int BenchmarkBase::CompareStringData(const std::string &name, tensor::MSTensor *tensor) {
+// Reads the data line(s) of one calibration tensor from `in_file_stream` and caches the parsed
+// values in benchmark_data_ under `tensor_name`.
+//
+// The first data line is consumed before the cache lookup, so the stream advances even when the
+// tensor is already cached. String tensors use one line per element (the first element is the
+// line already read); every other type is parsed as floats from that single line.
+//
+// in_file_stream: calibration stream positioned at the tensor's data line.
+// tensor_name:    cache key, also used to look up the tensor's data type.
+// dims:           tensor shape; the product of its entries is the number of elements read.
+// Returns RET_OK on success or cache hit, RET_ERROR when the data type is unknown or the
+// CheckTensor allocation fails.
+int BenchmarkBase::ReadTensorData(std::ifstream &in_file_stream, const std::string &tensor_name,
+                                  const std::vector<size_t> &dims) {
+  std::string line;
+  getline(in_file_stream, line);
+  std::stringstream line_stream(line);
+  if (this->benchmark_data_.find(tensor_name) != this->benchmark_data_.end()) {
+    return RET_OK;
+  }
+  TypeId data_type = GetDataTypeByNameOrShape(tensor_name, dims);
+  if (data_type == kTypeUnknown) {
+    MS_LOG(ERROR) << "Get data type failed, tensor name: " << tensor_name;
+    return RET_ERROR;
+  }
+  std::vector<float> data;
+  std::vector<std::string> strings_data;
+  // The seed decides the accumulator type: an int literal would carry the product as int.
+  size_t shape_size = std::accumulate(dims.begin(), dims.end(), static_cast<size_t>(1), std::multiplies<size_t>());
+  if (data_type == kObjectTypeString) {
+    strings_data.push_back(line);
+    for (size_t i = 1; i < shape_size; i++) {
+      getline(in_file_stream, line);
+      strings_data.push_back(line);
+    }
+  } else {
+    for (size_t i = 0; i < shape_size; i++) {
+      // Initialised because a stream that has already failed leaves the target untouched.
+      float tmp_data = 0.0f;
+      line_stream >> tmp_data;
+      data.push_back(tmp_data);
+    }
+  }
+  auto *check_tensor = new (std::nothrow) CheckTensor(dims, data, strings_data);
+  if (check_tensor == nullptr) {
+    MS_LOG(ERROR) << "New CheckTensor failed, tensor name: " << tensor_name;
+    return RET_ERROR;
+  }
+  this->benchmark_data_.insert(std::make_pair(tensor_name, check_tensor));
+  return RET_OK;
+}
+
+int BenchmarkBase::CompareStringData(const std::string &name, const std::vector<std::string> &output_strings) {
  auto iter = this->benchmark_data_.find(name);
  if (iter != this->benchmark_data_.end()) {
-    std::vector<std::string> calib_strings = iter->second->strings_data;
-    std::vector<std::string> output_strings = MSTensorToStrings(tensor);
+    std::vector<std::string> &calib_strings = iter->second->strings_data;
    size_t compare_num = std::min(calib_strings.size(), output_strings.size());
    size_t print_num = std::min(compare_num, static_cast<size_t>(kNumPrintMin));

@ -207,6 +244,9 @@ void BenchmarkFlags::InitInputDataList() {

 void BenchmarkFlags::InitResizeDimsList() {
  std::string content = this->resize_dims_in_;
+  if (content.empty()) {
+    return;
+  }
  std::vector<int> shape;
  auto shape_strs = StrSplit(content, std::string(DELIM_COLON));
  for (const auto &shape_str : shape_strs) {
@ -563,8 +603,11 @@ int BenchmarkBase::PrintPerfResult(const std::vector<std::string> &title,
 #endif

 BenchmarkBase::~BenchmarkBase() {
-  for (const auto &iter : this->benchmark_data_) {
-    delete (iter.second);
+  for (auto &iter : this->benchmark_data_) {
+    iter.second->shape.clear();
+    iter.second->data.clear();
+    delete iter.second;
+    iter.second = nullptr;
  }
  this->benchmark_data_.clear();
 }
--- a/mindspore/lite/tools/benchmark/benchmark_base.h
+++ b/mindspore/lite/tools/benchmark/benchmark_base.h
@ -38,6 +38,10 @@
 #include "schema/model_generated.h"

 namespace mindspore::lite {
+// Reports an error message through both MS_LOG(ERROR) and std::cerr.
+// NOTE(review): the expansion is two separate statements that already end with a semicolon, and
+// `str` is expanded (hence evaluated) twice. Use it only inside a braced scope and with
+// side-effect-free arguments.
+#define BENCHMARK_LOG_ERROR(str) \
+  MS_LOG(ERROR) << str;          \
+  std::cerr << str << std::endl;
+
 enum MS_API InDataType { kImage = 0, kBinary = 1 };

 enum MS_API AiModelDescription_Frequency {
@ -182,12 +186,13 @@ class MS_API BenchmarkBase {

  int ReadCalibData();

-  virtual int ReadTensorData(std::ifstream &in_file_stream, const std::string &tensor_name,
-                             const std::vector<size_t> &dims) = 0;
+  int ReadTensorData(std::ifstream &in_file_stream, const std::string &tensor_name, const std::vector<size_t> &dims);
+
+  virtual TypeId GetDataTypeByNameOrShape(const std::string &tensor_name, const std::vector<size_t> &dims) = 0;

  virtual int CompareOutput() = 0;

-  int CompareStringData(const std::string &name, tensor::MSTensor *tensor);
+  int CompareStringData(const std::string &name, const std::vector<std::string> &output_strings);

  int InitDumpConfigFromJson(char *path);

--- a/mindspore/lite/tools/benchmark/benchmark_c_api.cc
+++ b/mindspore/lite/tools/benchmark/benchmark_c_api.cc
@ -0,0 +1,464 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "tools/benchmark/benchmark_c_api.h"
+#include <algorithm>
+#include <map>
+#include <string>
+#include <utility>
+
+using mindspore::lite::GetTimeUs;
+using mindspore::lite::kFloatMSEC;
+using mindspore::lite::RET_ERROR;
+using mindspore::lite::RET_OK;
+
+namespace mindspore {
+namespace tools {
+
+// Runs the whole C-API benchmark: creates the context, builds the model from
+// flags_->model_file_, optionally resizes the inputs to flags_->resize_dims_, loads the input
+// data and then runs either the accuracy check (when a benchmark data file is given) or the
+// performance loop.
+//
+// Returns RET_OK on success; otherwise RET_ERROR or the status code of the failing step.
+int BenchmarkCApi::RunBenchmark() {
+  auto start_prepare_time = GetTimeUs();
+  int ret = InitContext();
+  if (ret != RET_OK) {
+    BENCHMARK_LOG_ERROR("InitContext failed, ret: " << ret);
+    return ret;
+  }
+  model_ = MSModelCreate();
+  if (model_ == nullptr) {
+    BENCHMARK_LOG_ERROR("MSModelCreate failed");
+    return RET_ERROR;
+  }
+  ret = MSModelBuildFromFile(model_, flags_->model_file_.c_str(), kMSModelTypeMindIR, context_);
+  if (ret != kMSStatusSuccess) {
+    BENCHMARK_LOG_ERROR("MSModelBuildFromFile failed, ret: " << ret);
+    return ret;
+  }
+  inputs_ = MSModelGetInputs(model_);
+  if (inputs_.handle_list == nullptr) {
+    // `ret` still holds the successful build status here, so an explicit error is returned.
+    BENCHMARK_LOG_ERROR("MSModelGetInputs failed");
+    return RET_ERROR;
+  }
+  if (!flags_->resize_dims_.empty()) {
+    // inputs_.handle_num is passed below as the element count of shape_infos, so both must agree.
+    if (flags_->resize_dims_.size() != inputs_.handle_num) {
+      BENCHMARK_LOG_ERROR("Resize dims count " << flags_->resize_dims_.size() << " does not match input count "
+                                                << inputs_.handle_num);
+      return RET_ERROR;
+    }
+    std::vector<MSShapeInfo> shape_infos;
+    shape_infos.reserve(flags_->resize_dims_.size());
+    for (const auto &dims : flags_->resize_dims_) {
+      MSShapeInfo shape_info = {};
+      // Capacity of the fixed-size shape array inside MSShapeInfo.
+      constexpr size_t kMaxShapeNum = sizeof(shape_info.shape) / sizeof(shape_info.shape[0]);
+      if (dims.size() > kMaxShapeNum) {
+        BENCHMARK_LOG_ERROR("Resize dims rank " << dims.size() << " exceeds the supported maximum " << kMaxShapeNum);
+        return RET_ERROR;
+      }
+      shape_info.shape_num = dims.size();
+      for (size_t i = 0; i < shape_info.shape_num; i++) {
+        shape_info.shape[i] = dims[i];
+      }
+      shape_infos.push_back(shape_info);
+    }
+    ret = MSModelResize(model_, inputs_, shape_infos.data(), inputs_.handle_num);
+    if (ret != kMSStatusSuccess) {
+      BENCHMARK_LOG_ERROR("MSModelResize failed, ret: " << ret);
+      return ret;
+    }
+  }
+  auto end_prepare_time = GetTimeUs();
+  MS_LOG(INFO) << "PrepareTime = " << ((end_prepare_time - start_prepare_time) / kFloatMSEC) << " ms";
+  std::cout << "PrepareTime = " << ((end_prepare_time - start_prepare_time) / kFloatMSEC) << " ms" << std::endl;
+
+  ret = LoadInput();
+  if (ret != kMSStatusSuccess) {
+    BENCHMARK_LOG_ERROR("LoadInput failed, ret: " << ret);
+    return ret;
+  }
+  if (!flags_->benchmark_data_file_.empty()) {
+    ret = MarkAccuracy();
+  } else {
+    ret = MarkPerformance();
+  }
+  if (ret != kMSStatusSuccess) {
+    BENCHMARK_LOG_ERROR("Run failed, ret: " << ret);
+    return ret;
+  }
+  if (flags_->dump_tensor_data_) {
+    BENCHMARK_LOG_ERROR("Dumped file is saved to : " + dump_file_output_dir_);
+  }
+  return RET_OK;
+}
+
+// Creates context_ and configures it from flags_: thread number, parallel switch, thread
+// affinity mode and a CPU device info, plus a GPU or Kirin NPU device info when flags_->device_
+// is "GPU" or "NPU".
+//
+// NOTE(review): the CPU device info is added before the GPU/NPU one. If the runtime treats the
+// device list order as priority, the accelerator probably has to come first — confirm.
+//
+// Returns RET_OK on success, RET_ERROR when the context or a device info cannot be created.
+int BenchmarkCApi::InitContext() {
+  constexpr int kFrequencyDefault = 3;
+  context_ = MSContextCreate();
+  if (context_ == nullptr) {
+    BENCHMARK_LOG_ERROR("MSContextCreate failed");
+    return RET_ERROR;
+  }
+  MSContextSetThreadNum(context_, flags_->num_threads_);
+  MSContextSetEnableParallel(context_, flags_->enable_parallel_);
+  MSContextSetThreadAffinityMode(context_, flags_->cpu_bind_mode_);
+
+  MSDeviceInfoHandle cpu_device_info = MSDeviceInfoCreate(kMSDeviceTypeCPU);
+  if (cpu_device_info == nullptr) {
+    BENCHMARK_LOG_ERROR("MSDeviceInfoCreate failed, device: CPU");
+    return RET_ERROR;
+  }
+  MSDeviceInfoSetEnableFP16(cpu_device_info, flags_->enable_fp16_);
+  MSContextAddDeviceInfo(context_, cpu_device_info);
+
+  if (flags_->device_ == "GPU") {
+    MSDeviceInfoHandle gpu_device_info = MSDeviceInfoCreate(kMSDeviceTypeGPU);
+    if (gpu_device_info == nullptr) {
+      BENCHMARK_LOG_ERROR("MSDeviceInfoCreate failed, device: GPU");
+      return RET_ERROR;
+    }
+    MSDeviceInfoSetEnableFP16(gpu_device_info, flags_->enable_fp16_);
+    MSContextAddDeviceInfo(context_, gpu_device_info);
+  }
+
+  if (flags_->device_ == "NPU") {
+    MSDeviceInfoHandle npu_device_info = MSDeviceInfoCreate(kMSDeviceTypeKirinNPU);
+    if (npu_device_info == nullptr) {
+      BENCHMARK_LOG_ERROR("MSDeviceInfoCreate failed, device: NPU");
+      return RET_ERROR;
+    }
+    MSDeviceInfoSetFrequency(npu_device_info, kFrequencyDefault);
+    MSContextAddDeviceInfo(context_, npu_device_info);
+  }
+  return RET_OK;
+}
+
+// Fills every model input with random data through GenerateRandomData.
+// String tensors are not supported. The result of GenerateRandomData is deliberately ignored.
+//
+// Returns RET_OK on success, RET_ERROR for a string input or an input without a data buffer.
+int BenchmarkCApi::GenerateInputData() {
+  for (size_t i = 0; i < inputs_.handle_num; i++) {
+    MSTensorHandle tensor = inputs_.handle_list[i];
+    auto data_type = MSTensorGetDataType(tensor);
+    if (data_type == kMSDataTypeObjectTypeString) {
+      BENCHMARK_LOG_ERROR("Unsupported kMSDataTypeObjectTypeString");
+      return RET_ERROR;
+    }
+    auto data_ptr = MSTensorGetMutableData(tensor);
+    if (data_ptr == nullptr) {
+      // Never hand a null buffer to the generator.
+      BENCHMARK_LOG_ERROR("MSTensorGetMutableData failed");
+      return RET_ERROR;
+    }
+    auto data_size = MSTensorGetDataSize(tensor);
+    (void)GenerateRandomData(data_size, data_ptr, static_cast<int>(data_type));
+  }
+  return RET_OK;
+}
+
+// Loads each file in flags_->input_data_list_ into the model input with the same index.
+// Only binary input is supported, and each file must be exactly as large as its tensor's data.
+//
+// Returns RET_OK on success, RET_ERROR for image input, too many input files, an unreadable
+// file, a string tensor, a size mismatch or a missing tensor buffer.
+int BenchmarkCApi::ReadInputFile() {
+  if (this->flags_->in_data_type_ == lite::kImage) {
+    BENCHMARK_LOG_ERROR("Unsupported image input");
+    return RET_ERROR;
+  }
+  // Each file is matched to inputs_.handle_list[i], so there must not be more files than inputs.
+  if (flags_->input_data_list_.size() > inputs_.handle_num) {
+    BENCHMARK_LOG_ERROR("Input file count " << flags_->input_data_list_.size() << " exceeds model input count "
+                                             << inputs_.handle_num);
+    return RET_ERROR;
+  }
+  for (size_t i = 0; i < flags_->input_data_list_.size(); i++) {
+    MSTensorHandle tensor = inputs_.handle_list[i];
+    size_t size;
+    char *bin_buf = lite::ReadFile(flags_->input_data_list_[i].c_str(), &size);
+    if (bin_buf == nullptr) {
+      BENCHMARK_LOG_ERROR("ReadFile failed");
+      return RET_ERROR;
+    }
+    // From here on every exit path must release bin_buf.
+    if (MSTensorGetDataType(tensor) == kMSDataTypeObjectTypeString) {
+      BENCHMARK_LOG_ERROR("Unsupported kMSDataTypeObjectTypeString");
+      delete[] bin_buf;
+      return RET_ERROR;
+    }
+    auto tensor_data_size = MSTensorGetDataSize(tensor);
+    if (tensor_data_size != size) {
+      BENCHMARK_LOG_ERROR("Input file size error, required: " << tensor_data_size << ", in fact: " << size);
+      delete[] bin_buf;
+      return RET_ERROR;
+    }
+    auto input_data = MSTensorGetMutableData(tensor);
+    if (input_data == nullptr) {
+      BENCHMARK_LOG_ERROR("MSTensorGetMutableData failed");
+      delete[] bin_buf;
+      return RET_ERROR;
+    }
+    memcpy(input_data, bin_buf, size);
+    delete[] bin_buf;
+  }
+  return RET_OK;
+}
+
+// Accuracy mode: prints the inputs, runs a single prediction, loads the calibration data and
+// compares the model outputs against it. Returns RET_OK only when every step succeeds; a failed
+// prediction is reported as RET_ERROR, any other failure returns that step's status.
+int BenchmarkCApi::MarkAccuracy() {
+  MS_LOG(INFO) << "MarkAccuracy";
+  std::cout << "MarkAccuracy" << std::endl;
+
+  const int print_ret = PrintInputData();
+  if (print_ret != RET_OK) {
+    BENCHMARK_LOG_ERROR("PrintInputData failed, ret: " << print_ret);
+    return print_ret;
+  }
+
+  const int predict_ret = MSModelPredict(model_, inputs_, &outputs_, before_call_back_, after_call_back_);
+  if (predict_ret != kMSStatusSuccess) {
+    BENCHMARK_LOG_ERROR("MSModelPredict failed, ret: " << predict_ret);
+    return RET_ERROR;
+  }
+
+  const int calib_ret = ReadCalibData();
+  if (calib_ret != RET_OK) {
+    BENCHMARK_LOG_ERROR("ReadCalibData failed, ret: " << calib_ret);
+    return calib_ret;
+  }
+
+  const int compare_ret = CompareOutput();
+  if (compare_ret != RET_OK) {
+    BENCHMARK_LOG_ERROR("CompareOutput failed, ret: " << compare_ret);
+    return compare_ret;
+  }
+  return RET_OK;
+}
+
+// Performance mode: runs flags_->warm_up_loop_count_ untimed predictions, then
+// flags_->loop_count_ timed ones, and reports the minimum, maximum and average run time.
+// When flags_->time_profiling_ is set the per-op tables are printed as well.
+//
+// Returns RET_OK on success, RET_ERROR as soon as a prediction fails.
+int BenchmarkCApi::MarkPerformance() {
+  MS_LOG(INFO) << "Running warm up loops...";
+  std::cout << "Running warm up loops..." << std::endl;
+  for (int i = 0; i < flags_->warm_up_loop_count_; i++) {
+    auto ret = MSModelPredict(model_, inputs_, &outputs_, before_call_back_, after_call_back_);
+    if (ret != kMSStatusSuccess) {
+      BENCHMARK_LOG_ERROR("MSModelPredict failed, ret: " << ret);
+      return RET_ERROR;
+    }
+  }
+
+  MS_LOG(INFO) << "Running benchmark loops...";
+  std::cout << "Running benchmark loops..." << std::endl;
+  // Start from the largest value so the first measurement always becomes the minimum, even
+  // when a single run takes longer than one second.
+  uint64_t time_min = ~static_cast<uint64_t>(0);
+  uint64_t time_max = 0;
+  uint64_t time_avg = 0;
+
+  for (int i = 0; i < flags_->loop_count_; i++) {
+    auto start = GetTimeUs();
+    auto ret = MSModelPredict(model_, inputs_, &outputs_, before_call_back_, after_call_back_);
+    if (ret != kMSStatusSuccess) {
+      BENCHMARK_LOG_ERROR("MSModelPredict failed, ret: " << ret);
+      return RET_ERROR;
+    }
+    auto end = GetTimeUs();
+    auto time = end - start;
+    time_min = std::min(time_min, time);
+    time_max = std::max(time_max, time);
+    time_avg += time;
+  }
+
+  if (flags_->time_profiling_) {
+    // NOTE(review): the callbacks in this file accumulate into g_op_times_by_name_ and
+    // g_op_times_by_type_, while the tables printed here are op_times_by_name_ and
+    // op_times_by_type_ — confirm these are the intended maps.
+    const std::vector<std::string> per_op_name = {"opName", "avg(ms)", "percent", "calledTimes", "opTotalTime"};
+    const std::vector<std::string> per_op_type = {"opType", "avg(ms)", "percent", "calledTimes", "opTotalTime"};
+    PrintResult(per_op_name, op_times_by_name_);
+    PrintResult(per_op_type, op_times_by_type_);
+  }
+
+  if (flags_->loop_count_ > 0) {
+    time_avg /= flags_->loop_count_;
+    MS_LOG(INFO) << "Model = "
+                 << flags_->model_file_.substr(flags_->model_file_.find_last_of(lite::DELIM_SLASH) + 1).c_str()
+                 << ", NumThreads = " << flags_->num_threads_ << ", MinRunTime = " << time_min / lite::kFloatMSEC
+                 << ", MaxRuntime = " << time_max / lite::kFloatMSEC
+                 << ", AvgRunTime = " << time_avg / lite::kFloatMSEC;
+    printf("Model = %s, NumThreads = %d, MinRunTime = %f ms, MaxRuntime = %f ms, AvgRunTime = %f ms\n",
+           flags_->model_file_.substr(flags_->model_file_.find_last_of(lite::DELIM_SLASH) + 1).c_str(),
+           flags_->num_threads_, time_min / lite::kFloatMSEC, time_max / lite::kFloatMSEC, time_avg / lite::kFloatMSEC);
+  }
+  return RET_OK;
+}
+
+// Looks up an output tensor by name, falling back to its shape, and returns its data type
+// converted to TypeId. Returns kTypeUnknown when no tensor is found.
+TypeId BenchmarkCApi::GetDataTypeByNameOrShape(const std::string &tensor_name, const std::vector<size_t> &dims) {
+  MSTensorHandle matched = GetTensorByNameOrShape(tensor_name, dims);
+  if (matched != nullptr) {
+    return static_cast<TypeId>(MSTensorGetDataType(matched));
+  }
+  BENCHMARK_LOG_ERROR("Get tensor failed, tensor name: " << tensor_name);
+  return kTypeUnknown;
+}
+
+// Compares every calibration tensor in benchmark_data_ with the matching model output and
+// checks the mean bias (in percent) against flags_->accuracy_threshold_.
+// Returns RET_OK when the mean bias is within the threshold; RET_ERROR or the comparison's
+// status otherwise. String tensors are rejected.
+int BenchmarkCApi::CompareOutput() {
+  constexpr int kPercentageDivisor = 100;
+  std::cout << "================ Comparing Output data ================" << std::endl;
+  float total_bias = 0;
+  int total_size = 0;
+  for (const auto &calib_entry : benchmark_data_) {
+    const std::string &calib_name = calib_entry.first;
+    MSTensorHandle output = GetTensorByNameOrShape(calib_name, calib_entry.second->shape);
+    if (output == nullptr) {
+      BENCHMARK_LOG_ERROR("Get tensor failed, tensor name: " << calib_name);
+      return RET_ERROR;
+    }
+    if (static_cast<int>(MSTensorGetDataType(output)) == kObjectTypeString) {
+      BENCHMARK_LOG_ERROR("Unsupported kMSDataTypeObjectTypeString");
+      return RET_ERROR;
+    }
+    const int compare_ret = CompareDataGetTotalBiasAndSize(calib_name, output, &total_bias, &total_size);
+    if (compare_ret != RET_OK) {
+      BENCHMARK_LOG_ERROR("Error in CompareData");
+      BENCHMARK_LOG_ERROR("=======================================================");
+      return compare_ret;
+    }
+  }
+
+  // With nothing compared the mean bias is reported as zero.
+  float mean_bias = 0;
+  if (total_size != 0) {
+    mean_bias = ((total_bias / float_t(total_size)) * kPercentageDivisor);
+  }
+
+  std::cout << "Mean bias of all nodes/tensors: " << mean_bias << "%" << std::endl;
+  std::cout << "=======================================================" << std::endl << std::endl;
+
+  if (mean_bias > this->flags_->accuracy_threshold_) {
+    BENCHMARK_LOG_ERROR("Mean bias of all nodes/tensors is too big: " << mean_bias << "%");
+    return RET_ERROR;
+  }
+  return RET_OK;
+}
+
+// Finds a model output by tensor name; when the name is unknown, falls back to the single
+// output whose shape equals `dims`.
+//
+// name: output tensor name to look up first.
+// dims: expected shape used for the fallback search.
+// Returns the tensor handle, or nullptr when the name is unknown and the shape matches no
+// output or more than one output.
+MSTensorHandle BenchmarkCApi::GetTensorByNameOrShape(const std::string &name, const std::vector<size_t> &dims) {
+  MSTensorHandle tensor = MSModelGetOutputByTensorName(model_, name.c_str());
+  if (tensor != nullptr) {
+    return tensor;
+  }
+  MS_LOG(INFO) << "Cannot find tensor: " << name << ", switch to MSGetModelOutputs";
+  std::vector<MSTensorHandle> match_tensors;
+  std::vector<int64_t> shape_vector;
+  (void)std::transform(dims.begin(), dims.end(), std::back_inserter(shape_vector),
+                       [](size_t value) { return static_cast<int64_t>(value); });
+  MSTensorHandleArray tensor_array = MSModelGetOutputs(model_);
+  if (tensor_array.handle_list != nullptr) {
+    for (size_t i = 0; i < tensor_array.handle_num; i++) {
+      // Initialised so a failed shape query cannot leave a garbage rank behind.
+      size_t shape_num = 0;
+      const int64_t *shape = MSTensorGetShape(tensor_array.handle_list[i], &shape_num);
+      if (shape_num != dims.size()) {
+        continue;
+      }
+      // Rank 0 matches trivially; otherwise memcmp must not be given a null shape pointer.
+      if (shape_num == 0 ||
+          (shape != nullptr && memcmp(shape, shape_vector.data(), shape_num * sizeof(int64_t)) == 0)) {
+        match_tensors.push_back(tensor_array.handle_list[i]);
+      }
+    }
+  }
+  // The fallback is only usable when the shape identifies exactly one output.
+  if (match_tensors.size() != 1) {
+    BENCHMARK_LOG_ERROR("get tensor by node shape failed");
+    return nullptr;
+  }
+  return match_tensors.front();
+}
+
+// Compares one output tensor with its calibration data and accumulates the result.
+//
+// name:       calibration entry name passed on to CompareData.
+// tensor:     model output to compare.
+// total_bias: accumulator increased by this tensor's bias.
+// total_size: accumulator increased by one for each compared tensor.
+// Returns RET_OK on success, RET_ERROR when the tensor has no data, its data type is not
+// supported, or CompareData reports a negative bias.
+int BenchmarkCApi::CompareDataGetTotalBiasAndSize(const std::string &name, MSTensorHandle tensor, float *total_bias,
+                                                  int *total_size) {
+  auto tensor_data = MSTensorGetData(tensor);
+  if (tensor_data == nullptr) {
+    BENCHMARK_LOG_ERROR("MSTensorGetData failed.");
+    return RET_ERROR;
+  }
+  // Initialised so a failed shape query cannot leave a garbage rank behind.
+  size_t shape_num = 0;
+  const int64_t *shape = MSTensorGetShape(tensor, &shape_num);
+  std::vector<int64_t> vec_shape;
+  if (shape != nullptr) {
+    vec_shape.assign(shape, shape + shape_num);
+  }
+  float bias = 0;
+  switch (static_cast<TypeId>(MSTensorGetDataType(tensor))) {
+    case TypeId::kNumberTypeFloat:
+    case TypeId::kNumberTypeFloat32: {
+      bias = CompareData<float, int64_t>(name, vec_shape, tensor_data);
+      break;
+    }
+    case TypeId::kNumberTypeInt8: {
+      bias = CompareData<int8_t, int64_t>(name, vec_shape, tensor_data);
+      break;
+    }
+    case TypeId::kNumberTypeUInt8: {
+      bias = CompareData<uint8_t, int64_t>(name, vec_shape, tensor_data);
+      break;
+    }
+    case TypeId::kNumberTypeInt32: {
+      bias = CompareData<int32_t, int64_t>(name, vec_shape, tensor_data);
+      break;
+    }
+    case TypeId::kNumberTypeInt16: {
+      bias = CompareData<int16_t, int64_t>(name, vec_shape, tensor_data);
+      break;
+    }
+    case TypeId::kNumberTypeBool: {
+      bias = CompareData<bool, int64_t>(name, vec_shape, tensor_data);
+      break;
+    }
+    default:
+      BENCHMARK_LOG_ERROR("Unsupported data type: " << static_cast<int>(MSTensorGetDataType(tensor)));
+      return RET_ERROR;
+  }
+  // A negative bias is treated as a comparison failure.
+  if (bias < 0) {
+    BENCHMARK_LOG_ERROR("CompareData failed, name: " << name);
+    return RET_ERROR;
+  }
+  *total_bias += bias;
+  *total_size += 1;
+  return RET_OK;
+}
+
+// Prints up to kPrintDataNum leading elements of every model input to std::cout.
+//
+// Returns RET_OK on success, RET_ERROR for a string tensor, a tensor without data or a data
+// type that cannot be printed.
+int BenchmarkCApi::PrintInputData() {
+  constexpr int64_t kPrintDataNum = 20;
+  for (size_t i = 0; i < inputs_.handle_num; i++) {
+    auto input = inputs_.handle_list[i];
+    std::cout << "InData" << i << ": ";
+    auto data_type = static_cast<TypeId>(MSTensorGetDataType(input));
+    if (data_type == TypeId::kObjectTypeString) {
+      BENCHMARK_LOG_ERROR("Unsupported kMSDataTypeObjectTypeString.");
+      return RET_ERROR;
+    }
+    auto tensor_data = MSTensorGetData(input);
+    // A non-positive element count must not wrap around when converted to size_t.
+    const int64_t element_num = MSTensorGetElementNum(input);
+    const size_t print_num = element_num > 0 ? static_cast<size_t>(std::min(element_num, kPrintDataNum)) : 0;
+    if (print_num > 0 && tensor_data == nullptr) {
+      BENCHMARK_LOG_ERROR("MSTensorGetData failed.");
+      return RET_ERROR;
+    }
+    for (size_t j = 0; j < print_num; j++) {
+      if (data_type == TypeId::kNumberTypeFloat32 || data_type == TypeId::kNumberTypeFloat) {
+        std::cout << static_cast<const float *>(tensor_data)[j] << " ";
+      } else if (data_type == TypeId::kNumberTypeInt8) {
+        // Widened to int: streaming an 8-bit integer directly prints it as a character.
+        std::cout << static_cast<int>(static_cast<const int8_t *>(tensor_data)[j]) << " ";
+      } else if (data_type == TypeId::kNumberTypeUInt8) {
+        std::cout << static_cast<int>(static_cast<const uint8_t *>(tensor_data)[j]) << " ";
+      } else if (data_type == TypeId::kNumberTypeInt32) {
+        std::cout << static_cast<const int32_t *>(tensor_data)[j] << " ";
+      } else if (data_type == TypeId::kNumberTypeInt64) {
+        std::cout << static_cast<const int64_t *>(tensor_data)[j] << " ";
+      } else if (data_type == TypeId::kNumberTypeBool) {
+        std::cout << static_cast<const bool *>(tensor_data)[j] << " ";
+      } else {
+        BENCHMARK_LOG_ERROR("Datatype: " << data_type << " is not supported.");
+        return RET_ERROR;
+      }
+    }
+    std::cout << std::endl;
+  }
+  return RET_OK;
+}
+
+// Installs the time-profiling kernel callbacks that the MSModelPredict calls in this class
+// pass along. Always returns RET_OK.
+int BenchmarkCApi::InitTimeProfilingCallbackParameter() {
+  after_call_back_ = TimeAfterCallback;
+  before_call_back_ = TimeBeforeCallback;
+  return RET_OK;
+}
+
+// Perf profiling is not available through the C API benchmark: logs an error and returns
+// RET_ERROR.
+int BenchmarkCApi::InitPerfProfilingCallbackParameter() {
+  BENCHMARK_LOG_ERROR("Unsupported feature.");
+  return RET_ERROR;
+}
+
+// Printing tensor data is not available through the C API benchmark: logs an error and returns
+// RET_ERROR.
+int BenchmarkCApi::InitPrintTensorDataCallbackParameter() {
+  BENCHMARK_LOG_ERROR("Unsupported feature.");
+  return RET_ERROR;
+}
+// Dumping tensor data is not available through the C API benchmark: logs an error and returns
+// RET_ERROR.
+int BenchmarkCApi::InitDumpTensorDataCallbackParameter() {
+  BENCHMARK_LOG_ERROR("Unsupported feature.");
+  return RET_ERROR;
+}
+}  // namespace tools
+}  // namespace mindspore
+
+// Profiling state shared by TimeBeforeCallback and TimeAfterCallback.
+// NOTE(review): plain globals with a single start timestamp — presumably kernels are expected
+// to run one at a time; confirm before using with parallel execution.
+
+// Timestamp (GetTimeUs) stored by the most recent TimeBeforeCallback.
+uint64_t g_op_begin_ = 0;
+// Number of TimeBeforeCallback invocations so far.
+int g_op_call_times_total_ = 0;
+// Sum of all per-kernel costs added by TimeAfterCallback.
+float g_op_cost_total_ = 0.0f;
+// Per node type: {number of completed calls, accumulated cost}.
+std::map<std::string, std::pair<int, float>> g_op_times_by_type_;
+// Per node name: {number of completed calls, accumulated cost}.
+std::map<std::string, std::pair<int, float>> g_op_times_by_name_;
+
+// Kernel "before" hook: makes sure a zeroed statistics entry exists for the kernel's type and
+// name, counts the call and stores the start timestamp. Always returns true.
+bool TimeBeforeCallback(const MSTensorHandleArray inputs, const MSTensorHandleArray outputs,
+                        const MSCallBackParamC &kernel_Info) {
+  // emplace leaves an existing entry untouched, so earlier statistics are preserved.
+  (void)g_op_times_by_type_.emplace(kernel_Info.node_type, std::make_pair(0, 0.0f));
+  (void)g_op_times_by_name_.emplace(kernel_Info.node_name, std::make_pair(0, 0.0f));
+
+  ++g_op_call_times_total_;
+  // Taken last so the bookkeeping above is not counted as kernel time.
+  g_op_begin_ = mindspore::lite::GetTimeUs();
+  return true;
+}
+
+// Kernel "after" hook: computes the time elapsed since the matching TimeBeforeCallback and adds
+// it to the global total and to the kernel's per-type and per-name statistics.
+// Always returns true.
+bool TimeAfterCallback(const MSTensorHandleArray inputs, const MSTensorHandleArray outputs,
+                       const MSCallBackParamC &kernel_Info) {
+  // Read the clock first so the bookkeeping below is not counted as kernel time.
+  const uint64_t end_us = mindspore::lite::GetTimeUs();
+  const float elapsed = static_cast<float>(end_us - g_op_begin_) / mindspore::lite::kFloatMSEC;
+  g_op_cost_total_ += elapsed;
+
+  auto &type_stat = g_op_times_by_type_[kernel_Info.node_type];
+  type_stat.first++;
+  type_stat.second += elapsed;
+
+  auto &name_stat = g_op_times_by_name_[kernel_Info.node_name];
+  name_stat.first++;
+  name_stat.second += elapsed;
+  return true;
+}
--- a/mindspore/lite/tools/benchmark/benchmark_c_api.h
+++ b/mindspore/lite/tools/benchmark/benchmark_c_api.h
@ -0,0 +1,82 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_LITE_TOOLS_BENCHMARK_BENCHMARK_C_API_H_
+#define MINDSPORE_LITE_TOOLS_BENCHMARK_BENCHMARK_C_API_H_
+
+#include <vector>
+#include <string>
+#include "tools/benchmark/benchmark_base.h"
+#include "include/c_api/model_c.h"
+#include "include/c_api/context_c.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+// Kernel callbacks used for time profiling; they are defined in benchmark_c_api.cc and always
+// return true.
+// NOTE(review): both sit inside extern "C" yet take a C++ reference parameter, so they can only
+// be declared and called from C++.
+
+// Called before a kernel runs: registers the kernel's type/name and stores the start timestamp.
+bool TimeBeforeCallback(const MSTensorHandleArray inputs, const MSTensorHandleArray outputs,
+                        const MSCallBackParamC &kernel_Info);
+// Called after a kernel runs: adds the elapsed time to the kernel's type/name statistics.
+bool TimeAfterCallback(const MSTensorHandleArray inputs, const MSTensorHandleArray outputs,
+                       const MSCallBackParamC &kernel_Info);
+#ifdef __cplusplus
+}
+#endif
+
+using mindspore::lite::BenchmarkBase;
+using mindspore::lite::BenchmarkFlags;
+
+namespace mindspore::tools {
+// Benchmark driver that runs a model through the MindSpore Lite C API (MSModel*/MSContext*)
+// instead of the C++ interfaces.
+class MS_API BenchmarkCApi : public BenchmarkBase {
+ public:
+  explicit BenchmarkCApi(BenchmarkFlags *flags) : BenchmarkBase(flags) {}
+
+  // Releases the model handle.
+  // NOTE(review): context_ is never destroyed here; presumably the model takes ownership of
+  // it when it is built — confirm, otherwise it leaks when the build is not reached.
+  virtual ~BenchmarkCApi() { MSModelDestroy(model_); }
+
+  // Builds the model and runs either the accuracy or the performance benchmark.
+  int RunBenchmark() override;
+
+ protected:
+  // Compares one output tensor with its calibration data and accumulates bias and count.
+  int CompareDataGetTotalBiasAndSize(const std::string &name, MSTensorHandle tensor, float *total_bias,
+                                     int *total_size);
+  // Finds an output by name, falling back to the single output whose shape equals `dims`.
+  MSTensorHandle GetTensorByNameOrShape(const std::string &name, const std::vector<size_t> &dims);
+
+  // Creates context_ and fills it from flags_.
+  int InitContext();
+  int GenerateInputData() override;
+  int ReadInputFile() override;
+
+  TypeId GetDataTypeByNameOrShape(const std::string &tensor_name, const std::vector<size_t> &dims) override;
+
+  int CompareOutput() override;
+
+  // Only time profiling is implemented; the other three log an error and return RET_ERROR.
+  int InitTimeProfilingCallbackParameter() override;
+  int InitPerfProfilingCallbackParameter() override;
+  int InitDumpTensorDataCallbackParameter() override;
+  int InitPrintTensorDataCallbackParameter() override;
+
+  int PrintInputData();
+
+  int MarkPerformance();
+
+  int MarkAccuracy();
+
+ private:
+  MSModelHandle model_ = nullptr;
+  MSContextHandle context_ = nullptr;
+  // Value-initialised so the handle arrays are never read with indeterminate contents.
+  MSTensorHandleArray inputs_ = {};
+  MSTensorHandleArray outputs_ = {};
+
+  MSKernelCallBackC before_call_back_ = nullptr;
+  MSKernelCallBackC after_call_back_ = nullptr;
+};
+}  // namespace mindspore::tools
+#endif  // MINDSPORE_LITE_TOOLS_BENCHMARK_BENCHMARK_C_API_H_
--- a/mindspore/lite/tools/benchmark/benchmark_unified_api.cc
+++ b/mindspore/lite/tools/benchmark/benchmark_unified_api.cc
@ -112,42 +112,13 @@ int BenchmarkUnifiedApi::ReadInputFile() {
  return RET_OK;
 }

-int BenchmarkUnifiedApi::ReadTensorData(std::ifstream &in_file_stream, const std::string &tensor_name,
-                                        const std::vector<size_t> &dims) {
-  std::string line;
-  getline(in_file_stream, line);
-  std::stringstream line_stream(line);
-  if (this->benchmark_data_.find(tensor_name) != this->benchmark_data_.end()) {
-    return RET_OK;
-  }
-  mindspore::MSTensor tensor = GetMSTensorByNameOrShape(tensor_name, dims);
+TypeId BenchmarkUnifiedApi::GetDataTypeByNameOrShape(const std::string &tensor_name, const std::vector<size_t> &dims) {
+  auto tensor = GetMSTensorByNameOrShape(tensor_name, dims);
  if (tensor == nullptr) {
    MS_LOG(ERROR) << "Get tensor failed, tensor name: " << tensor_name;
-    return RET_ERROR;
+    return kTypeUnknown;
  }
-  std::vector<float> data;
-  std::vector<std::string> strings_data;
-  size_t shape_size = std::accumulate(dims.begin(), dims.end(), 1, std::multiplies<size_t>());
-  if (static_cast<int>(tensor.DataType()) == kObjectTypeString) {
-    strings_data.push_back(line);
-    for (size_t i = 1; i < shape_size; i++) {
-      getline(in_file_stream, line);
-      strings_data.push_back(line);
-    }
-  } else {
-    for (size_t i = 0; i < shape_size; i++) {
-      float tmp_data;
-      line_stream >> tmp_data;
-      data.push_back(tmp_data);
-    }
-  }
-  auto *check_tensor = new (std::nothrow) CheckTensor(dims, data, strings_data);
-  if (check_tensor == nullptr) {
-    MS_LOG(ERROR) << "New CheckTensor failed, tensor name: " << tensor_name;
-    return RET_ERROR;
-  }
-  this->benchmark_data_.insert(std::make_pair(tensor_name, check_tensor));
-  return RET_OK;
+  return static_cast<TypeId>(tensor.DataType());
 }

 void BenchmarkUnifiedApi::InitMSContext(const std::shared_ptr<mindspore::Context> &context) {
@ -477,7 +448,9 @@ int BenchmarkUnifiedApi::RunBenchmark() {
-    std::cout << "ms_model_.Build failed while running ", model_name.c_str();
+    std::cout << "ms_model_.Build failed while running " << model_name.c_str() << std::endl;
+    // Release the model buffer on the failure path too; it is otherwise only freed after a successful Build.
+    delete[] graph_buf;
    return RET_ERROR;
  }
-
+  delete[] graph_buf;
  if (!flags_->resize_dims_.empty()) {
    std::vector<std::vector<int64_t>> resize_dims;
    (void)std::transform(flags_->resize_dims_.begin(), flags_->resize_dims_.end(), std::back_inserter(resize_dims),
@ -505,13 +478,6 @@ int BenchmarkUnifiedApi::RunBenchmark() {
  }
  if (!flags_->benchmark_data_file_.empty()) {
    status = MarkAccuracy();
-    for (auto &data : benchmark_data_) {
-      data.second->shape.clear();
-      data.second->data.clear();
-      delete data.second;
-      data.second = nullptr;
-    }
-    benchmark_data_.clear();
    if (status != 0) {
      MS_LOG(ERROR) << "Run MarkAccuracy error: " << status;
      std::cout << "Run MarkAccuracy error: " << status << std::endl;
--- a/mindspore/lite/tools/benchmark/benchmark_unified_api.h
+++ b/mindspore/lite/tools/benchmark/benchmark_unified_api.h
@ -60,11 +60,10 @@ class MS_API BenchmarkUnifiedApi : public BenchmarkBase {

  int ReadInputFile() override;

-  int ReadTensorData(std::ifstream &in_file_stream, const std::string &tensor_name,
-                     const std::vector<size_t> &dims) override;
-
  void InitMSContext(const std::shared_ptr<Context> &context);

+  TypeId GetDataTypeByNameOrShape(const std::string &tensor_name, const std::vector<size_t> &dims) override;
+
  int CompareOutput() override;

  int InitTimeProfilingCallbackParameter() override;
--- a/mindspore/lite/tools/benchmark/run_benchmark.cc
+++ b/mindspore/lite/tools/benchmark/run_benchmark.cc
@ -16,6 +16,8 @@

 #include "tools/benchmark/run_benchmark.h"
 #include <string>
+#include "tools/benchmark/benchmark_c_api.h"
+
 namespace mindspore {
 namespace lite {
 int RunBenchmark(int argc, const char **argv) {
@ -34,11 +36,22 @@ int RunBenchmark(int argc, const char **argv) {

  BenchmarkBase *benchmark = nullptr;
-  // get dump data output path
-  auto new_api = std::getenv("ENABLE_NEW_API");
-  if (new_api == nullptr || std::string(new_api) != "true") {
-    benchmark = new Benchmark(&flags);
+  // Pick the benchmark implementation named by MSLITE_API_TYPE (OLD/NEW/C); an unset variable selects OLD.
+  const char *api_type = std::getenv("MSLITE_API_TYPE");
+  if (api_type != nullptr) {
+    MS_LOG(INFO) << "MSLITE_API_TYPE = " << api_type;
+    std::cout << "MSLITE_API_TYPE = " << api_type << std::endl;
+  }
+  const std::string api_name = (api_type == nullptr) ? "OLD" : api_type;
+  if (api_name == "OLD") {
+    benchmark = new (std::nothrow) Benchmark(&flags);
+  } else if (api_name == "NEW") {
+    benchmark = new (std::nothrow) BenchmarkUnifiedApi(&flags);
+  } else if (api_name == "C") {
+    benchmark = new (std::nothrow) tools::BenchmarkCApi(&flags);
  } else {
-    benchmark = new BenchmarkUnifiedApi(&flags);
+    MS_LOG(ERROR) << "Invalid MSLITE_API_TYPE, (OLD/NEW/C, default:OLD)";
+    std::cerr << "Invalid MSLITE_API_TYPE, (OLD/NEW/C, default:OLD)" << std::endl;
+    return RET_ERROR;
  }
  if (benchmark == nullptr) {
    MS_LOG(ERROR) << "new benchmark failed ";
--- a/mindspore/lite/tools/cropper/build_cropper_config.sh
+++ b/mindspore/lite/tools/cropper/build_cropper_config.sh
@ -146,6 +146,7 @@ getCommonFile() {
  while IFS='' read -r line; do cxx_api_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/lite/src/cxx_api/model/*.cc)
  while IFS='' read -r line; do cxx_api_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/lite/src/cxx_api/tensor/*.cc)
  while IFS='' read -r line; do cxx_api_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/lite/src/cxx_api/*.cc)
+  while IFS='' read -r line; do cxx_api_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/lite/src/c_api/*.cc)
  mindrt_files=()
  while IFS='' read -r line; do mindrt_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/core/mindrt/src/*.cc)
  while IFS='' read -r line; do mindrt_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/core/mindrt/src/async/*.cc)