From 082e7556d5462cf4093f4a7001deac307098d4d5 Mon Sep 17 00:00:00 2001 From: zhangxuetong Date: Sat, 10 Jul 2021 10:35:19 +0800 Subject: [PATCH] add new api benchmark --- include/api/kernel.h | 20 +- include/api/types.h | 3 + mindspore/ccsrc/cxx_api/types.cc | 2 + mindspore/lite/CMakeLists.txt | 2 +- .../lite/src/cxx_api/model/model_impl.cc | 1 + mindspore/lite/src/cxx_api/types.cc | 4 + mindspore/lite/test/CMakeLists.txt | 5 + mindspore/lite/test/st/benchmark_test.cc | 2 +- .../lite/test/st/mindrt_parallel_test.cc | 2 +- mindspore/lite/tools/benchmark/CMakeLists.txt | 3 + mindspore/lite/tools/benchmark/benchmark.cc | 618 +------------ mindspore/lite/tools/benchmark/benchmark.h | 253 +----- .../lite/tools/benchmark/benchmark_base.cc | 606 +++++++++++++ .../lite/tools/benchmark/benchmark_base.h | 316 +++++++ .../tools/benchmark/benchmark_unified_api.cc | 828 ++++++++++++++++++ .../tools/benchmark/benchmark_unified_api.h | 103 +++ mindspore/lite/tools/benchmark/main.cc | 2 +- .../lite/tools/benchmark/run_benchmark.cc | 82 ++ .../lite/tools/benchmark/run_benchmark.h | 27 + 19 files changed, 2012 insertions(+), 867 deletions(-) create mode 100644 mindspore/lite/tools/benchmark/benchmark_base.cc create mode 100644 mindspore/lite/tools/benchmark/benchmark_base.h create mode 100644 mindspore/lite/tools/benchmark/benchmark_unified_api.cc create mode 100644 mindspore/lite/tools/benchmark/benchmark_unified_api.h create mode 100644 mindspore/lite/tools/benchmark/run_benchmark.cc create mode 100644 mindspore/lite/tools/benchmark/run_benchmark.h diff --git a/include/api/kernel.h b/include/api/kernel.h index 2ca6121f23b..e89d7c9ee1a 100644 --- a/include/api/kernel.h +++ b/include/api/kernel.h @@ -19,7 +19,9 @@ #include #include #include +#ifdef ENABLE_MSLITE #include "schema/model_generated.h" +#endif #include "include/api/types.h" #include "include/api/context.h" @@ -27,15 +29,15 @@ namespace mindspore::kernel { class Kernel { public: Kernel() = default; - +#ifdef ENABLE_MSLITE 
Kernel(const std::vector &inputs, const std::vector &outputs, const schema::Primitive *primitive, const mindspore::Context *ctx) - : inputs_(std::move(inputs)), outputs_(std::move(outputs)), primitive_(primitive), context_(ctx) { + : context_(ctx), inputs_(std::move(inputs)), outputs_(std::move(outputs)), primitive_(primitive) { if (primitive != nullptr) { type_ = primitive->value_type(); } } - +#endif virtual ~Kernel() = default; virtual int Prepare() = 0; @@ -44,8 +46,6 @@ class Kernel { virtual int ReSize() = 0; - virtual schema::PrimitiveType type() const { return type_; } - virtual void set_inputs(const std::vector &in_tensors) { this->inputs_ = in_tensors; } virtual void set_input(mindspore::MSTensor in_tensor, int index) { this->inputs_[index] = in_tensor; } @@ -63,16 +63,20 @@ class Kernel { void set_name(const std::string &name) { this->name_ = name; } const mindspore::Context *context() const { return this->context_; } - +#ifdef ENABLE_MSLITE + virtual schema::PrimitiveType type() const { return type_; } const schema::Primitive *primitive() const { return this->primitive_; } +#endif protected: + std::string name_; + const mindspore::Context *context_ = nullptr; std::vector inputs_; std::vector outputs_; +#ifdef ENABLE_MSLITE schema::PrimitiveType type_ = schema::PrimitiveType_NONE; - std::string name_; const schema::Primitive *primitive_ = nullptr; - const mindspore::Context *context_ = nullptr; +#endif }; } // namespace mindspore::kernel diff --git a/include/api/types.h b/include/api/types.h index 8130f530e20..02c8f13f460 100644 --- a/include/api/types.h +++ b/include/api/types.h @@ -193,5 +193,8 @@ struct MSCallBackParam { using MSKernelCallBack = std::function &inputs, const std::vector &outputs, const MSCallBackParam &opInfo)>; +std::vector CharVersion(); +inline std::string Version() { return CharToString(CharVersion()); } + } // namespace mindspore #endif // MINDSPORE_INCLUDE_API_TYPES_H diff --git a/mindspore/ccsrc/cxx_api/types.cc 
b/mindspore/ccsrc/cxx_api/types.cc index b318c66d950..5d6e20888b6 100644 --- a/mindspore/ccsrc/cxx_api/types.cc +++ b/mindspore/ccsrc/cxx_api/types.cc @@ -396,4 +396,6 @@ bool Buffer::SetData(const void *data, size_t data_len) { MS_EXCEPTION_IF_NULL(impl_); return impl_->SetData(data, data_len); } + +std::vector CharVersion() { return {}; } } // namespace mindspore diff --git a/mindspore/lite/CMakeLists.txt b/mindspore/lite/CMakeLists.txt index 9f38f0e76f0..1a9f224f1ab 100644 --- a/mindspore/lite/CMakeLists.txt +++ b/mindspore/lite/CMakeLists.txt @@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.14) project(Lite) set(BUILD_LITE "on") - +add_compile_definitions(ENABLE_MSLITE) if(TOOLCHAIN_NAME STREQUAL "himix200") set(TARGET_HIMIX200 on) add_compile_definitions(SUPPORT_NNIE) diff --git a/mindspore/lite/src/cxx_api/model/model_impl.cc b/mindspore/lite/src/cxx_api/model/model_impl.cc index eb5c81d26e0..6bf1a42c32b 100644 --- a/mindspore/lite/src/cxx_api/model/model_impl.cc +++ b/mindspore/lite/src/cxx_api/model/model_impl.cc @@ -43,6 +43,7 @@ CreateTrainSessionProto *CreateTrainSessionCallbackHolder(CreateTrainSessionProt Status ModelImpl::Build(const void *model_data, size_t data_size, ModelType model_type, const std::shared_ptr &ms_context) { + context_ = ms_context; lite::Context lite_context; auto status = A2L_ConvertContext(ms_context.get(), &lite_context); if (status != kSuccess) { diff --git a/mindspore/lite/src/cxx_api/types.cc b/mindspore/lite/src/cxx_api/types.cc index 3e9fbb7eff1..d12ad46d7f7 100644 --- a/mindspore/lite/src/cxx_api/types.cc +++ b/mindspore/lite/src/cxx_api/types.cc @@ -22,6 +22,7 @@ #include "include/api/dual_abi_helper.h" #include "src/cxx_api/tensor/tensor_impl.h" #include "src/common/log_adapter.h" +#include "include/version.h" namespace mindspore { namespace { @@ -345,4 +346,7 @@ bool Buffer::SetData(const void *data, size_t data_len) { MS_LOG(ERROR) << "Unsupported feature."; return false; } + +std::vector CharVersion() { return 
StringToChar(lite::Version()); } + } // namespace mindspore diff --git a/mindspore/lite/test/CMakeLists.txt b/mindspore/lite/test/CMakeLists.txt index cdda22493fe..7056d0c039b 100644 --- a/mindspore/lite/test/CMakeLists.txt +++ b/mindspore/lite/test/CMakeLists.txt @@ -141,6 +141,9 @@ set(TEST_LITE_SRC ${TEST_LITE_SRC} ${KERNEL_REG_SRC}) if(MSLITE_ENABLE_TOOLS) set(TEST_LITE_SRC ${TEST_LITE_SRC} + ${LITE_DIR}/tools/benchmark/run_benchmark.cc + ${LITE_DIR}/tools/benchmark/benchmark_base.cc + ${LITE_DIR}/tools/benchmark/benchmark_unified_api.cc ${LITE_DIR}/tools/benchmark/benchmark.cc ${LITE_DIR}/test/st/benchmark_test.cc ) @@ -384,6 +387,8 @@ if(ENABLE_FP16 AND SUPPORT_TRAIN) list(APPEND TEST_SRC ${TEST_CASE_KERNEL_FP16_SRC_GRAD}) endif() +file(GLOB_RECURSE API_SRC ${LITE_DI}/src/cxx_api/*.cc) +set(TEST_SRC ${TEST_SRC} ${API_SRC}) add_executable(lite-test ${TEST_SRC}) add_dependencies(lite-test fbs_src) diff --git a/mindspore/lite/test/st/benchmark_test.cc b/mindspore/lite/test/st/benchmark_test.cc index 052246621a4..3853e372e0f 100644 --- a/mindspore/lite/test/st/benchmark_test.cc +++ b/mindspore/lite/test/st/benchmark_test.cc @@ -16,7 +16,7 @@ #include #include #include "common/common_test.h" -#include "tools/benchmark/benchmark.h" +#include "tools/benchmark/run_benchmark.h" namespace mindspore { namespace lite { diff --git a/mindspore/lite/test/st/mindrt_parallel_test.cc b/mindspore/lite/test/st/mindrt_parallel_test.cc index ef3cc81555e..97f27e89904 100644 --- a/mindspore/lite/test/st/mindrt_parallel_test.cc +++ b/mindspore/lite/test/st/mindrt_parallel_test.cc @@ -18,7 +18,7 @@ #include "common/common_test.h" #include "include/errorcode.h" #include "tools/converter/converter.h" -#include "tools/benchmark/benchmark.h" +#include "tools/benchmark/run_benchmark.h" #include "src/mindrt_executor.h" #include "src/lite_session.h" #include "src/lite_kernel.h" diff --git a/mindspore/lite/tools/benchmark/CMakeLists.txt b/mindspore/lite/tools/benchmark/CMakeLists.txt index 
3200e754340..5f0dde86366 100644 --- a/mindspore/lite/tools/benchmark/CMakeLists.txt +++ b/mindspore/lite/tools/benchmark/CMakeLists.txt @@ -7,7 +7,10 @@ set(COMMON_SRC if(NOT TARGET_HIMIX200) add_executable(benchmark ${CMAKE_CURRENT_SOURCE_DIR}/main.cc + ${CMAKE_CURRENT_SOURCE_DIR}/run_benchmark.cc + ${CMAKE_CURRENT_SOURCE_DIR}/benchmark_base.cc ${CMAKE_CURRENT_SOURCE_DIR}/benchmark.cc + ${CMAKE_CURRENT_SOURCE_DIR}/benchmark_unified_api.cc ${COMMON_SRC}) add_dependencies(benchmark fbs_src) diff --git a/mindspore/lite/tools/benchmark/benchmark.cc b/mindspore/lite/tools/benchmark/benchmark.cc index e9aeb7d21c1..87073fe37b2 100644 --- a/mindspore/lite/tools/benchmark/benchmark.cc +++ b/mindspore/lite/tools/benchmark/benchmark.cc @@ -42,69 +42,6 @@ namespace mindspore { namespace lite { -namespace { -constexpr int kNumPrintMin = 5; -} -static const char *DELIM_COLON = ":"; -static const char *DELIM_COMMA = ","; -static const char *DELIM_SLASH = "/"; -static const std::unordered_map TYPE_ID_MAP{ - {kNumberTypeFloat16, "Float16"}, {kNumberTypeFloat, "Float32"}, {kNumberTypeFloat32, "Float32"}, - {kNumberTypeInt8, "Int8"}, {kNumberTypeInt16, "Int16"}, {kNumberTypeInt, "Int32"}, - {kNumberTypeInt32, "Int32"}, {kNumberTypeUInt8, "UInt8"}, {kNumberTypeUInt16, "UInt16"}, - {kNumberTypeUInt, "UInt32"}, {kNumberTypeUInt32, "UInt32"}, {kObjectTypeString, "String"}, - {kNumberTypeBool, "Bool"}, {kObjectTypeTensorType, "Tensor"}}; -static const std::unordered_map TENSOR_FORMAT_MAP{ - {schema::Format_NCHW, "NCHW"}, {schema::Format_NHWC, "NHWC"}, {schema::Format_NHWC4, "NHWC4"}, - {schema::Format_HWKC, "HWKC"}, {schema::Format_HWCK, "HWCK"}, {schema::Format_KCHW, "KCHW"}, - {schema::Format_CKHW, "CKHW"}, {schema::Format_KHWC, "KHWC"}, {schema::Format_CHWK, "CHWK"}, - {schema::Format_HW, "HW"}, {schema::Format_HW4, "HW4"}, {schema::Format_NC, "NC"}, - {schema::Format_NC4, "NC4"}, {schema::Format_NC4HW4, "NC4HW4"}, {schema::Format_NCDHW, "NCDHW"}}; - -namespace dump { -constexpr auto 
kConfigPath = "MINDSPORE_DUMP_CONFIG"; -constexpr auto kSettings = "common_dump_settings"; -constexpr auto kMode = "dump_mode"; -constexpr auto kPath = "path"; -constexpr auto kNetName = "net_name"; -constexpr auto kInputOutput = "input_output"; -constexpr auto kKernels = "kernels"; -} // namespace dump - -int Benchmark::GenerateRandomData(size_t size, void *data, TypeId data_type) { - MS_ASSERT(data != nullptr); - switch (data_type) { - case kNumberTypeFloat32: - case kNumberTypeFloat: - FillInputData(size, data, std::uniform_real_distribution(0.1f, 1.0f)); - break; - case kNumberTypeFloat64: - FillInputData(size, data, std::uniform_real_distribution(0.1, 1.0)); - break; - case kNumberTypeInt64: - FillInputData(size, data, std::uniform_int_distribution(0, 1)); - break; - case kNumberTypeInt: - case kNumberTypeInt32: - FillInputData(size, data, std::uniform_int_distribution(0, 1)); - break; - case kNumberTypeInt16: - FillInputData(size, data, std::uniform_int_distribution(0, 1)); - break; - case kNumberTypeInt8: - FillInputData(size, data, std::uniform_int_distribution(-127, 127)); - break; - case kNumberTypeUInt8: - FillInputData(size, data, std::uniform_int_distribution(0, 254)); - break; - default: - char *casted_data = static_cast(data); - for (size_t i = 0; i < size; i++) { - casted_data[i] = static_cast(i); - } - } - return RET_OK; -} int Benchmark::GenerateInputData() { for (auto tensor : ms_inputs_) { @@ -118,7 +55,7 @@ int Benchmark::GenerateInputData() { if (tensor->data_type() == kObjectTypeString) { status = StringsToMSTensor({"you're the best."}, tensor); } else { - status = GenerateRandomData(tensor->Size(), input_data, tensor->data_type()); + status = GenerateRandomData(tensor->Size(), input_data, static_cast(tensor->data_type())); } if (status != RET_OK) { std::cerr << "GenerateRandomData for inTensor failed: " << status << std::endl; @@ -129,25 +66,6 @@ int Benchmark::GenerateInputData() { return RET_OK; } -int Benchmark::LoadInput() { - if 
(flags_->in_data_file_.empty()) { - auto status = GenerateInputData(); - if (status != 0) { - std::cerr << "Generate input data error " << status << std::endl; - MS_LOG(ERROR) << "Generate input data error " << status; - return status; - } - } else { - auto status = ReadInputFile(); - if (status != 0) { - std::cerr << "ReadInputFile error, " << status << std::endl; - MS_LOG(ERROR) << "ReadInputFile error, " << status; - return status; - } - } - return RET_OK; -} - int Benchmark::ReadInputFile() { if (ms_inputs_.empty()) { return RET_OK; @@ -196,49 +114,6 @@ int Benchmark::ReadInputFile() { return RET_OK; } -// calibData is FP32 -int Benchmark::ReadCalibData() { - const char *calib_data_path = flags_->benchmark_data_file_.c_str(); - // read calib data - std::ifstream in_file(calib_data_path); - if (!in_file.good()) { - std::cerr << "file: " << calib_data_path << " is not exist" << std::endl; - MS_LOG(ERROR) << "file: " << calib_data_path << " is not exist"; - return RET_ERROR; - } - - if (!in_file.is_open()) { - std::cerr << "file: " << calib_data_path << " open failed" << std::endl; - MS_LOG(ERROR) << "file: " << calib_data_path << " open failed"; - in_file.close(); - return RET_ERROR; - } - MS_LOG(INFO) << "Start reading calibData file"; - std::string line; - std::string tensor_name; - - while (!in_file.eof()) { - getline(in_file, line); - std::stringstream string_line1(line); - size_t dim = 0; - string_line1 >> tensor_name >> dim; - std::vector dims; - for (size_t i = 0; i < dim; i++) { - size_t tmp_dim; - string_line1 >> tmp_dim; - dims.push_back(tmp_dim); - } - auto ret = ReadTensorData(in_file, tensor_name, dims); - if (ret != RET_OK) { - MS_LOG(ERROR) << "Read tensor data failed, tensor name: " << tensor_name; - return RET_ERROR; - } - } - in_file.close(); - MS_LOG(INFO) << "Finish reading calibData file"; - return RET_OK; -} - int Benchmark::ReadTensorData(std::ifstream &in_file_stream, const std::string &tensor_name, const std::vector &dims) { std::string 
line; @@ -379,28 +254,6 @@ tensor::MSTensor *Benchmark::GetTensorByNameOrShape(const std::string &node_or_t return tensor; } -int Benchmark::CompareStringData(const std::string &name, tensor::MSTensor *tensor) { - auto iter = this->benchmark_data_.find(name); - if (iter != this->benchmark_data_.end()) { - std::vector calib_strings = iter->second->strings_data; - std::vector output_strings = MSTensorToStrings(tensor); - size_t compare_num = std::min(calib_strings.size(), output_strings.size()); - size_t print_num = std::min(compare_num, static_cast(kNumPrintMin)); - - std::cout << "Data of node " << name << " : " << std::endl; - for (size_t i = 0; i < compare_num; i++) { - if (i < print_num) { - std::cout << " " << output_strings[i] << std::endl; - } - if (calib_strings[i] != output_strings[i]) { - MS_LOG(ERROR) << "Compare failed, index: " << i; - return RET_ERROR; - } - } - } - return RET_OK; -} - int Benchmark::CompareDataGetTotalBiasAndSize(const std::string &name, tensor::MSTensor *tensor, float *total_bias, int *total_size) { float bias = 0; @@ -698,36 +551,6 @@ int Benchmark::RunBenchmark() { return RET_OK; } -void BenchmarkFlags::InitInputDataList() { - char *input_list = new char[this->in_data_file_.length() + 1]; - snprintf(input_list, this->in_data_file_.length() + 1, "%s", this->in_data_file_.c_str()); - char *cur_input; - const char *split_c = ","; - cur_input = strtok(input_list, split_c); - while (cur_input != nullptr) { - input_data_list_.emplace_back(cur_input); - cur_input = strtok(nullptr, split_c); - } - delete[] input_list; -} - -void BenchmarkFlags::InitResizeDimsList() { - std::string content = this->resize_dims_in_; - std::vector shape; - auto shape_strs = StringSplit(content, std::string(DELIM_COLON)); - for (const auto &shape_str : shape_strs) { - shape.clear(); - auto dim_strs = StringSplit(shape_str, std::string(DELIM_COMMA)); - std::cout << "Resize Dims: "; - for (const auto &dim_str : dim_strs) { - std::cout << dim_str << " "; - 
shape.emplace_back(static_cast(std::stoi(dim_str))); - } - std::cout << std::endl; - this->resize_dims_.emplace_back(shape); - } -} - int Benchmark::InitTimeProfilingCallbackParameter() { // before callback before_call_back_ = [&](const std::vector &before_inputs, @@ -1022,444 +845,7 @@ int Benchmark::InitDumpTensorDataCallbackParameter() { return RET_OK; } -int Benchmark::CheckThreadNumValid() { - if (this->flags_->num_threads_ < 1) { - MS_LOG(ERROR) << "numThreads:" << this->flags_->num_threads_ << " must be greater than 0"; - std::cerr << "numThreads:" << this->flags_->num_threads_ << " must be greater than 0" << std::endl; - return RET_ERROR; - } +Benchmark::~Benchmark() { delete (session_); } - if (flags_->enable_parallel_) { - if (flags_->num_threads_ < 2) { - MS_LOG(ERROR) << "enable parallel need more than 1 thread."; - std::cerr << "enable parallel need more than 1 thread." << std::endl; - return RET_ERROR; - } - } - return RET_OK; -} - -int Benchmark::InitDumpConfigFromJson(char *path) { - auto real_path = RealPath(path); - std::ifstream ifs(real_path); - if (!ifs.good()) { - MS_LOG(ERROR) << "file: " << real_path << " is not exist"; - return RET_ERROR; - } - if (!ifs.is_open()) { - MS_LOG(ERROR) << "file: " << real_path << " open failed"; - return RET_ERROR; - } - - try { - dump_cfg_json_ = nlohmann::json::parse(ifs); - } catch (const nlohmann::json::parse_error &error) { - MS_LOG(ERROR) << "parse json file failed, please check your file."; - return RET_ERROR; - } - if (dump_cfg_json_[dump::kSettings] == nullptr) { - MS_LOG(ERROR) << "\"common_dump_settings\" is required."; - return RET_ERROR; - } - if (dump_cfg_json_[dump::kSettings][dump::kMode] == nullptr) { - MS_LOG(ERROR) << "\"dump_mode\" is required."; - return RET_ERROR; - } - if (dump_cfg_json_[dump::kSettings][dump::kPath] == nullptr) { - MS_LOG(ERROR) << "\"path\" is required."; - return RET_ERROR; - } - if (dump_cfg_json_[dump::kSettings][dump::kNetName] == nullptr) { - 
dump_cfg_json_[dump::kSettings][dump::kNetName] = "Default"; - } - if (dump_cfg_json_[dump::kSettings][dump::kInputOutput] == nullptr) { - dump_cfg_json_[dump::kSettings][dump::kInputOutput] = 0; - } - if (dump_cfg_json_[dump::kSettings][dump::kKernels] != nullptr && - !dump_cfg_json_[dump::kSettings][dump::kKernels].empty()) { - if (dump_cfg_json_[dump::kSettings][dump::kMode] == 0) { - MS_LOG(ERROR) << R"("dump_mode" should be 1 when "kernels" isn't empty.)"; - return RET_ERROR; - } - } - - auto abs_path = dump_cfg_json_[dump::kSettings][dump::kPath].get(); - auto net_name = dump_cfg_json_[dump::kSettings][dump::kNetName].get(); - if (abs_path.back() == '\\' || abs_path.back() == '/') { - dump_file_output_dir_ = abs_path + net_name; - } else { -#ifdef _WIN32 - dump_file_output_dir_ = abs_path + "\\" + net_name; -#else - dump_file_output_dir_ = abs_path + "/" + net_name; -#endif - } - - auto status = CreateOutputDir(&dump_file_output_dir_); - if (status != RET_OK) { - MS_LOG(ERROR) << "create data output directory failed."; - return RET_ERROR; - } - - return RET_OK; -} - -int Benchmark::InitCallbackParameter() { - int ret = RET_OK; - if (flags_->time_profiling_) { - ret = InitTimeProfilingCallbackParameter(); - } else if (flags_->perf_profiling_) { - ret = InitPerfProfilingCallbackParameter(); - } else if (flags_->print_tensor_data_) { - ret = InitPrintTensorDataCallbackParameter(); - } else if (flags_->dump_tensor_data_) { - ret = InitDumpTensorDataCallbackParameter(); - } - return ret; -} - -int Benchmark::Init() { - if (this->flags_ == nullptr) { - return 1; - } - MS_LOG(INFO) << "ModelPath = " << this->flags_->model_file_; - MS_LOG(INFO) << "InDataPath = " << this->flags_->in_data_file_; - MS_LOG(INFO) << "InDataType = " << this->flags_->in_data_type_in_; - MS_LOG(INFO) << "LoopCount = " << this->flags_->loop_count_; - MS_LOG(INFO) << "DeviceType = " << this->flags_->device_; - MS_LOG(INFO) << "AccuracyThreshold = " << this->flags_->accuracy_threshold_; - 
MS_LOG(INFO) << "WarmUpLoopCount = " << this->flags_->warm_up_loop_count_; - MS_LOG(INFO) << "NumThreads = " << this->flags_->num_threads_; - MS_LOG(INFO) << "Fp16Priority = " << this->flags_->enable_fp16_; - MS_LOG(INFO) << "EnableParallel = " << this->flags_->enable_parallel_; - MS_LOG(INFO) << "calibDataPath = " << this->flags_->benchmark_data_file_; - std::cout << "ModelPath = " << this->flags_->model_file_ << std::endl; - std::cout << "InDataPath = " << this->flags_->in_data_file_ << std::endl; - std::cout << "InDataType = " << this->flags_->in_data_type_in_ << std::endl; - std::cout << "LoopCount = " << this->flags_->loop_count_ << std::endl; - std::cout << "DeviceType = " << this->flags_->device_ << std::endl; - std::cout << "AccuracyThreshold = " << this->flags_->accuracy_threshold_ << std::endl; - std::cout << "WarmUpLoopCount = " << this->flags_->warm_up_loop_count_ << std::endl; - std::cout << "NumThreads = " << this->flags_->num_threads_ << std::endl; - std::cout << "Fp16Priority = " << this->flags_->enable_fp16_ << std::endl; - std::cout << "EnableParallel = " << this->flags_->enable_parallel_ << std::endl; - std::cout << "calibDataPath = " << this->flags_->benchmark_data_file_ << std::endl; - if (this->flags_->loop_count_ < 1) { - MS_LOG(ERROR) << "LoopCount:" << this->flags_->loop_count_ << " must be greater than 0"; - std::cerr << "LoopCount:" << this->flags_->loop_count_ << " must be greater than 0" << std::endl; - return RET_ERROR; - } - - auto thread_ret = CheckThreadNumValid(); - if (thread_ret != RET_OK) { - MS_LOG(ERROR) << "Invalid numThreads."; - std::cerr << "Invalid numThreads." 
<< std::endl; - return RET_ERROR; - } - static std::vector CPU_BIND_MODE_MAP = {"NO_BIND", "HIGHER_CPU", "MID_CPU"}; - if (this->flags_->cpu_bind_mode_ >= 1) { - MS_LOG(INFO) << "cpuBindMode = " << CPU_BIND_MODE_MAP[this->flags_->cpu_bind_mode_]; - std::cout << "cpuBindMode = " << CPU_BIND_MODE_MAP[this->flags_->cpu_bind_mode_] << std::endl; - } else { - MS_LOG(INFO) << "cpuBindMode = NO_BIND"; - std::cout << "cpuBindMode = NO_BIND" << std::endl; - } - - this->flags_->in_data_type_ = this->flags_->in_data_type_in_ == "img" ? kImage : kBinary; - - if (!flags_->benchmark_data_type_.empty()) { - if (data_type_map_.find(flags_->benchmark_data_type_) == data_type_map_.end()) { - MS_LOG(ERROR) << "CalibDataType not supported: " << flags_->benchmark_data_type_.c_str(); - return RET_ERROR; - } - msCalibDataType = data_type_map_.at(flags_->benchmark_data_type_); - MS_LOG(INFO) << "CalibDataType = " << flags_->benchmark_data_type_.c_str(); - std::cout << "CalibDataType = " << flags_->benchmark_data_type_.c_str() << std::endl; - } - - if (flags_->model_file_.empty()) { - MS_LOG(ERROR) << "modelPath is required"; - std::cerr << "modelPath is required" << std::endl; - return 1; - } - flags_->InitInputDataList(); - flags_->InitResizeDimsList(); - if (!flags_->resize_dims_.empty() && !flags_->input_data_list_.empty() && - flags_->resize_dims_.size() != flags_->input_data_list_.size()) { - MS_LOG(ERROR) << "Size of input resizeDims should be equal to size of input inDataPath"; - std::cerr << "Size of input resizeDims should be equal to size of input inDataPath" << std::endl; - return RET_ERROR; - } - - if (flags_->device_ != "CPU" && flags_->device_ != "GPU" && flags_->device_ != "NPU") { - MS_LOG(ERROR) << "Device type:" << flags_->device_ << " is not supported."; - std::cerr << "Device type:" << flags_->device_ << " is not supported." 
<< std::endl; - return RET_ERROR; - } - - if (flags_->time_profiling_ && flags_->perf_profiling_) { - MS_LOG(INFO) << "time_profiling is enabled, will not run perf_profiling."; - } - - // get dump data output path - auto dump_cfg_path = std::getenv(dump::kConfigPath); - if (dump_cfg_path != nullptr) { - flags_->dump_tensor_data_ = true; - if (InitDumpConfigFromJson(dump_cfg_path) != RET_OK) { - MS_LOG(ERROR) << "parse dump config file failed."; - return RET_ERROR; - } - } else { - MS_LOG(INFO) << "No MINDSPORE_DUMP_CONFIG in env, don't need to dump data"; - } - - auto status = InitCallbackParameter(); - if (status != RET_OK) { - MS_LOG(ERROR) << "Init callback Parameter failed."; - std::cerr << "Init callback Parameter failed." << std::endl; - return RET_ERROR; - } - - return RET_OK; -} - -int Benchmark::PrintResult(const std::vector &title, - const std::map> &result) { - std::vector columnLenMax(5); - std::vector> rows; - - for (auto &iter : result) { - char stringBuf[5][100] = {}; - std::vector columns; - size_t len = 0; - - len = iter.first.size(); - if (len > columnLenMax.at(0)) { - columnLenMax.at(0) = len + 4; - } - columns.push_back(iter.first); - - len = - snprintf(stringBuf[1], sizeof(stringBuf[1]), "%f", iter.second.second / static_cast(flags_->loop_count_)); - if (len > columnLenMax.at(1)) { - columnLenMax.at(1) = len + 4; - } - columns.emplace_back(stringBuf[1]); - - len = snprintf(stringBuf[2], sizeof(stringBuf[2]), "%f", iter.second.second / op_cost_total_); - if (len > columnLenMax.at(2)) { - columnLenMax.at(2) = len + 4; - } - columns.emplace_back(stringBuf[2]); - - len = snprintf(stringBuf[3], sizeof(stringBuf[3]), "%d", iter.second.first); - if (len > columnLenMax.at(3)) { - columnLenMax.at(3) = len + 4; - } - columns.emplace_back(stringBuf[3]); - - len = snprintf(stringBuf[4], sizeof(stringBuf[4]), "%f", iter.second.second); - if (len > columnLenMax.at(4)) { - columnLenMax.at(4) = len + 4; - } - columns.emplace_back(stringBuf[4]); - - 
rows.push_back(columns); - } - - printf("-------------------------------------------------------------------------\n"); - for (int i = 0; i < 5; i++) { - auto printBuf = title[i]; - if (printBuf.size() > columnLenMax.at(i)) { - columnLenMax.at(i) = printBuf.size(); - } - printBuf.resize(columnLenMax.at(i), ' '); - printf("%s\t", printBuf.c_str()); - } - printf("\n"); - for (auto &row : rows) { - for (int j = 0; j < 5; j++) { - auto printBuf = row[j]; - printBuf.resize(columnLenMax.at(j), ' '); - printf("%s\t", printBuf.c_str()); - } - printf("\n"); - } - return RET_OK; -} - -#ifdef ENABLE_ARM64 -int Benchmark::PrintPerfResult(const std::vector &title, - const std::map> &result) { - std::vector columnLenMax(5); - std::vector> rows; - - for (auto &iter : result) { - char stringBuf[5][100] = {}; - std::vector columns; - size_t len = 0; - - len = iter.first.size(); - if (len > columnLenMax.at(0)) { - columnLenMax.at(0) = len + 4; - } - columns.push_back(iter.first); - - float tmp = float_t(flags_->num_threads_) * iter.second.second.value[0] / float_t(flags_->loop_count_) / 1000.0f; - len = snprintf(stringBuf[1], sizeof(stringBuf[1]), "%.2f", tmp); - if (len > columnLenMax.at(1)) { - columnLenMax.at(1) = len + 4; - } - columns.emplace_back(stringBuf[1]); - - len = snprintf(stringBuf[2], sizeof(stringBuf[2]), "%f", iter.second.second.value[0] / op_cost_total_); - if (len > columnLenMax.at(2)) { - columnLenMax.at(2) = len + 4; - } - columns.emplace_back(stringBuf[2]); - - tmp = float_t(flags_->num_threads_) * iter.second.second.value[1] / float_t(flags_->loop_count_) / 1000.0f; - len = snprintf(stringBuf[3], sizeof(stringBuf[3]), "%.2f", tmp); - if (len > columnLenMax.at(3)) { - columnLenMax.at(3) = len + 4; - } - columns.emplace_back(stringBuf[3]); - - len = snprintf(stringBuf[4], sizeof(stringBuf[4]), "%f", iter.second.second.value[1] / op_cost2_total_); - if (len > columnLenMax.at(4)) { - columnLenMax.at(4) = len + 4; - } - columns.emplace_back(stringBuf[4]); - - 
rows.push_back(columns); - } - - printf("-------------------------------------------------------------------------\n"); - for (int i = 0; i < 5; i++) { - auto printBuf = title[i]; - if (printBuf.size() > columnLenMax.at(i)) { - columnLenMax.at(i) = printBuf.size(); - } - printBuf.resize(columnLenMax.at(i), ' '); - printf("%s\t", printBuf.c_str()); - } - printf("\n"); - for (auto &row : rows) { - for (int j = 0; j < 5; j++) { - auto printBuf = row[j]; - printBuf.resize(columnLenMax.at(j), ' '); - printf("%s\t", printBuf.c_str()); - } - printf("\n"); - } - return RET_OK; -} -#endif - -#ifdef SUPPORT_NNIE -int SvpSysInit() { - HI_S32 ret = HI_SUCCESS; - VB_CONFIG_S struVbConf; - - HI_MPI_SYS_Exit(); - HI_MPI_VB_Exit(); - - memset(&struVbConf, 0, sizeof(VB_CONFIG_S)); - struVbConf.u32MaxPoolCnt = 2; - struVbConf.astCommPool[1].u64BlkSize = 768 * 576 * 2; - struVbConf.astCommPool[1].u32BlkCnt = 1; - - ret = HI_MPI_VB_SetConfig((const VB_CONFIG_S *)&struVbConf); - if (HI_SUCCESS != ret) { - MS_LOG(ERROR) << "Error:HI_MPI_VB_SetConf failed!"; - return RET_ERROR; - } - - ret = HI_MPI_VB_Init(); - if (HI_SUCCESS != ret) { - MS_LOG(ERROR) << "Error:HI_MPI_VB_Init failed!"; - return RET_ERROR; - } - - ret = HI_MPI_SYS_Init(); - if (HI_SUCCESS != ret) { - MS_LOG(ERROR) << "Error:HI_MPI_SYS_Init failed!"; - return RET_ERROR; - } - - return RET_OK; -} - -int SvpSysExit() { - HI_S32 ret = HI_SUCCESS; - - ret = HI_MPI_SYS_Exit(); - if (HI_SUCCESS != ret) { - MS_LOG(ERROR) << "Error:HI_MPI_SYS_Exit failed!"; - return RET_ERROR; - } - - ret = HI_MPI_VB_Exit(); - if (HI_SUCCESS != ret) { - MS_LOG(ERROR) << "Error:HI_MPI_VB_Exit failed!"; - return RET_ERROR; - } - - return RET_OK; -} -#endif - -Benchmark::~Benchmark() { - for (const auto &iter : this->benchmark_data_) { - delete (iter.second); - } - this->benchmark_data_.clear(); - delete (session_); -#ifdef SUPPORT_NNIE - SvpSysExit(); -#endif -} - -int RunBenchmark(int argc, const char **argv) { - BenchmarkFlags flags; - Option err 
= flags.ParseFlags(argc, argv); -#ifdef SUPPORT_NNIE - SvpSysInit(); -#endif - if (err.IsSome()) { - std::cerr << err.Get() << std::endl; - std::cerr << flags.Usage() << std::endl; - return RET_ERROR; - } - - if (flags.help) { - std::cerr << flags.Usage() << std::endl; - return RET_OK; - } - - Benchmark benchmark(&flags); - auto status = benchmark.Init(); - if (status != 0) { - MS_LOG(ERROR) << "Benchmark init Error : " << status; - std::cerr << "Benchmark init Error : " << status << std::endl; - return RET_ERROR; - } - - status = benchmark.RunBenchmark(); - if (status != 0) { - MS_LOG(ERROR) << "Run Benchmark " - << flags.model_file_.substr(flags.model_file_.find_last_of(DELIM_SLASH) + 1).c_str() - << " Failed : " << status; - std::cerr << "Run Benchmark " << flags.model_file_.substr(flags.model_file_.find_last_of(DELIM_SLASH) + 1).c_str() - << " Failed : " << status << std::endl; - return RET_ERROR; - } - - MS_LOG(INFO) << "Run Benchmark " << flags.model_file_.substr(flags.model_file_.find_last_of(DELIM_SLASH) + 1).c_str() - << " Success."; - std::cout << "Run Benchmark " << flags.model_file_.substr(flags.model_file_.find_last_of(DELIM_SLASH) + 1).c_str() - << " Success." 
<< std::endl; - return RET_OK; -} } // namespace lite } // namespace mindspore diff --git a/mindspore/lite/tools/benchmark/benchmark.h b/mindspore/lite/tools/benchmark/benchmark.h index 9ee935cc192..542184eb884 100644 --- a/mindspore/lite/tools/benchmark/benchmark.h +++ b/mindspore/lite/tools/benchmark/benchmark.h @@ -31,6 +31,7 @@ #include #include #include +#include "tools/benchmark/benchmark_base.h" #include "include/model.h" #include "tools/common/flag_parser.h" #include "src/common/file_utils.h" @@ -38,283 +39,57 @@ #include "include/lite_session.h" namespace mindspore::lite { -enum MS_API InDataType { kImage = 0, kBinary = 1 }; -constexpr float relativeTolerance = 1e-5; -constexpr float absoluteTolerance = 1e-8; - -#ifdef ENABLE_ARM64 -struct PerfResult { - int64_t nr; - struct { - int64_t value; - int64_t id; - } values[2]; -}; -struct PerfCount { - int64_t value[2]; -}; -#endif - -struct MS_API CheckTensor { - CheckTensor(const std::vector &shape, const std::vector &data, - const std::vector &strings_data = {""}) { - this->shape = shape; - this->data = data; - this->strings_data = strings_data; - } - std::vector shape; - std::vector data; - std::vector strings_data; -}; - -class MS_API BenchmarkFlags : public virtual FlagParser { +class MS_API Benchmark : public BenchmarkBase { public: - BenchmarkFlags() { - // common - AddFlag(&BenchmarkFlags::model_file_, "modelFile", "Input model file", ""); - AddFlag(&BenchmarkFlags::in_data_file_, "inDataFile", "Input data file, if not set, use random input", ""); - AddFlag(&BenchmarkFlags::device_, "device", "CPU | GPU | NPU", "CPU"); - AddFlag(&BenchmarkFlags::cpu_bind_mode_, "cpuBindMode", - "Input 0 for NO_BIND, 1 for HIGHER_CPU, 2 for MID_CPU, default value: 1", 1); - // MarkPerformance - AddFlag(&BenchmarkFlags::loop_count_, "loopCount", "Run loop count", 10); - AddFlag(&BenchmarkFlags::num_threads_, "numThreads", "Run threads number", 2); - AddFlag(&BenchmarkFlags::enable_fp16_, "enableFp16", "Enable float16", 
false); - AddFlag(&BenchmarkFlags::enable_parallel_, "enableParallel", "Enable subgraph parallel : true | false", false); - AddFlag(&BenchmarkFlags::warm_up_loop_count_, "warmUpLoopCount", "Run warm up loop", 3); - AddFlag(&BenchmarkFlags::time_profiling_, "timeProfiling", "Run time profiling", false); - AddFlag(&BenchmarkFlags::perf_profiling_, "perfProfiling", - "Perf event profiling(only instructions statics enabled currently)", false); - AddFlag(&BenchmarkFlags::perf_event_, "perfEvent", "CYCLE|CACHE|STALL", "CYCLE"); - // MarkAccuracy - AddFlag(&BenchmarkFlags::benchmark_data_file_, "benchmarkDataFile", "Benchmark data file path", ""); - AddFlag(&BenchmarkFlags::benchmark_data_type_, "benchmarkDataType", - "Benchmark data type. FLOAT | INT32 | INT8 | UINT8", "FLOAT"); - AddFlag(&BenchmarkFlags::accuracy_threshold_, "accuracyThreshold", "Threshold of accuracy", 0.5); - AddFlag(&BenchmarkFlags::resize_dims_in_, "inputShapes", - "Shape of input data, the format should be NHWC. e.g. 1,32,32,32:1,1,32,32,1", ""); - } - - ~BenchmarkFlags() override = default; - - void InitInputDataList(); - - void InitResizeDimsList(); - - public: - // common - std::string model_file_; - std::string in_data_file_; - std::vector input_data_list_; - InDataType in_data_type_ = kBinary; - std::string in_data_type_in_ = "bin"; - int cpu_bind_mode_ = 1; - // MarkPerformance - int loop_count_ = 10; - int num_threads_ = 2; - bool enable_fp16_ = false; - bool enable_parallel_ = false; - int warm_up_loop_count_ = 3; - // MarkAccuracy - std::string benchmark_data_file_; - std::string benchmark_data_type_ = "FLOAT"; - float accuracy_threshold_ = 0.5; - // Resize - std::string resize_dims_in_; - std::vector> resize_dims_; - - std::string device_ = "CPU"; - bool time_profiling_ = false; - bool perf_profiling_ = false; - std::string perf_event_ = "CYCLE"; - bool dump_tensor_data_ = false; - bool print_tensor_data_ = false; -}; - -class MS_API Benchmark { - public: - explicit 
Benchmark(BenchmarkFlags *flags) : flags_(flags) {} + explicit Benchmark(BenchmarkFlags *flags) : BenchmarkBase(flags) {} virtual ~Benchmark(); - int Init(); - int RunBenchmark(); - - private: - // call GenerateInputData or ReadInputFile to init inputTensors - int LoadInput(); + int RunBenchmark() override; + protected: // call GenerateRandomData to fill inputTensors - int GenerateInputData(); + int GenerateInputData() override; - int GenerateRandomData(size_t size, void *data, TypeId data_type); + int ReadInputFile() override; - int ReadInputFile(); - - int ReadCalibData(); - - int ReadTensorData(std::ifstream &in_file_stream, const std::string &tensor_name, const std::vector &dims); + int ReadTensorData(std::ifstream &in_file_stream, const std::string &tensor_name, + const std::vector &dims) override; void InitContext(const std::shared_ptr &context); - int CompareOutput(); + int CompareOutput() override; tensor::MSTensor *GetTensorByNameOrShape(const std::string &node_or_tensor_name, const std::vector &dims); tensor::MSTensor *GetTensorByNodeShape(const std::vector &node_shape); - int CompareStringData(const std::string &name, tensor::MSTensor *tensor); - int CompareDataGetTotalBiasAndSize(const std::string &name, tensor::MSTensor *tensor, float *total_bias, int *total_size); - int InitDumpConfigFromJson(char *path); + int InitTimeProfilingCallbackParameter() override; - int InitCallbackParameter(); + int InitPerfProfilingCallbackParameter() override; - int InitTimeProfilingCallbackParameter(); + int InitDumpTensorDataCallbackParameter() override; - int InitPerfProfilingCallbackParameter(); - - int InitDumpTensorDataCallbackParameter(); - - int InitPrintTensorDataCallbackParameter(); - - int PrintResult(const std::vector &title, const std::map> &result); - -#ifdef ENABLE_ARM64 - int PrintPerfResult(const std::vector &title, - const std::map> &result); -#endif + int InitPrintTensorDataCallbackParameter() override; int PrintInputData(); - // tensorData need to be 
converter first - template - float CompareData(const std::string &nodeName, const std::vector &msShape, const void *tensor_data) { - const T *msTensorData = static_cast(tensor_data); - auto iter = this->benchmark_data_.find(nodeName); - if (iter != this->benchmark_data_.end()) { - std::vector castedMSShape; - size_t shapeSize = 1; - for (int64_t dim : msShape) { - castedMSShape.push_back(size_t(dim)); - shapeSize *= dim; - } - - CheckTensor *calibTensor = iter->second; - if (calibTensor->shape != castedMSShape) { - std::ostringstream oss; - oss << "Shape of mslite output("; - for (auto dim : castedMSShape) { - oss << dim << ","; - } - oss << ") and shape source model output("; - for (auto dim : calibTensor->shape) { - oss << dim << ","; - } - oss << ") are different"; - std::cerr << oss.str() << std::endl; - MS_LOG(ERROR) << oss.str().c_str(); - return RET_ERROR; - } - size_t errorCount = 0; - float meanError = 0; - std::cout << "Data of node " << nodeName << " : "; - for (size_t j = 0; j < shapeSize; j++) { - if (j < 50) { - std::cout << static_cast(msTensorData[j]) << " "; - } - - if (std::isnan(msTensorData[j]) || std::isinf(msTensorData[j])) { - std::cerr << "Output tensor has nan or inf data, compare fail" << std::endl; - MS_LOG(ERROR) << "Output tensor has nan or inf data, compare fail"; - return RET_ERROR; - } - - auto tolerance = absoluteTolerance + relativeTolerance * fabs(calibTensor->data.at(j)); - auto absoluteError = std::fabs(msTensorData[j] - calibTensor->data.at(j)); - if (absoluteError > tolerance) { - if (fabs(calibTensor->data.at(j) - 0.0f) < FLT_EPSILON) { - if (absoluteError > 1e-5) { - meanError += absoluteError; - errorCount++; - } else { - continue; - } - } else { - // just assume that atol = rtol - meanError += absoluteError / (fabs(calibTensor->data.at(j)) + FLT_MIN); - errorCount++; - } - } - } - std::cout << std::endl; - if (meanError > 0.0f) { - meanError /= errorCount; - } - - if (meanError <= 0.0000001) { - std::cout << "Mean bias of 
node/tensor " << nodeName << " : 0%" << std::endl; - } else { - std::cout << "Mean bias of node/tensor " << nodeName << " : " << meanError * 100 << "%" << std::endl; - } - return meanError; - } else { - MS_LOG(INFO) << "%s is not in Source Model output", nodeName.c_str(); - return RET_ERROR; - } - } - - template - void FillInputData(int size, void *data, Distribution distribution) { - MS_ASSERT(data != nullptr); - int elements_num = size / sizeof(T); - (void)std::generate_n(static_cast(data), elements_num, - [&]() { return static_cast(distribution(random_engine_)); }); - } - int MarkPerformance(); int MarkAccuracy(); - int CheckThreadNumValid(); - private: - BenchmarkFlags *flags_; session::LiteSession *session_{nullptr}; std::vector ms_inputs_; std::unordered_map> ms_outputs_; - std::unordered_map benchmark_data_; - std::unordered_map data_type_map_{{"FLOAT", TypeId::kNumberTypeFloat}, - {"INT8", TypeId::kNumberTypeInt8}, - {"INT32", TypeId::kNumberTypeInt32}, - {"UINT8", TypeId::kNumberTypeUInt8}}; - TypeId msCalibDataType = TypeId::kNumberTypeFloat; - // callback parameters - uint64_t op_begin_ = 0; - int op_call_times_total_ = 0; - float op_cost_total_ = 0.0f; - std::map> op_times_by_type_; - std::map> op_times_by_name_; - - // dump data - nlohmann::json dump_cfg_json_; - std::string dump_file_output_dir_; -#ifdef ENABLE_ARM64 - int perf_fd = 0; - int perf_fd2 = 0; - float op_cost2_total_ = 0.0f; - std::map> op_perf_by_type_; - std::map> op_perf_by_name_; -#endif KernelCallBack before_call_back_ = nullptr; KernelCallBack after_call_back_ = nullptr; - std::mt19937 random_engine_; }; -int MS_API RunBenchmark(int argc, const char **argv); } // namespace mindspore::lite #endif // MINNIE_BENCHMARK_BENCHMARK_H_ diff --git a/mindspore/lite/tools/benchmark/benchmark_base.cc b/mindspore/lite/tools/benchmark/benchmark_base.cc new file mode 100644 index 00000000000..4143b25cb37 --- /dev/null +++ b/mindspore/lite/tools/benchmark/benchmark_base.cc @@ -0,0 +1,606 @@ +/** + * 
Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "tools/benchmark/benchmark_base.h" +#define __STDC_FORMAT_MACROS +#include +#undef __STDC_FORMAT_MACROS +#include +#include +#include +#include "include/context.h" +#include "include/ms_tensor.h" +#include "include/version.h" +#include "schema/model_generated.h" +#include "src/common/common.h" +#include "src/tensor.h" +#ifdef ENABLE_ARM64 +#include +#include +#include +#include +#endif +#ifdef SUPPORT_NNIE +#include "include/hi_common.h" +#include "include/hi_comm_vb.h" +#include "include/mpi_sys.h" +#include "include/mpi_vb.h" +#endif + +namespace mindspore { +namespace lite { + +const std::unordered_map TYPE_ID_MAP{ + {kNumberTypeFloat16, "Float16"}, {kNumberTypeFloat, "Float32"}, {kNumberTypeFloat32, "Float32"}, + {kNumberTypeInt8, "Int8"}, {kNumberTypeInt16, "Int16"}, {kNumberTypeInt, "Int32"}, + {kNumberTypeInt32, "Int32"}, {kNumberTypeUInt8, "UInt8"}, {kNumberTypeUInt16, "UInt16"}, + {kNumberTypeUInt, "UInt32"}, {kNumberTypeUInt32, "UInt32"}, {kObjectTypeString, "String"}, + {kNumberTypeBool, "Bool"}, {kObjectTypeTensorType, "Tensor"}}; +const std::unordered_map TENSOR_FORMAT_MAP{ + {schema::Format_NCHW, "NCHW"}, {schema::Format_NHWC, "NHWC"}, {schema::Format_NHWC4, "NHWC4"}, + {schema::Format_HWKC, "HWKC"}, {schema::Format_HWCK, "HWCK"}, {schema::Format_KCHW, "KCHW"}, + {schema::Format_CKHW, "CKHW"}, {schema::Format_KHWC, "KHWC"}, 
{schema::Format_CHWK, "CHWK"}, + {schema::Format_HW, "HW"}, {schema::Format_HW4, "HW4"}, {schema::Format_NC, "NC"}, + {schema::Format_NC4, "NC4"}, {schema::Format_NC4HW4, "NC4HW4"}, {schema::Format_NCDHW, "NCDHW"}}; + +int BenchmarkBase::GenerateRandomData(size_t size, void *data, int data_type) { + MS_ASSERT(data != nullptr); + switch (data_type) { + case kNumberTypeFloat32: + case kNumberTypeFloat: + FillInputData(size, data, std::uniform_real_distribution(0.1f, 1.0f)); + break; + case kNumberTypeFloat64: + FillInputData(size, data, std::uniform_real_distribution(0.1, 1.0)); + break; + case kNumberTypeInt64: + FillInputData(size, data, std::uniform_int_distribution(0, 1)); + break; + case kNumberTypeInt: + case kNumberTypeInt32: + FillInputData(size, data, std::uniform_int_distribution(0, 1)); + break; + case kNumberTypeInt16: + FillInputData(size, data, std::uniform_int_distribution(0, 1)); + break; + case kNumberTypeInt8: + FillInputData(size, data, std::uniform_int_distribution(-127, 127)); + break; + case kNumberTypeUInt8: + FillInputData(size, data, std::uniform_int_distribution(0, 254)); + break; + default: + char *casted_data = static_cast(data); + for (size_t i = 0; i < size; i++) { + casted_data[i] = static_cast(i); + } + } + return RET_OK; +} + +int BenchmarkBase::LoadInput() { + if (flags_->in_data_file_.empty()) { + auto status = GenerateInputData(); + if (status != 0) { + std::cerr << "Generate input data error " << status << std::endl; + MS_LOG(ERROR) << "Generate input data error " << status; + return status; + } + } else { + auto status = ReadInputFile(); + if (status != 0) { + std::cerr << "ReadInputFile error, " << status << std::endl; + MS_LOG(ERROR) << "ReadInputFile error, " << status; + return status; + } + } + return RET_OK; +} + +// calibData is FP32 +int BenchmarkBase::ReadCalibData() { + const char *calib_data_path = flags_->benchmark_data_file_.c_str(); + // read calib data + std::ifstream in_file(calib_data_path); + if 
(!in_file.good()) { + std::cerr << "file: " << calib_data_path << " is not exist" << std::endl; + MS_LOG(ERROR) << "file: " << calib_data_path << " is not exist"; + return RET_ERROR; + } + + if (!in_file.is_open()) { + std::cerr << "file: " << calib_data_path << " open failed" << std::endl; + MS_LOG(ERROR) << "file: " << calib_data_path << " open failed"; + in_file.close(); + return RET_ERROR; + } + MS_LOG(INFO) << "Start reading calibData file"; + std::string line; + std::string tensor_name; + + while (!in_file.eof()) { + getline(in_file, line); + std::stringstream string_line1(line); + size_t dim = 0; + string_line1 >> tensor_name >> dim; + std::vector dims; + for (size_t i = 0; i < dim; i++) { + size_t tmp_dim; + string_line1 >> tmp_dim; + dims.push_back(tmp_dim); + } + auto ret = ReadTensorData(in_file, tensor_name, dims); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Read tensor data failed, tensor name: " << tensor_name; + return RET_ERROR; + } + } + in_file.close(); + MS_LOG(INFO) << "Finish reading calibData file"; + return RET_OK; +} + +int BenchmarkBase::CompareStringData(const std::string &name, tensor::MSTensor *tensor) { + auto iter = this->benchmark_data_.find(name); + if (iter != this->benchmark_data_.end()) { + std::vector calib_strings = iter->second->strings_data; + std::vector output_strings = MSTensorToStrings(tensor); + size_t compare_num = std::min(calib_strings.size(), output_strings.size()); + size_t print_num = std::min(compare_num, static_cast(kNumPrintMin)); + + std::cout << "Data of node " << name << " : " << std::endl; + for (size_t i = 0; i < compare_num; i++) { + if (i < print_num) { + std::cout << " " << output_strings[i] << std::endl; + } + if (calib_strings[i] != output_strings[i]) { + MS_LOG(ERROR) << "Compare failed, index: " << i; + return RET_ERROR; + } + } + } + return RET_OK; +} + +void BenchmarkFlags::InitInputDataList() { + char *input_list = new char[this->in_data_file_.length() + 1]; + snprintf(input_list, 
this->in_data_file_.length() + 1, "%s", this->in_data_file_.c_str()); + char *cur_input; + const char *split_c = ","; + cur_input = strtok(input_list, split_c); + while (cur_input != nullptr) { + input_data_list_.emplace_back(cur_input); + cur_input = strtok(nullptr, split_c); + } + delete[] input_list; +} + +void BenchmarkFlags::InitResizeDimsList() { + std::string content = this->resize_dims_in_; + std::vector shape; + auto shape_strs = StringSplit(content, std::string(DELIM_COLON)); + for (const auto &shape_str : shape_strs) { + shape.clear(); + auto dim_strs = StringSplit(shape_str, std::string(DELIM_COMMA)); + std::cout << "Resize Dims: "; + for (const auto &dim_str : dim_strs) { + std::cout << dim_str << " "; + shape.emplace_back(static_cast(std::stoi(dim_str))); + } + std::cout << std::endl; + this->resize_dims_.emplace_back(shape); + } +} + +int BenchmarkBase::CheckThreadNumValid() { + if (this->flags_->num_threads_ < 1) { + MS_LOG(ERROR) << "numThreads:" << this->flags_->num_threads_ << " must be greater than 0"; + std::cerr << "numThreads:" << this->flags_->num_threads_ << " must be greater than 0" << std::endl; + return RET_ERROR; + } + + if (flags_->enable_parallel_) { + if (flags_->num_threads_ < 2) { + MS_LOG(ERROR) << "enable parallel need more than 1 thread."; + std::cerr << "enable parallel need more than 1 thread." 
<< std::endl; + return RET_ERROR; + } + } + return RET_OK; +} + +int BenchmarkBase::InitDumpConfigFromJson(char *path) { + auto real_path = RealPath(path); + std::ifstream ifs(real_path); + if (!ifs.good()) { + MS_LOG(ERROR) << "file: " << real_path << " is not exist"; + return RET_ERROR; + } + if (!ifs.is_open()) { + MS_LOG(ERROR) << "file: " << real_path << " open failed"; + return RET_ERROR; + } + + try { + dump_cfg_json_ = nlohmann::json::parse(ifs); + } catch (const nlohmann::json::parse_error &error) { + MS_LOG(ERROR) << "parse json file failed, please check your file."; + return RET_ERROR; + } + if (dump_cfg_json_[dump::kSettings] == nullptr) { + MS_LOG(ERROR) << "\"common_dump_settings\" is required."; + return RET_ERROR; + } + if (dump_cfg_json_[dump::kSettings][dump::kMode] == nullptr) { + MS_LOG(ERROR) << "\"dump_mode\" is required."; + return RET_ERROR; + } + if (dump_cfg_json_[dump::kSettings][dump::kPath] == nullptr) { + MS_LOG(ERROR) << "\"path\" is required."; + return RET_ERROR; + } + if (dump_cfg_json_[dump::kSettings][dump::kNetName] == nullptr) { + dump_cfg_json_[dump::kSettings][dump::kNetName] = "Default"; + } + if (dump_cfg_json_[dump::kSettings][dump::kInputOutput] == nullptr) { + dump_cfg_json_[dump::kSettings][dump::kInputOutput] = 0; + } + if (dump_cfg_json_[dump::kSettings][dump::kKernels] != nullptr && + !dump_cfg_json_[dump::kSettings][dump::kKernels].empty()) { + if (dump_cfg_json_[dump::kSettings][dump::kMode] == 0) { + MS_LOG(ERROR) << R"("dump_mode" should be 1 when "kernels" isn't empty.)"; + return RET_ERROR; + } + } + + auto abs_path = dump_cfg_json_[dump::kSettings][dump::kPath].get(); + auto net_name = dump_cfg_json_[dump::kSettings][dump::kNetName].get(); + if (abs_path.back() == '\\' || abs_path.back() == '/') { + dump_file_output_dir_ = abs_path + net_name; + } else { +#ifdef _WIN32 + dump_file_output_dir_ = abs_path + "\\" + net_name; +#else + dump_file_output_dir_ = abs_path + "/" + net_name; +#endif + } + + auto status = 
CreateOutputDir(&dump_file_output_dir_); + if (status != RET_OK) { + MS_LOG(ERROR) << "create data output directory failed."; + return RET_ERROR; + } + + return RET_OK; +} + +int BenchmarkBase::InitCallbackParameter() { + int ret = RET_OK; + if (flags_->time_profiling_) { + ret = InitTimeProfilingCallbackParameter(); + } else if (flags_->perf_profiling_) { + ret = InitPerfProfilingCallbackParameter(); + } else if (flags_->print_tensor_data_) { + ret = InitPrintTensorDataCallbackParameter(); + } else if (flags_->dump_tensor_data_) { + ret = InitDumpTensorDataCallbackParameter(); + } + return ret; +} + +int BenchmarkBase::Init() { + if (this->flags_ == nullptr) { + return 1; + } + MS_LOG(INFO) << "ModelPath = " << this->flags_->model_file_; + MS_LOG(INFO) << "InDataPath = " << this->flags_->in_data_file_; + MS_LOG(INFO) << "InDataType = " << this->flags_->in_data_type_in_; + MS_LOG(INFO) << "LoopCount = " << this->flags_->loop_count_; + MS_LOG(INFO) << "DeviceType = " << this->flags_->device_; + MS_LOG(INFO) << "AccuracyThreshold = " << this->flags_->accuracy_threshold_; + MS_LOG(INFO) << "WarmUpLoopCount = " << this->flags_->warm_up_loop_count_; + MS_LOG(INFO) << "NumThreads = " << this->flags_->num_threads_; + MS_LOG(INFO) << "Fp16Priority = " << this->flags_->enable_fp16_; + MS_LOG(INFO) << "EnableParallel = " << this->flags_->enable_parallel_; + MS_LOG(INFO) << "calibDataPath = " << this->flags_->benchmark_data_file_; + std::cout << "ModelPath = " << this->flags_->model_file_ << std::endl; + std::cout << "InDataPath = " << this->flags_->in_data_file_ << std::endl; + std::cout << "InDataType = " << this->flags_->in_data_type_in_ << std::endl; + std::cout << "LoopCount = " << this->flags_->loop_count_ << std::endl; + std::cout << "DeviceType = " << this->flags_->device_ << std::endl; + std::cout << "AccuracyThreshold = " << this->flags_->accuracy_threshold_ << std::endl; + std::cout << "WarmUpLoopCount = " << this->flags_->warm_up_loop_count_ << std::endl; + 
std::cout << "NumThreads = " << this->flags_->num_threads_ << std::endl; + std::cout << "Fp16Priority = " << this->flags_->enable_fp16_ << std::endl; + std::cout << "EnableParallel = " << this->flags_->enable_parallel_ << std::endl; + std::cout << "calibDataPath = " << this->flags_->benchmark_data_file_ << std::endl; + if (this->flags_->loop_count_ < 1) { + MS_LOG(ERROR) << "LoopCount:" << this->flags_->loop_count_ << " must be greater than 0"; + std::cerr << "LoopCount:" << this->flags_->loop_count_ << " must be greater than 0" << std::endl; + return RET_ERROR; + } + + auto thread_ret = CheckThreadNumValid(); + if (thread_ret != RET_OK) { + MS_LOG(ERROR) << "Invalid numThreads."; + std::cerr << "Invalid numThreads." << std::endl; + return RET_ERROR; + } + static std::vector CPU_BIND_MODE_MAP = {"NO_BIND", "HIGHER_CPU", "MID_CPU"}; + if (this->flags_->cpu_bind_mode_ >= 1) { + MS_LOG(INFO) << "cpuBindMode = " << CPU_BIND_MODE_MAP[this->flags_->cpu_bind_mode_]; + std::cout << "cpuBindMode = " << CPU_BIND_MODE_MAP[this->flags_->cpu_bind_mode_] << std::endl; + } else { + MS_LOG(INFO) << "cpuBindMode = NO_BIND"; + std::cout << "cpuBindMode = NO_BIND" << std::endl; + } + + this->flags_->in_data_type_ = this->flags_->in_data_type_in_ == "img" ? 
kImage : kBinary; + + if (!flags_->benchmark_data_type_.empty()) { + if (data_type_map_.find(flags_->benchmark_data_type_) == data_type_map_.end()) { + MS_LOG(ERROR) << "CalibDataType not supported: " << flags_->benchmark_data_type_.c_str(); + return RET_ERROR; + } + msCalibDataType = data_type_map_.at(flags_->benchmark_data_type_); + MS_LOG(INFO) << "CalibDataType = " << flags_->benchmark_data_type_.c_str(); + std::cout << "CalibDataType = " << flags_->benchmark_data_type_.c_str() << std::endl; + } + + if (flags_->model_file_.empty()) { + MS_LOG(ERROR) << "modelPath is required"; + std::cerr << "modelPath is required" << std::endl; + return 1; + } + flags_->InitInputDataList(); + flags_->InitResizeDimsList(); + if (!flags_->resize_dims_.empty() && !flags_->input_data_list_.empty() && + flags_->resize_dims_.size() != flags_->input_data_list_.size()) { + MS_LOG(ERROR) << "Size of input resizeDims should be equal to size of input inDataPath"; + std::cerr << "Size of input resizeDims should be equal to size of input inDataPath" << std::endl; + return RET_ERROR; + } + + if (flags_->device_ != "CPU" && flags_->device_ != "GPU" && flags_->device_ != "NPU") { + MS_LOG(ERROR) << "Device type:" << flags_->device_ << " is not supported."; + std::cerr << "Device type:" << flags_->device_ << " is not supported." 
<< std::endl; + return RET_ERROR; + } + + if (flags_->time_profiling_ && flags_->perf_profiling_) { + MS_LOG(INFO) << "time_profiling is enabled, will not run perf_profiling."; + } + + // get dump data output path + auto dump_cfg_path = std::getenv(dump::kConfigPath); + if (dump_cfg_path != nullptr) { + flags_->dump_tensor_data_ = true; + if (InitDumpConfigFromJson(dump_cfg_path) != RET_OK) { + MS_LOG(ERROR) << "parse dump config file failed."; + return RET_ERROR; + } + } else { + MS_LOG(INFO) << "No MINDSPORE_DUMP_CONFIG in env, don't need to dump data"; + } + + auto status = InitCallbackParameter(); + if (status != RET_OK) { + MS_LOG(ERROR) << "Init callback Parameter failed."; + std::cerr << "Init callback Parameter failed." << std::endl; + return RET_ERROR; + } + + return RET_OK; +} + +int BenchmarkBase::PrintResult(const std::vector &title, + const std::map> &result) { + std::vector columnLenMax(5); + std::vector> rows; + + for (auto &iter : result) { + char stringBuf[5][100] = {}; + std::vector columns; + size_t len = 0; + + len = iter.first.size(); + if (len > columnLenMax.at(0)) { + columnLenMax.at(0) = len + 4; + } + columns.push_back(iter.first); + + len = + snprintf(stringBuf[1], sizeof(stringBuf[1]), "%f", iter.second.second / static_cast(flags_->loop_count_)); + if (len > columnLenMax.at(1)) { + columnLenMax.at(1) = len + 4; + } + columns.emplace_back(stringBuf[1]); + + len = snprintf(stringBuf[2], sizeof(stringBuf[2]), "%f", iter.second.second / op_cost_total_); + if (len > columnLenMax.at(2)) { + columnLenMax.at(2) = len + 4; + } + columns.emplace_back(stringBuf[2]); + + len = snprintf(stringBuf[3], sizeof(stringBuf[3]), "%d", iter.second.first); + if (len > columnLenMax.at(3)) { + columnLenMax.at(3) = len + 4; + } + columns.emplace_back(stringBuf[3]); + + len = snprintf(stringBuf[4], sizeof(stringBuf[4]), "%f", iter.second.second); + if (len > columnLenMax.at(4)) { + columnLenMax.at(4) = len + 4; + } + columns.emplace_back(stringBuf[4]); + + 
rows.push_back(columns); + } + + printf("-------------------------------------------------------------------------\n"); + for (int i = 0; i < 5; i++) { + auto printBuf = title[i]; + if (printBuf.size() > columnLenMax.at(i)) { + columnLenMax.at(i) = printBuf.size(); + } + printBuf.resize(columnLenMax.at(i), ' '); + printf("%s\t", printBuf.c_str()); + } + printf("\n"); + for (auto &row : rows) { + for (int j = 0; j < 5; j++) { + auto printBuf = row[j]; + printBuf.resize(columnLenMax.at(j), ' '); + printf("%s\t", printBuf.c_str()); + } + printf("\n"); + } + return RET_OK; +} + +#ifdef ENABLE_ARM64 +int BenchmarkBase::PrintPerfResult(const std::vector &title, + const std::map> &result) { + std::vector columnLenMax(5); + std::vector> rows; + + for (auto &iter : result) { + char stringBuf[5][100] = {}; + std::vector columns; + size_t len = 0; + + len = iter.first.size(); + if (len > columnLenMax.at(0)) { + columnLenMax.at(0) = len + 4; + } + columns.push_back(iter.first); + + float tmp = float_t(flags_->num_threads_) * iter.second.second.value[0] / float_t(flags_->loop_count_) / 1000.0f; + len = snprintf(stringBuf[1], sizeof(stringBuf[1]), "%.2f", tmp); + if (len > columnLenMax.at(1)) { + columnLenMax.at(1) = len + 4; + } + columns.emplace_back(stringBuf[1]); + + len = snprintf(stringBuf[2], sizeof(stringBuf[2]), "%f", iter.second.second.value[0] / op_cost_total_); + if (len > columnLenMax.at(2)) { + columnLenMax.at(2) = len + 4; + } + columns.emplace_back(stringBuf[2]); + + tmp = float_t(flags_->num_threads_) * iter.second.second.value[1] / float_t(flags_->loop_count_) / 1000.0f; + len = snprintf(stringBuf[3], sizeof(stringBuf[3]), "%.2f", tmp); + if (len > columnLenMax.at(3)) { + columnLenMax.at(3) = len + 4; + } + columns.emplace_back(stringBuf[3]); + + len = snprintf(stringBuf[4], sizeof(stringBuf[4]), "%f", iter.second.second.value[1] / op_cost2_total_); + if (len > columnLenMax.at(4)) { + columnLenMax.at(4) = len + 4; + } + columns.emplace_back(stringBuf[4]); + + 
rows.push_back(columns); + } + + printf("-------------------------------------------------------------------------\n"); + for (int i = 0; i < 5; i++) { + auto printBuf = title[i]; + if (printBuf.size() > columnLenMax.at(i)) { + columnLenMax.at(i) = printBuf.size(); + } + printBuf.resize(columnLenMax.at(i), ' '); + printf("%s\t", printBuf.c_str()); + } + printf("\n"); + for (auto &row : rows) { + for (int j = 0; j < 5; j++) { + auto printBuf = row[j]; + printBuf.resize(columnLenMax.at(j), ' '); + printf("%s\t", printBuf.c_str()); + } + printf("\n"); + } + return RET_OK; +} +#endif + +#ifdef SUPPORT_NNIE +int SvpSysInit() { + HI_S32 ret = HI_SUCCESS; + VB_CONFIG_S struVbConf; + + HI_MPI_SYS_Exit(); + HI_MPI_VB_Exit(); + + memset(&struVbConf, 0, sizeof(VB_CONFIG_S)); + struVbConf.u32MaxPoolCnt = 2; + struVbConf.astCommPool[1].u64BlkSize = 768 * 576 * 2; + struVbConf.astCommPool[1].u32BlkCnt = 1; + + ret = HI_MPI_VB_SetConfig((const VB_CONFIG_S *)&struVbConf); + if (HI_SUCCESS != ret) { + MS_LOG(ERROR) << "Error:HI_MPI_VB_SetConf failed!"; + return RET_ERROR; + } + + ret = HI_MPI_VB_Init(); + if (HI_SUCCESS != ret) { + MS_LOG(ERROR) << "Error:HI_MPI_VB_Init failed!"; + return RET_ERROR; + } + + ret = HI_MPI_SYS_Init(); + if (HI_SUCCESS != ret) { + MS_LOG(ERROR) << "Error:HI_MPI_SYS_Init failed!"; + return RET_ERROR; + } + + return RET_OK; +} + +int SvpSysExit() { + HI_S32 ret = HI_SUCCESS; + + ret = HI_MPI_SYS_Exit(); + if (HI_SUCCESS != ret) { + MS_LOG(ERROR) << "Error:HI_MPI_SYS_Exit failed!"; + return RET_ERROR; + } + + ret = HI_MPI_VB_Exit(); + if (HI_SUCCESS != ret) { + MS_LOG(ERROR) << "Error:HI_MPI_VB_Exit failed!"; + return RET_ERROR; + } + + return RET_OK; +} +#endif + +BenchmarkBase::~BenchmarkBase() { + for (const auto &iter : this->benchmark_data_) { + delete (iter.second); + } + this->benchmark_data_.clear(); +#ifdef SUPPORT_NNIE + SvpSysExit(); +#endif +} + +} // namespace lite +} // namespace mindspore diff --git 
a/mindspore/lite/tools/benchmark/benchmark_base.h b/mindspore/lite/tools/benchmark/benchmark_base.h new file mode 100644 index 00000000000..ced7a3fc38b --- /dev/null +++ b/mindspore/lite/tools/benchmark/benchmark_base.h @@ -0,0 +1,316 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINNIE_BENCHMARK_BENCHMARK_BASE_H_ +#define MINNIE_BENCHMARK_BENCHMARK_BASE_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "include/model.h" +#include "tools/common/flag_parser.h" +#include "src/common/file_utils.h" +#include "src/common/utils.h" +#include "ir/dtype/type_id.h" +#include "schema/model_generated.h" + +namespace mindspore::lite { +enum MS_API InDataType { kImage = 0, kBinary = 1 }; + +constexpr float relativeTolerance = 1e-5; +constexpr float absoluteTolerance = 1e-8; + +constexpr int kNumPrintMin = 5; +constexpr const char *DELIM_COLON = ":"; +constexpr const char *DELIM_COMMA = ","; +constexpr const char *DELIM_SLASH = "/"; + +extern const std::unordered_map TYPE_ID_MAP; +extern const std::unordered_map TENSOR_FORMAT_MAP; + +// +namespace dump { +constexpr auto kConfigPath = "MINDSPORE_DUMP_CONFIG"; +constexpr auto kSettings = "common_dump_settings"; +constexpr auto kMode = "dump_mode"; +constexpr auto kPath = "path"; +constexpr auto kNetName = "net_name"; +constexpr auto kInputOutput = 
"input_output"; +constexpr auto kKernels = "kernels"; +} // namespace dump + +#ifdef ENABLE_ARM64 +struct PerfResult { + int64_t nr; + struct { + int64_t value; + int64_t id; + } values[2]; +}; +struct PerfCount { + int64_t value[2]; +}; +#endif + +struct MS_API CheckTensor { + CheckTensor(const std::vector &shape, const std::vector &data, + const std::vector &strings_data = {""}) { + this->shape = shape; + this->data = data; + this->strings_data = strings_data; + } + std::vector shape; + std::vector data; + std::vector strings_data; +}; + +class MS_API BenchmarkFlags : public virtual FlagParser { + public: + BenchmarkFlags() { + // common + AddFlag(&BenchmarkFlags::model_file_, "modelFile", "Input model file", ""); + AddFlag(&BenchmarkFlags::in_data_file_, "inDataFile", "Input data file, if not set, use random input", ""); + AddFlag(&BenchmarkFlags::device_, "device", "CPU | GPU | NPU", "CPU"); + AddFlag(&BenchmarkFlags::cpu_bind_mode_, "cpuBindMode", + "Input 0 for NO_BIND, 1 for HIGHER_CPU, 2 for MID_CPU, default value: 1", 1); + // MarkPerformance + AddFlag(&BenchmarkFlags::loop_count_, "loopCount", "Run loop count", 10); + AddFlag(&BenchmarkFlags::num_threads_, "numThreads", "Run threads number", 2); + AddFlag(&BenchmarkFlags::enable_fp16_, "enableFp16", "Enable float16", false); + AddFlag(&BenchmarkFlags::enable_parallel_, "enableParallel", "Enable subgraph parallel : true | false", false); + AddFlag(&BenchmarkFlags::warm_up_loop_count_, "warmUpLoopCount", "Run warm up loop", 3); + AddFlag(&BenchmarkFlags::time_profiling_, "timeProfiling", "Run time profiling", false); + AddFlag(&BenchmarkFlags::perf_profiling_, "perfProfiling", + "Perf event profiling(only instructions statics enabled currently)", false); + AddFlag(&BenchmarkFlags::perf_event_, "perfEvent", "CYCLE|CACHE|STALL", "CYCLE"); + // MarkAccuracy + AddFlag(&BenchmarkFlags::benchmark_data_file_, "benchmarkDataFile", "Benchmark data file path", ""); + AddFlag(&BenchmarkFlags::benchmark_data_type_, 
"benchmarkDataType", + "Benchmark data type. FLOAT | INT32 | INT8 | UINT8", "FLOAT"); + AddFlag(&BenchmarkFlags::accuracy_threshold_, "accuracyThreshold", "Threshold of accuracy", 0.5); + AddFlag(&BenchmarkFlags::resize_dims_in_, "inputShapes", + "Shape of input data, the format should be NHWC. e.g. 1,32,32,32:1,1,32,32,1", ""); + } + + ~BenchmarkFlags() override = default; + + void InitInputDataList(); + + void InitResizeDimsList(); + + public: + // common + std::string model_file_; + std::string in_data_file_; + std::vector input_data_list_; + InDataType in_data_type_ = kBinary; + std::string in_data_type_in_ = "bin"; + int cpu_bind_mode_ = 1; + // MarkPerformance + int loop_count_ = 10; + int num_threads_ = 2; + bool enable_fp16_ = false; + bool enable_parallel_ = false; + int warm_up_loop_count_ = 3; + // MarkAccuracy + std::string benchmark_data_file_; + std::string benchmark_data_type_ = "FLOAT"; + float accuracy_threshold_ = 0.5; + // Resize + std::string resize_dims_in_; + std::vector> resize_dims_; + + std::string device_ = "CPU"; + bool time_profiling_ = false; + bool perf_profiling_ = false; + std::string perf_event_ = "CYCLE"; + bool dump_tensor_data_ = false; + bool print_tensor_data_ = false; +}; + +class MS_API BenchmarkBase { + public: + explicit BenchmarkBase(BenchmarkFlags *flags) : flags_(flags) {} + + virtual ~BenchmarkBase(); + + int Init(); + virtual int RunBenchmark() = 0; + + protected: + int LoadInput(); + + virtual int GenerateInputData() = 0; + + int GenerateRandomData(size_t size, void *data, int data_type); + + virtual int ReadInputFile() = 0; + + int ReadCalibData(); + + virtual int ReadTensorData(std::ifstream &in_file_stream, const std::string &tensor_name, + const std::vector &dims) = 0; + + virtual int CompareOutput() = 0; + + int CompareStringData(const std::string &name, tensor::MSTensor *tensor); + + int InitDumpConfigFromJson(char *path); + + int InitCallbackParameter(); + + virtual int InitTimeProfilingCallbackParameter() = 0; 
+ + virtual int InitPerfProfilingCallbackParameter() = 0; + + virtual int InitDumpTensorDataCallbackParameter() = 0; + + virtual int InitPrintTensorDataCallbackParameter() = 0; + + int PrintResult(const std::vector &title, const std::map> &result); + +#ifdef ENABLE_ARM64 + int PrintPerfResult(const std::vector &title, + const std::map> &result); +#endif + + // tensorData need to be converter first + template + float CompareData(const std::string &nodeName, const std::vector &msShape, const void *tensor_data) { + const T *msTensorData = static_cast(tensor_data); + auto iter = this->benchmark_data_.find(nodeName); + if (iter != this->benchmark_data_.end()) { + std::vector castedMSShape; + size_t shapeSize = 1; + for (int64_t dim : msShape) { + castedMSShape.push_back(size_t(dim)); + shapeSize *= dim; + } + + CheckTensor *calibTensor = iter->second; + if (calibTensor->shape != castedMSShape) { + std::ostringstream oss; + oss << "Shape of mslite output("; + for (auto dim : castedMSShape) { + oss << dim << ","; + } + oss << ") and shape source model output("; + for (auto dim : calibTensor->shape) { + oss << dim << ","; + } + oss << ") are different"; + std::cerr << oss.str() << std::endl; + MS_LOG(ERROR) << oss.str().c_str(); + return RET_ERROR; + } + size_t errorCount = 0; + float meanError = 0; + std::cout << "Data of node " << nodeName << " : "; + for (size_t j = 0; j < shapeSize; j++) { + if (j < 50) { + std::cout << static_cast(msTensorData[j]) << " "; + } + + if (std::isnan(msTensorData[j]) || std::isinf(msTensorData[j])) { + std::cerr << "Output tensor has nan or inf data, compare fail" << std::endl; + MS_LOG(ERROR) << "Output tensor has nan or inf data, compare fail"; + return RET_ERROR; + } + + auto tolerance = absoluteTolerance + relativeTolerance * fabs(calibTensor->data.at(j)); + auto absoluteError = std::fabs(msTensorData[j] - calibTensor->data.at(j)); + if (absoluteError > tolerance) { + if (fabs(calibTensor->data.at(j) - 0.0f) < FLT_EPSILON) { + if 
(absoluteError > 1e-5) { + meanError += absoluteError; + errorCount++; + } else { + continue; + } + } else { + // just assume that atol = rtol + meanError += absoluteError / (fabs(calibTensor->data.at(j)) + FLT_MIN); + errorCount++; + } + } + } + std::cout << std::endl; + if (meanError > 0.0f) { + meanError /= errorCount; + } + + if (meanError <= 0.0000001) { + std::cout << "Mean bias of node/tensor " << nodeName << " : 0%" << std::endl; + } else { + std::cout << "Mean bias of node/tensor " << nodeName << " : " << meanError * 100 << "%" << std::endl; + } + return meanError; + } else { + MS_LOG(INFO) << "%s is not in Source Model output", nodeName.c_str(); + return RET_ERROR; + } + } + + template + void FillInputData(int size, void *data, Distribution distribution) { + MS_ASSERT(data != nullptr); + int elements_num = size / sizeof(T); + (void)std::generate_n(static_cast(data), elements_num, + [&]() { return static_cast(distribution(random_engine_)); }); + } + + int CheckThreadNumValid(); + + protected: + BenchmarkFlags *flags_; + std::unordered_map benchmark_data_; + std::unordered_map data_type_map_{ + {"FLOAT", kNumberTypeFloat}, {"INT8", kNumberTypeInt8}, {"INT32", kNumberTypeInt32}, {"UINT8", kNumberTypeUInt8}}; + int msCalibDataType = kNumberTypeFloat; + + // callback parameters + uint64_t op_begin_ = 0; + int op_call_times_total_ = 0; + float op_cost_total_ = 0.0f; + std::map> op_times_by_type_; + std::map> op_times_by_name_; + + // dump data + nlohmann::json dump_cfg_json_; + std::string dump_file_output_dir_; +#ifdef ENABLE_ARM64 + int perf_fd = 0; + int perf_fd2 = 0; + float op_cost2_total_ = 0.0f; + std::map> op_perf_by_type_; + std::map> op_perf_by_name_; +#endif + std::mt19937 random_engine_; +}; + +} // namespace mindspore::lite +#endif // MINNIE_BENCHMARK_BENCHMARK_BASE_H_ diff --git a/mindspore/lite/tools/benchmark/benchmark_unified_api.cc b/mindspore/lite/tools/benchmark/benchmark_unified_api.cc new file mode 100644 index 00000000000..00a32a44667 --- 
/dev/null +++ b/mindspore/lite/tools/benchmark/benchmark_unified_api.cc @@ -0,0 +1,828 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "tools/benchmark/benchmark_unified_api.h" +#define __STDC_FORMAT_MACROS +#include +#undef __STDC_FORMAT_MACROS +#include +#include +#include +#include "include/context.h" +#include "include/ms_tensor.h" +#include "include/version.h" +#include "schema/model_generated.h" +#include "src/common/common.h" +#include "src/tensor.h" +#ifdef ENABLE_ARM64 +#include +#include +#include +#include +#endif +#ifdef SUPPORT_NNIE +#include "include/hi_common.h" +#include "include/hi_comm_vb.h" +#include "include/mpi_sys.h" +#include "include/mpi_vb.h" +#endif + +namespace mindspore { +namespace lite { + +int BenchmarkUnifiedApi::GenerateInputData() { + for (auto tensor : ms_inputs_for_api_) { + MS_ASSERT(tensor != nullptr); + auto input_data = tensor.MutableData(); + if (input_data == nullptr) { + MS_LOG(ERROR) << "MallocData for inTensor failed"; + return RET_ERROR; + } + int status; + if (static_cast(tensor.DataType()) == kObjectTypeString) { + std::cerr << "Unsupported kObjectTypeString:" << std::endl; + MS_LOG(ERROR) << "Unsupported kObjectTypeString:"; + return RET_ERROR; + // status = StringsToMSTensor({"you're the best."}, tensor); + } else { + status = GenerateRandomData(tensor.DataSize(), input_data, static_cast(tensor.DataType())); + } + if (status != RET_OK) 
{ + std::cerr << "GenerateRandomData for inTensor failed: " << status << std::endl; + MS_LOG(ERROR) << "GenerateRandomData for inTensor failed:" << status; + return status; + } + } + return RET_OK; +} + +int BenchmarkUnifiedApi::ReadInputFile() { + if (ms_inputs_for_api_.empty()) { + return RET_OK; + } + + if (this->flags_->in_data_type_ == kImage) { + MS_LOG(ERROR) << "Not supported image input"; + return RET_ERROR; + } else { + for (size_t i = 0; i < flags_->input_data_list_.size(); i++) { + auto cur_tensor = ms_inputs_for_api_.at(i); + MS_ASSERT(cur_tensor != nullptr); + size_t size; + char *bin_buf = ReadFile(flags_->input_data_list_[i].c_str(), &size); + if (bin_buf == nullptr) { + MS_LOG(ERROR) << "ReadFile return nullptr"; + return RET_ERROR; + } + if (static_cast(cur_tensor.DataType()) == kObjectTypeString) { + std::cerr << "Unsupported kObjectTypeString:" << std::endl; + MS_LOG(ERROR) << "Unsupported kObjectTypeString:"; + return RET_ERROR; + + } else { + auto tensor_data_size = cur_tensor.DataSize(); + if (size != tensor_data_size) { + std::cerr << "Input binary file size error, required: " << tensor_data_size << ", in fact: " << size + << std::endl; + MS_LOG(ERROR) << "Input binary file size error, required: " << tensor_data_size << ", in fact: " << size; + delete[] bin_buf; + return RET_ERROR; + } + auto input_data = cur_tensor.MutableData(); + if (input_data == nullptr) { + MS_LOG(ERROR) << "input_data is nullptr."; + return RET_ERROR; + } + memcpy(input_data, bin_buf, tensor_data_size); + } + delete[] bin_buf; + } + } + return RET_OK; +} + +int BenchmarkUnifiedApi::ReadTensorData(std::ifstream &in_file_stream, const std::string &tensor_name, + const std::vector &dims) { + std::string line; + getline(in_file_stream, line); + std::stringstream line_stream(line); + if (this->benchmark_data_.find(tensor_name) != this->benchmark_data_.end()) { + return RET_OK; + } + mindspore::MSTensor tensor = GetMSTensorByNameOrShape(tensor_name, dims); + if (tensor == 
nullptr) { + MS_LOG(ERROR) << "Get tensor failed, tensor name: " << tensor_name; + return RET_ERROR; + } + std::vector data; + std::vector strings_data; + size_t shape_size = std::accumulate(dims.begin(), dims.end(), 1, std::multiplies()); + if (static_cast(tensor.DataType()) == kObjectTypeString) { + strings_data.push_back(line); + for (size_t i = 1; i < shape_size; i++) { + getline(in_file_stream, line); + strings_data.push_back(line); + } + } else { + for (size_t i = 0; i < shape_size; i++) { + float tmp_data; + line_stream >> tmp_data; + data.push_back(tmp_data); + } + } + auto *check_tensor = new (std::nothrow) CheckTensor(dims, data, strings_data); + if (check_tensor == nullptr) { + MS_LOG(ERROR) << "New CheckTensor failed, tensor name: " << tensor_name; + return RET_ERROR; + } + this->benchmark_data_.insert(std::make_pair(tensor_name, check_tensor)); + return RET_OK; +} + +void BenchmarkUnifiedApi::InitMSContext(const std::shared_ptr &context) { + context->SetThreadNum(flags_->num_threads_); + context->SetEnableParallel(flags_->enable_parallel_); + context->SetThreadAffinity(flags_->cpu_bind_mode_); + auto &device_list = context->MutableDeviceInfo(); + + std::shared_ptr device_info = std::make_shared(); + device_info->SetEnableFP16(flags_->enable_fp16_); + device_list.push_back(device_info); + + if (flags_->device_ == "GPU") { + std::shared_ptr gpu_device_info = std::make_shared(); + gpu_device_info->SetEnableFP16(flags_->enable_fp16_); + device_list.push_back(gpu_device_info); + } + + if (flags_->device_ == "NPU") { + std::shared_ptr npu_device_info = std::make_shared(); + npu_device_info->SetFrequency(3); + device_list.push_back(npu_device_info); + } +} + +int BenchmarkUnifiedApi::CompareOutput() { + std::cout << "================ Comparing Output data ================" << std::endl; + float total_bias = 0; + int total_size = 0; + for (const auto &calib_tensor : benchmark_data_) { + std::string node_or_tensor_name = calib_tensor.first; + 
mindspore::MSTensor tensor = GetMSTensorByNameOrShape(node_or_tensor_name, calib_tensor.second->shape); + if (tensor == nullptr) { + MS_LOG(ERROR) << "Get tensor failed, tensor name: " << node_or_tensor_name; + return RET_ERROR; + } + int ret; + if (static_cast(tensor.DataType()) == kObjectTypeString) { + std::cerr << "Unsupported kObjectTypeString:" << std::endl; + MS_LOG(ERROR) << "Unsupported kObjectTypeString:"; + return RET_ERROR; + // ret = CompareStringData(node_or_tensor_name, tensor); + } else { + ret = CompareDataGetTotalBiasAndSize(node_or_tensor_name, &tensor, &total_bias, &total_size); + } + if (ret != RET_OK) { + MS_LOG(ERROR) << "Error in CompareData"; + std::cerr << "Error in CompareData" << std::endl; + std::cout << "=======================================================" << std::endl << std::endl; + return ret; + } + } + float mean_bias; + if (total_size != 0) { + mean_bias = total_bias / float_t(total_size) * 100; + } else { + mean_bias = 0; + } + + std::cout << "Mean bias of all nodes/tensors: " << mean_bias << "%" << std::endl; + std::cout << "=======================================================" << std::endl << std::endl; + + if (mean_bias > this->flags_->accuracy_threshold_) { + MS_LOG(ERROR) << "Mean bias of all nodes/tensors is too big: " << mean_bias << "%"; + std::cerr << "Mean bias of all nodes/tensors is too big: " << mean_bias << "%" << std::endl; + return RET_ERROR; + } + return RET_OK; +} + +mindspore::MSTensor BenchmarkUnifiedApi::GetMSTensorByNodeShape(const std::vector &node_shape) { + std::vector match_tensors; + std::vector shape_vector = ConverterToInt64Vector(node_shape); + auto tensors = ms_model_.GetOutputs(); + for (auto &out_tensor_pair : tensors) { + if (out_tensor_pair.Shape() == shape_vector) { + match_tensors.emplace_back(out_tensor_pair); + } + } + + return match_tensors.front(); +} + +mindspore::MSTensor BenchmarkUnifiedApi::GetMSTensorByNameOrShape(const std::string &node_or_tensor_name, + const std::vector 
&dims) { + mindspore::MSTensor tensor; + auto tensors = ms_model_.GetOutputsByNodeName(node_or_tensor_name); + if (tensors.empty() || tensors.size() != 1) { + MS_LOG(INFO) << "Cannot find output node: " << node_or_tensor_name + << " or node has more than one output tensor, switch to GetOutputByTensorName"; + tensor = ms_model_.GetOutputByTensorName(node_or_tensor_name); + if (tensor == nullptr) { + return GetMSTensorByNodeShape(dims); + } + } else { + tensor = tensors.front(); + } + return tensor; +} + +int BenchmarkUnifiedApi::CompareDataGetTotalBiasAndSize(const std::string &name, mindspore::MSTensor *tensor, + float *total_bias, int *total_size) { + float bias = 0; + auto mutableData = tensor->MutableData(); + if (mutableData == nullptr) { + MS_LOG(ERROR) << "mutableData is nullptr."; + return RET_ERROR; + } + switch (static_cast(tensor->DataType())) { + case TypeId::kNumberTypeFloat: + case TypeId::kNumberTypeFloat32: { + bias = CompareData(name, tensor->Shape(), mutableData); + break; + } + case TypeId::kNumberTypeInt8: { + bias = CompareData(name, tensor->Shape(), mutableData); + break; + } + case TypeId::kNumberTypeUInt8: { + bias = CompareData(name, tensor->Shape(), mutableData); + break; + } + case TypeId::kNumberTypeInt32: { + bias = CompareData(name, tensor->Shape(), mutableData); + break; + } + case TypeId::kNumberTypeInt16: { + bias = CompareData(name, tensor->Shape(), mutableData); + break; + } + case TypeId::kNumberTypeBool: { + bias = CompareData(name, tensor->Shape(), mutableData); + break; + } + default: + MS_LOG(ERROR) << "Datatype " << static_cast(tensor->DataType()) << " is not supported."; + return RET_ERROR; + } + if (bias < 0) { + MS_LOG(ERROR) << "CompareData failed, name: " << name; + return RET_ERROR; + } + *total_bias += bias; + *total_size += 1; + return RET_OK; +} + +int BenchmarkUnifiedApi::MarkPerformance() { + MS_LOG(INFO) << "Running warm up loops..."; + std::cout << "Running warm up loops..." 
<< std::endl; + std::vector outputs; + + for (int i = 0; i < flags_->warm_up_loop_count_; i++) { + auto status = ms_model_.Predict(ms_inputs_for_api_, &outputs); + if (status != kSuccess) { + MS_LOG(ERROR) << "Inference error "; + std::cerr << "Inference error " << std::endl; + return RET_ERROR; + } + } + + MS_LOG(INFO) << "Running benchmark loops..."; + std::cout << "Running benchmark loops..." << std::endl; + uint64_t time_min = 1000000; + uint64_t time_max = 0; + uint64_t time_avg = 0; + + for (int i = 0; i < flags_->loop_count_; i++) { + auto inputs = ms_model_.GetInputs(); + for (auto tensor : inputs) { + tensor.MutableData(); // prepare data + } + auto start = GetTimeUs(); + auto status = ms_model_.Predict(ms_inputs_for_api_, &outputs, ms_before_call_back_, ms_after_call_back_); + if (status != kSuccess) { + MS_LOG(ERROR) << "Inference error "; + std::cerr << "Inference error "; + return RET_ERROR; + } + + auto end = GetTimeUs(); + auto time = end - start; + time_min = std::min(time_min, time); + time_max = std::max(time_max, time); + time_avg += time; + } + + if (flags_->time_profiling_) { + const std::vector per_op_name = {"opName", "avg(ms)", "percent", "calledTimes", "opTotalTime"}; + const std::vector per_op_type = {"opType", "avg(ms)", "percent", "calledTimes", "opTotalTime"}; + PrintResult(per_op_name, op_times_by_name_); + PrintResult(per_op_type, op_times_by_type_); +#ifdef ENABLE_ARM64 + } else if (flags_->perf_profiling_) { + if (flags_->perf_event_ == "CACHE") { + const std::vector per_op_name = {"opName", "cache ref(k)", "cache ref(%)", "miss(k)", "miss(%)"}; + const std::vector per_op_type = {"opType", "cache ref(k)", "cache ref(%)", "miss(k)", "miss(%)"}; + PrintPerfResult(per_op_name, op_perf_by_name_); + PrintPerfResult(per_op_type, op_perf_by_type_); + } else if (flags_->perf_event_ == "STALL") { + const std::vector per_op_name = {"opName", "frontend(k)", "frontend(%)", "backendend(k)", + "backendend(%)"}; + const std::vector per_op_type = 
{"opType", "frontend(k)", "frontend(%)", "backendend(k)", + "backendend(%)"}; + PrintPerfResult(per_op_name, op_perf_by_name_); + PrintPerfResult(per_op_type, op_perf_by_type_); + } else { + const std::vector per_op_name = {"opName", "cycles(k)", "cycles(%)", "ins(k)", "ins(%)"}; + const std::vector per_op_type = {"opType", "cycles(k)", "cycles(%)", "ins(k)", "ins(%)"}; + PrintPerfResult(per_op_name, op_perf_by_name_); + PrintPerfResult(per_op_type, op_perf_by_type_); + } +#endif + } + + if (flags_->loop_count_ > 0) { + time_avg /= flags_->loop_count_; + MS_LOG(INFO) << "Model = " << flags_->model_file_.substr(flags_->model_file_.find_last_of(DELIM_SLASH) + 1).c_str() + << ", NumThreads = " << flags_->num_threads_ << ", MinRunTime = " << time_min / 1000.0f + << ", MaxRuntime = " << time_max / 1000.0f << ", AvgRunTime = " << time_avg / 1000.0f; + printf("Model = %s, NumThreads = %d, MinRunTime = %f ms, MaxRuntime = %f ms, AvgRunTime = %f ms\n", + flags_->model_file_.substr(flags_->model_file_.find_last_of(DELIM_SLASH) + 1).c_str(), flags_->num_threads_, + time_min / 1000.0f, time_max / 1000.0f, time_avg / 1000.0f); + } + return RET_OK; +} + +int BenchmarkUnifiedApi::MarkAccuracy() { + MS_LOG(INFO) << "MarkAccuracy"; + std::cout << "MarkAccuracy" << std::endl; + + auto status = PrintInputData(); + if (status != RET_OK) { + MS_LOG(ERROR) << "PrintInputData error " << status; + std::cerr << "PrintInputData error " << status << std::endl; + return status; + } + std::vector outputs; + auto ret = ms_model_.Predict(ms_inputs_for_api_, &outputs, ms_before_call_back_, ms_after_call_back_); + if (ret != kSuccess) { + MS_LOG(ERROR) << "Inference error "; + std::cerr << "Inference error " << std::endl; + return RET_ERROR; + } + status = ReadCalibData(); + if (status != RET_OK) { + MS_LOG(ERROR) << "Read calib data error " << status; + std::cerr << "Read calib data error " << status << std::endl; + return status; + } + status = CompareOutput(); + if (status != RET_OK) { + 
MS_LOG(ERROR) << "Compare output error " << status; + std::cerr << "Compare output error " << status << std::endl; + return status; + } + return RET_OK; +} + +int BenchmarkUnifiedApi::PrintInputData() { + for (size_t i = 0; i < ms_inputs_for_api_.size(); i++) { + auto input = ms_inputs_for_api_[i]; + MS_ASSERT(input != nullptr); + auto tensor_data_type = static_cast(input.DataType()); + + std::cout << "InData" << i << ": "; + if (tensor_data_type == TypeId::kObjectTypeString) { + std::cerr << "Unsupported kObjectTypeString:" << std::endl; + MS_LOG(ERROR) << "Unsupported kObjectTypeString:"; + return RET_ERROR; + } + size_t print_num = std::min(static_cast(input.ElementNum()), 20); + const void *in_data = input.MutableData(); + if (in_data == nullptr) { + MS_LOG(ERROR) << "in_data is nullptr."; + return RET_ERROR; + } + + for (size_t j = 0; j < print_num; j++) { + if (tensor_data_type == TypeId::kNumberTypeFloat32 || tensor_data_type == TypeId::kNumberTypeFloat) { + std::cout << static_cast(in_data)[j] << " "; + } else if (tensor_data_type == TypeId::kNumberTypeInt8) { + std::cout << static_cast(in_data)[j] << " "; + } else if (tensor_data_type == TypeId::kNumberTypeUInt8) { + std::cout << static_cast(in_data)[j] << " "; + } else if (tensor_data_type == TypeId::kNumberTypeInt32) { + std::cout << static_cast(in_data)[j] << " "; + } else if (tensor_data_type == TypeId::kNumberTypeInt64) { + std::cout << static_cast(in_data)[j] << " "; + } else if (tensor_data_type == TypeId::kNumberTypeBool) { + std::cout << static_cast(in_data)[j] << " "; + } else { + MS_LOG(ERROR) << "Datatype: " << tensor_data_type << " is not supported."; + return RET_ERROR; + } + } + std::cout << std::endl; + } + return RET_OK; +} + +int BenchmarkUnifiedApi::RunBenchmark() { + auto start_prepare_time = GetTimeUs(); + // Load graph + std::string model_name = flags_->model_file_.substr(flags_->model_file_.find_last_of(DELIM_SLASH) + 1); + + MS_LOG(INFO) << "start reading model file"; + std::cout << 
"start reading model file" << std::endl; + size_t size = 0; + char *graph_buf = ReadFile(flags_->model_file_.c_str(), &size); + if (graph_buf == nullptr) { + MS_LOG(ERROR) << "Read model file failed while running " << model_name.c_str(); + std::cerr << "Read model file failed while running " << model_name.c_str() << std::endl; + return RET_ERROR; + } + + auto context = std::make_shared(); + if (context == nullptr) { + MS_LOG(ERROR) << "New context failed while running " << model_name.c_str(); + std::cerr << "New context failed while running " << model_name.c_str() << std::endl; + return RET_ERROR; + } + + (void)InitMSContext(context); + auto ret = ms_model_.Build(graph_buf, size, kMindIR, context); + if (ret != kSuccess) { + MS_LOG(ERROR) << "ms_model_.Build failed while running ", model_name.c_str(); + std::cout << "ms_model_.Build failed while running ", model_name.c_str(); + return RET_ERROR; + } + + if (!flags_->resize_dims_.empty()) { + std::vector> resize_dims; + (void)std::transform(flags_->resize_dims_.begin(), flags_->resize_dims_.end(), std::back_inserter(resize_dims), + [&](auto &shapes) { return this->ConverterToInt64Vector(shapes); }); + + ret = ms_model_.Resize(ms_model_.GetInputs(), resize_dims); + if (ret != kSuccess) { + MS_LOG(ERROR) << "Input tensor resize failed."; + std::cout << "Input tensor resize failed."; + return RET_ERROR; + } + } + + ms_inputs_for_api_ = ms_model_.GetInputs(); + auto end_prepare_time = GetTimeUs(); + MS_LOG(INFO) << "PrepareTime = " << (end_prepare_time - start_prepare_time) / 1000 << " ms"; + std::cout << "PrepareTime = " << (end_prepare_time - start_prepare_time) / 1000 << " ms" << std::endl; + + // Load input + MS_LOG(INFO) << "start generate input data"; + auto status = LoadInput(); + if (status != 0) { + MS_LOG(ERROR) << "Generate input data error"; + return status; + } + if (!flags_->benchmark_data_file_.empty()) { + status = MarkAccuracy(); + for (auto &data : benchmark_data_) { + data.second->shape.clear(); + 
data.second->data.clear(); + delete data.second; + data.second = nullptr; + } + benchmark_data_.clear(); + if (status != 0) { + MS_LOG(ERROR) << "Run MarkAccuracy error: " << status; + std::cout << "Run MarkAccuracy error: " << status << std::endl; + return status; + } + } else { + status = MarkPerformance(); + if (status != 0) { + MS_LOG(ERROR) << "Run MarkPerformance error: " << status; + std::cout << "Run MarkPerformance error: " << status << std::endl; + return status; + } + } + if (flags_->dump_tensor_data_) { + std::cout << "Dumped file is saved to : " + dump_file_output_dir_ << std::endl; + } + return RET_OK; +} + +int BenchmarkUnifiedApi::InitTimeProfilingCallbackParameter() { + // before callback + ms_before_call_back_ = [&](const std::vector &before_inputs, + const std::vector &before_outputs, + const MSCallBackParam &call_param) { + if (before_inputs.empty()) { + MS_LOG(INFO) << "The num of beforeInputs is empty"; + } + if (before_outputs.empty()) { + MS_LOG(INFO) << "The num of beforeOutputs is empty"; + } + if (op_times_by_type_.find(call_param.node_type_) == op_times_by_type_.end()) { + op_times_by_type_.insert(std::make_pair(call_param.node_type_, std::make_pair(0, 0.0f))); + } + if (op_times_by_name_.find(call_param.node_name_) == op_times_by_name_.end()) { + op_times_by_name_.insert(std::make_pair(call_param.node_name_, std::make_pair(0, 0.0f))); + } + + op_call_times_total_++; + op_begin_ = GetTimeUs(); + return true; + }; + + // after callback + ms_after_call_back_ = [&](const std::vector &after_inputs, + const std::vector &after_outputs, const MSCallBackParam &call_param) { + uint64_t opEnd = GetTimeUs(); + + if (after_inputs.empty()) { + MS_LOG(INFO) << "The num of after inputs is empty"; + } + if (after_outputs.empty()) { + MS_LOG(INFO) << "The num of after outputs is empty"; + } + + float cost = static_cast(opEnd - op_begin_) / 1000.0f; + if (flags_->device_ == "GPU") { + auto gpu_param = reinterpret_cast(call_param); + cost = 
static_cast(gpu_param.execute_time); + } + op_cost_total_ += cost; + op_times_by_type_[call_param.node_type_].first++; + op_times_by_type_[call_param.node_type_].second += cost; + op_times_by_name_[call_param.node_name_].first++; + op_times_by_name_[call_param.node_name_].second += cost; + return true; + }; + return RET_OK; +} + +int BenchmarkUnifiedApi::InitPerfProfilingCallbackParameter() { +#ifndef ENABLE_ARM64 + MS_LOG(ERROR) << "Only support perf_profiling on arm64."; + return RET_ERROR; +#else + struct perf_event_attr pe, pe2; + memset(&pe, 0, sizeof(struct perf_event_attr)); + memset(&pe2, 0, sizeof(struct perf_event_attr)); + pe.type = PERF_TYPE_HARDWARE; + pe2.type = PERF_TYPE_HARDWARE; + pe.size = sizeof(struct perf_event_attr); + pe2.size = sizeof(struct perf_event_attr); + pe.disabled = 1; + pe2.disabled = 1; + pe.exclude_kernel = 1; // don't count kernel + pe2.exclude_kernel = 1; // don't count kernel + pe.exclude_hv = 1; // don't count hypervisor + pe2.exclude_hv = 1; // don't count hypervisor + pe.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID; + pe2.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID; + if (flags_->perf_event_ == "CACHE") { + pe.config = PERF_COUNT_HW_CACHE_REFERENCES; + pe2.config = PERF_COUNT_HW_CACHE_MISSES; + } else if (flags_->perf_event_ == "STALL") { + pe.config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND; + pe2.config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND; + } else { + pe.config = PERF_COUNT_HW_CPU_CYCLES; + pe2.config = PERF_COUNT_HW_INSTRUCTIONS; + } + perf_fd = syscall(__NR_perf_event_open, pe, 0, -1, -1, 0); + if (perf_fd == -1) { + MS_LOG(ERROR) << "Failed to open perf event " << pe.config; + return RET_ERROR; + } + perf_fd2 = syscall(__NR_perf_event_open, pe2, 0, -1, perf_fd, 0); + if (perf_fd2 == -1) { + MS_LOG(ERROR) << "Failed to open perf event " << pe2.config; + return RET_ERROR; + } + struct PerfCount zero; + zero.value[0] = 0; + zero.value[1] = 0; + // before callback + ms_before_call_back_ = [&](const std::vector 
&before_inputs, + const std::vector &before_outputs, + const MSCallBackParam &call_param) { + if (before_inputs.empty()) { + MS_LOG(INFO) << "The num of beforeInputs is empty"; + } + if (before_outputs.empty()) { + MS_LOG(INFO) << "The num of beforeOutputs is empty"; + } + if (op_perf_by_type_.find(call_param.node_type_) == op_perf_by_type_.end()) { + op_perf_by_type_.insert(std::make_pair(call_param.node_type_, std::make_pair(0, zero))); + } + if (op_perf_by_name_.find(call_param.node_name_) == op_perf_by_name_.end()) { + op_perf_by_name_.insert(std::make_pair(call_param.node_name_, std::make_pair(0, zero))); + } + + op_call_times_total_++; + ioctl(perf_fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP); + ioctl(perf_fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP); + return true; + }; + + // after callback + ms_after_call_back_ = [&](const std::vector &after_inputs, + const std::vector &after_outputs, const MSCallBackParam &call_param) { + struct PerfResult res; + ioctl(perf_fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP); + read(perf_fd, &res, sizeof(struct PerfResult)); + + if (after_inputs.empty()) { + MS_LOG(INFO) << "The num of after inputs is empty"; + } + if (after_outputs.empty()) { + MS_LOG(INFO) << "The num of after outputs is empty"; + } + float cost1 = static_cast(res.values[0].value); + float cost2 = static_cast(res.values[1].value); + op_cost_total_ += cost1; + op_cost2_total_ += cost2; + op_perf_by_type_[call_param.node_type_].first++; + op_perf_by_type_[call_param.node_type_].second.value[0] += cost1; + op_perf_by_type_[call_param.node_type_].second.value[1] += cost2; + op_perf_by_name_[call_param.node_name_].first++; + op_perf_by_name_[call_param.node_name_].second.value[0] += cost1; + op_perf_by_name_[call_param.node_name_].second.value[1] += cost2; + return true; + }; +#endif + return RET_OK; +} + +namespace { +template +std::string DataToString(void *data, size_t data_number) { + if (data == nullptr) { + return "Data of tensor is nullptr"; + } + 
std::ostringstream oss; + auto casted_data = static_cast(data); + for (size_t i = 0; i < 40 && i < data_number; i++) { + oss << " " << casted_data[i]; + } + return oss.str(); +} + +std::string DumpMSTensor(mindspore::MSTensor *tensor) { + if (tensor == nullptr) { + return "Tensor is nullptr"; + } + std::ostringstream oss; + oss << " DataType: " << static_cast(tensor->DataType()); + oss << " Shape:"; + for (auto &dim : tensor->Shape()) { + oss << " " << dim; + } + oss << std::endl << " Data:"; + switch (static_cast(tensor->DataType())) { + case kNumberTypeFloat32: { + oss << DataToString(tensor->MutableData(), tensor->ElementNum()); + } break; + case kNumberTypeFloat16: { + oss << DataToString(tensor->MutableData(), tensor->ElementNum()); + } break; + case kNumberTypeInt32: { + oss << DataToString(tensor->MutableData(), tensor->ElementNum()); + } break; + case kNumberTypeInt16: { + oss << DataToString(tensor->MutableData(), tensor->ElementNum()); + } break; + case kNumberTypeInt8: { + oss << DataToString(tensor->MutableData(), tensor->ElementNum()); + } break; + default: + oss << "Unsupported data type to print"; + break; + } + return oss.str(); +} + +std::string GenerateOutputFileName(mindspore::MSTensor *tensor, const std::string &op_name, + const std::string &file_type, const size_t &idx) { + std::string file_name = op_name; + auto pos = file_name.find_first_of('/'); + while (pos != std::string::npos) { + file_name.replace(pos, 1, "."); + pos = file_name.find_first_of('/'); + } + file_name += "_" + file_type + "_" + std::to_string(idx) + "_shape_"; + for (const auto &dim : tensor->Shape()) { + file_name += std::to_string(dim) + "_"; + } + if (TYPE_ID_MAP.find(static_cast(tensor->DataType())) != TYPE_ID_MAP.end()) { + file_name += TYPE_ID_MAP.at(static_cast(tensor->DataType())); + } + + file_name += +".bin"; + return file_name; +} +} // namespace + +int BenchmarkUnifiedApi::InitPrintTensorDataCallbackParameter() { + // before callback + ms_before_call_back_ = 
[&](const std::vector &before_inputs, + const std::vector &before_outputs, + const MSCallBackParam &call_param) { return true; }; + + // after callback + ms_after_call_back_ = [&](const std::vector &after_inputs, + const std::vector &after_outputs, const MSCallBackParam &call_param) { + std::cout << "================================================================" << std::endl; + std::cout << call_param.node_name_ << " inputs : " << std::endl; + for (auto ms_tensor : after_inputs) { + std::cout << DumpMSTensor(&ms_tensor) << std::endl; + } + std::cout << "----------------------------------------------------------------" << std::endl; + std::cout << call_param.node_name_ << " outputs : " << std::endl; + for (auto ms_tensor : after_outputs) { + std::cout << DumpMSTensor(&ms_tensor) << std::endl; + } + std::cout << "================================================================" << std::endl; + return true; + }; + return RET_OK; +} +int BenchmarkUnifiedApi::InitDumpTensorDataCallbackParameter() { + // before callback + ms_before_call_back_ = [&](const std::vector &before_inputs, + const std::vector &before_outputs, + const MSCallBackParam &call_param) { + auto dump_mode = dump_cfg_json_[dump::kSettings][dump::kMode].get(); + auto input_output_mode = dump_cfg_json_[dump::kSettings][dump::kInputOutput].get(); + auto kernels = dump_cfg_json_[dump::kSettings][dump::kKernels].get>(); + + if (dump_mode == 0 || std::find(kernels.begin(), kernels.end(), call_param.node_name_) != kernels.end()) { + if (input_output_mode == 0 || input_output_mode == 1) { + for (size_t i = 0; i < before_inputs.size(); i++) { + auto ms_tensor = before_inputs.at(i); + auto file_name = GenerateOutputFileName(&ms_tensor, call_param.node_name_, "input", i); + auto abs_file_path = dump_file_output_dir_ + "/" + file_name; + if (WriteToBin(abs_file_path, ms_tensor.MutableData(), ms_tensor.DataSize()) != RET_OK) { // save to file + MS_LOG(ERROR) << "write tensor data to file failed."; + return false; 
+ } + } + } + } + return true; + }; + + // after callback + ms_after_call_back_ = [&](const std::vector &after_inputs, + const std::vector &after_outputs, const MSCallBackParam &call_param) { + auto dump_mode = dump_cfg_json_[dump::kSettings][dump::kMode].get(); + auto input_output_mode = dump_cfg_json_[dump::kSettings][dump::kInputOutput].get(); + auto kernels = dump_cfg_json_[dump::kSettings][dump::kKernels].get>(); + + if (dump_mode == 0 || std::find(kernels.begin(), kernels.end(), call_param.node_name_) != kernels.end()) { + if (input_output_mode == 0 || input_output_mode == 2) { + for (size_t i = 0; i < after_outputs.size(); i++) { + auto ms_tensor = after_outputs.at(i); + auto file_name = GenerateOutputFileName(&ms_tensor, call_param.node_name_, "output", i); + auto abs_file_path = dump_file_output_dir_ + "/" + file_name; + if (WriteToBin(abs_file_path, ms_tensor.MutableData(), ms_tensor.DataSize()) != RET_OK) { // save to file + MS_LOG(ERROR) << "write tensor data to file failed."; + return false; + } + } + } + } + return true; + }; + return RET_OK; +} + +BenchmarkUnifiedApi::~BenchmarkUnifiedApi() {} + +} // namespace lite +} // namespace mindspore diff --git a/mindspore/lite/tools/benchmark/benchmark_unified_api.h b/mindspore/lite/tools/benchmark/benchmark_unified_api.h new file mode 100644 index 00000000000..10f7a0cbacb --- /dev/null +++ b/mindspore/lite/tools/benchmark/benchmark_unified_api.h @@ -0,0 +1,103 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_BENCHMARK_BENCHMARK_UNIFIED_API_H_ +#define MINDSPORE_BENCHMARK_BENCHMARK_UNIFIED_API_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "tools/benchmark/benchmark_base.h" +#include "include/model.h" +#include "tools/common/flag_parser.h" +#include "src/common/file_utils.h" +#include "src/common/utils.h" +#include "include/api/types.h" +#include "include/api/model.h" + +namespace mindspore::lite { + +class MS_API BenchmarkUnifiedApi : public BenchmarkBase { + public: + explicit BenchmarkUnifiedApi(BenchmarkFlags *flags) : BenchmarkBase(flags) {} + + virtual ~BenchmarkUnifiedApi(); + + int RunBenchmark() override; + + protected: + int CompareDataGetTotalBiasAndSize(const std::string &name, mindspore::MSTensor *tensor, float *total_bias, + int *total_size); + void InitContext(const std::shared_ptr &context); + mindspore::MSTensor GetMSTensorByNodeShape(const std::vector &node_shape); + mindspore::MSTensor GetMSTensorByNameOrShape(const std::string &node_or_tensor_name, const std::vector &dims); + + // call GenerateRandomData to fill inputTensors + int GenerateInputData() override; + + int ReadInputFile() override; + + int ReadTensorData(std::ifstream &in_file_stream, const std::string &tensor_name, + const std::vector &dims) override; + + void InitMSContext(const std::shared_ptr &context); + + int CompareOutput() override; + + int InitTimeProfilingCallbackParameter() override; + + int InitPerfProfilingCallbackParameter() override; + + int InitDumpTensorDataCallbackParameter() override; + + int InitPrintTensorDataCallbackParameter() override; + + int PrintInputData(); + + template + std::vector ConverterToInt64Vector(const std::vector &srcDims) { + std::vector dims; + for (auto shape : srcDims) { + dims.push_back(static_cast(shape)); + 
} + return dims; + } + + int MarkPerformance(); + + int MarkAccuracy(); + + private: + mindspore::Model ms_model_; + std::vector<mindspore::MSTensor> ms_inputs_for_api_; + + MSKernelCallBack ms_before_call_back_ = nullptr; + MSKernelCallBack ms_after_call_back_ = nullptr; +}; + +} // namespace mindspore::lite +#endif // MINDSPORE_BENCHMARK_BENCHMARK_UNIFIED_API_H_ diff --git a/mindspore/lite/tools/benchmark/main.cc b/mindspore/lite/tools/benchmark/main.cc index 549fb7d915b..b9f3e53a8e9 100644 --- a/mindspore/lite/tools/benchmark/main.cc +++ b/mindspore/lite/tools/benchmark/main.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "tools/benchmark/benchmark.h" +#include "tools/benchmark/run_benchmark.h" #include "include/version.h" int main(int argc, const char **argv) { diff --git a/mindspore/lite/tools/benchmark/run_benchmark.cc b/mindspore/lite/tools/benchmark/run_benchmark.cc new file mode 100644 index 00000000000..bffbad9c93d --- /dev/null +++ b/mindspore/lite/tools/benchmark/run_benchmark.cc @@ -0,0 +1,82 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "tools/benchmark/run_benchmark.h" +#include <string> +namespace mindspore { +namespace lite { + +int RunBenchmark(int argc, const char **argv) { + BenchmarkFlags flags; + Option<std::string> err = flags.ParseFlags(argc, argv); +#ifdef SUPPORT_NNIE + SvpSysInit(); +#endif + if (err.IsSome()) { + std::cerr << err.Get() << std::endl; + std::cerr << flags.Usage() << std::endl; + return RET_ERROR; + } + + if (flags.help) { + std::cerr << flags.Usage() << std::endl; + return RET_OK; + } + + BenchmarkBase *benchmark = nullptr; + // get dump data output path + auto new_api = std::getenv("ENABLE_NEW_API"); + if (new_api == nullptr || std::string(new_api) != "true") { + benchmark = new Benchmark(&flags); + } else { + benchmark = new BenchmarkUnifiedApi(&flags); + } + if (benchmark == nullptr) { + MS_LOG(ERROR) << "new benchmark failed "; + std::cerr << "new benchmark failed" << std::endl; + return RET_ERROR; + } + auto status = benchmark->Init(); + if (status != 0) { + MS_LOG(ERROR) << "Benchmark init Error : " << status; + std::cerr << "Benchmark init Error : " << status << std::endl; + delete benchmark; + benchmark = nullptr; + return RET_ERROR; + } + + status = benchmark->RunBenchmark(); + if (status != 0) { + MS_LOG(ERROR) << "Run Benchmark " + << flags.model_file_.substr(flags.model_file_.find_last_of(DELIM_SLASH) + 1).c_str() + << " Failed : " << status; + std::cerr << "Run Benchmark " << flags.model_file_.substr(flags.model_file_.find_last_of(DELIM_SLASH) + 1).c_str() + << " Failed : " << status << std::endl; + delete benchmark; + benchmark = nullptr; + return RET_ERROR; + } + + MS_LOG(INFO) << "Run Benchmark " << flags.model_file_.substr(flags.model_file_.find_last_of(DELIM_SLASH) + 1).c_str() + << " Success."; + std::cout << "Run Benchmark " << flags.model_file_.substr(flags.model_file_.find_last_of(DELIM_SLASH) + 1).c_str() + << " Success." 
<< std::endl; + delete benchmark; + benchmark = nullptr; + return RET_OK; +} +} // namespace lite +} // namespace mindspore diff --git a/mindspore/lite/tools/benchmark/run_benchmark.h b/mindspore/lite/tools/benchmark/run_benchmark.h new file mode 100644 index 00000000000..7b648edee5b --- /dev/null +++ b/mindspore/lite/tools/benchmark/run_benchmark.h @@ -0,0 +1,27 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINNIE_BENCHMARK_RUN_BENCHMARK_H_ +#define MINNIE_BENCHMARK_RUN_BENCHMARK_H_ +#include "tools/benchmark/benchmark.h" +#include "tools/benchmark/benchmark_unified_api.h" + +namespace mindspore::lite { + +int MS_API RunBenchmark(int argc, const char **argv); + +} // namespace mindspore::lite +#endif // MINNIE_BENCHMARK_RUN_BENCHMARK_H_