forked from mindspore-Ecosystem/mindspore
!19987 [MS][LITE]fix bug and add new api benchmark
Merge pull request !19987 from 张学同/api13
This commit is contained in:
commit
ec2ec77666
|
@ -41,79 +41,9 @@ CreateTrainSessionProto *CreateTrainSessionCallbackHolder(CreateTrainSessionProt
|
|||
return proto_;
|
||||
}
|
||||
|
||||
lite::CpuBindMode ModelImpl::GetCpuBindMode() {
|
||||
auto affinity_mode = context_->GetThreadAffinityMode();
|
||||
switch (affinity_mode) {
|
||||
case 0:
|
||||
return lite::NO_BIND;
|
||||
case 1:
|
||||
return lite::HIGHER_CPU;
|
||||
case 2:
|
||||
return lite::MID_CPU;
|
||||
default:
|
||||
return lite::NO_BIND;
|
||||
}
|
||||
}
|
||||
|
||||
Status ModelImpl::ConverterContext(const std::shared_ptr<Context> &context, lite::Context *model_context) {
|
||||
auto device_list = context->MutableDeviceInfo();
|
||||
if (device_list.size() == 0) {
|
||||
MS_LOG(ERROR) << "Invalid device list.";
|
||||
return kLiteInputParamInvalid;
|
||||
}
|
||||
if (device_list.size() > 2) {
|
||||
MS_LOG(ERROR) << "Only CPU/CPU & GPU/CPU & NPU mode is supported.";
|
||||
return kLiteInputParamInvalid;
|
||||
}
|
||||
|
||||
model_context->thread_num_ = context->GetThreadNum();
|
||||
model_context->enable_parallel_ = context->GetEnableParallel();
|
||||
model_context->affinity_core_list_ = context->GetThreadAffinityCoreList();
|
||||
model_context->device_list_.clear();
|
||||
if (device_list[0]->GetDeviceType() != kCPU) {
|
||||
MS_LOG(ERROR) << "CPU context must be enabled and in the first place of device list.";
|
||||
return kLiteInputParamInvalid;
|
||||
}
|
||||
|
||||
auto cpu_context = device_list[0]->Cast<CPUDeviceInfo>();
|
||||
model_context->allocator = cpu_context->GetAllocator();
|
||||
if (model_context->allocator == nullptr) {
|
||||
model_context->allocator = Allocator::Create();
|
||||
if (model_context->allocator == nullptr) {
|
||||
MS_LOG(ERROR) << "Create Allocator failed.";
|
||||
return kLiteNullptr;
|
||||
}
|
||||
MS_LOG(DEBUG) << "Set new allocator.";
|
||||
cpu_context->SetAllocator(model_context->allocator);
|
||||
}
|
||||
|
||||
lite::CpuBindMode mode = GetCpuBindMode();
|
||||
lite::DeviceInfo cpu_info = {0};
|
||||
cpu_info.cpu_device_info_ = {cpu_context->GetEnableFP16(), mode};
|
||||
model_context->device_list_.push_back({lite::DT_CPU, cpu_info, cpu_context->GetProvider(),
|
||||
cpu_context->GetProviderDevice(), cpu_context->GetAllocator()});
|
||||
if (device_list.size() == 2) {
|
||||
lite::DeviceInfo device_info = {0};
|
||||
if (device_list[1]->GetDeviceType() == kMaliGPU) {
|
||||
auto gpu_context = device_list[1]->Cast<MaliGPUDeviceInfo>();
|
||||
device_info.gpu_device_info_ = {gpu_context->GetEnableFP16()};
|
||||
model_context->device_list_.push_back({lite::DT_GPU, device_info, gpu_context->GetProvider(),
|
||||
gpu_context->GetProviderDevice(), gpu_context->GetAllocator()});
|
||||
} else if (device_list[1]->GetDeviceType() == kKirinNPU) {
|
||||
auto npu_context = device_list[1]->Cast<KirinNPUDeviceInfo>();
|
||||
device_info.npu_device_info_ = {npu_context->GetFrequency()};
|
||||
model_context->device_list_.push_back({lite::DT_NPU, device_info});
|
||||
} else {
|
||||
MS_LOG(ERROR) << "Invalid device.";
|
||||
return kLiteInputParamInvalid;
|
||||
}
|
||||
}
|
||||
model_context->delegate = context->GetDelegate();
|
||||
return kSuccess;
|
||||
}
|
||||
|
||||
Status ModelImpl::Build(const void *model_data, size_t data_size, ModelType model_type,
|
||||
const std::shared_ptr<Context> &ms_context) {
|
||||
context_ = ms_context;
|
||||
lite::Context lite_context;
|
||||
auto status = A2L_ConvertContext(ms_context.get(), &lite_context);
|
||||
if (status != kSuccess) {
|
||||
|
|
|
@ -100,8 +100,6 @@ class ModelImpl {
|
|||
void SetGraph(const std::shared_ptr<Graph> &graph) { graph_ = graph; }
|
||||
void SetContext(const std::shared_ptr<Context> &context) { context_ = context; }
|
||||
void SetConfig(const std::shared_ptr<TrainCfg> cfg) { cfg_ = cfg; }
|
||||
lite::CpuBindMode GetCpuBindMode();
|
||||
Status ConverterContext(const std::shared_ptr<Context> &context, lite::Context *model_context);
|
||||
Status RunGraph(const MSKernelCallBack &before, const MSKernelCallBack &after);
|
||||
};
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -331,6 +331,7 @@ if(MSLITE_ENABLE_CONVERTER)
|
|||
${TEST_SRC}
|
||||
${TEST_DIR}/st/converter_test.cc
|
||||
${TEST_DIR}/st/mindrt_parallel_test.cc
|
||||
${TEST_DIR}/st/graph_test.cc
|
||||
${TEST_DIR}/st/sub_graph_test.cc
|
||||
${TEST_DIR}/common/import_from_meta_graphT.cc
|
||||
${TEST_DIR}/ut/tools/optimizer/fusion/conv_biasadd_fusion_test.cc
|
||||
|
@ -384,6 +385,8 @@ if(ENABLE_FP16 AND SUPPORT_TRAIN)
|
|||
list(APPEND TEST_SRC ${TEST_CASE_KERNEL_FP16_SRC_GRAD})
|
||||
endif()
|
||||
|
||||
file(GLOB_RECURSE API_SRC ${LITE_DI}/src/cxx_api/*.cc)
|
||||
set(TEST_SRC ${TEST_SRC} ${API_SRC})
|
||||
add_executable(lite-test ${TEST_SRC})
|
||||
add_dependencies(lite-test fbs_src)
|
||||
|
||||
|
|
|
@ -7,7 +7,10 @@ set(COMMON_SRC
|
|||
if(NOT TARGET_HIMIX200)
|
||||
add_executable(benchmark
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/main.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/run_benchmark.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/benchmark_base.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/benchmark.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/benchmark_unified_api.cc
|
||||
${COMMON_SRC})
|
||||
|
||||
add_dependencies(benchmark fbs_src)
|
||||
|
|
|
@ -42,69 +42,6 @@
|
|||
|
||||
namespace mindspore {
|
||||
namespace lite {
|
||||
namespace {
|
||||
constexpr int kNumPrintMin = 5;
|
||||
}
|
||||
static const char *DELIM_COLON = ":";
|
||||
static const char *DELIM_COMMA = ",";
|
||||
static const char *DELIM_SLASH = "/";
|
||||
static const std::unordered_map<TypeId, std::string> TYPE_ID_MAP{
|
||||
{kNumberTypeFloat16, "Float16"}, {kNumberTypeFloat, "Float32"}, {kNumberTypeFloat32, "Float32"},
|
||||
{kNumberTypeInt8, "Int8"}, {kNumberTypeInt16, "Int16"}, {kNumberTypeInt, "Int32"},
|
||||
{kNumberTypeInt32, "Int32"}, {kNumberTypeUInt8, "UInt8"}, {kNumberTypeUInt16, "UInt16"},
|
||||
{kNumberTypeUInt, "UInt32"}, {kNumberTypeUInt32, "UInt32"}, {kObjectTypeString, "String"},
|
||||
{kNumberTypeBool, "Bool"}, {kObjectTypeTensorType, "Tensor"}};
|
||||
static const std::unordered_map<schema::Format, std::string> TENSOR_FORMAT_MAP{
|
||||
{schema::Format_NCHW, "NCHW"}, {schema::Format_NHWC, "NHWC"}, {schema::Format_NHWC4, "NHWC4"},
|
||||
{schema::Format_HWKC, "HWKC"}, {schema::Format_HWCK, "HWCK"}, {schema::Format_KCHW, "KCHW"},
|
||||
{schema::Format_CKHW, "CKHW"}, {schema::Format_KHWC, "KHWC"}, {schema::Format_CHWK, "CHWK"},
|
||||
{schema::Format_HW, "HW"}, {schema::Format_HW4, "HW4"}, {schema::Format_NC, "NC"},
|
||||
{schema::Format_NC4, "NC4"}, {schema::Format_NC4HW4, "NC4HW4"}, {schema::Format_NCDHW, "NCDHW"}};
|
||||
|
||||
namespace dump {
|
||||
constexpr auto kConfigPath = "MINDSPORE_DUMP_CONFIG";
|
||||
constexpr auto kSettings = "common_dump_settings";
|
||||
constexpr auto kMode = "dump_mode";
|
||||
constexpr auto kPath = "path";
|
||||
constexpr auto kNetName = "net_name";
|
||||
constexpr auto kInputOutput = "input_output";
|
||||
constexpr auto kKernels = "kernels";
|
||||
} // namespace dump
|
||||
|
||||
int Benchmark::GenerateRandomData(size_t size, void *data, TypeId data_type) {
|
||||
MS_ASSERT(data != nullptr);
|
||||
switch (data_type) {
|
||||
case kNumberTypeFloat32:
|
||||
case kNumberTypeFloat:
|
||||
FillInputData<float>(size, data, std::uniform_real_distribution<float>(0.1f, 1.0f));
|
||||
break;
|
||||
case kNumberTypeFloat64:
|
||||
FillInputData<double>(size, data, std::uniform_real_distribution<double>(0.1, 1.0));
|
||||
break;
|
||||
case kNumberTypeInt64:
|
||||
FillInputData<int64_t>(size, data, std::uniform_int_distribution<int64_t>(0, 1));
|
||||
break;
|
||||
case kNumberTypeInt:
|
||||
case kNumberTypeInt32:
|
||||
FillInputData<int32_t>(size, data, std::uniform_int_distribution<int32_t>(0, 1));
|
||||
break;
|
||||
case kNumberTypeInt16:
|
||||
FillInputData<int16_t>(size, data, std::uniform_int_distribution<int16_t>(0, 1));
|
||||
break;
|
||||
case kNumberTypeInt8:
|
||||
FillInputData<int8_t>(size, data, std::uniform_int_distribution<int8_t>(-127, 127));
|
||||
break;
|
||||
case kNumberTypeUInt8:
|
||||
FillInputData<uint8_t>(size, data, std::uniform_int_distribution<uint8_t>(0, 254));
|
||||
break;
|
||||
default:
|
||||
char *casted_data = static_cast<char *>(data);
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
casted_data[i] = static_cast<char>(i);
|
||||
}
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int Benchmark::GenerateInputData() {
|
||||
for (auto tensor : ms_inputs_) {
|
||||
|
@ -118,7 +55,7 @@ int Benchmark::GenerateInputData() {
|
|||
if (tensor->data_type() == kObjectTypeString) {
|
||||
status = StringsToMSTensor({"you're the best."}, tensor);
|
||||
} else {
|
||||
status = GenerateRandomData(tensor->Size(), input_data, tensor->data_type());
|
||||
status = GenerateRandomData(tensor->Size(), input_data, static_cast<float>(tensor->data_type()));
|
||||
}
|
||||
if (status != RET_OK) {
|
||||
std::cerr << "GenerateRandomData for inTensor failed: " << status << std::endl;
|
||||
|
@ -129,25 +66,6 @@ int Benchmark::GenerateInputData() {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
int Benchmark::LoadInput() {
|
||||
if (flags_->in_data_file_.empty()) {
|
||||
auto status = GenerateInputData();
|
||||
if (status != 0) {
|
||||
std::cerr << "Generate input data error " << status << std::endl;
|
||||
MS_LOG(ERROR) << "Generate input data error " << status;
|
||||
return status;
|
||||
}
|
||||
} else {
|
||||
auto status = ReadInputFile();
|
||||
if (status != 0) {
|
||||
std::cerr << "ReadInputFile error, " << status << std::endl;
|
||||
MS_LOG(ERROR) << "ReadInputFile error, " << status;
|
||||
return status;
|
||||
}
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int Benchmark::ReadInputFile() {
|
||||
if (ms_inputs_.empty()) {
|
||||
return RET_OK;
|
||||
|
@ -196,49 +114,6 @@ int Benchmark::ReadInputFile() {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
// calibData is FP32
|
||||
int Benchmark::ReadCalibData() {
|
||||
const char *calib_data_path = flags_->benchmark_data_file_.c_str();
|
||||
// read calib data
|
||||
std::ifstream in_file(calib_data_path);
|
||||
if (!in_file.good()) {
|
||||
std::cerr << "file: " << calib_data_path << " is not exist" << std::endl;
|
||||
MS_LOG(ERROR) << "file: " << calib_data_path << " is not exist";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
if (!in_file.is_open()) {
|
||||
std::cerr << "file: " << calib_data_path << " open failed" << std::endl;
|
||||
MS_LOG(ERROR) << "file: " << calib_data_path << " open failed";
|
||||
in_file.close();
|
||||
return RET_ERROR;
|
||||
}
|
||||
MS_LOG(INFO) << "Start reading calibData file";
|
||||
std::string line;
|
||||
std::string tensor_name;
|
||||
|
||||
while (!in_file.eof()) {
|
||||
getline(in_file, line);
|
||||
std::stringstream string_line1(line);
|
||||
size_t dim = 0;
|
||||
string_line1 >> tensor_name >> dim;
|
||||
std::vector<size_t> dims;
|
||||
for (size_t i = 0; i < dim; i++) {
|
||||
size_t tmp_dim;
|
||||
string_line1 >> tmp_dim;
|
||||
dims.push_back(tmp_dim);
|
||||
}
|
||||
auto ret = ReadTensorData(in_file, tensor_name, dims);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Read tensor data failed, tensor name: " << tensor_name;
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
in_file.close();
|
||||
MS_LOG(INFO) << "Finish reading calibData file";
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int Benchmark::ReadTensorData(std::ifstream &in_file_stream, const std::string &tensor_name,
|
||||
const std::vector<size_t> &dims) {
|
||||
std::string line;
|
||||
|
@ -379,28 +254,6 @@ tensor::MSTensor *Benchmark::GetTensorByNameOrShape(const std::string &node_or_t
|
|||
return tensor;
|
||||
}
|
||||
|
||||
int Benchmark::CompareStringData(const std::string &name, tensor::MSTensor *tensor) {
|
||||
auto iter = this->benchmark_data_.find(name);
|
||||
if (iter != this->benchmark_data_.end()) {
|
||||
std::vector<std::string> calib_strings = iter->second->strings_data;
|
||||
std::vector<std::string> output_strings = MSTensorToStrings(tensor);
|
||||
size_t compare_num = std::min(calib_strings.size(), output_strings.size());
|
||||
size_t print_num = std::min(compare_num, static_cast<size_t>(kNumPrintMin));
|
||||
|
||||
std::cout << "Data of node " << name << " : " << std::endl;
|
||||
for (size_t i = 0; i < compare_num; i++) {
|
||||
if (i < print_num) {
|
||||
std::cout << " " << output_strings[i] << std::endl;
|
||||
}
|
||||
if (calib_strings[i] != output_strings[i]) {
|
||||
MS_LOG(ERROR) << "Compare failed, index: " << i;
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int Benchmark::CompareDataGetTotalBiasAndSize(const std::string &name, tensor::MSTensor *tensor, float *total_bias,
|
||||
int *total_size) {
|
||||
float bias = 0;
|
||||
|
@ -698,36 +551,6 @@ int Benchmark::RunBenchmark() {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
void BenchmarkFlags::InitInputDataList() {
|
||||
char *input_list = new char[this->in_data_file_.length() + 1];
|
||||
snprintf(input_list, this->in_data_file_.length() + 1, "%s", this->in_data_file_.c_str());
|
||||
char *cur_input;
|
||||
const char *split_c = ",";
|
||||
cur_input = strtok(input_list, split_c);
|
||||
while (cur_input != nullptr) {
|
||||
input_data_list_.emplace_back(cur_input);
|
||||
cur_input = strtok(nullptr, split_c);
|
||||
}
|
||||
delete[] input_list;
|
||||
}
|
||||
|
||||
void BenchmarkFlags::InitResizeDimsList() {
|
||||
std::string content = this->resize_dims_in_;
|
||||
std::vector<int> shape;
|
||||
auto shape_strs = StringSplit(content, std::string(DELIM_COLON));
|
||||
for (const auto &shape_str : shape_strs) {
|
||||
shape.clear();
|
||||
auto dim_strs = StringSplit(shape_str, std::string(DELIM_COMMA));
|
||||
std::cout << "Resize Dims: ";
|
||||
for (const auto &dim_str : dim_strs) {
|
||||
std::cout << dim_str << " ";
|
||||
shape.emplace_back(static_cast<int>(std::stoi(dim_str)));
|
||||
}
|
||||
std::cout << std::endl;
|
||||
this->resize_dims_.emplace_back(shape);
|
||||
}
|
||||
}
|
||||
|
||||
int Benchmark::InitTimeProfilingCallbackParameter() {
|
||||
// before callback
|
||||
before_call_back_ = [&](const std::vector<mindspore::tensor::MSTensor *> &before_inputs,
|
||||
|
@ -1022,444 +845,7 @@ int Benchmark::InitDumpTensorDataCallbackParameter() {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
int Benchmark::CheckThreadNumValid() {
|
||||
if (this->flags_->num_threads_ < 1) {
|
||||
MS_LOG(ERROR) << "numThreads:" << this->flags_->num_threads_ << " must be greater than 0";
|
||||
std::cerr << "numThreads:" << this->flags_->num_threads_ << " must be greater than 0" << std::endl;
|
||||
return RET_ERROR;
|
||||
}
|
||||
Benchmark::~Benchmark() { delete (session_); }
|
||||
|
||||
if (flags_->enable_parallel_) {
|
||||
if (flags_->num_threads_ < 2) {
|
||||
MS_LOG(ERROR) << "enable parallel need more than 1 thread.";
|
||||
std::cerr << "enable parallel need more than 1 thread." << std::endl;
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int Benchmark::InitDumpConfigFromJson(char *path) {
|
||||
auto real_path = RealPath(path);
|
||||
std::ifstream ifs(real_path);
|
||||
if (!ifs.good()) {
|
||||
MS_LOG(ERROR) << "file: " << real_path << " is not exist";
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (!ifs.is_open()) {
|
||||
MS_LOG(ERROR) << "file: " << real_path << " open failed";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
try {
|
||||
dump_cfg_json_ = nlohmann::json::parse(ifs);
|
||||
} catch (const nlohmann::json::parse_error &error) {
|
||||
MS_LOG(ERROR) << "parse json file failed, please check your file.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (dump_cfg_json_[dump::kSettings] == nullptr) {
|
||||
MS_LOG(ERROR) << "\"common_dump_settings\" is required.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (dump_cfg_json_[dump::kSettings][dump::kMode] == nullptr) {
|
||||
MS_LOG(ERROR) << "\"dump_mode\" is required.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (dump_cfg_json_[dump::kSettings][dump::kPath] == nullptr) {
|
||||
MS_LOG(ERROR) << "\"path\" is required.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (dump_cfg_json_[dump::kSettings][dump::kNetName] == nullptr) {
|
||||
dump_cfg_json_[dump::kSettings][dump::kNetName] = "Default";
|
||||
}
|
||||
if (dump_cfg_json_[dump::kSettings][dump::kInputOutput] == nullptr) {
|
||||
dump_cfg_json_[dump::kSettings][dump::kInputOutput] = 0;
|
||||
}
|
||||
if (dump_cfg_json_[dump::kSettings][dump::kKernels] != nullptr &&
|
||||
!dump_cfg_json_[dump::kSettings][dump::kKernels].empty()) {
|
||||
if (dump_cfg_json_[dump::kSettings][dump::kMode] == 0) {
|
||||
MS_LOG(ERROR) << R"("dump_mode" should be 1 when "kernels" isn't empty.)";
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
auto abs_path = dump_cfg_json_[dump::kSettings][dump::kPath].get<std::string>();
|
||||
auto net_name = dump_cfg_json_[dump::kSettings][dump::kNetName].get<std::string>();
|
||||
if (abs_path.back() == '\\' || abs_path.back() == '/') {
|
||||
dump_file_output_dir_ = abs_path + net_name;
|
||||
} else {
|
||||
#ifdef _WIN32
|
||||
dump_file_output_dir_ = abs_path + "\\" + net_name;
|
||||
#else
|
||||
dump_file_output_dir_ = abs_path + "/" + net_name;
|
||||
#endif
|
||||
}
|
||||
|
||||
auto status = CreateOutputDir(&dump_file_output_dir_);
|
||||
if (status != RET_OK) {
|
||||
MS_LOG(ERROR) << "create data output directory failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int Benchmark::InitCallbackParameter() {
|
||||
int ret = RET_OK;
|
||||
if (flags_->time_profiling_) {
|
||||
ret = InitTimeProfilingCallbackParameter();
|
||||
} else if (flags_->perf_profiling_) {
|
||||
ret = InitPerfProfilingCallbackParameter();
|
||||
} else if (flags_->print_tensor_data_) {
|
||||
ret = InitPrintTensorDataCallbackParameter();
|
||||
} else if (flags_->dump_tensor_data_) {
|
||||
ret = InitDumpTensorDataCallbackParameter();
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
int Benchmark::Init() {
|
||||
if (this->flags_ == nullptr) {
|
||||
return 1;
|
||||
}
|
||||
MS_LOG(INFO) << "ModelPath = " << this->flags_->model_file_;
|
||||
MS_LOG(INFO) << "InDataPath = " << this->flags_->in_data_file_;
|
||||
MS_LOG(INFO) << "InDataType = " << this->flags_->in_data_type_in_;
|
||||
MS_LOG(INFO) << "LoopCount = " << this->flags_->loop_count_;
|
||||
MS_LOG(INFO) << "DeviceType = " << this->flags_->device_;
|
||||
MS_LOG(INFO) << "AccuracyThreshold = " << this->flags_->accuracy_threshold_;
|
||||
MS_LOG(INFO) << "WarmUpLoopCount = " << this->flags_->warm_up_loop_count_;
|
||||
MS_LOG(INFO) << "NumThreads = " << this->flags_->num_threads_;
|
||||
MS_LOG(INFO) << "Fp16Priority = " << this->flags_->enable_fp16_;
|
||||
MS_LOG(INFO) << "EnableParallel = " << this->flags_->enable_parallel_;
|
||||
MS_LOG(INFO) << "calibDataPath = " << this->flags_->benchmark_data_file_;
|
||||
std::cout << "ModelPath = " << this->flags_->model_file_ << std::endl;
|
||||
std::cout << "InDataPath = " << this->flags_->in_data_file_ << std::endl;
|
||||
std::cout << "InDataType = " << this->flags_->in_data_type_in_ << std::endl;
|
||||
std::cout << "LoopCount = " << this->flags_->loop_count_ << std::endl;
|
||||
std::cout << "DeviceType = " << this->flags_->device_ << std::endl;
|
||||
std::cout << "AccuracyThreshold = " << this->flags_->accuracy_threshold_ << std::endl;
|
||||
std::cout << "WarmUpLoopCount = " << this->flags_->warm_up_loop_count_ << std::endl;
|
||||
std::cout << "NumThreads = " << this->flags_->num_threads_ << std::endl;
|
||||
std::cout << "Fp16Priority = " << this->flags_->enable_fp16_ << std::endl;
|
||||
std::cout << "EnableParallel = " << this->flags_->enable_parallel_ << std::endl;
|
||||
std::cout << "calibDataPath = " << this->flags_->benchmark_data_file_ << std::endl;
|
||||
if (this->flags_->loop_count_ < 1) {
|
||||
MS_LOG(ERROR) << "LoopCount:" << this->flags_->loop_count_ << " must be greater than 0";
|
||||
std::cerr << "LoopCount:" << this->flags_->loop_count_ << " must be greater than 0" << std::endl;
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
auto thread_ret = CheckThreadNumValid();
|
||||
if (thread_ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Invalid numThreads.";
|
||||
std::cerr << "Invalid numThreads." << std::endl;
|
||||
return RET_ERROR;
|
||||
}
|
||||
static std::vector<std::string> CPU_BIND_MODE_MAP = {"NO_BIND", "HIGHER_CPU", "MID_CPU"};
|
||||
if (this->flags_->cpu_bind_mode_ >= 1) {
|
||||
MS_LOG(INFO) << "cpuBindMode = " << CPU_BIND_MODE_MAP[this->flags_->cpu_bind_mode_];
|
||||
std::cout << "cpuBindMode = " << CPU_BIND_MODE_MAP[this->flags_->cpu_bind_mode_] << std::endl;
|
||||
} else {
|
||||
MS_LOG(INFO) << "cpuBindMode = NO_BIND";
|
||||
std::cout << "cpuBindMode = NO_BIND" << std::endl;
|
||||
}
|
||||
|
||||
this->flags_->in_data_type_ = this->flags_->in_data_type_in_ == "img" ? kImage : kBinary;
|
||||
|
||||
if (!flags_->benchmark_data_type_.empty()) {
|
||||
if (data_type_map_.find(flags_->benchmark_data_type_) == data_type_map_.end()) {
|
||||
MS_LOG(ERROR) << "CalibDataType not supported: " << flags_->benchmark_data_type_.c_str();
|
||||
return RET_ERROR;
|
||||
}
|
||||
msCalibDataType = data_type_map_.at(flags_->benchmark_data_type_);
|
||||
MS_LOG(INFO) << "CalibDataType = " << flags_->benchmark_data_type_.c_str();
|
||||
std::cout << "CalibDataType = " << flags_->benchmark_data_type_.c_str() << std::endl;
|
||||
}
|
||||
|
||||
if (flags_->model_file_.empty()) {
|
||||
MS_LOG(ERROR) << "modelPath is required";
|
||||
std::cerr << "modelPath is required" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
flags_->InitInputDataList();
|
||||
flags_->InitResizeDimsList();
|
||||
if (!flags_->resize_dims_.empty() && !flags_->input_data_list_.empty() &&
|
||||
flags_->resize_dims_.size() != flags_->input_data_list_.size()) {
|
||||
MS_LOG(ERROR) << "Size of input resizeDims should be equal to size of input inDataPath";
|
||||
std::cerr << "Size of input resizeDims should be equal to size of input inDataPath" << std::endl;
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
if (flags_->device_ != "CPU" && flags_->device_ != "GPU" && flags_->device_ != "NPU") {
|
||||
MS_LOG(ERROR) << "Device type:" << flags_->device_ << " is not supported.";
|
||||
std::cerr << "Device type:" << flags_->device_ << " is not supported." << std::endl;
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
if (flags_->time_profiling_ && flags_->perf_profiling_) {
|
||||
MS_LOG(INFO) << "time_profiling is enabled, will not run perf_profiling.";
|
||||
}
|
||||
|
||||
// get dump data output path
|
||||
auto dump_cfg_path = std::getenv(dump::kConfigPath);
|
||||
if (dump_cfg_path != nullptr) {
|
||||
flags_->dump_tensor_data_ = true;
|
||||
if (InitDumpConfigFromJson(dump_cfg_path) != RET_OK) {
|
||||
MS_LOG(ERROR) << "parse dump config file failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
} else {
|
||||
MS_LOG(INFO) << "No MINDSPORE_DUMP_CONFIG in env, don't need to dump data";
|
||||
}
|
||||
|
||||
auto status = InitCallbackParameter();
|
||||
if (status != RET_OK) {
|
||||
MS_LOG(ERROR) << "Init callback Parameter failed.";
|
||||
std::cerr << "Init callback Parameter failed." << std::endl;
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int Benchmark::PrintResult(const std::vector<std::string> &title,
|
||||
const std::map<std::string, std::pair<int, float>> &result) {
|
||||
std::vector<size_t> columnLenMax(5);
|
||||
std::vector<std::vector<std::string>> rows;
|
||||
|
||||
for (auto &iter : result) {
|
||||
char stringBuf[5][100] = {};
|
||||
std::vector<std::string> columns;
|
||||
size_t len = 0;
|
||||
|
||||
len = iter.first.size();
|
||||
if (len > columnLenMax.at(0)) {
|
||||
columnLenMax.at(0) = len + 4;
|
||||
}
|
||||
columns.push_back(iter.first);
|
||||
|
||||
len =
|
||||
snprintf(stringBuf[1], sizeof(stringBuf[1]), "%f", iter.second.second / static_cast<float>(flags_->loop_count_));
|
||||
if (len > columnLenMax.at(1)) {
|
||||
columnLenMax.at(1) = len + 4;
|
||||
}
|
||||
columns.emplace_back(stringBuf[1]);
|
||||
|
||||
len = snprintf(stringBuf[2], sizeof(stringBuf[2]), "%f", iter.second.second / op_cost_total_);
|
||||
if (len > columnLenMax.at(2)) {
|
||||
columnLenMax.at(2) = len + 4;
|
||||
}
|
||||
columns.emplace_back(stringBuf[2]);
|
||||
|
||||
len = snprintf(stringBuf[3], sizeof(stringBuf[3]), "%d", iter.second.first);
|
||||
if (len > columnLenMax.at(3)) {
|
||||
columnLenMax.at(3) = len + 4;
|
||||
}
|
||||
columns.emplace_back(stringBuf[3]);
|
||||
|
||||
len = snprintf(stringBuf[4], sizeof(stringBuf[4]), "%f", iter.second.second);
|
||||
if (len > columnLenMax.at(4)) {
|
||||
columnLenMax.at(4) = len + 4;
|
||||
}
|
||||
columns.emplace_back(stringBuf[4]);
|
||||
|
||||
rows.push_back(columns);
|
||||
}
|
||||
|
||||
printf("-------------------------------------------------------------------------\n");
|
||||
for (int i = 0; i < 5; i++) {
|
||||
auto printBuf = title[i];
|
||||
if (printBuf.size() > columnLenMax.at(i)) {
|
||||
columnLenMax.at(i) = printBuf.size();
|
||||
}
|
||||
printBuf.resize(columnLenMax.at(i), ' ');
|
||||
printf("%s\t", printBuf.c_str());
|
||||
}
|
||||
printf("\n");
|
||||
for (auto &row : rows) {
|
||||
for (int j = 0; j < 5; j++) {
|
||||
auto printBuf = row[j];
|
||||
printBuf.resize(columnLenMax.at(j), ' ');
|
||||
printf("%s\t", printBuf.c_str());
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
#ifdef ENABLE_ARM64
|
||||
int Benchmark::PrintPerfResult(const std::vector<std::string> &title,
|
||||
const std::map<std::string, std::pair<int, struct PerfCount>> &result) {
|
||||
std::vector<size_t> columnLenMax(5);
|
||||
std::vector<std::vector<std::string>> rows;
|
||||
|
||||
for (auto &iter : result) {
|
||||
char stringBuf[5][100] = {};
|
||||
std::vector<std::string> columns;
|
||||
size_t len = 0;
|
||||
|
||||
len = iter.first.size();
|
||||
if (len > columnLenMax.at(0)) {
|
||||
columnLenMax.at(0) = len + 4;
|
||||
}
|
||||
columns.push_back(iter.first);
|
||||
|
||||
float tmp = float_t(flags_->num_threads_) * iter.second.second.value[0] / float_t(flags_->loop_count_) / 1000.0f;
|
||||
len = snprintf(stringBuf[1], sizeof(stringBuf[1]), "%.2f", tmp);
|
||||
if (len > columnLenMax.at(1)) {
|
||||
columnLenMax.at(1) = len + 4;
|
||||
}
|
||||
columns.emplace_back(stringBuf[1]);
|
||||
|
||||
len = snprintf(stringBuf[2], sizeof(stringBuf[2]), "%f", iter.second.second.value[0] / op_cost_total_);
|
||||
if (len > columnLenMax.at(2)) {
|
||||
columnLenMax.at(2) = len + 4;
|
||||
}
|
||||
columns.emplace_back(stringBuf[2]);
|
||||
|
||||
tmp = float_t(flags_->num_threads_) * iter.second.second.value[1] / float_t(flags_->loop_count_) / 1000.0f;
|
||||
len = snprintf(stringBuf[3], sizeof(stringBuf[3]), "%.2f", tmp);
|
||||
if (len > columnLenMax.at(3)) {
|
||||
columnLenMax.at(3) = len + 4;
|
||||
}
|
||||
columns.emplace_back(stringBuf[3]);
|
||||
|
||||
len = snprintf(stringBuf[4], sizeof(stringBuf[4]), "%f", iter.second.second.value[1] / op_cost2_total_);
|
||||
if (len > columnLenMax.at(4)) {
|
||||
columnLenMax.at(4) = len + 4;
|
||||
}
|
||||
columns.emplace_back(stringBuf[4]);
|
||||
|
||||
rows.push_back(columns);
|
||||
}
|
||||
|
||||
printf("-------------------------------------------------------------------------\n");
|
||||
for (int i = 0; i < 5; i++) {
|
||||
auto printBuf = title[i];
|
||||
if (printBuf.size() > columnLenMax.at(i)) {
|
||||
columnLenMax.at(i) = printBuf.size();
|
||||
}
|
||||
printBuf.resize(columnLenMax.at(i), ' ');
|
||||
printf("%s\t", printBuf.c_str());
|
||||
}
|
||||
printf("\n");
|
||||
for (auto &row : rows) {
|
||||
for (int j = 0; j < 5; j++) {
|
||||
auto printBuf = row[j];
|
||||
printBuf.resize(columnLenMax.at(j), ' ');
|
||||
printf("%s\t", printBuf.c_str());
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef SUPPORT_NNIE
|
||||
int SvpSysInit() {
|
||||
HI_S32 ret = HI_SUCCESS;
|
||||
VB_CONFIG_S struVbConf;
|
||||
|
||||
HI_MPI_SYS_Exit();
|
||||
HI_MPI_VB_Exit();
|
||||
|
||||
memset(&struVbConf, 0, sizeof(VB_CONFIG_S));
|
||||
struVbConf.u32MaxPoolCnt = 2;
|
||||
struVbConf.astCommPool[1].u64BlkSize = 768 * 576 * 2;
|
||||
struVbConf.astCommPool[1].u32BlkCnt = 1;
|
||||
|
||||
ret = HI_MPI_VB_SetConfig((const VB_CONFIG_S *)&struVbConf);
|
||||
if (HI_SUCCESS != ret) {
|
||||
MS_LOG(ERROR) << "Error:HI_MPI_VB_SetConf failed!";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
ret = HI_MPI_VB_Init();
|
||||
if (HI_SUCCESS != ret) {
|
||||
MS_LOG(ERROR) << "Error:HI_MPI_VB_Init failed!";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
ret = HI_MPI_SYS_Init();
|
||||
if (HI_SUCCESS != ret) {
|
||||
MS_LOG(ERROR) << "Error:HI_MPI_SYS_Init failed!";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int SvpSysExit() {
|
||||
HI_S32 ret = HI_SUCCESS;
|
||||
|
||||
ret = HI_MPI_SYS_Exit();
|
||||
if (HI_SUCCESS != ret) {
|
||||
MS_LOG(ERROR) << "Error:HI_MPI_SYS_Exit failed!";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
ret = HI_MPI_VB_Exit();
|
||||
if (HI_SUCCESS != ret) {
|
||||
MS_LOG(ERROR) << "Error:HI_MPI_VB_Exit failed!";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
return RET_OK;
|
||||
}
|
||||
#endif
|
||||
|
||||
Benchmark::~Benchmark() {
|
||||
for (const auto &iter : this->benchmark_data_) {
|
||||
delete (iter.second);
|
||||
}
|
||||
this->benchmark_data_.clear();
|
||||
delete (session_);
|
||||
#ifdef SUPPORT_NNIE
|
||||
SvpSysExit();
|
||||
#endif
|
||||
}
|
||||
|
||||
int RunBenchmark(int argc, const char **argv) {
|
||||
BenchmarkFlags flags;
|
||||
Option<std::string> err = flags.ParseFlags(argc, argv);
|
||||
#ifdef SUPPORT_NNIE
|
||||
SvpSysInit();
|
||||
#endif
|
||||
if (err.IsSome()) {
|
||||
std::cerr << err.Get() << std::endl;
|
||||
std::cerr << flags.Usage() << std::endl;
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
if (flags.help) {
|
||||
std::cerr << flags.Usage() << std::endl;
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
Benchmark benchmark(&flags);
|
||||
auto status = benchmark.Init();
|
||||
if (status != 0) {
|
||||
MS_LOG(ERROR) << "Benchmark init Error : " << status;
|
||||
std::cerr << "Benchmark init Error : " << status << std::endl;
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
status = benchmark.RunBenchmark();
|
||||
if (status != 0) {
|
||||
MS_LOG(ERROR) << "Run Benchmark "
|
||||
<< flags.model_file_.substr(flags.model_file_.find_last_of(DELIM_SLASH) + 1).c_str()
|
||||
<< " Failed : " << status;
|
||||
std::cerr << "Run Benchmark " << flags.model_file_.substr(flags.model_file_.find_last_of(DELIM_SLASH) + 1).c_str()
|
||||
<< " Failed : " << status << std::endl;
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
MS_LOG(INFO) << "Run Benchmark " << flags.model_file_.substr(flags.model_file_.find_last_of(DELIM_SLASH) + 1).c_str()
|
||||
<< " Success.";
|
||||
std::cout << "Run Benchmark " << flags.model_file_.substr(flags.model_file_.find_last_of(DELIM_SLASH) + 1).c_str()
|
||||
<< " Success." << std::endl;
|
||||
return RET_OK;
|
||||
}
|
||||
} // namespace lite
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -31,6 +31,7 @@
|
|||
#include <cfloat>
|
||||
#include <utility>
|
||||
#include <nlohmann/json.hpp>
|
||||
#include "tools/benchmark/benchmark_base.h"
|
||||
#include "include/model.h"
|
||||
#include "tools/common/flag_parser.h"
|
||||
#include "src/common/file_utils.h"
|
||||
|
@ -38,283 +39,57 @@
|
|||
#include "include/lite_session.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
enum MS_API InDataType { kImage = 0, kBinary = 1 };
|
||||
|
||||
constexpr float relativeTolerance = 1e-5;
|
||||
constexpr float absoluteTolerance = 1e-8;
|
||||
|
||||
#ifdef ENABLE_ARM64
|
||||
struct PerfResult {
|
||||
int64_t nr;
|
||||
struct {
|
||||
int64_t value;
|
||||
int64_t id;
|
||||
} values[2];
|
||||
};
|
||||
struct PerfCount {
|
||||
int64_t value[2];
|
||||
};
|
||||
#endif
|
||||
|
||||
struct MS_API CheckTensor {
|
||||
CheckTensor(const std::vector<size_t> &shape, const std::vector<float> &data,
|
||||
const std::vector<std::string> &strings_data = {""}) {
|
||||
this->shape = shape;
|
||||
this->data = data;
|
||||
this->strings_data = strings_data;
|
||||
}
|
||||
std::vector<size_t> shape;
|
||||
std::vector<float> data;
|
||||
std::vector<std::string> strings_data;
|
||||
};
|
||||
|
||||
class MS_API BenchmarkFlags : public virtual FlagParser {
|
||||
class MS_API Benchmark : public BenchmarkBase {
|
||||
public:
|
||||
BenchmarkFlags() {
|
||||
// common
|
||||
AddFlag(&BenchmarkFlags::model_file_, "modelFile", "Input model file", "");
|
||||
AddFlag(&BenchmarkFlags::in_data_file_, "inDataFile", "Input data file, if not set, use random input", "");
|
||||
AddFlag(&BenchmarkFlags::device_, "device", "CPU | GPU | NPU", "CPU");
|
||||
AddFlag(&BenchmarkFlags::cpu_bind_mode_, "cpuBindMode",
|
||||
"Input 0 for NO_BIND, 1 for HIGHER_CPU, 2 for MID_CPU, default value: 1", 1);
|
||||
// MarkPerformance
|
||||
AddFlag(&BenchmarkFlags::loop_count_, "loopCount", "Run loop count", 10);
|
||||
AddFlag(&BenchmarkFlags::num_threads_, "numThreads", "Run threads number", 2);
|
||||
AddFlag(&BenchmarkFlags::enable_fp16_, "enableFp16", "Enable float16", false);
|
||||
AddFlag(&BenchmarkFlags::enable_parallel_, "enableParallel", "Enable subgraph parallel : true | false", false);
|
||||
AddFlag(&BenchmarkFlags::warm_up_loop_count_, "warmUpLoopCount", "Run warm up loop", 3);
|
||||
AddFlag(&BenchmarkFlags::time_profiling_, "timeProfiling", "Run time profiling", false);
|
||||
AddFlag(&BenchmarkFlags::perf_profiling_, "perfProfiling",
|
||||
"Perf event profiling(only instructions statics enabled currently)", false);
|
||||
AddFlag(&BenchmarkFlags::perf_event_, "perfEvent", "CYCLE|CACHE|STALL", "CYCLE");
|
||||
// MarkAccuracy
|
||||
AddFlag(&BenchmarkFlags::benchmark_data_file_, "benchmarkDataFile", "Benchmark data file path", "");
|
||||
AddFlag(&BenchmarkFlags::benchmark_data_type_, "benchmarkDataType",
|
||||
"Benchmark data type. FLOAT | INT32 | INT8 | UINT8", "FLOAT");
|
||||
AddFlag(&BenchmarkFlags::accuracy_threshold_, "accuracyThreshold", "Threshold of accuracy", 0.5);
|
||||
AddFlag(&BenchmarkFlags::resize_dims_in_, "inputShapes",
|
||||
"Shape of input data, the format should be NHWC. e.g. 1,32,32,32:1,1,32,32,1", "");
|
||||
}
|
||||
|
||||
~BenchmarkFlags() override = default;
|
||||
|
||||
void InitInputDataList();
|
||||
|
||||
void InitResizeDimsList();
|
||||
|
||||
public:
|
||||
// common
|
||||
std::string model_file_;
|
||||
std::string in_data_file_;
|
||||
std::vector<std::string> input_data_list_;
|
||||
InDataType in_data_type_ = kBinary;
|
||||
std::string in_data_type_in_ = "bin";
|
||||
int cpu_bind_mode_ = 1;
|
||||
// MarkPerformance
|
||||
int loop_count_ = 10;
|
||||
int num_threads_ = 2;
|
||||
bool enable_fp16_ = false;
|
||||
bool enable_parallel_ = false;
|
||||
int warm_up_loop_count_ = 3;
|
||||
// MarkAccuracy
|
||||
std::string benchmark_data_file_;
|
||||
std::string benchmark_data_type_ = "FLOAT";
|
||||
float accuracy_threshold_ = 0.5;
|
||||
// Resize
|
||||
std::string resize_dims_in_;
|
||||
std::vector<std::vector<int>> resize_dims_;
|
||||
|
||||
std::string device_ = "CPU";
|
||||
bool time_profiling_ = false;
|
||||
bool perf_profiling_ = false;
|
||||
std::string perf_event_ = "CYCLE";
|
||||
bool dump_tensor_data_ = false;
|
||||
bool print_tensor_data_ = false;
|
||||
};
|
||||
|
||||
class MS_API Benchmark {
|
||||
public:
|
||||
explicit Benchmark(BenchmarkFlags *flags) : flags_(flags) {}
|
||||
explicit Benchmark(BenchmarkFlags *flags) : BenchmarkBase(flags) {}
|
||||
|
||||
virtual ~Benchmark();
|
||||
|
||||
int Init();
|
||||
int RunBenchmark();
|
||||
|
||||
private:
|
||||
// call GenerateInputData or ReadInputFile to init inputTensors
|
||||
int LoadInput();
|
||||
int RunBenchmark() override;
|
||||
|
||||
protected:
|
||||
// call GenerateRandomData to fill inputTensors
|
||||
int GenerateInputData();
|
||||
int GenerateInputData() override;
|
||||
|
||||
int GenerateRandomData(size_t size, void *data, TypeId data_type);
|
||||
int ReadInputFile() override;
|
||||
|
||||
int ReadInputFile();
|
||||
|
||||
int ReadCalibData();
|
||||
|
||||
int ReadTensorData(std::ifstream &in_file_stream, const std::string &tensor_name, const std::vector<size_t> &dims);
|
||||
int ReadTensorData(std::ifstream &in_file_stream, const std::string &tensor_name,
|
||||
const std::vector<size_t> &dims) override;
|
||||
|
||||
void InitContext(const std::shared_ptr<Context> &context);
|
||||
|
||||
int CompareOutput();
|
||||
int CompareOutput() override;
|
||||
|
||||
tensor::MSTensor *GetTensorByNameOrShape(const std::string &node_or_tensor_name, const std::vector<size_t> &dims);
|
||||
|
||||
tensor::MSTensor *GetTensorByNodeShape(const std::vector<size_t> &node_shape);
|
||||
|
||||
int CompareStringData(const std::string &name, tensor::MSTensor *tensor);
|
||||
|
||||
int CompareDataGetTotalBiasAndSize(const std::string &name, tensor::MSTensor *tensor, float *total_bias,
|
||||
int *total_size);
|
||||
|
||||
int InitDumpConfigFromJson(char *path);
|
||||
int InitTimeProfilingCallbackParameter() override;
|
||||
|
||||
int InitCallbackParameter();
|
||||
int InitPerfProfilingCallbackParameter() override;
|
||||
|
||||
int InitTimeProfilingCallbackParameter();
|
||||
int InitDumpTensorDataCallbackParameter() override;
|
||||
|
||||
int InitPerfProfilingCallbackParameter();
|
||||
|
||||
int InitDumpTensorDataCallbackParameter();
|
||||
|
||||
int InitPrintTensorDataCallbackParameter();
|
||||
|
||||
int PrintResult(const std::vector<std::string> &title, const std::map<std::string, std::pair<int, float>> &result);
|
||||
|
||||
#ifdef ENABLE_ARM64
|
||||
int PrintPerfResult(const std::vector<std::string> &title,
|
||||
const std::map<std::string, std::pair<int, struct PerfCount>> &result);
|
||||
#endif
|
||||
int InitPrintTensorDataCallbackParameter() override;
|
||||
|
||||
int PrintInputData();
|
||||
|
||||
// tensorData need to be converter first
|
||||
template <typename T>
|
||||
float CompareData(const std::string &nodeName, const std::vector<int> &msShape, const void *tensor_data) {
|
||||
const T *msTensorData = static_cast<const T *>(tensor_data);
|
||||
auto iter = this->benchmark_data_.find(nodeName);
|
||||
if (iter != this->benchmark_data_.end()) {
|
||||
std::vector<size_t> castedMSShape;
|
||||
size_t shapeSize = 1;
|
||||
for (int64_t dim : msShape) {
|
||||
castedMSShape.push_back(size_t(dim));
|
||||
shapeSize *= dim;
|
||||
}
|
||||
|
||||
CheckTensor *calibTensor = iter->second;
|
||||
if (calibTensor->shape != castedMSShape) {
|
||||
std::ostringstream oss;
|
||||
oss << "Shape of mslite output(";
|
||||
for (auto dim : castedMSShape) {
|
||||
oss << dim << ",";
|
||||
}
|
||||
oss << ") and shape source model output(";
|
||||
for (auto dim : calibTensor->shape) {
|
||||
oss << dim << ",";
|
||||
}
|
||||
oss << ") are different";
|
||||
std::cerr << oss.str() << std::endl;
|
||||
MS_LOG(ERROR) << oss.str().c_str();
|
||||
return RET_ERROR;
|
||||
}
|
||||
size_t errorCount = 0;
|
||||
float meanError = 0;
|
||||
std::cout << "Data of node " << nodeName << " : ";
|
||||
for (size_t j = 0; j < shapeSize; j++) {
|
||||
if (j < 50) {
|
||||
std::cout << static_cast<float>(msTensorData[j]) << " ";
|
||||
}
|
||||
|
||||
if (std::isnan(msTensorData[j]) || std::isinf(msTensorData[j])) {
|
||||
std::cerr << "Output tensor has nan or inf data, compare fail" << std::endl;
|
||||
MS_LOG(ERROR) << "Output tensor has nan or inf data, compare fail";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
auto tolerance = absoluteTolerance + relativeTolerance * fabs(calibTensor->data.at(j));
|
||||
auto absoluteError = std::fabs(msTensorData[j] - calibTensor->data.at(j));
|
||||
if (absoluteError > tolerance) {
|
||||
if (fabs(calibTensor->data.at(j) - 0.0f) < FLT_EPSILON) {
|
||||
if (absoluteError > 1e-5) {
|
||||
meanError += absoluteError;
|
||||
errorCount++;
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
// just assume that atol = rtol
|
||||
meanError += absoluteError / (fabs(calibTensor->data.at(j)) + FLT_MIN);
|
||||
errorCount++;
|
||||
}
|
||||
}
|
||||
}
|
||||
std::cout << std::endl;
|
||||
if (meanError > 0.0f) {
|
||||
meanError /= errorCount;
|
||||
}
|
||||
|
||||
if (meanError <= 0.0000001) {
|
||||
std::cout << "Mean bias of node/tensor " << nodeName << " : 0%" << std::endl;
|
||||
} else {
|
||||
std::cout << "Mean bias of node/tensor " << nodeName << " : " << meanError * 100 << "%" << std::endl;
|
||||
}
|
||||
return meanError;
|
||||
} else {
|
||||
MS_LOG(INFO) << "%s is not in Source Model output", nodeName.c_str();
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T, typename Distribution>
|
||||
void FillInputData(int size, void *data, Distribution distribution) {
|
||||
MS_ASSERT(data != nullptr);
|
||||
int elements_num = size / sizeof(T);
|
||||
(void)std::generate_n(static_cast<T *>(data), elements_num,
|
||||
[&]() { return static_cast<T>(distribution(random_engine_)); });
|
||||
}
|
||||
|
||||
int MarkPerformance();
|
||||
|
||||
int MarkAccuracy();
|
||||
|
||||
int CheckThreadNumValid();
|
||||
|
||||
private:
|
||||
BenchmarkFlags *flags_;
|
||||
session::LiteSession *session_{nullptr};
|
||||
std::vector<mindspore::tensor::MSTensor *> ms_inputs_;
|
||||
std::unordered_map<std::string, std::vector<mindspore::tensor::MSTensor *>> ms_outputs_;
|
||||
std::unordered_map<std::string, CheckTensor *> benchmark_data_;
|
||||
std::unordered_map<std::string, TypeId> data_type_map_{{"FLOAT", TypeId::kNumberTypeFloat},
|
||||
{"INT8", TypeId::kNumberTypeInt8},
|
||||
{"INT32", TypeId::kNumberTypeInt32},
|
||||
{"UINT8", TypeId::kNumberTypeUInt8}};
|
||||
TypeId msCalibDataType = TypeId::kNumberTypeFloat;
|
||||
|
||||
// callback parameters
|
||||
uint64_t op_begin_ = 0;
|
||||
int op_call_times_total_ = 0;
|
||||
float op_cost_total_ = 0.0f;
|
||||
std::map<std::string, std::pair<int, float>> op_times_by_type_;
|
||||
std::map<std::string, std::pair<int, float>> op_times_by_name_;
|
||||
|
||||
// dump data
|
||||
nlohmann::json dump_cfg_json_;
|
||||
std::string dump_file_output_dir_;
|
||||
#ifdef ENABLE_ARM64
|
||||
int perf_fd = 0;
|
||||
int perf_fd2 = 0;
|
||||
float op_cost2_total_ = 0.0f;
|
||||
std::map<std::string, std::pair<int, struct PerfCount>> op_perf_by_type_;
|
||||
std::map<std::string, std::pair<int, struct PerfCount>> op_perf_by_name_;
|
||||
#endif
|
||||
KernelCallBack before_call_back_ = nullptr;
|
||||
KernelCallBack after_call_back_ = nullptr;
|
||||
std::mt19937 random_engine_;
|
||||
};
|
||||
|
||||
int MS_API RunBenchmark(int argc, const char **argv);
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINNIE_BENCHMARK_BENCHMARK_H_
|
||||
|
|
|
@ -0,0 +1,606 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "tools/benchmark/benchmark_base.h"
|
||||
#define __STDC_FORMAT_MACROS
|
||||
#include <cinttypes>
|
||||
#undef __STDC_FORMAT_MACROS
|
||||
#include <algorithm>
|
||||
#include <utility>
|
||||
#include <functional>
|
||||
#include "include/context.h"
|
||||
#include "include/ms_tensor.h"
|
||||
#include "include/version.h"
|
||||
#include "schema/model_generated.h"
|
||||
#include "src/common/common.h"
|
||||
#include "src/tensor.h"
|
||||
#ifdef ENABLE_ARM64
|
||||
#include <linux/perf_event.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <asm/unistd.h>
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#ifdef SUPPORT_NNIE
|
||||
#include "include/hi_common.h"
|
||||
#include "include/hi_comm_vb.h"
|
||||
#include "include/mpi_sys.h"
|
||||
#include "include/mpi_vb.h"
|
||||
#endif
|
||||
|
||||
namespace mindspore {
|
||||
namespace lite {
|
||||
|
||||
const std::unordered_map<int, std::string> TYPE_ID_MAP{
|
||||
{kNumberTypeFloat16, "Float16"}, {kNumberTypeFloat, "Float32"}, {kNumberTypeFloat32, "Float32"},
|
||||
{kNumberTypeInt8, "Int8"}, {kNumberTypeInt16, "Int16"}, {kNumberTypeInt, "Int32"},
|
||||
{kNumberTypeInt32, "Int32"}, {kNumberTypeUInt8, "UInt8"}, {kNumberTypeUInt16, "UInt16"},
|
||||
{kNumberTypeUInt, "UInt32"}, {kNumberTypeUInt32, "UInt32"}, {kObjectTypeString, "String"},
|
||||
{kNumberTypeBool, "Bool"}, {kObjectTypeTensorType, "Tensor"}};
|
||||
const std::unordered_map<schema::Format, std::string> TENSOR_FORMAT_MAP{
|
||||
{schema::Format_NCHW, "NCHW"}, {schema::Format_NHWC, "NHWC"}, {schema::Format_NHWC4, "NHWC4"},
|
||||
{schema::Format_HWKC, "HWKC"}, {schema::Format_HWCK, "HWCK"}, {schema::Format_KCHW, "KCHW"},
|
||||
{schema::Format_CKHW, "CKHW"}, {schema::Format_KHWC, "KHWC"}, {schema::Format_CHWK, "CHWK"},
|
||||
{schema::Format_HW, "HW"}, {schema::Format_HW4, "HW4"}, {schema::Format_NC, "NC"},
|
||||
{schema::Format_NC4, "NC4"}, {schema::Format_NC4HW4, "NC4HW4"}, {schema::Format_NCDHW, "NCDHW"}};
|
||||
|
||||
int BenchmarkBase::GenerateRandomData(size_t size, void *data, int data_type) {
|
||||
MS_ASSERT(data != nullptr);
|
||||
switch (data_type) {
|
||||
case kNumberTypeFloat32:
|
||||
case kNumberTypeFloat:
|
||||
FillInputData<float>(size, data, std::uniform_real_distribution<float>(0.1f, 1.0f));
|
||||
break;
|
||||
case kNumberTypeFloat64:
|
||||
FillInputData<double>(size, data, std::uniform_real_distribution<double>(0.1, 1.0));
|
||||
break;
|
||||
case kNumberTypeInt64:
|
||||
FillInputData<int64_t>(size, data, std::uniform_int_distribution<int64_t>(0, 1));
|
||||
break;
|
||||
case kNumberTypeInt:
|
||||
case kNumberTypeInt32:
|
||||
FillInputData<int32_t>(size, data, std::uniform_int_distribution<int32_t>(0, 1));
|
||||
break;
|
||||
case kNumberTypeInt16:
|
||||
FillInputData<int16_t>(size, data, std::uniform_int_distribution<int16_t>(0, 1));
|
||||
break;
|
||||
case kNumberTypeInt8:
|
||||
FillInputData<int8_t>(size, data, std::uniform_int_distribution<int8_t>(-127, 127));
|
||||
break;
|
||||
case kNumberTypeUInt8:
|
||||
FillInputData<uint8_t>(size, data, std::uniform_int_distribution<uint8_t>(0, 254));
|
||||
break;
|
||||
default:
|
||||
char *casted_data = static_cast<char *>(data);
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
casted_data[i] = static_cast<char>(i);
|
||||
}
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int BenchmarkBase::LoadInput() {
|
||||
if (flags_->in_data_file_.empty()) {
|
||||
auto status = GenerateInputData();
|
||||
if (status != 0) {
|
||||
std::cerr << "Generate input data error " << status << std::endl;
|
||||
MS_LOG(ERROR) << "Generate input data error " << status;
|
||||
return status;
|
||||
}
|
||||
} else {
|
||||
auto status = ReadInputFile();
|
||||
if (status != 0) {
|
||||
std::cerr << "ReadInputFile error, " << status << std::endl;
|
||||
MS_LOG(ERROR) << "ReadInputFile error, " << status;
|
||||
return status;
|
||||
}
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
// calibData is FP32
|
||||
int BenchmarkBase::ReadCalibData() {
|
||||
const char *calib_data_path = flags_->benchmark_data_file_.c_str();
|
||||
// read calib data
|
||||
std::ifstream in_file(calib_data_path);
|
||||
if (!in_file.good()) {
|
||||
std::cerr << "file: " << calib_data_path << " is not exist" << std::endl;
|
||||
MS_LOG(ERROR) << "file: " << calib_data_path << " is not exist";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
if (!in_file.is_open()) {
|
||||
std::cerr << "file: " << calib_data_path << " open failed" << std::endl;
|
||||
MS_LOG(ERROR) << "file: " << calib_data_path << " open failed";
|
||||
in_file.close();
|
||||
return RET_ERROR;
|
||||
}
|
||||
MS_LOG(INFO) << "Start reading calibData file";
|
||||
std::string line;
|
||||
std::string tensor_name;
|
||||
|
||||
while (!in_file.eof()) {
|
||||
getline(in_file, line);
|
||||
std::stringstream string_line1(line);
|
||||
size_t dim = 0;
|
||||
string_line1 >> tensor_name >> dim;
|
||||
std::vector<size_t> dims;
|
||||
for (size_t i = 0; i < dim; i++) {
|
||||
size_t tmp_dim;
|
||||
string_line1 >> tmp_dim;
|
||||
dims.push_back(tmp_dim);
|
||||
}
|
||||
auto ret = ReadTensorData(in_file, tensor_name, dims);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Read tensor data failed, tensor name: " << tensor_name;
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
in_file.close();
|
||||
MS_LOG(INFO) << "Finish reading calibData file";
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int BenchmarkBase::CompareStringData(const std::string &name, tensor::MSTensor *tensor) {
|
||||
auto iter = this->benchmark_data_.find(name);
|
||||
if (iter != this->benchmark_data_.end()) {
|
||||
std::vector<std::string> calib_strings = iter->second->strings_data;
|
||||
std::vector<std::string> output_strings = MSTensorToStrings(tensor);
|
||||
size_t compare_num = std::min(calib_strings.size(), output_strings.size());
|
||||
size_t print_num = std::min(compare_num, static_cast<size_t>(kNumPrintMin));
|
||||
|
||||
std::cout << "Data of node " << name << " : " << std::endl;
|
||||
for (size_t i = 0; i < compare_num; i++) {
|
||||
if (i < print_num) {
|
||||
std::cout << " " << output_strings[i] << std::endl;
|
||||
}
|
||||
if (calib_strings[i] != output_strings[i]) {
|
||||
MS_LOG(ERROR) << "Compare failed, index: " << i;
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
void BenchmarkFlags::InitInputDataList() {
|
||||
char *input_list = new char[this->in_data_file_.length() + 1];
|
||||
snprintf(input_list, this->in_data_file_.length() + 1, "%s", this->in_data_file_.c_str());
|
||||
char *cur_input;
|
||||
const char *split_c = ",";
|
||||
cur_input = strtok(input_list, split_c);
|
||||
while (cur_input != nullptr) {
|
||||
input_data_list_.emplace_back(cur_input);
|
||||
cur_input = strtok(nullptr, split_c);
|
||||
}
|
||||
delete[] input_list;
|
||||
}
|
||||
|
||||
void BenchmarkFlags::InitResizeDimsList() {
|
||||
std::string content = this->resize_dims_in_;
|
||||
std::vector<int> shape;
|
||||
auto shape_strs = StringSplit(content, std::string(DELIM_COLON));
|
||||
for (const auto &shape_str : shape_strs) {
|
||||
shape.clear();
|
||||
auto dim_strs = StringSplit(shape_str, std::string(DELIM_COMMA));
|
||||
std::cout << "Resize Dims: ";
|
||||
for (const auto &dim_str : dim_strs) {
|
||||
std::cout << dim_str << " ";
|
||||
shape.emplace_back(static_cast<int>(std::stoi(dim_str)));
|
||||
}
|
||||
std::cout << std::endl;
|
||||
this->resize_dims_.emplace_back(shape);
|
||||
}
|
||||
}
|
||||
|
||||
int BenchmarkBase::CheckThreadNumValid() {
|
||||
if (this->flags_->num_threads_ < 1) {
|
||||
MS_LOG(ERROR) << "numThreads:" << this->flags_->num_threads_ << " must be greater than 0";
|
||||
std::cerr << "numThreads:" << this->flags_->num_threads_ << " must be greater than 0" << std::endl;
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
if (flags_->enable_parallel_) {
|
||||
if (flags_->num_threads_ < 2) {
|
||||
MS_LOG(ERROR) << "enable parallel need more than 1 thread.";
|
||||
std::cerr << "enable parallel need more than 1 thread." << std::endl;
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int BenchmarkBase::InitDumpConfigFromJson(char *path) {
|
||||
auto real_path = RealPath(path);
|
||||
std::ifstream ifs(real_path);
|
||||
if (!ifs.good()) {
|
||||
MS_LOG(ERROR) << "file: " << real_path << " is not exist";
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (!ifs.is_open()) {
|
||||
MS_LOG(ERROR) << "file: " << real_path << " open failed";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
try {
|
||||
dump_cfg_json_ = nlohmann::json::parse(ifs);
|
||||
} catch (const nlohmann::json::parse_error &error) {
|
||||
MS_LOG(ERROR) << "parse json file failed, please check your file.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (dump_cfg_json_[dump::kSettings] == nullptr) {
|
||||
MS_LOG(ERROR) << "\"common_dump_settings\" is required.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (dump_cfg_json_[dump::kSettings][dump::kMode] == nullptr) {
|
||||
MS_LOG(ERROR) << "\"dump_mode\" is required.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (dump_cfg_json_[dump::kSettings][dump::kPath] == nullptr) {
|
||||
MS_LOG(ERROR) << "\"path\" is required.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (dump_cfg_json_[dump::kSettings][dump::kNetName] == nullptr) {
|
||||
dump_cfg_json_[dump::kSettings][dump::kNetName] = "Default";
|
||||
}
|
||||
if (dump_cfg_json_[dump::kSettings][dump::kInputOutput] == nullptr) {
|
||||
dump_cfg_json_[dump::kSettings][dump::kInputOutput] = 0;
|
||||
}
|
||||
if (dump_cfg_json_[dump::kSettings][dump::kKernels] != nullptr &&
|
||||
!dump_cfg_json_[dump::kSettings][dump::kKernels].empty()) {
|
||||
if (dump_cfg_json_[dump::kSettings][dump::kMode] == 0) {
|
||||
MS_LOG(ERROR) << R"("dump_mode" should be 1 when "kernels" isn't empty.)";
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
auto abs_path = dump_cfg_json_[dump::kSettings][dump::kPath].get<std::string>();
|
||||
auto net_name = dump_cfg_json_[dump::kSettings][dump::kNetName].get<std::string>();
|
||||
if (abs_path.back() == '\\' || abs_path.back() == '/') {
|
||||
dump_file_output_dir_ = abs_path + net_name;
|
||||
} else {
|
||||
#ifdef _WIN32
|
||||
dump_file_output_dir_ = abs_path + "\\" + net_name;
|
||||
#else
|
||||
dump_file_output_dir_ = abs_path + "/" + net_name;
|
||||
#endif
|
||||
}
|
||||
|
||||
auto status = CreateOutputDir(&dump_file_output_dir_);
|
||||
if (status != RET_OK) {
|
||||
MS_LOG(ERROR) << "create data output directory failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int BenchmarkBase::InitCallbackParameter() {
|
||||
int ret = RET_OK;
|
||||
if (flags_->time_profiling_) {
|
||||
ret = InitTimeProfilingCallbackParameter();
|
||||
} else if (flags_->perf_profiling_) {
|
||||
ret = InitPerfProfilingCallbackParameter();
|
||||
} else if (flags_->print_tensor_data_) {
|
||||
ret = InitPrintTensorDataCallbackParameter();
|
||||
} else if (flags_->dump_tensor_data_) {
|
||||
ret = InitDumpTensorDataCallbackParameter();
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
int BenchmarkBase::Init() {
|
||||
if (this->flags_ == nullptr) {
|
||||
return 1;
|
||||
}
|
||||
MS_LOG(INFO) << "ModelPath = " << this->flags_->model_file_;
|
||||
MS_LOG(INFO) << "InDataPath = " << this->flags_->in_data_file_;
|
||||
MS_LOG(INFO) << "InDataType = " << this->flags_->in_data_type_in_;
|
||||
MS_LOG(INFO) << "LoopCount = " << this->flags_->loop_count_;
|
||||
MS_LOG(INFO) << "DeviceType = " << this->flags_->device_;
|
||||
MS_LOG(INFO) << "AccuracyThreshold = " << this->flags_->accuracy_threshold_;
|
||||
MS_LOG(INFO) << "WarmUpLoopCount = " << this->flags_->warm_up_loop_count_;
|
||||
MS_LOG(INFO) << "NumThreads = " << this->flags_->num_threads_;
|
||||
MS_LOG(INFO) << "Fp16Priority = " << this->flags_->enable_fp16_;
|
||||
MS_LOG(INFO) << "EnableParallel = " << this->flags_->enable_parallel_;
|
||||
MS_LOG(INFO) << "calibDataPath = " << this->flags_->benchmark_data_file_;
|
||||
std::cout << "ModelPath = " << this->flags_->model_file_ << std::endl;
|
||||
std::cout << "InDataPath = " << this->flags_->in_data_file_ << std::endl;
|
||||
std::cout << "InDataType = " << this->flags_->in_data_type_in_ << std::endl;
|
||||
std::cout << "LoopCount = " << this->flags_->loop_count_ << std::endl;
|
||||
std::cout << "DeviceType = " << this->flags_->device_ << std::endl;
|
||||
std::cout << "AccuracyThreshold = " << this->flags_->accuracy_threshold_ << std::endl;
|
||||
std::cout << "WarmUpLoopCount = " << this->flags_->warm_up_loop_count_ << std::endl;
|
||||
std::cout << "NumThreads = " << this->flags_->num_threads_ << std::endl;
|
||||
std::cout << "Fp16Priority = " << this->flags_->enable_fp16_ << std::endl;
|
||||
std::cout << "EnableParallel = " << this->flags_->enable_parallel_ << std::endl;
|
||||
std::cout << "calibDataPath = " << this->flags_->benchmark_data_file_ << std::endl;
|
||||
if (this->flags_->loop_count_ < 1) {
|
||||
MS_LOG(ERROR) << "LoopCount:" << this->flags_->loop_count_ << " must be greater than 0";
|
||||
std::cerr << "LoopCount:" << this->flags_->loop_count_ << " must be greater than 0" << std::endl;
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
auto thread_ret = CheckThreadNumValid();
|
||||
if (thread_ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Invalid numThreads.";
|
||||
std::cerr << "Invalid numThreads." << std::endl;
|
||||
return RET_ERROR;
|
||||
}
|
||||
static std::vector<std::string> CPU_BIND_MODE_MAP = {"NO_BIND", "HIGHER_CPU", "MID_CPU"};
|
||||
if (this->flags_->cpu_bind_mode_ >= 1) {
|
||||
MS_LOG(INFO) << "cpuBindMode = " << CPU_BIND_MODE_MAP[this->flags_->cpu_bind_mode_];
|
||||
std::cout << "cpuBindMode = " << CPU_BIND_MODE_MAP[this->flags_->cpu_bind_mode_] << std::endl;
|
||||
} else {
|
||||
MS_LOG(INFO) << "cpuBindMode = NO_BIND";
|
||||
std::cout << "cpuBindMode = NO_BIND" << std::endl;
|
||||
}
|
||||
|
||||
this->flags_->in_data_type_ = this->flags_->in_data_type_in_ == "img" ? kImage : kBinary;
|
||||
|
||||
if (!flags_->benchmark_data_type_.empty()) {
|
||||
if (data_type_map_.find(flags_->benchmark_data_type_) == data_type_map_.end()) {
|
||||
MS_LOG(ERROR) << "CalibDataType not supported: " << flags_->benchmark_data_type_.c_str();
|
||||
return RET_ERROR;
|
||||
}
|
||||
msCalibDataType = data_type_map_.at(flags_->benchmark_data_type_);
|
||||
MS_LOG(INFO) << "CalibDataType = " << flags_->benchmark_data_type_.c_str();
|
||||
std::cout << "CalibDataType = " << flags_->benchmark_data_type_.c_str() << std::endl;
|
||||
}
|
||||
|
||||
if (flags_->model_file_.empty()) {
|
||||
MS_LOG(ERROR) << "modelPath is required";
|
||||
std::cerr << "modelPath is required" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
flags_->InitInputDataList();
|
||||
flags_->InitResizeDimsList();
|
||||
if (!flags_->resize_dims_.empty() && !flags_->input_data_list_.empty() &&
|
||||
flags_->resize_dims_.size() != flags_->input_data_list_.size()) {
|
||||
MS_LOG(ERROR) << "Size of input resizeDims should be equal to size of input inDataPath";
|
||||
std::cerr << "Size of input resizeDims should be equal to size of input inDataPath" << std::endl;
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
if (flags_->device_ != "CPU" && flags_->device_ != "GPU" && flags_->device_ != "NPU") {
|
||||
MS_LOG(ERROR) << "Device type:" << flags_->device_ << " is not supported.";
|
||||
std::cerr << "Device type:" << flags_->device_ << " is not supported." << std::endl;
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
if (flags_->time_profiling_ && flags_->perf_profiling_) {
|
||||
MS_LOG(INFO) << "time_profiling is enabled, will not run perf_profiling.";
|
||||
}
|
||||
|
||||
// get dump data output path
|
||||
auto dump_cfg_path = std::getenv(dump::kConfigPath);
|
||||
if (dump_cfg_path != nullptr) {
|
||||
flags_->dump_tensor_data_ = true;
|
||||
if (InitDumpConfigFromJson(dump_cfg_path) != RET_OK) {
|
||||
MS_LOG(ERROR) << "parse dump config file failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
} else {
|
||||
MS_LOG(INFO) << "No MINDSPORE_DUMP_CONFIG in env, don't need to dump data";
|
||||
}
|
||||
|
||||
auto status = InitCallbackParameter();
|
||||
if (status != RET_OK) {
|
||||
MS_LOG(ERROR) << "Init callback Parameter failed.";
|
||||
std::cerr << "Init callback Parameter failed." << std::endl;
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int BenchmarkBase::PrintResult(const std::vector<std::string> &title,
|
||||
const std::map<std::string, std::pair<int, float>> &result) {
|
||||
std::vector<size_t> columnLenMax(5);
|
||||
std::vector<std::vector<std::string>> rows;
|
||||
|
||||
for (auto &iter : result) {
|
||||
char stringBuf[5][100] = {};
|
||||
std::vector<std::string> columns;
|
||||
size_t len = 0;
|
||||
|
||||
len = iter.first.size();
|
||||
if (len > columnLenMax.at(0)) {
|
||||
columnLenMax.at(0) = len + 4;
|
||||
}
|
||||
columns.push_back(iter.first);
|
||||
|
||||
len =
|
||||
snprintf(stringBuf[1], sizeof(stringBuf[1]), "%f", iter.second.second / static_cast<float>(flags_->loop_count_));
|
||||
if (len > columnLenMax.at(1)) {
|
||||
columnLenMax.at(1) = len + 4;
|
||||
}
|
||||
columns.emplace_back(stringBuf[1]);
|
||||
|
||||
len = snprintf(stringBuf[2], sizeof(stringBuf[2]), "%f", iter.second.second / op_cost_total_);
|
||||
if (len > columnLenMax.at(2)) {
|
||||
columnLenMax.at(2) = len + 4;
|
||||
}
|
||||
columns.emplace_back(stringBuf[2]);
|
||||
|
||||
len = snprintf(stringBuf[3], sizeof(stringBuf[3]), "%d", iter.second.first);
|
||||
if (len > columnLenMax.at(3)) {
|
||||
columnLenMax.at(3) = len + 4;
|
||||
}
|
||||
columns.emplace_back(stringBuf[3]);
|
||||
|
||||
len = snprintf(stringBuf[4], sizeof(stringBuf[4]), "%f", iter.second.second);
|
||||
if (len > columnLenMax.at(4)) {
|
||||
columnLenMax.at(4) = len + 4;
|
||||
}
|
||||
columns.emplace_back(stringBuf[4]);
|
||||
|
||||
rows.push_back(columns);
|
||||
}
|
||||
|
||||
printf("-------------------------------------------------------------------------\n");
|
||||
for (int i = 0; i < 5; i++) {
|
||||
auto printBuf = title[i];
|
||||
if (printBuf.size() > columnLenMax.at(i)) {
|
||||
columnLenMax.at(i) = printBuf.size();
|
||||
}
|
||||
printBuf.resize(columnLenMax.at(i), ' ');
|
||||
printf("%s\t", printBuf.c_str());
|
||||
}
|
||||
printf("\n");
|
||||
for (auto &row : rows) {
|
||||
for (int j = 0; j < 5; j++) {
|
||||
auto printBuf = row[j];
|
||||
printBuf.resize(columnLenMax.at(j), ' ');
|
||||
printf("%s\t", printBuf.c_str());
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
#ifdef ENABLE_ARM64
|
||||
int BenchmarkBase::PrintPerfResult(const std::vector<std::string> &title,
|
||||
const std::map<std::string, std::pair<int, struct PerfCount>> &result) {
|
||||
std::vector<size_t> columnLenMax(5);
|
||||
std::vector<std::vector<std::string>> rows;
|
||||
|
||||
for (auto &iter : result) {
|
||||
char stringBuf[5][100] = {};
|
||||
std::vector<std::string> columns;
|
||||
size_t len = 0;
|
||||
|
||||
len = iter.first.size();
|
||||
if (len > columnLenMax.at(0)) {
|
||||
columnLenMax.at(0) = len + 4;
|
||||
}
|
||||
columns.push_back(iter.first);
|
||||
|
||||
float tmp = float_t(flags_->num_threads_) * iter.second.second.value[0] / float_t(flags_->loop_count_) / 1000.0f;
|
||||
len = snprintf(stringBuf[1], sizeof(stringBuf[1]), "%.2f", tmp);
|
||||
if (len > columnLenMax.at(1)) {
|
||||
columnLenMax.at(1) = len + 4;
|
||||
}
|
||||
columns.emplace_back(stringBuf[1]);
|
||||
|
||||
len = snprintf(stringBuf[2], sizeof(stringBuf[2]), "%f", iter.second.second.value[0] / op_cost_total_);
|
||||
if (len > columnLenMax.at(2)) {
|
||||
columnLenMax.at(2) = len + 4;
|
||||
}
|
||||
columns.emplace_back(stringBuf[2]);
|
||||
|
||||
tmp = float_t(flags_->num_threads_) * iter.second.second.value[1] / float_t(flags_->loop_count_) / 1000.0f;
|
||||
len = snprintf(stringBuf[3], sizeof(stringBuf[3]), "%.2f", tmp);
|
||||
if (len > columnLenMax.at(3)) {
|
||||
columnLenMax.at(3) = len + 4;
|
||||
}
|
||||
columns.emplace_back(stringBuf[3]);
|
||||
|
||||
len = snprintf(stringBuf[4], sizeof(stringBuf[4]), "%f", iter.second.second.value[1] / op_cost2_total_);
|
||||
if (len > columnLenMax.at(4)) {
|
||||
columnLenMax.at(4) = len + 4;
|
||||
}
|
||||
columns.emplace_back(stringBuf[4]);
|
||||
|
||||
rows.push_back(columns);
|
||||
}
|
||||
|
||||
printf("-------------------------------------------------------------------------\n");
|
||||
for (int i = 0; i < 5; i++) {
|
||||
auto printBuf = title[i];
|
||||
if (printBuf.size() > columnLenMax.at(i)) {
|
||||
columnLenMax.at(i) = printBuf.size();
|
||||
}
|
||||
printBuf.resize(columnLenMax.at(i), ' ');
|
||||
printf("%s\t", printBuf.c_str());
|
||||
}
|
||||
printf("\n");
|
||||
for (auto &row : rows) {
|
||||
for (int j = 0; j < 5; j++) {
|
||||
auto printBuf = row[j];
|
||||
printBuf.resize(columnLenMax.at(j), ' ');
|
||||
printf("%s\t", printBuf.c_str());
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef SUPPORT_NNIE
|
||||
// Initializes the HiSilicon MPP system for NNIE inference: tears down any
// previous SYS/VB state, configures a video-buffer pool, then brings VB and
// SYS back up. Returns RET_OK on success, RET_ERROR on any MPI failure.
int SvpSysInit() {
  HI_S32 ret = HI_SUCCESS;
  VB_CONFIG_S struVbConf;

  // Best-effort teardown of any prior initialization; return codes are
  // deliberately ignored (these fail harmlessly when nothing was up).
  HI_MPI_SYS_Exit();
  HI_MPI_VB_Exit();

  memset(&struVbConf, 0, sizeof(VB_CONFIG_S));
  struVbConf.u32MaxPoolCnt = 2;
  // NOTE(review): only common pool index 1 is configured although
  // u32MaxPoolCnt is 2 — pool 0 stays zeroed; confirm this is intended.
  struVbConf.astCommPool[1].u64BlkSize = 768 * 576 * 2;  // one 768x576 frame at 2 bytes/pixel
  struVbConf.astCommPool[1].u32BlkCnt = 1;

  ret = HI_MPI_VB_SetConfig((const VB_CONFIG_S *)&struVbConf);
  if (HI_SUCCESS != ret) {
    MS_LOG(ERROR) << "Error:HI_MPI_VB_SetConf failed!";
    return RET_ERROR;
  }

  // VB must be initialized before SYS (per MPP bring-up order).
  ret = HI_MPI_VB_Init();
  if (HI_SUCCESS != ret) {
    MS_LOG(ERROR) << "Error:HI_MPI_VB_Init failed!";
    return RET_ERROR;
  }

  ret = HI_MPI_SYS_Init();
  if (HI_SUCCESS != ret) {
    MS_LOG(ERROR) << "Error:HI_MPI_SYS_Init failed!";
    return RET_ERROR;
  }

  return RET_OK;
}
|
||||
|
||||
int SvpSysExit() {
|
||||
HI_S32 ret = HI_SUCCESS;
|
||||
|
||||
ret = HI_MPI_SYS_Exit();
|
||||
if (HI_SUCCESS != ret) {
|
||||
MS_LOG(ERROR) << "Error:HI_MPI_SYS_Exit failed!";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
ret = HI_MPI_VB_Exit();
|
||||
if (HI_SUCCESS != ret) {
|
||||
MS_LOG(ERROR) << "Error:HI_MPI_VB_Exit failed!";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
return RET_OK;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Releases all owned calibration tensors cached in benchmark_data_; on NNIE
// boards also shuts down the MPP system brought up by SvpSysInit().
BenchmarkBase::~BenchmarkBase() {
  for (auto &entry : benchmark_data_) {
    delete entry.second;
  }
  benchmark_data_.clear();
#ifdef SUPPORT_NNIE
  SvpSysExit();
#endif
}
|
||||
|
||||
} // namespace lite
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,316 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINNIE_BENCHMARK_BENCHMARK_BASE_H_
|
||||
#define MINNIE_BENCHMARK_BENCHMARK_BASE_H_
|
||||
|
||||
#include <getopt.h>
|
||||
#include <signal.h>
|
||||
#include <random>
|
||||
#include <unordered_map>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <cmath>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include <cfloat>
|
||||
#include <utility>
|
||||
#include <nlohmann/json.hpp>
|
||||
#include "include/model.h"
|
||||
#include "tools/common/flag_parser.h"
|
||||
#include "src/common/file_utils.h"
|
||||
#include "src/common/utils.h"
|
||||
#include "ir/dtype/type_id.h"
|
||||
#include "schema/model_generated.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
// Kind of input file fed to the benchmark: raw image vs. binary tensor dump.
enum MS_API InDataType { kImage = 0, kBinary = 1 };

// Tolerances used when comparing inference output against calibration data.
constexpr float relativeTolerance = 1e-5;
constexpr float absoluteTolerance = 1e-8;

constexpr int kNumPrintMin = 5;
// Separators used when parsing command-line flag values (lists of files,
// dims, and path components respectively).
constexpr const char *DELIM_COLON = ":";
constexpr const char *DELIM_COMMA = ",";
constexpr const char *DELIM_SLASH = "/";

// Lookup tables (defined in the .cc) mapping type ids / tensor formats to
// human-readable names for logging.
extern const std::unordered_map<int, std::string> TYPE_ID_MAP;
extern const std::unordered_map<schema::Format, std::string> TENSOR_FORMAT_MAP;
|
||||
|
||||
//
|
||||
namespace dump {
// Keys used when reading the tensor-dump JSON configuration. kConfigPath is
// presumably the environment variable naming the config file — confirm in
// InitDumpConfigFromJson; the rest are field names inside the
// "common_dump_settings" object.
constexpr auto kConfigPath = "MINDSPORE_DUMP_CONFIG";
constexpr auto kSettings = "common_dump_settings";
constexpr auto kMode = "dump_mode";
constexpr auto kPath = "path";
constexpr auto kNetName = "net_name";
constexpr auto kInputOutput = "input_output";
constexpr auto kKernels = "kernels";
}  // namespace dump
|
||||
|
||||
#ifdef ENABLE_ARM64
|
||||
struct PerfResult {
|
||||
int64_t nr;
|
||||
struct {
|
||||
int64_t value;
|
||||
int64_t id;
|
||||
} values[2];
|
||||
};
|
||||
struct PerfCount {
|
||||
int64_t value[2];
|
||||
};
|
||||
#endif
|
||||
|
||||
struct MS_API CheckTensor {
|
||||
CheckTensor(const std::vector<size_t> &shape, const std::vector<float> &data,
|
||||
const std::vector<std::string> &strings_data = {""}) {
|
||||
this->shape = shape;
|
||||
this->data = data;
|
||||
this->strings_data = strings_data;
|
||||
}
|
||||
std::vector<size_t> shape;
|
||||
std::vector<float> data;
|
||||
std::vector<std::string> strings_data;
|
||||
};
|
||||
|
||||
// Command-line flags for the benchmark tool. Every option is registered with
// FlagParser in the constructor; parsed values land in the public members
// below, which the BenchmarkBase subclasses read directly.
class MS_API BenchmarkFlags : public virtual FlagParser {
 public:
  BenchmarkFlags() {
    // common
    AddFlag(&BenchmarkFlags::model_file_, "modelFile", "Input model file", "");
    AddFlag(&BenchmarkFlags::in_data_file_, "inDataFile", "Input data file, if not set, use random input", "");
    AddFlag(&BenchmarkFlags::device_, "device", "CPU | GPU | NPU", "CPU");
    AddFlag(&BenchmarkFlags::cpu_bind_mode_, "cpuBindMode",
            "Input 0 for NO_BIND, 1 for HIGHER_CPU, 2 for MID_CPU, default value: 1", 1);
    // MarkPerformance
    AddFlag(&BenchmarkFlags::loop_count_, "loopCount", "Run loop count", 10);
    AddFlag(&BenchmarkFlags::num_threads_, "numThreads", "Run threads number", 2);
    AddFlag(&BenchmarkFlags::enable_fp16_, "enableFp16", "Enable float16", false);
    AddFlag(&BenchmarkFlags::enable_parallel_, "enableParallel", "Enable subgraph parallel : true | false", false);
    AddFlag(&BenchmarkFlags::warm_up_loop_count_, "warmUpLoopCount", "Run warm up loop", 3);
    AddFlag(&BenchmarkFlags::time_profiling_, "timeProfiling", "Run time profiling", false);
    AddFlag(&BenchmarkFlags::perf_profiling_, "perfProfiling",
            "Perf event profiling(only instructions statics enabled currently)", false);
    AddFlag(&BenchmarkFlags::perf_event_, "perfEvent", "CYCLE|CACHE|STALL", "CYCLE");
    // MarkAccuracy
    AddFlag(&BenchmarkFlags::benchmark_data_file_, "benchmarkDataFile", "Benchmark data file path", "");
    AddFlag(&BenchmarkFlags::benchmark_data_type_, "benchmarkDataType",
            "Benchmark data type. FLOAT | INT32 | INT8 | UINT8", "FLOAT");
    AddFlag(&BenchmarkFlags::accuracy_threshold_, "accuracyThreshold", "Threshold of accuracy", 0.5);
    AddFlag(&BenchmarkFlags::resize_dims_in_, "inputShapes",
            "Shape of input data, the format should be NHWC. e.g. 1,32,32,32:1,1,32,32,1", "");
  }

  ~BenchmarkFlags() override = default;

  // Presumably splits in_data_file_ into input_data_list_ — implementation
  // lives in the .cc; confirm the delimiter there.
  void InitInputDataList();

  // Presumably parses resize_dims_in_ into resize_dims_ — see the .cc.
  void InitResizeDimsList();

 public:
  // common
  std::string model_file_;
  std::string in_data_file_;
  std::vector<std::string> input_data_list_;
  InDataType in_data_type_ = kBinary;
  std::string in_data_type_in_ = "bin";
  int cpu_bind_mode_ = 1;
  // MarkPerformance
  int loop_count_ = 10;
  int num_threads_ = 2;
  bool enable_fp16_ = false;
  bool enable_parallel_ = false;
  int warm_up_loop_count_ = 3;
  // MarkAccuracy
  std::string benchmark_data_file_;
  std::string benchmark_data_type_ = "FLOAT";
  float accuracy_threshold_ = 0.5;
  // Resize
  std::string resize_dims_in_;
  std::vector<std::vector<int>> resize_dims_;

  std::string device_ = "CPU";
  bool time_profiling_ = false;
  bool perf_profiling_ = false;
  std::string perf_event_ = "CYCLE";
  bool dump_tensor_data_ = false;
  bool print_tensor_data_ = false;
};
|
||||
|
||||
class MS_API BenchmarkBase {
|
||||
public:
|
||||
explicit BenchmarkBase(BenchmarkFlags *flags) : flags_(flags) {}
|
||||
|
||||
virtual ~BenchmarkBase();
|
||||
|
||||
int Init();
|
||||
virtual int RunBenchmark() = 0;
|
||||
|
||||
protected:
|
||||
int LoadInput();
|
||||
|
||||
virtual int GenerateInputData() = 0;
|
||||
|
||||
int GenerateRandomData(size_t size, void *data, int data_type);
|
||||
|
||||
virtual int ReadInputFile() = 0;
|
||||
|
||||
int ReadCalibData();
|
||||
|
||||
virtual int ReadTensorData(std::ifstream &in_file_stream, const std::string &tensor_name,
|
||||
const std::vector<size_t> &dims) = 0;
|
||||
|
||||
virtual int CompareOutput() = 0;
|
||||
|
||||
int CompareStringData(const std::string &name, tensor::MSTensor *tensor);
|
||||
|
||||
int InitDumpConfigFromJson(char *path);
|
||||
|
||||
int InitCallbackParameter();
|
||||
|
||||
virtual int InitTimeProfilingCallbackParameter() = 0;
|
||||
|
||||
virtual int InitPerfProfilingCallbackParameter() = 0;
|
||||
|
||||
virtual int InitDumpTensorDataCallbackParameter() = 0;
|
||||
|
||||
virtual int InitPrintTensorDataCallbackParameter() = 0;
|
||||
|
||||
int PrintResult(const std::vector<std::string> &title, const std::map<std::string, std::pair<int, float>> &result);
|
||||
|
||||
#ifdef ENABLE_ARM64
|
||||
int PrintPerfResult(const std::vector<std::string> &title,
|
||||
const std::map<std::string, std::pair<int, struct PerfCount>> &result);
|
||||
#endif
|
||||
|
||||
// tensorData need to be converter first
|
||||
template <typename T, typename ST>
|
||||
float CompareData(const std::string &nodeName, const std::vector<ST> &msShape, const void *tensor_data) {
|
||||
const T *msTensorData = static_cast<const T *>(tensor_data);
|
||||
auto iter = this->benchmark_data_.find(nodeName);
|
||||
if (iter != this->benchmark_data_.end()) {
|
||||
std::vector<size_t> castedMSShape;
|
||||
size_t shapeSize = 1;
|
||||
for (int64_t dim : msShape) {
|
||||
castedMSShape.push_back(size_t(dim));
|
||||
shapeSize *= dim;
|
||||
}
|
||||
|
||||
CheckTensor *calibTensor = iter->second;
|
||||
if (calibTensor->shape != castedMSShape) {
|
||||
std::ostringstream oss;
|
||||
oss << "Shape of mslite output(";
|
||||
for (auto dim : castedMSShape) {
|
||||
oss << dim << ",";
|
||||
}
|
||||
oss << ") and shape source model output(";
|
||||
for (auto dim : calibTensor->shape) {
|
||||
oss << dim << ",";
|
||||
}
|
||||
oss << ") are different";
|
||||
std::cerr << oss.str() << std::endl;
|
||||
MS_LOG(ERROR) << oss.str().c_str();
|
||||
return RET_ERROR;
|
||||
}
|
||||
size_t errorCount = 0;
|
||||
float meanError = 0;
|
||||
std::cout << "Data of node " << nodeName << " : ";
|
||||
for (size_t j = 0; j < shapeSize; j++) {
|
||||
if (j < 50) {
|
||||
std::cout << static_cast<float>(msTensorData[j]) << " ";
|
||||
}
|
||||
|
||||
if (std::isnan(msTensorData[j]) || std::isinf(msTensorData[j])) {
|
||||
std::cerr << "Output tensor has nan or inf data, compare fail" << std::endl;
|
||||
MS_LOG(ERROR) << "Output tensor has nan or inf data, compare fail";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
auto tolerance = absoluteTolerance + relativeTolerance * fabs(calibTensor->data.at(j));
|
||||
auto absoluteError = std::fabs(msTensorData[j] - calibTensor->data.at(j));
|
||||
if (absoluteError > tolerance) {
|
||||
if (fabs(calibTensor->data.at(j) - 0.0f) < FLT_EPSILON) {
|
||||
if (absoluteError > 1e-5) {
|
||||
meanError += absoluteError;
|
||||
errorCount++;
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
// just assume that atol = rtol
|
||||
meanError += absoluteError / (fabs(calibTensor->data.at(j)) + FLT_MIN);
|
||||
errorCount++;
|
||||
}
|
||||
}
|
||||
}
|
||||
std::cout << std::endl;
|
||||
if (meanError > 0.0f) {
|
||||
meanError /= errorCount;
|
||||
}
|
||||
|
||||
if (meanError <= 0.0000001) {
|
||||
std::cout << "Mean bias of node/tensor " << nodeName << " : 0%" << std::endl;
|
||||
} else {
|
||||
std::cout << "Mean bias of node/tensor " << nodeName << " : " << meanError * 100 << "%" << std::endl;
|
||||
}
|
||||
return meanError;
|
||||
} else {
|
||||
MS_LOG(INFO) << "%s is not in Source Model output", nodeName.c_str();
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T, typename Distribution>
|
||||
void FillInputData(int size, void *data, Distribution distribution) {
|
||||
MS_ASSERT(data != nullptr);
|
||||
int elements_num = size / sizeof(T);
|
||||
(void)std::generate_n(static_cast<T *>(data), elements_num,
|
||||
[&]() { return static_cast<T>(distribution(random_engine_)); });
|
||||
}
|
||||
|
||||
int CheckThreadNumValid();
|
||||
|
||||
protected:
|
||||
BenchmarkFlags *flags_;
|
||||
std::unordered_map<std::string, CheckTensor *> benchmark_data_;
|
||||
std::unordered_map<std::string, int> data_type_map_{
|
||||
{"FLOAT", kNumberTypeFloat}, {"INT8", kNumberTypeInt8}, {"INT32", kNumberTypeInt32}, {"UINT8", kNumberTypeUInt8}};
|
||||
int msCalibDataType = kNumberTypeFloat;
|
||||
|
||||
// callback parameters
|
||||
uint64_t op_begin_ = 0;
|
||||
int op_call_times_total_ = 0;
|
||||
float op_cost_total_ = 0.0f;
|
||||
std::map<std::string, std::pair<int, float>> op_times_by_type_;
|
||||
std::map<std::string, std::pair<int, float>> op_times_by_name_;
|
||||
|
||||
// dump data
|
||||
nlohmann::json dump_cfg_json_;
|
||||
std::string dump_file_output_dir_;
|
||||
#ifdef ENABLE_ARM64
|
||||
int perf_fd = 0;
|
||||
int perf_fd2 = 0;
|
||||
float op_cost2_total_ = 0.0f;
|
||||
std::map<std::string, std::pair<int, struct PerfCount>> op_perf_by_type_;
|
||||
std::map<std::string, std::pair<int, struct PerfCount>> op_perf_by_name_;
|
||||
#endif
|
||||
std::mt19937 random_engine_;
|
||||
};
|
||||
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINNIE_BENCHMARK_BENCHMARK_BASE_H_
|
|
@ -0,0 +1,828 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "tools/benchmark/benchmark_unified_api.h"
|
||||
#define __STDC_FORMAT_MACROS
|
||||
#include <cinttypes>
|
||||
#undef __STDC_FORMAT_MACROS
|
||||
#include <algorithm>
|
||||
#include <utility>
|
||||
#include <functional>
|
||||
#include "include/context.h"
|
||||
#include "include/ms_tensor.h"
|
||||
#include "include/version.h"
|
||||
#include "schema/model_generated.h"
|
||||
#include "src/common/common.h"
|
||||
#include "src/tensor.h"
|
||||
#ifdef ENABLE_ARM64
|
||||
#include <linux/perf_event.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <asm/unistd.h>
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#ifdef SUPPORT_NNIE
|
||||
#include "include/hi_common.h"
|
||||
#include "include/hi_comm_vb.h"
|
||||
#include "include/mpi_sys.h"
|
||||
#include "include/mpi_vb.h"
|
||||
#endif
|
||||
|
||||
namespace mindspore {
|
||||
namespace lite {
|
||||
|
||||
int BenchmarkUnifiedApi::GenerateInputData() {
|
||||
for (auto tensor : ms_inputs_for_api_) {
|
||||
MS_ASSERT(tensor != nullptr);
|
||||
auto input_data = tensor.MutableData();
|
||||
if (input_data == nullptr) {
|
||||
MS_LOG(ERROR) << "MallocData for inTensor failed";
|
||||
return RET_ERROR;
|
||||
}
|
||||
int status;
|
||||
if (static_cast<int>(tensor.DataType()) == kObjectTypeString) {
|
||||
std::cerr << "Unsupported kObjectTypeString:" << std::endl;
|
||||
MS_LOG(ERROR) << "Unsupported kObjectTypeString:";
|
||||
return RET_ERROR;
|
||||
// status = StringsToMSTensor({"you're the best."}, tensor);
|
||||
} else {
|
||||
status = GenerateRandomData(tensor.DataSize(), input_data, static_cast<int>(tensor.DataType()));
|
||||
}
|
||||
if (status != RET_OK) {
|
||||
std::cerr << "GenerateRandomData for inTensor failed: " << status << std::endl;
|
||||
MS_LOG(ERROR) << "GenerateRandomData for inTensor failed:" << status;
|
||||
return status;
|
||||
}
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int BenchmarkUnifiedApi::ReadInputFile() {
|
||||
if (ms_inputs_for_api_.empty()) {
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
if (this->flags_->in_data_type_ == kImage) {
|
||||
MS_LOG(ERROR) << "Not supported image input";
|
||||
return RET_ERROR;
|
||||
} else {
|
||||
for (size_t i = 0; i < flags_->input_data_list_.size(); i++) {
|
||||
auto cur_tensor = ms_inputs_for_api_.at(i);
|
||||
MS_ASSERT(cur_tensor != nullptr);
|
||||
size_t size;
|
||||
char *bin_buf = ReadFile(flags_->input_data_list_[i].c_str(), &size);
|
||||
if (bin_buf == nullptr) {
|
||||
MS_LOG(ERROR) << "ReadFile return nullptr";
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (static_cast<int>(cur_tensor.DataType()) == kObjectTypeString) {
|
||||
std::cerr << "Unsupported kObjectTypeString:" << std::endl;
|
||||
MS_LOG(ERROR) << "Unsupported kObjectTypeString:";
|
||||
return RET_ERROR;
|
||||
|
||||
} else {
|
||||
auto tensor_data_size = cur_tensor.DataSize();
|
||||
if (size != tensor_data_size) {
|
||||
std::cerr << "Input binary file size error, required: " << tensor_data_size << ", in fact: " << size
|
||||
<< std::endl;
|
||||
MS_LOG(ERROR) << "Input binary file size error, required: " << tensor_data_size << ", in fact: " << size;
|
||||
delete[] bin_buf;
|
||||
return RET_ERROR;
|
||||
}
|
||||
auto input_data = cur_tensor.MutableData();
|
||||
if (input_data == nullptr) {
|
||||
MS_LOG(ERROR) << "input_data is nullptr.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
memcpy(input_data, bin_buf, tensor_data_size);
|
||||
}
|
||||
delete[] bin_buf;
|
||||
}
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
// Parses one calibration tensor from the open calib-data stream into
// benchmark_data_. The current line holds the numeric values (or the first
// string); string tensors then read one additional line per element.
// Tensors already cached are skipped (their data line is still consumed to
// keep the stream position consistent).
int BenchmarkUnifiedApi::ReadTensorData(std::ifstream &in_file_stream, const std::string &tensor_name,
                                        const std::vector<size_t> &dims) {
  std::string line;
  getline(in_file_stream, line);  // consume the data line even if cached
  std::stringstream line_stream(line);
  if (this->benchmark_data_.find(tensor_name) != this->benchmark_data_.end()) {
    return RET_OK;
  }
  mindspore::MSTensor tensor = GetMSTensorByNameOrShape(tensor_name, dims);
  if (tensor == nullptr) {
    MS_LOG(ERROR) << "Get tensor failed, tensor name: " << tensor_name;
    return RET_ERROR;
  }
  std::vector<float> data;
  std::vector<std::string> strings_data;
  size_t shape_size = std::accumulate(dims.begin(), dims.end(), 1, std::multiplies<size_t>());
  if (static_cast<int>(tensor.DataType()) == kObjectTypeString) {
    // String tensor: one string per line, shape_size lines in total.
    strings_data.push_back(line);
    for (size_t i = 1; i < shape_size; i++) {
      getline(in_file_stream, line);
      strings_data.push_back(line);
    }
  } else {
    // Numeric tensor: shape_size whitespace-separated floats on one line.
    for (size_t i = 0; i < shape_size; i++) {
      float tmp_data;
      line_stream >> tmp_data;
      data.push_back(tmp_data);
    }
  }
  auto *check_tensor = new (std::nothrow) CheckTensor(dims, data, strings_data);
  if (check_tensor == nullptr) {
    MS_LOG(ERROR) << "New CheckTensor failed, tensor name: " << tensor_name;
    return RET_ERROR;
  }
  // Ownership of check_tensor transfers to benchmark_data_ (freed in dtor).
  this->benchmark_data_.insert(std::make_pair(tensor_name, check_tensor));
  return RET_OK;
}
|
||||
|
||||
// Translates the parsed command-line flags into a mindspore::Context:
// thread settings first, then one DeviceInfo per requested backend. CPU is
// always present and first in the device list; GPU or NPU is appended when
// selected via --device.
void BenchmarkUnifiedApi::InitMSContext(const std::shared_ptr<mindspore::Context> &context) {
  context->SetThreadNum(flags_->num_threads_);
  context->SetEnableParallel(flags_->enable_parallel_);
  context->SetThreadAffinity(flags_->cpu_bind_mode_);
  auto &device_list = context->MutableDeviceInfo();

  auto cpu_info = std::make_shared<CPUDeviceInfo>();
  cpu_info->SetEnableFP16(flags_->enable_fp16_);
  device_list.push_back(cpu_info);

  if (flags_->device_ == "GPU") {
    auto gpu_info = std::make_shared<MaliGPUDeviceInfo>();
    gpu_info->SetEnableFP16(flags_->enable_fp16_);
    device_list.push_back(gpu_info);
  }

  if (flags_->device_ == "NPU") {
    auto npu_info = std::make_shared<KirinNPUDeviceInfo>();
    npu_info->SetFrequency(3);  // frequency level 3; semantics defined by the NPU runtime
    device_list.push_back(npu_info);
  }
}
|
||||
|
||||
// Compares every cached calibration tensor against the model's actual
// outputs and accumulates a mean bias percentage. Fails if a calibration
// entry cannot be matched to an output, any per-tensor comparison errors
// out, or the overall mean bias exceeds flags_->accuracy_threshold_.
int BenchmarkUnifiedApi::CompareOutput() {
  std::cout << "================ Comparing Output data ================" << std::endl;
  float total_bias = 0;
  int total_size = 0;
  for (const auto &calib_tensor : benchmark_data_) {
    std::string node_or_tensor_name = calib_tensor.first;
    // Match by node name, tensor name, or (as a last resort) output shape.
    mindspore::MSTensor tensor = GetMSTensorByNameOrShape(node_or_tensor_name, calib_tensor.second->shape);
    if (tensor == nullptr) {
      MS_LOG(ERROR) << "Get tensor failed, tensor name: " << node_or_tensor_name;
      return RET_ERROR;
    }
    int ret;
    if (static_cast<int>(tensor.DataType()) == kObjectTypeString) {
      std::cerr << "Unsupported kObjectTypeString:" << std::endl;
      MS_LOG(ERROR) << "Unsupported kObjectTypeString:";
      return RET_ERROR;
      // ret = CompareStringData(node_or_tensor_name, tensor);
    } else {
      ret = CompareDataGetTotalBiasAndSize(node_or_tensor_name, &tensor, &total_bias, &total_size);
    }
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "Error in CompareData";
      std::cerr << "Error in CompareData" << std::endl;
      std::cout << "=======================================================" << std::endl << std::endl;
      return ret;
    }
  }
  float mean_bias;
  if (total_size != 0) {
    mean_bias = total_bias / float_t(total_size) * 100;  // as a percentage
  } else {
    mean_bias = 0;
  }

  std::cout << "Mean bias of all nodes/tensors: " << mean_bias << "%" << std::endl;
  std::cout << "=======================================================" << std::endl << std::endl;

  if (mean_bias > this->flags_->accuracy_threshold_) {
    MS_LOG(ERROR) << "Mean bias of all nodes/tensors is too big: " << mean_bias << "%";
    std::cerr << "Mean bias of all nodes/tensors is too big: " << mean_bias << "%" << std::endl;
    return RET_ERROR;
  }
  return RET_OK;
}
|
||||
|
||||
// Falls back to matching a model output tensor by shape when name lookup
// failed. Returns the first output whose shape equals |node_shape|, or a
// default-constructed (null) MSTensor when nothing matches — callers already
// check the result against nullptr.
// Fix: the original called match_tensors.front() unconditionally, which is
// undefined behavior when no output shape matches.
mindspore::MSTensor BenchmarkUnifiedApi::GetMSTensorByNodeShape(const std::vector<size_t> &node_shape) {
  std::vector<mindspore::MSTensor> match_tensors;
  std::vector<int64_t> shape_vector = ConverterToInt64Vector<size_t>(node_shape);
  auto tensors = ms_model_.GetOutputs();
  for (auto &out_tensor_pair : tensors) {
    if (out_tensor_pair.Shape() == shape_vector) {
      match_tensors.emplace_back(out_tensor_pair);
    }
  }
  if (match_tensors.empty()) {
    MS_LOG(ERROR) << "Cannot find output tensor with matching shape.";
    return mindspore::MSTensor();
  }

  return match_tensors.front();
}
|
||||
|
||||
// Resolves a calibration entry to a model output tensor. Tries, in order:
// (1) node name that has exactly one output, (2) output tensor name,
// (3) any output whose shape matches |dims|.
mindspore::MSTensor BenchmarkUnifiedApi::GetMSTensorByNameOrShape(const std::string &node_or_tensor_name,
                                                                  const std::vector<size_t> &dims) {
  mindspore::MSTensor tensor;
  auto tensors = ms_model_.GetOutputsByNodeName(node_or_tensor_name);
  if (tensors.empty() || tensors.size() != 1) {
    MS_LOG(INFO) << "Cannot find output node: " << node_or_tensor_name
                 << " or node has more than one output tensor, switch to GetOutputByTensorName";
    tensor = ms_model_.GetOutputByTensorName(node_or_tensor_name);
    if (tensor == nullptr) {
      // Last resort: match by shape.
      return GetMSTensorByNodeShape(dims);
    }
  } else {
    tensor = tensors.front();
  }
  return tensor;
}
|
||||
|
||||
// Dispatches CompareData<T> on the tensor's element type and folds the
// resulting per-tensor bias into the running totals (*total_bias grows by
// the bias, *total_size counts compared tensors). Returns RET_ERROR for
// unsupported dtypes or when the comparison itself fails (negative bias).
int BenchmarkUnifiedApi::CompareDataGetTotalBiasAndSize(const std::string &name, mindspore::MSTensor *tensor,
                                                        float *total_bias, int *total_size) {
  float bias = 0;
  auto mutableData = tensor->MutableData();
  if (mutableData == nullptr) {
    MS_LOG(ERROR) << "mutableData is nullptr.";
    return RET_ERROR;
  }
  switch (static_cast<int>(tensor->DataType())) {
    case TypeId::kNumberTypeFloat:
    case TypeId::kNumberTypeFloat32: {
      bias = CompareData<float>(name, tensor->Shape(), mutableData);
      break;
    }
    case TypeId::kNumberTypeInt8: {
      bias = CompareData<int8_t>(name, tensor->Shape(), mutableData);
      break;
    }
    case TypeId::kNumberTypeUInt8: {
      bias = CompareData<uint8_t>(name, tensor->Shape(), mutableData);
      break;
    }
    case TypeId::kNumberTypeInt32: {
      bias = CompareData<int32_t>(name, tensor->Shape(), mutableData);
      break;
    }
    case TypeId::kNumberTypeInt16: {
      bias = CompareData<int16_t>(name, tensor->Shape(), mutableData);
      break;
    }
    case TypeId::kNumberTypeBool: {
      bias = CompareData<bool>(name, tensor->Shape(), mutableData);
      break;
    }
    default:
      MS_LOG(ERROR) << "Datatype " << static_cast<int>(tensor->DataType()) << " is not supported.";
      return RET_ERROR;
  }
  // CompareData signals failure with a negative value (RET_ERROR).
  if (bias < 0) {
    MS_LOG(ERROR) << "CompareData failed, name: " << name;
    return RET_ERROR;
  }
  *total_bias += bias;
  *total_size += 1;
  return RET_OK;
}
|
||||
|
||||
// Performance mode: runs warm_up_loop_count_ untimed inferences, then
// loop_count_ timed ones; reports min/max/avg latency and, when enabled,
// per-op time tables or (ARM64) per-op perf-counter tables.
int BenchmarkUnifiedApi::MarkPerformance() {
  MS_LOG(INFO) << "Running warm up loops...";
  std::cout << "Running warm up loops..." << std::endl;
  std::vector<MSTensor> outputs;

  for (int i = 0; i < flags_->warm_up_loop_count_; i++) {
    auto status = ms_model_.Predict(ms_inputs_for_api_, &outputs);
    if (status != kSuccess) {
      MS_LOG(ERROR) << "Inference error ";
      std::cerr << "Inference error " << std::endl;
      return RET_ERROR;
    }
  }

  MS_LOG(INFO) << "Running benchmark loops...";
  std::cout << "Running benchmark loops..." << std::endl;
  uint64_t time_min = 1000000;  // microseconds
  uint64_t time_max = 0;
  uint64_t time_avg = 0;  // sum of loop times; divided by loop count below

  for (int i = 0; i < flags_->loop_count_; i++) {
    auto inputs = ms_model_.GetInputs();
    for (auto tensor : inputs) {
      tensor.MutableData();  // prepare data
    }
    auto start = GetTimeUs();
    // The registered callbacks accumulate per-op timing / perf counters.
    auto status = ms_model_.Predict(ms_inputs_for_api_, &outputs, ms_before_call_back_, ms_after_call_back_);
    if (status != kSuccess) {
      MS_LOG(ERROR) << "Inference error ";
      std::cerr << "Inference error ";
      return RET_ERROR;
    }

    auto end = GetTimeUs();
    auto time = end - start;
    time_min = std::min(time_min, time);
    time_max = std::max(time_max, time);
    time_avg += time;
  }

  if (flags_->time_profiling_) {
    const std::vector<std::string> per_op_name = {"opName", "avg(ms)", "percent", "calledTimes", "opTotalTime"};
    const std::vector<std::string> per_op_type = {"opType", "avg(ms)", "percent", "calledTimes", "opTotalTime"};
    PrintResult(per_op_name, op_times_by_name_);
    PrintResult(per_op_type, op_times_by_type_);
#ifdef ENABLE_ARM64
  } else if (flags_->perf_profiling_) {
    // Column headers depend on which pair of perf events was sampled.
    if (flags_->perf_event_ == "CACHE") {
      const std::vector<std::string> per_op_name = {"opName", "cache ref(k)", "cache ref(%)", "miss(k)", "miss(%)"};
      const std::vector<std::string> per_op_type = {"opType", "cache ref(k)", "cache ref(%)", "miss(k)", "miss(%)"};
      PrintPerfResult(per_op_name, op_perf_by_name_);
      PrintPerfResult(per_op_type, op_perf_by_type_);
    } else if (flags_->perf_event_ == "STALL") {
      const std::vector<std::string> per_op_name = {"opName", "frontend(k)", "frontend(%)", "backendend(k)",
                                                    "backendend(%)"};
      const std::vector<std::string> per_op_type = {"opType", "frontend(k)", "frontend(%)", "backendend(k)",
                                                    "backendend(%)"};
      PrintPerfResult(per_op_name, op_perf_by_name_);
      PrintPerfResult(per_op_type, op_perf_by_type_);
    } else {
      const std::vector<std::string> per_op_name = {"opName", "cycles(k)", "cycles(%)", "ins(k)", "ins(%)"};
      const std::vector<std::string> per_op_type = {"opType", "cycles(k)", "cycles(%)", "ins(k)", "ins(%)"};
      PrintPerfResult(per_op_name, op_perf_by_name_);
      PrintPerfResult(per_op_type, op_perf_by_type_);
    }
#endif
  }

  if (flags_->loop_count_ > 0) {
    time_avg /= flags_->loop_count_;
    MS_LOG(INFO) << "Model = " << flags_->model_file_.substr(flags_->model_file_.find_last_of(DELIM_SLASH) + 1).c_str()
                 << ", NumThreads = " << flags_->num_threads_ << ", MinRunTime = " << time_min / 1000.0f
                 << ", MaxRuntime = " << time_max / 1000.0f << ", AvgRunTime = " << time_avg / 1000.0f;
    printf("Model = %s, NumThreads = %d, MinRunTime = %f ms, MaxRuntime = %f ms, AvgRunTime = %f ms\n",
           flags_->model_file_.substr(flags_->model_file_.find_last_of(DELIM_SLASH) + 1).c_str(), flags_->num_threads_,
           time_min / 1000.0f, time_max / 1000.0f, time_avg / 1000.0f);
  }
  return RET_OK;
}
|
||||
|
||||
// Accuracy mode: echoes the inputs, runs a single inference (with any
// registered dump/print callbacks), reads the calibration file, and compares
// the outputs against it.
int BenchmarkUnifiedApi::MarkAccuracy() {
  MS_LOG(INFO) << "MarkAccuracy";
  std::cout << "MarkAccuracy" << std::endl;

  auto status = PrintInputData();
  if (status != RET_OK) {
    MS_LOG(ERROR) << "PrintInputData error " << status;
    std::cerr << "PrintInputData error " << status << std::endl;
    return status;
  }
  std::vector<MSTensor> outputs;
  auto ret = ms_model_.Predict(ms_inputs_for_api_, &outputs, ms_before_call_back_, ms_after_call_back_);
  if (ret != kSuccess) {
    MS_LOG(ERROR) << "Inference error ";
    std::cerr << "Inference error " << std::endl;
    return RET_ERROR;
  }
  status = ReadCalibData();
  if (status != RET_OK) {
    MS_LOG(ERROR) << "Read calib data error " << status;
    std::cerr << "Read calib data error " << status << std::endl;
    return status;
  }
  status = CompareOutput();
  if (status != RET_OK) {
    MS_LOG(ERROR) << "Compare output error " << status;
    std::cerr << "Compare output error " << status << std::endl;
    return status;
  }
  return RET_OK;
}
|
||||
|
||||
// Echoes (up to) the first 20 elements of every input tensor so the run's
// inputs can be audited alongside the accuracy results. String tensors and
// unknown dtypes fail the run.
int BenchmarkUnifiedApi::PrintInputData() {
  for (size_t i = 0; i < ms_inputs_for_api_.size(); i++) {
    auto input = ms_inputs_for_api_[i];
    MS_ASSERT(input != nullptr);
    auto tensor_data_type = static_cast<int>(input.DataType());

    std::cout << "InData" << i << ": ";
    if (tensor_data_type == TypeId::kObjectTypeString) {
      std::cerr << "Unsupported kObjectTypeString:" << std::endl;
      MS_LOG(ERROR) << "Unsupported kObjectTypeString:";
      return RET_ERROR;
    }
    size_t print_num = std::min(static_cast<int>(input.ElementNum()), 20);
    const void *in_data = input.MutableData();
    if (in_data == nullptr) {
      MS_LOG(ERROR) << "in_data is nullptr.";
      return RET_ERROR;
    }

    for (size_t j = 0; j < print_num; j++) {
      if (tensor_data_type == TypeId::kNumberTypeFloat32 || tensor_data_type == TypeId::kNumberTypeFloat) {
        std::cout << static_cast<const float *>(in_data)[j] << " ";
      } else if (tensor_data_type == TypeId::kNumberTypeInt8) {
        // NOTE(review): int8/uint8 values are streamed as char and thus print
        // as characters, not numbers — confirm whether that is intended.
        std::cout << static_cast<const int8_t *>(in_data)[j] << " ";
      } else if (tensor_data_type == TypeId::kNumberTypeUInt8) {
        std::cout << static_cast<const uint8_t *>(in_data)[j] << " ";
      } else if (tensor_data_type == TypeId::kNumberTypeInt32) {
        std::cout << static_cast<const int32_t *>(in_data)[j] << " ";
      } else if (tensor_data_type == TypeId::kNumberTypeInt64) {
        std::cout << static_cast<const int64_t *>(in_data)[j] << " ";
      } else if (tensor_data_type == TypeId::kNumberTypeBool) {
        std::cout << static_cast<const bool *>(in_data)[j] << " ";
      } else {
        MS_LOG(ERROR) << "Datatype: " << tensor_data_type << " is not supported.";
        return RET_ERROR;
      }
    }
    std::cout << std::endl;
  }
  return RET_OK;
}
|
||||
|
||||
int BenchmarkUnifiedApi::RunBenchmark() {
|
||||
auto start_prepare_time = GetTimeUs();
|
||||
// Load graph
|
||||
std::string model_name = flags_->model_file_.substr(flags_->model_file_.find_last_of(DELIM_SLASH) + 1);
|
||||
|
||||
MS_LOG(INFO) << "start reading model file";
|
||||
std::cout << "start reading model file" << std::endl;
|
||||
size_t size = 0;
|
||||
char *graph_buf = ReadFile(flags_->model_file_.c_str(), &size);
|
||||
if (graph_buf == nullptr) {
|
||||
MS_LOG(ERROR) << "Read model file failed while running " << model_name.c_str();
|
||||
std::cerr << "Read model file failed while running " << model_name.c_str() << std::endl;
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
auto context = std::make_shared<mindspore::Context>();
|
||||
if (context == nullptr) {
|
||||
MS_LOG(ERROR) << "New context failed while running " << model_name.c_str();
|
||||
std::cerr << "New context failed while running " << model_name.c_str() << std::endl;
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
(void)InitMSContext(context);
|
||||
auto ret = ms_model_.Build(graph_buf, size, kMindIR, context);
|
||||
if (ret != kSuccess) {
|
||||
MS_LOG(ERROR) << "ms_model_.Build failed while running ", model_name.c_str();
|
||||
std::cout << "ms_model_.Build failed while running ", model_name.c_str();
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
if (!flags_->resize_dims_.empty()) {
|
||||
std::vector<std::vector<int64_t>> resize_dims;
|
||||
(void)std::transform(flags_->resize_dims_.begin(), flags_->resize_dims_.end(), std::back_inserter(resize_dims),
|
||||
[&](auto &shapes) { return this->ConverterToInt64Vector<int>(shapes); });
|
||||
|
||||
ret = ms_model_.Resize(ms_model_.GetInputs(), resize_dims);
|
||||
if (ret != kSuccess) {
|
||||
MS_LOG(ERROR) << "Input tensor resize failed.";
|
||||
std::cout << "Input tensor resize failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
ms_inputs_for_api_ = ms_model_.GetInputs();
|
||||
auto end_prepare_time = GetTimeUs();
|
||||
MS_LOG(INFO) << "PrepareTime = " << (end_prepare_time - start_prepare_time) / 1000 << " ms";
|
||||
std::cout << "PrepareTime = " << (end_prepare_time - start_prepare_time) / 1000 << " ms" << std::endl;
|
||||
|
||||
// Load input
|
||||
MS_LOG(INFO) << "start generate input data";
|
||||
auto status = LoadInput();
|
||||
if (status != 0) {
|
||||
MS_LOG(ERROR) << "Generate input data error";
|
||||
return status;
|
||||
}
|
||||
if (!flags_->benchmark_data_file_.empty()) {
|
||||
status = MarkAccuracy();
|
||||
for (auto &data : benchmark_data_) {
|
||||
data.second->shape.clear();
|
||||
data.second->data.clear();
|
||||
delete data.second;
|
||||
data.second = nullptr;
|
||||
}
|
||||
benchmark_data_.clear();
|
||||
if (status != 0) {
|
||||
MS_LOG(ERROR) << "Run MarkAccuracy error: " << status;
|
||||
std::cout << "Run MarkAccuracy error: " << status << std::endl;
|
||||
return status;
|
||||
}
|
||||
} else {
|
||||
status = MarkPerformance();
|
||||
if (status != 0) {
|
||||
MS_LOG(ERROR) << "Run MarkPerformance error: " << status;
|
||||
std::cout << "Run MarkPerformance error: " << status << std::endl;
|
||||
return status;
|
||||
}
|
||||
}
|
||||
if (flags_->dump_tensor_data_) {
|
||||
std::cout << "Dumped file is saved to : " + dump_file_output_dir_ << std::endl;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int BenchmarkUnifiedApi::InitTimeProfilingCallbackParameter() {
|
||||
// before callback
|
||||
ms_before_call_back_ = [&](const std::vector<mindspore::MSTensor> &before_inputs,
|
||||
const std::vector<mindspore::MSTensor> &before_outputs,
|
||||
const MSCallBackParam &call_param) {
|
||||
if (before_inputs.empty()) {
|
||||
MS_LOG(INFO) << "The num of beforeInputs is empty";
|
||||
}
|
||||
if (before_outputs.empty()) {
|
||||
MS_LOG(INFO) << "The num of beforeOutputs is empty";
|
||||
}
|
||||
if (op_times_by_type_.find(call_param.node_type_) == op_times_by_type_.end()) {
|
||||
op_times_by_type_.insert(std::make_pair(call_param.node_type_, std::make_pair(0, 0.0f)));
|
||||
}
|
||||
if (op_times_by_name_.find(call_param.node_name_) == op_times_by_name_.end()) {
|
||||
op_times_by_name_.insert(std::make_pair(call_param.node_name_, std::make_pair(0, 0.0f)));
|
||||
}
|
||||
|
||||
op_call_times_total_++;
|
||||
op_begin_ = GetTimeUs();
|
||||
return true;
|
||||
};
|
||||
|
||||
// after callback
|
||||
ms_after_call_back_ = [&](const std::vector<mindspore::MSTensor> &after_inputs,
|
||||
const std::vector<mindspore::MSTensor> &after_outputs, const MSCallBackParam &call_param) {
|
||||
uint64_t opEnd = GetTimeUs();
|
||||
|
||||
if (after_inputs.empty()) {
|
||||
MS_LOG(INFO) << "The num of after inputs is empty";
|
||||
}
|
||||
if (after_outputs.empty()) {
|
||||
MS_LOG(INFO) << "The num of after outputs is empty";
|
||||
}
|
||||
|
||||
float cost = static_cast<float>(opEnd - op_begin_) / 1000.0f;
|
||||
if (flags_->device_ == "GPU") {
|
||||
auto gpu_param = reinterpret_cast<const GPUCallBackParam &>(call_param);
|
||||
cost = static_cast<float>(gpu_param.execute_time);
|
||||
}
|
||||
op_cost_total_ += cost;
|
||||
op_times_by_type_[call_param.node_type_].first++;
|
||||
op_times_by_type_[call_param.node_type_].second += cost;
|
||||
op_times_by_name_[call_param.node_name_].first++;
|
||||
op_times_by_name_[call_param.node_name_].second += cost;
|
||||
return true;
|
||||
};
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int BenchmarkUnifiedApi::InitPerfProfilingCallbackParameter() {
|
||||
#ifndef ENABLE_ARM64
|
||||
MS_LOG(ERROR) << "Only support perf_profiling on arm64.";
|
||||
return RET_ERROR;
|
||||
#else
|
||||
struct perf_event_attr pe, pe2;
|
||||
memset(&pe, 0, sizeof(struct perf_event_attr));
|
||||
memset(&pe2, 0, sizeof(struct perf_event_attr));
|
||||
pe.type = PERF_TYPE_HARDWARE;
|
||||
pe2.type = PERF_TYPE_HARDWARE;
|
||||
pe.size = sizeof(struct perf_event_attr);
|
||||
pe2.size = sizeof(struct perf_event_attr);
|
||||
pe.disabled = 1;
|
||||
pe2.disabled = 1;
|
||||
pe.exclude_kernel = 1; // don't count kernel
|
||||
pe2.exclude_kernel = 1; // don't count kernel
|
||||
pe.exclude_hv = 1; // don't count hypervisor
|
||||
pe2.exclude_hv = 1; // don't count hypervisor
|
||||
pe.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
|
||||
pe2.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
|
||||
if (flags_->perf_event_ == "CACHE") {
|
||||
pe.config = PERF_COUNT_HW_CACHE_REFERENCES;
|
||||
pe2.config = PERF_COUNT_HW_CACHE_MISSES;
|
||||
} else if (flags_->perf_event_ == "STALL") {
|
||||
pe.config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND;
|
||||
pe2.config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND;
|
||||
} else {
|
||||
pe.config = PERF_COUNT_HW_CPU_CYCLES;
|
||||
pe2.config = PERF_COUNT_HW_INSTRUCTIONS;
|
||||
}
|
||||
perf_fd = syscall(__NR_perf_event_open, pe, 0, -1, -1, 0);
|
||||
if (perf_fd == -1) {
|
||||
MS_LOG(ERROR) << "Failed to open perf event " << pe.config;
|
||||
return RET_ERROR;
|
||||
}
|
||||
perf_fd2 = syscall(__NR_perf_event_open, pe2, 0, -1, perf_fd, 0);
|
||||
if (perf_fd2 == -1) {
|
||||
MS_LOG(ERROR) << "Failed to open perf event " << pe2.config;
|
||||
return RET_ERROR;
|
||||
}
|
||||
struct PerfCount zero;
|
||||
zero.value[0] = 0;
|
||||
zero.value[1] = 0;
|
||||
// before callback
|
||||
ms_before_call_back_ = [&](const std::vector<mindspore::MSTensor> &before_inputs,
|
||||
const std::vector<mindspore::MSTensor> &before_outputs,
|
||||
const MSCallBackParam &call_param) {
|
||||
if (before_inputs.empty()) {
|
||||
MS_LOG(INFO) << "The num of beforeInputs is empty";
|
||||
}
|
||||
if (before_outputs.empty()) {
|
||||
MS_LOG(INFO) << "The num of beforeOutputs is empty";
|
||||
}
|
||||
if (op_perf_by_type_.find(call_param.node_type_) == op_perf_by_type_.end()) {
|
||||
op_perf_by_type_.insert(std::make_pair(call_param.node_type_, std::make_pair(0, zero)));
|
||||
}
|
||||
if (op_perf_by_name_.find(call_param.node_name_) == op_perf_by_name_.end()) {
|
||||
op_perf_by_name_.insert(std::make_pair(call_param.node_name_, std::make_pair(0, zero)));
|
||||
}
|
||||
|
||||
op_call_times_total_++;
|
||||
ioctl(perf_fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP);
|
||||
ioctl(perf_fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP);
|
||||
return true;
|
||||
};
|
||||
|
||||
// after callback
|
||||
ms_after_call_back_ = [&](const std::vector<mindspore::MSTensor> &after_inputs,
|
||||
const std::vector<mindspore::MSTensor> &after_outputs, const MSCallBackParam &call_param) {
|
||||
struct PerfResult res;
|
||||
ioctl(perf_fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP);
|
||||
read(perf_fd, &res, sizeof(struct PerfResult));
|
||||
|
||||
if (after_inputs.empty()) {
|
||||
MS_LOG(INFO) << "The num of after inputs is empty";
|
||||
}
|
||||
if (after_outputs.empty()) {
|
||||
MS_LOG(INFO) << "The num of after outputs is empty";
|
||||
}
|
||||
float cost1 = static_cast<float>(res.values[0].value);
|
||||
float cost2 = static_cast<float>(res.values[1].value);
|
||||
op_cost_total_ += cost1;
|
||||
op_cost2_total_ += cost2;
|
||||
op_perf_by_type_[call_param.node_type_].first++;
|
||||
op_perf_by_type_[call_param.node_type_].second.value[0] += cost1;
|
||||
op_perf_by_type_[call_param.node_type_].second.value[1] += cost2;
|
||||
op_perf_by_name_[call_param.node_name_].first++;
|
||||
op_perf_by_name_[call_param.node_name_].second.value[0] += cost1;
|
||||
op_perf_by_name_[call_param.node_name_].second.value[1] += cost2;
|
||||
return true;
|
||||
};
|
||||
#endif
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
namespace {
|
||||
template <typename T>
|
||||
std::string DataToString(void *data, size_t data_number) {
|
||||
if (data == nullptr) {
|
||||
return "Data of tensor is nullptr";
|
||||
}
|
||||
std::ostringstream oss;
|
||||
auto casted_data = static_cast<T *>(data);
|
||||
for (size_t i = 0; i < 40 && i < data_number; i++) {
|
||||
oss << " " << casted_data[i];
|
||||
}
|
||||
return oss.str();
|
||||
}
|
||||
|
||||
std::string DumpMSTensor(mindspore::MSTensor *tensor) {
|
||||
if (tensor == nullptr) {
|
||||
return "Tensor is nullptr";
|
||||
}
|
||||
std::ostringstream oss;
|
||||
oss << " DataType: " << static_cast<int>(tensor->DataType());
|
||||
oss << " Shape:";
|
||||
for (auto &dim : tensor->Shape()) {
|
||||
oss << " " << dim;
|
||||
}
|
||||
oss << std::endl << " Data:";
|
||||
switch (static_cast<int>(tensor->DataType())) {
|
||||
case kNumberTypeFloat32: {
|
||||
oss << DataToString<float>(tensor->MutableData(), tensor->ElementNum());
|
||||
} break;
|
||||
case kNumberTypeFloat16: {
|
||||
oss << DataToString<int16_t>(tensor->MutableData(), tensor->ElementNum());
|
||||
} break;
|
||||
case kNumberTypeInt32: {
|
||||
oss << DataToString<int32_t>(tensor->MutableData(), tensor->ElementNum());
|
||||
} break;
|
||||
case kNumberTypeInt16: {
|
||||
oss << DataToString<int16_t>(tensor->MutableData(), tensor->ElementNum());
|
||||
} break;
|
||||
case kNumberTypeInt8: {
|
||||
oss << DataToString<int8_t>(tensor->MutableData(), tensor->ElementNum());
|
||||
} break;
|
||||
default:
|
||||
oss << "Unsupported data type to print";
|
||||
break;
|
||||
}
|
||||
return oss.str();
|
||||
}
|
||||
|
||||
std::string GenerateOutputFileName(mindspore::MSTensor *tensor, const std::string &op_name,
|
||||
const std::string &file_type, const size_t &idx) {
|
||||
std::string file_name = op_name;
|
||||
auto pos = file_name.find_first_of('/');
|
||||
while (pos != std::string::npos) {
|
||||
file_name.replace(pos, 1, ".");
|
||||
pos = file_name.find_first_of('/');
|
||||
}
|
||||
file_name += "_" + file_type + "_" + std::to_string(idx) + "_shape_";
|
||||
for (const auto &dim : tensor->Shape()) {
|
||||
file_name += std::to_string(dim) + "_";
|
||||
}
|
||||
if (TYPE_ID_MAP.find(static_cast<int>(tensor->DataType())) != TYPE_ID_MAP.end()) {
|
||||
file_name += TYPE_ID_MAP.at(static_cast<int>(tensor->DataType()));
|
||||
}
|
||||
|
||||
file_name += +".bin";
|
||||
return file_name;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
int BenchmarkUnifiedApi::InitPrintTensorDataCallbackParameter() {
|
||||
// before callback
|
||||
ms_before_call_back_ = [&](const std::vector<mindspore::MSTensor> &before_inputs,
|
||||
const std::vector<mindspore::MSTensor> &before_outputs,
|
||||
const MSCallBackParam &call_param) { return true; };
|
||||
|
||||
// after callback
|
||||
ms_after_call_back_ = [&](const std::vector<mindspore::MSTensor> &after_inputs,
|
||||
const std::vector<mindspore::MSTensor> &after_outputs, const MSCallBackParam &call_param) {
|
||||
std::cout << "================================================================" << std::endl;
|
||||
std::cout << call_param.node_name_ << " inputs : " << std::endl;
|
||||
for (auto ms_tensor : after_inputs) {
|
||||
std::cout << DumpMSTensor(&ms_tensor) << std::endl;
|
||||
}
|
||||
std::cout << "----------------------------------------------------------------" << std::endl;
|
||||
std::cout << call_param.node_name_ << " outputs : " << std::endl;
|
||||
for (auto ms_tensor : after_outputs) {
|
||||
std::cout << DumpMSTensor(&ms_tensor) << std::endl;
|
||||
}
|
||||
std::cout << "================================================================" << std::endl;
|
||||
return true;
|
||||
};
|
||||
return RET_OK;
|
||||
}
|
||||
// Installs Predict callbacks that write tensor data to .bin files under
// dump_file_output_dir_, driven by the json dump config (dump_cfg_json_):
//   - kMode 0 dumps every kernel; otherwise only kernels listed in kKernels.
//   - kInputOutput 0 dumps inputs and outputs, 1 inputs only, 2 outputs only.
// Returning false from a callback aborts the Predict run.
int BenchmarkUnifiedApi::InitDumpTensorDataCallbackParameter() {
  // before callback
  ms_before_call_back_ = [&](const std::vector<mindspore::MSTensor> &before_inputs,
                             const std::vector<mindspore::MSTensor> &before_outputs,
                             const MSCallBackParam &call_param) {
    auto dump_mode = dump_cfg_json_[dump::kSettings][dump::kMode].get<int>();
    auto input_output_mode = dump_cfg_json_[dump::kSettings][dump::kInputOutput].get<int>();
    auto kernels = dump_cfg_json_[dump::kSettings][dump::kKernels].get<std::vector<std::string>>();

    // Dump this kernel's INPUTS (taken before it runs) when selected.
    if (dump_mode == 0 || std::find(kernels.begin(), kernels.end(), call_param.node_name_) != kernels.end()) {
      if (input_output_mode == 0 || input_output_mode == 1) {
        for (size_t i = 0; i < before_inputs.size(); i++) {
          auto ms_tensor = before_inputs.at(i);
          auto file_name = GenerateOutputFileName(&ms_tensor, call_param.node_name_, "input", i);
          auto abs_file_path = dump_file_output_dir_ + "/" + file_name;
          if (WriteToBin(abs_file_path, ms_tensor.MutableData(), ms_tensor.DataSize()) != RET_OK) {  // save to file
            MS_LOG(ERROR) << "write tensor data to file failed.";
            return false;
          }
        }
      }
    }
    return true;
  };

  // after callback
  ms_after_call_back_ = [&](const std::vector<mindspore::MSTensor> &after_inputs,
                            const std::vector<mindspore::MSTensor> &after_outputs, const MSCallBackParam &call_param) {
    auto dump_mode = dump_cfg_json_[dump::kSettings][dump::kMode].get<int>();
    auto input_output_mode = dump_cfg_json_[dump::kSettings][dump::kInputOutput].get<int>();
    auto kernels = dump_cfg_json_[dump::kSettings][dump::kKernels].get<std::vector<std::string>>();

    // Dump this kernel's OUTPUTS (taken after it runs) when selected.
    if (dump_mode == 0 || std::find(kernels.begin(), kernels.end(), call_param.node_name_) != kernels.end()) {
      if (input_output_mode == 0 || input_output_mode == 2) {
        for (size_t i = 0; i < after_outputs.size(); i++) {
          auto ms_tensor = after_outputs.at(i);
          auto file_name = GenerateOutputFileName(&ms_tensor, call_param.node_name_, "output", i);
          auto abs_file_path = dump_file_output_dir_ + "/" + file_name;
          if (WriteToBin(abs_file_path, ms_tensor.MutableData(), ms_tensor.DataSize()) != RET_OK) {  // save to file
            MS_LOG(ERROR) << "write tensor data to file failed.";
            return false;
          }
        }
      }
    }
    return true;
  };
  return RET_OK;
}
|
||||
|
||||
// Out-of-line so the vtable is emitted in this translation unit; the members
// clean themselves up (Rule of Zero).
BenchmarkUnifiedApi::~BenchmarkUnifiedApi() = default;
|
||||
|
||||
} // namespace lite
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,103 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_BENCHMARK_BENCHMARK_UNIFIED_API_H_
|
||||
#define MINDSPORE_BENCHMARK_BENCHMARK_UNIFIED_API_H_
|
||||
|
||||
#include <getopt.h>
|
||||
#include <signal.h>
|
||||
#include <random>
|
||||
#include <unordered_map>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <cmath>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include <cfloat>
|
||||
#include <utility>
|
||||
#include <nlohmann/json.hpp>
|
||||
#include "tools/benchmark/benchmark_base.h"
|
||||
#include "include/model.h"
|
||||
#include "tools/common/flag_parser.h"
|
||||
#include "src/common/file_utils.h"
|
||||
#include "src/common/utils.h"
|
||||
#include "include/api/types.h"
|
||||
#include "include/api/model.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
|
||||
// Benchmark implementation built on the new unified C++ API
// (mindspore::Model / mindspore::MSTensor) rather than the legacy
// lite session interface.  Selected at runtime via ENABLE_NEW_API=true.
class MS_API BenchmarkUnifiedApi : public BenchmarkBase {
 public:
  explicit BenchmarkUnifiedApi(BenchmarkFlags *flags) : BenchmarkBase(flags) {}

  virtual ~BenchmarkUnifiedApi();

  // Entry point: builds the model from flags, loads inputs and runs either
  // the accuracy comparison or the performance measurement.
  int RunBenchmark() override;

 protected:
  // Accumulates the bias between `tensor` and the calibration data stored
  // under `name` into *total_bias / *total_size.
  int CompareDataGetTotalBiasAndSize(const std::string &name, mindspore::MSTensor *tensor, float *total_bias,
                                     int *total_size);
  void InitContext(const std::shared_ptr<mindspore::Context> &context);
  mindspore::MSTensor GetMSTensorByNodeShape(const std::vector<size_t> &node_shape);
  mindspore::MSTensor GetMSTensorByNameOrShape(const std::string &node_or_tensor_name, const std::vector<size_t> &dims);

  // call GenerateRandomData to fill inputTensors
  int GenerateInputData() override;

  int ReadInputFile() override;

  int ReadTensorData(std::ifstream &in_file_stream, const std::string &tensor_name,
                     const std::vector<size_t> &dims) override;

  // Translates the benchmark flags into the unified-API Context.
  void InitMSContext(const std::shared_ptr<Context> &context);

  int CompareOutput() override;

  // The InitXxxCallbackParameter overrides install the before/after callbacks
  // below for their respective --timeProfiling / --perfProfiling / dump modes.
  int InitTimeProfilingCallbackParameter() override;

  int InitPerfProfilingCallbackParameter() override;

  int InitDumpTensorDataCallbackParameter() override;

  int InitPrintTensorDataCallbackParameter() override;

  // Echoes the first elements of every input tensor to stdout.
  int PrintInputData();

  // Widens each element of `srcDims` to int64_t (shape conversion helper).
  template <typename T>
  std::vector<int64_t> ConverterToInt64Vector(const std::vector<T> &srcDims) {
    std::vector<int64_t> dims;
    for (auto shape : srcDims) {
      dims.push_back(static_cast<int64_t>(shape));
    }
    return dims;
  }

  int MarkPerformance();

  int MarkAccuracy();

 private:
  mindspore::Model ms_model_;
  std::vector<mindspore::MSTensor> ms_inputs_for_api_;

  // Optional callbacks wired into Model::Predict (profiling / dumping).
  MSKernelCallBack ms_before_call_back_ = nullptr;
  MSKernelCallBack ms_after_call_back_ = nullptr;
};
|
||||
|
||||
} // namespace mindspore::lite
|
||||
#endif  // MINDSPORE_BENCHMARK_BENCHMARK_UNIFIED_API_H_
|
|
@ -14,7 +14,7 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "tools/benchmark/benchmark.h"
|
||||
#include "tools/benchmark/run_benchmark.h"
|
||||
#include "include/version.h"
|
||||
|
||||
int main(int argc, const char **argv) {
|
||||
|
|
|
@ -0,0 +1,82 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "tools/benchmark/run_benchmark.h"

#include <memory>
#include <string>
|
||||
namespace mindspore {
|
||||
namespace lite {
|
||||
|
||||
int RunBenchmark(int argc, const char **argv) {
|
||||
BenchmarkFlags flags;
|
||||
Option<std::string> err = flags.ParseFlags(argc, argv);
|
||||
#ifdef SUPPORT_NNIE
|
||||
SvpSysInit();
|
||||
#endif
|
||||
if (err.IsSome()) {
|
||||
std::cerr << err.Get() << std::endl;
|
||||
std::cerr << flags.Usage() << std::endl;
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
if (flags.help) {
|
||||
std::cerr << flags.Usage() << std::endl;
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
BenchmarkBase *benchmark = nullptr;
|
||||
// get dump data output path
|
||||
auto new_api = std::getenv("ENABLE_NEW_API");
|
||||
if (new_api == nullptr || std::string(new_api) != "true") {
|
||||
benchmark = new Benchmark(&flags);
|
||||
} else {
|
||||
benchmark = new BenchmarkUnifiedApi(&flags);
|
||||
}
|
||||
if (benchmark == nullptr) {
|
||||
MS_LOG(ERROR) << "new benchmark failed ";
|
||||
std::cerr << "new benchmark failed" << std::endl;
|
||||
return RET_ERROR;
|
||||
}
|
||||
auto status = benchmark->Init();
|
||||
if (status != 0) {
|
||||
MS_LOG(ERROR) << "Benchmark init Error : " << status;
|
||||
std::cerr << "Benchmark init Error : " << status << std::endl;
|
||||
delete benchmark;
|
||||
benchmark = nullptr;
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
status = benchmark->RunBenchmark();
|
||||
if (status != 0) {
|
||||
MS_LOG(ERROR) << "Run Benchmark "
|
||||
<< flags.model_file_.substr(flags.model_file_.find_last_of(DELIM_SLASH) + 1).c_str()
|
||||
<< " Failed : " << status;
|
||||
std::cerr << "Run Benchmark " << flags.model_file_.substr(flags.model_file_.find_last_of(DELIM_SLASH) + 1).c_str()
|
||||
<< " Failed : " << status << std::endl;
|
||||
delete benchmark;
|
||||
benchmark = nullptr;
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
MS_LOG(INFO) << "Run Benchmark " << flags.model_file_.substr(flags.model_file_.find_last_of(DELIM_SLASH) + 1).c_str()
|
||||
<< " Success.";
|
||||
std::cout << "Run Benchmark " << flags.model_file_.substr(flags.model_file_.find_last_of(DELIM_SLASH) + 1).c_str()
|
||||
<< " Success." << std::endl;
|
||||
delete benchmark;
|
||||
benchmark = nullptr;
|
||||
return RET_OK;
|
||||
}
|
||||
} // namespace lite
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,27 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINNIE_BENCHMARK_RUN_BENCHMARK_H_
|
||||
#define MINNIE_BENCHMARK_RUN_BENCHMARK_H_
|
||||
#include "tools/benchmark/benchmark.h"
|
||||
#include "tools/benchmark/benchmark_unified_api.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
|
||||
int MS_API RunBenchmark(int argc, const char **argv);
|
||||
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINNIE_BENCHMARK_RUN_BENCHMARK_H_
|
Loading…
Reference in New Issue