forked from OSSInnovation/mindspore
fuse benchmark and timeprofiler
This commit is contained in:
parent
5ebdaaca26
commit
0ac7c7884d
|
@ -78,7 +78,7 @@ Model *Model::Import(const char *model_buf, size_t size) {
|
|||
MS_LOG(ERROR) << "The buffer is invalid and fail to create graph.";
|
||||
return nullptr;
|
||||
}
|
||||
Model *model = new (std::nothrow) Model();
|
||||
auto *model = new (std::nothrow) Model();
|
||||
if (model == nullptr) {
|
||||
MS_LOG(ERROR) << "new model fail!";
|
||||
return nullptr;
|
||||
|
@ -86,14 +86,14 @@ Model *Model::Import(const char *model_buf, size_t size) {
|
|||
model->buf = reinterpret_cast<char *>(malloc(size));
|
||||
if (model->buf == nullptr) {
|
||||
MS_LOG(ERROR) << "new inner model buf fail!";
|
||||
delete(model);
|
||||
delete (model);
|
||||
return nullptr;
|
||||
}
|
||||
memcpy(model->buf, model_buf, size);
|
||||
auto meta_graph = schema::GetMetaGraph(model->buf);
|
||||
if (meta_graph == nullptr) {
|
||||
MS_LOG(ERROR) << "meta_graph is nullptr!";
|
||||
delete(model);
|
||||
delete (model);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
|
|
@ -265,7 +265,8 @@ int Benchmark::MarkPerformance() {
|
|||
for (int i = 0; i < _flags->loopCount; i++) {
|
||||
session->BindThread(true);
|
||||
auto start = GetTimeUs();
|
||||
auto status = session->RunGraph();
|
||||
auto status =
|
||||
_flags->runTimeProfiler ? session->RunGraph(before_call_back_, after_call_back_) : session->RunGraph();
|
||||
if (status != 0) {
|
||||
MS_LOG(ERROR) << "Inference error " << status;
|
||||
std::cerr << "Inference error " << status;
|
||||
|
@ -280,6 +281,14 @@ int Benchmark::MarkPerformance() {
|
|||
|
||||
session->BindThread(false);
|
||||
}
|
||||
|
||||
if (_flags->runTimeProfiler) {
|
||||
const std::vector<std::string> per_op_name = {"opName", "avg(ms)", "percent", "calledTimes", "opTotalTime"};
|
||||
const std::vector<std::string> per_op_type = {"opType", "avg(ms)", "percent", "calledTimes", "opTotalTime"};
|
||||
PrintResult(per_op_name, op_times_by_name_);
|
||||
PrintResult(per_op_type, op_times_by_type_);
|
||||
}
|
||||
|
||||
if (_flags->loopCount > 0) {
|
||||
timeAvg /= _flags->loopCount;
|
||||
MS_LOG(INFO) << "Model = " << _flags->modelPath.substr(_flags->modelPath.find_last_of(DELIM_SLASH) + 1).c_str()
|
||||
|
@ -295,25 +304,25 @@ int Benchmark::MarkPerformance() {
|
|||
int Benchmark::MarkAccuracy() {
|
||||
MS_LOG(INFO) << "MarkAccuracy";
|
||||
std::cout << "MarkAccuracy" << std::endl;
|
||||
for (size_t i = 0; i < msInputs.size(); i++) {
|
||||
switch (msInputs.at(i)->data_type()) {
|
||||
for (auto &msInput : msInputs) {
|
||||
switch (msInput->data_type()) {
|
||||
case TypeId::kNumberTypeFloat:
|
||||
PrintInputData<float>(msInputs.at(i));
|
||||
PrintInputData<float>(msInput);
|
||||
break;
|
||||
case TypeId::kNumberTypeFloat32:
|
||||
PrintInputData<float>(msInputs.at(i));
|
||||
PrintInputData<float>(msInput);
|
||||
break;
|
||||
case TypeId::kNumberTypeInt8:
|
||||
PrintInputData<int8_t>(msInputs.at(i));
|
||||
PrintInputData<int8_t>(msInput);
|
||||
break;
|
||||
case TypeId::kNumberTypeUInt8:
|
||||
PrintInputData<uint8_t>(msInputs.at(i));
|
||||
PrintInputData<uint8_t>(msInput);
|
||||
break;
|
||||
case TypeId::kNumberTypeInt32:
|
||||
PrintInputData<int>(msInputs.at(i));
|
||||
PrintInputData<int>(msInput);
|
||||
break;
|
||||
default:
|
||||
MS_LOG(ERROR) << "Datatype " << msInputs.at(i)->data_type() << " is not supported.";
|
||||
MS_LOG(ERROR) << "Datatype " << msInput->data_type() << " is not supported.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
|
@ -340,7 +349,7 @@ int Benchmark::MarkAccuracy() {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
int Benchmark::RunBenchmark(const std::string &deviceType) {
|
||||
int Benchmark::RunBenchmark() {
|
||||
auto startPrepareTime = GetTimeUs();
|
||||
// Load graph
|
||||
std::string modelName = _flags->modelPath.substr(_flags->modelPath.find_last_of(DELIM_SLASH) + 1);
|
||||
|
@ -355,13 +364,12 @@ int Benchmark::RunBenchmark(const std::string &deviceType) {
|
|||
return RET_ERROR;
|
||||
}
|
||||
auto model = lite::Model::Import(graphBuf, size);
|
||||
delete[](graphBuf);
|
||||
if (model == nullptr) {
|
||||
MS_LOG(ERROR) << "Import model file failed while running " << modelName.c_str();
|
||||
std::cerr << "Import model file failed while running " << modelName.c_str() << std::endl;
|
||||
delete[](graphBuf);
|
||||
return RET_ERROR;
|
||||
}
|
||||
delete[](graphBuf);
|
||||
auto context = new (std::nothrow) lite::Context;
|
||||
if (context == nullptr) {
|
||||
MS_LOG(ERROR) << "New context failed while running " << modelName.c_str();
|
||||
|
@ -372,8 +380,6 @@ int Benchmark::RunBenchmark(const std::string &deviceType) {
|
|||
context->device_type_ = lite::DT_CPU;
|
||||
} else if (_flags->device == "GPU") {
|
||||
context->device_type_ = lite::DT_GPU;
|
||||
} else {
|
||||
context->device_type_ = lite::DT_NPU;
|
||||
}
|
||||
|
||||
if (_flags->cpuBindMode == -1) {
|
||||
|
@ -403,13 +409,8 @@ int Benchmark::RunBenchmark(const std::string &deviceType) {
|
|||
model->Free();
|
||||
msInputs = session->GetInputs();
|
||||
auto endPrepareTime = GetTimeUs();
|
||||
#if defined(__arm__)
|
||||
MS_LOG(INFO) << "PrepareTime = " << (endPrepareTime - startPrepareTime) / 1000 << " ms";
|
||||
printf("PrepareTime = %lld ms, ", (endPrepareTime - startPrepareTime) / 1000);
|
||||
#else
|
||||
MS_LOG(INFO) << "PrepareTime = " << (endPrepareTime - startPrepareTime) / 1000 << " ms ";
|
||||
printf("PrepareTime = %ld ms, ", (endPrepareTime - startPrepareTime) / 1000);
|
||||
#endif
|
||||
std::cout << "PrepareTime = " << (endPrepareTime - startPrepareTime) / 1000 << " ms" << std::endl;
|
||||
|
||||
// Load input
|
||||
MS_LOG(INFO) << "start generate input data";
|
||||
|
@ -481,6 +482,54 @@ void BenchmarkFlags::InitResizeDimsList() {
|
|||
}
|
||||
}
|
||||
|
||||
int Benchmark::InitCallbackParameter() {
|
||||
// before callback
|
||||
before_call_back_ = [&](const std::vector<mindspore::tensor::MSTensor *> &before_inputs,
|
||||
const std::vector<mindspore::tensor::MSTensor *> &before_outputs,
|
||||
const session::CallBackParam &callParam) {
|
||||
if (before_inputs.empty()) {
|
||||
MS_LOG(INFO) << "The num of beforeInputs is empty";
|
||||
}
|
||||
if (before_outputs.empty()) {
|
||||
MS_LOG(INFO) << "The num of beforeOutputs is empty";
|
||||
}
|
||||
if (op_times_by_type_.find(callParam.type_callback_param) == op_times_by_type_.end()) {
|
||||
op_times_by_type_.insert(std::make_pair(callParam.type_callback_param, std::make_pair(0, 0.0f)));
|
||||
}
|
||||
if (op_times_by_name_.find(callParam.name_callback_param) == op_times_by_name_.end()) {
|
||||
op_times_by_name_.insert(std::make_pair(callParam.name_callback_param, std::make_pair(0, 0.0f)));
|
||||
}
|
||||
|
||||
op_call_times_total_++;
|
||||
op_begin_ = GetTimeUs();
|
||||
return true;
|
||||
};
|
||||
|
||||
// after callback
|
||||
after_call_back_ = [&](const std::vector<mindspore::tensor::MSTensor *> &after_inputs,
|
||||
const std::vector<mindspore::tensor::MSTensor *> &after_outputs,
|
||||
const session::CallBackParam &call_param) {
|
||||
uint64_t opEnd = GetTimeUs();
|
||||
|
||||
if (after_inputs.empty()) {
|
||||
MS_LOG(INFO) << "The num of after inputs is empty";
|
||||
}
|
||||
if (after_outputs.empty()) {
|
||||
MS_LOG(INFO) << "The num of after outputs is empty";
|
||||
}
|
||||
|
||||
float cost = static_cast<float>(opEnd - op_begin_) / 1000.0f;
|
||||
op_cost_total_ += cost;
|
||||
op_times_by_type_[call_param.type_callback_param].first++;
|
||||
op_times_by_type_[call_param.type_callback_param].second += cost;
|
||||
op_times_by_name_[call_param.name_callback_param].first++;
|
||||
op_times_by_name_[call_param.name_callback_param].second += cost;
|
||||
return true;
|
||||
};
|
||||
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int Benchmark::Init() {
|
||||
if (this->_flags == nullptr) {
|
||||
return 1;
|
||||
|
@ -550,6 +599,79 @@ int Benchmark::Init() {
|
|||
return RET_ERROR;
|
||||
}
|
||||
|
||||
if (_flags->runTimeProfiler) {
|
||||
auto status = InitCallbackParameter();
|
||||
if (status != RET_OK) {
|
||||
MS_LOG(ERROR) << "Init callback Parameter failed.";
|
||||
std::cerr << "Init callback Parameter failed." << std::endl;
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int Benchmark::PrintResult(const std::vector<std::string> &title,
|
||||
const std::map<std::string, std::pair<int, float>> &result) {
|
||||
std::vector<size_t> columnLenMax(5);
|
||||
std::vector<std::vector<std::string>> rows;
|
||||
|
||||
for (auto &iter : result) {
|
||||
char stringBuf[5][100] = {};
|
||||
std::vector<std::string> columns;
|
||||
size_t len;
|
||||
|
||||
len = iter.first.size();
|
||||
if (len > columnLenMax.at(0)) {
|
||||
columnLenMax.at(0) = len + 4;
|
||||
}
|
||||
columns.push_back(iter.first);
|
||||
|
||||
len = snprintf(stringBuf[1], sizeof(stringBuf[1]), "%f", iter.second.second / _flags->loopCount);
|
||||
if (len > columnLenMax.at(1)) {
|
||||
columnLenMax.at(1) = len + 4;
|
||||
}
|
||||
columns.emplace_back(stringBuf[1]);
|
||||
|
||||
len = snprintf(stringBuf[2], sizeof(stringBuf[2]), "%f", iter.second.second / op_cost_total_);
|
||||
if (len > columnLenMax.at(2)) {
|
||||
columnLenMax.at(2) = len + 4;
|
||||
}
|
||||
columns.emplace_back(stringBuf[2]);
|
||||
|
||||
len = snprintf(stringBuf[3], sizeof(stringBuf[3]), "%d", iter.second.first);
|
||||
if (len > columnLenMax.at(3)) {
|
||||
columnLenMax.at(3) = len + 4;
|
||||
}
|
||||
columns.emplace_back(stringBuf[3]);
|
||||
|
||||
len = snprintf(stringBuf[4], sizeof(stringBuf[4]), "%f", iter.second.second);
|
||||
if (len > columnLenMax.at(4)) {
|
||||
columnLenMax.at(4) = len + 4;
|
||||
}
|
||||
columns.emplace_back(stringBuf[4]);
|
||||
|
||||
rows.push_back(columns);
|
||||
}
|
||||
|
||||
printf("-------------------------------------------------------------------------\n");
|
||||
for (int i = 0; i < 5; i++) {
|
||||
auto printBuf = title[i];
|
||||
if (printBuf.size() > columnLenMax.at(i)) {
|
||||
columnLenMax.at(i) = printBuf.size();
|
||||
}
|
||||
printBuf.resize(columnLenMax.at(i), ' ');
|
||||
printf("%s\t", printBuf.c_str());
|
||||
}
|
||||
printf("\n");
|
||||
for (size_t i = 0; i < rows.size(); i++) {
|
||||
for (int j = 0; j < 5; j++) {
|
||||
auto printBuf = rows[i][j];
|
||||
printBuf.resize(columnLenMax.at(j), ' ');
|
||||
printf("%s\t", printBuf.c_str());
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
|
@ -583,16 +705,7 @@ int RunBenchmark(int argc, const char **argv) {
|
|||
return RET_ERROR;
|
||||
}
|
||||
|
||||
if (flags.device == "GPU") {
|
||||
status = mBenchmark.RunBenchmark("GPU");
|
||||
} else if (flags.device == "CPU") {
|
||||
status = mBenchmark.RunBenchmark("CPU");
|
||||
} else {
|
||||
MS_LOG(ERROR) << "Device type" << flags.device << " not support.";
|
||||
std::cerr << "Device type" << flags.device << " not support." << std::endl;
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
status = mBenchmark.RunBenchmark();
|
||||
if (status != 0) {
|
||||
MS_LOG(ERROR) << "Run Benchmark " << flags.modelPath.substr(flags.modelPath.find_last_of(DELIM_SLASH) + 1).c_str()
|
||||
<< " Failed : " << status;
|
||||
|
|
|
@ -28,6 +28,7 @@
|
|||
#include <vector>
|
||||
#include <memory>
|
||||
#include <cfloat>
|
||||
#include <utility>
|
||||
#include "include/model.h"
|
||||
#include "tools/common/flag_parser.h"
|
||||
#include "src/common/file_utils.h"
|
||||
|
@ -64,6 +65,7 @@ class MS_API BenchmarkFlags : public virtual FlagParser {
|
|||
AddFlag(&BenchmarkFlags::numThreads, "numThreads", "Run threads number", 2);
|
||||
AddFlag(&BenchmarkFlags::fp16Priority, "fp16Priority", "Priority float16", false);
|
||||
AddFlag(&BenchmarkFlags::warmUpLoopCount, "warmUpLoopCount", "Run warm up loop", 3);
|
||||
AddFlag(&BenchmarkFlags::runTimeProfiler, "runTimeProfiler", "Run time profiler", false);
|
||||
// MarkAccuracy
|
||||
AddFlag(&BenchmarkFlags::calibDataPath, "calibDataPath", "Calibration data file path", "");
|
||||
AddFlag(&BenchmarkFlags::calibDataType, "calibDataType", "Calibration data type. FLOAT | INT32 | INT8 | UINT8",
|
||||
|
@ -90,6 +92,7 @@ class MS_API BenchmarkFlags : public virtual FlagParser {
|
|||
int numThreads;
|
||||
bool fp16Priority;
|
||||
int warmUpLoopCount;
|
||||
bool runTimeProfiler;
|
||||
// MarkAccuracy
|
||||
std::string calibDataPath;
|
||||
std::string calibDataType;
|
||||
|
@ -108,7 +111,7 @@ class MS_API Benchmark {
|
|||
virtual ~Benchmark();
|
||||
|
||||
int Init();
|
||||
int RunBenchmark(const std::string &deviceType = "NPU");
|
||||
int RunBenchmark();
|
||||
|
||||
private:
|
||||
// call GenerateInputData or ReadInputFile to init inputTensors
|
||||
|
@ -125,6 +128,10 @@ class MS_API Benchmark {
|
|||
|
||||
int CompareOutput();
|
||||
|
||||
int InitCallbackParameter();
|
||||
|
||||
int PrintResult(const std::vector<std::string> &title, const std::map<std::string, std::pair<int, float>> &result);
|
||||
|
||||
template <typename T>
|
||||
void PrintInputData(tensor::MSTensor *input) {
|
||||
MS_ASSERT(input != nullptr);
|
||||
|
@ -228,6 +235,16 @@ class MS_API Benchmark {
|
|||
{"INT32", TypeId::kNumberTypeInt32},
|
||||
{"UINT8", TypeId::kNumberTypeUInt8}};
|
||||
TypeId msCalibDataType = TypeId::kNumberTypeFloat;
|
||||
|
||||
// callback parameters
|
||||
uint64_t op_begin_ = 0;
|
||||
int op_call_times_total_ = 0;
|
||||
float op_cost_total_ = 0.0f;
|
||||
std::map<std::string, std::pair<int, float>> op_times_by_type_;
|
||||
std::map<std::string, std::pair<int, float>> op_times_by_name_;
|
||||
|
||||
session::KernelCallBack before_call_back_;
|
||||
session::KernelCallBack after_call_back_;
|
||||
};
|
||||
|
||||
int MS_API RunBenchmark(int argc, const char **argv);
|
||||
|
|
|
@ -38,13 +38,13 @@ Option<std::string> FlagParser::ParseFlags(int argc, const char *const *argv, bo
|
|||
}
|
||||
|
||||
if (flagItem.find("--") == std::string::npos) {
|
||||
continue;
|
||||
return Option<std::string>("Failed: flag " + flagItem + " is not valid.");
|
||||
}
|
||||
|
||||
std::string key;
|
||||
Option<std::string> value = Option<std::string>(None());
|
||||
|
||||
size_t pos = flagItem.find_first_of("=");
|
||||
size_t pos = flagItem.find_first_of('=');
|
||||
if (pos == std::string::npos && flagItem.find("--no-") != std::string::npos) {
|
||||
key = flagItem.substr(FLAG_PREFIX_LEN);
|
||||
} else if (pos == std::string::npos) {
|
||||
|
|
|
@ -79,13 +79,8 @@ AwareQuantizer::AwareQuantizer(schema::MetaGraphT *graph, const TypeId &inferTyp
|
|||
const float stdValue = std::stof(stdValues, &sz);
|
||||
sz = 0;
|
||||
const float mean = std::stof(meanValues, &sz);
|
||||
std::unique_ptr<InputArray> inArr = nullptr;
|
||||
if (inferType == kNumberTypeFloat) {
|
||||
inArr.reset(new (std::nothrow) InputArray(mean, stdValue));
|
||||
} else {
|
||||
inArr.reset(new (std::nothrow) InputArray(mean, stdValue, TypeId::kNumberTypeInt8));
|
||||
}
|
||||
mInputArray = inArr.get();
|
||||
mInputArray = new (std::nothrow) InputArray(mean, stdValue);
|
||||
mInputArray->dataType = inferType;
|
||||
mInputArray->InitQuantParam();
|
||||
}
|
||||
|
||||
|
@ -132,7 +127,7 @@ STATUS AwareQuantizer::GenerateQuantParam() {
|
|||
} else {
|
||||
auto status = quantParamCalcer->Calc(graph, *node);
|
||||
if (status != RET_OK) {
|
||||
MS_LOG(ERROR) << "quantParamCalcer failed: " << status << " node: " << node->name.c_str();
|
||||
MS_LOG(WARNING) << "quantParamCalcer failed: " << status << " node: " << node->name.c_str();
|
||||
node->quantType = schema::QuantType_QUANT_NONE;
|
||||
} else {
|
||||
node->quantType = schema::QuantType_AwareTraining;
|
||||
|
|
Loading…
Reference in New Issue