From 5198ade3f815493497272783f241cfe86c22d93d Mon Sep 17 00:00:00 2001 From: greatpanc Date: Mon, 13 Dec 2021 19:17:21 +0800 Subject: [PATCH] add new api benchmark for opengl --- mindspore/lite/tools/benchmark/benchmark.cc | 70 +++--- mindspore/lite/tools/benchmark/benchmark.h | 2 +- .../tools/benchmark/benchmark_unified_api.cc | 211 +++++++++++++++++- .../tools/benchmark/benchmark_unified_api.h | 18 ++ mindspore/lite/tools/common/opengl_util.cc | 2 +- mindspore/lite/tools/common/opengl_util.h | 2 +- 6 files changed, 259 insertions(+), 46 deletions(-) diff --git a/mindspore/lite/tools/benchmark/benchmark.cc b/mindspore/lite/tools/benchmark/benchmark.cc index 5fa02568b99..957cebb7ba9 100644 --- a/mindspore/lite/tools/benchmark/benchmark.cc +++ b/mindspore/lite/tools/benchmark/benchmark.cc @@ -55,14 +55,14 @@ int Benchmark::LoadInput() { #else if (flags_->in_data_file_.empty()) { auto status = GenerateInputData(); - if (status != 0) { + if (status != RET_OK) { std::cerr << "Generate input data error " << status << std::endl; MS_LOG(ERROR) << "Generate input data error " << status; return status; } } else { auto status = ReadInputFile(); - if (status != 0) { + if (status != RET_OK) { std::cerr << "ReadInputFile error, " << status << std::endl; MS_LOG(ERROR) << "ReadInputFile error, " << status; return status; @@ -120,9 +120,7 @@ int Benchmark::GenerateGLTexture(std::map *input_gl_texture } for (const auto &[name, tensor] : ms_outputs_) { MS_ASSERT(tensor != nullptr); - float *output_data = nullptr; - auto status = FillGLTextureToTensor(output_gl_texture, tensor, name, output_data); - free(output_data); + auto status = FillGLTextureToTensor(output_gl_texture, tensor, name); if (status != RET_OK) { MS_LOG(ERROR) << "Fill GLTexture to output tensor" << status; return status; @@ -132,25 +130,33 @@ int Benchmark::GenerateGLTexture(std::map *input_gl_texture } int Benchmark::FillGLTextureToTensor(std::map *gl_texture, mindspore::tensor::MSTensor *tensor, - std::string name, float *data) { - if (data == nullptr) { - data = reinterpret_cast(malloc(tensor->Size())); - if (data == nullptr) { - MS_LOG(ERROR) << "new output_data failed"; - return RET_ERROR; - } - } + std::string name, void *data) { auto image_id = 0; - if (tensor->shape().size() < DIMENSION_2D) { + + int width = 1, height = 1, channel = 1; + if (tensor->shape().size() == DIMENSION_2D) { + height = tensor->shape()[kNHWC_N]; + channel = tensor->shape()[kNHWC_H]; + } else if (tensor->shape().size() == DIMENSION_3D) { + width = tensor->shape()[kNHWC_H]; + height = tensor->shape()[kNHWC_N]; + channel = tensor->shape()[kNHWC_C]; + } else if (tensor->shape().size() == DIMENSION_4D) { + width = tensor->shape()[kNHWC_W]; + height = tensor->shape()[kNHWC_H]; + channel = tensor->shape()[kNHWC_C]; + } else { MS_LOG(ERROR) << "the tensor shape is not support"; return RET_ERROR; - } else if (tensor->shape().size() == DIMENSION_2D) { - image_id = gl_runtime_.CopyHostToDeviceTexture(data, tensor->shape()[0], tensor->shape()[1], 1); - } else { - image_id = gl_runtime_.CopyHostToDeviceTexture(data, tensor->shape()[kNHWC_H], tensor->shape()[kNHWC_W], - tensor->shape()[kNHWC_C]); } - if (image_id != 0) { + + if (data == nullptr) { + image_id = gl_runtime_.GLCreateTexture(width, height, channel); + } else { + image_id = gl_runtime_.CopyHostToDeviceTexture(data, width, height, channel); + } + + if (image_id != GL_NONE) { gl_texture->insert(std::pair(name, image_id)); } else { MS_LOG(ERROR) << "glMemPool CopyHostToDeviceTexture failed"; @@ -164,14 +170,14 @@ int Benchmark::LoadGLTexture() { if (flags_->in_data_file_.empty()) { auto status = GenerateGLTexture(&input_gl_texture, &output_gl_texture); - if (status != 0) { + if (status != RET_OK) { std::cerr << "Generate input GLTexture error " << status << std::endl; MS_LOG(ERROR) << "Generate input GLTexture error " << status; return status; } } else { auto status = ReadGLTextureFile(&input_gl_texture, &output_gl_texture); - if (status != 0) { + if (status != RET_OK) { std::cerr << "ReadGLTextureFile error, " << status << std::endl; MS_LOG(ERROR) << "ReadGLTextureFile error, " << status; return status; @@ -211,13 +217,7 @@ int Benchmark::ReadGLTextureFile(std::map *input_gl_texture delete[] bin_buf; return RET_ERROR; } - float *input_data = reinterpret_cast(malloc(tensor->Size())); - if (input_data == nullptr) { - MS_LOG(ERROR) << "new input_data failed"; - return RET_ERROR; - } - memcpy(input_data, bin_buf, tensor_data_size); - auto status = FillGLTextureToTensor(input_gl_texture, tensor, tensor->tensor_name(), input_data); + auto status = FillGLTextureToTensor(input_gl_texture, tensor, tensor->tensor_name(), bin_buf); delete[] bin_buf; if (status != RET_OK) { MS_LOG(ERROR) << "Fill GLTexture to input tensor" << status; @@ -227,9 +227,7 @@ int Benchmark::ReadGLTextureFile(std::map *input_gl_texture } for (const auto &[name, tensor] : ms_outputs_) { MS_ASSERT(tensor != nullptr); - float *output_data = nullptr; - auto status = FillGLTextureToTensor(output_gl_texture, tensor, name, output_data); - free(output_data); + auto status = FillGLTextureToTensor(output_gl_texture, tensor, name); if (status != RET_OK) { MS_LOG(ERROR) << "Fill GLTexture to output tensor" << status; return status; @@ -447,7 +445,7 @@ int Benchmark::MarkPerformance() { std::cout << "Running warm up loops..." << std::endl; for (int i = 0; i < flags_->warm_up_loop_count_; i++) { auto status = session_->RunGraph(); - if (status != 0) { + if (status != RET_OK) { MS_LOG(ERROR) << "Inference error " << status; std::cerr << "Inference error " << status << std::endl; return status; @@ -468,7 +466,7 @@ int Benchmark::MarkPerformance() { session_->BindThread(true); auto start = GetTimeUs(); auto status = session_->RunGraph(before_call_back_, after_call_back_); - if (status != 0) { + if (status != RET_OK) { MS_LOG(ERROR) << "Inference error " << status; std::cerr << "Inference error " << status; return status; @@ -701,14 +699,14 @@ int Benchmark::RunBenchmark() { } if (!flags_->benchmark_data_file_.empty()) { auto status = MarkAccuracy(); - if (status != 0) { + if (status != RET_OK) { MS_LOG(ERROR) << "Run MarkAccuracy error: " << status; std::cout << "Run MarkAccuracy error: " << status << std::endl; return status; } } else { auto status = MarkPerformance(); - if (status != 0) { + if (status != RET_OK) { MS_LOG(ERROR) << "Run MarkPerformance error: " << status; std::cout << "Run MarkPerformance error: " << status << std::endl; return status; diff --git a/mindspore/lite/tools/benchmark/benchmark.h b/mindspore/lite/tools/benchmark/benchmark.h index a3036ef6208..8843e3b27c5 100644 --- a/mindspore/lite/tools/benchmark/benchmark.h +++ b/mindspore/lite/tools/benchmark/benchmark.h @@ -61,7 +61,7 @@ class MS_API Benchmark : public BenchmarkBase { int ReadGLTextureFile(std::map *inputGlTexture, std::map *outputGLTexture); int FillGLTextureToTensor(std::map *gl_texture, mindspore::tensor::MSTensor *tensor, - std::string name, float *data); + std::string name, void *data = nullptr); #endif int LoadInput() override; diff --git a/mindspore/lite/tools/benchmark/benchmark_unified_api.cc b/mindspore/lite/tools/benchmark/benchmark_unified_api.cc index 556b1066818..a82a77c83c1 100644 --- a/mindspore/lite/tools/benchmark/benchmark_unified_api.cc +++ b/mindspore/lite/tools/benchmark/benchmark_unified_api.cc @@ -46,17 +46,160 @@ constexpr int kDumpInputsAndOutputs = 0; constexpr int kDumpOutputs = 2; namespace lite { +#ifdef ENABLE_OPENGL_TEXTURE +int BenchmarkUnifiedApi::GenerateGLTexture(std::map *input_gl_texture) { + for (auto tensor : ms_inputs_for_api_) { + float *input_data = reinterpret_cast(malloc(tensor.DataSize())); + if (input_data == nullptr) { + MS_LOG(ERROR) << "new input_data failed"; + return RET_ERROR; + } + int status = GenerateRandomData(tensor.DataSize(), input_data, static_cast(tensor.DataType())); + if (status != RET_OK) { + std::cerr << "GenerateRandomData for inTensor failed: " << status << std::endl; + MS_LOG(ERROR) << "GenerateRandomData for inTensor failed:" << status; + return status; + } + status = FillGLTextureToTensor(input_gl_texture, &tensor, tensor.Name(), input_data); + free(input_data); + if (status != RET_OK) { + MS_LOG(ERROR) << "Fill GLTexture to input tensor" << status; + return status; + } + } + return RET_OK; +} + +int BenchmarkUnifiedApi::FillGLTextureToTensor(std::map *gl_texture, mindspore::MSTensor *tensor, + std::string name, void *data) { + auto image_id = 0; + + int width = 1, height = 1, channel = 1; + if (tensor->Shape().size() == DIMENSION_2D) { + height = tensor->Shape()[kNHWC_N]; + channel = tensor->Shape()[kNHWC_H]; + } else if (tensor->Shape().size() == DIMENSION_3D) { + width = tensor->Shape()[kNHWC_H]; + height = tensor->Shape()[kNHWC_N]; + channel = tensor->Shape()[kNHWC_C]; + } else if (tensor->Shape().size() == DIMENSION_4D) { + width = tensor->Shape()[kNHWC_W]; + height = tensor->Shape()[kNHWC_H]; + channel = tensor->Shape()[kNHWC_C]; + } else { + MS_LOG(ERROR) << "the tensor shape is not support"; + return RET_ERROR; + } + + if (data == nullptr) { + image_id = gl_runtime_.GLCreateTexture(width, height, channel); + } else { + image_id = gl_runtime_.CopyHostToDeviceTexture(data, width, height, channel); + } + + if (image_id != GL_NONE) { + gl_texture->insert(std::pair(name, image_id)); + } else { + MS_LOG(ERROR) << "glMemPool CopyHostToDeviceTexture failed"; + } + return RET_OK; +} + +int BenchmarkUnifiedApi::LoadGLTexture() { + std::map input_gl_texture; + std::map output_gl_texture; + + if (flags_->in_data_file_.empty()) { + auto status = GenerateGLTexture(&input_gl_texture); + if (status != RET_OK) { + std::cerr << "Generate input GLTexture error " << status << std::endl; + MS_LOG(ERROR) << "Generate input GLTexture error " << status; + return status; + } + } else { + auto status = ReadGLTextureFile(&input_gl_texture, &output_gl_texture); + if (status != RET_OK) { + std::cerr << "ReadGLTextureFile error, " << status << std::endl; + MS_LOG(ERROR) << "ReadGLTextureFile error, " << status; + return status; + } + } + auto status = ms_model_.BindGLTexture2DMemory(input_gl_texture, &output_gl_texture); + if (status != kSuccess) { + MS_LOG(ERROR) << "BindGLTexture2DMemory failed"; + return RET_ERROR; + } + return RET_OK; +} + +int BenchmarkUnifiedApi::ReadGLTextureFile(std::map *input_gl_texture, + std::map *output_gl_texture) { + if (ms_inputs_for_api_.empty()) { + return RET_OK; + } + if (this->flags_->in_data_type_ == kImage) { + MS_LOG(ERROR) << "Not supported image input"; + return RET_ERROR; + } else { + for (size_t i = 0; i < flags_->input_data_list_.size(); i++) { + auto tensor = ms_inputs_for_api_.at(i); + MS_ASSERT(tensor != nullptr); + size_t size; + char *bin_buf = ReadFile(flags_->input_data_list_[i].c_str(), &size); + if (bin_buf == nullptr) { + MS_LOG(ERROR) << "ReadFile return nullptr"; + return RET_ERROR; + } + auto tensor_data_size = tensor.DataSize(); + if (size != tensor_data_size) { + std::cerr << "Input binary file size error, required: " << tensor_data_size << ", in fact: " << size + << std::endl; + MS_LOG(ERROR) << "Input binary file size error, required: " << tensor_data_size << ", in fact: " << size; + delete[] bin_buf; + return RET_ERROR; + } + + auto status = FillGLTextureToTensor(input_gl_texture, &tensor, tensor.Name(), bin_buf); + delete[] bin_buf; + if (status != RET_OK) { + MS_LOG(ERROR) << "Fill GLTexture to input tensor" << status; + return status; + } + } + } + for (auto &tensor : ms_outputs_for_api_) { + MS_ASSERT(tensor != nullptr); + auto status = FillGLTextureToTensor(output_gl_texture, &tensor, tensor.Name()); + if (status != RET_OK) { + MS_LOG(ERROR) << "Fill GLTexture to output tensor" << status; + return status; + } + } + return RET_OK; +} +#endif + int BenchmarkUnifiedApi::LoadInput() { +#ifdef ENABLE_OPENGL_TEXTURE + if (flags_->enable_gl_texture_ == true) { + if (lite::BenchmarkUnifiedApi::LoadGLTexture() != RET_OK) { + MS_LOG(ERROR) << "Generate input GLTexture error"; + return RET_ERROR; + } + return RET_OK; + } +#endif + if (flags_->in_data_file_.empty()) { auto status = GenerateInputData(); - if (status != 0) { + if (status != RET_OK) { std::cerr << "Generate input data error " << status << std::endl; MS_LOG(ERROR) << "Generate input data error " << status; return status; } } else { auto status = ReadInputFile(); - if (status != 0) { + if (status != RET_OK) { std::cerr << "ReadInputFile error, " << status << std::endl; MS_LOG(ERROR) << "ReadInputFile error, " << status; return status; @@ -175,6 +318,11 @@ void BenchmarkUnifiedApi::InitMSContext(const std::shared_ptrdevice_ == "GPU") { std::shared_ptr gpu_device_info = std::make_shared(); gpu_device_info->SetEnableFP16(flags_->enable_fp16_); + +#ifdef ENABLE_OPENGL_TEXTURE + gpu_device_info->SetEnableGLTexture(flags_->enable_gl_texture_); +#endif + device_list.push_back(gpu_device_info); } @@ -217,7 +365,32 @@ int BenchmarkUnifiedApi::CompareOutput() { std::vector output_strings = MSTensor::TensorToStrings(tensor); ret = CompareStringData(tensor_name, calib_tensor.second->strings_data, output_strings); } else { +#ifdef ENABLE_OPENGL_TEXTURE + if (flags_->enable_gl_texture_) { + auto *gltexture_id = reinterpret_cast(tensor.MutableData()); + float *hostptr = reinterpret_cast(gl_runtime_.CopyDeviceTextureToHost(*gltexture_id)); + if (hostptr == nullptr) { + MS_LOG(ERROR) << "CopyDeviceTextureToHost failed"; + return RET_ERROR; + } + + auto tensor_shape = tensor.Shape(); + auto data_len = + std::accumulate(tensor_shape.begin(), tensor_shape.end(), sizeof(float), std::multiplies()); + auto *new_tensor = new (std::nothrow) + MSTensor(tensor_name, mindspore::DataType::kNumberTypeFloat32, tensor_shape, hostptr, data_len); + MS_CHECK_TRUE_MSG(new_tensor != nullptr, RET_ERROR, "new tensor failed"); + if (new_tensor->MutableData() == nullptr) { + MS_LOG(ERROR) << "CopyDeviceTextureToHost failed"; + return RET_ERROR; + } + ret = CompareDataGetTotalBiasAndSize(tensor_name, new_tensor, &total_bias, &total_size); + } else { + ret = CompareDataGetTotalBiasAndSize(tensor_name, &tensor, &total_bias, &total_size); + } +#else ret = CompareDataGetTotalBiasAndSize(tensor_name, &tensor, &total_bias, &total_size); +#endif } if (ret != RET_OK) { MS_LOG(ERROR) << "Error in CompareData"; @@ -484,12 +657,28 @@ int BenchmarkUnifiedApi::MarkAccuracy() { MS_LOG(INFO) << "MarkAccuracy"; std::cout << "MarkAccuracy" << std::endl; - auto status = PrintInputData(); + int status = 0; +#ifdef ENABLE_OPENGL_TEXTURE + if (flags_->enable_gl_texture_) { + for (auto in_tensor : ms_inputs_for_api_) { + auto *input = reinterpret_cast(in_tensor.MutableData()); + float *hostptr = reinterpret_cast(gl_runtime_.CopyDeviceTextureToHost(*input)); + size_t print_num = 20; + gl_runtime_.PrintImage2DData(hostptr, 1, 1, print_num); + } + } else { +#else + status = PrintInputData(); if (status != RET_OK) { MS_LOG(ERROR) << "PrintInputData error " << status; std::cerr << "PrintInputData error " << status << std::endl; return status; } +#endif + +#ifdef ENABLE_OPENGL_TEXTURE + } +#endif std::vector outputs; auto ret = ms_model_.Predict(ms_inputs_for_api_, &outputs, ms_before_call_back_, ms_after_call_back_); if (ret != kSuccess) { @@ -526,7 +715,7 @@ int BenchmarkUnifiedApi::PrintInputData() { MS_ASSERT(input != nullptr); auto tensor_data_type = static_cast(input.DataType()); - std::cout << "InData" << i << ": "; + std::cout << "InData " << i << ": "; if (tensor_data_type == TypeId::kObjectTypeString) { std::vector output_strings = MSTensor::TensorToStrings(input); size_t print_num = std::min(output_strings.size(), static_cast(20)); @@ -567,6 +756,13 @@ int BenchmarkUnifiedApi::PrintInputData() { int BenchmarkUnifiedApi::RunBenchmark() { auto start_prepare_time = GetTimeUs(); + +#ifdef ENABLE_OPENGL_TEXTURE + if (flags_->enable_gl_texture_) { + gl_runtime_.Init(); + } +#endif + // Load graph std::string model_name = flags_->model_file_.substr(flags_->model_file_.find_last_of(DELIM_SLASH) + 1); mindspore::ModelType model_type = ModelTypeMap.at(flags_->model_type_); @@ -616,6 +812,7 @@ int BenchmarkUnifiedApi::RunBenchmark() { } ms_inputs_for_api_ = ms_model_.GetInputs(); + ms_outputs_for_api_ = ms_model_.GetOutputs(); auto end_prepare_time = GetTimeUs(); MS_LOG(INFO) << "PrepareTime = " << ((end_prepare_time - start_prepare_time) / kFloatMSEC) << " ms"; std::cout << "PrepareTime = " << ((end_prepare_time - start_prepare_time) / kFloatMSEC) << " ms" << std::endl; @@ -623,20 +820,20 @@ int BenchmarkUnifiedApi::RunBenchmark() { // Load input MS_LOG(INFO) << "start generate input data"; auto status = LoadInput(); - if (status != 0) { + if (status != RET_OK) { MS_LOG(ERROR) << "Generate input data error"; return status; } if (!flags_->benchmark_data_file_.empty()) { status = MarkAccuracy(); - if (status != 0) { + if (status != RET_OK) { MS_LOG(ERROR) << "Run MarkAccuracy error: " << status; std::cout << "Run MarkAccuracy error: " << status << std::endl; return status; } } else { status = MarkPerformance(); - if (status != 0) { + if (status != RET_OK) { MS_LOG(ERROR) << "Run MarkPerformance error: " << status; std::cout << "Run MarkPerformance error: " << status << std::endl; return status; diff --git a/mindspore/lite/tools/benchmark/benchmark_unified_api.h b/mindspore/lite/tools/benchmark/benchmark_unified_api.h index bbd2334e0b8..78ae5784959 100644 --- a/mindspore/lite/tools/benchmark/benchmark_unified_api.h +++ b/mindspore/lite/tools/benchmark/benchmark_unified_api.h @@ -37,6 +37,9 @@ #include "src/common/utils.h" #include "include/api/types.h" #include "include/api/model.h" +#ifdef ENABLE_OPENGL_TEXTURE +#include "tools/common/opengl_util.h" +#endif namespace mindspore::lite { class MS_API BenchmarkUnifiedApi : public BenchmarkBase { @@ -54,6 +57,17 @@ class MS_API BenchmarkUnifiedApi : public BenchmarkBase { float *total_cosine_distance, int *total_size); void InitContext(const std::shared_ptr &context); +#ifdef ENABLE_OPENGL_TEXTURE + int GenerateGLTexture(std::map *inputGlTexture); + + int LoadGLTexture(); + + int ReadGLTextureFile(std::map *inputGlTexture, std::map *outputGLTexture); + + int FillGLTextureToTensor(std::map *gl_texture, mindspore::MSTensor *tensor, std::string name, + void *data = nullptr); +#endif + // call GenerateRandomData to fill inputTensors int LoadInput() override; int GenerateInputData() override; @@ -94,8 +108,12 @@ class MS_API BenchmarkUnifiedApi : public BenchmarkBase { void UpdateDistributionName(const std::shared_ptr &context, std::string *name); private: +#ifdef ENABLE_OPENGL_TEXTURE + mindspore::OpenGL::OpenGLRuntime gl_runtime_; +#endif mindspore::Model ms_model_; std::vector ms_inputs_for_api_; + std::vector ms_outputs_for_api_; MSKernelCallBack ms_before_call_back_ = nullptr; MSKernelCallBack ms_after_call_back_ = nullptr; diff --git a/mindspore/lite/tools/common/opengl_util.cc b/mindspore/lite/tools/common/opengl_util.cc index 6eadec0c4b3..9bf3016a8bc 100644 --- a/mindspore/lite/tools/common/opengl_util.cc +++ b/mindspore/lite/tools/common/opengl_util.cc @@ -311,7 +311,7 @@ bool OpenGLRuntime::CopyHostToDeviceSSBO(void *hostData, GLuint ssboBufferID, GL } GLuint OpenGLRuntime::GLCreateTexture(int w, int h, int c, GLenum TextrueFormat, GLenum target) { - GLuint textureID = -0; + GLuint textureID = 0; if (target == GL_TEXTURE_3D) { MS_ASSERT(w > 0 && h > 0 && c > 0); glGenTextures(1, &textureID); diff --git a/mindspore/lite/tools/common/opengl_util.h b/mindspore/lite/tools/common/opengl_util.h index 43bcc7bc725..4edd4dcafa5 100644 --- a/mindspore/lite/tools/common/opengl_util.h +++ b/mindspore/lite/tools/common/opengl_util.h @@ -66,7 +66,7 @@ class OpenGLRuntime { bool CopyDeviceSSBOToHost(GLuint ssboBufferID, void *hostData, GLsizeiptr size); bool CopyHostToDeviceSSBO(void *hostData, GLuint ssboBufferID, GLsizeiptr size); - GLuint GLCreateTexture(int w, int h, int c, GLenum textrueFormat, GLenum target = GL_TEXTURE_3D); + GLuint GLCreateTexture(int w, int h, int c, GLenum textrueFormat = GL_RGBA32F, GLenum target = GL_TEXTURE_2D); bool CopyDeviceTextureToSSBO(GLuint textureID, GLuint ssboBufferID); bool CopyDeviceSSBOToTexture(GLuint ssboBufferID, GLuint textureID);