Add unified-API (BenchmarkUnifiedApi) benchmark support for OpenGL textures

This commit is contained in:
greatpanc 2021-12-13 19:17:21 +08:00
parent 31b17f273d
commit 5198ade3f8
6 changed files with 259 additions and 46 deletions

View File

@ -55,14 +55,14 @@ int Benchmark::LoadInput() {
#else
if (flags_->in_data_file_.empty()) {
auto status = GenerateInputData();
if (status != 0) {
if (status != RET_OK) {
std::cerr << "Generate input data error " << status << std::endl;
MS_LOG(ERROR) << "Generate input data error " << status;
return status;
}
} else {
auto status = ReadInputFile();
if (status != 0) {
if (status != RET_OK) {
std::cerr << "ReadInputFile error, " << status << std::endl;
MS_LOG(ERROR) << "ReadInputFile error, " << status;
return status;
@ -120,9 +120,7 @@ int Benchmark::GenerateGLTexture(std::map<std::string, GLuint> *input_gl_texture
}
for (const auto &[name, tensor] : ms_outputs_) {
MS_ASSERT(tensor != nullptr);
float *output_data = nullptr;
auto status = FillGLTextureToTensor(output_gl_texture, tensor, name, output_data);
free(output_data);
auto status = FillGLTextureToTensor(output_gl_texture, tensor, name);
if (status != RET_OK) {
MS_LOG(ERROR) << "Fill GLTexture to output tensor" << status;
return status;
@ -132,25 +130,33 @@ int Benchmark::GenerateGLTexture(std::map<std::string, GLuint> *input_gl_texture
}
int Benchmark::FillGLTextureToTensor(std::map<std::string, GLuint> *gl_texture, mindspore::tensor::MSTensor *tensor,
std::string name, float *data) {
if (data == nullptr) {
data = reinterpret_cast<float *>(malloc(tensor->Size()));
if (data == nullptr) {
MS_LOG(ERROR) << "new output_data failed";
return RET_ERROR;
}
}
std::string name, void *data) {
auto image_id = 0;
if (tensor->shape().size() < DIMENSION_2D) {
int width = 1, height = 1, channel = 1;
if (tensor->shape().size() == DIMENSION_2D) {
height = tensor->shape()[kNHWC_N];
channel = tensor->shape()[kNHWC_H];
} else if (tensor->shape().size() == DIMENSION_3D) {
width = tensor->shape()[kNHWC_H];
height = tensor->shape()[kNHWC_N];
channel = tensor->shape()[kNHWC_C];
} else if (tensor->shape().size() == DIMENSION_4D) {
width = tensor->shape()[kNHWC_W];
height = tensor->shape()[kNHWC_H];
channel = tensor->shape()[kNHWC_C];
} else {
MS_LOG(ERROR) << "the tensor shape is not support";
return RET_ERROR;
} else if (tensor->shape().size() == DIMENSION_2D) {
image_id = gl_runtime_.CopyHostToDeviceTexture(data, tensor->shape()[0], tensor->shape()[1], 1);
} else {
image_id = gl_runtime_.CopyHostToDeviceTexture(data, tensor->shape()[kNHWC_H], tensor->shape()[kNHWC_W],
tensor->shape()[kNHWC_C]);
}
if (image_id != 0) {
if (data == nullptr) {
image_id = gl_runtime_.GLCreateTexture(width, height, channel);
} else {
image_id = gl_runtime_.CopyHostToDeviceTexture(data, width, height, channel);
}
if (image_id != GL_NONE) {
gl_texture->insert(std::pair<std::string, GLuint>(name, image_id));
} else {
MS_LOG(ERROR) << "glMemPool CopyHostToDeviceTexture failed";
@ -164,14 +170,14 @@ int Benchmark::LoadGLTexture() {
if (flags_->in_data_file_.empty()) {
auto status = GenerateGLTexture(&input_gl_texture, &output_gl_texture);
if (status != 0) {
if (status != RET_OK) {
std::cerr << "Generate input GLTexture error " << status << std::endl;
MS_LOG(ERROR) << "Generate input GLTexture error " << status;
return status;
}
} else {
auto status = ReadGLTextureFile(&input_gl_texture, &output_gl_texture);
if (status != 0) {
if (status != RET_OK) {
std::cerr << "ReadGLTextureFile error, " << status << std::endl;
MS_LOG(ERROR) << "ReadGLTextureFile error, " << status;
return status;
@ -211,13 +217,7 @@ int Benchmark::ReadGLTextureFile(std::map<std::string, GLuint> *input_gl_texture
delete[] bin_buf;
return RET_ERROR;
}
float *input_data = reinterpret_cast<float *>(malloc(tensor->Size()));
if (input_data == nullptr) {
MS_LOG(ERROR) << "new input_data failed";
return RET_ERROR;
}
memcpy(input_data, bin_buf, tensor_data_size);
auto status = FillGLTextureToTensor(input_gl_texture, tensor, tensor->tensor_name(), input_data);
auto status = FillGLTextureToTensor(input_gl_texture, tensor, tensor->tensor_name(), bin_buf);
delete[] bin_buf;
if (status != RET_OK) {
MS_LOG(ERROR) << "Fill GLTexture to input tensor" << status;
@ -227,9 +227,7 @@ int Benchmark::ReadGLTextureFile(std::map<std::string, GLuint> *input_gl_texture
}
for (const auto &[name, tensor] : ms_outputs_) {
MS_ASSERT(tensor != nullptr);
float *output_data = nullptr;
auto status = FillGLTextureToTensor(output_gl_texture, tensor, name, output_data);
free(output_data);
auto status = FillGLTextureToTensor(output_gl_texture, tensor, name);
if (status != RET_OK) {
MS_LOG(ERROR) << "Fill GLTexture to output tensor" << status;
return status;
@ -447,7 +445,7 @@ int Benchmark::MarkPerformance() {
std::cout << "Running warm up loops..." << std::endl;
for (int i = 0; i < flags_->warm_up_loop_count_; i++) {
auto status = session_->RunGraph();
if (status != 0) {
if (status != RET_OK) {
MS_LOG(ERROR) << "Inference error " << status;
std::cerr << "Inference error " << status << std::endl;
return status;
@ -468,7 +466,7 @@ int Benchmark::MarkPerformance() {
session_->BindThread(true);
auto start = GetTimeUs();
auto status = session_->RunGraph(before_call_back_, after_call_back_);
if (status != 0) {
if (status != RET_OK) {
MS_LOG(ERROR) << "Inference error " << status;
std::cerr << "Inference error " << status;
return status;
@ -701,14 +699,14 @@ int Benchmark::RunBenchmark() {
}
if (!flags_->benchmark_data_file_.empty()) {
auto status = MarkAccuracy();
if (status != 0) {
if (status != RET_OK) {
MS_LOG(ERROR) << "Run MarkAccuracy error: " << status;
std::cout << "Run MarkAccuracy error: " << status << std::endl;
return status;
}
} else {
auto status = MarkPerformance();
if (status != 0) {
if (status != RET_OK) {
MS_LOG(ERROR) << "Run MarkPerformance error: " << status;
std::cout << "Run MarkPerformance error: " << status << std::endl;
return status;

View File

@ -61,7 +61,7 @@ class MS_API Benchmark : public BenchmarkBase {
int ReadGLTextureFile(std::map<std::string, GLuint> *inputGlTexture, std::map<std::string, GLuint> *outputGLTexture);
int FillGLTextureToTensor(std::map<std::string, GLuint> *gl_texture, mindspore::tensor::MSTensor *tensor,
std::string name, float *data);
std::string name, void *data = nullptr);
#endif
int LoadInput() override;

View File

@ -46,17 +46,160 @@ constexpr int kDumpInputsAndOutputs = 0;
constexpr int kDumpOutputs = 2;
namespace lite {
#ifdef ENABLE_OPENGL_TEXTURE
// Creates one GL texture per model input, filled with randomly generated data.
//
// For every tensor in ms_inputs_for_api_ a host staging buffer of DataSize() bytes is allocated,
// filled by GenerateRandomData, uploaded through FillGLTextureToTensor and then released. The
// resulting texture id is stored in *input_gl_texture under the tensor's name.
//
// Returns RET_OK on success, RET_ERROR when the staging buffer cannot be allocated, or the status
// reported by GenerateRandomData / FillGLTextureToTensor when one of them fails.
int BenchmarkUnifiedApi::GenerateGLTexture(std::map<std::string, GLuint> *input_gl_texture) {
  for (auto &tensor : ms_inputs_for_api_) {
    const auto data_size = tensor.DataSize();
    float *input_data = reinterpret_cast<float *>(malloc(data_size));
    if (input_data == nullptr) {
      MS_LOG(ERROR) << "new input_data failed";
      return RET_ERROR;
    }
    int status = GenerateRandomData(data_size, input_data, static_cast<int>(tensor.DataType()));
    if (status != RET_OK) {
      // The staging buffer is owned by this function; release it on the error path too.
      free(input_data);
      std::cerr << "GenerateRandomData for inTensor failed: " << status << std::endl;
      MS_LOG(ERROR) << "GenerateRandomData for inTensor failed:" << status;
      return status;
    }
    status = FillGLTextureToTensor(input_gl_texture, &tensor, tensor.Name(), input_data);
    // The host copy is released here whether or not the upload succeeded.
    free(input_data);
    if (status != RET_OK) {
      MS_LOG(ERROR) << "Fill GLTexture to input tensor" << status;
      return status;
    }
  }
  return RET_OK;
}
int BenchmarkUnifiedApi::FillGLTextureToTensor(std::map<std::string, GLuint> *gl_texture, mindspore::MSTensor *tensor,
std::string name, void *data) {
auto image_id = 0;
int width = 1, height = 1, channel = 1;
if (tensor->Shape().size() == DIMENSION_2D) {
height = tensor->Shape()[kNHWC_N];
channel = tensor->Shape()[kNHWC_H];
} else if (tensor->Shape().size() == DIMENSION_3D) {
width = tensor->Shape()[kNHWC_H];
height = tensor->Shape()[kNHWC_N];
channel = tensor->Shape()[kNHWC_C];
} else if (tensor->Shape().size() == DIMENSION_4D) {
width = tensor->Shape()[kNHWC_W];
height = tensor->Shape()[kNHWC_H];
channel = tensor->Shape()[kNHWC_C];
} else {
MS_LOG(ERROR) << "the tensor shape is not support";
return RET_ERROR;
}
if (data == nullptr) {
image_id = gl_runtime_.GLCreateTexture(width, height, channel);
} else {
image_id = gl_runtime_.CopyHostToDeviceTexture(data, width, height, channel);
}
if (image_id != GL_NONE) {
gl_texture->insert(std::pair<std::string, GLuint>(name, image_id));
} else {
MS_LOG(ERROR) << "glMemPool CopyHostToDeviceTexture failed";
}
return RET_OK;
}
int BenchmarkUnifiedApi::LoadGLTexture() {
std::map<std::string, GLuint> input_gl_texture;
std::map<std::string, GLuint> output_gl_texture;
if (flags_->in_data_file_.empty()) {
auto status = GenerateGLTexture(&input_gl_texture);
if (status != RET_OK) {
std::cerr << "Generate input GLTexture error " << status << std::endl;
MS_LOG(ERROR) << "Generate input GLTexture error " << status;
return status;
}
} else {
auto status = ReadGLTextureFile(&input_gl_texture, &output_gl_texture);
if (status != RET_OK) {
std::cerr << "ReadGLTextureFile error, " << status << std::endl;
MS_LOG(ERROR) << "ReadGLTextureFile error, " << status;
return status;
}
}
auto status = ms_model_.BindGLTexture2DMemory(input_gl_texture, &output_gl_texture);
if (status != kSuccess) {
MS_LOG(ERROR) << "BindGLTexture2DMemory failed";
return RET_ERROR;
}
return RET_OK;
}
int BenchmarkUnifiedApi::ReadGLTextureFile(std::map<std::string, GLuint> *input_gl_texture,
std::map<std::string, GLuint> *output_gl_texture) {
if (ms_inputs_for_api_.empty()) {
return RET_OK;
}
if (this->flags_->in_data_type_ == kImage) {
MS_LOG(ERROR) << "Not supported image input";
return RET_ERROR;
} else {
for (size_t i = 0; i < flags_->input_data_list_.size(); i++) {
auto tensor = ms_inputs_for_api_.at(i);
MS_ASSERT(tensor != nullptr);
size_t size;
char *bin_buf = ReadFile(flags_->input_data_list_[i].c_str(), &size);
if (bin_buf == nullptr) {
MS_LOG(ERROR) << "ReadFile return nullptr";
return RET_ERROR;
}
auto tensor_data_size = tensor.DataSize();
if (size != tensor_data_size) {
std::cerr << "Input binary file size error, required: " << tensor_data_size << ", in fact: " << size
<< std::endl;
MS_LOG(ERROR) << "Input binary file size error, required: " << tensor_data_size << ", in fact: " << size;
delete[] bin_buf;
return RET_ERROR;
}
auto status = FillGLTextureToTensor(input_gl_texture, &tensor, tensor.Name(), bin_buf);
delete[] bin_buf;
if (status != RET_OK) {
MS_LOG(ERROR) << "Fill GLTexture to input tensor" << status;
return status;
}
}
}
for (auto &tensor : ms_outputs_for_api_) {
MS_ASSERT(tensor != nullptr);
auto status = FillGLTextureToTensor(output_gl_texture, &tensor, tensor.Name());
if (status != RET_OK) {
MS_LOG(ERROR) << "Fill GLTexture to output tensor" << status;
return status;
}
}
return RET_OK;
}
#endif
int BenchmarkUnifiedApi::LoadInput() {
#ifdef ENABLE_OPENGL_TEXTURE
if (flags_->enable_gl_texture_ == true) {
if (lite::BenchmarkUnifiedApi::LoadGLTexture() != RET_OK) {
MS_LOG(ERROR) << "Generate input GLTexture error";
return RET_ERROR;
}
return RET_OK;
}
#endif
if (flags_->in_data_file_.empty()) {
auto status = GenerateInputData();
if (status != 0) {
if (status != RET_OK) {
std::cerr << "Generate input data error " << status << std::endl;
MS_LOG(ERROR) << "Generate input data error " << status;
return status;
}
} else {
auto status = ReadInputFile();
if (status != 0) {
if (status != RET_OK) {
std::cerr << "ReadInputFile error, " << status << std::endl;
MS_LOG(ERROR) << "ReadInputFile error, " << status;
return status;
@ -175,6 +318,11 @@ void BenchmarkUnifiedApi::InitMSContext(const std::shared_ptr<mindspore::Context
if (flags_->device_ == "GPU") {
std::shared_ptr<GPUDeviceInfo> gpu_device_info = std::make_shared<GPUDeviceInfo>();
gpu_device_info->SetEnableFP16(flags_->enable_fp16_);
#ifdef ENABLE_OPENGL_TEXTURE
gpu_device_info->SetEnableGLTexture(flags_->enable_gl_texture_);
#endif
device_list.push_back(gpu_device_info);
}
@ -217,8 +365,33 @@ int BenchmarkUnifiedApi::CompareOutput() {
std::vector<std::string> output_strings = MSTensor::TensorToStrings(tensor);
ret = CompareStringData(tensor_name, calib_tensor.second->strings_data, output_strings);
} else {
#ifdef ENABLE_OPENGL_TEXTURE
if (flags_->enable_gl_texture_) {
auto *gltexture_id = reinterpret_cast<GLuint *>(tensor.MutableData());
float *hostptr = reinterpret_cast<float *>(gl_runtime_.CopyDeviceTextureToHost(*gltexture_id));
if (hostptr == nullptr) {
MS_LOG(ERROR) << "CopyDeviceTextureToHost failed";
return RET_ERROR;
}
auto tensor_shape = tensor.Shape();
auto data_len =
std::accumulate(tensor_shape.begin(), tensor_shape.end(), sizeof(float), std::multiplies<size_t>());
auto *new_tensor = new (std::nothrow)
MSTensor(tensor_name, mindspore::DataType::kNumberTypeFloat32, tensor_shape, hostptr, data_len);
MS_CHECK_TRUE_MSG(new_tensor != nullptr, RET_ERROR, "new tensor failed");
if (new_tensor->MutableData() == nullptr) {
MS_LOG(ERROR) << "CopyDeviceTextureToHost failed";
return RET_ERROR;
}
ret = CompareDataGetTotalBiasAndSize(tensor_name, new_tensor, &total_bias, &total_size);
} else {
ret = CompareDataGetTotalBiasAndSize(tensor_name, &tensor, &total_bias, &total_size);
}
#else
ret = CompareDataGetTotalBiasAndSize(tensor_name, &tensor, &total_bias, &total_size);
#endif
}
if (ret != RET_OK) {
MS_LOG(ERROR) << "Error in CompareData";
std::cerr << "Error in CompareData" << std::endl;
@ -484,12 +657,28 @@ int BenchmarkUnifiedApi::MarkAccuracy() {
MS_LOG(INFO) << "MarkAccuracy";
std::cout << "MarkAccuracy" << std::endl;
auto status = PrintInputData();
int status = 0;
#ifdef ENABLE_OPENGL_TEXTURE
if (flags_->enable_gl_texture_) {
for (auto in_tensor : ms_inputs_for_api_) {
auto *input = reinterpret_cast<GLuint *>(in_tensor.MutableData());
float *hostptr = reinterpret_cast<float *>(gl_runtime_.CopyDeviceTextureToHost(*input));
size_t print_num = 20;
gl_runtime_.PrintImage2DData(hostptr, 1, 1, print_num);
}
} else {
#else
status = PrintInputData();
if (status != RET_OK) {
MS_LOG(ERROR) << "PrintInputData error " << status;
std::cerr << "PrintInputData error " << status << std::endl;
return status;
}
#endif
#ifdef ENABLE_OPENGL_TEXTURE
}
#endif
std::vector<MSTensor> outputs;
auto ret = ms_model_.Predict(ms_inputs_for_api_, &outputs, ms_before_call_back_, ms_after_call_back_);
if (ret != kSuccess) {
@ -567,6 +756,13 @@ int BenchmarkUnifiedApi::PrintInputData() {
int BenchmarkUnifiedApi::RunBenchmark() {
auto start_prepare_time = GetTimeUs();
#ifdef ENABLE_OPENGL_TEXTURE
if (flags_->enable_gl_texture_) {
gl_runtime_.Init();
}
#endif
// Load graph
std::string model_name = flags_->model_file_.substr(flags_->model_file_.find_last_of(DELIM_SLASH) + 1);
mindspore::ModelType model_type = ModelTypeMap.at(flags_->model_type_);
@ -616,6 +812,7 @@ int BenchmarkUnifiedApi::RunBenchmark() {
}
ms_inputs_for_api_ = ms_model_.GetInputs();
ms_outputs_for_api_ = ms_model_.GetOutputs();
auto end_prepare_time = GetTimeUs();
MS_LOG(INFO) << "PrepareTime = " << ((end_prepare_time - start_prepare_time) / kFloatMSEC) << " ms";
std::cout << "PrepareTime = " << ((end_prepare_time - start_prepare_time) / kFloatMSEC) << " ms" << std::endl;
@ -623,20 +820,20 @@ int BenchmarkUnifiedApi::RunBenchmark() {
// Load input
MS_LOG(INFO) << "start generate input data";
auto status = LoadInput();
if (status != 0) {
if (status != RET_OK) {
MS_LOG(ERROR) << "Generate input data error";
return status;
}
if (!flags_->benchmark_data_file_.empty()) {
status = MarkAccuracy();
if (status != 0) {
if (status != RET_OK) {
MS_LOG(ERROR) << "Run MarkAccuracy error: " << status;
std::cout << "Run MarkAccuracy error: " << status << std::endl;
return status;
}
} else {
status = MarkPerformance();
if (status != 0) {
if (status != RET_OK) {
MS_LOG(ERROR) << "Run MarkPerformance error: " << status;
std::cout << "Run MarkPerformance error: " << status << std::endl;
return status;

View File

@ -37,6 +37,9 @@
#include "src/common/utils.h"
#include "include/api/types.h"
#include "include/api/model.h"
#ifdef ENABLE_OPENGL_TEXTURE
#include "tools/common/opengl_util.h"
#endif
namespace mindspore::lite {
class MS_API BenchmarkUnifiedApi : public BenchmarkBase {
@ -54,6 +57,17 @@ class MS_API BenchmarkUnifiedApi : public BenchmarkBase {
float *total_cosine_distance, int *total_size);
void InitContext(const std::shared_ptr<mindspore::Context> &context);
#ifdef ENABLE_OPENGL_TEXTURE
int GenerateGLTexture(std::map<std::string, GLuint> *inputGlTexture);
int LoadGLTexture();
int ReadGLTextureFile(std::map<std::string, GLuint> *inputGlTexture, std::map<std::string, GLuint> *outputGLTexture);
int FillGLTextureToTensor(std::map<std::string, GLuint> *gl_texture, mindspore::MSTensor *tensor, std::string name,
void *data = nullptr);
#endif
// call GenerateRandomData to fill inputTensors
int LoadInput() override;
int GenerateInputData() override;
@ -94,8 +108,12 @@ class MS_API BenchmarkUnifiedApi : public BenchmarkBase {
void UpdateDistributionName(const std::shared_ptr<mindspore::Context> &context, std::string *name);
private:
#ifdef ENABLE_OPENGL_TEXTURE
mindspore::OpenGL::OpenGLRuntime gl_runtime_;
#endif
mindspore::Model ms_model_;
std::vector<mindspore::MSTensor> ms_inputs_for_api_;
std::vector<mindspore::MSTensor> ms_outputs_for_api_;
MSKernelCallBack ms_before_call_back_ = nullptr;
MSKernelCallBack ms_after_call_back_ = nullptr;

View File

@ -311,7 +311,7 @@ bool OpenGLRuntime::CopyHostToDeviceSSBO(void *hostData, GLuint ssboBufferID, GL
}
GLuint OpenGLRuntime::GLCreateTexture(int w, int h, int c, GLenum TextrueFormat, GLenum target) {
GLuint textureID = -0;
GLuint textureID = 0;
if (target == GL_TEXTURE_3D) {
MS_ASSERT(w > 0 && h > 0 && c > 0);
glGenTextures(1, &textureID);

View File

@ -66,7 +66,7 @@ class OpenGLRuntime {
bool CopyDeviceSSBOToHost(GLuint ssboBufferID, void *hostData, GLsizeiptr size);
bool CopyHostToDeviceSSBO(void *hostData, GLuint ssboBufferID, GLsizeiptr size);
GLuint GLCreateTexture(int w, int h, int c, GLenum textrueFormat, GLenum target = GL_TEXTURE_3D);
GLuint GLCreateTexture(int w, int h, int c, GLenum textrueFormat = GL_RGBA32F, GLenum target = GL_TEXTURE_2D);
bool CopyDeviceTextureToSSBO(GLuint textureID, GLuint ssboBufferID);
bool CopyDeviceSSBOToTexture(GLuint ssboBufferID, GLuint textureID);