!5123 fixed segmentation fault for opencl mode

Merge pull request !5123 from liuchao/master
mindspore-ci-bot 2020-08-25 17:07:23 +08:00 committed by Gitee
commit 4efa20a137
20 changed files with 56 additions and 109 deletions
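Editor's note on the change as a whole: the patch replaces the heap-allocated, mutex-guarded std::shared_ptr singletons (OpenCLRuntime, OpenCLWrapper) with function-local statics, and swaps std::make_shared for new (std::nothrow) plus explicit null checks. The segmentation fault came from the old DeleteInstance()/shared_ptr teardown path; a function-local static is constructed once, thread-safely, and destroyed automatically at program exit. A minimal sketch of the pattern the patch adopts (stand-in names, not the MindSpore API; it assumes Init() guards itself with an init flag, as OpenCLRuntime::Init() does):

#include <cstdio>

// Stand-in for OpenCLRuntime; names here are illustrative only.
class Runtime {
 public:
  static Runtime *GetInstance() {
    // A function-local static is initialized exactly once and thread-safely
    // since C++11, so no mutex or explicit DeleteInstance() is required.
    static Runtime runtime;
    runtime.Init();  // assumed idempotent, as OpenCLRuntime::Init() guards itself
    return &runtime;
  }
  void Init() {
    if (init_done_) return;  // repeated calls are harmless
    init_done_ = true;
    std::puts("initialized once");
  }

 private:
  Runtime() = default;
  ~Runtime() = default;  // runs automatically during static teardown at exit
  bool init_done_ = false;
};

int main() {
  Runtime::GetInstance();
  Runtime::GetInstance();  // same instance; Init() is a no-op this time
  return 0;
}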

View File

@@ -35,39 +35,17 @@ using mindspore::kernel::CLErrorCode;
namespace mindspore::lite::opencl {
std::map<std::string, std::string> g_opencl_program_map;
static std::map<std::string, std::string> g_opencl_program_map;
static std::mutex g_mtx;
static std::mutex g_init_mtx;
// Magic numbers: Adreno GPU model number -> subgroup size.
static std::map<int, int> AdrenoSubGroup{
{640, 128}, {630, 128}, {616, 128}, {612, 64}, {610, 64}, {540, 32}, {530, 32},
{512, 32}, {510, 32}, {509, 32}, {506, 32}, {505, 32}, {405, 32}, {330, 16},
};
#ifdef USE_OPENCL_WRAPPER
std::shared_ptr<OpenCLWrapper> OpenCLWrapper::opencl_wrapper_singleton_ = nullptr;
#endif
std::shared_ptr<OpenCLRuntime> OpenCLRuntime::opencl_runtime_singleton_ = nullptr;
bool OpenCLRuntime::init_done_ = false;
OpenCLRuntime *OpenCLRuntime::GetInstance() {
std::unique_lock<std::mutex> lck(g_mtx);
if (opencl_runtime_singleton_.get() == nullptr) {
opencl_runtime_singleton_.reset(new OpenCLRuntime());
opencl_runtime_singleton_->Init();
}
return opencl_runtime_singleton_.get();
}
void OpenCLRuntime::DeleteInstance() {
std::unique_lock<std::mutex> lck(g_mtx);
init_done_ = false;
if (opencl_runtime_singleton_ != nullptr) {
opencl_runtime_singleton_.reset();
opencl_runtime_singleton_ = nullptr;
}
static OpenCLRuntime ocl_runtime;
ocl_runtime.Init();
return &ocl_runtime;
}
OpenCLRuntime::OpenCLRuntime() { default_build_opts_ = " -cl-mad-enable -cl-fast-relaxed-math -Werror"; }
@@ -88,7 +66,7 @@ int OpenCLRuntime::Init() {
MS_LOG(INFO) << "CL_HPP_MINIMUM_OPENCL_VERSION " << CL_HPP_MINIMUM_OPENCL_VERSION;
#ifdef USE_OPENCL_WRAPPER
if (false == OpenCLWrapper::GetInstance()->LoadOpenCLLibrary()) {
if (OpenCLWrapper::GetInstance()->LoadOpenCLLibrary() == false) {
MS_LOG(ERROR) << "Load OpenCL symbols failed!";
return RET_ERROR;
}
@@ -123,7 +101,11 @@ int OpenCLRuntime::Init() {
return RET_ERROR;
}
device_ = std::make_shared<cl::Device>();
device_ = new (std::nothrow) cl::Device();
if (device_ == nullptr) {
MS_LOG(ERROR) << "Create OpenCL device failed!";
return RET_ERROR;
}
*device_ = devices[0];
max_work_item_sizes_ = device_->getInfo<CL_DEVICE_MAX_WORK_ITEM_SIZES>();
const std::string device_name = device_->getInfo<CL_DEVICE_NAME>();
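Note on the hunk above: std::make_shared gives way to new (std::nothrow) plus a null check. Unlike plain new, the nothrow form returns nullptr on allocation failure instead of throwing std::bad_alloc, which fits the runtime's error-code style (RET_ERROR) rather than exceptions. A self-contained sketch of the idiom (generic array, not the real cl::Device):

#include <cstdio>
#include <new>

int main() {
  // new (std::nothrow) reports failure by returning nullptr instead of
  // throwing std::bad_alloc, so the caller must test the pointer.
  int *buf = new (std::nothrow) int[64];
  if (buf == nullptr) {
    std::fprintf(stderr, "allocation failed\n");
    return 1;  // analogous to returning RET_ERROR in the runtime
  }
  delete[] buf;
  return 0;
}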
@@ -144,20 +126,21 @@ int OpenCLRuntime::Init() {
MS_LOG(INFO) << "Create special opencl context to share with OpenGL";
cl_context_properties context_prop[] = {CL_GL_CONTEXT_KHR, (cl_context_properties)eglGetCurrentContext(),
CL_EGL_DISPLAY_KHR, (cl_context_properties)eglGetCurrentDisplay(), 0};
context_ = std::make_shared<cl::Context>(std::vector<cl::Device>{*device_}, context_prop, nullptr, nullptr, &ret);
context_ = new (std::nothrow) cl::Context(std::vector<cl::Device>{*device_}, context_prop, nullptr, nullptr, &ret);
if (ret != CL_SUCCESS) {
MS_LOG(ERROR) << "Create special OpenCL context falied, Create common OpenCL context then.";
context_ = std::make_shared<cl::Context>(std::vector<cl::Device>{*device_}, nullptr, nullptr, nullptr, &ret);
if (ret != CL_SUCCESS || context_ == nullptr) {
MS_LOG(ERROR) << "Create special OpenCL context failed, Create common OpenCL context then.";
context_ = new (std::nothrow) cl::Context(std::vector<cl::Device>{*device_}, nullptr, nullptr, nullptr, &ret);
if (context_ == nullptr) {
MS_LOG(ERROR) << "Create OpenCL context failed!";
return RET_ERROR;
}
}
#else
MS_LOG(INFO) << "Create common opencl context";
// cl_context_properties context_prop[] = {CL_CONTEXT_PLATFORM, (cl_context_properties)platforms[0](),
// CL_PRINTF_CALLBACK_ARM, (cl_context_properties)printf_callback, 0};
// context_ = std::make_shared<cl::Context>(std::vector<cl::Device>{*device_}, context_prop, nullptr, nullptr, &err);
context_ = std::make_shared<cl::Context>(std::vector<cl::Device>{*device_}, nullptr, nullptr, nullptr, &ret);
context_ = new (std::nothrow) cl::Context(std::vector<cl::Device>{*device_}, nullptr, nullptr, nullptr, &ret);
#endif
if (ret != CL_SUCCESS) {
if (ret != CL_SUCCESS || context_ == nullptr) {
MS_LOG(ERROR) << "Context create failed: " << CLErrorCode(ret);
return RET_ERROR;
}
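The hunk above keeps the two-stage context creation: first try a GL-sharing context (CL_GL_CONTEXT_KHR / CL_EGL_DISPLAY_KHR properties), then fall back to a plain context on failure; the new code additionally null-checks each attempt. A rough sketch of that control flow with stand-in functions (no real OpenCL calls; the failure is simulated):

#include <cstdio>
#include <new>

struct Context {};  // stand-in for cl::Context

Context *CreateGlSharedContext() { return nullptr; }  // pretend the GL-shared path failed
Context *CreateCommonContext() { return new (std::nothrow) Context(); }

int main() {
  Context *ctx = CreateGlSharedContext();
  if (ctx == nullptr) {
    std::puts("special context failed, falling back to a common context");
    ctx = CreateCommonContext();
    if (ctx == nullptr) return 1;  // both attempts failed
  }
  delete ctx;
  return 0;
}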
@@ -203,13 +186,17 @@ int OpenCLRuntime::Init() {
properties |= CL_QUEUE_PROFILING_ENABLE;
#endif
default_command_queue_ = std::make_shared<cl::CommandQueue>(*context_, *device_, properties, &ret);
if (ret != CL_SUCCESS) {
default_command_queue_ = new (std::nothrow) cl::CommandQueue(*context_, *device_, properties, &ret);
if (ret != CL_SUCCESS || default_command_queue_ == nullptr) {
MS_LOG(ERROR) << "Command Queue create failed: " << CLErrorCode(ret);
return RET_ERROR;
}
allocator_ = std::make_shared<OpenCLAllocator>();
allocator_ = new (std::nothrow) OpenCLAllocator();
if (allocator_ == nullptr) {
MS_LOG(ERROR) << "Command OpenCL allocator failed!";
return RET_ERROR;
}
#ifdef PROGRAM_WITH_IL
std::string flag = "";
binary_program_ = CreateProgramFromIL(g_program_binary, flag);
@@ -221,17 +208,18 @@ int OpenCLRuntime::Init() {
}
OpenCLRuntime::~OpenCLRuntime() {
init_done_ = false;
program_map_.clear();
// allocator_->Clear();
allocator_.reset();
default_command_queue_.reset();
context_.reset();
device_.reset();
delete allocator_;
delete default_command_queue_;
delete context_;
delete device_;
OpenCLWrapper::GetInstance()->UnLoadOpenCLLibrary();
}
cl::Context *OpenCLRuntime::Context() { return context_.get(); }
cl::Context *OpenCLRuntime::Context() { return context_; }
cl::Device *OpenCLRuntime::Device() { return device_.get(); }
cl::Device *OpenCLRuntime::Device() { return device_; }
uint64_t OpenCLRuntime::DeviceGlobalMemoryCacheSize() const { return global_memery_cachesize_; }
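With raw pointers, the destructor above must free each member explicitly. Note that delete on a null pointer is a well-defined no-op in C++, so a runtime that failed partway through Init() still tears down safely, and the library unload stays last because the cl objects may still call into it. A small demonstration of the null-delete guarantee (stand-in type):

#include <cstdio>
#include <new>

struct Resource {
  ~Resource() { std::puts("resource released"); }
};

int main() {
  Resource *never_created = nullptr;
  delete never_created;  // deleting nullptr is a no-op, not an error
  Resource *created = new (std::nothrow) Resource();
  delete created;        // prints "resource released"
  return 0;
}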
@@ -262,9 +250,7 @@ uint32_t OpenCLRuntime::GetSubGroupSize(const cl::Kernel &kernel, const cl::NDRa
sub_group_size = 0;
}
#else
if (AdrenoSubGroup.find(gpu_info_.model_num) != AdrenoSubGroup.end()) {
sub_group_size = AdrenoSubGroup[gpu_info_.model_num];
}
sub_group_size = 0;
#endif
}
@@ -337,7 +323,7 @@ int OpenCLRuntime::BuildKernel(cl::Kernel &kernel, const std::string &program_na
int OpenCLRuntime::RunKernel(const cl::Kernel &kernel, const std::vector<size_t> &global,
const std::vector<size_t> &local, cl::CommandQueue *command_queue) {
if (command_queue == nullptr) {
command_queue = default_command_queue_.get();
command_queue = default_command_queue_;
}
MS_ASSERT(local.size() == 0 || local.size() == global.size());
std::vector<size_t> internal_global_ws = global;
@@ -462,7 +448,7 @@ bool OpenCLRuntime::BuildProgram(const std::string &build_options, const cl::Pro
bool OpenCLRuntime::CopyDeviceMemToHost(void *dst, const void *src, size_t size, cl::CommandQueue *command_queue,
bool sync) const {
if (command_queue == nullptr) {
command_queue = default_command_queue_.get();
command_queue = default_command_queue_;
}
cl_int cl_ret = CL_SUCCESS;
const cl::Buffer *buffer = static_cast<const cl::Buffer *>(src);
@@ -475,7 +461,7 @@ bool OpenCLRuntime::CopyDeviceMemToHost(void *dst, const void *src, size_t size,
bool OpenCLRuntime::CopyHostMemToDevice(const void *dst, const void *src, size_t size, cl::CommandQueue *command_queue,
bool sync) const {
if (command_queue == nullptr) {
command_queue = default_command_queue_.get();
command_queue = default_command_queue_;
}
cl_int cl_ret = CL_SUCCESS;
const cl::Buffer *buffer = static_cast<const cl::Buffer *>(dst);
@@ -488,7 +474,7 @@ bool OpenCLRuntime::CopyHostMemToDevice(const void *dst, const void *src, size_t
void *OpenCLRuntime::MapBuffer(const cl::Buffer &buffer, int flags, size_t size, cl::CommandQueue *command_queue,
bool sync) const {
if (command_queue == nullptr) {
command_queue = default_command_queue_.get();
command_queue = default_command_queue_;
}
return command_queue->enqueueMapBuffer(buffer, sync, flags, 0, size);
}
@@ -498,7 +484,7 @@ int OpenCLRuntime::MapBuffer(void *host_ptr, int flags, size_t size, cl::Command
return RET_OK;
}
if (command_queue == nullptr) {
command_queue = default_command_queue_.get();
command_queue = default_command_queue_;
}
return command_queue->enqueueMapSVM(host_ptr, sync, flags, size);
}
@@ -506,7 +492,7 @@ int OpenCLRuntime::MapBuffer(void *host_ptr, int flags, size_t size, cl::Command
void *OpenCLRuntime::MapBuffer(const cl::Image2D &buffer, bool sync, int flags, const std::vector<size_t> &region,
cl::CommandQueue *command_queue) const {
if (command_queue == nullptr) {
command_queue = default_command_queue_.get();
command_queue = default_command_queue_;
}
cl::size_type row_pitch;
cl::size_type slice_pitch;
@@ -517,7 +503,7 @@ void *OpenCLRuntime::MapBuffer(const cl::Image2D &buffer, bool sync, int flags,
int OpenCLRuntime::UnmapBuffer(const cl::Memory &buffer, void *host_ptr, cl::CommandQueue *command_queue) const {
if (command_queue == nullptr) {
command_queue = default_command_queue_.get();
command_queue = default_command_queue_;
}
return command_queue->enqueueUnmapMemObject(buffer, host_ptr);
}
@@ -527,14 +513,14 @@ int OpenCLRuntime::UnmapBuffer(void *host_ptr, cl::CommandQueue *command_queue)
return RET_OK;
}
if (command_queue == nullptr) {
command_queue = default_command_queue_.get();
command_queue = default_command_queue_;
}
return command_queue->enqueueUnmapSVM(host_ptr);
}
bool OpenCLRuntime::SyncCommandQueue(cl::CommandQueue *command_queue) {
if (command_queue == nullptr) {
command_queue = default_command_queue_.get();
command_queue = default_command_queue_;
}
cl_int ret = command_queue->finish();
if (ret != CL_SUCCESS) {

View File

@@ -20,7 +20,6 @@ * you may not use this file except in compliance with the License.
#include <vector>
#include <map>
#include <memory>
#include <mutex>
#include <set>
#include <string>
#include <type_traits>
@@ -38,9 +37,6 @@ struct GpuInfo {
float opencl_version = 0;
};
// Base GPU cache size used for computing local work group size.
const int32_t g_base_gpu_mem_cachesize = 16384;
class OpenCLRuntime {
public:
static OpenCLRuntime *GetInstance();
@@ -54,8 +50,8 @@ class OpenCLRuntime {
cl::Context *Context();
cl::Device *Device();
OpenCLAllocator *GetAllocator() { return allocator_.get(); }
cl::CommandQueue *GetDefaultCommandQueue() { return default_command_queue_.get(); }
OpenCLAllocator *GetAllocator() { return allocator_; }
cl::CommandQueue *GetDefaultCommandQueue() { return default_command_queue_; }
uint64_t DeviceGlobalMemoryCacheSize() const;
int DeviceMaxWorkGroupSize() const;
uint32_t DeviceComputeUnits() const;
@@ -146,13 +142,12 @@
bool BuildProgram(const std::string &build_options, const cl::Program &program);
private:
static std::shared_ptr<OpenCLRuntime> opencl_runtime_singleton_;
static bool init_done_;
std::shared_ptr<cl::CommandQueue> default_command_queue_{nullptr};
std::shared_ptr<cl::Context> context_{nullptr};
std::shared_ptr<cl::Device> device_{nullptr};
std::shared_ptr<OpenCLAllocator> allocator_{nullptr};
std::map<std::string, cl::Program> program_map_{};
cl::CommandQueue *default_command_queue_{nullptr};
cl::Context *context_{nullptr};
cl::Device *device_{nullptr};
OpenCLAllocator *allocator_{nullptr};
std::map<std::string, cl::Program> program_map_;
cl::Program binary_program_{0};
uint64_t global_memery_cachesize_{0};
int max_work_group_size;
@@ -169,5 +164,4 @@
};
} // namespace mindspore::lite::opencl
#endif // MINDSPORE_LITE_SRC_OPENCL_RUNTIME_H_

View File

@@ -66,19 +66,13 @@ static const std::vector<std::string> g_opencl_library_paths = {
};
OpenCLWrapper *OpenCLWrapper::GetInstance() {
static std::once_flag opencl_wrapper_once;
std::call_once(opencl_wrapper_once,
[]() { opencl_wrapper_singleton_ = std::shared_ptr<OpenCLWrapper>(new OpenCLWrapper()); });
return opencl_wrapper_singleton_.get();
static OpenCLWrapper ocl_wrapper;
return &ocl_wrapper;
}
OpenCLWrapper::OpenCLWrapper() {}
OpenCLWrapper::~OpenCLWrapper() {
if (nullptr == opencl_wrapper_singleton_.get()) return;
opencl_wrapper_singleton_->UnLoadOpenCLLibrary();
}
OpenCLWrapper::~OpenCLWrapper() {}
// load default library path
bool OpenCLWrapper::LoadOpenCLLibrary() {
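For OpenCLWrapper the same simplification applies: the std::call_once + std::shared_ptr machinery removed above and the new function-local static both guarantee one-time construction, but since C++11 the static is the lighter option. For comparison, a sketch of both forms side by side (stand-in Wrapper type, illustrative only):

#include <memory>
#include <mutex>

struct Wrapper {};  // stand-in for OpenCLWrapper

static std::shared_ptr<Wrapper> g_wrapper_singleton;

Wrapper *GetInstanceBefore() {
  static std::once_flag once;
  std::call_once(once, [] { g_wrapper_singleton = std::shared_ptr<Wrapper>(new Wrapper()); });
  return g_wrapper_singleton.get();
}

Wrapper *GetInstanceAfter() {
  static Wrapper wrapper;  // equivalent once-only guarantee, no heap or flag
  return &wrapper;
}

int main() { return (GetInstanceBefore() != nullptr && GetInstanceAfter() != nullptr) ? 0 : 1; }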

View File

@@ -230,8 +230,7 @@ class OpenCLWrapper {
bool LoadLibraryFromPath(const std::string &path);
private:
static std::shared_ptr<OpenCLWrapper> opencl_wrapper_singleton_;
void *handle_ = nullptr;
void *handle_{nullptr};
};
} // namespace mindspore::lite::opencl

View File
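The test-file hunks from here on are all the same one-line change: the trailing lite::opencl::OpenCLRuntime::DeleteInstance() call is dropped from each test, because a function-local static is destroyed automatically during normal program exit, and a second manual release was exactly the kind of double-free that can segfault. A minimal illustration (stand-in Runtime type, as in the sketch near the top):

#include <cstdio>

struct Runtime {
  ~Runtime() { std::puts("runtime destroyed during static teardown"); }
  static Runtime *GetInstance() {
    static Runtime runtime;
    return &runtime;
  }
};

int main() {
  Runtime::GetInstance();  // tests just use the instance...
  return 0;                // ...and never call DeleteInstance(); the destructor
                           // runs automatically after main returns
}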

@@ -173,7 +173,6 @@ TEST_F(TestActivationOpenCL, ReluFp32_dim4) {
delete input_tensor;
delete output_tensor;
delete sub_graph;
lite::opencl::OpenCLRuntime::DeleteInstance();
}
TEST_F(TestActivationOpenCL, Relu6Fp32_dim4) {
@@ -276,7 +275,6 @@ TEST_F(TestActivationOpenCL, Relu6Fp32_dim4) {
delete input_tensor;
delete output_tensor;
delete sub_graph;
lite::opencl::OpenCLRuntime::DeleteInstance();
}
TEST_F(TestActivationOpenCL, SigmoidFp32_dim4) {
@@ -379,7 +377,6 @@ TEST_F(TestActivationOpenCL, SigmoidFp32_dim4) {
delete input_tensor;
delete output_tensor;
delete sub_graph;
lite::opencl::OpenCLRuntime::DeleteInstance();
}
TEST_F(TestActivationOpenCL, LeakyReluFp32_dim4) {
@@ -483,6 +480,5 @@ TEST_F(TestActivationOpenCL, LeakyReluFp32_dim4) {
delete input_tensor;
delete output_tensor;
delete sub_graph;
lite::opencl::OpenCLRuntime::DeleteInstance();
}
} // namespace mindspore

View File

@@ -202,7 +202,6 @@ void TestCase(const std::vector<int> &shape_a, const std::vector<int> &shape_b)
for (auto tensor : outputs) {
delete tensor;
}
lite::opencl::OpenCLRuntime::DeleteInstance();
}
class TestArithmeticOpenCL : public mindspore::CommonTest {

View File

@@ -143,7 +143,6 @@ TEST_F(TestAvgPoolingOpenCL, AvgPoolFp32) {
delete pooling_kernel;
delete pGraph;
delete param;
lite::opencl::OpenCLRuntime::DeleteInstance();
}
} // namespace mindspore

View File

@@ -155,6 +155,5 @@ TEST_F(TestBatchnormOpenCL, Batchnorminput_dim4) {
delete param;
delete batchnorm_kernel;
delete sub_graph;
lite::opencl::OpenCLRuntime::DeleteInstance();
}
} // namespace mindspore

View File

@@ -213,6 +213,5 @@ TEST_F(TestConcatOpenCL, ConcatFp32_2input_dim4_axis3) {
delete param;
delete concat_kernel;
delete sub_graph;
lite::opencl::OpenCLRuntime::DeleteInstance();
}
} // namespace mindspore

View File

@@ -175,7 +175,6 @@ void RunTestCase(const std::vector<int> shape, const std::vector<std::string> fi
inputs[0]->SetData(nullptr);
outputs[0]->SetData(nullptr);
MS_LOG(INFO) << "Test Conv2dTransposeFp32 passed";
lite::opencl::OpenCLRuntime::DeleteInstance();
}
TEST_F(TestConv2dTransposeOpenCL, Conv2dTransposeFp32) {
int pad = 0;

View File

@@ -136,7 +136,6 @@ void TEST_MAIN(schema::Format input_format, schema::Format output_format, const
bias_tensor.SetData(nullptr);
delete param;
delete sub_graph;
lite::opencl::OpenCLRuntime::DeleteInstance();
}
TEST_F(TestConvolutionOpenCL, in1x1x64x512_out1x1x64x7358_k11_s11_p0000) {

View File

@@ -195,7 +195,6 @@ TEST_F(TestConvolutionDwOpenCL, NoPadNC4HW4Fp32) {
2.2294958, 1.6570128, 2.465089, 1.4294086, 2.7941442, 1.7871612, 2.188921, 1.0601988};
DepthWiseTestMain(conv_param.get(), input_data, weight_data, gnd_data, schema::Format_NC4HW4);
lite::opencl::OpenCLRuntime::DeleteInstance();
}
TEST_F(TestConvolutionDwOpenCL, PadNC4HW4Fp32) {
@@ -268,7 +267,6 @@ TEST_F(TestConvolutionDwOpenCL, PadNC4HW4Fp32) {
1.0517888, 0.59817517, 0.75649744, 1.2075498, 0.38804203};
DepthWiseTestMain(conv_param.get(), input_data, weight_data, gnd_data, schema::Format_NC4HW4);
lite::opencl::OpenCLRuntime::DeleteInstance();
}
TEST_F(TestConvolutionDwOpenCL, NoPadNHWC4Fp32) {
@@ -314,7 +312,6 @@ TEST_F(TestConvolutionDwOpenCL, NoPadNHWC4Fp32) {
2.2294958, 1.6570128, 2.465089, 1.4294086, 2.7941442, 1.7871612, 2.188921, 1.0601988};
DepthWiseTestMain(conv_param.get(), input_data, weight_data, gnd_data, schema::Format_NHWC4);
lite::opencl::OpenCLRuntime::DeleteInstance();
}
TEST_F(TestConvolutionDwOpenCL, PadNHWC4Fp32) {
@@ -387,7 +384,6 @@ TEST_F(TestConvolutionDwOpenCL, PadNHWC4Fp32) {
1.0517888, 0.59817517, 0.75649744, 1.2075498, 0.38804203};
DepthWiseTestMain(conv_param.get(), input_data, weight_data, gnd_data, schema::Format_NHWC4);
lite::opencl::OpenCLRuntime::DeleteInstance();
}
TEST_F(TestConvolutionDwOpenCL, ConvDwNoPadFp32) {
@@ -512,7 +508,6 @@ TEST_F(TestConvolutionDwOpenCL, ConvDwNoPadFp32) {
inputs[1]->SetData(nullptr);
inputs[2]->SetData(nullptr);
MS_LOG(INFO) << "TestConvolutionDwNoPadFp32 passed";
lite::opencl::OpenCLRuntime::DeleteInstance();
}
TEST_F(TestConvolutionDwOpenCL, ConvDwPadFp32) {
@@ -673,7 +668,6 @@ TEST_F(TestConvolutionDwOpenCL, ConvDwPadFp32) {
inputs[1]->SetData(nullptr);
inputs[2]->SetData(nullptr);
MS_LOG(INFO) << "TestConvolutionDwPadFp32 passed";
lite::opencl::OpenCLRuntime::DeleteInstance();
}
TEST_F(TestConvolutionDwOpenCL, ProfilingMobilenetv2) {
@@ -739,7 +733,6 @@ TEST_F(TestConvolutionDwOpenCL, ProfilingMobilenetv2) {
DepthWiseTestMain(conv_param.get(), input_data.get(), weight_data.get(), nullptr, schema::Format_NHWC4, false);
}
}
lite::opencl::OpenCLRuntime::DeleteInstance();
}
TEST_F(TestConvolutionDwOpenCL, Buffer2Image) {
@@ -788,6 +781,5 @@ TEST_F(TestConvolutionDwOpenCL, Buffer2Image) {
}
// DepthWiseTestMain(conv_param, input_data, weight_data, gnd_data, schema::Format_NC4HW4, true);
DepthWiseTestMain(conv_param.get(), input_data.get(), weight_data.get(), gnd_data.get(), schema::Format_NHWC4, true);
lite::opencl::OpenCLRuntime::DeleteInstance();
}
} // namespace mindspore

View File

@@ -115,7 +115,6 @@ TEST_F(TestMatMulOpenCL, MatMulFp32) {
CompareOutputData(output_data, correct_data, co, 0.0001);
tensor_x->SetData(nullptr);
tensor_out->SetData(nullptr);
lite::opencl::OpenCLRuntime::DeleteInstance();
MS_LOG(INFO) << "TestMatMulFp32 passed";
}
} // namespace mindspore

View File

@@ -118,7 +118,6 @@ TEST_F(TestMaxPoolingOpenCL, MaxPool_1_32_512_96) {
}
delete pooling_kernel;
delete pGraph;
lite::opencl::OpenCLRuntime::DeleteInstance();
}
} // namespace mindspore

View File

@@ -183,6 +183,5 @@ TEST_F(TestPReluOpenCL, PReluFp32_dim4) {
delete param;
delete prelu_kernel;
delete sub_graph;
lite::opencl::OpenCLRuntime::DeleteInstance();
}
} // namespace mindspore

View File

@@ -104,7 +104,6 @@ TEST_F(TestReshapeOpenCL, ReshapeFp32) {
inputs[0]->SetData(nullptr);
outputs[0]->SetData(nullptr);
lite::opencl::OpenCLRuntime::DeleteInstance();
MS_LOG(INFO) << "Test ReshapeFp32 passed";
}

View File

@@ -144,6 +144,5 @@ TEST_F(TestSliceOpenCL, Sliceinput_dim4) {
}
delete slice_kernel;
delete sub_graph;
lite::opencl::OpenCLRuntime::DeleteInstance();
}
} // namespace mindspore

View File

@@ -92,7 +92,6 @@ void RunTestCase(std::vector<int> input_shape, std::vector<int> output_shape, st
}
delete kernel;
delete pGraph;
lite::opencl::OpenCLRuntime::DeleteInstance();
}
TEST_F(TestSoftmaxOpenCL, Softmax_1) {

View File

@@ -103,6 +103,5 @@ TEST_F(TestToFormatOpenCL, ToFormatNHWC2NCHW) {
// compare
CompareOutputData(output_data, correct_data, h * w * c, 0.00001);
MS_LOG(INFO) << "Test TransposeFp32 passed";
lite::opencl::OpenCLRuntime::DeleteInstance();
}
} // namespace mindspore

View File

@@ -106,7 +106,6 @@ TEST_F(TestTransposeOpenCL, TransposeFp32) {
inputs[0]->SetData(nullptr);
outputs[0]->SetData(nullptr);
lite::opencl::OpenCLRuntime::DeleteInstance();
MS_LOG(INFO) << "Test TransposeFp32 passed";
}