!5123 fixed segmentation fault for opencl mode
Merge pull request !5123 from liuchao/master
This commit is contained in:
commit
4efa20a137
|
@ -35,39 +35,17 @@ using mindspore::kernel::CLErrorCode;
|
|||
|
||||
namespace mindspore::lite::opencl {
|
||||
|
||||
std::map<std::string, std::string> g_opencl_program_map;
|
||||
|
||||
static std::map<std::string, std::string> g_opencl_program_map;
|
||||
static std::mutex g_mtx;
|
||||
static std::mutex g_init_mtx;
|
||||
|
||||
// Hard-coded lookup table: Adreno GPU model number -> sub-group size
|
||||
static std::map<int, int> AdrenoSubGroup{
|
||||
{640, 128}, {630, 128}, {616, 128}, {612, 64}, {610, 64}, {540, 32}, {530, 32},
|
||||
{512, 32}, {510, 32}, {509, 32}, {506, 32}, {505, 32}, {405, 32}, {330, 16},
|
||||
};
|
||||
|
||||
#ifdef USE_OPENCL_WRAPPER
|
||||
std::shared_ptr<OpenCLWrapper> OpenCLWrapper::opencl_wrapper_singleton_ = nullptr;
|
||||
#endif
|
||||
std::shared_ptr<OpenCLRuntime> OpenCLRuntime::opencl_runtime_singleton_ = nullptr;
|
||||
bool OpenCLRuntime::init_done_ = false;
|
||||
|
||||
OpenCLRuntime *OpenCLRuntime::GetInstance() {
|
||||
std::unique_lock<std::mutex> lck(g_mtx);
|
||||
if (opencl_runtime_singleton_.get() == nullptr) {
|
||||
opencl_runtime_singleton_.reset(new OpenCLRuntime());
|
||||
opencl_runtime_singleton_->Init();
|
||||
}
|
||||
return opencl_runtime_singleton_.get();
|
||||
}
|
||||
|
||||
void OpenCLRuntime::DeleteInstance() {
|
||||
std::unique_lock<std::mutex> lck(g_mtx);
|
||||
init_done_ = false;
|
||||
if (opencl_runtime_singleton_ != nullptr) {
|
||||
opencl_runtime_singleton_.reset();
|
||||
opencl_runtime_singleton_ = nullptr;
|
||||
}
|
||||
static OpenCLRuntime ocl_runtime;
|
||||
ocl_runtime.Init();
|
||||
return &ocl_runtime;
|
||||
}
|
||||
|
||||
// Constructs the runtime object; the only state set here is the default
// build-option string kept in default_build_opts_.
OpenCLRuntime::OpenCLRuntime()
    : default_build_opts_(" -cl-mad-enable -cl-fast-relaxed-math -Werror") {}
|
||||
|
@ -88,7 +66,7 @@ int OpenCLRuntime::Init() {
|
|||
MS_LOG(INFO) << "CL_HPP_MINIMUM_OPENCL_VERSION " << CL_HPP_MINIMUM_OPENCL_VERSION;
|
||||
|
||||
#ifdef USE_OPENCL_WRAPPER
|
||||
if (false == OpenCLWrapper::GetInstance()->LoadOpenCLLibrary()) {
|
||||
if (OpenCLWrapper::GetInstance()->LoadOpenCLLibrary() == false) {
|
||||
MS_LOG(ERROR) << "Load OpenCL symbols failed!";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
@ -123,7 +101,11 @@ int OpenCLRuntime::Init() {
|
|||
return RET_ERROR;
|
||||
}
|
||||
|
||||
device_ = std::make_shared<cl::Device>();
|
||||
device_ = new (std::nothrow) cl::Device();
|
||||
if (device_ == nullptr) {
|
||||
MS_LOG(ERROR) << "Create OpenCL device failed!";
|
||||
return RET_ERROR;
|
||||
}
|
||||
*device_ = devices[0];
|
||||
max_work_item_sizes_ = device_->getInfo<CL_DEVICE_MAX_WORK_ITEM_SIZES>();
|
||||
const std::string device_name = device_->getInfo<CL_DEVICE_NAME>();
|
||||
|
@ -144,20 +126,21 @@ int OpenCLRuntime::Init() {
|
|||
MS_LOG(INFO) << "Create special opencl context to share with OpenGL";
|
||||
cl_context_properties context_prop[] = {CL_GL_CONTEXT_KHR, (cl_context_properties)eglGetCurrentContext(),
|
||||
CL_EGL_DISPLAY_KHR, (cl_context_properties)eglGetCurrentDisplay(), 0};
|
||||
context_ = std::make_shared<cl::Context>(std::vector<cl::Device>{*device_}, context_prop, nullptr, nullptr, &ret);
|
||||
context_ = new (std::nothrow) cl::Context(std::vector<cl::Device>{*device_}, context_prop, nullptr, nullptr, &ret);
|
||||
|
||||
if (ret != CL_SUCCESS) {
|
||||
MS_LOG(ERROR) << "Create special OpenCL context falied, Create common OpenCL context then.";
|
||||
context_ = std::make_shared<cl::Context>(std::vector<cl::Device>{*device_}, nullptr, nullptr, nullptr, &ret);
|
||||
if (ret != CL_SUCCESS || context_ == nullptr) {
|
||||
MS_LOG(ERROR) << "Create special OpenCL context failed, Create common OpenCL context then.";
|
||||
context_ = new (std::nothrow) cl::Context(std::vector<cl::Device>{*device_}, nullptr, nullptr, nullptr, &ret);
|
||||
if (context_ == nullptr) {
|
||||
MS_LOG(ERROR) << "Create OpenCL context failed!";
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
#else
|
||||
MS_LOG(INFO) << "Create common opencl context";
|
||||
// cl_context_properties context_prop[] = {CL_CONTEXT_PLATFORM, (cl_context_properties)platforms[0](),
|
||||
// CL_PRINTF_CALLBACK_ARM, (cl_context_properties)printf_callback, 0};
|
||||
// context_ = std::make_shared<cl::Context>(std::vector<cl::Device>{*device_}, context_prop, nullptr, nullptr, &err);
|
||||
context_ = std::make_shared<cl::Context>(std::vector<cl::Device>{*device_}, nullptr, nullptr, nullptr, &ret);
|
||||
context_ = new (std::nothrow) cl::Context(std::vector<cl::Device>{*device_}, nullptr, nullptr, nullptr, &ret);
|
||||
#endif
|
||||
if (ret != CL_SUCCESS) {
|
||||
if (ret != CL_SUCCESS || context_ == nullptr) {
|
||||
MS_LOG(ERROR) << "Context create failed: " << CLErrorCode(ret);
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
@ -203,13 +186,17 @@ int OpenCLRuntime::Init() {
|
|||
properties |= CL_QUEUE_PROFILING_ENABLE;
|
||||
#endif
|
||||
|
||||
default_command_queue_ = std::make_shared<cl::CommandQueue>(*context_, *device_, properties, &ret);
|
||||
if (ret != CL_SUCCESS) {
|
||||
default_command_queue_ = new (std::nothrow) cl::CommandQueue(*context_, *device_, properties, &ret);
|
||||
if (ret != CL_SUCCESS || default_command_queue_ == nullptr) {
|
||||
MS_LOG(ERROR) << "Command Queue create failed: " << CLErrorCode(ret);
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
allocator_ = std::make_shared<OpenCLAllocator>();
|
||||
allocator_ = new (std::nothrow) OpenCLAllocator();
|
||||
if (allocator_ == nullptr) {
|
||||
MS_LOG(ERROR) << "Command OpenCL allocator failed!";
|
||||
return RET_ERROR;
|
||||
}
|
||||
#ifdef PROGRAM_WITH_IL
|
||||
std::string flag = "";
|
||||
binary_program_ = CreateProgramFromIL(g_program_binary, flag);
|
||||
|
@ -221,17 +208,18 @@ int OpenCLRuntime::Init() {
|
|||
}
|
||||
|
||||
OpenCLRuntime::~OpenCLRuntime() {
|
||||
init_done_ = false;
|
||||
program_map_.clear();
|
||||
// allocator_->Clear();
|
||||
allocator_.reset();
|
||||
default_command_queue_.reset();
|
||||
context_.reset();
|
||||
device_.reset();
|
||||
delete allocator_;
|
||||
delete default_command_queue_;
|
||||
delete context_;
|
||||
delete device_;
|
||||
OpenCLWrapper::GetInstance()->UnLoadOpenCLLibrary();
|
||||
}
|
||||
|
||||
cl::Context *OpenCLRuntime::Context() { return context_.get(); }
|
||||
cl::Context *OpenCLRuntime::Context() { return context_; }
|
||||
|
||||
cl::Device *OpenCLRuntime::Device() { return device_.get(); }
|
||||
cl::Device *OpenCLRuntime::Device() { return device_; }
|
||||
|
||||
// Accessor: reports the value currently held in global_memery_cachesize_.
uint64_t OpenCLRuntime::DeviceGlobalMemoryCacheSize() const {
  return global_memery_cachesize_;
}
|
||||
|
||||
|
@ -262,9 +250,7 @@ uint32_t OpenCLRuntime::GetSubGroupSize(const cl::Kernel &kernel, const cl::NDRa
|
|||
sub_group_size = 0;
|
||||
}
|
||||
#else
|
||||
if (AdrenoSubGroup.find(gpu_info_.model_num) != AdrenoSubGroup.end()) {
|
||||
sub_group_size = AdrenoSubGroup[gpu_info_.model_num];
|
||||
}
|
||||
sub_group_size = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -337,7 +323,7 @@ int OpenCLRuntime::BuildKernel(cl::Kernel &kernel, const std::string &program_na
|
|||
int OpenCLRuntime::RunKernel(const cl::Kernel &kernel, const std::vector<size_t> &global,
|
||||
const std::vector<size_t> &local, cl::CommandQueue *command_queue) {
|
||||
if (command_queue == nullptr) {
|
||||
command_queue = default_command_queue_.get();
|
||||
command_queue = default_command_queue_;
|
||||
}
|
||||
MS_ASSERT(local.size() == 0 || local.size() == global.size());
|
||||
std::vector<size_t> internal_global_ws = global;
|
||||
|
@ -462,7 +448,7 @@ bool OpenCLRuntime::BuildProgram(const std::string &build_options, const cl::Pro
|
|||
bool OpenCLRuntime::CopyDeviceMemToHost(void *dst, const void *src, size_t size, cl::CommandQueue *command_queue,
|
||||
bool sync) const {
|
||||
if (command_queue == nullptr) {
|
||||
command_queue = default_command_queue_.get();
|
||||
command_queue = default_command_queue_;
|
||||
}
|
||||
cl_int cl_ret = CL_SUCCESS;
|
||||
const cl::Buffer *buffer = static_cast<const cl::Buffer *>(src);
|
||||
|
@ -475,7 +461,7 @@ bool OpenCLRuntime::CopyDeviceMemToHost(void *dst, const void *src, size_t size,
|
|||
bool OpenCLRuntime::CopyHostMemToDevice(const void *dst, const void *src, size_t size, cl::CommandQueue *command_queue,
|
||||
bool sync) const {
|
||||
if (command_queue == nullptr) {
|
||||
command_queue = default_command_queue_.get();
|
||||
command_queue = default_command_queue_;
|
||||
}
|
||||
cl_int cl_ret = CL_SUCCESS;
|
||||
const cl::Buffer *buffer = static_cast<const cl::Buffer *>(dst);
|
||||
|
@ -488,7 +474,7 @@ bool OpenCLRuntime::CopyHostMemToDevice(const void *dst, const void *src, size_t
|
|||
void *OpenCLRuntime::MapBuffer(const cl::Buffer &buffer, int flags, size_t size, cl::CommandQueue *command_queue,
|
||||
bool sync) const {
|
||||
if (command_queue == nullptr) {
|
||||
command_queue = default_command_queue_.get();
|
||||
command_queue = default_command_queue_;
|
||||
}
|
||||
return command_queue->enqueueMapBuffer(buffer, sync, flags, 0, size);
|
||||
}
|
||||
|
@ -498,7 +484,7 @@ int OpenCLRuntime::MapBuffer(void *host_ptr, int flags, size_t size, cl::Command
|
|||
return RET_OK;
|
||||
}
|
||||
if (command_queue == nullptr) {
|
||||
command_queue = default_command_queue_.get();
|
||||
command_queue = default_command_queue_;
|
||||
}
|
||||
return command_queue->enqueueMapSVM(host_ptr, sync, flags, size);
|
||||
}
|
||||
|
@ -506,7 +492,7 @@ int OpenCLRuntime::MapBuffer(void *host_ptr, int flags, size_t size, cl::Command
|
|||
void *OpenCLRuntime::MapBuffer(const cl::Image2D &buffer, bool sync, int flags, const std::vector<size_t> &region,
|
||||
cl::CommandQueue *command_queue) const {
|
||||
if (command_queue == nullptr) {
|
||||
command_queue = default_command_queue_.get();
|
||||
command_queue = default_command_queue_;
|
||||
}
|
||||
cl::size_type row_pitch;
|
||||
cl::size_type slice_pitch;
|
||||
|
@ -517,7 +503,7 @@ void *OpenCLRuntime::MapBuffer(const cl::Image2D &buffer, bool sync, int flags,
|
|||
|
||||
int OpenCLRuntime::UnmapBuffer(const cl::Memory &buffer, void *host_ptr, cl::CommandQueue *command_queue) const {
|
||||
if (command_queue == nullptr) {
|
||||
command_queue = default_command_queue_.get();
|
||||
command_queue = default_command_queue_;
|
||||
}
|
||||
return command_queue->enqueueUnmapMemObject(buffer, host_ptr);
|
||||
}
|
||||
|
@ -527,14 +513,14 @@ int OpenCLRuntime::UnmapBuffer(void *host_ptr, cl::CommandQueue *command_queue)
|
|||
return RET_OK;
|
||||
}
|
||||
if (command_queue == nullptr) {
|
||||
command_queue = default_command_queue_.get();
|
||||
command_queue = default_command_queue_;
|
||||
}
|
||||
return command_queue->enqueueUnmapSVM(host_ptr);
|
||||
}
|
||||
|
||||
bool OpenCLRuntime::SyncCommandQueue(cl::CommandQueue *command_queue) {
|
||||
if (command_queue == nullptr) {
|
||||
command_queue = default_command_queue_.get();
|
||||
command_queue = default_command_queue_;
|
||||
}
|
||||
cl_int ret = command_queue->finish();
|
||||
if (ret != CL_SUCCESS) {
|
||||
|
|
|
@ -20,7 +20,6 @@ j* you may not use this file except in compliance with the License.
|
|||
#include <vector>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
|
@ -38,9 +37,6 @@ struct GpuInfo {
|
|||
float opencl_version = 0;
|
||||
};
|
||||
|
||||
// Base GPU cache size used for computing local work group size.
|
||||
const int32_t g_base_gpu_mem_cachesize = 16384;
|
||||
|
||||
class OpenCLRuntime {
|
||||
public:
|
||||
static OpenCLRuntime *GetInstance();
|
||||
|
@ -54,8 +50,8 @@ class OpenCLRuntime {
|
|||
|
||||
cl::Context *Context();
|
||||
cl::Device *Device();
|
||||
OpenCLAllocator *GetAllocator() { return allocator_.get(); }
|
||||
cl::CommandQueue *GetDefaultCommandQueue() { return default_command_queue_.get(); }
|
||||
OpenCLAllocator *GetAllocator() { return allocator_; }
|
||||
cl::CommandQueue *GetDefaultCommandQueue() { return default_command_queue_; }
|
||||
uint64_t DeviceGlobalMemoryCacheSize() const;
|
||||
int DeviceMaxWorkGroupSize() const;
|
||||
uint32_t DeviceComputeUnits() const;
|
||||
|
@ -146,13 +142,12 @@ class OpenCLRuntime {
|
|||
bool BuildProgram(const std::string &build_options, const cl::Program &program);
|
||||
|
||||
private:
|
||||
static std::shared_ptr<OpenCLRuntime> opencl_runtime_singleton_;
|
||||
static bool init_done_;
|
||||
std::shared_ptr<cl::CommandQueue> default_command_queue_{nullptr};
|
||||
std::shared_ptr<cl::Context> context_{nullptr};
|
||||
std::shared_ptr<cl::Device> device_{nullptr};
|
||||
std::shared_ptr<OpenCLAllocator> allocator_{nullptr};
|
||||
std::map<std::string, cl::Program> program_map_{};
|
||||
cl::CommandQueue *default_command_queue_{nullptr};
|
||||
cl::Context *context_{nullptr};
|
||||
cl::Device *device_{nullptr};
|
||||
OpenCLAllocator *allocator_{nullptr};
|
||||
std::map<std::string, cl::Program> program_map_;
|
||||
cl::Program binary_program_{0};
|
||||
uint64_t global_memery_cachesize_{0};
|
||||
int max_work_group_size;
|
||||
|
@ -169,5 +164,4 @@ class OpenCLRuntime {
|
|||
};
|
||||
|
||||
} // namespace mindspore::lite::opencl
|
||||
|
||||
#endif // MINDSPORE_LITE_SRC_OPENCL_RUNTIME_H_
|
||||
|
|
|
@ -66,19 +66,13 @@ static const std::vector<std::string> g_opencl_library_paths = {
|
|||
};
|
||||
|
||||
OpenCLWrapper *OpenCLWrapper::GetInstance() {
|
||||
static std::once_flag opencl_wrapper_once;
|
||||
std::call_once(opencl_wrapper_once,
|
||||
[]() { opencl_wrapper_singleton_ = std::shared_ptr<OpenCLWrapper>(new OpenCLWrapper()); });
|
||||
|
||||
return opencl_wrapper_singleton_.get();
|
||||
static OpenCLWrapper ocl_wrapper;
|
||||
return &ocl_wrapper;
|
||||
}
|
||||
|
||||
// Default constructor: performs no work of its own; members keep whatever
// in-class initializers the class declaration gives them.
OpenCLWrapper::OpenCLWrapper() = default;
|
||||
|
||||
OpenCLWrapper::~OpenCLWrapper() {
|
||||
if (nullptr == opencl_wrapper_singleton_.get()) return;
|
||||
opencl_wrapper_singleton_->UnLoadOpenCLLibrary();
|
||||
}
|
||||
OpenCLWrapper::~OpenCLWrapper() {}
|
||||
|
||||
// load default library path
|
||||
bool OpenCLWrapper::LoadOpenCLLibrary() {
|
||||
|
|
|
@ -230,8 +230,7 @@ class OpenCLWrapper {
|
|||
bool LoadLibraryFromPath(const std::string &path);
|
||||
|
||||
private:
|
||||
static std::shared_ptr<OpenCLWrapper> opencl_wrapper_singleton_;
|
||||
void *handle_ = nullptr;
|
||||
void *handle_{nullptr};
|
||||
};
|
||||
|
||||
} // namespace mindspore::lite::opencl
|
||||
|
|
|
@ -173,7 +173,6 @@ TEST_F(TestActivationOpenCL, ReluFp32_dim4) {
|
|||
delete input_tensor;
|
||||
delete output_tensor;
|
||||
delete sub_graph;
|
||||
lite::opencl::OpenCLRuntime::DeleteInstance();
|
||||
}
|
||||
|
||||
TEST_F(TestActivationOpenCL, Relu6Fp32_dim4) {
|
||||
|
@ -276,7 +275,6 @@ TEST_F(TestActivationOpenCL, Relu6Fp32_dim4) {
|
|||
delete input_tensor;
|
||||
delete output_tensor;
|
||||
delete sub_graph;
|
||||
lite::opencl::OpenCLRuntime::DeleteInstance();
|
||||
}
|
||||
|
||||
TEST_F(TestActivationOpenCL, SigmoidFp32_dim4) {
|
||||
|
@ -379,7 +377,6 @@ TEST_F(TestActivationOpenCL, SigmoidFp32_dim4) {
|
|||
delete input_tensor;
|
||||
delete output_tensor;
|
||||
delete sub_graph;
|
||||
lite::opencl::OpenCLRuntime::DeleteInstance();
|
||||
}
|
||||
|
||||
TEST_F(TestActivationOpenCL, LeakyReluFp32_dim4) {
|
||||
|
@ -483,6 +480,5 @@ TEST_F(TestActivationOpenCL, LeakyReluFp32_dim4) {
|
|||
delete input_tensor;
|
||||
delete output_tensor;
|
||||
delete sub_graph;
|
||||
lite::opencl::OpenCLRuntime::DeleteInstance();
|
||||
}
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -202,7 +202,6 @@ void TestCase(const std::vector<int> &shape_a, const std::vector<int> &shape_b)
|
|||
for (auto tensor : outputs) {
|
||||
delete tensor;
|
||||
}
|
||||
lite::opencl::OpenCLRuntime::DeleteInstance();
|
||||
}
|
||||
|
||||
class TestArithmeticOpenCL : public mindspore::CommonTest {
|
||||
|
|
|
@ -143,7 +143,6 @@ TEST_F(TestAvgPoolingOpenCL, AvgPoolFp32) {
|
|||
delete pooling_kernel;
|
||||
delete pGraph;
|
||||
delete param;
|
||||
lite::opencl::OpenCLRuntime::DeleteInstance();
|
||||
}
|
||||
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -155,6 +155,5 @@ TEST_F(TestBatchnormOpenCL, Batchnorminput_dim4) {
|
|||
delete param;
|
||||
delete batchnorm_kernel;
|
||||
delete sub_graph;
|
||||
lite::opencl::OpenCLRuntime::DeleteInstance();
|
||||
}
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -213,6 +213,5 @@ TEST_F(TestConcatOpenCL, ConcatFp32_2input_dim4_axis3) {
|
|||
delete param;
|
||||
delete concat_kernel;
|
||||
delete sub_graph;
|
||||
lite::opencl::OpenCLRuntime::DeleteInstance();
|
||||
}
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -175,7 +175,6 @@ void RunTestCase(const std::vector<int> shape, const std::vector<std::string> fi
|
|||
inputs[0]->SetData(nullptr);
|
||||
outputs[0]->SetData(nullptr);
|
||||
MS_LOG(INFO) << "Test Conv2dTransposeFp32 passed";
|
||||
lite::opencl::OpenCLRuntime::DeleteInstance();
|
||||
}
|
||||
TEST_F(TestConv2dTransposeOpenCL, Conv2dTransposeFp32) {
|
||||
int pad = 0;
|
||||
|
|
|
@ -136,7 +136,6 @@ void TEST_MAIN(schema::Format input_format, schema::Format output_format, const
|
|||
bias_tensor.SetData(nullptr);
|
||||
delete param;
|
||||
delete sub_graph;
|
||||
lite::opencl::OpenCLRuntime::DeleteInstance();
|
||||
}
|
||||
|
||||
TEST_F(TestConvolutionOpenCL, in1x1x64x512_out1x1x64x7358_k11_s11_p0000) {
|
||||
|
|
|
@ -195,7 +195,6 @@ TEST_F(TestConvolutionDwOpenCL, NoPadNC4HW4Fp32) {
|
|||
2.2294958, 1.6570128, 2.465089, 1.4294086, 2.7941442, 1.7871612, 2.188921, 1.0601988};
|
||||
|
||||
DepthWiseTestMain(conv_param.get(), input_data, weight_data, gnd_data, schema::Format_NC4HW4);
|
||||
lite::opencl::OpenCLRuntime::DeleteInstance();
|
||||
}
|
||||
|
||||
TEST_F(TestConvolutionDwOpenCL, PadNC4HW4Fp32) {
|
||||
|
@ -268,7 +267,6 @@ TEST_F(TestConvolutionDwOpenCL, PadNC4HW4Fp32) {
|
|||
1.0517888, 0.59817517, 0.75649744, 1.2075498, 0.38804203};
|
||||
|
||||
DepthWiseTestMain(conv_param.get(), input_data, weight_data, gnd_data, schema::Format_NC4HW4);
|
||||
lite::opencl::OpenCLRuntime::DeleteInstance();
|
||||
}
|
||||
|
||||
TEST_F(TestConvolutionDwOpenCL, NoPadNHWC4Fp32) {
|
||||
|
@ -314,7 +312,6 @@ TEST_F(TestConvolutionDwOpenCL, NoPadNHWC4Fp32) {
|
|||
2.2294958, 1.6570128, 2.465089, 1.4294086, 2.7941442, 1.7871612, 2.188921, 1.0601988};
|
||||
|
||||
DepthWiseTestMain(conv_param.get(), input_data, weight_data, gnd_data, schema::Format_NHWC4);
|
||||
lite::opencl::OpenCLRuntime::DeleteInstance();
|
||||
}
|
||||
|
||||
TEST_F(TestConvolutionDwOpenCL, PadNHWC4Fp32) {
|
||||
|
@ -387,7 +384,6 @@ TEST_F(TestConvolutionDwOpenCL, PadNHWC4Fp32) {
|
|||
1.0517888, 0.59817517, 0.75649744, 1.2075498, 0.38804203};
|
||||
|
||||
DepthWiseTestMain(conv_param.get(), input_data, weight_data, gnd_data, schema::Format_NHWC4);
|
||||
lite::opencl::OpenCLRuntime::DeleteInstance();
|
||||
}
|
||||
|
||||
TEST_F(TestConvolutionDwOpenCL, ConvDwNoPadFp32) {
|
||||
|
@ -512,7 +508,6 @@ TEST_F(TestConvolutionDwOpenCL, ConvDwNoPadFp32) {
|
|||
inputs[1]->SetData(nullptr);
|
||||
inputs[2]->SetData(nullptr);
|
||||
MS_LOG(INFO) << "TestConvolutionDwNoPadFp32 passed";
|
||||
lite::opencl::OpenCLRuntime::DeleteInstance();
|
||||
}
|
||||
|
||||
TEST_F(TestConvolutionDwOpenCL, ConvDwPadFp32) {
|
||||
|
@ -673,7 +668,6 @@ TEST_F(TestConvolutionDwOpenCL, ConvDwPadFp32) {
|
|||
inputs[1]->SetData(nullptr);
|
||||
inputs[2]->SetData(nullptr);
|
||||
MS_LOG(INFO) << "TestConvolutionDwPadFp32 passed";
|
||||
lite::opencl::OpenCLRuntime::DeleteInstance();
|
||||
}
|
||||
|
||||
TEST_F(TestConvolutionDwOpenCL, ProfilingMobilenetv2) {
|
||||
|
@ -739,7 +733,6 @@ TEST_F(TestConvolutionDwOpenCL, ProfilingMobilenetv2) {
|
|||
DepthWiseTestMain(conv_param.get(), input_data.get(), weight_data.get(), nullptr, schema::Format_NHWC4, false);
|
||||
}
|
||||
}
|
||||
lite::opencl::OpenCLRuntime::DeleteInstance();
|
||||
}
|
||||
|
||||
TEST_F(TestConvolutionDwOpenCL, Buffer2Image) {
|
||||
|
@ -788,6 +781,5 @@ TEST_F(TestConvolutionDwOpenCL, Buffer2Image) {
|
|||
}
|
||||
// DepthWiseTestMain(conv_param, input_data, weight_data, gnd_data, schema::Format_NC4HW4, true);
|
||||
DepthWiseTestMain(conv_param.get(), input_data.get(), weight_data.get(), gnd_data.get(), schema::Format_NHWC4, true);
|
||||
lite::opencl::OpenCLRuntime::DeleteInstance();
|
||||
}
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -115,7 +115,6 @@ TEST_F(TestMatMulOpenCL, MatMulFp32) {
|
|||
CompareOutputData(output_data, correct_data, co, 0.0001);
|
||||
tensor_x->SetData(nullptr);
|
||||
tensor_out->SetData(nullptr);
|
||||
lite::opencl::OpenCLRuntime::DeleteInstance();
|
||||
MS_LOG(INFO) << "TestMatMulFp32 passed";
|
||||
}
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -118,7 +118,6 @@ TEST_F(TestMaxPoolingOpenCL, MaxPool_1_32_512_96) {
|
|||
}
|
||||
delete pooling_kernel;
|
||||
delete pGraph;
|
||||
lite::opencl::OpenCLRuntime::DeleteInstance();
|
||||
}
|
||||
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -183,6 +183,5 @@ TEST_F(TestPReluOpenCL, PReluFp32_dim4) {
|
|||
delete param;
|
||||
delete prelu_kernel;
|
||||
delete sub_graph;
|
||||
lite::opencl::OpenCLRuntime::DeleteInstance();
|
||||
}
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -104,7 +104,6 @@ TEST_F(TestReshapeOpenCL, ReshapeFp32) {
|
|||
|
||||
inputs[0]->SetData(nullptr);
|
||||
outputs[0]->SetData(nullptr);
|
||||
lite::opencl::OpenCLRuntime::DeleteInstance();
|
||||
|
||||
MS_LOG(INFO) << "Test ReshapeFp32 passed";
|
||||
}
|
||||
|
|
|
@ -144,6 +144,5 @@ TEST_F(TestSliceOpenCL, Sliceinput_dim4) {
|
|||
}
|
||||
delete slice_kernel;
|
||||
delete sub_graph;
|
||||
lite::opencl::OpenCLRuntime::DeleteInstance();
|
||||
}
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -92,7 +92,6 @@ void RunTestCase(std::vector<int> input_shape, std::vector<int> output_shape, st
|
|||
}
|
||||
delete kernel;
|
||||
delete pGraph;
|
||||
lite::opencl::OpenCLRuntime::DeleteInstance();
|
||||
}
|
||||
|
||||
TEST_F(TestSoftmaxOpenCL, Softmax_1) {
|
||||
|
|
|
@ -103,6 +103,5 @@ TEST_F(TestToFormatOpenCL, ToFormatNHWC2NCHW) {
|
|||
// compare
|
||||
CompareOutputData(output_data, correct_data, h * w * c, 0.00001);
|
||||
MS_LOG(INFO) << "Test TransposeFp32 passed";
|
||||
lite::opencl::OpenCLRuntime::DeleteInstance();
|
||||
}
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -106,7 +106,6 @@ TEST_F(TestTransposeOpenCL, TransposeFp32) {
|
|||
|
||||
inputs[0]->SetData(nullptr);
|
||||
outputs[0]->SetData(nullptr);
|
||||
lite::opencl::OpenCLRuntime::DeleteInstance();
|
||||
|
||||
MS_LOG(INFO) << "Test TransposeFp32 passed";
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue