clean code for opencl
This commit is contained in:
parent
7e47cdc460
commit
8c99355260
|
@ -23,7 +23,7 @@ __kernel void ElementDiv(__global float *input_a, __global float *input_b, __glo
|
|||
const unsigned int n) {
|
||||
int idx = get_global_id(0);
|
||||
if (idx >= n) return;
|
||||
output[idx] = input_a[idx] * input_b[idx];
|
||||
output[idx] = input_a[idx] / input_b[idx];
|
||||
}
|
||||
|
||||
__kernel void BoardcastArith(__global float *input_a, float weight, float bias, __global float *output,
|
||||
|
|
|
@ -102,19 +102,26 @@ int ArithmeticOpenCLKernel::Init() {
|
|||
}
|
||||
}
|
||||
|
||||
lite::STATUS error_code = RET_OK;
|
||||
#ifdef PROGRAM_WITH_IL
|
||||
runtime_->CreateKernelFromIL(kernel_(), kernel_name);
|
||||
bool ret = runtime_->CreateKernelFromIL(kernel_(), kernel_name);
|
||||
if (!ret) {
|
||||
error_code = RET_ERROR;
|
||||
}
|
||||
#else
|
||||
std::string program_name = "Arithmetic";
|
||||
std::set<std::string> build_options;
|
||||
std::string source = arithmetic_image2d_source_fp32;
|
||||
runtime_->LoadSource(program_name, source);
|
||||
runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
|
||||
error_code = runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
|
||||
#endif
|
||||
if (error_code != RET_OK) {
|
||||
return error_code;
|
||||
}
|
||||
ori_format_ = out_tensors_[0]->GetFormat();
|
||||
out_tensors_[0]->SetFormat(schema::Format_NHWC4);
|
||||
Image2dGetWorkGroupSize();
|
||||
return 0;
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int ArithmeticOpenCLKernel::Run() {
|
||||
|
@ -155,7 +162,7 @@ int ArithmeticOpenCLKernel::Run() {
|
|||
cl_int2 output_shape{W, H};
|
||||
ocl_runtime->SetKernelArg(kernel_, arg_idx++, output_shape);
|
||||
ocl_runtime->RunKernel(kernel_, global_size_, local_size_, nullptr);
|
||||
return 0;
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
kernel::LiteKernel *OpenCLArithmeticKernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
|
||||
|
@ -170,7 +177,7 @@ kernel::LiteKernel *OpenCLArithmeticKernelCreator(const std::vector<lite::tensor
|
|||
return nullptr;
|
||||
}
|
||||
auto ret = kernel->Init();
|
||||
if (0 != ret) {
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Init kernel failed, name: Arithmetic";
|
||||
delete kernel;
|
||||
return nullptr;
|
||||
|
|
|
@ -68,18 +68,37 @@ void TestCase(const std::vector<int> &shape_a, const std::vector<int> &shape_b)
|
|||
auto tensorType = schema::NodeType_ValueNode;
|
||||
|
||||
lite::tensor::Tensor *tensor_a =
|
||||
new lite::tensor::Tensor(kNumberTypeFloat32, shape_a, schema::Format_NHWC4, tensorType);
|
||||
new (std::nothrow) lite::tensor::Tensor(kNumberTypeFloat32, shape_a, schema::Format_NHWC4, tensorType);
|
||||
lite::tensor::Tensor *tensor_b =
|
||||
new lite::tensor::Tensor(kNumberTypeFloat32, shape_b, schema::Format_NHWC4, tensorType);
|
||||
new (std::nothrow) lite::tensor::Tensor(kNumberTypeFloat32, shape_b, schema::Format_NHWC4, tensorType);
|
||||
lite::tensor::Tensor *tensor_c =
|
||||
new lite::tensor::Tensor(kNumberTypeFloat32, shape_a, schema::Format_NHWC4, tensorType);
|
||||
new (std::nothrow) lite::tensor::Tensor(kNumberTypeFloat32, shape_a, schema::Format_NHWC4, tensorType);
|
||||
if (tensor_a == nullptr || tensor_b == nullptr || tensor_c == nullptr) {
|
||||
MS_LOG(ERROR) << "Create tensor failed!";
|
||||
delete tensor_a;
|
||||
delete tensor_b;
|
||||
delete tensor_c;
|
||||
return;
|
||||
}
|
||||
|
||||
int64_t element_num = tensor_a->ElementsC4Num();
|
||||
int64_t element_num_b = is_bias_add ? 1 : tensor_b->ElementsC4Num();
|
||||
|
||||
float *data_a = new float[element_num];
|
||||
float *data_b = new float[element_num_b];
|
||||
float *data_c_cpu = new float[element_num];
|
||||
float *data_c_ocl = new float[element_num];
|
||||
float *data_a = new (std::nothrow) float[element_num];
|
||||
float *data_b = new (std::nothrow) float[element_num_b];
|
||||
float *data_c_cpu = new (std::nothrow) float[element_num];
|
||||
float *data_c_ocl = new (std::nothrow) float[element_num];
|
||||
if (data_a == nullptr || data_b == nullptr || data_c_cpu == nullptr || data_c_ocl == nullptr) {
|
||||
MS_LOG(ERROR) << "Create buffer failed!";
|
||||
delete tensor_a;
|
||||
delete tensor_b;
|
||||
delete tensor_c;
|
||||
delete[] data_a;
|
||||
delete[] data_b;
|
||||
delete[] data_c_cpu;
|
||||
delete[] data_c_ocl;
|
||||
return;
|
||||
}
|
||||
|
||||
InitData(data_a, element_num);
|
||||
InitData(data_b, element_num_b);
|
||||
|
@ -100,7 +119,18 @@ void TestCase(const std::vector<int> &shape_a, const std::vector<int> &shape_b)
|
|||
}
|
||||
std::vector<lite::tensor::Tensor *> outputs = {tensor_c};
|
||||
|
||||
ArithmeticParameter *param = new ArithmeticParameter();
|
||||
ArithmeticParameter *param = new (std::nothrow) ArithmeticParameter();
|
||||
if (param == nullptr) {
|
||||
MS_LOG(ERROR) << "Create parameter failed!";
|
||||
delete tensor_a;
|
||||
delete tensor_b;
|
||||
delete tensor_c;
|
||||
delete[] data_a;
|
||||
delete[] data_b;
|
||||
delete[] data_c_cpu;
|
||||
delete[] data_c_ocl;
|
||||
return;
|
||||
}
|
||||
param->ndim_ = 4;
|
||||
param->op_parameter_.type_ = PrimitiveType_Add;
|
||||
|
||||
|
@ -108,12 +138,36 @@ void TestCase(const std::vector<int> &shape_a, const std::vector<int> &shape_b)
|
|||
lite::Context ctx;
|
||||
auto *arith_kernel =
|
||||
new kernel::ArithmeticOpenCLKernel(reinterpret_cast<OpParameter *>(param), arithmetic_inputs, outputs, &ctx);
|
||||
if (arith_kernel == nullptr) {
|
||||
MS_LOG(ERROR) << "Create ArithmeticOpenCLKernel failed!";
|
||||
delete tensor_a;
|
||||
delete tensor_b;
|
||||
delete tensor_c;
|
||||
delete[] data_a;
|
||||
delete[] data_b;
|
||||
delete[] data_c_cpu;
|
||||
delete[] data_c_ocl;
|
||||
delete param;
|
||||
return;
|
||||
}
|
||||
arith_kernel->Init();
|
||||
|
||||
tensor_a->MallocData(allocator);
|
||||
tensor_b->MallocData(allocator);
|
||||
std::vector<kernel::LiteKernel *> kernels{arith_kernel};
|
||||
auto *kernel = new kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels);
|
||||
auto *kernel = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels);
|
||||
if (arith_kernel == nullptr) {
|
||||
MS_LOG(ERROR) << "Create SubGraphOpenCLKernel failed!";
|
||||
delete tensor_a;
|
||||
delete tensor_b;
|
||||
delete tensor_c;
|
||||
delete[] data_a;
|
||||
delete[] data_b;
|
||||
delete[] data_c_cpu;
|
||||
delete[] data_c_ocl;
|
||||
delete arith_kernel;
|
||||
return;
|
||||
}
|
||||
kernel->Init();
|
||||
|
||||
memcpy(inputs[0]->Data(), data_a, sizeof(float) * element_num);
|
||||
|
|
Loading…
Reference in New Issue