!7524 remove OpenCLKernel's member op_format_

Merge pull request !7524 from 王东旭/remove_op_format
This commit is contained in:
mindspore-ci-bot 2020-10-21 17:02:27 +08:00 committed by Gitee
commit f887618662
70 changed files with 410 additions and 1446 deletions

View File

@ -123,7 +123,7 @@ __kernel void transpose_0231_NHWC4(__read_only image2d_t src_data, __write_only
if (X >= shape.y || 4 * Y >= shape.z || 4 * Z >= shape.w) {
return;
}
int W4 = UP_DIV(shape.y, 4);
int W4 = UP_DIV(shape.z, 4);
int C4 = UP_DIV(shape.w, 4);
FLT4 src0 = READ_IMAGE(src_data, smp_zero, (int2)(X * W4 + Y, 4 * Z));
FLT4 src1 = (FLT4)0.f;

View File

@ -61,12 +61,12 @@ int ActivationOpenClKernel::Init() {
MS_LOG(ERROR) << "Activate fun only support dim=4 or 2, but your dim=" << in_size_;
return mindspore::lite::RET_ERROR;
}
std::map<int, std::string> Program_Kernel{{ActivationType_LEAKY_RELU, "LeakyRelu"},
{ActivationType_RELU, "Relu"},
{ActivationType_SIGMOID, "Sigmoid"},
{ActivationType_RELU6, "Relu6"},
{ActivationType_TANH, "Tanh"}};
if (Program_Kernel.count(type_) == 0) {
std::map<int, std::string> kernel_names{{ActivationType_LEAKY_RELU, "LeakyRelu"},
{ActivationType_RELU, "Relu"},
{ActivationType_SIGMOID, "Sigmoid"},
{ActivationType_RELU6, "Relu6"},
{ActivationType_TANH, "Tanh"}};
if (kernel_names.count(type_) == 0) {
MS_LOG(ERROR) << "schema::ActivationType:" << type_ << "not found";
return mindspore::lite::RET_ERROR;
}
@ -75,12 +75,8 @@ int ActivationOpenClKernel::Init() {
std::set<std::string> build_options;
std::string program_name = "Activation";
ocl_runtime_->LoadSource(program_name, source);
std::string kernel_name = Program_Kernel[type_];
std::string kernel_name = kernel_names[type_];
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
in_ori_format_ = in_tensors_[0]->GetFormat();
out_ori_format_ = out_tensors_[0]->GetFormat();
in_tensors_[0]->SetFormat(op_format_);
out_tensors_[0]->SetFormat(op_format_);
MS_LOG(DEBUG) << op_parameter_->name_ << " init Done!";
return mindspore::lite::RET_OK;
}
@ -107,32 +103,12 @@ int ActivationOpenClKernel::Run() {
cl_int4 ActivationOpenClKernel::GetImg2dShape() {
cl_int4 img2d_shape = {1, 1, 1, 1};
if (op_format_ == schema::Format_NHWC4) {
img2d_shape.s[1] = nhwc_shape_[1];
img2d_shape.s[2] = nhwc_shape_[2] * UP_DIV(nhwc_shape_[3], C4NUM);
img2d_shape.s[3] = C4NUM;
}
if (op_format_ == schema::Format_NC4HW4) {
img2d_shape.s[1] = UP_DIV(nhwc_shape_[3], C4NUM) * nhwc_shape_[1];
img2d_shape.s[2] = nhwc_shape_[2];
img2d_shape.s[3] = C4NUM;
}
img2d_shape.s[1] = nhwc_shape_[1];
img2d_shape.s[2] = nhwc_shape_[2] * UP_DIV(nhwc_shape_[3], C4NUM);
img2d_shape.s[3] = C4NUM;
return img2d_shape;
}
int ActivationOpenClKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) {
cl_int4 img_shape = GetImg2dShape();
size_t img_dtype = CL_FLOAT;
if (enable_fp16_) {
img_dtype = CL_HALF_FLOAT;
}
img_size->clear();
img_size->push_back(img_shape.s[2]);
img_size->push_back(img_shape.s[1]);
img_size->push_back(img_dtype);
return mindspore::lite::RET_OK;
}
kernel::LiteKernel *OpenClActivationKernelCreator(const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
const lite::InnerContext *ctx, const kernel::KernelKey &desc,

View File

@ -26,27 +26,25 @@ namespace mindspore::kernel {
class ActivationOpenClKernel : public OpenCLKernel {
public:
explicit ActivationOpenClKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs)
: OpenCLKernel(parameter, inputs, outputs) {
type_ = (reinterpret_cast<ActivationParameter *>(parameter))->type_;
alpha_ = (reinterpret_cast<ActivationParameter *>(parameter))->alpha_;
}
~ActivationOpenClKernel() override{};
ActivationOpenClKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs)
: OpenCLKernel(parameter, inputs, outputs),
type_(reinterpret_cast<ActivationParameter *>(parameter)->type_),
alpha_(reinterpret_cast<ActivationParameter *>(parameter)->alpha_) {}
~ActivationOpenClKernel() override = default;
int Init() override;
int Run() override;
int GetImageSize(size_t idx, std::vector<size_t> *img_size) override;
cl_int4 GetImg2dShape();
void InitBuffer() {}
private:
cl_int4 GetImg2dShape();
cl::Kernel kernel_;
int type_;
float alpha_;
int in_size_;
int out_size_;
size_t fp_size;
int in_size_{};
int out_size_{};
size_t fp_size{};
bool enable_fp16_{false};
std::vector<size_t> nhwc_shape_;
};

View File

@ -34,81 +34,37 @@ using mindspore::schema::PrimitiveType_Eltwise;
namespace mindspore::kernel {
ArithmeticOpenCLKernel::~ArithmeticOpenCLKernel() {}
std::vector<size_t> ArithmeticOpenCLKernel::InitGlobalSize() const {
const size_t global_x = out_tensors_[0]->Width();
const size_t global_y = out_tensors_[0]->Height();
const size_t global_z = UP_ROUND_DIV(out_tensors_[0]->Channel(), 4);
std::vector<size_t> global = {global_x, global_y, global_z};
return global;
auto out_shape = out_tensors_[0]->shape();
if (out_shape.size() == 2) {
const size_t global_x = 1;
const size_t global_y = 1;
const size_t global_z = UP_ROUND_DIV(out_shape[1], C4NUM);
std::vector<size_t> global = {global_x, global_y, global_z};
return global;
} else {
const size_t global_x = out_shape[2];
const size_t global_y = out_shape[1];
const size_t global_z = UP_ROUND_DIV(out_shape[3], C4NUM);
std::vector<size_t> global = {global_x, global_y, global_z};
return global;
}
}
void ArithmeticOpenCLKernel::Image2dGetWorkGroupSize() {
local_size_ = {16, 16};
if (out_tensors_[0]->shape().size() == 2) {
size_t H = out_tensors_[0]->shape()[0];
size_t W = UP_DIV(out_tensors_[0]->shape()[1], C4NUM);
global_size_ = {W, H};
return;
}
if (out_tensors_[0]->GetFormat() == schema::Format_NC4HW4) {
size_t H = out_tensors_[0]->Batch() * out_tensors_[0]->Height() * UP_DIV(out_tensors_[0]->Channel(), C4NUM);
size_t W = out_tensors_[0]->Width();
global_size_ = {W, H};
} else if (out_tensors_[0]->GetFormat() == schema::Format_NHWC4) {
size_t H = out_tensors_[0]->Batch() * out_tensors_[0]->Height();
size_t W = out_tensors_[0]->Width() * UP_DIV(out_tensors_[0]->Channel(), C4NUM);
global_size_ = {W, H};
} else if (out_tensors_[0]->GetFormat() == schema::Format_NC4) {
size_t H = out_tensors_[0]->Batch();
size_t W = UP_DIV(out_tensors_[0]->Channel(), C4NUM);
auto out_shape = out_tensors_[0]->shape();
if (out_shape.size() == 2) {
size_t H = out_shape[0];
size_t W = UP_DIV(out_shape[1], C4NUM);
global_size_ = {W, H};
} else {
MS_LOG(ERROR) << "Unsupport data format " << out_tensors_[0]->GetFormat();
size_t H = out_shape[0] * out_shape[1];
size_t W = out_shape[2] * UP_DIV(out_shape[3], C4NUM);
global_size_ = {W, H};
}
}
void ArithmeticOpenCLKernel::BufferGetWorkGroupSize() {
uint32_t element_num = out_tensors_[0]->ElementsC4Num();
global_size_ = {element_num};
}
int ArithmeticOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) {
size_t im_dst_x, im_dst_y;
if (out_tensors_[0]->shape().size() == 2) {
im_dst_x = UP_DIV(out_tensors_[0]->shape()[1], C4NUM);
im_dst_y = out_tensors_[0]->shape()[0];
} else {
if (out_tensors_[0]->GetFormat() == schema::Format_NC4HW4) {
im_dst_x = out_tensors_[0]->Width();
im_dst_y = out_tensors_[0]->Batch() * out_tensors_[0]->Height() * UP_DIV(out_tensors_[0]->Channel(), C4NUM);
} else if (out_tensors_[0]->GetFormat() == schema::Format_NHWC4) {
im_dst_x = out_tensors_[0]->Width() * UP_DIV(out_tensors_[0]->Channel(), C4NUM);
im_dst_y = out_tensors_[0]->Batch() * out_tensors_[0]->Height();
} else if (out_tensors_[0]->GetFormat() == schema::Format_NC4) {
im_dst_y = out_tensors_[0]->Batch();
im_dst_x = UP_DIV(out_tensors_[0]->Channel(), C4NUM);
} else {
MS_LOG(ERROR) << "Unsupport data format " << out_tensors_[0]->GetFormat();
return RET_ERROR;
}
}
size_t img_dtype = CL_FLOAT;
if (in_tensors_[0]->data_type() == kNumberTypeFloat16) {
img_dtype = CL_HALF_FLOAT;
} else if (in_tensors_[0]->data_type() == kNumberTypeFloat32) {
img_dtype = CL_FLOAT;
} else {
MS_LOG(ERROR) << "Unsupport data type " << in_tensors_[0]->data_type();
}
img_size->clear();
std::vector<size_t> vec{im_dst_x, im_dst_y, img_dtype};
*img_size = vec;
return RET_OK;
}
int ArithmeticOpenCLKernel::InitBuffer() {
auto fp16_enable = ocl_runtime_->GetFp16Enable();
auto data_size = fp16_enable ? sizeof(float16_t) : sizeof(float);
@ -119,7 +75,7 @@ int ArithmeticOpenCLKernel::InitBuffer() {
inputs_weight_ptrs_.push_back(nullptr);
} else {
auto allocator = ocl_runtime_->GetAllocator();
std::vector<size_t> img_size = GetImage2dShapeFromNHWC(nhwc_shape, op_format_);
std::vector<size_t> img_size = GetImage2dShapeFromNHWC(nhwc_shape, schema::Format_NHWC4);
int pack_weight_size = img_size[0] * img_size[1] * C4NUM;
int plane = nhwc_shape[1] * nhwc_shape[2];
int channel = nhwc_shape[3];
@ -132,22 +88,12 @@ int ArithmeticOpenCLKernel::InitBuffer() {
return RET_ERROR;
}
memset(weight, 0x00, pack_weight_size * data_size);
if (op_format_ == schema::Format_NHWC4) {
if (in_tensor_->data_type() == kNumberTypeFloat32) {
std::function<float(float)> to_dtype = [](float x) -> float { return x; };
PackNHWCToNHWC4<float, float>(in_tensor_->data_c(), weight, batch, plane, channel, to_dtype);
} else if (in_tensor_->data_type() == kNumberTypeFloat16) {
std::function<float(float16_t)> to_dtype = [](float16_t x) -> float { return static_cast<float>(x); };
PackNHWCToNHWC4<float16_t, float>(in_tensor_->data_c(), weight, batch, plane, channel, to_dtype);
}
} else if (op_format_ == schema::Format_NC4HW4) {
if (in_tensor_->data_type() == kNumberTypeFloat32) {
std::function<float(float)> to_dtype = [](float x) -> float { return x; };
PackNHWCToNC4HW4<float, float>(in_tensor_->data_c(), weight, batch, plane, channel, to_dtype);
} else if (in_tensor_->data_type() == kNumberTypeFloat16) {
std::function<float(float16_t)> to_dtype = [](float16_t x) -> float { return static_cast<float>(x); };
PackNHWCToNC4HW4<float16_t, float>(in_tensor_->data_c(), weight, batch, plane, channel, to_dtype);
}
if (in_tensor_->data_type() == kNumberTypeFloat32) {
std::function<float(float)> to_dtype = [](float x) -> float { return x; };
PackNHWCToNHWC4<float, float>(in_tensor_->data_c(), weight, batch, plane, channel, to_dtype);
} else if (in_tensor_->data_type() == kNumberTypeFloat16) {
std::function<float(float16_t)> to_dtype = [](float16_t x) -> float { return static_cast<float>(x); };
PackNHWCToNHWC4<float16_t, float>(in_tensor_->data_c(), weight, batch, plane, channel, to_dtype);
}
if (batch * plane * channel == 1) {
// scalar
@ -163,22 +109,12 @@ int ArithmeticOpenCLKernel::InitBuffer() {
return RET_ERROR;
}
memset(weight, 0x00, pack_weight_size * data_size);
if (op_format_ == schema::Format_NHWC4) {
if (in_tensor_->data_type() == kNumberTypeFloat32) {
std::function<float16_t(float)> to_dtype = [](float x) -> float16_t { return static_cast<float16_t>(x); };
PackNHWCToNHWC4<float, float16_t>(in_tensor_->data_c(), weight, batch, plane, channel, to_dtype);
} else if (in_tensor_->data_type() == kNumberTypeFloat16) {
std::function<float16_t(float16_t)> to_dtype = [](float16_t x) -> float16_t { return x; };
PackNHWCToNHWC4<float16_t, float16_t>(in_tensor_->data_c(), weight, batch, plane, channel, to_dtype);
}
} else if (op_format_ == schema::Format_NC4HW4) {
if (in_tensor_->data_type() == kNumberTypeFloat32) {
std::function<float16_t(float)> to_dtype = [](float x) -> float16_t { return static_cast<float16_t>(x); };
PackNHWCToNC4HW4<float, float16_t>(in_tensor_->data_c(), weight, batch, plane, channel, to_dtype);
} else if (in_tensor_->data_type() == kNumberTypeFloat16) {
std::function<float16_t(float16_t)> to_dtype = [](float16_t x) -> float16_t { return x; };
PackNHWCToNC4HW4<float16_t, float16_t>(in_tensor_->data_c(), weight, batch, plane, channel, to_dtype);
}
if (in_tensor_->data_type() == kNumberTypeFloat32) {
std::function<float16_t(float)> to_dtype = [](float x) -> float16_t { return static_cast<float16_t>(x); };
PackNHWCToNHWC4<float, float16_t>(in_tensor_->data_c(), weight, batch, plane, channel, to_dtype);
} else if (in_tensor_->data_type() == kNumberTypeFloat16) {
std::function<float16_t(float16_t)> to_dtype = [](float16_t x) -> float16_t { return x; };
PackNHWCToNHWC4<float16_t, float16_t>(in_tensor_->data_c(), weight, batch, plane, channel, to_dtype);
}
if (batch * plane * channel == 1) {
// scalar
@ -195,18 +131,11 @@ int ArithmeticOpenCLKernel::InitBuffer() {
int ArithmeticOpenCLKernel::Init() {
std::string kernel_name;
const ArithmeticParameter *arithmetic_parameter = reinterpret_cast<const ArithmeticParameter *>(op_parameter_);
auto *arithmetic_parameter = reinterpret_cast<const ArithmeticParameter *>(op_parameter_);
if (arithmetic_parameter->broadcasting_) {
element_flag_ = false;
if (op_format_ == schema::Format_NHWC4) {
kernel_name = "BroadcastNHWC4";
} else {
kernel_name = "BroadcastNC4HW4";
MS_LOG(ERROR) << "Don't support BroadcastNC4HW4 yet";
return RET_ERROR;
}
kernel_name = "BroadcastNHWC4";
} else {
kernel_name = "Element";
}
@ -302,17 +231,6 @@ int ArithmeticOpenCLKernel::Init() {
return error_code;
}
auto format = schema::Format::Format_NHWC4;
if (arithmetic_parameter->ndim_ == 2) {
format = schema::Format::Format_NC4;
}
in_ori_format_ = in_tensors_[0]->GetFormat();
out_ori_format_ = out_tensors_[0]->GetFormat();
in_tensors_[0]->SetFormat(format);
if (element_flag_ && in_tensors_[1]->category() != lite::Tensor::Category::CONST) {
in_tensors_[1]->SetFormat(format);
}
out_tensors_[0]->SetFormat(format);
Image2dGetWorkGroupSize();
InitBuffer();
MS_LOG(DEBUG) << kernel_name << " Init Done!";

View File

@ -25,20 +25,18 @@ namespace mindspore::kernel {
class ArithmeticOpenCLKernel : public OpenCLKernel {
public:
explicit ArithmeticOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
ArithmeticOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
: OpenCLKernel(parameter, inputs, outputs) {}
~ArithmeticOpenCLKernel() override;
~ArithmeticOpenCLKernel() override = default;
int Init() override;
int Run() override;
int GetImageSize(size_t idx, std::vector<size_t> *img_size) override;
int InitBuffer() override;
private:
std::vector<size_t> InitGlobalSize() const;
void Image2dGetWorkGroupSize();
void BufferGetWorkGroupSize();
int InitBuffer();
cl::Kernel kernel_;
bool element_flag_{true};

View File

@ -41,38 +41,6 @@ using mindspore::schema::PrimitiveType_Square;
namespace mindspore::kernel {
int ArithmeticSelfOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) {
auto out_shape = out_tensors_[0]->shape();
size_t CO4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM);
size_t im_dst_x, im_dst_y;
if (in_tensors_[0]->GetFormat() == schema::Format_NHWC4) {
if (in_tensors_[0]->shape().size() == 4) {
im_dst_x = out_tensors_[0]->Width() * CO4;
im_dst_y = out_tensors_[0]->Height() * out_tensors_[0]->Batch();
} else {
im_dst_x = UP_DIV(out_shape[1], C4NUM);
im_dst_y = out_tensors_[0]->Batch();
}
} else {
if (in_tensors_[0]->shape().size() == 4) {
im_dst_y = out_tensors_[0]->Batch() * out_tensors_[0]->Height() * CO4;
im_dst_x = out_tensors_[0]->Width();
} else {
im_dst_y = out_tensors_[0]->Batch() * UP_DIV(out_shape[1], C4NUM);
im_dst_x = 1;
}
}
size_t img_dtype = CL_FLOAT;
auto enable_fp16_ = ocl_runtime_->GetFp16Enable();
if (enable_fp16_) {
img_dtype = CL_HALF_FLOAT;
}
img_size->clear();
std::vector<size_t> vec{im_dst_x, im_dst_y, img_dtype};
*img_size = vec;
return mindspore::lite::RET_OK;
}
void ArithmeticSelfOpenCLKernel::GetKernelName(std::string *kernel_name, ArithmeticSelfParameter *param) {
switch (param->op_parameter_.type_) {
case PrimitiveType_Abs:
@ -126,24 +94,9 @@ int ArithmeticSelfOpenCLKernel::Init() {
}
auto param = reinterpret_cast<ArithmeticSelfParameter *>(this->op_parameter_);
auto in_format = op_format_;
if (in_format != schema::Format_NHWC4 && in_format != schema::Format_NC4HW4 && in_format != schema::Format_NC4) {
MS_LOG(ERROR) << "input format(" << in_format << ") "
<< "format not support!";
return mindspore::lite::RET_ERROR;
}
in_ori_format_ = in_tensors_[0]->GetFormat();
in_tensors_[0]->SetFormat(op_format_);
out_ori_format_ = out_tensors_[0]->GetFormat();
out_tensors_[0]->SetFormat(op_format_);
std::string kernel_name = "ArithmeticSelf";
GetKernelName(&kernel_name, param);
if (in_format == schema::Format_NC4HW4) {
kernel_name += "_NC4HW4";
} else if (in_format == schema::Format_NHWC4) {
kernel_name += "_NHWC4";
}
kernel_name += "_NHWC4";
MS_LOG(DEBUG) << "execute kernel name : " << kernel_name;
std::set<std::string> build_options;
std::string source = arithmeticself_source;
@ -154,8 +107,6 @@ int ArithmeticSelfOpenCLKernel::Init() {
return mindspore::lite::RET_OK;
}
int ArithmeticSelfOpenCLKernel::ReSize() { return mindspore::lite::RET_OK; }
void ArithmeticSelfGetWorkGroup(const std::vector<size_t> &global, std::vector<size_t> *local, int max_size) {
const int max_divider = 8;
const int max_x = 4, max_y = 8;

View File

@ -26,23 +26,19 @@ namespace mindspore::kernel {
class ArithmeticSelfOpenCLKernel : public OpenCLKernel {
public:
explicit ArithmeticSelfOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs)
ArithmeticSelfOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs)
: OpenCLKernel(parameter, inputs, outputs) {}
~ArithmeticSelfOpenCLKernel() override{};
~ArithmeticSelfOpenCLKernel() override = default;
int Init() override;
int ReSize() override;
int Run() override;
int GetImageSize(size_t idx, std::vector<size_t> *img_size) override;
private:
void GetKernelName(std::string *kernel_name, ArithmeticSelfParameter *param);
private:
cl::Kernel kernel_;
};

View File

@ -32,18 +32,12 @@ using mindspore::schema::PrimitiveType_BatchToSpaceND;
namespace mindspore::kernel {
int BatchToSpaceNDOpenCLKernel::Init() {
std::string kernel_name = "batch_to_space_nd";
auto in_format = op_format_;
std::string kernel_name = "batch_to_space_nd_NHWC4";
if (in_tensors_[0]->shape().size() != 4 && out_tensors_[0]->shape().size() != 4) {
MS_LOG(ERROR) << "input/output shape size must be 4, actual: " << in_tensors_[0]->shape().size() << ", "
<< out_tensors_[0]->shape().size();
return RET_ERROR;
}
if (in_format != schema::Format_NHWC4 && in_format != schema::Format_NC4HW4) {
MS_LOG(ERROR) << "input format(" << in_format << ") "
<< "format not support!";
return RET_ERROR;
}
auto *param = reinterpret_cast<BatchToSpaceParameter *>(this->op_parameter_);
if (param->block_shape_[0] < 1 || param->block_shape_[1] < 1) {
MS_LOG(ERROR) << "block_sizes_ must > 1, actual " << param->block_shape_[0] << ", " << param->block_shape_[1];
@ -55,18 +49,10 @@ int BatchToSpaceNDOpenCLKernel::Init() {
return RET_ERROR;
}
in_ori_format_ = in_tensors_[0]->GetFormat();
out_ori_format_ = out_tensors_[0]->GetFormat();
in_tensors_[0]->SetFormat(op_format_);
out_tensors_[0]->SetFormat(op_format_);
#ifdef PROGRAM_WITH_IL
kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name);
#else
if (in_format == schema::Format_NC4HW4) {
kernel_name += "_NC4HW4";
} else {
kernel_name += "_NHWC4";
}
std::set<std::string> build_options;
std::string source = batch_to_space_nd_source;
std::string program_name = "batch_to_space_nd";
@ -76,28 +62,7 @@ int BatchToSpaceNDOpenCLKernel::Init() {
MS_LOG(DEBUG) << kernel_name << " Init Done!";
return RET_OK;
}
int BatchToSpaceNDOpenCLKernel::InitBuffer() { return RET_OK; }
int BatchToSpaceNDOpenCLKernel::ReSize() { return RET_OK; }
int BatchToSpaceNDOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) {
size_t CO4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM);
size_t im_dst_x, im_dst_y;
if (in_tensors_[0]->GetFormat() == schema::Format::Format_NHWC4) {
im_dst_x = out_tensors_[0]->Width() * CO4;
im_dst_y = out_tensors_[0]->Height() * out_tensors_[0]->Batch();
} else {
im_dst_y = out_tensors_[0]->Batch() * out_tensors_[0]->Height() * CO4;
im_dst_x = out_tensors_[0]->Width();
}
size_t img_dtype = CL_FLOAT;
auto enable_fp16_ = ocl_runtime_->GetFp16Enable();
if (enable_fp16_) {
img_dtype = CL_HALF_FLOAT;
}
img_size->clear();
std::vector<size_t> vec{im_dst_x, im_dst_y, img_dtype};
*img_size = std::move(vec);
return RET_OK;
}
int BatchToSpaceNDOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running! ";
auto param = reinterpret_cast<BatchToSpaceParameter *>(this->op_parameter_);

View File

@ -25,22 +25,16 @@ namespace mindspore::kernel {
class BatchToSpaceNDOpenCLKernel : public OpenCLKernel {
public:
explicit BatchToSpaceNDOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs)
BatchToSpaceNDOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs)
: OpenCLKernel(parameter, inputs, outputs) {}
~BatchToSpaceNDOpenCLKernel() override{};
~BatchToSpaceNDOpenCLKernel() override = default;
int Init() override;
int ReSize() override;
int Run() override;
int GetImageSize(size_t idx, std::vector<size_t> *img_size) override;
int InitBuffer();
private:
cl::Kernel kernel_;
};

View File

@ -30,45 +30,8 @@ using mindspore::schema::PrimitiveType_BatchNorm;
namespace mindspore::kernel {
int BatchNormOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) {
size_t CO4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM);
size_t im_dst_x, im_dst_y;
if (in_tensors_[0]->GetFormat() == schema::Format::Format_NHWC4) {
im_dst_x = out_tensors_[0]->Width() * CO4;
im_dst_y = out_tensors_[0]->Height();
} else {
im_dst_y = out_tensors_[0]->Batch() * out_tensors_[0]->Height() * CO4;
im_dst_x = out_tensors_[0]->Width();
}
size_t img_dtype = CL_FLOAT;
auto enable_fp16_ = ocl_runtime_->GetFp16Enable();
if (enable_fp16_) {
img_dtype = CL_HALF_FLOAT;
}
img_size->clear();
std::vector<size_t> vec{im_dst_x, im_dst_y, img_dtype};
*img_size = vec;
return RET_OK;
}
int BatchNormOpenCLKernel::Init() {
auto in_format = op_format_;
if (in_format != schema::Format_NHWC4 && in_format != schema::Format_NC4HW4) {
MS_LOG(ERROR) << "input format(" << in_format << ") "
<< "format not support!";
return RET_ERROR;
}
in_ori_format_ = in_tensors_[0]->GetFormat();
in_tensors_[0]->SetFormat(op_format_);
out_ori_format_ = out_tensors_[0]->GetFormat();
out_tensors_[0]->SetFormat(op_format_);
std::string kernel_name = "Batch_normalization";
if (in_format == schema::Format_NC4HW4) {
kernel_name += "_NC4HW4";
} else if (in_format == schema::Format_NHWC4) {
kernel_name += "_NHWC4";
}
std::string kernel_name = "Batch_normalization_NHWC4";
std::set<std::string> build_options;
std::string source = batchnorm_source;
std::string program_name = "Batch_normalization";
@ -78,8 +41,6 @@ int BatchNormOpenCLKernel::Init() {
return RET_OK;
}
int BatchNormOpenCLKernel::ReSize() { return RET_OK; }
void BatchNormGetWorkGroup(const std::vector<size_t> &global, std::vector<size_t> *local, int max_size) {
const int max_divider = 8;
const int max_x = 4, max_y = 8;

View File

@ -25,20 +25,16 @@ namespace mindspore::kernel {
class BatchNormOpenCLKernel : public OpenCLKernel {
public:
explicit BatchNormOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs)
BatchNormOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs)
: OpenCLKernel(parameter, inputs, outputs) {}
~BatchNormOpenCLKernel() override{};
~BatchNormOpenCLKernel() override = default;
int Init() override;
int ReSize() override;
int Run() override;
int GetImageSize(size_t idx, std::vector<size_t> *img_size) override;
private:
cl::Kernel kernel_;
};

View File

@ -35,7 +35,7 @@ using mindspore::schema::PrimitiveType_BiasAdd;
namespace mindspore::kernel {
void BiasAddOpenCLKernel::InitBuffer() {
int BiasAddOpenCLKernel::InitBuffer() {
int C = in_tensors_[1]->shape()[0];
int div_ci = UP_DIV(C, C4NUM);
auto allocator = ocl_runtime_->GetAllocator();
@ -49,6 +49,7 @@ void BiasAddOpenCLKernel::InitBuffer() {
memset(BiasAdd_, 0x00, div_ci * C4NUM * fp_size);
memcpy(BiasAdd_, in_tensors_[1]->data_c(), C * fp_size);
allocator->UnmapBuffer(BiasAdd_);
return RET_OK;
}
int BiasAddOpenCLKernel::Init() {
@ -77,10 +78,6 @@ int BiasAddOpenCLKernel::Init() {
ocl_runtime_->LoadSource(program_name, source);
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
in_ori_format_ = in_tensors_[0]->GetFormat();
out_ori_format_ = out_tensors_[0]->GetFormat();
in_tensors_[0]->SetFormat(op_format_);
out_tensors_[0]->SetFormat(op_format_);
MS_LOG(DEBUG) << program_name << " Init Done!";
return mindspore::lite::RET_OK;
}
@ -95,7 +92,7 @@ int BiasAddOpenCLKernel::Run() {
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_shape_);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, BiasAdd_);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, data_type[op_format_]);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, data_type[schema::Format::Format_NHWC4]);
std::vector<size_t> local = {1, 1};
std::vector<size_t> global = {static_cast<size_t>(global_size.s[1]), static_cast<size_t>(global_size.s[2])};
auto ret = ocl_runtime_->RunKernel(kernel_, global, local, nullptr);
@ -108,32 +105,10 @@ int BiasAddOpenCLKernel::Run() {
cl_int4 BiasAddOpenCLKernel::GetGlobalshape() {
cl_int4 global_shape = input_shape_;
if (op_format_ == schema::Format::Format_NC4) {
global_shape.s[1] = global_shape.s[2];
global_shape.s[2] = UP_DIV(global_shape.s[3], C4NUM);
}
if (op_format_ == schema::Format::Format_NC4HW4) {
global_shape.s[1] = UP_DIV(global_shape.s[3], C4NUM) * global_shape.s[1]; // c / 4 * H
}
if (op_format_ == schema::Format::Format_NHWC4) {
global_shape.s[2] = UP_DIV(global_shape.s[3], C4NUM) * global_shape.s[2];
}
global_shape.s[2] = UP_DIV(global_shape.s[3], C4NUM) * global_shape.s[2];
return global_shape;
}
int BiasAddOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) {
cl_int4 img_shape = GetGlobalshape();
size_t img_dtype = CL_FLOAT;
if (enable_fp16_) {
img_dtype = CL_HALF_FLOAT;
}
img_size->clear();
img_size->push_back(img_shape.s[2]);
img_size->push_back(img_shape.s[1]);
img_size->push_back(img_dtype);
return mindspore::lite::RET_OK;
}
kernel::LiteKernel *OpenCLBiasAddKernelCreator(const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
const lite::InnerContext *ctx, const kernel::KernelKey &desc,

View File

@ -28,25 +28,25 @@ namespace mindspore::kernel {
class BiasAddOpenCLKernel : public OpenCLKernel {
public:
explicit BiasAddOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs)
BiasAddOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs)
: OpenCLKernel(parameter, inputs, outputs) {}
~BiasAddOpenCLKernel() override{};
~BiasAddOpenCLKernel() override = default;
int Init() override;
int Run() override;
int GetImageSize(size_t idx, std::vector<size_t> *img_size) override;
void InitBuffer();
cl_int4 GetGlobalshape();
int InitBuffer() override;
private:
cl_int4 GetGlobalshape();
cl::Kernel kernel_;
void *BiasAdd_;
int in_size_;
int out_size_;
size_t fp_size;
cl_int4 input_shape_;
bool enable_fp16_{false};
void *BiasAdd_{nullptr};
int in_size_{};
int out_size_{};
size_t fp_size{};
cl_int4 input_shape_{};
bool enable_fp16_{};
};
} // namespace mindspore::kernel

View File

@ -30,27 +30,6 @@ using mindspore::schema::PrimitiveType_Cast;
namespace mindspore::kernel {
int CastOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) {
size_t CO4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM);
size_t im_dst_x, im_dst_y;
if (in_tensors_[0]->GetFormat() == schema::Format::Format_NHWC4) {
im_dst_x = out_tensors_[0]->Width() * CO4;
im_dst_y = out_tensors_[0]->Height();
} else {
im_dst_y = out_tensors_[0]->Batch() * out_tensors_[0]->Height() * CO4;
im_dst_x = out_tensors_[0]->Width();
}
size_t img_dtype = CL_FLOAT;
auto enable_fp16_ = ocl_runtime_->GetFp16Enable();
if (enable_fp16_) {
img_dtype = CL_HALF_FLOAT;
}
img_size->clear();
std::vector<size_t> vec{im_dst_x, im_dst_y, img_dtype};
*img_size = vec;
return RET_OK;
}
int CastOpenCLKernel::GetKernelName(std::string *kernel_name, CastParameter *param) {
if (param->src_type_ == kNumberTypeFloat32 && param->dst_type_ == kNumberTypeFloat16) {
kernel_name[0] += "_Fp32ToFp16";
@ -65,23 +44,9 @@ int CastOpenCLKernel::GetKernelName(std::string *kernel_name, CastParameter *par
int CastOpenCLKernel::Init() {
auto param = reinterpret_cast<CastParameter *>(this->op_parameter_);
auto in_format = op_format_;
if (in_format != schema::Format_NHWC4 && in_format != schema::Format_NC4HW4) {
MS_LOG(ERROR) << "input format(" << in_format << ") "
<< "format not support!";
return RET_ERROR;
}
in_ori_format_ = in_tensors_[0]->GetFormat();
in_tensors_[0]->SetFormat(op_format_);
out_ori_format_ = out_tensors_[0]->GetFormat();
out_tensors_[0]->SetFormat(op_format_);
std::string kernel_name = "Cast";
GetKernelName(&kernel_name, param);
if (in_format == schema::Format_NC4HW4) {
kernel_name += "_NC4HW4";
} else if (in_format == schema::Format_NHWC4) {
kernel_name += "_NHWC4";
}
kernel_name += "_NHWC4";
std::set<std::string> build_options;
std::string source = cast_source;
std::string program_name = "cast";
@ -91,8 +56,6 @@ int CastOpenCLKernel::Init() {
return RET_OK;
}
int CastOpenCLKernel::ReSize() { return RET_OK; }
void CastGetWorkGroup(const std::vector<size_t> &global, std::vector<size_t> *local, int max_size) {
const int max_divider = 8;
const int max_x = 4, max_y = 8;

View File

@ -26,23 +26,19 @@ namespace mindspore::kernel {
class CastOpenCLKernel : public OpenCLKernel {
public:
explicit CastOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs)
CastOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs)
: OpenCLKernel(parameter, inputs, outputs) {}
~CastOpenCLKernel() override{};
~CastOpenCLKernel() override = default;
int Init() override;
int ReSize() override;
int Run() override;
private:
int GetKernelName(std::string *kernel_name, CastParameter *param);
int GetImageSize(size_t idx, std::vector<size_t> *img_size) override;
private:
cl::Kernel kernel_;
};

View File

@ -31,27 +31,6 @@ using mindspore::schema::PrimitiveType_Concat;
namespace mindspore::kernel {
int ConcatOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) {
size_t CO4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM);
size_t im_dst_x, im_dst_y;
if (in_tensors_[0]->GetFormat() == schema::Format::Format_NHWC4) {
im_dst_x = out_tensors_[0]->Width() * CO4;
im_dst_y = out_tensors_[0]->Height() * out_tensors_[0]->Batch();
} else {
im_dst_y = out_tensors_[0]->Batch() * out_tensors_[0]->Height() * CO4;
im_dst_x = out_tensors_[0]->Width();
}
size_t img_dtype = CL_FLOAT;
auto enable_fp16_ = ocl_runtime_->GetFp16Enable();
if (enable_fp16_) {
img_dtype = CL_HALF_FLOAT;
}
img_size->clear();
std::vector<size_t> vec{im_dst_x, im_dst_y, img_dtype};
*img_size = vec;
return RET_OK;
}
int ConcatOpenCLKernel::RunAxis0() {
auto allocator_ = ocl_runtime_->GetAllocator();
std::vector<size_t> img_size;
@ -85,39 +64,15 @@ int ConcatOpenCLKernel::Init() {
MS_LOG(ERROR) << " only support axis >= 0 and axis <= 3 ";
return RET_ERROR;
}
auto in_format = op_format_;
if (in_format != schema::Format_NHWC4 && in_format != schema::Format_NC4HW4) {
MS_LOG(ERROR) << "input format(" << in_format << ") "
<< "format not support!";
return RET_ERROR;
}
in_ori_format_ = in_tensors_[0]->GetFormat();
in_tensors_[0]->SetFormat(op_format_);
out_ori_format_ = out_tensors_[0]->GetFormat();
out_tensors_[0]->SetFormat(op_format_);
std::string kernel_name = "Concat";
if (in_tensors_.size() == 2) {
kernel_name += "2inputaxis";
kernel_name += std::to_string(param->axis_);
} else if (in_tensors_.size() == 3) {
kernel_name += "3inputaxis";
kernel_name += std::to_string(param->axis_);
} else if (in_tensors_.size() == 4) {
kernel_name += "4inputaxis";
kernel_name += std::to_string(param->axis_);
} else if (in_tensors_.size() == 6) {
kernel_name += "6inputaxis";
kernel_name += std::to_string(param->axis_);
if (in_tensors_.size() == 2 || in_tensors_.size() == 3 || in_tensors_.size() == 4 || in_tensors_.size() == 4) {
kernel_name += std::to_string(in_tensors_.size()) + "inputaxis" + std::to_string(param->axis_);
} else {
MS_LOG(ERROR) << " input must be 2 , 3 , 4 or 6";
return RET_ERROR;
}
if (in_format == schema::Format_NC4HW4) {
kernel_name += "_NC4HW4";
} else if (in_format == schema::Format_NHWC4) {
kernel_name += "_NHWC4";
}
kernel_name += "_NHWC4";
MS_LOG(DEBUG) << "kernel_name=: " << kernel_name;
std::set<std::string> build_options;
std::string source = concat_source;
@ -128,16 +83,13 @@ int ConcatOpenCLKernel::Init() {
return RET_OK;
}
int ConcatOpenCLKernel::ReSize() { return RET_OK; }
int ConcatOpenCLKernel::IntegraShapeToXYZ() {
auto in_format = op_format_;
if (out_tensors_[0]->shape().size() > 4 || out_tensors_[0]->shape().size() <= 0) {
if (out_tensors_[0]->shape().size() > 4 || out_tensors_[0]->shape().empty()) {
MS_LOG(ERROR) << "in_tensors_.shape() must between 0~4";
return RET_ERROR;
}
if (in_format == schema::Format_NHWC4 || in_format == schema::Format_NC4HW4) {
if (out_tensors_[0]->shape().size() == 4) {
for (int i = 0; i < in_tensors_.size(); ++i) {
cl_int4 temp_cl;
auto temp = in_tensors_[i]->shape();

View File

@ -25,28 +25,24 @@ namespace mindspore::kernel {
class ConcatOpenCLKernel : public OpenCLKernel {
public:
explicit ConcatOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs)
ConcatOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs)
: OpenCLKernel(parameter, inputs, outputs) {}
~ConcatOpenCLKernel() override{};
~ConcatOpenCLKernel() override = default;
int Init() override;
int ReSize() override;
int Run() override;
private:
int RunAxis0();
int GetImageSize(size_t idx, std::vector<size_t> *img_size) override;
int IntegraShapeToXYZ();
private:
cl::Kernel kernel_;
std::vector<cl_int3> XYZShape;
cl_int4 shape_nhwc;
cl_int4 shape_nhwc{};
};
} // namespace mindspore::kernel

View File

@ -38,8 +38,7 @@ int Conv2dTransposeOpenCLKernel::Init() {
MS_LOG(ERROR) << "only support kernel - stride == 2 * pad";
return RET_ERROR;
}
std::string kernel_name = "conv2d_transpose";
kernel_name += "_" + std::string(EnumNameFormat(op_format_));
std::string kernel_name = "conv2d_transpose_NHWC4";
enable_fp16_ = ocl_runtime_->GetFp16Enable();
#ifdef PROGRAM_WITH_IL
kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name);
@ -51,20 +50,14 @@ int Conv2dTransposeOpenCLKernel::Init() {
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
#endif
PadWeight();
in_ori_format_ = in_tensors_[0]->GetFormat();
in_tensors_[0]->SetFormat(op_format_);
out_ori_format_ = out_tensors_[0]->GetFormat();
out_tensors_[0]->SetFormat(op_format_);
MS_LOG(DEBUG) << kernel_name << " Init Done!";
return mindspore::lite::RET_OK;
}
int Conv2dTransposeOpenCLKernel::ReSize() { return mindspore::lite::RET_OK; }
void Conv2dTransposeOpenCLKernel::PadWeight() {
ConvParameter *param = reinterpret_cast<ConvParameter *>(op_parameter_);
int ci = in_tensors_[0]->Channel();
int co = out_tensors_[0]->Channel();
int ci = in_tensors_[0]->shape()[3];
int co = out_tensors_[0]->shape()[3];
int kh = param->kernel_h_;
int kw = param->kernel_w_;
int div_ci = UP_DIV(ci, C4NUM);
@ -147,32 +140,6 @@ void Conv2dTransposeOpenCLKernel::PadWeight() {
allocator->UnmapBuffer(bias_);
}
int Conv2dTransposeOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) {
size_t im_dst_x, im_dst_y;
int n = out_tensors_[0]->shape()[0];
int h = out_tensors_[0]->shape()[1];
int w = out_tensors_[0]->shape()[2];
int c = out_tensors_[0]->shape()[3];
if (op_format_ == schema::Format::Format_NHWC4) {
im_dst_x = w * UP_DIV(c, C4NUM);
im_dst_y = n * h;
} else if (op_format_ == schema::Format::Format_NC4HW4) {
im_dst_x = w;
im_dst_y = n * UP_DIV(c, C4NUM) * h;
} else {
MS_LOG(ERROR) << "not support op format:" << EnumNameFormat(op_format_);
return mindspore::lite::RET_ERROR;
}
size_t img_dtype = CL_FLOAT;
if (enable_fp16_) {
img_dtype = CL_HALF_FLOAT;
}
img_size->clear();
std::vector<size_t> vec{im_dst_x, im_dst_y, img_dtype};
*img_size = vec;
return mindspore::lite::RET_OK;
}
int Conv2dTransposeOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running!";
ConvParameter *param = reinterpret_cast<ConvParameter *>(op_parameter_);

View File

@ -27,21 +27,20 @@ namespace mindspore::kernel {
class Conv2dTransposeOpenCLKernel : public OpenCLKernel {
public:
explicit Conv2dTransposeOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs)
Conv2dTransposeOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs)
: OpenCLKernel(parameter, inputs, outputs) {}
~Conv2dTransposeOpenCLKernel() override{};
~Conv2dTransposeOpenCLKernel() override = default;
int Init() override;
int ReSize() override;
int Run() override;
void PadWeight();
int GetImageSize(size_t idx, std::vector<size_t> *img_size) override;
private:
void PadWeight();
cl::Kernel kernel_;
void *padWeight_;
void *bias_;
void *padWeight_{nullptr};
void *bias_{nullptr};
bool enable_fp16_{false};
};
} // namespace mindspore::kernel

View File

@ -29,8 +29,6 @@ using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_Conv2D;
using mindspore::schema::Format::Format_NC4HW4;
using mindspore::schema::Format::Format_NCHW;
using mindspore::schema::Format::Format_NHWC;
using mindspore::schema::Format::Format_NHWC4;
namespace mindspore::kernel {
@ -46,14 +44,6 @@ int ConvolutionOpenCLKernel::Init() {
auto input_tensor = in_tensors_[0];
auto output_tensor = out_tensors_[0];
in_ori_format_ = input_tensor->GetFormat();
out_ori_format_ = output_tensor->GetFormat();
if (op_format_ != Format_NHWC4 && op_format_ != Format_NC4HW4) {
MS_LOG(ERROR) << "op_format_ " << op_format_ << " not support!";
return RET_ERROR;
}
input_tensor->SetFormat(op_format_);
output_tensor->SetFormat(op_format_);
batch_size_ = input_tensor->Batch();
CI_ = input_tensor->Channel();
@ -112,7 +102,7 @@ int ConvolutionOpenCLKernel::Init() {
winograd_mem1_ = allocator->Malloc(size, {width, height, img_dtype});
}
this->InitBuffer();
InitBuffer();
MS_LOG(DEBUG) << "Convolution Init Done!";
return RET_OK;
@ -251,30 +241,6 @@ int ConvolutionOpenCLKernel::InitBuffer() {
return RET_OK;
}
int ConvolutionOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) {
size_t im_dst_x, im_dst_y;
if (in_tensors_[0]->GetFormat() == Format_NHWC4) {
if (OW_ * CO_SLICES_ <= MAX_IMAGE2D_SIZE) {
{
im_dst_y = batch_size_ * OH_;
im_dst_x = OW_ * CO_SLICES_;
}
} else {
im_dst_y = OW_;
im_dst_x = batch_size_ * OH_ * CO_SLICES_;
}
} else {
im_dst_y = batch_size_ * CO_SLICES_ * OH_;
im_dst_x = OW_;
}
size_t img_dtype = use_fp16_ ? CL_HALF_FLOAT : CL_FLOAT;
img_size->clear();
img_size->push_back(im_dst_x);
img_size->push_back(im_dst_y);
img_size->push_back(img_dtype);
return RET_OK;
}
int ConvolutionOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running!";

View File

@ -23,22 +23,22 @@
#include "src/runtime/kernel/opencl/opencl_kernel.h"
#include "schema/model_generated.h"
#include "nnacl/conv_parameter.h"
#include "schema/ops_generated.h"
namespace mindspore::kernel {
class ConvolutionOpenCLKernel : public OpenCLKernel {
public:
explicit ConvolutionOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs)
ConvolutionOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs)
: OpenCLKernel(parameter, inputs, outputs) {}
~ConvolutionOpenCLKernel() override{};
~ConvolutionOpenCLKernel() override = default;
int Init() override;
int Run() override;
int GetImageSize(size_t idx, std::vector<size_t> *img_size) override;
int InitBuffer() override;
private:
int InitBuffer();
int InitWeight();
int InitBias();
int GenerateWinogradWeight();
@ -92,7 +92,8 @@ class ConvolutionOpenCLKernel : public OpenCLKernel {
return code_id;
}
bool use_fp16_ = false;
bool use_fp16_{false};
const schema::Format op_format_{schema::Format_NHWC4};
int batch_size_{};
int CI_{};
@ -105,16 +106,16 @@ class ConvolutionOpenCLKernel : public OpenCLKernel {
int CO_SLICES_{};
int KH_{};
int KW_{};
void *packed_weight_ = nullptr;
void *packed_bias_ = nullptr;
bool has_bias_ = false;
void *packed_weight_{nullptr};
void *packed_bias_{nullptr};
bool has_bias_{false};
bool use_winograd_ = false;
bool use_winograd_{false};
int TILES_X_{};
int TILES_Y_{};
int TILES_XY_{};
void *winograd_mem0_ = nullptr;
void *winograd_mem1_ = nullptr;
void *winograd_mem0_{nullptr};
void *winograd_mem1_{nullptr};
cl::Kernel kernel_4x4to36_;
cl::Kernel kernel_conv_;

View File

@ -42,26 +42,12 @@ namespace mindspore::kernel {
int DepthwiseConv2dOpenCLKernel::Init() {
std::string kernel_name = "DepthwiseConv2d";
auto in_format = op_format_;
in_ori_format_ = in_tensors_[0]->GetFormat();
out_ori_format_ = out_tensors_[0]->GetFormat();
if (in_format != schema::Format::Format_NHWC4 && in_format != schema::Format::Format_NC4HW4) {
MS_LOG(ERROR) << "input format(" << in_format << ") "
<< "format not support!";
return mindspore::lite::RET_ERROR;
}
in_tensors_[0]->SetFormat(in_format);
out_tensors_[0]->SetFormat(in_format);
if (out_mem_type_ == OpenCLMemType::BUF) {
kernel_name += "_BUF";
} else {
kernel_name += "_IMG";
}
if (in_format == schema::Format::Format_NC4HW4) {
kernel_name += "_NC4HW4";
} else if (in_format == schema::Format::Format_NHWC4) {
kernel_name += "_NHWC4";
}
kernel_name += "_NHWC4";
auto parameter = reinterpret_cast<ConvParameter *>(op_parameter_);
if (parameter->kernel_h_ == 1) {
kernel_name += "_1x1";
@ -75,7 +61,7 @@ int DepthwiseConv2dOpenCLKernel::Init() {
ocl_runtime_->LoadSource(program_name, source);
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
#endif
this->InitBuffer();
InitBuffer();
MS_LOG(DEBUG) << kernel_name << " Init Done! mem type=" << static_cast<int>(out_mem_type_);
return mindspore::lite::RET_OK;
}
@ -146,28 +132,6 @@ int DepthwiseConv2dOpenCLKernel::InitBuffer() {
return mindspore::lite::RET_OK;
}
int DepthwiseConv2dOpenCLKernel::ReSize() { return mindspore::lite::RET_OK; }
int DepthwiseConv2dOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) {
size_t CO4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM);
size_t im_dst_x, im_dst_y;
if (in_tensors_[0]->GetFormat() == schema::Format::Format_NHWC4) {
im_dst_x = out_tensors_[0]->Width() * CO4;
im_dst_y = out_tensors_[0]->Height();
} else {
im_dst_y = out_tensors_[0]->Height() * CO4;
im_dst_x = out_tensors_[0]->Width();
}
size_t img_dtype = CL_FLOAT;
if (ocl_runtime_->GetFp16Enable()) {
img_dtype = CL_HALF_FLOAT;
}
img_size->clear();
std::vector<size_t> vec{im_dst_x, im_dst_y, img_dtype};
*img_size = vec;
return mindspore::lite::RET_OK;
}
int DepthwiseConv2dOpenCLKernel::GetGlobalSize(size_t idx, std::vector<size_t> *global_size) {
size_t CO4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM);
std::vector<size_t> global = {(size_t)out_tensors_[0]->Width(), (size_t)out_tensors_[0]->Height(), CO4};

View File

@ -25,27 +25,25 @@ namespace mindspore::kernel {
class DepthwiseConv2dOpenCLKernel : public OpenCLKernel {
public:
explicit DepthwiseConv2dOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs)
: OpenCLKernel(parameter, inputs, outputs), packed_weight_(nullptr), bias_data_(nullptr), kernel_(nullptr) {}
DepthwiseConv2dOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs)
: OpenCLKernel(parameter, inputs, outputs) {}
~DepthwiseConv2dOpenCLKernel() override{};
~DepthwiseConv2dOpenCLKernel() override = default;
int Init() override;
int ReSize() override;
int Run() override;
int InitBuffer();
int InitBuffer() override;
int GetImageSize(size_t idx, std::vector<size_t> *img_size) override;
int GetGlobalSize(size_t idx, std::vector<size_t> *global_size) override;
int GetLocalSize(size_t idx, const std::vector<size_t> &global_size, std::vector<size_t> *local_size) override;
private:
void *packed_weight_;
void *bias_data_;
void *packed_weight_{nullptr};
void *bias_data_{nullptr};
cl::Kernel kernel_;
};
} // namespace mindspore::kernel

View File

@ -33,8 +33,7 @@ using mindspore::schema::PrimitiveType_FullConnection;
namespace mindspore::kernel {
int FullConnectionOpenCLKernel::Init() {
std::string kernel_name = "FullConnection";
kernel_name += "_" + std::string(EnumNameFormat(op_format_));
std::string kernel_name = "FullConnection_NHWC4";
auto param = reinterpret_cast<MatMulParameter *>(op_parameter_);
transposeA = param->a_transpose_;
if (transposeA) {
@ -77,16 +76,10 @@ int FullConnectionOpenCLKernel::Init() {
#endif
PadWeight();
in_ori_format_ = in_tensors_[0]->GetFormat();
out_ori_format_ = out_tensors_[0]->GetFormat();
in_tensors_[0]->SetFormat(op_format_);
out_tensors_[0]->SetFormat(op_format_);
MS_LOG(DEBUG) << kernel_name << " Init Done!";
return RET_OK;
}
int FullConnectionOpenCLKernel::ReSize() { return RET_OK; }
void FullConnectionOpenCLKernel::PadWeight() {
// ABMCI @ ABCICO = ABMCO
auto allocator = ocl_runtime_->GetAllocator();
@ -177,39 +170,6 @@ void FullConnectionOpenCLKernel::PadWeight() {
allocator->UnmapBuffer(bias_);
}
int FullConnectionOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) {
size_t im_dst_x, im_dst_y;
auto out_shape = out_tensors_[0]->shape();
int n = 1, h = 1, w = 1, c = 1;
if (out_tensors_[0]->shape().size() == 2) {
n = out_shape[0];
c = out_shape[1];
} else {
n = out_shape[0];
h = out_shape[1];
w = out_shape[2];
c = out_shape[3];
}
if (op_format_ == schema::Format_NHWC4) {
im_dst_x = w * UP_DIV(c, C4NUM);
im_dst_y = n * h;
} else if (op_format_ == schema::Format_NC4HW4) {
im_dst_x = w;
im_dst_y = n * UP_DIV(c, C4NUM) * h;
} else {
MS_LOG(ERROR) << "not support op format:" << EnumNameFormat(op_format_);
return RET_ERROR;
}
size_t img_dtype = CL_FLOAT;
if (enable_fp16_) {
img_dtype = CL_HALF_FLOAT;
}
img_size->clear();
std::vector<size_t> vec{im_dst_x, im_dst_y, img_dtype};
*img_size = vec;
return RET_OK;
}
int FullConnectionOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running!";
// local size should less than MAX_GROUP_SIZE

View File

@ -26,21 +26,20 @@ namespace mindspore::kernel {
class FullConnectionOpenCLKernel : public OpenCLKernel {
public:
explicit FullConnectionOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs)
FullConnectionOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs)
: OpenCLKernel(parameter, inputs, outputs) {}
~FullConnectionOpenCLKernel() override{};
~FullConnectionOpenCLKernel() override = default;
int Init() override;
int ReSize() override;
int Run() override;
void PadWeight();
int GetImageSize(size_t idx, std::vector<size_t> *img_size) override;
private:
void PadWeight();
cl::Kernel kernel_;
void *padWeight_;
void *bias_;
void *padWeight_{nullptr};
void *bias_{nullptr};
bool enable_fp16_{false};
bool transposeA{false};
bool transposeB{true};

View File

@ -31,25 +31,10 @@ using mindspore::schema::PrimitiveType_Gather;
namespace mindspore::kernel {
int GatherOpenCLKernel::Init() {
std::string kernel_name = "gather";
auto in_format = op_format_;
if (in_format != schema::Format_NHWC4 && in_format != schema::Format_NC4HW4) {
MS_LOG(ERROR) << "input format(" << in_format << ") "
<< "format not support!";
return RET_ERROR;
}
in_ori_format_ = in_tensors_[0]->GetFormat();
out_ori_format_ = out_tensors_[0]->GetFormat();
in_tensors_[0]->SetFormat(op_format_);
out_tensors_[0]->SetFormat(op_format_);
std::string kernel_name = "gather_NHWC4";
#ifdef PROGRAM_WITH_IL
kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name);
#else
if (in_format == schema::Format_NC4HW4) {
kernel_name += "_NC4HW4";
} else {
kernel_name += "_NHWC4";
}
std::set<std::string> build_options;
std::string source = gather_source;
std::string program_name = "gather";
@ -99,29 +84,6 @@ int GatherOpenCLKernel::InitBuffer() {
return RET_OK;
}
int GatherOpenCLKernel::ReSize() { return RET_OK; }
int GatherOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) {
size_t CO4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM);
size_t im_dst_x, im_dst_y;
if (in_tensors_[0]->GetFormat() == schema::Format::Format_NHWC4) {
im_dst_x = out_tensors_[0]->Width() * CO4;
im_dst_y = out_tensors_[0]->Height() * out_tensors_[0]->Batch();
} else {
im_dst_y = out_tensors_[0]->Batch() * out_tensors_[0]->Height() * CO4;
im_dst_x = out_tensors_[0]->Width();
}
size_t img_dtype = CL_FLOAT;
auto enable_fp16_ = ocl_runtime_->GetFp16Enable();
if (enable_fp16_) {
img_dtype = CL_HALF_FLOAT;
}
img_size->clear();
std::vector<size_t> vec{im_dst_x, im_dst_y, img_dtype};
*img_size = std::move(vec);
return RET_OK;
}
int GatherOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running! ";
auto param = reinterpret_cast<GatherParameter *>(this->op_parameter_);

View File

@ -25,25 +25,19 @@ namespace mindspore::kernel {
class GatherOpenCLKernel : public OpenCLKernel {
public:
explicit GatherOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs)
: OpenCLKernel(parameter, inputs, outputs), indices_data_(nullptr) {}
GatherOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs)
: OpenCLKernel(parameter, inputs, outputs) {}
~GatherOpenCLKernel() override{};
~GatherOpenCLKernel() override = default;
int Init() override;
int ReSize() override;
int Run() override;
int GetImageSize(size_t idx, std::vector<size_t> *img_size) override;
int InitBuffer();
int InitBuffer() override;
private:
cl::Kernel kernel_;
int32_t *indices_data_;
int32_t *indices_data_{nullptr};
};
} // namespace mindspore::kernel
#endif

View File

@ -31,8 +31,7 @@ using mindspore::schema::PrimitiveType_MatMul;
namespace mindspore::kernel {
int MatMulOpenCLKernel::Init() {
std::string kernel_name = "MatMul";
kernel_name += "_" + std::string(EnumNameFormat(op_format_));
std::string kernel_name = "MatMul_NHWC4";
auto param = reinterpret_cast<MatMulParameter *>(op_parameter_);
transposeA = param->a_transpose_;
if (transposeA) {
@ -64,16 +63,10 @@ int MatMulOpenCLKernel::Init() {
#endif
PadWeight();
in_ori_format_ = in_tensors_[0]->GetFormat();
out_ori_format_ = out_tensors_[0]->GetFormat();
in_tensors_[0]->SetFormat(op_format_);
out_tensors_[0]->SetFormat(op_format_);
MS_LOG(DEBUG) << kernel_name << " Init Done!";
return mindspore::lite::RET_OK;
}
int MatMulOpenCLKernel::ReSize() { return mindspore::lite::RET_OK; }
void MatMulOpenCLKernel::PadWeight() {
// ABMCI @ ABCICO = ABMCO
auto allocator = ocl_runtime_->GetAllocator();
@ -137,39 +130,6 @@ void MatMulOpenCLKernel::PadWeight() {
allocator->UnmapBuffer(padWeight_);
}
int MatMulOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) {
size_t im_dst_x, im_dst_y;
auto out_shape = out_tensors_[0]->shape();
int n = 1, h = 1, w = 1, c = 1;
if (dims == 2) {
n = out_shape[0];
c = out_shape[1];
} else if (dims == 4) {
n = out_shape[0];
h = out_shape[1];
w = out_shape[2];
c = out_shape[3];
}
if (op_format_ == schema::Format_NHWC4) {
im_dst_x = w * UP_DIV(c, C4NUM);
im_dst_y = n * h;
} else if (op_format_ == schema::Format_NC4HW4) {
im_dst_x = w;
im_dst_y = n * UP_DIV(c, C4NUM) * h;
} else {
MS_LOG(ERROR) << "not support op format:" << EnumNameFormat(op_format_);
return mindspore::lite::RET_ERROR;
}
size_t img_dtype = CL_FLOAT;
if (enable_fp16_) {
img_dtype = CL_HALF_FLOAT;
}
img_size->clear();
std::vector<size_t> vec{im_dst_x, im_dst_y, img_dtype};
*img_size = vec;
return mindspore::lite::RET_OK;
}
int MatMulOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running!";
// local size should less than MAX_GROUP_SIZE

View File

@ -26,27 +26,26 @@ namespace mindspore::kernel {
class MatMulOpenCLKernel : public OpenCLKernel {
public:
explicit MatMulOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs)
: OpenCLKernel(parameter, inputs, outputs), inShape(MAX_DIMS, 1), outShape(MAX_DIMS, 1) {}
~MatMulOpenCLKernel() override{};
MatMulOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs)
: OpenCLKernel(parameter, inputs, outputs) {}
~MatMulOpenCLKernel() override = default;
int Init() override;
int ReSize() override;
int Run() override;
void PadWeight();
int GetImageSize(size_t idx, std::vector<size_t> *img_size) override;
private:
void PadWeight();
cl::Kernel kernel_;
void *padWeight_;
void *padWeight_{nullptr};
bool enable_fp16_{false};
bool transposeA{false};
bool transposeB{true};
int dims;
static constexpr int MAX_DIMS = 4; // max supported matmul dims
std::vector<int> inShape;
std::vector<int> outShape;
int dims{};
static constexpr int MAX_DIMS{4}; // max supported matmul dims
std::vector<int> inShape{std::vector<int>(MAX_DIMS, 1)};
std::vector<int> outShape{std::vector<int>(MAX_DIMS, 1)};
};
} // namespace mindspore::kernel

View File

@ -30,10 +30,6 @@ using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::PaddingMode_CONSTANT;
using mindspore::schema::PrimitiveType_Pad;
using mindspore::schema::Format::Format_NC4HW4;
using mindspore::schema::Format::Format_NCHW;
using mindspore::schema::Format::Format_NHWC;
using mindspore::schema::Format::Format_NHWC4;
namespace mindspore::kernel {
@ -41,9 +37,6 @@ int PadOpenCLKernel::Init() {
auto param = reinterpret_cast<PadParameter *>(op_parameter_);
std::set<std::string> build_options;
if (op_format_ != Format_NHWC4 && op_format_ != Format_NC4HW4) {
MS_LOG(ERROR) << "op_format_ " << op_format_ << " not support!";
}
if (in_tensors_.empty()) {
MS_LOG(ERROR) << "PadOpenCLKernel in_tensors is empty";
return RET_ERROR;
@ -63,10 +56,6 @@ int PadOpenCLKernel::Init() {
auto input_tensor = in_tensors_[0];
auto output_tensor = out_tensors_[0];
in_ori_format_ = input_tensor->GetFormat();
out_ori_format_ = output_tensor->GetFormat();
input_tensor->SetFormat(op_format_);
output_tensor->SetFormat(op_format_);
CI_ = input_tensor->Channel();
IH_ = input_tensor->Height();
@ -78,8 +67,8 @@ int PadOpenCLKernel::Init() {
CO_SLICES_ = UP_DIV(CO_, C4NUM);
const std::string source = pad_source;
const std::string kernel_name = op_format_ == Format_NHWC4 ? "Pad_NHWC4" : "Pad_NC4HW4";
const std::string program_name = "Pad";
const std::string kernel_name = "Pad_NHWC4";
ocl_runtime_->LoadSource(program_name, source);
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
@ -87,30 +76,6 @@ int PadOpenCLKernel::Init() {
return RET_OK;
}
int PadOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) {
size_t im_dst_x, im_dst_y;
if (in_tensors_[0]->GetFormat() == Format_NHWC4) {
if (OW_ * CO_SLICES_ <= MAX_IMAGE2D_SIZE) {
{
im_dst_x = OW_ * CO_SLICES_;
im_dst_y = OH_;
}
} else {
im_dst_x = OH_ * CO_SLICES_;
im_dst_y = OW_;
}
} else {
im_dst_y = OH_ * CO_SLICES_;
im_dst_x = OW_;
}
size_t img_dtype = ocl_runtime_->GetFp16Enable() ? CL_HALF_FLOAT : CL_FLOAT;
img_size->clear();
img_size->push_back(im_dst_x);
img_size->push_back(im_dst_y);
img_size->push_back(img_dtype);
return RET_OK;
}
int PadOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running!";

View File

@ -28,16 +28,16 @@ namespace mindspore::kernel {
class PadOpenCLKernel : public OpenCLKernel {
public:
explicit PadOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs)
PadOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs)
: OpenCLKernel(parameter, inputs, outputs) {}
~PadOpenCLKernel() override{};
~PadOpenCLKernel() override = default;
int Init() override;
int Run() override;
int GetImageSize(size_t idx, std::vector<size_t> *img_size) override;
private:
cl::Kernel kernel_;
int CI_{};
int IH_{};
int IW_{};
@ -46,7 +46,6 @@ class PadOpenCLKernel : public OpenCLKernel {
int OW_{};
int CI_SLICES_{};
int CO_SLICES_{};
cl::Kernel kernel_;
};
} // namespace mindspore::kernel

View File

@ -72,7 +72,7 @@ int PoolingOpenCLKernel::Init() {
#ifdef PROGRAM_WITH_IL
kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name);
#else
kernel_name += "_" + std::string(EnumNameFormat(op_format_));
kernel_name += "_NHWC4";
if (out_mem_type_ == OpenCLMemType::BUF) {
MS_LOG(ERROR) << "buffer output not support yet.";
return mindspore::lite::RET_ERROR;
@ -83,10 +83,6 @@ int PoolingOpenCLKernel::Init() {
ocl_runtime_->LoadSource(program_name, source);
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
#endif
in_ori_format_ = in_tensors_[0]->GetFormat();
out_ori_format_ = out_tensors_[0]->GetFormat();
in_tensors_[0]->SetFormat(op_format_);
out_tensors_[0]->SetFormat(op_format_);
MS_LOG(DEBUG) << kernel_name << " Init Done!";
return mindspore::lite::RET_OK;
@ -100,36 +96,6 @@ std::vector<size_t> PoolingOpenCLKernel::InitGlobalSize() const {
return global;
}
int PoolingOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) {
size_t im_dst_x, im_dst_y;
int n = out_tensors_[0]->shape()[0];
int h = out_tensors_[0]->shape()[1];
int w = out_tensors_[0]->shape()[2];
int c = out_tensors_[0]->shape()[3];
if (op_format_ == schema::Format::Format_NHWC4) {
im_dst_x = w * UP_DIV(c, C4NUM);
im_dst_y = n * h;
} else if (op_format_ == schema::Format::Format_NC4HW4) {
im_dst_x = w;
im_dst_y = n * UP_DIV(c, C4NUM) * h;
} else {
MS_LOG(ERROR) << "not support op format:" << EnumNameFormat(op_format_);
return mindspore::lite::RET_ERROR;
}
size_t img_dtype = CL_FLOAT;
if (enable_fp16_) {
img_dtype = CL_HALF_FLOAT;
}
img_size->clear();
std::vector<size_t> vec{im_dst_x, im_dst_y, img_dtype};
*img_size = vec;
return mindspore::lite::RET_OK;
}
int PoolingOpenCLKernel::InitBuffer() { return mindspore::lite::RET_OK; }
int PoolingOpenCLKernel::ReSize() { return mindspore::lite::RET_OK; }
int PoolingOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running!";

View File

@ -26,18 +26,13 @@ namespace mindspore::kernel {
class PoolingOpenCLKernel : public OpenCLKernel {
public:
explicit PoolingOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs)
: OpenCLKernel(parameter, inputs, outputs) {
parameter_ = reinterpret_cast<PoolingParameter *>(parameter);
}
~PoolingOpenCLKernel() override{};
PoolingOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs)
: OpenCLKernel(parameter, inputs, outputs), parameter_(reinterpret_cast<PoolingParameter *>(parameter)) {}
~PoolingOpenCLKernel() override = default;
int Init() override;
int ReSize() override;
int Run() override;
int InitBuffer();
int GetImageSize(size_t idx, std::vector<size_t> *img_size) override;
private:
std::vector<size_t> InitGlobalSize() const;

View File

@ -33,7 +33,7 @@ using mindspore::schema::PrimitiveType_PReLU;
namespace mindspore::kernel {
void PReluOpenCLKernel::InitBuffer() {
int PReluOpenCLKernel::InitBuffer() {
auto allocator = ocl_runtime_->GetAllocator();
auto weight_tensor = in_tensors_[1];
if (weight_is_scalar) {
@ -71,6 +71,7 @@ void PReluOpenCLKernel::InitBuffer() {
}
allocator->UnmapBuffer(weight_vector_);
}
return RET_OK;
}
int PReluOpenCLKernel::Init() {
@ -84,10 +85,6 @@ int PReluOpenCLKernel::Init() {
C_ = input_tensor->Channel();
H_ = input_tensor->Height();
W_ = input_tensor->Width();
if (input_tensor->GetFormat() != schema::Format_NC4HW4 && input_tensor->GetFormat() != schema::Format_NHWC4) {
MS_LOG(ERROR) << "PRelu only support Format_NC4HW4 and Format_NHWC4";
return mindspore::lite::RET_ERROR;
}
if (batch_size_ != 1) {
MS_LOG(ERROR) << "Init PRelu kernel failed: Unsupported multi-batch.";
return RET_ERROR;
@ -104,12 +101,7 @@ int PReluOpenCLKernel::Init() {
MS_LOG(ERROR) << "PRelu weight must be float32 or float16";
return RET_ERROR;
}
enable_fp16_ = ocl_runtime_->GetFp16Enable();
in_ori_format_ = input_tensor->GetFormat();
out_ori_format_ = out_tensors_[0]->GetFormat();
input_tensor->SetFormat(op_format_);
out_tensors_[0]->SetFormat(op_format_);
std::set<std::string> build_options;
std::string source = prelu_source;
@ -137,11 +129,7 @@ int PReluOpenCLKernel::Run() {
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, weight_vector_);
}
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, shape);
if (op_format_ == schema::Format_NHWC4) {
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, 2);
} else { // Format_NC4HW4 = 100
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, 100);
}
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, 2);
std::vector<size_t> local = {4, 4, 1};
std::vector<size_t> global = {static_cast<size_t>(H_), static_cast<size_t>(W_), static_cast<size_t>(CO_SLICES_)};
@ -153,31 +141,6 @@ int PReluOpenCLKernel::Run() {
return mindspore::lite::RET_OK;
}
int PReluOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) {
size_t im_dst_x, im_dst_y;
auto CO_SLICES_ = UP_DIV(C_, C4NUM);
if (in_tensors_[0]->GetFormat() == schema::Format_NHWC4) {
if (W_ * CO_SLICES_ <= MAX_IMAGE2D_SIZE) {
{
im_dst_y = batch_size_ * H_;
im_dst_x = W_ * CO_SLICES_;
}
} else {
im_dst_y = W_;
im_dst_x = batch_size_ * H_ * CO_SLICES_;
}
} else {
im_dst_y = batch_size_ * CO_SLICES_ * H_;
im_dst_x = W_;
}
size_t img_dtype = enable_fp16_ ? CL_HALF_FLOAT : CL_FLOAT;
img_size->clear();
img_size->push_back(im_dst_x);
img_size->push_back(im_dst_y);
img_size->push_back(img_dtype);
return mindspore::lite::RET_OK;
}
kernel::LiteKernel *OpenCLPReluKernelCreator(const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
const lite::InnerContext *ctx, const kernel::KernelKey &desc,

View File

@ -27,15 +27,14 @@ namespace mindspore::kernel {
class PReluOpenCLKernel : public OpenCLKernel {
public:
explicit PReluOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs)
PReluOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs)
: OpenCLKernel(parameter, inputs, outputs) {}
~PReluOpenCLKernel() override{};
~PReluOpenCLKernel() override = default;
int Init() override;
int Run() override;
int GetImageSize(size_t idx, std::vector<size_t> *img_size) override;
void InitBuffer();
int InitBuffer() override;
private:
cl::Kernel kernel_;

View File

@ -57,7 +57,7 @@ int ReduceOpenCLKernel::Init() {
return RET_PARAM_INVALID;
}
std::string kernel_name = reduce_type2str.at(reduce_param->mode_);
kernel_name += "_" + std::string(EnumNameFormat(op_format_));
kernel_name += "_NHWC4";
enable_fp16_ = ocl_runtime_->GetFp16Enable();
if (in_tensors_[0]->shape().back() != out_tensors_[0]->shape().back()) {
@ -74,10 +74,6 @@ int ReduceOpenCLKernel::Init() {
ocl_runtime_->LoadSource(program_name, source);
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
#endif
in_ori_format_ = in_tensors_[0]->GetFormat();
out_ori_format_ = out_tensors_[0]->GetFormat();
in_tensors_[0]->SetFormat(op_format_);
out_tensors_[0]->SetFormat(op_format_);
MS_LOG(DEBUG) << kernel_name << " Init Done!";
return mindspore::lite::RET_OK;
}
@ -97,31 +93,6 @@ void ReduceOpenCLKernel::InitNHWCShape() {
nhwc_shape_ = {n, h, w, c};
}
int ReduceOpenCLKernel::ReSize() { return mindspore::lite::RET_OK; }
// Reports the output image extent (width, height, dtype) for Reduce, derived
// from the NHWC shape cached by InitNHWCShape(). Rejects layouts other than
// NHWC4 / NC4HW4.
int ReduceOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) {
  const int n = nhwc_shape_[0];
  const int h = nhwc_shape_[1];
  const int w = nhwc_shape_[2];
  const int c = nhwc_shape_[3];
  size_t width;
  size_t height;
  if (op_format_ == schema::Format_NHWC4) {
    width = w * UP_DIV(c, C4NUM);
    height = n * h;
  } else if (op_format_ == schema::Format_NC4HW4) {
    width = w;
    height = n * UP_DIV(c, C4NUM) * h;
  } else {
    MS_LOG(ERROR) << "not support op format:" << EnumNameFormat(op_format_);
    return mindspore::lite::RET_ERROR;
  }
  const size_t img_dtype = enable_fp16_ ? CL_HALF_FLOAT : CL_FLOAT;
  img_size->clear();
  *img_size = {width, height, img_dtype};
  return mindspore::lite::RET_OK;
}
int ReduceOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running!";
std::vector<int> shapex = in_tensors_[0]->shape();

View File

@ -26,15 +26,13 @@
namespace mindspore::kernel {
class ReduceOpenCLKernel : public OpenCLKernel {
public:
explicit ReduceOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs)
ReduceOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs)
: OpenCLKernel(parameter, inputs, outputs) {}
~ReduceOpenCLKernel() override{};
~ReduceOpenCLKernel() override = default;
int Init() override;
int ReSize() override;
int Run() override;
int GetImageSize(size_t idx, std::vector<size_t> *img_size) override;
void InitNHWCShape();
private:

View File

@ -31,25 +31,11 @@ using mindspore::schema::PrimitiveType_Squeeze;
namespace mindspore::kernel {
int ReshapeOpenCLKernel::Init() {
std::string kernel_name = "reshape";
kernel_name += "_" + std::string(EnumNameFormat(op_format_));
enable_fp16_ = ocl_runtime_->GetFp16Enable();
std::string kernel_name = "reshape_NHWC4";
if (out_tensors_[0]->shape().size() != 2 && out_tensors_[0]->shape().size() != 4) {
MS_LOG(ERROR) << "Reshape output size should in 2,4";
return RET_ERROR;
}
if (in_tensors_[0]->shape().size() == 2) {
inShape = {in_tensors_[0]->shape()[0], 1, 1, in_tensors_[0]->shape()[1]};
} else {
inShape = {in_tensors_[0]->shape()[0], in_tensors_[0]->shape()[1], in_tensors_[0]->shape()[2],
in_tensors_[0]->shape()[3]};
}
if (out_tensors_[0]->shape().size() == 2) {
outShape = {out_tensors_[0]->shape()[0], 1, 1, out_tensors_[0]->shape()[1]};
} else {
outShape = {out_tensors_[0]->shape()[0], out_tensors_[0]->shape()[1], out_tensors_[0]->shape()[2],
out_tensors_[0]->shape()[3]};
}
#ifdef PROGRAM_WITH_IL
kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name);
#else
@ -59,55 +45,20 @@ int ReshapeOpenCLKernel::Init() {
ocl_runtime_->LoadSource(program_name, source);
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
#endif
in_ori_format_ = in_tensors_[0]->GetFormat();
out_ori_format_ = out_tensors_[0]->GetFormat();
in_tensors_[0]->SetFormat(op_format_);
out_tensors_[0]->SetFormat(op_format_);
MS_LOG(DEBUG) << kernel_name << " Init Done!";
return RET_OK;
}
int ReshapeOpenCLKernel::ReSize() { return RET_OK; }
// Returns the cached image size when one has already been computed; otherwise
// derives the (width, height, dtype) triple from the output NHWC shape,
// caches it in img_size_, and returns it.
int ReshapeOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) {
  // Fast path: reuse the previously computed size.
  if (img_size_.size() == OpenCLImageSizeIndex::IDX_NUM) {
    *img_size = img_size_;
    return RET_OK;
  }
  const int n = outShape[0];
  const int h = outShape[1];
  const int w = outShape[2];
  const int c = outShape[3];
  size_t width;
  size_t height;
  if (op_format_ == schema::Format::Format_NHWC4) {
    width = w * UP_DIV(c, C4NUM);
    height = n * h;
  } else if (op_format_ == schema::Format::Format_NC4HW4) {
    width = w;
    height = n * UP_DIV(c, C4NUM) * h;
  } else {
    MS_LOG(ERROR) << "not support op format:" << EnumNameFormat(op_format_);
    return RET_ERROR;
  }
  const size_t img_dtype = enable_fp16_ ? CL_HALF_FLOAT : CL_FLOAT;
  std::vector<size_t> result{width, height, img_dtype};
  img_size->clear();
  *img_size = result;
  img_size_ = result;  // cache for subsequent calls
  return RET_OK;
}
int ReshapeOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running!";
auto in = Image2DInfo(in_tensors_.front());
auto out = Image2DInfo(out_tensors_.front());
std::vector<size_t> local = {};
std::vector<size_t> global{img_size_[0], img_size_[1]};
cl_int4 src_size = {inShape[3], inShape[2], inShape[1], inShape[0]};
cl_int4 dst_size = {static_cast<cl_int>(img_size_[0]), static_cast<cl_int>(img_size_[1]), outShape[3],
outShape[3] * outShape[2]};
std::vector<size_t> global{out.width, out.height};
cl_int4 src_size = {cl_int(in.C), cl_int(in.W), cl_int(in.H), cl_int(in.N)};
cl_int4 dst_size = {cl_int(out.width), cl_int(out.height), cl_int(out.C), cl_int(out.C * out.W)};
int arg_idx = 0;
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());

View File

@ -25,21 +25,16 @@
namespace mindspore::kernel {
class ReshapeOpenCLKernel : public OpenCLKernel {
public:
explicit ReshapeOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs)
ReshapeOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs)
: OpenCLKernel(parameter, inputs, outputs) {}
~ReshapeOpenCLKernel() override{};
~ReshapeOpenCLKernel() override = default;
int Init() override;
int ReSize() override;
int Run() override;
int GetImageSize(size_t idx, std::vector<size_t> *img_size) override;
private:
cl::Kernel kernel_;
bool enable_fp16_{false};
std::vector<int> inShape;
std::vector<int> outShape;
};
} // namespace mindspore::kernel

View File

@ -54,7 +54,7 @@ int ResizeOpenCLKernel::Init() {
MS_LOG(ERROR) << "unsupported resize method:" << resize_param->method_;
return RET_PARAM_INVALID;
}
kernel_name += "_" + std::string(EnumNameFormat(op_format_));
kernel_name += "_NHWC4";
#ifdef PROGRAM_WITH_IL
kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name);
#else
@ -64,39 +64,10 @@ int ResizeOpenCLKernel::Init() {
ocl_runtime_->LoadSource(program_name, source);
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
#endif
in_ori_format_ = in_tensors_[0]->GetFormat();
out_ori_format_ = out_tensors_[0]->GetFormat();
in_tensors_[0]->SetFormat(op_format_);
out_tensors_[0]->SetFormat(op_format_);
MS_LOG(DEBUG) << kernel_name << " Init Done!";
return RET_OK;
}
int ResizeOpenCLKernel::ReSize() { return RET_OK; }
// Computes the output image extent (width, height, dtype) for Resize from the
// output tensor's shape, honoring the kernel's operating format.
int ResizeOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) {
  auto out_shape = out_tensors_[0]->shape();
  size_t width;
  size_t height;
  if (op_format_ == schema::Format_NHWC4) {
    width = out_shape[2] * UP_DIV(out_shape[3], C4NUM);  // W * C4
    height = out_shape[0] * out_shape[1];                // N * H
  } else if (op_format_ == schema::Format_NC4HW4) {
    width = out_shape[2];
    height = out_shape[0] * UP_DIV(out_shape[3], C4NUM) * out_shape[1];
  } else {
    MS_LOG(ERROR) << "not support op format:" << EnumNameFormat(op_format_);
    return RET_ERROR;
  }
  const size_t img_dtype = ocl_runtime_->GetFp16Enable() ? CL_HALF_FLOAT : CL_FLOAT;
  img_size->clear();
  *img_size = {width, height, img_dtype};
  return RET_OK;
}
float ResizeOpenCLKernel::getResizeScaleFactor(int input_size, int output_size) {
return input_size > 1 && output_size > 1 && alignCorner
? static_cast<float>(input_size - 1) / static_cast<float>(output_size - 1)

View File

@ -26,21 +26,20 @@
namespace mindspore::kernel {
class ResizeOpenCLKernel : public OpenCLKernel {
public:
explicit ResizeOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs)
ResizeOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs)
: OpenCLKernel(parameter, inputs, outputs) {}
~ResizeOpenCLKernel() override{};
~ResizeOpenCLKernel() override = default;
int Init() override;
int ReSize() override;
int Run() override;
int GetImageSize(size_t idx, std::vector<size_t> *img_size) override;
float getResizeScaleFactor(int input_size, int output_size);
private:
float getResizeScaleFactor(int input_size, int output_size);
cl::Kernel kernel_;
bool alignCorner;
bool preserveAspectRatio;
bool alignCorner{false};
bool preserveAspectRatio{false};
};
} // namespace mindspore::kernel

View File

@ -46,79 +46,19 @@ ScaleOpenCLKernel::~ScaleOpenCLKernel() {
}
}
std::vector<size_t> ScaleOpenCLKernel::InitGlobalSize() const {
const size_t global_x = out_tensors_[0]->Width();
const size_t global_y = out_tensors_[0]->Height();
const size_t global_z = UP_ROUND_DIV(out_tensors_[0]->Channel(), C4NUM);
std::vector<size_t> global = {global_x, global_y, global_z};
return global;
}
void ScaleOpenCLKernel::Image2dGetWorkGroupSize() {
local_size_ = {16, 16};
if (out_tensors_[0]->shape().size() == 2) {
size_t H = out_tensors_[0]->shape()[0];
size_t W = UP_DIV(out_tensors_[0]->shape()[1], C4NUM);
global_size_ = {W, H};
return;
}
if (out_tensors_[0]->GetFormat() == schema::Format_NC4HW4) {
size_t H = out_tensors_[0]->Batch() * out_tensors_[0]->Height() * UP_DIV(out_tensors_[0]->Channel(), C4NUM);
size_t W = out_tensors_[0]->Width();
global_size_ = {W, H};
} else if (out_tensors_[0]->GetFormat() == schema::Format_NHWC4) {
} else {
size_t H = out_tensors_[0]->Batch() * out_tensors_[0]->Height();
size_t W = out_tensors_[0]->Width() * UP_DIV(out_tensors_[0]->Channel(), C4NUM);
global_size_ = {W, H};
} else if (out_tensors_[0]->GetFormat() == schema::Format_NC4) {
size_t H = out_tensors_[0]->Batch();
size_t W = UP_DIV(out_tensors_[0]->Channel(), C4NUM);
global_size_ = {W, H};
} else {
MS_LOG(ERROR) << "Unsupport data format " << out_tensors_[0]->GetFormat();
}
}
void ScaleOpenCLKernel::BufferGetWorkGroupSize() {
uint32_t element_num = out_tensors_[0]->ElementsC4Num();
global_size_ = {element_num};
}
int ScaleOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) {
size_t im_dst_x, im_dst_y;
if (out_tensors_[0]->shape().size() == 2) {
im_dst_x = UP_DIV(out_tensors_[0]->shape()[1], C4NUM);
im_dst_y = out_tensors_[0]->shape()[0];
} else {
if (out_tensors_[0]->GetFormat() == schema::Format_NC4HW4) {
im_dst_x = out_tensors_[0]->Width();
im_dst_y = out_tensors_[0]->Batch() * out_tensors_[0]->Height() * UP_DIV(out_tensors_[0]->Channel(), C4NUM);
} else if (out_tensors_[0]->GetFormat() == schema::Format_NHWC4) {
im_dst_x = out_tensors_[0]->Width() * UP_DIV(out_tensors_[0]->Channel(), C4NUM);
im_dst_y = out_tensors_[0]->Batch() * out_tensors_[0]->Height();
} else if (out_tensors_[0]->GetFormat() == schema::Format_NC4) {
im_dst_y = out_tensors_[0]->Batch();
im_dst_x = UP_DIV(out_tensors_[0]->Channel(), C4NUM);
} else {
MS_LOG(ERROR) << "Unsupport data format " << out_tensors_[0]->GetFormat();
return RET_ERROR;
}
}
size_t img_dtype = CL_FLOAT;
if (in_tensors_[0]->data_type() == kNumberTypeFloat16) {
img_dtype = CL_HALF_FLOAT;
} else if (in_tensors_[0]->data_type() == kNumberTypeFloat32) {
img_dtype = CL_FLOAT;
} else {
MS_LOG(ERROR) << "Unsupport data type " << in_tensors_[0]->data_type();
}
img_size->clear();
std::vector<size_t> vec{im_dst_x, im_dst_y, img_dtype};
*img_size = vec;
return RET_OK;
}
int ScaleOpenCLKernel::InitBuffer() {
if (!element_flag_) {
return RET_OK;
@ -127,6 +67,7 @@ int ScaleOpenCLKernel::InitBuffer() {
auto allocator = ocl_runtime_->GetAllocator();
std::vector<size_t> img_size;
GetImageSize(0, &img_size);
img_size[2] = in_tensors_[1]->data_type() == kNumberTypeFloat16 ? CL_HALF_FLOAT : CL_FLOAT;
if (scale_C_flag_) {
img_size[1] = 1;
img_size[0] = UP_DIV(in_tensors_[1]->shape()[0], C4NUM);
@ -147,57 +88,7 @@ int ScaleOpenCLKernel::InitBuffer() {
<< in_tensors_[0]->data_type();
return RET_ERROR;
}
} else if (in_tensors_[0]->GetFormat() == schema::Format_NC4HW4) {
if (in_tensors_[1]->GetFormat() == schema::Format_NHWC) {
if (in_tensors_[0]->data_type() == kNumberTypeFloat32) {
float *scale = new (std::nothrow) float[pack_weight_size];
if (scale == nullptr) {
MS_LOG(ERROR) << "Malloc buffer failed!";
return RET_ERROR;
}
float *offset = new (std::nothrow) float[pack_weight_size];
if (offset == nullptr) {
MS_LOG(ERROR) << "Malloc buffer failed!";
delete[] scale;
return RET_ERROR;
}
std::function<float(float)> to_dtype = [](float x) -> float { return x; };
PackNHWCToNC4HW4<float, float>(in_tensors_[1]->data_c(), scale, batch, plane, channel, to_dtype);
PackNHWCToNC4HW4<float, float>(in_tensors_[2]->data_c(), offset, batch, plane, channel, to_dtype);
scale_ptr_ = allocator->CreateImageFromHost(scale, in_tensors_[1]->ElementsNum(), img_size);
offset_ptr_ = allocator->CreateImageFromHost(offset, in_tensors_[2]->ElementsNum(), img_size);
delete[] scale;
delete[] offset;
} else if (in_tensors_[0]->data_type() == kNumberTypeFloat16) {
float16_t *scale = new (std::nothrow) float16_t[pack_weight_size];
if (scale == nullptr) {
MS_LOG(ERROR) << "Malloc buffer failed!";
return RET_ERROR;
}
float16_t *offset = new (std::nothrow) float16_t[pack_weight_size];
if (offset == nullptr) {
MS_LOG(ERROR) << "Malloc buffer failed!";
delete[] scale;
return RET_ERROR;
}
std::function<float16_t(float)> to_dtype = [](float x) -> float16_t { return static_cast<float16_t>(x); };
PackNHWCToNC4HW4<float, float16_t>(in_tensors_[1]->data_c(), scale, batch, plane, channel, to_dtype);
PackNHWCToNC4HW4<float, float16_t>(in_tensors_[2]->data_c(), offset, batch, plane, channel, to_dtype);
scale_ptr_ = allocator->CreateImageFromHost(scale, in_tensors_[1]->ElementsNum(), img_size);
offset_ptr_ = allocator->CreateImageFromHost(offset, in_tensors_[2]->ElementsNum(), img_size);
delete[] scale;
delete[] offset;
} else {
MS_LOG(ERROR) << "Unsupport data type transpose from " << in_tensors_[1]->data_type() << "to "
<< in_tensors_[0]->data_type();
return RET_ERROR;
}
} else {
MS_LOG(ERROR) << "Unsupport format transpose from " << in_tensors_[1]->GetFormat() << "to "
<< in_tensors_[0]->GetFormat();
return RET_ERROR;
}
} else if (in_tensors_[0]->GetFormat() == schema::Format_NHWC4) {
} else if (in_tensors_[0]->GetFormat() == schema::Format_NHWC) {
if (in_tensors_[1]->GetFormat() == schema::Format_NHWC) {
if (in_tensors_[0]->data_type() == kNumberTypeFloat32) {
float *scale = new (std::nothrow) float[pack_weight_size];
@ -298,18 +189,6 @@ int ScaleOpenCLKernel::Init() {
return error_code;
}
auto format = op_format_;
if (out_tensors_[0]->shape().size() == 2) {
format = schema::Format_NC4;
}
in_ori_format_ = in_tensors_[0]->GetFormat();
out_ori_format_ = out_tensors_[0]->GetFormat();
in_tensors_[0]->SetFormat(format);
if (element_flag_ && in_tensors_[1]->category() != lite::Tensor::Category::CONST) {
in_tensors_[1]->SetFormat(format);
in_tensors_[2]->SetFormat(format);
}
out_tensors_[0]->SetFormat(format);
Image2dGetWorkGroupSize();
InitBuffer();
MS_LOG(DEBUG) << kernel_name << " Init Done!";

View File

@ -25,20 +25,17 @@ namespace mindspore::kernel {
class ScaleOpenCLKernel : public OpenCLKernel {
public:
explicit ScaleOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
ScaleOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
: OpenCLKernel(parameter, inputs, outputs) {}
~ScaleOpenCLKernel() override;
int Init() override;
int Run() override;
int GetImageSize(size_t idx, std::vector<size_t> *img_size) override;
int InitBuffer() override;
private:
std::vector<size_t> InitGlobalSize() const;
void Image2dGetWorkGroupSize();
void BufferGetWorkGroupSize();
int InitBuffer();
cl::Kernel kernel_;
bool element_flag_{true};

View File

@ -30,55 +30,17 @@ using mindspore::schema::PrimitiveType_Slice;
namespace mindspore::kernel {
int SliceOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) {
size_t CO4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM);
size_t im_dst_x, im_dst_y;
if (in_tensors_[0]->GetFormat() == schema::Format::Format_NHWC4) {
im_dst_x = out_tensors_[0]->Width() * CO4;
im_dst_y = out_tensors_[0]->Height();
} else {
im_dst_y = out_tensors_[0]->Batch() * out_tensors_[0]->Height() * CO4;
im_dst_x = out_tensors_[0]->Width();
}
size_t img_dtype = CL_FLOAT;
auto enable_fp16_ = ocl_runtime_->GetFp16Enable();
if (enable_fp16_) {
img_dtype = CL_HALF_FLOAT;
}
img_size->clear();
std::vector<size_t> vec{im_dst_x, im_dst_y, img_dtype};
*img_size = vec;
return RET_OK;
}
int SliceOpenCLKernel::Init() {
std::string kernel_name = "slice";
auto in_format = op_format_;
if (in_format != schema::Format_NHWC4 && in_format != schema::Format_NC4HW4) {
MS_LOG(ERROR) << "input format(" << in_format << ") "
<< "format not support!";
return RET_ERROR;
}
in_ori_format_ = in_tensors_[0]->GetFormat();
in_tensors_[0]->SetFormat(op_format_);
out_ori_format_ = out_tensors_[0]->GetFormat();
out_tensors_[0]->SetFormat(op_format_);
if (in_format == schema::Format_NC4HW4) {
kernel_name += "_NC4HW4";
} else if (in_format == schema::Format_NHWC4) {
kernel_name += "_NHWC4";
}
std::set<std::string> build_options;
std::string source = slice_source;
std::string program_name = "slice";
std::string kernel_name = "slice_NHWC4";
ocl_runtime_->LoadSource(program_name, source);
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
MS_LOG(DEBUG) << kernel_name << " Init Done!";
return RET_OK;
}
int SliceOpenCLKernel::ReSize() { return RET_OK; }
void SlcieGetWorkGroup(const std::vector<size_t> &global, std::vector<size_t> *local, int max_size) {
const int max_divider = 8;
const int max_x = 4, max_y = 8;

View File

@ -25,20 +25,16 @@ namespace mindspore::kernel {
class SliceOpenCLKernel : public OpenCLKernel {
public:
explicit SliceOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs)
SliceOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs)
: OpenCLKernel(parameter, inputs, outputs) {}
~SliceOpenCLKernel() override{};
~SliceOpenCLKernel() override = default;
int Init() override;
int ReSize() override;
int Run() override;
int GetImageSize(size_t idx, std::vector<size_t> *img_size) override;
private:
cl::Kernel kernel_;
};

View File

@ -77,30 +77,6 @@ int SoftmaxOpenCLKernel::SetWorkGroupSize1x1() {
return lite::RET_OK;
}
// Computes the output image extent (width, height, dtype) for SoftMax from the
// NHWC shape cached during Init. Rejects layouts other than NHWC4 / NC4HW4.
// Fix: removed the unused local `out_shape` (dead read of the output tensor).
int SoftmaxOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) {
  size_t im_dst_x, im_dst_y;
  int n = nhwc_shape_[0], h = nhwc_shape_[1], w = nhwc_shape_[2], c = nhwc_shape_[3];
  if (op_format_ == schema::Format_NHWC4) {
    im_dst_x = w * UP_DIV(c, C4NUM);  // W * C4
    im_dst_y = n * h;                 // N * H
  } else if (op_format_ == schema::Format_NC4HW4) {
    im_dst_x = w;
    im_dst_y = n * UP_DIV(c, C4NUM) * h;  // N * C4 * H
  } else {
    MS_LOG(ERROR) << "not support op format:" << EnumNameFormat(op_format_);
    return mindspore::lite::RET_ERROR;
  }
  size_t img_dtype = CL_FLOAT;
  if (enable_fp16_) {
    img_dtype = CL_HALF_FLOAT;
  }
  img_size->clear();
  std::vector<size_t> vec{im_dst_x, im_dst_y, img_dtype};
  *img_size = vec;
  return mindspore::lite::RET_OK;
}
int SoftmaxOpenCLKernel::Init() {
std::string kernel_name = "SoftMax";
std::string program_name = "SoftMax";
@ -131,7 +107,7 @@ int SoftmaxOpenCLKernel::Init() {
onexone_flag_ = false;
kernel_name += "Axis" + std::to_string(axis_);
}
kernel_name += "_" + std::string(EnumNameFormat(op_format_));
kernel_name += "_NHWC4";
#ifdef PROGRAM_WITH_IL
kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name);
#else
@ -139,10 +115,6 @@ int SoftmaxOpenCLKernel::Init() {
ocl_runtime_->LoadSource(program_name, source);
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
#endif
in_ori_format_ = in_tensors_[0]->GetFormat();
out_ori_format_ = out_tensors_[0]->GetFormat();
in_tensors_[0]->SetFormat(op_format_);
out_tensors_[0]->SetFormat(op_format_);
MS_LOG(DEBUG) << kernel_name << " Init Done!";
return lite::RET_OK;
}

View File

@ -26,26 +26,22 @@ namespace mindspore::kernel {
class SoftmaxOpenCLKernel : public OpenCLKernel {
public:
explicit SoftmaxOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs)
: OpenCLKernel(parameter, inputs, outputs) {
parameter_ = reinterpret_cast<SoftmaxParameter *>(parameter);
}
SoftmaxOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs)
: OpenCLKernel(parameter, inputs, outputs), parameter_(reinterpret_cast<SoftmaxParameter *>(parameter)) {}
~SoftmaxOpenCLKernel() override{};
~SoftmaxOpenCLKernel() override = default;
int Init() override;
int Run() override;
int GetImageSize(size_t idx, std::vector<size_t> *img_size) override;
private:
int InitGlobalSize();
int SetWorkGroupSize1x1();
int SetWorkGroupSize();
std::vector<float> GetMaskForLastChannel(int channels);
private:
cl::Kernel kernel_;
SoftmaxParameter *parameter_;
bool onexone_flag_{false};
std::vector<size_t> local_size_;
std::vector<size_t> global_size_;

View File

@ -32,18 +32,12 @@ using mindspore::schema::PrimitiveType_SpaceToBatchND;
namespace mindspore::kernel {
int SpaceToBatchNDOpenCLKernel::Init() {
std::string kernel_name = "space_to_batch_nd";
auto in_format = op_format_;
std::string kernel_name = "space_to_batch_nd_NHWC4";
if (in_tensors_[0]->shape().size() != 4 && out_tensors_[0]->shape().size() != 4) {
MS_LOG(ERROR) << "input/output shape size must be 4, actual: " << in_tensors_[0]->shape().size() << ", "
<< out_tensors_[0]->shape().size();
return RET_ERROR;
}
if (in_format != schema::Format_NHWC4 && in_format != schema::Format_NC4HW4) {
MS_LOG(ERROR) << "input format(" << in_format << ") "
<< "format not support!";
return RET_ERROR;
}
auto *param = reinterpret_cast<SpaceToBatchParameter *>(this->op_parameter_);
param->need_paddings_ = (param->paddings_[0] | param->paddings_[1] | param->paddings_[2] | param->paddings_[3]);
param->padded_in_shape_[kNHWC_N] = in_tensors_[0]->shape().at(kNHWC_N);
@ -60,18 +54,10 @@ int SpaceToBatchNDOpenCLKernel::Init() {
return RET_ERROR;
}
in_ori_format_ = in_tensors_[0]->GetFormat();
out_ori_format_ = out_tensors_[0]->GetFormat();
in_tensors_[0]->SetFormat(op_format_);
out_tensors_[0]->SetFormat(op_format_);
#ifdef PROGRAM_WITH_IL
kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name);
#else
if (in_format == schema::Format_NC4HW4) {
kernel_name += "_NC4HW4";
} else {
kernel_name += "_NHWC4";
}
std::set<std::string> build_options;
std::string source = space_to_batch_nd_source;
std::string program_name = "space_to_batch_nd";
@ -81,28 +67,7 @@ int SpaceToBatchNDOpenCLKernel::Init() {
MS_LOG(DEBUG) << kernel_name << " Init Done!";
return RET_OK;
}
int SpaceToBatchNDOpenCLKernel::InitBuffer() { return RET_OK; }
int SpaceToBatchNDOpenCLKernel::ReSize() { return RET_OK; }
// Computes the output image extent (width, height, dtype) for SpaceToBatchND.
// NHWC4 inputs pack channel slices along the width; other formats stack them
// along the height.
int SpaceToBatchNDOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) {
  const size_t co4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM);
  size_t width;
  size_t height;
  if (in_tensors_[0]->GetFormat() == schema::Format::Format_NHWC4) {
    width = out_tensors_[0]->Width() * co4;
    height = out_tensors_[0]->Height() * out_tensors_[0]->Batch();
  } else {
    width = out_tensors_[0]->Width();
    height = out_tensors_[0]->Batch() * out_tensors_[0]->Height() * co4;
  }
  const size_t img_dtype = ocl_runtime_->GetFp16Enable() ? CL_HALF_FLOAT : CL_FLOAT;
  img_size->clear();
  *img_size = {width, height, img_dtype};
  return RET_OK;
}
int SpaceToBatchNDOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running! ";
auto param = reinterpret_cast<SpaceToBatchParameter *>(this->op_parameter_);

View File

@ -25,22 +25,16 @@ namespace mindspore::kernel {
class SpaceToBatchNDOpenCLKernel : public OpenCLKernel {
public:
explicit SpaceToBatchNDOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs)
SpaceToBatchNDOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs)
: OpenCLKernel(parameter, inputs, outputs) {}
~SpaceToBatchNDOpenCLKernel() override{};
~SpaceToBatchNDOpenCLKernel() override = default;
int Init() override;
int ReSize() override;
int Run() override;
int GetImageSize(size_t idx, std::vector<size_t> *img_size) override;
int InitBuffer();
private:
cl::Kernel kernel_;
};

View File

@ -84,14 +84,6 @@ int ToFormatOpenCLKernel::InitNHWC() {
return RET_OK;
}
// Output image extent for the format-conversion kernel: one row per (n, h)
// pair, one FLT4 pixel per (w, channel-slice) pair.
int ToFormatOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) {
  const size_t height = N_ * H_;
  const size_t width = W_ * UP_DIV(C_, C4NUM);
  const size_t dtype = ocl_runtime_->GetFp16Enable() ? CL_HALF_FLOAT : CL_FLOAT;
  *img_size = {width, height, dtype};
  return RET_OK;
}
int ToFormatOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running!";
std::vector<size_t> global = {N_ * H_, W_, UP_DIV(C_, C4NUM)};

View File

@ -25,15 +25,13 @@
namespace mindspore::kernel {
class ToFormatOpenCLKernel : public OpenCLKernel {
public:
explicit ToFormatOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs)
ToFormatOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs)
: OpenCLKernel(parameter, inputs, outputs) {}
~ToFormatOpenCLKernel() override{};
~ToFormatOpenCLKernel() override = default;
int Init() override;
int ReSize() override { return mindspore::lite::RET_OK; };
int Run() override;
int GetImageSize(size_t idx, std::vector<size_t> *img_size) override;
private:
int InitNHWC();

View File

@ -57,7 +57,8 @@ int TransposeOpenCLKernel::Init() {
// just for input
kernel_name += "_oversize";
}
kernel_name += "_" + std::string(EnumNameFormat(op_format_));
kernel_name += "_NHWC4";
#ifdef PROGRAM_WITH_IL
kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name);
#else
@ -67,37 +68,11 @@ int TransposeOpenCLKernel::Init() {
ocl_runtime_->LoadSource(program_name, source);
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
#endif
in_ori_format_ = in_tensors_[0]->GetFormat();
out_ori_format_ = out_tensors_[0]->GetFormat();
in_tensors_[0]->SetFormat(op_format_);
out_tensors_[0]->SetFormat(op_format_);
MS_LOG(DEBUG) << kernel_name << " Init Done!";
return mindspore::lite::RET_OK;
}
int TransposeOpenCLKernel::ReSize() { return mindspore::lite::RET_OK; }
int TransposeOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) {
size_t im_dst_x = 1, im_dst_y = 1;
auto out_shape = out_tensors_[0]->shape();
if (op_format_ == schema::Format_NHWC4) {
im_dst_x = out_shape[2] * UP_DIV(out_shape[3], C4NUM); // W * C4
im_dst_y = out_shape[0] * out_shape[1]; // N * H
} else if (op_format_ == schema::Format_NC4HW4) {
im_dst_x = out_shape[2]; // W
im_dst_y = out_shape[0] * UP_DIV(out_shape[3], C4NUM) * out_shape[1]; // N * C4 * H
}
size_t img_dtype = CL_FLOAT;
if (enable_fp16_) {
img_dtype = CL_HALF_FLOAT;
}
img_size->clear();
std::vector<size_t> vec{im_dst_x, im_dst_y, img_dtype};
*img_size = vec;
return mindspore::lite::RET_OK;
}
int TransposeOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running!";
std::vector<int> shapex = out_tensors_[0]->shape();
@ -108,12 +83,11 @@ int TransposeOpenCLKernel::Run() {
size_t c4 = UP_DIV(c, 4);
std::vector<size_t> local = {};
std::vector<size_t> global;
if (type == TransposeType::AXIS0312) {
if (type == TransposeType::AXIS0312) { // NHWC -> NCHW
global = {UP_DIV(h, C4NUM), w, c4};
} else if (type == TransposeType::AXIS0231) {
} else if (type == TransposeType::AXIS0231) { // NCHW -> NHWC
global = {h, UP_DIV(w, C4NUM), c4};
}
cl_int4 shape = {static_cast<int>(n), static_cast<int>(h), static_cast<int>(w), static_cast<int>(c)};
int arg_idx = 0;
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());

View File

@ -29,15 +29,13 @@ enum class TransposeType { AXIS0312, AXIS0231 };
class TransposeOpenCLKernel : public OpenCLKernel {
public:
explicit TransposeOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs)
TransposeOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs)
: OpenCLKernel(parameter, inputs, outputs) {}
~TransposeOpenCLKernel() override{};
~TransposeOpenCLKernel() override = default;
int Init() override;
int ReSize() override;
int Run() override;
int GetImageSize(size_t idx, std::vector<size_t> *img_size) override;
private:
cl::Kernel kernel_;

View File

@ -22,52 +22,120 @@
#include "include/errorcode.h"
#include "src/runtime/opencl/opencl_runtime.h"
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
namespace mindspore::kernel {
enum class OpenCLMemType { BUF, IMG };
enum OpenCLImageSizeIndex { IDX_X = 0, IDX_Y, IDX_DTYPE, IDX_NUM };
struct OpenCLToFormatParameter {
OpParameter op_parameter;
OpParameter op_parameter{};
schema::Format src_format{schema::Format::Format_NHWC};
schema::Format dst_format{schema::Format::Format_NHWC4};
OpenCLMemType out_mem_type{OpenCLMemType::IMG};
};
// Describes how a tensor maps onto an OpenCL 2D image: logical NHWC extents,
// per-element byte sizes, and the resulting image width/height/row pitch.
struct Image2DInfo {
  // Derives all sizes from `tensor`'s shape and dtype. A null tensor yields
  // the defaults N = H = W = C = 1 with FP32 element size.
  explicit Image2DInfo(const lite::Tensor *tensor) {
    if (tensor) {
      auto shape = tensor->shape();
      // Map 1-4D shapes onto NHWC; dimensions not present keep their
      // default of 1 (e.g. a 2D shape is treated as N x C).
      if (shape.size() == 1) {
        N = shape[0];
      } else if (shape.size() == 2) {
        N = shape[0];
        C = shape[1];
      } else if (shape.size() == 3) {
        N = shape[0];
        W = shape[1];
        C = shape[2];
      } else if (shape.size() == 4) {
        N = shape[0];
        H = shape[1];
        W = shape[2];
        C = shape[3];
      } else if (shape.size() >= 5) {
        // NOTE(review): only logs; the sizes below are still computed from
        // the default 1s — confirm callers never reach here with dim >= 5.
        MS_LOG(ERROR) << "GPU dont't support Tensor with dim=" << shape.size();
      }
      FLT_size = tensor->data_type() == kNumberTypeFloat16 ? sizeof(cl_half) : sizeof(cl_float);
    } else {
      FLT_size = sizeof(cl_float);
    }
    FLT4_size = FLT_size * 4;
    Slice = UP_DIV(C, C4NUM);
    // Prefer an (N*H) x (W*Slice) layout; if the row would exceed the max
    // image2d width, transpose so the wide dimension becomes the height.
    if (W * Slice <= MAX_IMAGE2D_SIZE) {
      height = N * H;
      width = W * Slice;
    } else {
      height = W;
      width = N * H * Slice;
    }
    // Round the row width up to the device's image pitch alignment (in
    // pixels) before converting to bytes.
    auto runtime_wrapper = lite::opencl::OpenCLRuntimeWrapper();
    int alignment = runtime_wrapper.GetInstance()->GetImagePitchAlignment();
    row_pitch = (width + alignment - 1) / alignment * alignment * FLT4_size;
    ElementsNum = N * H * W * C;
    ElementsC4Num = N * H * W * Slice * C4NUM;  // element count with C padded to a multiple of 4
    OriginSize = ElementsNum * FLT_size;        // bytes of the unpadded tensor data
    Image2DSize = height * width * FLT4_size;   // bytes of the packed image
  }

  size_t N{1};
  size_t H{1};
  size_t W{1};
  size_t C{1};
  size_t Slice{};      // number of 4-channel slices: UP_DIV(C, 4)
  size_t width{};      // image width in FLT4 pixels
  size_t height{};     // image height in rows
  size_t FLT_size{};   // bytes per scalar element (cl_half or cl_float)
  size_t FLT4_size{};  // bytes per 4-element pixel
  size_t row_pitch{};  // aligned bytes per image row
  size_t ElementsNum{};
  size_t ElementsC4Num{};
  size_t OriginSize{};
  size_t Image2DSize{};
};
class OpenCLKernel : public LiteKernel {
public:
explicit OpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs)
OpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs)
: LiteKernel(parameter, inputs, outputs, nullptr, nullptr) {
ocl_runtime_ = ocl_runtime_wrap_.GetInstance();
}
~OpenCLKernel() override = default;
~OpenCLKernel() {}
int Init() override { return RET_ERROR; }
int PreProcess() override { return RET_ERROR; }
int ReSize() override { return RET_ERROR; }
int Run() override { return RET_ERROR; }
virtual int Init() { return mindspore::lite::RET_ERROR; }
virtual int PreProcess() { return mindspore::lite::RET_ERROR; }
virtual int InferShape() { return mindspore::lite::RET_ERROR; }
virtual int ReSize() { return mindspore::lite::RET_ERROR; }
virtual int Run() { return mindspore::lite::RET_ERROR; }
virtual int GetImageSize(size_t idx, std::vector<size_t> *img_size) { return mindspore::lite::RET_ERROR; }
virtual int GetGlobalSize(size_t idx, std::vector<size_t> *global_size) { return mindspore::lite::RET_ERROR; }
virtual int InitBuffer() { return RET_OK; }
virtual int GetGlobalSize(size_t idx, std::vector<size_t> *global_size) { return RET_ERROR; }
virtual int GetLocalSize(size_t idx, const std::vector<size_t> &global_size, std::vector<size_t> *local_size) {
return mindspore::lite::RET_ERROR;
return RET_ERROR;
}
int GetImageSize(size_t idx, std::vector<size_t> *img_size) {
if (idx >= out_tensors_.size()) {
return RET_ERROR;
}
auto img_info = Image2DInfo(out_tensors_[idx]);
size_t img_dtype = ocl_runtime_->GetFp16Enable() ? CL_HALF_FLOAT : CL_FLOAT;
*img_size = {img_info.width, img_info.height, img_dtype};
return RET_OK;
}
OpenCLMemType GetMemType() { return out_mem_type_; }
void SetMemType(OpenCLMemType mem_type) { out_mem_type_ = mem_type; }
void SetFormatType(schema::Format format_type) { op_format_ = format_type; }
schema::Format GetInOriFormat() { return in_ori_format_; }
schema::Format GetOutOriFormat() { return out_ori_format_; }
protected:
OpenCLMemType out_mem_type_{OpenCLMemType::IMG};
schema::Format in_ori_format_{schema::Format::Format_NHWC};
schema::Format out_ori_format_{schema::Format::Format_NHWC4};
schema::Format op_format_{schema::Format::Format_NHWC4};
lite::opencl::OpenCLRuntimeWrapper ocl_runtime_wrap_;
lite::opencl::OpenCLRuntime *ocl_runtime_;
std::vector<size_t> img_size_;
OpenCLMemType out_mem_type_{OpenCLMemType::IMG};
private:
lite::opencl::OpenCLRuntimeWrapper ocl_runtime_wrap_;
};
} // namespace mindspore::kernel

View File

@ -27,7 +27,7 @@ using mindspore::lite::RET_OK;
SubGraphOpenCLKernel::~SubGraphOpenCLKernel() { UnInit(); }
int SubGraphOpenCLKernel::GenToFormatOp(const std::vector<lite::Tensor *> &in_tensors,
const std::vector<std::vector<kernel::LiteKernel *>> in_kernels,
const std::vector<std::vector<kernel::LiteKernel *>> &in_kernels,
std::vector<lite::Tensor *> *out_tensors,
std::vector<OpenCLToFormatParameter *> *out_parameters,
std::vector<LiteKernel *> *out_convert_ops, OpenCLMemType mem_type) {
@ -66,7 +66,7 @@ int SubGraphOpenCLKernel::GenToFormatOp(const std::vector<lite::Tensor *> &in_te
}
auto dst_format = (mem_type == OpenCLMemType::IMG) ? schema::Format::Format_NHWC4 : schema::Format::Format_NHWC;
auto src_format = (mem_type == OpenCLMemType::IMG) ? schema::Format::Format_NHWC : schema::Format::Format_NHWC4;
lite::Tensor *new_tensor = new (std::nothrow) lite::Tensor();
auto *new_tensor = new (std::nothrow) lite::Tensor();
MS_ASSERT(new_tensor);
if (new_tensor == nullptr) {
MS_LOG(ERROR) << "SubGraphOpenCLKernel new tensor failed!";
@ -87,8 +87,7 @@ int SubGraphOpenCLKernel::GenToFormatOp(const std::vector<lite::Tensor *> &in_te
desc.data_type = kNumberTypeFloat16;
new_tensor->set_data_type(kNumberTypeFloat16);
}
OpenCLToFormatParameter *parameter =
static_cast<OpenCLToFormatParameter *>(malloc(sizeof(OpenCLToFormatParameter)));
auto *parameter = static_cast<OpenCLToFormatParameter *>(malloc(sizeof(OpenCLToFormatParameter)));
MS_ASSERT(parameter);
if (parameter == nullptr) {
MS_LOG(ERROR) << "SubGraphOpenCLKernel new parameter failed!";
@ -196,11 +195,12 @@ int SubGraphOpenCLKernel::UpdateTensorDataType() {
}
return RET_OK;
}
int SubGraphOpenCLKernel::MallocTensorWithReuse() {
kernel::LiteKernelUtil::InitTensorRefCount(nodes_);
for (auto *kernel : nodes_) {
MS_ASSERT(nullptr != kernel);
kernel::OpenCLKernel *op_kernel = reinterpret_cast<kernel::OpenCLKernel *>(kernel);
auto *op_kernel = reinterpret_cast<kernel::OpenCLKernel *>(kernel);
auto outputs = kernel->out_tensors();
for (auto i = 0; i < outputs.size(); ++i) {
auto *output = outputs.at(i);
@ -258,10 +258,10 @@ int SubGraphOpenCLKernel::GetKernelFromToTensor(const std::vector<lite::Tensor *
kset.insert(tens.begin(), tens.end());
ksets.emplace_back(kset);
}
for (size_t i = 0; i < in_tensors.size(); ++i) {
for (auto in_tensor : in_tensors) {
std::vector<kernel::LiteKernel *> kvec;
for (size_t j = 0; j < in_kernels.size(); ++j) {
if (ksets[j].count(in_tensors[i])) {
if (ksets[j].count(in_tensor)) {
kvec.emplace_back(in_kernels[j]);
}
}
@ -272,21 +272,15 @@ int SubGraphOpenCLKernel::GetKernelFromToTensor(const std::vector<lite::Tensor *
int SubGraphOpenCLKernel::UnInit() {
for (const auto &tensor : in_convert_tensors_) {
if (tensor != nullptr) {
delete tensor;
}
delete tensor;
}
in_convert_tensors_.clear();
for (const auto &tensor : out_convert_tensors_) {
if (tensor != nullptr) {
delete tensor;
}
delete tensor;
}
out_convert_tensors_.clear();
for (const auto &op : nodes_) {
if (op != nullptr) {
delete op;
}
delete op;
}
nodes_.clear();
in_convert_ops_.clear();

View File

@ -32,10 +32,10 @@ struct SubGraphOpenCLParameter {
class SubGraphOpenCLKernel : public SubGraphKernel {
public:
explicit SubGraphOpenCLKernel(const std::vector<lite::Tensor *> inputs, const std::vector<lite::Tensor *> outputs,
const std::vector<kernel::LiteKernel *> inKernels,
const std::vector<kernel::LiteKernel *> outKernels,
const std::vector<kernel::LiteKernel *> nodes, const lite::InnerContext *ctx = nullptr)
SubGraphOpenCLKernel(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
const std::vector<kernel::LiteKernel *> &inKernels,
const std::vector<kernel::LiteKernel *> &outKernels,
const std::vector<kernel::LiteKernel *> &nodes, const lite::InnerContext *ctx = nullptr)
: SubGraphKernel(inputs, outputs, inKernels, outKernels, nodes, ctx) {
ocl_runtime_ = ocl_runtime_wrap_.GetInstance();
subgraph_type_ = kGpuSubGraph;
@ -50,21 +50,19 @@ class SubGraphOpenCLKernel : public SubGraphKernel {
int ReSize() override;
int Run() override;
int Run(const KernelCallBack &before, const KernelCallBack &after) override { return this->Run(); };
int UnInit();
protected:
private:
int UnInit();
int UpdateTensorDataType();
int MallocTensorWithReuse();
int GenToFormatOp(const std::vector<lite::Tensor *> &in_tensors,
const std::vector<std::vector<kernel::LiteKernel *>> in_kernels,
const std::vector<std::vector<kernel::LiteKernel *>> &in_kernels,
std::vector<lite::Tensor *> *out_tensors, std::vector<OpenCLToFormatParameter *> *out_parameters,
std::vector<LiteKernel *> *out_convert_ops, OpenCLMemType mem_type);
int GetKernelFromToTensor(const std::vector<lite::Tensor *> &in_tensors,
const std::vector<kernel::LiteKernel *> &in_kernels,
std::vector<std::vector<kernel::LiteKernel *>> *out_kernels, bool is_from);
private:
lite::opencl::OpenCLAllocator *allocator_;
lite::opencl::OpenCLAllocator *allocator_{nullptr};
std::vector<lite::Tensor *> in_convert_tensors_;
std::vector<lite::Tensor *> out_convert_tensors_;
std::vector<OpenCLToFormatParameter *> in_parameters_;

View File

@ -20,6 +20,8 @@
#include <vector>
#include "src/kernel_registry.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/opencl_kernel.h"
#include "src/common/file_utils.h"
using mindspore::lite::KernelRegistrar;
@ -224,57 +226,77 @@ std::string CLErrorCode(cl_int error_code) {
}
}
void Write2File(void *mem, const std::string &file_name, int size) {
std::fstream os;
os.open(file_name, std::ios::out | std::ios::binary);
os.write(static_cast<char *>(mem), size);
os.close();
// Dumps `size` bytes starting at `data` into a binary file at `file_path`.
// Returns 0 on success, -1 if the file cannot be opened or the write fails.
int WriteToBin(const std::string &file_path, void *data, size_t size) {
  std::ofstream out_file(file_path, std::ios::binary);
  if (!out_file.is_open()) {
    MS_LOG(ERROR) << "file open failed";
    return -1;
  }
  out_file.write(reinterpret_cast<char *>(data), static_cast<std::streamsize>(size));
  // write() may silently fail (disk full, permissions); report it instead of
  // returning success unconditionally.
  if (!out_file.good()) {
    MS_LOG(ERROR) << "file is bad";
    return -1;
  }
  return 0;
}
void PrintTensor(lite::Tensor *tensor, int num, const std::string &out_file) {
void PrintTensor(const lite::Tensor *tensor, OpenCLMemType mem_type, int n, const std::string &out_file) {
if (tensor->data_c() == nullptr) {
return;
}
Image2DInfo img_info(tensor);
auto size = mem_type == OpenCLMemType::BUF ? img_info.OriginSize : img_info.Image2DSize;
std::vector<char> data(size);
auto runtime_wrapper = lite::opencl::OpenCLRuntimeWrapper();
auto runtime = runtime_wrapper.GetInstance();
auto allocator = runtime->GetAllocator();
auto origin_data = tensor->data_c();
runtime->SyncCommandQueue();
allocator->MapBuffer(origin_data, CL_MAP_READ, nullptr, true);
allocator->MapBuffer(tensor->data_c(), CL_MAP_READ, nullptr, true);
if (mem_type == OpenCLMemType::BUF) {
memcpy(data.data(), tensor->data_c(), img_info.OriginSize);
} else {
auto row_size = img_info.width * img_info.FLT4_size;
for (int i = 0; i < img_info.height; ++i) {
memcpy(reinterpret_cast<char *>(data.data()) + i * row_size,
static_cast<char *>(tensor->data_c()) + i * img_info.row_pitch, row_size);
}
}
allocator->UnmapBuffer(tensor->data_c());
printf("shape=(");
auto shape = tensor->shape();
auto N = shape.size() > 0 ? shape[0] : 1;
auto H = shape.size() > 1 ? shape[1] : 1;
auto W = shape.size() > 2 ? shape[2] : 1;
auto C = shape.size() > 3 ? shape[3] : 1;
auto SLICES = UP_DIV(C, C4NUM);
auto ElementsC4Num = N * H * W * UP_ROUND(C, C4NUM);
auto alignment = runtime->GetImagePitchAlignment();
auto FLT4_size = tensor->data_type() == kNumberTypeFloat16 ? sizeof(cl_half4) : sizeof(cl_float4);
auto row_pitch = (W * SLICES + alignment - 1) / alignment * alignment * FLT4_size;
auto row_size = W * SLICES * FLT4_size;
std::vector<char> data(N * H * row_size);
for (int i = 0; i < N * H; ++i) {
memcpy(static_cast<char *>(data.data()) + i * row_size, static_cast<char *>(origin_data) + i * row_pitch, row_size);
for (int i = 0; i < shape.size(); ++i) {
printf("%4d", shape[i]);
if (i + 1 < shape.size()) {
printf(",");
}
}
printf(") ");
std::cout << "shape=(";
for (auto x : shape) {
printf("%3d,", x);
}
printf("): ");
for (size_t i = 0; i < num && i < ElementsC4Num; ++i) {
if (tensor->data_type() == kNumberTypeFloat16)
printf("%zu %6.3f | ", i, (reinterpret_cast<float16_t *>(data.data()))[i]);
else
printf("%zu %6.3f | ", i, (reinterpret_cast<float *>(data.data()))[i]);
auto num = mem_type == OpenCLMemType::BUF ? img_info.ElementsNum : img_info.ElementsC4Num;
for (int i = 0; i < n && i < num; ++i) {
if (tensor->data_type() == kNumberTypeFloat16) {
printf("%d %7.3f | ", i, reinterpret_cast<float16_t *>(data.data())[i]);
} else {
printf("%d %7.3f | ", i, reinterpret_cast<float *>(data.data())[i]);
}
}
printf("\n");
if (!out_file.empty()) {
Write2File(data.data(), out_file, data.size());
WriteToBin(out_file, data.data(), data.size());
}
}
// Prints the kernel's name followed by (up to) the first n values of its first
// output tensor; optionally dumps the raw data to out_file (see PrintTensor).
void PrintKernelOutput(OpenCLKernel *kernel, int n, const std::string &out_file) {
  printf("%-30s", kernel->name().c_str());
  if (!kernel->out_tensors().empty()) {
    PrintTensor(kernel->out_tensors()[0], kernel->GetMemType(), n, out_file);
  }
  // NOTE(review): removed stray `allocator->UnmapBuffer(origin_data);` — those
  // names belong to PrintTensor's scope (map/unmap is handled there), not here.
}
std::vector<int> GetNHWCShape(const std::vector<int> &tensor_shape) {

View File

@ -24,6 +24,8 @@
#include "nnacl/op_base.h"
#include "src/lite_kernel.h"
#include "src/common/utils.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/opencl_kernel.h"
namespace mindspore::lite {
kernel::LiteKernel *GetOpenCLKernel(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
@ -44,9 +46,11 @@ std::vector<size_t> GetCommonLocalSize(const std::vector<size_t> &global, int ma
std::string CLErrorCode(cl_int error_code);
void Write2File(void *mem, const std::string &file_name, int size);
int WriteToBin(const std::string &file_path, void *data, size_t size);
void PrintTensor(lite::Tensor *tensor, int num = 10, const std::string &out_file = "");
void PrintTensor(const lite::Tensor *tensor, OpenCLMemType mem_type, int n = 10, const std::string &out_file = "");
void PrintKernelOutput(OpenCLKernel *kernel, int n = 10, const std::string &out_file = "");
std::vector<int> GetNHWCShape(const std::vector<int> &tensor_shape);

View File

@ -37,7 +37,7 @@ int OpenCLExecutor::Run(std::vector<Tensor *> &inputs, std::vector<Tensor *> &ou
MS_LOG(ERROR) << "run kernel before_callback failed, name: " << kernel->name();
}
}
kernel::OpenCLKernel *op_kernel = reinterpret_cast<kernel::OpenCLKernel *>(kernel);
auto *op_kernel = reinterpret_cast<kernel::OpenCLKernel *>(kernel);
auto cur_outputs = kernel->out_tensors();
for (auto i = 0; i < cur_outputs.size(); ++i) {
auto *output = cur_outputs.at(i);

View File

@ -23,7 +23,6 @@
#include <vector>
#include <iostream>
#include "src/common/log_adapter.h"
#include "src/runtime/opencl/opencl_runtime.h"
namespace mindspore::lite::opencl {

View File

@ -124,7 +124,6 @@ TEST_F(TestActivationOpenCL, ReluFp_dim4) {
param->type_ = ActivationType_RELU;
auto *kernel =
new (std::nothrow) kernel::ActivationOpenClKernel(reinterpret_cast<OpParameter *>(param), inputs, outputs);
kernel->SetFormatType(op_format);
if (kernel == nullptr) {
MS_LOG(ERROR) << "Kernel:Relu create fail.";
delete param;
@ -241,7 +240,6 @@ TEST_F(TestActivationOpenCL, Relu6Fp_dim4) {
delete output_tensor;
return;
}
kernel->SetFormatType(op_format);
auto ret = kernel->Init();
if (ret != RET_OK) {
delete param;
@ -352,7 +350,6 @@ TEST_F(TestActivationOpenCL, SigmoidFp_dim4) {
delete output_tensor;
return;
}
kernel->SetFormatType(op_format);
auto ret = kernel->Init();
if (ret != RET_OK) {
delete param;
@ -464,7 +461,6 @@ TEST_F(TestActivationOpenCL, LeakyReluFp_dim4) {
delete output_tensor;
return;
}
kernel->SetFormatType(op_format);
auto ret = kernel->Init();
if (ret != RET_OK) {
delete param;
@ -573,7 +569,6 @@ TEST_F(TestActivationOpenCLTanh, TanhFp_dim4) {
delete output_tensor;
return;
}
kernel->SetFormatType(op_format);
auto ret = kernel->Init();
if (ret != RET_OK) {
delete param;

View File

@ -96,7 +96,6 @@ TEST_F(TestArithmeticSelfOpenCLfp16, ArithmeticSelfOpenCLFp16) {
delete param;
return;
}
arithmeticself_kernel->SetFormatType(schema::Format_NC4HW4);
arithmeticself_kernel->Init();
// to do allocate memory for inputs and outputs
for (auto &input_tensor : inputs) {
@ -185,7 +184,6 @@ TEST_F(TestArithmeticSelfOpenCLCI, ArithmeticSelfRound) {
delete param;
return;
}
arithmeticself_kernel->SetFormatType(schema::Format_NC4HW4);
arithmeticself_kernel->Init();
// to do allocate memory for inputs and outputs
for (auto &input_tensor : inputs) {
@ -280,7 +278,6 @@ TEST_F(TestArithmeticSelfOpenCLfp16, ArithmeticSelfdim2Fp16) {
delete param;
return;
}
arithmeticself_kernel->SetFormatType(schema::Format_NC4HW4);
arithmeticself_kernel->Init();
// to do allocate memory for inputs and outputs
for (auto &input_tensor : inputs) {

View File

@ -142,7 +142,6 @@ TEST_F(TestBiasAddOpenCL, BiasAddFp32_dim4) {
delete param;
return;
}
biasadd_kernel->SetFormatType(op_format);
auto ret = biasadd_kernel->Init();
if (ret != RET_OK) {
MS_LOG(ERROR) << "biasadd kernel init error.";

View File

@ -84,7 +84,6 @@ TEST_F(TestCastSelfOpenCL, Castfp32tofp16) {
delete param;
return;
}
cast_kernel->SetFormatType(schema::Format_NC4HW4);
cast_kernel->Init();
// to do allocate memory for inputs and outputs
for (auto &input_tensor : inputs) {
@ -171,7 +170,6 @@ TEST_F(TestCastSelfOpenCL, Castfp16tofp32) {
delete param;
return;
}
cast_kernel->SetFormatType(schema::Format_NC4HW4);
cast_kernel->Init();
// to do allocate memory for inputs and outputs
for (auto &input_tensor : inputs) {

View File

@ -103,7 +103,6 @@ TEST_F(TestConcatOpenCLCI, ConcatFp32_2inputforCI) {
delete param;
return;
}
concat_kernel->SetFormatType(schema::Format_NC4HW4);
concat_kernel->Init();
// to do allocate memory for inputs
for (auto &input_tensor : inputs) {
@ -220,7 +219,6 @@ TEST_F(TestConcatOpenCLfp16, ConcatFp16_4input_dim4_axis1) {
delete param;
return;
}
concat_kernel->SetFormatType(schema::Format_NC4HW4);
concat_kernel->Init();
// to do allocate memory for inputs and outputs
for (auto &input_tensor : inputs) {
@ -344,7 +342,6 @@ TEST_F(TestConcatOpenCLfp32, ConcatFp32_3input_dim4_axis1) {
delete param;
return;
}
concat_kernel->SetFormatType(schema::Format_NC4HW4);
concat_kernel->Init();
// to do allocate memory for inputs
for (auto &input_tensor : inputs) {
@ -473,7 +470,6 @@ TEST_F(TestConcatOpenCLfp16, ConcatFp16_6input_dim4_axis1) {
delete param;
return;
}
concat_kernel->SetFormatType(schema::Format_NC4HW4);
concat_kernel->Init();
// to do allocate memory for inputs and outputs
for (auto &input_tensor : inputs) {

View File

@ -170,7 +170,6 @@ void TEST_MAIN(const std::string &attr, Format input_format, Format output_forma
std::vector<lite::Tensor *> inputs{&input, &weight, &bias};
std::vector<lite::Tensor *> outputs{&output};
auto kernel = std::make_unique<ConvolutionOpenCLKernel>(reinterpret_cast<OpParameter *>(param), inputs, outputs);
kernel->SetFormatType(op_format);
kernel->Init();
MS_LOG(DEBUG) << "create SubGraph";

View File

@ -101,7 +101,6 @@ void DepthWiseTestMain(ConvParameter *conv_param, T2 *input_data, T1 *weight_dat
delete[] packed_input;
return;
}
pKernel->SetFormatType(format);
pKernel->Init();
std::vector<kernel::LiteKernel *> kernels{pKernel.release()};

View File

@ -55,7 +55,6 @@ void test_main_gather(void *input_data, void *correct_data, const std::vector<in
MS_LOG(INFO) << "new GatherOpenCLKernel failed ";
return;
}
pkernel->SetFormatType(schema::Format_NC4HW4);
pkernel->Init();
// to do allocate memory for inputs and outputs

View File

@ -60,7 +60,6 @@ void TEST_MAIN(PadParameter *param, Format input_format, Format output_format, F
if (kernel == nullptr) {
return;
}
kernel->SetFormatType(op_format);
kernel->Init();
MS_LOG(DEBUG) << "create SubGraph";

View File

@ -141,7 +141,6 @@ TEST_F(TestPReluOpenCL, PReluFp32_dim4) {
delete param;
return;
}
prelu_kernel->SetFormatType(op_format);
auto ret = prelu_kernel->Init();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Init prelu kernel error";