forked from OSSInnovation/mindspore
!6025 fixed lite compile error
Merge pull request !6025 from liuchao/master
Commit a7422977ef
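The compile errors come from an interface change in the lite runtime that this commit adopts throughout the FlattenGrad primitive, the OpenCL scale kernel, and its tests: lite::tensor::Tensor is now lite::Tensor, host buffers are accessed through MutableData() instead of Data(), and constant inputs are identified via category() instead of TensorType(). A minimal sketch of the new check, built only from calls that appear in the diff below (the IsConstInput helper itself is hypothetical; the kernel writes the condition inline):

// Hypothetical helper grouping the renamed calls; ScaleOpenCLKernel inlines this condition.
// Assumes the mindspore-lite tensor header is on the include path.
inline bool IsConstInput(mindspore::lite::Tensor *t) {
  // category() replaces the old TensorType() == schema::NodeType_ValueNode test,
  // and MutableData() replaces the removed Data() accessor.
  return t->category() == mindspore::lite::Tensor::Category::CONST && t->MutableData() != nullptr;
}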
@@ -19,7 +19,7 @@
 namespace mindspore {
 namespace lite {
-int FlattenGrad::InferShape(std::vector<tensor::Tensor *> inputs_, std::vector<tensor::Tensor *> outputs_) {
+int FlattenGrad::InferShape(std::vector<Tensor *> inputs_, std::vector<Tensor *> outputs_) {
   MS_ASSERT(this->primitive_ != nullptr);
   auto input = inputs_.front();
   auto output = outputs_.front();
@@ -37,7 +37,7 @@ class FlattenGrad : public PrimitiveC {
   int UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) override;
 #endif
-  int InferShape(std::vector<lite::tensor::Tensor *> inputs_, std::vector<lite::tensor::Tensor *> outputs_) override;
+  int InferShape(std::vector<lite::Tensor *> inputs_, std::vector<lite::Tensor *> outputs_) override;
 };
 }  // namespace lite
 }  // namespace mindspore
@@ -110,15 +110,17 @@ int ScaleOpenCLKernel::InitBuffer() {
   if (!element_flag_) {
     return RET_OK;
   }
-  if (in_tensors_[1]->TensorType() == schema::NodeType_ValueNode && in_tensors_[1]->Data() != nullptr) {
+  if (in_tensors_[1]->category() == lite::Tensor::Category::CONST && in_tensors_[1]->MutableData() != nullptr) {
     auto allocator = ocl_runtime_->GetAllocator();
     std::vector<size_t> img_size;
     GetImageSize(0, &img_size);
     if (in_tensors_[1]->shape().size() == 1 && axis_ == 3) {
       img_size[0] = 1;
       img_size[1] = UP_DIV(in_tensors_[1]->shape()[0], C4NUM);
-      scale_ptr_ = allocator->CreateImageFromHost(in_tensors_[1]->Data(), in_tensors_[1]->ElementsNum(), img_size);
-      offset_ptr_ = allocator->CreateImageFromHost(in_tensors_[2]->Data(), in_tensors_[2]->ElementsNum(), img_size);
+      scale_ptr_ =
+        allocator->CreateImageFromHost(in_tensors_[1]->MutableData(), in_tensors_[1]->ElementsNum(), img_size);
+      offset_ptr_ =
+        allocator->CreateImageFromHost(in_tensors_[2]->MutableData(), in_tensors_[2]->ElementsNum(), img_size);
       return RET_OK;
     }
     int pack_weight_size = in_tensors_[1]->ElementsC4Num();
@@ -127,8 +129,10 @@ int ScaleOpenCLKernel::InitBuffer() {
     int batch = in_tensors_[1]->Batch();
-    if (in_tensors_[0]->GetFormat() == in_tensors_[1]->GetFormat()) {
+    if (in_tensors_[0]->data_type() == in_tensors_[1]->data_type()) {
-      scale_ptr_ = allocator->CreateImageFromHost(in_tensors_[1]->Data(), in_tensors_[1]->ElementsNum(), img_size);
-      offset_ptr_ = allocator->CreateImageFromHost(in_tensors_[2]->Data(), in_tensors_[2]->ElementsNum(), img_size);
+      scale_ptr_ =
+        allocator->CreateImageFromHost(in_tensors_[1]->MutableData(), in_tensors_[1]->ElementsNum(), img_size);
+      offset_ptr_ =
+        allocator->CreateImageFromHost(in_tensors_[2]->MutableData(), in_tensors_[2]->ElementsNum(), img_size);
     } else {
       MS_LOG(ERROR) << "Unsupport data type transpose from " << in_tensors_[1]->data_type() << "to "
                     << in_tensors_[0]->data_type();
@@ -149,8 +153,8 @@ int ScaleOpenCLKernel::InitBuffer() {
       return RET_ERROR;
     }
     std::function<float(float)> to_dtype = [](float x) -> float { return (float)x; };
-    PackNHWCToNC4HW4<float, float>(in_tensors_[1]->Data(), scale, batch, plane, channel, to_dtype);
-    PackNHWCToNC4HW4<float, float>(in_tensors_[2]->Data(), offset, batch, plane, channel, to_dtype);
+    PackNHWCToNC4HW4<float, float>(in_tensors_[1]->MutableData(), scale, batch, plane, channel, to_dtype);
+    PackNHWCToNC4HW4<float, float>(in_tensors_[2]->MutableData(), offset, batch, plane, channel, to_dtype);
     scale_ptr_ = allocator->CreateImageFromHost(scale, in_tensors_[1]->ElementsNum(), img_size);
     offset_ptr_ = allocator->CreateImageFromHost(offset, in_tensors_[2]->ElementsNum(), img_size);
     delete[] scale;
@@ -168,8 +172,8 @@ int ScaleOpenCLKernel::InitBuffer() {
      return RET_ERROR;
     }
     std::function<int16_t(float)> to_dtype = Float32ToShort;
-    PackNHWCToNC4HW4<float, int16_t>(in_tensors_[1]->Data(), scale, batch, plane, channel, to_dtype);
-    PackNHWCToNC4HW4<float, int16_t>(in_tensors_[2]->Data(), offset, batch, plane, channel, to_dtype);
+    PackNHWCToNC4HW4<float, int16_t>(in_tensors_[1]->MutableData(), scale, batch, plane, channel, to_dtype);
+    PackNHWCToNC4HW4<float, int16_t>(in_tensors_[2]->MutableData(), offset, batch, plane, channel, to_dtype);
     scale_ptr_ = allocator->CreateImageFromHost(scale, in_tensors_[1]->ElementsNum(), img_size);
     offset_ptr_ = allocator->CreateImageFromHost(offset, in_tensors_[2]->ElementsNum(), img_size);
     delete[] scale;
@@ -199,8 +203,8 @@ int ScaleOpenCLKernel::InitBuffer() {
      return RET_ERROR;
     }
     std::function<float(float)> to_dtype = [](float x) -> float { return (float)x; };
-    PackNHWCToNHWC4<float, float>(in_tensors_[1]->Data(), scale, batch, plane, channel, to_dtype);
-    PackNHWCToNHWC4<float, float>(in_tensors_[2]->Data(), offset, batch, plane, channel, to_dtype);
+    PackNHWCToNHWC4<float, float>(in_tensors_[1]->MutableData(), scale, batch, plane, channel, to_dtype);
+    PackNHWCToNHWC4<float, float>(in_tensors_[2]->MutableData(), offset, batch, plane, channel, to_dtype);
     scale_ptr_ = allocator->CreateImageFromHost(scale, in_tensors_[1]->ElementsNum(), img_size);
     offset_ptr_ = allocator->CreateImageFromHost(offset, in_tensors_[2]->ElementsNum(), img_size);
     delete[] scale;
@@ -218,8 +222,8 @@ int ScaleOpenCLKernel::InitBuffer() {
      return RET_ERROR;
     }
     std::function<int16_t(float)> to_dtype = Float32ToShort;
-    PackNHWCToNHWC4<float, int16_t>(in_tensors_[1]->Data(), scale, batch, plane, channel, to_dtype);
-    PackNHWCToNHWC4<float, int16_t>(in_tensors_[2]->Data(), offset, batch, plane, channel, to_dtype);
+    PackNHWCToNHWC4<float, int16_t>(in_tensors_[1]->MutableData(), scale, batch, plane, channel, to_dtype);
+    PackNHWCToNHWC4<float, int16_t>(in_tensors_[2]->MutableData(), offset, batch, plane, channel, to_dtype);
     scale_ptr_ = allocator->CreateImageFromHost(scale, in_tensors_[1]->ElementsNum(), img_size);
     offset_ptr_ = allocator->CreateImageFromHost(offset, in_tensors_[2]->ElementsNum(), img_size);
     delete[] scale;
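Each InitBuffer() branch above follows the same upload pattern: pack the host-side scale and offset data into the GPU layout, then hand the packed buffer to the OpenCL allocator. A condensed sketch of one branch (fp32, NHWC4), assuming the members and helpers visible in the hunks above (allocator, in_tensors_, img_size, batch, plane, channel, scale_ptr_, PackNHWCToNHWC4); it is a sketch of the pattern, not a drop-in replacement for the method:

// Condensed sketch of one ScaleOpenCLKernel::InitBuffer() branch; error handling trimmed.
int pack_weight_size = in_tensors_[1]->ElementsC4Num();
auto *scale = new (std::nothrow) float[pack_weight_size];
if (scale == nullptr) {
  return RET_ERROR;
}
std::function<float(float)> to_dtype = [](float x) -> float { return (float)x; };
// Host data is now read through MutableData() rather than the removed Data() accessor.
PackNHWCToNHWC4<float, float>(in_tensors_[1]->MutableData(), scale, batch, plane, channel, to_dtype);
scale_ptr_ = allocator->CreateImageFromHost(scale, in_tensors_[1]->ElementsNum(), img_size);
delete[] scale;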
@@ -291,7 +295,7 @@ int ScaleOpenCLKernel::Init() {
   in_ori_format_ = in_tensors_[0]->GetFormat();
   out_ori_format_ = out_tensors_[0]->GetFormat();
   in_tensors_[0]->SetFormat(format);
-  if (element_flag_ && in_tensors_[1]->TensorType() != schema::NodeType_ValueNode) {
+  if (element_flag_ && in_tensors_[1]->category() != lite::Tensor::Category::CONST) {
     in_tensors_[1]->SetFormat(format);
     in_tensors_[2]->SetFormat(format);
   }
@@ -305,27 +309,27 @@ int ScaleOpenCLKernel::Run() {
   MS_LOG(DEBUG) << this->name() << " Running!";

   int arg_idx = 0;
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->Data());
+  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->MutableData());
   if (element_flag_) {
-    void *scale = scale_ptr_ == nullptr ? in_tensors_[1]->Data() : scale_ptr_;
-    void *offset = offset_ptr_ == nullptr ? in_tensors_[2]->Data() : offset_ptr_;
+    void *scale = scale_ptr_ == nullptr ? in_tensors_[1]->MutableData() : scale_ptr_;
+    void *offset = offset_ptr_ == nullptr ? in_tensors_[2]->MutableData() : offset_ptr_;
     ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale);
     ocl_runtime_->SetKernelArg(kernel_, arg_idx++, offset);
   } else {
     if (in_tensors_[0]->data_type() == kNumberTypeFloat32) {
-      float scale = static_cast<float *>(in_tensors_[1]->Data())[0];
-      float offset = static_cast<float *>(in_tensors_[2]->Data())[0];
+      float scale = static_cast<float *>(in_tensors_[1]->MutableData())[0];
+      float offset = static_cast<float *>(in_tensors_[2]->MutableData())[0];
       ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale);
       ocl_runtime_->SetKernelArg(kernel_, arg_idx++, offset);
     } else if (in_tensors_[0]->data_type() == kNumberTypeFloat16) {
       if (in_tensors_[1]->data_type() == kNumberTypeFloat32) {
-        float scale = static_cast<float *>(in_tensors_[1]->Data())[0];
-        float offset = static_cast<float *>(in_tensors_[2]->Data())[0];
+        float scale = static_cast<float *>(in_tensors_[1]->MutableData())[0];
+        float offset = static_cast<float *>(in_tensors_[2]->MutableData())[0];
         ocl_runtime_->SetKernelArg(kernel_, arg_idx++, Float32ToShort(scale));
         ocl_runtime_->SetKernelArg(kernel_, arg_idx++, Float32ToShort(offset));
       } else if (in_tensors_[1]->data_type() == kNumberTypeFloat16) {
-        int16_t scale = static_cast<int16_t *>(in_tensors_[1]->Data())[0];
-        int16_t offset = static_cast<int16_t *>(in_tensors_[2]->Data())[0];
+        int16_t scale = static_cast<int16_t *>(in_tensors_[1]->MutableData())[0];
+        int16_t offset = static_cast<int16_t *>(in_tensors_[2]->MutableData())[0];
         ocl_runtime_->SetKernelArg(kernel_, arg_idx++, Float32ToShort(scale));
         ocl_runtime_->SetKernelArg(kernel_, arg_idx++, Float32ToShort(offset));
       } else {
@@ -334,7 +338,7 @@ int ScaleOpenCLKernel::Run() {
       }
     }
   }
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->Data());
+  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->MutableData());
   int H = 0;
   int W = 0;
   if (out_tensors_[0]->GetFormat() == schema::Format_NC4HW4) {
@@ -359,10 +363,9 @@ int ScaleOpenCLKernel::Run() {
   return RET_OK;
 }

-kernel::LiteKernel *OpenCLScaleKernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
-                                             const std::vector<lite::tensor::Tensor *> &outputs,
-                                             OpParameter *opParameter, const lite::Context *ctx,
-                                             const kernel::KernelKey &desc,
+kernel::LiteKernel *OpenCLScaleKernelCreator(const std::vector<lite::Tensor *> &inputs,
+                                             const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
+                                             const lite::Context *ctx, const kernel::KernelKey &desc,
                                              const mindspore::lite::PrimitiveC *primitive) {
   auto *kernel =
     new (std::nothrow) ScaleOpenCLKernel(reinterpret_cast<OpParameter *>(opParameter), inputs, outputs, ctx);
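The same accessor change carries into Run(): both the image buffers and the broadcast scalars are read through MutableData(), and fp32 scalars are converted with Float32ToShort before being passed to an fp16 kernel. A trimmed sketch of the broadcast-scalar branch, reusing the members shown in the hunks above (ocl_runtime_, kernel_, arg_idx, in_tensors_); a sketch only, not the full method:

// Trimmed sketch of the broadcast-scalar path in ScaleOpenCLKernel::Run() for an fp16 kernel.
if (in_tensors_[1]->data_type() == kNumberTypeFloat32) {
  float scale = static_cast<float *>(in_tensors_[1]->MutableData())[0];
  float offset = static_cast<float *>(in_tensors_[2]->MutableData())[0];
  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, Float32ToShort(scale));
  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, Float32ToShort(offset));
}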
@@ -26,8 +26,8 @@ namespace mindspore::kernel {

 class ScaleOpenCLKernel : public OpenCLKernel {
  public:
-  explicit ScaleOpenCLKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
-                             const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx)
+  explicit ScaleOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
+                             const std::vector<lite::Tensor *> &outputs, const lite::Context *ctx)
       : OpenCLKernel(parameter, inputs, outputs) {}
   ~ScaleOpenCLKernel() override;

@@ -71,7 +71,7 @@ static void TestCase(const std::vector<int> &shape_a, const std::vector<int> &sh
   auto allocator = ocl_runtime->GetAllocator();

   bool is_broadcast = shape_b.empty();
-  auto tensorType = schema::NodeType_ValueNode;
+  auto tensorType = lite::TensorCategory(schema::NodeType_ValueNode);
   auto format = schema::Format_NHWC4;

   auto data_type = kNumberTypeFloat32;
@@ -79,10 +79,10 @@ static void TestCase(const std::vector<int> &shape_a, const std::vector<int> &sh
     data_type = kNumberTypeFloat16;
     ocl_runtime->SetFp16Enable(true);
   }
-  lite::tensor::Tensor *tensor_in = new (std::nothrow) lite::tensor::Tensor(data_type, shape_a, format, tensorType);
-  lite::tensor::Tensor *tensor_scale = new (std::nothrow) lite::tensor::Tensor(data_type, shape_b, format, tensorType);
-  lite::tensor::Tensor *tensor_offset = new (std::nothrow) lite::tensor::Tensor(data_type, shape_b, format, tensorType);
-  lite::tensor::Tensor *tensor_out = new (std::nothrow) lite::tensor::Tensor(data_type, shape_a, format, tensorType);
+  lite::Tensor *tensor_in = new (std::nothrow) lite::Tensor(data_type, shape_a, format, tensorType);
+  lite::Tensor *tensor_scale = new (std::nothrow) lite::Tensor(data_type, shape_b, format, tensorType);
+  lite::Tensor *tensor_offset = new (std::nothrow) lite::Tensor(data_type, shape_b, format, tensorType);
+  lite::Tensor *tensor_out = new (std::nothrow) lite::Tensor(data_type, shape_a, format, tensorType);
   if (tensor_in == nullptr || tensor_scale == nullptr || tensor_offset == nullptr) {
     MS_LOG(ERROR) << "Create tensor failed!";
     delete tensor_in;
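On the test side, the constructor arguments keep their shape, but the node-type enum is now wrapped by lite::TensorCategory() and host writes go through MutableData(). A minimal construction sketch using only calls that appear in the surrounding hunks (the shape and data values are illustrative):

// Minimal sketch of the updated test-side tensor setup; shape and values are illustrative.
auto tensorType = lite::TensorCategory(schema::NodeType_ValueNode);
std::vector<int> shape = {1, 2, 2, 4};
lite::Tensor *tensor_in = new (std::nothrow) lite::Tensor(kNumberTypeFloat32, shape,
                                                          schema::Format_NHWC4, tensorType);
if (tensor_in != nullptr) {
  tensor_in->MallocData();
  float host_data[16] = {0.0f};  // matches the illustrative 1x2x2x4 shape
  memcpy(tensor_in->MutableData(), host_data, sizeof(host_data));
  delete tensor_in;
}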
@@ -126,17 +126,17 @@ static void TestCase(const std::vector<int> &shape_a, const std::vector<int> &sh
     Scale(data_in, data_scale, data_offset, data_out_cpu, element_num);
   }

-  std::vector<lite::tensor::Tensor *> inputs = {tensor_in};
+  std::vector<lite::Tensor *> inputs = {tensor_in};
   if (!is_broadcast) {
     inputs.push_back(tensor_scale);
     inputs.push_back(tensor_offset);
   } else {
     tensor_scale->MallocData();
     tensor_offset->MallocData();
-    memcpy(tensor_scale->Data(), data_scale, sizeof(T));
-    memcpy(tensor_offset->Data(), data_offset, sizeof(T));
+    memcpy(tensor_scale->MutableData(), data_scale, sizeof(T));
+    memcpy(tensor_offset->MutableData(), data_offset, sizeof(T));
   }
-  std::vector<lite::tensor::Tensor *> outputs = {tensor_out};
+  std::vector<lite::Tensor *> outputs = {tensor_out};

   ScaleParameter *param = new (std::nothrow) ScaleParameter();
   if (param == nullptr) {
@@ -155,7 +155,7 @@ static void TestCase(const std::vector<int> &shape_a, const std::vector<int> &sh
   param->axis_ = 0;
   param->op_parameter_.type_ = schema::PrimitiveType_Scale;

-  std::vector<lite::tensor::Tensor *> scale_inputs = {tensor_in, tensor_scale, tensor_offset};
+  std::vector<lite::Tensor *> scale_inputs = {tensor_in, tensor_scale, tensor_offset};
   lite::Context ctx;
   auto *scale_kernel =
     new (std::nothrow) kernel::ScaleOpenCLKernel(reinterpret_cast<OpParameter *>(param), scale_inputs, outputs, &ctx);
@@ -196,21 +196,21 @@ static void TestCase(const std::vector<int> &shape_a, const std::vector<int> &sh
   }
   kernel->Init();

-  memcpy(inputs[0]->Data(), data_in, sizeof(T) * element_num);
+  memcpy(inputs[0]->MutableData(), data_in, sizeof(T) * element_num);
   if (!is_broadcast) {
-    memcpy(inputs[1]->Data(), data_scale, sizeof(T) * element_num_b);
-    memcpy(inputs[2]->Data(), data_offset, sizeof(T) * element_num_b);
+    memcpy(inputs[1]->MutableData(), data_scale, sizeof(T) * element_num_b);
+    memcpy(inputs[2]->MutableData(), data_offset, sizeof(T) * element_num_b);
   }

   kernel->Run();

-  memcpy(data_out_ocl, outputs[0]->Data(), sizeof(T) * element_num);
+  memcpy(data_out_ocl, outputs[0]->MutableData(), sizeof(T) * element_num);

   LogData<T>(data_in, 10, "Data input : ");
   LogData<T>(data_scale, tensor_scale->shape().empty() ? 1 : 10, "Data scale : ");
   LogData<T>(data_offset, tensor_offset->shape().empty() ? 1 : 10, "Data offset : ");
   LogData<T>(data_out_cpu, 10, "Expect compute : ");
-  LogData<T>(outputs[0]->Data(), 10, "OpenCL compute : ");
+  LogData<T>(outputs[0]->MutableData(), 10, "OpenCL compute : ");
   bool cmp = DataCompare(data_out_cpu, data_out_ocl, element_num);
   MS_LOG(INFO) << "Compare " << (cmp ? "success!" : "failed!");
   EXPECT_EQ(true, cmp);