!6058 add FP16 support for arithmetic and scale op for opencl
Merge pull request !6058 from liuchao/master
Commit ed38d787d1
@@ -9,9 +9,9 @@ __kernel void Scale_IMG(__read_only image2d_t input, __read_only image2d_t scale
     return;
   }
 
-  FLT4 in = read_imagef(input, smp_none, (int2)(X, Y));
-  FLT4 s = read_imagef(scale, smp_none, (int2)(X, Y));
-  FLT4 o = read_imagef(offset, smp_none, (int2)(X, Y));
+  FLT4 in = READ_IMAGE(input, smp_none, (int2)(X, Y));
+  FLT4 s = READ_IMAGE(scale, smp_none, (int2)(X, Y));
+  FLT4 o = READ_IMAGE(offset, smp_none, (int2)(X, Y));
   WRITE_IMAGE(output, (int2)(X, Y), in * s + o);
 }
 
@@ -23,7 +23,7 @@ __kernel void BoardcastScale_IMG(__read_only image2d_t input, float scale, float
     return;
   }
 
-  FLT4 in = read_imagef(input, smp_none, (int2)(X, Y));
+  FLT4 in = READ_IMAGE(input, smp_none, (int2)(X, Y));
   WRITE_IMAGE(output, (int2)(X, Y), in * (FLT)scale + (FLT)offset);
 }
 
@@ -35,8 +35,8 @@ __kernel void Scale_C_IMG(__read_only image2d_t input, __read_only image2d_t sca
     return;
   }
 
-  FLT4 in = read_imagef(input, smp_none, (int2)(X, Y));
-  FLT4 s = read_imagef(scale, smp_none, (int2)(X % C, 0));
-  FLT4 o = read_imagef(offset, smp_none, (int2)(X % C, 0));
+  FLT4 in = READ_IMAGE(input, smp_none, (int2)(X, Y));
+  FLT4 s = READ_IMAGE(scale, smp_none, (int2)(X % C, 0));
+  FLT4 o = READ_IMAGE(offset, smp_none, (int2)(X % C, 0));
   WRITE_IMAGE(output, (int2)(X, Y), in * s + o);
 }
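Note (not part of the diff): switching from the hard-coded read_imagef to the READ_IMAGE/WRITE_IMAGE macros is what lets the same scale kernels serve both precisions; the macros and the FLT/FLT4 types have to be supplied by the host when the program is built. A minimal sketch of how such per-precision build options could be assembled; the option strings and helper name are illustrative assumptions, not the project's actual API:

#include <string>

// Hypothetical helper: pick per-precision build options for the OpenCL compiler.
// With FP16 enabled the kernel sees half4 vectors and read_imageh/write_imageh;
// otherwise it falls back to float4 and read_imagef/write_imagef.
std::string MakePrecisionBuildOptions(bool fp16_enable) {
  if (fp16_enable) {
    return "-DFLT=half -DFLT4=half4 -DREAD_IMAGE=read_imageh -DWRITE_IMAGE=write_imageh";
  }
  return "-DFLT=float -DFLT4=float4 -DREAD_IMAGE=read_imagef -DWRITE_IMAGE=write_imagef";
}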
@@ -18,6 +18,7 @@
 #include <set>
 #include <vector>
 #include <string>
+#include "nnacl/fp32/common_func.h"
 #include "schema/model_generated.h"
 #include "src/kernel_registry.h"
 #include "src/runtime/kernel/opencl/utils.h"
@@ -48,16 +49,20 @@ std::vector<size_t> ArithmeticOpenCLKernel::InitGlobalSize() const {
 
 void ArithmeticOpenCLKernel::Image2dGetWorkGroupSize() {
   local_size_ = {16, 16};
-  if (out_tensors_[0]->GetFormat() == schema::Format::Format_NHWC4) {
+  if (out_tensors_[0]->GetFormat() == schema::Format_NC4HW4) {
+    size_t H = out_tensors_[0]->Batch() * out_tensors_[0]->Height() * UP_DIV(out_tensors_[0]->Channel(), C4NUM);
+    size_t W = out_tensors_[0]->Width();
+    global_size_ = {W, H};
+  } else if (out_tensors_[0]->GetFormat() == schema::Format_NHWC4) {
     size_t H = out_tensors_[0]->Batch() * out_tensors_[0]->Height();
     size_t W = out_tensors_[0]->Width() * UP_DIV(out_tensors_[0]->Channel(), C4NUM);
     global_size_ = {W, H};
-  } else if (out_tensors_[0]->GetFormat() == schema::Format::Format_NC4) {
+  } else if (out_tensors_[0]->GetFormat() == schema::Format_NC4) {
     size_t H = out_tensors_[0]->Batch();
     size_t W = UP_DIV(out_tensors_[0]->Channel(), C4NUM);
     global_size_ = {W, H};
   } else {
-    MS_LOG(ERROR) << "Unspport data format " << out_tensors_[0]->GetFormat();
+    MS_LOG(ERROR) << "Unsupport data format " << out_tensors_[0]->GetFormat();
   }
 }
 
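Note (not part of the diff): Image2dGetWorkGroupSize now distinguishes the two channel-packed layouts; NC4HW4 stacks the channel slices along the image height, while NHWC4 spreads them across the width. A self-contained sketch of that mapping with UP_DIV written out, where the TensorShape struct stands in for the out_tensors_[0] accessors:

#include <cstddef>
#include <utility>

constexpr size_t C4NUM = 4;
inline size_t UpDiv(size_t x, size_t y) { return (x + y - 1) / y; }  // same idea as UP_DIV

struct TensorShape {  // stand-in for lite::Tensor's Batch()/Height()/Width()/Channel()
  size_t batch, height, width, channel;
};

// Returns {W, H} of the 2D image that holds a tensor in the given layout.
std::pair<size_t, size_t> Image2dExtent(const TensorShape &t, bool nc4hw4) {
  const size_t c4 = UpDiv(t.channel, C4NUM);
  if (nc4hw4) {
    return {t.width, t.batch * t.height * c4};  // channel slices stacked along the height
  }
  return {t.width * c4, t.batch * t.height};    // NHWC4: channel slices widen each row
}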
@@ -68,21 +73,28 @@ void ArithmeticOpenCLKernel::BufferGetWorkGroupSize() {
 
 int ArithmeticOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) {
   size_t im_dst_x, im_dst_y;
-  if (out_tensors_[0]->GetFormat() == schema::Format::Format_NHWC4) {
+  if (out_tensors_[0]->GetFormat() == schema::Format_NC4HW4) {
+    im_dst_x = out_tensors_[0]->Width();
+    im_dst_y = out_tensors_[0]->Batch() * out_tensors_[0]->Height() * UP_DIV(out_tensors_[0]->Channel(), C4NUM);
+  } else if (out_tensors_[0]->GetFormat() == schema::Format_NHWC4) {
     im_dst_x = out_tensors_[0]->Width() * UP_DIV(out_tensors_[0]->Channel(), C4NUM);
     im_dst_y = out_tensors_[0]->Batch() * out_tensors_[0]->Height();
-  } else if (out_tensors_[0]->GetFormat() == schema::Format::Format_NC4) {
+  } else if (out_tensors_[0]->GetFormat() == schema::Format_NC4) {
     im_dst_y = out_tensors_[0]->Batch();
     im_dst_x = UP_DIV(out_tensors_[0]->Channel(), C4NUM);
   } else {
-    MS_LOG(ERROR) << "Unspport data format " << out_tensors_[0]->GetFormat();
+    MS_LOG(ERROR) << "Unsupport data format " << out_tensors_[0]->GetFormat();
     return RET_ERROR;
   }
-#ifdef ENABLE_FP16
-  size_t img_dtype = CL_HALF_FLOAT;
-#else
+
   size_t img_dtype = CL_FLOAT;
-#endif
+  if (in_tensors_[0]->data_type() == kNumberTypeFloat16) {
+    img_dtype = CL_HALF_FLOAT;
+  } else if (in_tensors_[0]->data_type() == kNumberTypeFloat32) {
+    img_dtype = CL_FLOAT;
+  } else {
+    MS_LOG(ERROR) << "Unsupport data type " << in_tensors_[0]->data_type();
+  }
   img_size->clear();
   std::vector<size_t> vec{im_dst_x, im_dst_y, img_dtype};
   *img_size = vec;
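Note (not part of the diff): GetImageSize now picks the image channel type from the input tensor's data type at runtime instead of the compile-time ENABLE_FP16 switch. The chosen img_dtype has to match what the kernels read: CL_HALF_FLOAT images pair with read_imageh, CL_FLOAT with read_imagef. A small sketch of where that value typically ends up; the cl_image_format fields are standard OpenCL, but the helper itself is illustrative, not the project's allocator:

#include <CL/cl.h>

// Sketch: the img_dtype selected above becomes the channel data type of the
// cl_image_format used when the image object is allocated; CL_RGBA matches
// the 4-wide FLT4 vectors the kernels read and write.
cl_image_format MakeImageFormat(bool fp16_enable) {
  cl_image_format fmt;
  fmt.image_channel_order = CL_RGBA;
  fmt.image_channel_data_type = fp16_enable ? CL_HALF_FLOAT : CL_FLOAT;
  return fmt;
}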
@@ -93,23 +105,99 @@ int ArithmeticOpenCLKernel::InitBuffer() {
   const ArithmeticParameter *arithmetic_parameter = reinterpret_cast<const ArithmeticParameter *>(op_parameter_);
   if (!arithmetic_parameter->broadcasting_) {
     if (in_tensors_[1]->category() == lite::Tensor::Category::CONST && in_tensors_[1]->MutableData() != nullptr) {
-      auto allocatdor = runtime_->GetAllocator();
+      auto allocator = runtime_->GetAllocator();
       std::vector<size_t> img_size;
       GetImageSize(0, &img_size);
-      weight_ptr_ =
-        allocatdor->CreateImageFromHost(in_tensors_[1]->MutableData(), in_tensors_[1]->ElementsNum(), img_size);
-      return RET_OK;
+      int pack_weight_size = in_tensors_[1]->ElementsC4Num();
+      int plane = in_tensors_[1]->Height() * in_tensors_[1]->Width();
+      int channel = in_tensors_[1]->Channel();
+      int batch = in_tensors_[1]->Batch();
+
+      if (in_tensors_[0]->GetFormat() == in_tensors_[1]->GetFormat()) {
+        if (in_tensors_[0]->data_type() == in_tensors_[1]->data_type()) {
+          weight_ptr_ =
+            allocator->CreateImageFromHost(in_tensors_[1]->MutableData(), in_tensors_[1]->ElementsNum(), img_size);
+        } else {
+          MS_LOG(ERROR) << "Unsupport data type transpose from " << in_tensors_[1]->data_type() << "to "
+                        << in_tensors_[0]->data_type();
+          return RET_ERROR;
+        }
+      } else if (in_tensors_[0]->GetFormat() == schema::Format_NC4HW4) {
+        if (in_tensors_[1]->GetFormat() == schema::Format_NHWC) {
+          if (in_tensors_[0]->data_type() == kNumberTypeFloat32) {
+            float *weight = new (std::nothrow) float[pack_weight_size];
+            if (weight == nullptr) {
+              MS_LOG(ERROR) << "Malloc buffer failed!";
+              return RET_ERROR;
+            }
+            std::function<float(float)> to_dtype = [](float x) -> float { return (float)x; };
+            PackNHWCToNC4HW4<float, float>(in_tensors_[1]->MutableData(), weight, batch, plane, channel, to_dtype);
+            weight_ptr_ = allocator->CreateImageFromHost(weight, in_tensors_[1]->ElementsNum(), img_size);
+            delete[] weight;
+          } else if (in_tensors_[0]->data_type() == kNumberTypeFloat16) {
+            int16_t *weight = new (std::nothrow) int16_t[pack_weight_size];
+            if (weight == nullptr) {
+              MS_LOG(ERROR) << "Malloc buffer failed!";
+              return RET_ERROR;
+            }
+            std::function<int16_t(float)> to_dtype = Float32ToShort;
+            PackNHWCToNC4HW4<float, int16_t>(in_tensors_[1]->MutableData(), weight, batch, plane, channel, to_dtype);
+            weight_ptr_ = allocator->CreateImageFromHost(weight, in_tensors_[1]->ElementsNum(), img_size);
+            delete[] weight;
+          } else {
+            MS_LOG(ERROR) << "Unsupport data type transpose from " << in_tensors_[1]->data_type() << "to "
+                          << in_tensors_[0]->data_type();
+            return RET_ERROR;
+          }
+        } else {
+          MS_LOG(ERROR) << "Unsupport format transpose from " << in_tensors_[1]->GetFormat() << "to "
+                        << in_tensors_[0]->GetFormat();
+          return RET_ERROR;
+        }
+      } else if (in_tensors_[0]->GetFormat() == schema::Format_NHWC4) {
+        if (in_tensors_[1]->GetFormat() == schema::Format_NHWC) {
+          if (in_tensors_[0]->data_type() == kNumberTypeFloat32) {
+            float *weight = new (std::nothrow) float[pack_weight_size];
+            if (weight == nullptr) {
+              MS_LOG(ERROR) << "Malloc buffer failed!";
+              return RET_ERROR;
+            }
+            std::function<float(float)> to_dtype = [](float x) -> float { return (float)x; };
+            PackNHWCToNHWC4<float, float>(in_tensors_[1]->MutableData(), weight, batch, plane, channel, to_dtype);
+            weight_ptr_ = allocator->CreateImageFromHost(weight, in_tensors_[1]->ElementsNum(), img_size);
+            delete[] weight;
+          } else if (in_tensors_[0]->data_type() == kNumberTypeFloat16) {
+            int16_t *weight = new (std::nothrow) int16_t[pack_weight_size];
+            if (weight == nullptr) {
+              MS_LOG(ERROR) << "Malloc buffer failed!";
+              return RET_ERROR;
+            }
+            std::function<int16_t(float)> to_dtype = Float32ToShort;
+            PackNHWCToNHWC4<float, int16_t>(in_tensors_[1]->MutableData(), weight, batch, plane, channel, to_dtype);
+            weight_ptr_ = allocator->CreateImageFromHost(weight, in_tensors_[1]->ElementsNum(), img_size);
+            delete[] weight;
+          } else {
+            MS_LOG(ERROR) << "Unsupport data type transpose from " << in_tensors_[1]->data_type() << "to "
+                          << in_tensors_[0]->data_type();
+            return RET_ERROR;
+          }
+        } else {
+          MS_LOG(ERROR) << "Unsupport format transpose from " << in_tensors_[1]->GetFormat() << "to "
+                        << in_tensors_[0]->GetFormat();
+          return RET_ERROR;
+        }
+      }
     }
   }
   return RET_OK;
 }
 
 int ArithmeticOpenCLKernel::Init() {
   runtime_ = lite::opencl::OpenCLRuntime::GetInstance();
   std::string kernel_name;
 
   const ArithmeticParameter *arithmetic_parameter = reinterpret_cast<const ArithmeticParameter *>(op_parameter_);
-  if (arithmetic_parameter->broadcasting_ && in_tensors_[1]->category() == lite::Tensor::Category::CONST &&
-      in_tensors_[1]->MutableData() != nullptr) {
+  if (arithmetic_parameter->broadcasting_) {
     element_flag_ = false;
     kernel_name = "BoardcastArith";
   } else {
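Note (not part of the diff): InitBuffer now repacks a constant NHWC weight tensor into the activation's layout and, on the FP16 path, converts each value with Float32ToShort while packing; PackNHWCToNC4HW4/PackNHWCToNHWC4 and Float32ToShort come from the project's utils and nnacl headers. The sketch below is a self-contained illustration of the NHWC to NHWC4 case only, not the project's implementation:

#include <functional>

// Illustrative NHWC -> NHWC4 packing with an on-the-fly dtype cast. Channels
// are padded up to a multiple of 4 and every value is run through `to_dtype`.
template <typename SRC, typename DST>
void PackNHWCToNHWC4Sketch(const SRC *src, DST *dst, int batch, int plane, int channel,
                           const std::function<DST(SRC)> &to_dtype) {
  const int c4 = (channel + 3) / 4 * 4;  // channel count rounded up to a multiple of 4
  for (int b = 0; b < batch; ++b) {
    for (int p = 0; p < plane; ++p) {
      for (int c = 0; c < c4; ++c) {
        const int dst_idx = (b * plane + p) * c4 + c;
        dst[dst_idx] = (c < channel) ? to_dtype(src[(b * plane + p) * channel + c]) : DST(0);
      }
    }
  }
}

With DST = int16_t and a float-to-half converter in place of Float32ToShort, this is the FP16 storage path exercised above.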
@@ -202,10 +290,13 @@ int ArithmeticOpenCLKernel::Run() {
 
   int H = 0;
   int W = 0;
-  if (out_tensors_[0]->GetFormat() == schema::Format::Format_NHWC4) {
+  if (out_tensors_[0]->GetFormat() == schema::Format_NC4HW4) {
+    H = out_tensors_[0]->Batch() * out_tensors_[0]->Height() * UP_DIV(out_tensors_[0]->Channel(), C4NUM);
+    W = out_tensors_[0]->Width();
+  } else if (out_tensors_[0]->GetFormat() == schema::Format_NHWC4) {
     H = out_tensors_[0]->Batch() * out_tensors_[0]->Height();
     W = out_tensors_[0]->Width() * UP_DIV(out_tensors_[0]->Channel(), C4NUM);
-  } else if (out_tensors_[0]->GetFormat() == schema::Format::Format_NC4) {
+  } else if (out_tensors_[0]->GetFormat() == schema::Format_NC4) {
     H = out_tensors_[0]->Batch();
     W = UP_DIV(out_tensors_[0]->Channel(), C4NUM);
   } else {
@@ -20,19 +20,22 @@
 
 namespace mindspore {
 
-void BoardcaseAdd(const float *a, const float b, float *c, const int size) {
+template <class T>
+static void BoardcaseAdd(const T *a, const T b, T *c, const int size) {
   for (int i = 0; i < size; i++) {
     c[i] = a[i] + b;
   }
 }
 
-void ElementAdd(const float *a, const float *b, float *c, const int size) {
+template <class T>
+static void ElementAdd(const T *a, const T *b, T *c, const int size) {
   for (int i = 0; i < size; i++) {
     c[i] = a[i] + b[i];
   }
 }
 
-bool DataCompare(const float *a, const float *b, const int size, const float accuracy = 1e-4) {
+template <class T>
+static bool DataCompare(const T *a, const T *b, const int size, const float accuracy = 1e-4) {
   for (int i = 0; i < size; i++) {
     auto diff = fabs(a[i] - b[i]);
     if (diff > accuracy) {
@@ -43,36 +46,40 @@ bool DataCompare(const float *a, const float *b, const int size, const float acc
   return true;
 }
 
-void InitData(void *data, const int size) {
-  float *data_float = reinterpret_cast<float *>(data);
+template <class T>
+static void InitData(void *data, const int size) {
+  T *data_float = reinterpret_cast<T *>(data);
   static unsigned int seed = 123;
   for (int i = 0; i < size; i++) {
     data_float[i] = static_cast<int>(rand_r(&seed)) % 100;
   }
 }
 
-void LogData(void *data, const int size, const std::string prefix) {
+template <class T>
+static void LogData(void *data, const int size, const std::string prefix) {
   std::cout << prefix;
-  float *data_float = reinterpret_cast<float *>(data);
+  T *data_float = reinterpret_cast<T *>(data);
   for (int i = 0; i < size; i++) {
     std::cout << data_float[i] << ",";
   }
   std::cout << std::endl;
 }
 
-void TestCase(const std::vector<int> &shape_a, const std::vector<int> &shape_b) {
+template <class T>
+static void TestCase(const std::vector<int> &shape_a, const std::vector<int> &shape_b) {
   auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
   auto allocator = ocl_runtime->GetAllocator();
 
   bool is_bias_add = shape_b.empty();
-  auto tensorType = lite::TensorCategory(schema::NodeType_ValueNode);
+  auto data_type = kNumberTypeFloat32;
+  if (sizeof(T) == 2) {
+    data_type = kNumberTypeFloat16;
+    ocl_runtime->SetFp16Enable(true);
+  }
 
-  lite::Tensor *tensor_a =
-    new (std::nothrow) lite::Tensor(kNumberTypeFloat32, shape_a, schema::Format_NHWC4, tensorType);
-  lite::Tensor *tensor_b =
-    new (std::nothrow) lite::Tensor(kNumberTypeFloat32, shape_b, schema::Format_NHWC4, tensorType);
-  lite::Tensor *tensor_c =
-    new (std::nothrow) lite::Tensor(kNumberTypeFloat32, shape_a, schema::Format_NHWC4, tensorType);
+  lite::Tensor *tensor_a = new (std::nothrow) lite::Tensor(data_type, shape_a, schema::Format_NHWC4);
+  lite::Tensor *tensor_b = new (std::nothrow) lite::Tensor(data_type, shape_b, schema::Format_NHWC4);
+  lite::Tensor *tensor_c = new (std::nothrow) lite::Tensor(data_type, shape_a, schema::Format_NHWC4);
   if (tensor_a == nullptr || tensor_b == nullptr || tensor_c == nullptr) {
     MS_LOG(ERROR) << "Create tensor failed!";
     delete tensor_a;
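Note (not part of the diff): templating the helpers over T is what lets one TestCase body drive both the float and float16_t runs; InitData only produces small integers, which FP16 represents exactly, so the 1e-4 tolerance in DataCompare still holds for the FP16 add. For FP16 results that are not exactly representable, a mixed absolute/relative tolerance is the safer pattern; a standalone sketch with illustrative names, not the test's helpers:

#include <cmath>

// Sketch: comparison whose tolerance scales with magnitude, handy when
// checking FP16 kernel output against an FP32 reference.
template <class T>
static bool CompareWithTolerance(const T *a, const T *b, int size,
                                 float abs_tol = 1e-3f, float rel_tol = 1e-2f) {
  for (int i = 0; i < size; ++i) {
    const float x = static_cast<float>(a[i]);
    const float y = static_cast<float>(b[i]);
    const float diff = std::fabs(x - y);
    if (diff > abs_tol && diff > rel_tol * std::fmax(std::fabs(x), std::fabs(y))) {
      return false;
    }
  }
  return true;
}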
@@ -84,10 +91,10 @@ void TestCase(const std::vector<int> &shape_a, const std::vector<int> &shape_b)
   int64_t element_num = tensor_a->ElementsC4Num();
   int64_t element_num_b = is_bias_add ? 1 : tensor_b->ElementsC4Num();
 
-  float *data_a = new (std::nothrow) float[element_num];
-  float *data_b = new (std::nothrow) float[element_num_b];
-  float *data_c_cpu = new (std::nothrow) float[element_num];
-  float *data_c_ocl = new (std::nothrow) float[element_num];
+  T *data_a = new (std::nothrow) T[element_num];
+  T *data_b = new (std::nothrow) T[element_num_b];
+  T *data_c_cpu = new (std::nothrow) T[element_num];
+  T *data_c_ocl = new (std::nothrow) T[element_num];
   if (data_a == nullptr || data_b == nullptr || data_c_cpu == nullptr || data_c_ocl == nullptr) {
     MS_LOG(ERROR) << "Create buffer failed!";
     delete tensor_a;
@@ -100,12 +107,12 @@ void TestCase(const std::vector<int> &shape_a, const std::vector<int> &shape_b)
     return;
   }
 
-  InitData(data_a, element_num);
-  InitData(data_b, element_num_b);
-  memset(data_c_ocl, 0, sizeof(float) * element_num);
+  InitData<T>(data_a, element_num);
+  InitData<T>(data_b, element_num_b);
+  memset(data_c_ocl, 0, sizeof(T) * element_num);
 
   if (is_bias_add) {
-    BoardcaseAdd(data_a, static_cast<float *>(data_b)[0], data_c_cpu, element_num);
+    BoardcaseAdd(data_a, static_cast<T *>(data_b)[0], data_c_cpu, element_num);
   } else {
     ElementAdd(data_a, data_b, data_c_cpu, element_num);
   }
@@ -115,11 +122,12 @@ void TestCase(const std::vector<int> &shape_a, const std::vector<int> &shape_b)
     inputs.push_back(tensor_b);
   } else {
     tensor_b->MallocData();
-    memcpy(tensor_b->MutableData(), data_b, sizeof(float));
+    memcpy(tensor_b->MutableData(), data_b, sizeof(T));
   }
   std::vector<lite::Tensor *> outputs = {tensor_c};
 
   ArithmeticParameter *param = new (std::nothrow) ArithmeticParameter();
+  param->broadcasting_ = is_bias_add;
   if (param == nullptr) {
     MS_LOG(ERROR) << "Create parameter failed!";
     delete tensor_a;
@@ -170,19 +178,19 @@ void TestCase(const std::vector<int> &shape_a, const std::vector<int> &shape_b)
   }
   kernel->Init();
 
-  memcpy(inputs[0]->MutableData(), data_a, sizeof(float) * element_num);
+  memcpy(inputs[0]->MutableData(), data_a, sizeof(T) * element_num);
   if (!is_bias_add) {
-    memcpy(inputs[1]->MutableData(), data_b, sizeof(float) * element_num_b);
+    memcpy(inputs[1]->MutableData(), data_b, sizeof(T) * element_num_b);
   }
 
   kernel->Run();
 
-  memcpy(data_c_ocl, outputs[0]->MutableData(), sizeof(float) * element_num);
+  memcpy(data_c_ocl, outputs[0]->MutableData(), sizeof(T) * element_num);
 
-  LogData(data_a, 10, "Data A : ");
-  LogData(data_b, tensor_b->shape().empty() ? 1 : 10, "Data B : ");
-  LogData(data_c_cpu, 10, "Expect compute : ");
-  LogData(outputs[0]->MutableData(), 10, "OpenCL compute : ");
+  LogData<T>(data_a, 10, "Data A : ");
+  LogData<T>(data_b, tensor_b->shape().empty() ? 1 : 10, "Data B : ");
+  LogData<T>(data_c_cpu, 10, "Expect compute : ");
+  LogData<T>(outputs[0]->MutableData(), 10, "OpenCL compute : ");
   bool cmp = DataCompare(data_c_cpu, data_c_ocl, element_num);
   MS_LOG(INFO) << "Compare " << (cmp ? "success!" : "failed!");
   EXPECT_EQ(true, cmp);
@@ -210,16 +218,27 @@ class TestArithmeticOpenCL : public mindspore::CommonTest {
   TestArithmeticOpenCL() {}
 };
 
-TEST_F(TestArithmeticOpenCL, AddElementwiseTest) {
+TEST_F(TestArithmeticOpenCL, AddElementwiseFP32) {
   const std::vector<int> &shape_a = {1, 1024, 1024, 4};
   const std::vector<int> &shape_b = {1, 1024, 1024, 4};
-  TestCase(shape_a, shape_b);
+  TestCase<float>(shape_a, shape_b);
 }
 
-TEST_F(TestArithmeticOpenCL, AddBroadcastTest) {
+TEST_F(TestArithmeticOpenCL, AddBroadcastFP32) {
   const std::vector<int> &shape_a = {1, 128, 128, 4};
   const std::vector<int> &shape_b = {};
-  TestCase(shape_a, shape_b);
+  TestCase<float>(shape_a, shape_b);
 }
+
+TEST_F(TestArithmeticOpenCL, AddElementwiseFP16) {
+  const std::vector<int> &shape_a = {1, 1024, 1024, 4};
+  const std::vector<int> &shape_b = {1, 1024, 1024, 4};
+  TestCase<float16_t>(shape_a, shape_b);
+}
+
+TEST_F(TestArithmeticOpenCL, AddBroadcastFP16) {
+  const std::vector<int> &shape_a = {1, 128, 128, 4};
+  const std::vector<int> &shape_b = {};
+  TestCase<float16_t>(shape_a, shape_b);
+}
 }  // namespace mindspore
@@ -71,7 +71,6 @@ static void TestCase(const std::vector<int> &shape_a, const std::vector<int> &sh
   auto allocator = ocl_runtime->GetAllocator();
 
   bool is_broadcast = shape_b.empty();
-  auto tensorType = lite::TensorCategory(schema::NodeType_ValueNode);
   auto format = schema::Format_NHWC4;
 
   auto data_type = kNumberTypeFloat32;
@@ -79,10 +78,10 @@ static void TestCase(const std::vector<int> &shape_a, const std::vector<int> &sh
     data_type = kNumberTypeFloat16;
     ocl_runtime->SetFp16Enable(true);
   }
-  lite::Tensor *tensor_in = new (std::nothrow) lite::Tensor(data_type, shape_a, format, tensorType);
-  lite::Tensor *tensor_scale = new (std::nothrow) lite::Tensor(data_type, shape_b, format, tensorType);
-  lite::Tensor *tensor_offset = new (std::nothrow) lite::Tensor(data_type, shape_b, format, tensorType);
-  lite::Tensor *tensor_out = new (std::nothrow) lite::Tensor(data_type, shape_a, format, tensorType);
+  lite::Tensor *tensor_in = new (std::nothrow) lite::Tensor(data_type, shape_a, format);
+  lite::Tensor *tensor_scale = new (std::nothrow) lite::Tensor(data_type, shape_b, format);
+  lite::Tensor *tensor_offset = new (std::nothrow) lite::Tensor(data_type, shape_b, format);
+  lite::Tensor *tensor_out = new (std::nothrow) lite::Tensor(data_type, shape_a, format);
   if (tensor_in == nullptr || tensor_scale == nullptr || tensor_offset == nullptr) {
     MS_LOG(ERROR) << "Create tensor failed!";
     delete tensor_in;