!10591 [MS][LITE][DDevelop] solve some problems for stack and gather ops

From: @pengyongrong
Reviewed-by: @ddwsky,@zhang_xue_tong
Signed-off-by: @ddwsky
This commit is contained in:
mindspore-ci-bot 2020-12-26 17:46:23 +08:00 committed by Gitee
commit b936b9641d
5 changed files with 20 additions and 19 deletions

View File

@ -14,16 +14,19 @@ __constant sampler_t smp_none = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE |
// input -1D
// Stack kernel for two image inputs: each work-item reads one texel from input0 and one from input1
// and writes texels of the form {input0 component, input1 component, 0, 0} to output.
// NOTE(review): this span is a rendered diff hunk without +/- markers, so two versions of the body
// appear back to back (X/Y are declared twice, the bounds check is opened twice, and there are two
// read/write sequences). Confirm against the real .cl file before relying on any single line.
__kernel void stack_2input_3axis_1inshape(__read_only image2d_t input0, __read_only image2d_t input1,
__write_only image2d_t output, int4 input_shape, int4 output_shape) {
// First variant (appears to be the superseded one): Y is bounded by output_shape.z only.
int X = get_global_id(0);
int Y = get_global_id(1);
if (X >= output_shape.x * output_shape.y || Y >= output_shape.z) {
// Second variant (appears to be the replacement): Y is bounded by output_shape.z * output_shape.w.
int X = get_global_id(0); // N*H
int Y = get_global_id(1); // W*C
if (X >= output_shape.x * output_shape.y || Y >= output_shape.z * output_shape.w) {
// Work-items outside the checked extent do nothing.
return;
}
// NOTE(review): not referenced by any other statement visible in this kernel.
int coordinate_x_out = output_shape.w;
// First variant: read texel (0, X) from each input, pack the two .x components together,
// and write a single texel at (Y, X).
FLT4 result1 = READ_IMAGE(input0, smp_none, (int2)(0, (X)));
FLT4 result2 = READ_IMAGE(input1, smp_none, (int2)(0, (X)));
FLT4 result = {result1.x, result2.x, 0, 0};
WRITE_IMAGE(output, (int2)(Y, (X)), result);
// Second variant: read texel (X, 0) from each input and copy the four components into
// plain arrays so they can be selected by the loop index below.
FLT4 result1 = READ_IMAGE(input0, smp_none, (int2)(X, 0));
FLT result1_temp[4] = {result1.x, result1.y, result1.z, result1.w};
FLT4 result2 = READ_IMAGE(input1, smp_none, (int2)(X, 0));
FLT result2_temp[4] = {result2.x, result2.y, result2.z, result2.w};
// Emit one output texel per component i, at coordinate (Y, X * C4NUM + i).
// NOTE(review): the temp arrays hold 4 elements, so this assumes C4NUM <= 4 — confirm.
for (int i = 0; i < C4NUM; ++i) {
FLT4 result = {result1_temp[i], result2_temp[i], 0, 0};
WRITE_IMAGE(output, (int2)(Y, (X * C4NUM + i)), result);
}
}
// input -2D -axis = 1

View File

@ -126,8 +126,6 @@ int GatherOpenCLKernel::Prepare() {
int GatherOpenCLKernel::ConvertTensorToweight() {
auto allocator = ocl_runtime_->GetAllocator();
GpuTensorInfo img_info(in_tensors_[1]);
size_t dtype = sizeof(cl_int);
stride_w = img_info.RowPitch() / dtype;
auto indices_tensor = in_tensors_.at(1);
auto indices_num = indices_tensor->ElementsNum();
indices_data_ = reinterpret_cast<int32_t *>(allocator->Malloc(sizeof(int32_t) * indices_num));
@ -140,7 +138,7 @@ int GatherOpenCLKernel::ConvertTensorToweight() {
auto data = indices_tensor->data_c();
if (data_type == kNumberTypeInt32) {
for (int i = 0; i < indices_num; i++) {
indices_data_[i] = reinterpret_cast<int32_t *>(data)[i * stride_w];
indices_data_[i] = reinterpret_cast<int32_t *>(data)[i];
}
} else {
MS_LOG(ERROR) << "Gather Only supported The DataType Of Intensor1 is Int32 "

View File

@ -49,7 +49,6 @@ class GatherOpenCLKernel : public OpenCLKernel {
int axis_ = {0};
bool intensor1_is_tensor{false};
bool enable_fp16_{false};
cl_int stride_w{1};
};
} // namespace mindspore::kernel
#endif

View File

@ -119,8 +119,8 @@ void StackOpenCLKernel::SetGlobalLocal() {
OW_ = out_shape_.s[2];
OC_ = out_shape_.s[3];
} else if (in_tensors_[0]->shape().size() == 1) {
OH_ = out_shape_.s[0] * out_shape_.s[1];
OW_ = out_shape_.s[2];
OH_ = UP_DIV(out_shape_.s[0], C4NUM);
OW_ = out_shape_.s[3];
} else {
OH_ = out_shape_.s[0];
OW_ = out_shape_.s[1];

View File

@ -33,11 +33,12 @@ OpParameter *CreateParameter(int axis) {
TEST_F(TestOpenCL_Stack, input2_ndim1_axis1) {
constexpr int INPUT_NUM = 2;
int axis = 1;
std::vector<int> input_shapes[INPUT_NUM] = {{8}, {8}};
std::vector<int> output_shape = {8, 2};
float input_datas[INPUT_NUM][8] = {{0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.03, 0.37},
{0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.47}};
float output_data[] = {0.75, 0.5, 0.06, 0.6, 0.74, 0.74, 0.30, 0.23, 0.9, 0.46, 0.59, 0.69, 0.03, 0.13, 0.37, 0.47};
std::vector<int> input_shapes[INPUT_NUM] = {{10}, {10}};
std::vector<int> output_shape = {10, 2};
float input_datas[INPUT_NUM][10] = {{0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.03, 0.37, 0.13, 0.47},
{0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.47, 0.59, 0.03}};
float output_data[] = {0.75, 0.5, 0.06, 0.6, 0.74, 0.74, 0.30, 0.23, 0.9, 0.46,
0.59, 0.69, 0.03, 0.13, 0.37, 0.47, 0.13, 0.59, 0.47, 0.03};
for (auto fp16_enable : {true}) {
auto *param = CreateParameter(axis);