forked from mindspore-Ecosystem/mindspore
!10591 [MS][LITE][DDevelop] solve some problems for stack and gather ops
From: @pengyongrong Reviewed-by: @ddwsky,@zhang_xue_tong Signed-off-by: @ddwsky
This commit is contained in:
commit
b936b9641d
|
@ -14,16 +14,19 @@ __constant sampler_t smp_none = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE |
|
|||
// input -1D
|
||||
__kernel void stack_2input_3axis_1inshape(__read_only image2d_t input0, __read_only image2d_t input1,
|
||||
__write_only image2d_t output, int4 input_shape, int4 output_shape) {
|
||||
int X = get_global_id(0);
|
||||
int Y = get_global_id(1);
|
||||
if (X >= output_shape.x * output_shape.y || Y >= output_shape.z) {
|
||||
int X = get_global_id(0); // N*H
|
||||
int Y = get_global_id(1); // W*C
|
||||
if (X >= output_shape.x * output_shape.y || Y >= output_shape.z * output_shape.w) {
|
||||
return;
|
||||
}
|
||||
int coordinate_x_out = output_shape.w;
|
||||
FLT4 result1 = READ_IMAGE(input0, smp_none, (int2)(0, (X)));
|
||||
FLT4 result2 = READ_IMAGE(input1, smp_none, (int2)(0, (X)));
|
||||
FLT4 result = {result1.x, result2.x, 0, 0};
|
||||
WRITE_IMAGE(output, (int2)(Y, (X)), result);
|
||||
FLT4 result1 = READ_IMAGE(input0, smp_none, (int2)(X, 0));
|
||||
FLT result1_temp[4] = {result1.x, result1.y, result1.z, result1.w};
|
||||
FLT4 result2 = READ_IMAGE(input1, smp_none, (int2)(X, 0));
|
||||
FLT result2_temp[4] = {result2.x, result2.y, result2.z, result2.w};
|
||||
for (int i = 0; i < C4NUM; ++i) {
|
||||
FLT4 result = {result1_temp[i], result2_temp[i], 0, 0};
|
||||
WRITE_IMAGE(output, (int2)(Y, (X * C4NUM + i)), result);
|
||||
}
|
||||
}
|
||||
|
||||
// input -2D -axis = 1
|
||||
|
|
|
@ -126,8 +126,6 @@ int GatherOpenCLKernel::Prepare() {
|
|||
int GatherOpenCLKernel::ConvertTensorToweight() {
|
||||
auto allocator = ocl_runtime_->GetAllocator();
|
||||
GpuTensorInfo img_info(in_tensors_[1]);
|
||||
size_t dtype = sizeof(cl_int);
|
||||
stride_w = img_info.RowPitch() / dtype;
|
||||
auto indices_tensor = in_tensors_.at(1);
|
||||
auto indices_num = indices_tensor->ElementsNum();
|
||||
indices_data_ = reinterpret_cast<int32_t *>(allocator->Malloc(sizeof(int32_t) * indices_num));
|
||||
|
@ -140,7 +138,7 @@ int GatherOpenCLKernel::ConvertTensorToweight() {
|
|||
auto data = indices_tensor->data_c();
|
||||
if (data_type == kNumberTypeInt32) {
|
||||
for (int i = 0; i < indices_num; i++) {
|
||||
indices_data_[i] = reinterpret_cast<int32_t *>(data)[i * stride_w];
|
||||
indices_data_[i] = reinterpret_cast<int32_t *>(data)[i];
|
||||
}
|
||||
} else {
|
||||
MS_LOG(ERROR) << "Gather Only supported The DataType Of Intensor1 is Int32 "
|
||||
|
|
|
@ -49,7 +49,6 @@ class GatherOpenCLKernel : public OpenCLKernel {
|
|||
int axis_ = {0};
|
||||
bool intensor1_is_tensor{false};
|
||||
bool enable_fp16_{false};
|
||||
cl_int stride_w{1};
|
||||
};
|
||||
} // namespace mindspore::kernel
|
||||
#endif
|
||||
|
|
|
@ -119,8 +119,8 @@ void StackOpenCLKernel::SetGlobalLocal() {
|
|||
OW_ = out_shape_.s[2];
|
||||
OC_ = out_shape_.s[3];
|
||||
} else if (in_tensors_[0]->shape().size() == 1) {
|
||||
OH_ = out_shape_.s[0] * out_shape_.s[1];
|
||||
OW_ = out_shape_.s[2];
|
||||
OH_ = UP_DIV(out_shape_.s[0], C4NUM);
|
||||
OW_ = out_shape_.s[3];
|
||||
} else {
|
||||
OH_ = out_shape_.s[0];
|
||||
OW_ = out_shape_.s[1];
|
||||
|
|
|
@ -33,11 +33,12 @@ OpParameter *CreateParameter(int axis) {
|
|||
TEST_F(TestOpenCL_Stack, input2_ndim1_axis1) {
|
||||
constexpr int INPUT_NUM = 2;
|
||||
int axis = 1;
|
||||
std::vector<int> input_shapes[INPUT_NUM] = {{8}, {8}};
|
||||
std::vector<int> output_shape = {8, 2};
|
||||
float input_datas[INPUT_NUM][8] = {{0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.03, 0.37},
|
||||
{0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.47}};
|
||||
float output_data[] = {0.75, 0.5, 0.06, 0.6, 0.74, 0.74, 0.30, 0.23, 0.9, 0.46, 0.59, 0.69, 0.03, 0.13, 0.37, 0.47};
|
||||
std::vector<int> input_shapes[INPUT_NUM] = {{10}, {10}};
|
||||
std::vector<int> output_shape = {10, 2};
|
||||
float input_datas[INPUT_NUM][10] = {{0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.03, 0.37, 0.13, 0.47},
|
||||
{0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.47, 0.59, 0.03}};
|
||||
float output_data[] = {0.75, 0.5, 0.06, 0.6, 0.74, 0.74, 0.30, 0.23, 0.9, 0.46,
|
||||
0.59, 0.69, 0.03, 0.13, 0.37, 0.47, 0.13, 0.59, 0.47, 0.03};
|
||||
|
||||
for (auto fp16_enable : {true}) {
|
||||
auto *param = CreateParameter(axis);
|
||||
|
|
Loading…
Reference in New Issue