From 659c329e988f7eccdf9180756a5736a14c42c781 Mon Sep 17 00:00:00 2001 From: greatpanc Date: Tue, 26 Oct 2021 15:09:47 +0800 Subject: [PATCH] tanh bugfix and reduce op add new type --- .../runtime/kernel/opencl/cl/activation.cl | 2 +- .../src/runtime/kernel/opencl/cl/reduce.cl | 36 +++++++++++++++---- 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/mindspore/lite/src/runtime/kernel/opencl/cl/activation.cl b/mindspore/lite/src/runtime/kernel/opencl/cl/activation.cl index ff0adb5aec1..ca1d525eb62 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/cl/activation.cl +++ b/mindspore/lite/src/runtime/kernel/opencl/cl/activation.cl @@ -64,7 +64,7 @@ __kernel void Tanh(__read_only image2d_t input, __write_only image2d_t output, c int Y = get_global_id(1); if (X >= img_shape.x || Y >= img_shape.y) return; FLT4 in_c4 = READ_IMAGE(input, smp_zero, (int2)(X, Y)); - in_c4 = clamp(in_c4, -10.0f, 10.0f); + in_c4 = clamp(in_c4, (FLT)(-10.0f), (FLT)(10.0f)); WRITE_IMAGE(output, (int2)(X, Y), tanh(in_c4)); } diff --git a/mindspore/lite/src/runtime/kernel/opencl/cl/reduce.cl b/mindspore/lite/src/runtime/kernel/opencl/cl/reduce.cl index 58c3bba08dd..cf1656b1faf 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/cl/reduce.cl +++ b/mindspore/lite/src/runtime/kernel/opencl/cl/reduce.cl @@ -362,7 +362,7 @@ __kernel void GlobalHMean(__read_only image2d_t src_data, __write_only image2d_t __kernel void GlobalH##Method(__read_only image2d_t src_data, __write_only image2d_t dst_data, int4 size) { \ int w = get_global_id(0); \ int c4 = get_global_id(1); \ - float4 result = (float4)0.f; \ + float4 result = (float4)Init##Method; \ for (int h = 0; h < size.x; h++) { \ Do##Method(result, convert_float4(READ_IMAGE(src_data, smp_zero, (int2)(w * size.z + c4, h)))); \ } \ @@ -373,7 +373,7 @@ __kernel void GlobalHMean(__read_only image2d_t src_data, __write_only image2d_t __kernel void GlobalWMean(__read_only image2d_t src_data, __write_only image2d_t dst_data, int4 size) { int h = get_global_id(0); int c4 = get_global_id(1); - float4 result = (float4)0.f; + float4 result = (float4)(0.f); for (int w = 0; w < size.y; w++) { result += convert_float4(READ_IMAGE(src_data, smp_zero, (int2)(w * size.z + c4, h))); } @@ -385,25 +385,47 @@ __kernel void GlobalWMean(__read_only image2d_t src_data, __write_only image2d_t __kernel void GlobalW##Method(__read_only image2d_t src_data, __write_only image2d_t dst_data, int4 size) { \ int h = get_global_id(0); \ int c4 = get_global_id(1); \ - float4 result = (float4)0.f; \ + float4 result = (float4)Init##Method; \ for (int w = 0; w < size.y; w++) { \ Do##Method(result, convert_float4(READ_IMAGE(src_data, smp_zero, (int2)(w * size.z + c4, h)))); \ } \ WRITE_IMAGE(dst_data, (int2)(c4, h), TO_FLT4(result)); \ } +// C +#define DoCSum(a, B) ((a) = dot((float4)(1.0f), (B))) +#define DoCMax(a, B) ((a) = max((B).x, max((B).y, max((B).z, (B).w)))) +#define DoCMin(a, B) ((a) = min((B).x, min((B).y, min((B).z, (B).w)))) +#define DoCProd(a, B) ((a) = (B).x * (B).y * (B).z * (B).w) + +#define GlobalC(Method) \ + __kernel void GlobalC##Method(__read_only image2d_t src_data, __write_only image2d_t dst_data, int4 size, \ + float4 mask) { \ + int h = get_global_id(0); \ + int w = get_global_id(1); \ + float4 value = (float4)Init##Method; \ + for (int c4 = 0; c4 < size.z - 1; c4++) { \ + Do##Method(value, convert_float4(READ_IMAGE(src_data, smp_zero, (int2)(w * size.z + c4, h)))); \ + } \ + Do##Method(value, convert_float4(READ_IMAGE(src_data, smp_zero, (int2)(w * size.z + size.z - 1, h))) + mask); \ + float4 result = (float4)0.f; \ + DoC##Method(result.x, value); \ + WRITE_IMAGE(dst_data, (int2)(w * size.z, h), TO_FLT4(result)); \ + } + #define DoSum(A, B) A += B #define InitSum 0.f -GlobalHWC(Sum) GlobalHW(Sum) GlobalWC(Sum) GlobalH(Sum) GlobalW(Sum) LocalHW(Sum) LocalWC(Sum) +GlobalHWC(Sum) GlobalHW(Sum) GlobalWC(Sum) GlobalH(Sum) GlobalW(Sum) GlobalC(Sum) LocalHW(Sum) LocalWC(Sum) #define DoMin(A, B) A = min(A, B) #define InitMin 10000.f - GlobalHWC(Min) GlobalHW(Min) GlobalWC(Min) GlobalH(Min) GlobalW(Min) LocalHW(Min) LocalWC(Min) + GlobalHWC(Min) GlobalHW(Min) GlobalWC(Min) GlobalH(Min) GlobalW(Min) GlobalC(Min) LocalHW(Min) LocalWC(Min) #define DoMax(A, B) A = max(A, B) #define InitMax -10000.f - GlobalHWC(Max) GlobalHW(Max) GlobalWC(Max) GlobalH(Max) GlobalW(Max) LocalHW(Max) LocalWC(Max) + GlobalHWC(Max) GlobalHW(Max) GlobalWC(Max) GlobalH(Max) GlobalW(Max) GlobalC(Max) LocalHW(Max) LocalWC(Max) #define DoProd(A, B) A *= B #define InitProd 1.f - GlobalHWC(Prod) GlobalHW(Prod) GlobalWC(Prod) GlobalH(Prod) GlobalW(Prod) LocalHW(Prod) LocalWC(Prod) + GlobalHWC(Prod) GlobalHW(Prod) GlobalWC(Prod) GlobalH(Prod) GlobalW(Prod) GlobalC(Prod) LocalHW(Prod) + LocalWC(Prod)