forked from mindspore-Ecosystem/mindspore
codeclean for layernorm and conv
This commit is contained in:
parent
b0794bb5f6
commit
36bb147844
|
@ -39,9 +39,9 @@ __constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP
|
|||
__kernel void Conv2D_H1W1C1(__read_only image2d_t input, __write_only image2d_t output, __global FLT4 *weight,
|
||||
__global FLT4 *bias, int4 input_shape, int4 output_shape, int4 kernel_stride, int4 pad,
|
||||
int2 dilation, int act_type, float alpha) {
|
||||
int BlockH = 1;
|
||||
int BlockW = 1;
|
||||
int BlockC = 1;
|
||||
const int BlockH = 1;
|
||||
const int BlockW = 1;
|
||||
const int BlockC = 1;
|
||||
DEFINE_ARGS;
|
||||
|
||||
int oh0 = oh + 0;
|
||||
|
@ -102,9 +102,9 @@ __kernel void Conv2D_H1W1C1(__read_only image2d_t input, __write_only image2d_t
|
|||
__kernel void Conv2D_H2W1C1(__read_only image2d_t input, __write_only image2d_t output, __global FLT4 *weight,
|
||||
__global FLT4 *bias, int4 input_shape, int4 output_shape, int4 kernel_stride, int4 pad,
|
||||
int2 dilation, int act_type, float alpha) {
|
||||
int BlockH = 2;
|
||||
int BlockW = 1;
|
||||
int BlockC = 1;
|
||||
const int BlockH = 2;
|
||||
const int BlockW = 1;
|
||||
const int BlockC = 1;
|
||||
DEFINE_ARGS;
|
||||
|
||||
int oh0 = oh + 0;
|
||||
|
@ -189,9 +189,9 @@ __kernel void Conv2D_H2W1C1(__read_only image2d_t input, __write_only image2d_t
|
|||
__kernel void Conv2D_H2W1C2(__read_only image2d_t input, __write_only image2d_t output, __global FLT4 *weight,
|
||||
__global FLT4 *bias, int4 input_shape, int4 output_shape, int4 kernel_stride, int4 pad,
|
||||
int2 dilation, int act_type, float alpha) {
|
||||
int BlockH = 2;
|
||||
int BlockW = 1;
|
||||
int BlockC = 2;
|
||||
const int BlockH = 2;
|
||||
const int BlockW = 1;
|
||||
const int BlockC = 2;
|
||||
DEFINE_ARGS;
|
||||
|
||||
int oh0 = oh + 0;
|
||||
|
@ -312,9 +312,9 @@ __kernel void Conv2D_H2W1C2(__read_only image2d_t input, __write_only image2d_t
|
|||
__kernel void Conv2D_H2W2C2(__read_only image2d_t input, __write_only image2d_t output, __global FLT4 *weight,
|
||||
__global FLT4 *bias, int4 input_shape, int4 output_shape, int4 kernel_stride, int4 pad,
|
||||
int2 dilation, int act_type, float alpha) {
|
||||
int BlockH = 2;
|
||||
int BlockW = 2;
|
||||
int BlockC = 2;
|
||||
const int BlockH = 2;
|
||||
const int BlockW = 2;
|
||||
const int BlockC = 2;
|
||||
DEFINE_ARGS;
|
||||
|
||||
int oh0 = oh + 0;
|
||||
|
|
|
@ -7,7 +7,7 @@ __kernel void ComputeMeanVarDim1NHWC4(__read_only image2d_t src_data, __global F
|
|||
int4 in_shape, int normalized_shape_size) {
|
||||
int X = get_global_id(0); // n*h
|
||||
int Y = get_global_id(1); // w
|
||||
if (X > in_shape.x * in_shape.y || Y > in_shape.z || in_shape.y == 0) {
|
||||
if (X > in_shape.x * in_shape.y || Y > in_shape.z || in_shape.y == 0 || normalized_shape_size == 0) {
|
||||
return;
|
||||
}
|
||||
int n = X / in_shape.y;
|
||||
|
|
|
@ -41,8 +41,8 @@ using mindspore::schema::PrimitiveType_FullConnection;
|
|||
|
||||
namespace mindspore::kernel {
|
||||
|
||||
constexpr size_t CI_TILE = C4NUM;
|
||||
constexpr size_t CO_TILE = C4NUM;
|
||||
const size_t CI_TILE = C4NUM;
|
||||
const size_t CO_TILE = C4NUM;
|
||||
|
||||
int Conv2DOpenCLKernel::CheckSpecs() {
|
||||
if (in_tensors_.size() != 2 && in_tensors_.size() != 3) {
|
||||
|
@ -164,12 +164,12 @@ int Conv2DOpenCLKernel::Prepare() {
|
|||
}
|
||||
|
||||
int Conv2DOpenCLKernel::GenerateWinogradFilter() {
|
||||
constexpr float Gt[] = {1.0000000000, 1.0000000000, 1.0000000000, 1.0000000000, 1.0000000000, 0.0000000000,
|
||||
0.0000000000, 0.7071067691, -0.7071067691, 1.4142135382, -1.4142135382, 0.0000000000,
|
||||
0.0000000000, 0.4999999702, 0.4999999702, 1.9999998808, 1.9999998808, 1.0000000000};
|
||||
constexpr float G[] = {1.0000000000, 0.0000000000, 0.0000000000, 1.0000000000, 0.7071067691, 0.4999999702,
|
||||
1.0000000000, -0.7071067691, 0.4999999702, 1.0000000000, 1.4142135382, 1.9999998808,
|
||||
1.0000000000, -1.4142135382, 1.9999998808, 0.0000000000, 0.0000000000, 1.0000000000};
|
||||
const float Gt[] = {1.0000000000, 1.0000000000, 1.0000000000, 1.0000000000, 1.0000000000, 0.0000000000,
|
||||
0.0000000000, 0.7071067691, -0.7071067691, 1.4142135382, -1.4142135382, 0.0000000000,
|
||||
0.0000000000, 0.4999999702, 0.4999999702, 1.9999998808, 1.9999998808, 1.0000000000};
|
||||
const float G[] = {1.0000000000, 0.0000000000, 0.0000000000, 1.0000000000, 0.7071067691, 0.4999999702,
|
||||
1.0000000000, -0.7071067691, 0.4999999702, 1.0000000000, 1.4142135382, 1.9999998808,
|
||||
1.0000000000, -1.4142135382, 1.9999998808, 0.0000000000, 0.0000000000, 1.0000000000};
|
||||
|
||||
auto weight_tensor = in_tensors_.at(1);
|
||||
auto origin_weight_fp32 = reinterpret_cast<float *>(weight_tensor->data_c());
|
||||
|
@ -366,11 +366,11 @@ void Conv2DOpenCLKernel::SetGlobalLocal() {
|
|||
size_t global_h = batch_size_ * UP_DIV(OH_, block_size_.H);
|
||||
size_t global_w = UP_DIV(OW_, block_size_.W);
|
||||
size_t global_c = UP_DIV(CO_SLICES_, block_size_.C);
|
||||
constexpr int local_c_max = 16;
|
||||
constexpr int local_hw_max = 256;
|
||||
constexpr int OH_threshold = 100;
|
||||
constexpr int OW_threshold = 100;
|
||||
constexpr int OC_threshold = 64;
|
||||
const int local_c_max = 16;
|
||||
const int local_hw_max = 256;
|
||||
const int OH_threshold = 100;
|
||||
const int OW_threshold = 100;
|
||||
const int OC_threshold = 64;
|
||||
size_t local_c = GetMaxDivisor(global_c, local_c_max);
|
||||
local_c = std::max<size_t>(local_c, 1);
|
||||
size_t local_hw = local_hw_max / local_c;
|
||||
|
|
|
@ -1,3 +1,3 @@
|
|||
mobilenet_v1_1.0_224.tflite 2.5
|
||||
mobilenet_v2_1.0_224.tflite 2.5
|
||||
squeezenet.tflite 2.5
|
||||
inception_v3.tflite 1
|
||||
|
|
Loading…
Reference in New Issue