codeclean for layernorm and conv

This commit is contained in:
Pengyongrong 2020-12-29 03:01:48 -08:00
parent b0794bb5f6
commit 36bb147844
4 changed files with 27 additions and 27 deletions

View File

@ -39,9 +39,9 @@ __constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP
__kernel void Conv2D_H1W1C1(__read_only image2d_t input, __write_only image2d_t output, __global FLT4 *weight,
__global FLT4 *bias, int4 input_shape, int4 output_shape, int4 kernel_stride, int4 pad,
int2 dilation, int act_type, float alpha) {
int BlockH = 1;
int BlockW = 1;
int BlockC = 1;
const int BlockH = 1;
const int BlockW = 1;
const int BlockC = 1;
DEFINE_ARGS;
int oh0 = oh + 0;
@ -102,9 +102,9 @@ __kernel void Conv2D_H1W1C1(__read_only image2d_t input, __write_only image2d_t
__kernel void Conv2D_H2W1C1(__read_only image2d_t input, __write_only image2d_t output, __global FLT4 *weight,
__global FLT4 *bias, int4 input_shape, int4 output_shape, int4 kernel_stride, int4 pad,
int2 dilation, int act_type, float alpha) {
int BlockH = 2;
int BlockW = 1;
int BlockC = 1;
const int BlockH = 2;
const int BlockW = 1;
const int BlockC = 1;
DEFINE_ARGS;
int oh0 = oh + 0;
@ -189,9 +189,9 @@ __kernel void Conv2D_H2W1C1(__read_only image2d_t input, __write_only image2d_t
__kernel void Conv2D_H2W1C2(__read_only image2d_t input, __write_only image2d_t output, __global FLT4 *weight,
__global FLT4 *bias, int4 input_shape, int4 output_shape, int4 kernel_stride, int4 pad,
int2 dilation, int act_type, float alpha) {
int BlockH = 2;
int BlockW = 1;
int BlockC = 2;
const int BlockH = 2;
const int BlockW = 1;
const int BlockC = 2;
DEFINE_ARGS;
int oh0 = oh + 0;
@ -312,9 +312,9 @@ __kernel void Conv2D_H2W1C2(__read_only image2d_t input, __write_only image2d_t
__kernel void Conv2D_H2W2C2(__read_only image2d_t input, __write_only image2d_t output, __global FLT4 *weight,
__global FLT4 *bias, int4 input_shape, int4 output_shape, int4 kernel_stride, int4 pad,
int2 dilation, int act_type, float alpha) {
int BlockH = 2;
int BlockW = 2;
int BlockC = 2;
const int BlockH = 2;
const int BlockW = 2;
const int BlockC = 2;
DEFINE_ARGS;
int oh0 = oh + 0;

View File

@ -7,7 +7,7 @@ __kernel void ComputeMeanVarDim1NHWC4(__read_only image2d_t src_data, __global F
int4 in_shape, int normalized_shape_size) {
int X = get_global_id(0); // n*h
int Y = get_global_id(1); // w
if (X > in_shape.x * in_shape.y || Y > in_shape.z || in_shape.y == 0) {
if (X > in_shape.x * in_shape.y || Y > in_shape.z || in_shape.y == 0 || normalized_shape_size == 0) {
return;
}
int n = X / in_shape.y;

View File

@ -41,8 +41,8 @@ using mindspore::schema::PrimitiveType_FullConnection;
namespace mindspore::kernel {
constexpr size_t CI_TILE = C4NUM;
constexpr size_t CO_TILE = C4NUM;
const size_t CI_TILE = C4NUM;
const size_t CO_TILE = C4NUM;
int Conv2DOpenCLKernel::CheckSpecs() {
if (in_tensors_.size() != 2 && in_tensors_.size() != 3) {
@ -164,12 +164,12 @@ int Conv2DOpenCLKernel::Prepare() {
}
int Conv2DOpenCLKernel::GenerateWinogradFilter() {
constexpr float Gt[] = {1.0000000000, 1.0000000000, 1.0000000000, 1.0000000000, 1.0000000000, 0.0000000000,
0.0000000000, 0.7071067691, -0.7071067691, 1.4142135382, -1.4142135382, 0.0000000000,
0.0000000000, 0.4999999702, 0.4999999702, 1.9999998808, 1.9999998808, 1.0000000000};
constexpr float G[] = {1.0000000000, 0.0000000000, 0.0000000000, 1.0000000000, 0.7071067691, 0.4999999702,
1.0000000000, -0.7071067691, 0.4999999702, 1.0000000000, 1.4142135382, 1.9999998808,
1.0000000000, -1.4142135382, 1.9999998808, 0.0000000000, 0.0000000000, 1.0000000000};
const float Gt[] = {1.0000000000, 1.0000000000, 1.0000000000, 1.0000000000, 1.0000000000, 0.0000000000,
0.0000000000, 0.7071067691, -0.7071067691, 1.4142135382, -1.4142135382, 0.0000000000,
0.0000000000, 0.4999999702, 0.4999999702, 1.9999998808, 1.9999998808, 1.0000000000};
const float G[] = {1.0000000000, 0.0000000000, 0.0000000000, 1.0000000000, 0.7071067691, 0.4999999702,
1.0000000000, -0.7071067691, 0.4999999702, 1.0000000000, 1.4142135382, 1.9999998808,
1.0000000000, -1.4142135382, 1.9999998808, 0.0000000000, 0.0000000000, 1.0000000000};
auto weight_tensor = in_tensors_.at(1);
auto origin_weight_fp32 = reinterpret_cast<float *>(weight_tensor->data_c());
@ -366,11 +366,11 @@ void Conv2DOpenCLKernel::SetGlobalLocal() {
size_t global_h = batch_size_ * UP_DIV(OH_, block_size_.H);
size_t global_w = UP_DIV(OW_, block_size_.W);
size_t global_c = UP_DIV(CO_SLICES_, block_size_.C);
constexpr int local_c_max = 16;
constexpr int local_hw_max = 256;
constexpr int OH_threshold = 100;
constexpr int OW_threshold = 100;
constexpr int OC_threshold = 64;
const int local_c_max = 16;
const int local_hw_max = 256;
const int OH_threshold = 100;
const int OW_threshold = 100;
const int OC_threshold = 64;
size_t local_c = GetMaxDivisor(global_c, local_c_max);
local_c = std::max<size_t>(local_c, 1);
size_t local_hw = local_hw_max / local_c;

View File

@ -1,3 +1,3 @@
mobilenet_v1_1.0_224.tflite 2.5
mobilenet_v2_1.0_224.tflite 2.5
squeezenet.tflite 2.5
inception_v3.tflite 1