commit e103ba3aab
@@ -1,102 +0,0 @@
-/**
- * Copyright 2022 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "nnacl/experimental/base_matmul.h"
-#include "nnacl/experimental/fp32_funcs.h"
-
-typedef struct BaseMatmulStru {
-  KernelBase *base;
-  size_t deep;
-  size_t row;
-  size_t col;
-  size_t thread_num;
-  uint8_t *a_ptr;
-  uint8_t *b_ptr;
-  uint8_t *c_ptr;
-  uint8_t *bias;
-  uint8_t *tmp_ptr;
-  float min;
-  float max;
-  size_t row_unit;
-  size_t row_tile;
-} BaseMatmulStru;
-
-int BaseMatmulRun(void *param, int task_id, float lhs_scale, float rhs_scale) {
-  BaseMatmulStru *mm = (BaseMatmulStru *)param;
-  if (mm == NULL) {
-    return -1;
-  }
-
-  size_t pack_uint = mm->base->funcs->pack * mm->base->funcs->byte;
-
-  for (size_t i = task_id; i < mm->row_unit; i += mm->thread_num) {
-    int xStart = i * mm->row_tile;
-    uint8_t *a = mm->a_ptr + xStart * pack_uint;
-    uint8_t *tmp = mm->tmp_ptr + mm->row_tile * mm->deep * task_id * mm->base->funcs->byte;
-    mm->base->funcs->PackLeft(tmp, a, mm->row_tile, mm->deep, mm->row);
-    mm->base->funcs->Matmul(mm->c_ptr + xStart * pack_uint, tmp, mm->b_ptr, mm->bias, mm->row_tile, mm->deep, mm->col,
-                            mm->row * mm->base->funcs->pack, mm->min, mm->max);
-  }
-  return 0;
-}
-
-void BaseMatmul(uint8_t *a_ptr, uint8_t *b_ptr, uint8_t *bias, uint8_t *c_ptr, int row, int deep, int col,
-                ActType act_type, int thread_num, KernelBase *base) {
-  BaseMatmulStru basemm;
-  if (a_ptr == NULL || b_ptr == NULL || c_ptr == NULL) {
-    return;
-  }
-  basemm.base = base;
-  basemm.deep = deep;
-  basemm.col = col;
-  basemm.row = row;
-  basemm.a_ptr = a_ptr;
-  basemm.b_ptr = b_ptr;
-  basemm.c_ptr = c_ptr;
-  basemm.bias = bias;
-  basemm.thread_num = thread_num;
-
-  int byte = basemm.base->funcs->byte;
-  int pack = basemm.base->funcs->pack;
-  int row_tile, deep_tile, col_tile;
-  basemm.base->funcs->InitMatmulTileCount(&row_tile, &deep_tile, &col_tile);
-
-  basemm.row_tile = row_tile;
-  if (row_tile == 0) {
-    return;
-  }
-  basemm.row_unit = row / row_tile;
-
-  if (bias != NULL || act_type != ActType_No) {
-    GetPostParameters(act_type, &basemm.min, &basemm.max);
-  }
-
-  basemm.tmp_ptr = (uint8_t *)basemm.base->env->alloc(basemm.base->env->allocator,
-                                                      thread_num * UP_ROUND(deep, deep_tile) * row_tile * byte);
-  basemm.base->env->parallelLaunch(basemm.base->env->threadPool, BaseMatmulRun, &basemm, thread_num);
-
-  size_t row_remain = row - basemm.row_unit * row_tile;
-  if (row_remain != 0) {
-    int32_t start_row = basemm.row_unit * row_tile;
-    uint8_t *a_remain_ptr = a_ptr + start_row * pack * byte;
-    basemm.base->funcs->PackLeft(basemm.tmp_ptr, a_remain_ptr, row_remain, deep, row);
-    basemm.base->funcs->MatMulRes(c_ptr + start_row * pack * byte, basemm.tmp_ptr, b_ptr, bias, row_remain, basemm.deep,
-                                  basemm.col, basemm.row * basemm.base->funcs->pack, basemm.min, basemm.max);
-  }
-
-  basemm.base->env->free(basemm.base->env->allocator, basemm.tmp_ptr);
-  return;
-}
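The deleted BaseMatmul above (and the ExperimentalMatmul that replaces it later in this commit) uses one row-tiling scheme: full row_tile-sized blocks are distributed across threads, and whatever rows are left over are packed and multiplied after the parallel launch. A standalone sketch of that split, illustrative only and not code from this commit:

#include <stdio.h>

/* Split `row` into full tiles plus a remainder, as BaseMatmul does. */
static void split_rows(int row, int row_tile, int *row_unit, int *row_remain) {
  *row_unit = row / row_tile;               /* full tiles, shared across threads */
  *row_remain = row - *row_unit * row_tile; /* leftover rows, handled serially */
}

int main(void) {
  int unit, remain;
  split_rows(100, 16, &unit, &remain); /* e.g. 100 rows with a C16NUM row tile */
  printf("units=%d remain=%d\n", unit, remain); /* prints: units=6 remain=4 */
  return 0;
}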
@@ -1,33 +0,0 @@
-/**
- * Copyright 2022 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef MINDSPORE_NNACL_EXPERIMENT_CORE_FUNCS_H_
-#define MINDSPORE_NNACL_EXPERIMENT_CORE_FUNCS_H_
-
-typedef struct CoreFuncs {
-  int pack;
-  int byte;
-  void (*InitMatmulTileCount)(int *row_tile, int *deep_tile, int *col_tile);
-  void (*PackNcX)(const void *src, void *dst, int batch, int plane, int channel);
-  void (*UnPackNcX)(const void *src, void *dst, int batch, int plane, int channel);
-  void (*PackLeft)(void *dst, void *src, size_t row, size_t deep, size_t src_stride);
-  void (*PackRight)(const void *src, void *dst, int batch, int plane, int channel);
-  void (*Matmul)(void *c_ptr, void *a_ptr, void *b_ptr, void *bias, size_t row, size_t deep, size_t col,
-                 size_t dst_stride, float min, float max);
-  void (*MatMulRes)(void *c_ptr, void *a_ptr, void *b_ptr, void *bias, size_t row, size_t deep, size_t col,
-                    size_t dst_stride, float min, float max);
-} CoreFuncs;
-
-#endif  // MINDSPORE_NNACL_EXPERIMENT_CORE_FUNCS_H_
@@ -0,0 +1,70 @@
+/**
+ * Copyright 2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_NNACL_EXPERIMENT_MS_CORE_H_
+#define MINDSPORE_NNACL_EXPERIMENT_MS_CORE_H_
+
+#include <float.h>
+#include "nnacl/op_base.h"
+#include "nnacl/exp_parameter.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct CoreFuncs {
+  int pack;
+  int byte;
+  int (*ExpFusion)(const void *src_data, void *dst_data, const ExpParameter *param, int task_id);
+  void (*PackNcX)(const void *src, void *dst, int batch, int plane, int channel);
+  void (*UnPackNcX)(const void *src, void *dst, int batch, int plane, int channel);
+  void (*PostParam)(ActType act, float *min, float *max);
+
+  void (*ExpMatmulTile)(int *row_tile, int *deep_tile, int *col_tile);
+  void (*ExpMatmulPackIn)(void *dst, void *src, size_t row, size_t deep, size_t src_stride);
+  void (*ExpMatmulBlock)(void *c_ptr, void *a_ptr, void *b_ptr, void *bias, size_t row, size_t deep, size_t col,
+                         size_t dst_stride, float min, float max);
+  void (*ExpMatMulRemain)(void *c_ptr, void *a_ptr, void *b_ptr, void *bias, size_t row, size_t deep, size_t col,
+                          size_t dst_stride, float min, float max);
+
+  void (*OptMatmulTile)(int *row_tile, int *col_tile);
+} CoreFuncs;
+
+/* x86 */
+void InitCore(CoreFuncs *funcs_);
+
+/* arm64 fp32 */
+void InitFp32Core(CoreFuncs *funcs_);
+
+/* arm64 fp16 */
+void InitFp16Core(CoreFuncs *funcs_);
+
+/* arm32 */
+void InitArm32Core(CoreFuncs *funcs_);
+
+/* avx */
+void InitAvxCore(CoreFuncs *funcs_);
+
+/* avx512 */
+void InitAvx512Core(CoreFuncs *funcs_);
+
+/* sse */
+void InitSseCore(CoreFuncs *funcs_);
+
+#ifdef __cplusplus
+}
+#endif
+#endif  // MINDSPORE_NNACL_EXPERIMENT_MS_CORE_H_
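The new CoreFuncs table above is filled in layers: InitCore installs portable defaults, and each ISA-specific Init*Core overrides only the entries it accelerates (see the rewritten GetCoreFuncs further down). A freestanding sketch of that layering pattern, with illustrative names standing in for the real table; the tile and pack values mirror InitCore and InitAvxCore from this diff:

#include <stdio.h>

typedef struct FuncTable {
  int pack;
  void (*tile)(int *row_tile, int *col_tile);
} FuncTable;

static void generic_tile(int *r, int *c) { *r = 12; *c = 8; }
static void avx_tile(int *r, int *c) { *r = 6; *c = 16; }

static void init_generic(FuncTable *t) { /* portable defaults */
  t->pack = 4;
  t->tile = generic_tile;
}

static void init_avx(FuncTable *t) { /* override only what AVX improves */
  t->pack = 8;
  t->tile = avx_tile;
}

int main(void) {
  FuncTable t;
  init_generic(&t);
  init_avx(&t); /* layered per-ISA, exactly like GetCoreFuncs */
  int r, c;
  t.tile(&r, &c);
  printf("pack=%d tile=%dx%d\n", t.pack, r, c); /* pack=8 tile=6x16 */
  return 0;
}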
@@ -13,20 +13,17 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#ifndef MINDSPORE_NNACL_EXPERIMENT_FP32_FUNCS_H_
-#define MINDSPORE_NNACL_EXPERIMENT_FP32_FUNCS_H_
-
-#include "nnacl/kernel.h"
+#ifdef ENABLE_ARM32
+#include "nnacl/experimental/ms_core.h"
+void InitOptMatmulTileArm32(int *row_tile, int *col_tile) {
+  *row_tile = C12NUM;
+  *col_tile = C4NUM;
+}
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void InitFp32Funcs(CoreFuncs *funcs_);
-
-void GetPostParameters(ActType act, float *min, float *max);
-
-#ifdef __cplusplus
+void InitArm32Core(CoreFuncs *funcs_) {
+  funcs_->pack = C4NUM;
+  funcs_->byte = sizeof(float);
+  funcs_->OptMatmulTile = InitOptMatmulTileArm32;
 }
 #endif
-#endif  // MINDSPORE_NNACL_EXPERIMENT_FP32_FUNCS_H_
@@ -13,18 +13,14 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#ifndef MINDSPORE_NNACL_EXPERIMENT_FP16_FUNCS_H_
-#define MINDSPORE_NNACL_EXPERIMENT_FP16_FUNCS_H_
-
-#include "nnacl/kernel.h"
+#ifdef ENABLE_FP16
+#include "nnacl/experimental/ms_core.h"
+#include "nnacl/fp16/exp_fp16.h"
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void InitFp16Funcs(CoreFuncs *funcs_);
-
-#ifdef __cplusplus
+void InitFp16Core(CoreFuncs *funcs_) {
+  funcs_->pack = C8NUM;
+  funcs_->byte = sizeof(float16_t);
+  funcs_->ExpFusion = ExpFusionFp16;
 }
 #endif
-#endif  // MINDSPORE_NNACL_EXPERIMENT_FP32_FUNCS_H_
@@ -14,11 +14,11 @@
  * limitations under the License.
  */
 
-#include "nnacl/experimental/fp16_funcs.h"
-
-void InitFp16Funcs(CoreFuncs *funcs_) {
 #ifdef ENABLE_ARM64
-  funcs_->pack = C8NUM;
-  funcs_->byte = sizeof(float16_t);
-#endif
+#include "nnacl/experimental/ms_core.h"
+
+void InitFp32Core(CoreFuncs *funcs_) {
+  funcs_->pack = C4NUM;
+  funcs_->byte = sizeof(float);
 }
+#endif
@@ -0,0 +1,29 @@
+/**
+ * Copyright 2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef ENABLE_AVX
+#include "nnacl/experimental/ms_core.h"
+void InitOptMatmulTileAvx(int *row_tile, int *col_tile) {
+  *row_tile = C6NUM;
+  *col_tile = C16NUM;
+}
+
+void InitAvxCore(CoreFuncs *funcs_) {
+  funcs_->pack = C8NUM;
+  funcs_->byte = sizeof(float);
+  funcs_->OptMatmulTile = InitOptMatmulTileAvx;
+}
+#endif
@@ -0,0 +1,24 @@
+/**
+ * Copyright 2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef ENABLE_AVX512
+#include "nnacl/experimental/ms_core.h"
+
+void InitAvx512Core(CoreFuncs *funcs_) {
+  funcs_->pack = C16NUM;
+  funcs_->byte = sizeof(float);
+}
+#endif
@@ -0,0 +1,29 @@
+/**
+ * Copyright 2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef ENABLE_SSE
+#include "nnacl/experimental/ms_core.h"
+void InitOptMatmulTileSse(int *row_tile, int *col_tile) {
+  *row_tile = C4NUM;
+  *col_tile = C8NUM;
+}
+
+void InitSseCore(CoreFuncs *funcs_) {
+  funcs_->pack = C4NUM;
+  funcs_->byte = sizeof(float);
+  funcs_->OptMatmulTile = InitOptMatmulTileSse;
+}
+#endif
@@ -14,10 +14,10 @@
  * limitations under the License.
  */
 
-#include "nnacl/experimental/fp32_funcs.h"
-#include <float.h>
+#include "nnacl/experimental/ms_core.h"
 #include "nnacl/op_base.h"
+#include "nnacl/fp32/pack_fp32.h"
 #include "nnacl/fp32/exp_fp32.h"
 
 void GetPostParameters(ActType act, float *min, float *max) {
 #define RELU6_VALUE 6.0f
@@ -35,13 +35,13 @@ void GetPostParameters(ActType act, float *min, float *max) {
   return;
 }
 
-void InitBaseMMFp32TileCount(int *row_tile, int *deep_tile, int *col_tile) {
+void InitExpMMFp32TileCount(int *row_tile, int *deep_tile, int *col_tile) {
   *row_tile = C16NUM;
   *col_tile = C4NUM;
   *deep_tile = 1;
 }
 
-void PackMatmulA(void *dst_ptr, void *src_ptr, size_t row, size_t deep, size_t src_stride) {
+void PackExpMatmulIn(void *dst_ptr, void *src_ptr, size_t row, size_t deep, size_t src_stride) {
   /* src_stride : total row */
   float *dst = (float *)dst_ptr;
   float *src = (float *)src_ptr;
@@ -54,8 +54,8 @@ void PackMatmulA(void *dst_ptr, void *src_ptr, size_t row, size_t deep, size_t s
   }
 }
 
-static void DoBaseMatmul(float *c_ptr, const float *a_ptr, const float *b_ptr, const float *bias, size_t row,
-                         size_t deep, size_t col, size_t dst_stride, float min, float max) {
+static void ExpMatmul(float *c_ptr, const float *a_ptr, const float *b_ptr, const float *bias, size_t row, size_t deep,
+                      size_t col, size_t dst_stride, float min, float max) {
   /* dst_stride : total_row * pack */
   for (size_t r = 0; r < row; r++) {
     for (size_t c = 0; c < col; c++) {
@@ -79,32 +79,39 @@ static void DoBaseMatmul(float *c_ptr, const float *a_ptr, const float *b_ptr, c
   }
 }
 
-void BaseMatMul(void *c_ptr, void *a_ptr, void *b_ptr, void *bias_ptr, size_t row, size_t deep, size_t col,
-                size_t dst_stride, float min, float max) {
+void ExpMatMulBlock(void *c_ptr, void *a_ptr, void *b_ptr, void *bias_ptr, size_t row, size_t deep, size_t col,
+                    size_t dst_stride, float min, float max) {
   float *c = (float *)c_ptr;
   float *a = (float *)a_ptr;
   float *b = (float *)b_ptr;
   float *bias = (float *)bias_ptr;
-  return DoBaseMatmul(c, a, b, bias, row, deep, col, dst_stride, min, max);
+  return ExpMatmul(c, a, b, bias, row, deep, col, dst_stride, min, max);
 }
 
-void BaseMatMulRes(void *c_ptr, void *a_ptr, void *b_ptr, void *bias_ptr, size_t row, size_t deep, size_t col,
-                   size_t dst_stride, float min, float max) {
+void ExpMatmulRemain(void *c_ptr, void *a_ptr, void *b_ptr, void *bias_ptr, size_t row, size_t deep, size_t col,
+                     size_t dst_stride, float min, float max) {
   float *c = (float *)c_ptr;
   float *a = (float *)a_ptr;
   float *b = (float *)b_ptr;
   float *bias = (float *)bias_ptr;
-  return DoBaseMatmul(c, a, b, bias, row, deep, col, dst_stride, min, max);
+  return ExpMatmul(c, a, b, bias, row, deep, col, dst_stride, min, max);
 }
 
-void InitFp32Funcs(CoreFuncs *funcs_) {
+void InitOptMatmulTile(int *row_tile, int *col_tile) {
+  *row_tile = C12NUM;
+  *col_tile = C8NUM;
+}
+
+void InitCore(CoreFuncs *funcs_) {
   funcs_->pack = C4NUM;
   funcs_->byte = sizeof(float);
-  funcs_->InitMatmulTileCount = InitBaseMMFp32TileCount;
-  funcs_->PackNcX = NULL;
-  funcs_->UnPackNcX = NULL;
-  funcs_->PackLeft = PackMatmulA;
-  funcs_->PackRight = NULL;
-  funcs_->Matmul = BaseMatMul;
-  funcs_->MatMulRes = BaseMatMulRes;
+  funcs_->ExpMatmulTile = InitExpMMFp32TileCount;
+  funcs_->PackNcX = PackNCHWToNC4HW4Fp32;
+  funcs_->UnPackNcX = PackNC4HW4ToNCHWFp32;
+  funcs_->ExpMatmulPackIn = PackExpMatmulIn;
+  funcs_->ExpMatmulBlock = ExpMatMulBlock;
+  funcs_->ExpMatMulRemain = ExpMatmulRemain;
+  funcs_->ExpFusion = ExpFusionFp32;
+  funcs_->OptMatmulTile = InitOptMatmulTile;
+  funcs_->PostParam = GetPostParameters;
 }
@@ -1249,8 +1249,8 @@ void UnPackC4Uint(const void *src, void *dst, size_t plane, size_t channel) {
     size_t c_div = c / C4NUM;
     size_t c_mod = c % C4NUM;
     for (size_t p = 0; p < plane; p++) {
-      int src_offset = c_div * plane * C4NUM + plane * C4NUM + c_mod;
-      int dst_offset = p * channel + c;
+      int src_offset = c_div * plane * C4NUM + p * C4NUM + c_mod;
+      int dst_offset = c * plane + p;
       fp32_dst[dst_offset] = fp32_src[src_offset];
     }
   }
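The UnPackC4Uint hunk above fixes two indexing bugs: the source offset used the constant term plane * C4NUM where the per-plane term p * C4NUM was intended (so every plane read the same packed row), and the destination was indexed p * channel + c (NHWC order) instead of c * plane + p (NCHW order). In the NC4HW4 layout, channel c lives at lane c % 4 of channel group c / 4. A minimal standalone check of the corrected source index, assuming C4NUM is 4:

#include <assert.h>

/* Corrected NC4HW4 source index for element (channel c, position p). */
static int nc4hw4_src_offset(int c, int p, int plane) {
  return (c / 4) * plane * 4 + p * 4 + (c % 4);
}

int main(void) {
  /* plane = 3: channel 5 is lane 1 of group 1 -> offsets 13, 17, 21. */
  assert(nc4hw4_src_offset(5, 0, 3) == 13);
  assert(nc4hw4_src_offset(5, 1, 3) == 17);
  assert(nc4hw4_src_offset(5, 2, 3) == 21);
  return 0;
}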
@@ -13,20 +13,21 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 
 #include "nnacl/kernel.h"
 #include "nnacl/tensor_c.h"
 #include "nnacl/op_base.h"
-#include "nnacl/experimental/fp32_funcs.h"
-#include "nnacl/experimental/fp16_funcs.h"
+#include "nnacl/experimental/ms_core.h"
 #ifdef _MSC_VER
 #include "nnacl/experimental/conv.h"
 #include "nnacl/kernel/exp.h"
 #endif
 
 static KernelCreator g_kernelCreatorRegistry[PrimType_MAX][Format_MAX][16];
+#define REGIST_DT(DT) (DT - kNumberTypeBegin - 1)
 
 void RegKernelCreator(int opType, int format, int dataType, KernelCreator creator) {
-  g_kernelCreatorRegistry[opType][format][dataType - kNumberTypeBegin - 1] = creator;
+  g_kernelCreatorRegistry[opType][format][REGIST_DT(dataType)] = creator;
 }
 
 void Init_MSC_VER_kernels(void) {
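The REGIST_DT macro introduced above only names the index arithmetic that was previously written out long-hand at every call site, so the `- kNumberTypeBegin - 1` offset now lives in one place. A toy illustration with made-up enum values (the real TypeId constants come from op_base.h, not from this sketch):

#include <assert.h>

/* Illustrative TypeId values only; the real ones are defined elsewhere. */
enum { kNumberTypeBegin = 29, kNumberTypeFloat32 = 43 };
#define REGIST_DT(DT) (DT - kNumberTypeBegin - 1)

int main(void) {
  /* With these values Float32 lands at slot 13 of the 16-slot type axis. */
  assert(REGIST_DT(kNumberTypeFloat32) == 13);
  return 0;
}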
@@ -35,14 +36,13 @@ void Init_MSC_VER_kernels(void) {
    * register here first time */
   static bool inited = false;
   if (inited == false) {
-    g_kernelCreatorRegistry[PrimType_Conv2DFusion][Format_NC4HW4][kNumberTypeFloat32 - kNumberTypeBegin - 1] =
-      CreateConv;
-    g_kernelCreatorRegistry[PrimType_ExpFusion][Format_NHWC][kNumberTypeFloat32 - kNumberTypeBegin - 1] = CreateExp;
-    g_kernelCreatorRegistry[PrimType_ExpFusion][Format_NHWC][kNumberTypeFloat16 - kNumberTypeBegin - 1] = CreateExp;
-    g_kernelCreatorRegistry[PrimType_ExpFusion][Format_NCHW][kNumberTypeFloat32 - kNumberTypeBegin - 1] = CreateExp;
-    g_kernelCreatorRegistry[PrimType_ExpFusion][Format_NCHW][kNumberTypeFloat16 - kNumberTypeBegin - 1] = CreateExp;
-    g_kernelCreatorRegistry[PrimType_ExpFusion][Format_NC4HW4][kNumberTypeFloat32 - kNumberTypeBegin - 1] = CreateExp;
-    g_kernelCreatorRegistry[PrimType_ExpFusion][Format_NC8HW8][kNumberTypeFloat16 - kNumberTypeBegin - 1] = CreateExp;
+    g_kernelCreatorRegistry[PrimType_Conv2DFusion][Format_NC4HW4][REGIST_DT(kNumberTypeFloat32)] = CreateConv;
+    g_kernelCreatorRegistry[PrimType_ExpFusion][Format_NHWC][REGIST_DT(kNumberTypeFloat32)] = CreateExp;
+    g_kernelCreatorRegistry[PrimType_ExpFusion][Format_NHWC][REGIST_DT(kNumberTypeFloat16)] = CreateExp;
+    g_kernelCreatorRegistry[PrimType_ExpFusion][Format_NCHW][REGIST_DT(kNumberTypeFloat32)] = CreateExp;
+    g_kernelCreatorRegistry[PrimType_ExpFusion][Format_NCHW][REGIST_DT(kNumberTypeFloat16)] = CreateExp;
+    g_kernelCreatorRegistry[PrimType_ExpFusion][Format_NC4HW4][REGIST_DT(kNumberTypeFloat32)] = CreateExp;
+    g_kernelCreatorRegistry[PrimType_ExpFusion][Format_NC8HW8][REGIST_DT(kNumberTypeFloat16)] = CreateExp;
     inited = true;
   }
 #endif
@@ -51,18 +51,18 @@ void Init_MSC_VER_kernels(void) {
 
 bool SupportKernelC(int opType, int format, int dataType) {
   Init_MSC_VER_kernels();
-  KernelCreator creator = g_kernelCreatorRegistry[opType][format][dataType - kNumberTypeBegin - 1];
+  KernelCreator creator = g_kernelCreatorRegistry[opType][format][REGIST_DT(dataType)];
   return creator != NULL;
 }
 
 KernelBase *CreateKernel(OpParameter *param, TensorC *in, size_t insize, TensorC *out, size_t outsize, int data_type,
                          FormatC format) {
   Init_MSC_VER_kernels();
-  KernelCreator creator = g_kernelCreatorRegistry[param->type_][format][data_type - kNumberTypeBegin - 1];
+  KernelCreator creator = g_kernelCreatorRegistry[param->type_][format][REGIST_DT(data_type)];
   if (creator == NULL) {
     return NULL;
   }
-  return creator(param, in, insize, out, outsize);
+  return creator(param, in, insize, out, outsize, data_type, format);
 }
 
 ExecEnv *GetExecEnv() {
@@ -71,14 +71,51 @@ ExecEnv *GetExecEnv() {
 }
 
 CoreFuncs *GetCoreFuncs(bool use_fp16) {
-  static CoreFuncs fp23funcs;
-  InitFp32Funcs(&fp23funcs);
-  static CoreFuncs fp16funcs;
-  InitFp16Funcs(&fp16funcs);
-
-  if (use_fp16) {
-    return &fp16funcs;
-  }
-
-  return &fp23funcs;
+  static CoreFuncs core;
+  InitCore(&core);
+
+#ifdef ENABLE_AVX512
+  static CoreFuncs core_avx512;
+  InitCore(&core_avx512);
+  InitSseCore(&core_avx512);
+  InitAvxCore(&core_avx512);
+  InitAvx512Core(&core_avx512);
+  return &core_avx512;
+#endif
+
+#ifdef ENABLE_AVX
+  static CoreFuncs core_avx;
+  InitCore(&core_avx);
+  InitSseCore(&core_avx);
+  InitAvxCore(&core_avx);
+  return &core_avx;
+#endif
+
+#ifdef ENABLE_SSE
+  static CoreFuncs core_sse;
+  InitCore(&core_sse);
+  InitSseCore(&core_sse);
+  return &core_sse;
+#endif
+
+#ifdef ENABLE_ARM32
+  static CoreFuncs core_arm32;
+  InitCore(&core_arm32);
+  InitArm32Core(&core_arm32);
+  return &core_arm32;
+#endif
+
+#ifdef ENABLE_ARM64
+  static CoreFuncs core_fp32;
+  InitCore(&core_fp32);
+  InitFp32Core(&core_fp32);
+  static CoreFuncs core_fp16;
+  InitCore(&core_fp16);
+#ifdef ENABLE_FP16
+  InitFp16Core(&core_fp16);
+#endif
+  return use_fp16 ? &core_fp16 : &core_fp32;
+#endif
+
+  return &core;
 }
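Backend selection in the rewritten GetCoreFuncs above is resolved by the preprocessor: the first enabled #ifdef block returns its layered table, the plain `core` table is the x86 fallback, and `use_fp16` only influences the ARM64 branch. A hypothetical call site, a sketch using only functions from this diff:

/* Hypothetical caller; CoreFuncs and GetCoreFuncs come from this commit. */
#include "nnacl/kernel.h"

void example_query_tiles(void) {
  CoreFuncs *funcs = GetCoreFuncs(false); /* fp32 table for this build */
  int row_tile, deep_tile, col_tile;
  funcs->ExpMatmulTile(&row_tile, &deep_tile, &col_tile); /* backend tiles */
}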
@@ -15,9 +15,10 @@
  */
 #ifndef MINDSPORE_NNACL_KERNEL_H_
 #define MINDSPORE_NNACL_KERNEL_H_
 
 #include "nnacl/op_base.h"
+#include "nnacl/infer/common_infer.h"
-#include "nnacl/experimental/core_funcs.h"
+#include "nnacl/experimental/ms_core.h"
 
 typedef struct ExecEnv {
   void *allocator;
@@ -55,7 +56,8 @@ typedef struct KernelBase {
 }
 #endif
 
-typedef KernelBase *(*KernelCreator)(OpParameter *param, TensorC *in, size_t insize, TensorC *out, size_t outsize);
+typedef KernelBase *(*KernelCreator)(OpParameter *param, TensorC *in, size_t insize, TensorC *out, size_t outsize,
+                                     int data_type, FormatC format);
 void RegKernelCreator(int opType, int format, int dataType, KernelCreator func);
 CoreFuncs *GetCoreFuncs(bool use_fp16);
 
@@ -0,0 +1,28 @@
+/**
+ * Copyright 2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nnacl/kernel/convolution.h"
+#include "nnacl/kernel/convolution_1x1.h"
+#include "nnacl/tensor_c.h"
+#include "nnacl/op_base.h"
+
+KernelBase *CreateConvolution(OpParameter *param, TensorC *in, size_t insize, TensorC *out, size_t outsize,
+                              int data_type, FormatC format) {
+  return CreateConv1x1(param, in, insize, out, outsize, data_type, format);
+}
+
+REG_KERNEL_CREATOR(PrimType_Conv2DFusion, Format_NC4HW4, kNumberTypeFloat32, CreateConvolution);
+REG_KERNEL_CREATOR(PrimType_Conv2DFusion, Format_NC8HW8, kNumberTypeFloat16, CreateConvolution);
@@ -13,8 +13,8 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#ifndef MINDSPORE_NNACL_EXPERIMENT_CONV1X1_H_
-#define MINDSPORE_NNACL_EXPERIMENT_CONV1X1_H_
+#ifndef MINDSPORE_NNACL_KERNEL_CONVOLUTION_H_
+#define MINDSPORE_NNACL_KERNEL_CONVOLUTION_H_
 
 #include "nnacl/op_base.h"
 #include "nnacl/tensor_c.h"
@@ -24,9 +24,10 @@
 extern "C" {
 #endif
 
-KernelBase *CreateConv1x1(OpParameter *param, TensorC *in, size_t insize, TensorC *out, size_t outsize);
+KernelBase *CreateConvolution(OpParameter *param, TensorC *in, size_t insize, TensorC *out, size_t outsize,
+                              int data_type, FormatC format);
 
 #ifdef __cplusplus
 }
 #endif
-#endif  // MINDSPORE_NNACL_EXPERIMENT_CONV1X1_H_
+#endif  // MINDSPORE_NNACL_KERNEL_CONVOLUTION_1X1_H_
@@ -14,34 +14,37 @@
  * limitations under the License.
  */
 
-#include "nnacl/experimental/conv1x1.h"
+#include "nnacl/kernel/convolution_1x1.h"
 #include <stdint.h>
 #include "nnacl/conv_parameter.h"
 #include "nnacl/tensor_c.h"
 #include "nnacl/op_base.h"
-#include "nnacl/experimental/base_matmul.h"
-
-typedef struct Conv1x1Stru {
-  KernelBase base;
-  uint8_t *bias_;
-  uint8_t *weight_;
-} Conv1x1Stru;
-
-int conv1x1_resize(struct KernelBase *self) { return 0; }
 
-int conv1x1_prepare(struct KernelBase *self) {
+int conv1x1_exp_resize(struct KernelBase *self) {
   Conv1x1Stru *conv = (Conv1x1Stru *)self;
   ConvParameter *param = (ConvParameter *)conv->base.param;
+  conv->exp_.row = param->input_h_ * param->input_w_;
+  conv->exp_.deep = param->input_channel_;
+  conv->exp_.col = param->output_channel_;
+  conv->exp_.thread_num = param->op_parameter_.thread_num_;
+  if (conv->bias_ != NULL || param->act_type_ != ActType_No) {
+    conv->exp_.base->funcs->PostParam(param->act_type_, &conv->exp_.min, &conv->exp_.max);
+  }
+  return 0;
+}
 
-  conv->base.funcs = GetCoreFuncs(conv->base.in[0].data_type_ == kNumberTypeFloat16);
+int conv1x1_exp_prepare(struct KernelBase *self) {
+  Conv1x1Stru *conv = (Conv1x1Stru *)self;
+  ConvParameter *param = (ConvParameter *)conv->base.param;
+  conv->exp_.base = &conv->base;
 
   int row_tile, deep_tile, col_tile;
-  conv->base.funcs->InitMatmulTileCount(&row_tile, &deep_tile, &col_tile);
+  conv->base.funcs->ExpMatmulTile(&row_tile, &deep_tile, &col_tile);
 
   conv->weight_ = (uint8_t *)(conv->base.env->alloc(
     conv->base.env->allocator,
     UP_ROUND(param->output_channel_, col_tile) * UP_ROUND(param->input_channel_, deep_tile) * row_tile));
-  conv->base.funcs->PackRight(conv->base.in[1].data_, conv->weight_, 1, param->input_channel_, param->output_channel_);
+  conv->base.funcs->PackNcX(conv->base.in[1].data_, conv->weight_, 1, param->input_channel_, param->output_channel_);
 
   if (conv->base.insize < kInputSize2) {
     conv->bias_ = NULL;
@@ -57,27 +60,21 @@ int conv1x1_prepare(struct KernelBase *self) {
   return 0;
 }
 
-int conv1x1_release(struct KernelBase *self) {
+int conv1x1_exp_release(struct KernelBase *self) {
   Conv1x1Stru *conv = (Conv1x1Stru *)self;
   conv->base.env->free(conv->base.env->allocator, conv->bias_);
   conv->base.env->free(conv->base.env->allocator, conv->weight_);
   return 0;
 }
 
-int conv1x1_compute(struct KernelBase *self) {
+int conv1x1_exp_compute(struct KernelBase *self) {
   Conv1x1Stru *conv = (Conv1x1Stru *)self;
-  ConvParameter *param = (ConvParameter *)conv->base.param;
-
-  BaseMatmul(conv->base.in[0].data_, conv->weight_, conv->bias_, conv->base.out[0].data_,
-             param->input_h_ * param->input_w_, param->input_channel_, param->output_channel_, param->act_type_,
-             param->op_parameter_.thread_num_, &conv->base);
+  ExperimentalMatmul(conv->base.in[0].data_, conv->weight_, conv->bias_, conv->base.out[0].data_, &conv->exp_);
   return 0;
 }
 
-KernelBase *CreateConv1x1(OpParameter *param, TensorC *in, size_t insize, TensorC *out, size_t outsize) {
-  if (in[0].format_ != Format_NC4HW4) {
-    return NULL;
-  }
+KernelBase *CreateConv1x1(OpParameter *param, TensorC *in, size_t insize, TensorC *out, size_t outsize, int data_type,
+                          FormatC format) {
   Conv1x1Stru *conv1x1 = (Conv1x1Stru *)malloc(sizeof(Conv1x1Stru));
   conv1x1->base.param = param;
   conv1x1->base.in = in;
@@ -85,10 +82,14 @@ KernelBase *CreateConv1x1(OpParameter *param, TensorC *in, size_t insize, Tensor
   conv1x1->base.out = out;
   conv1x1->base.outsize = outsize;
   conv1x1->base.env = GetExecEnv();
-  conv1x1->base.prepare = conv1x1_prepare;
-  conv1x1->base.resize = conv1x1_resize;
-  conv1x1->base.release = conv1x1_release;
-  conv1x1->base.compute = conv1x1_compute;
+  conv1x1->base.funcs = GetCoreFuncs(data_type == kNumberTypeFloat16);
+
+  if (format == Format_NC4HW4) {
+    conv1x1->base.prepare = conv1x1_exp_prepare;
+    conv1x1->base.resize = conv1x1_exp_resize;
+    conv1x1->base.release = conv1x1_exp_release;
+    conv1x1->base.compute = conv1x1_exp_compute;
+  }
 
   return (KernelBase *)conv1x1;
 }
@@ -0,0 +1,43 @@
+/**
+ * Copyright 2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_NNACL_KERNEL_CONVOLUTION_1X1_H_
+#define MINDSPORE_NNACL_KERNEL_CONVOLUTION_1X1_H_
+
+#include "nnacl/op_base.h"
+#include "nnacl/tensor_c.h"
+#include "nnacl/kernel.h"
+#include "nnacl/kernel/matmul_optimize.h"
+#include "nnacl/kernel/matmul_experimental.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct Conv1x1Stru {
+  KernelBase base;
+  uint8_t *bias_;
+  uint8_t *weight_;
+  MatmulOptStru opt_;
+  MatmulExpStru exp_;
+} Conv1x1Stru;
+
+KernelBase *CreateConv1x1(OpParameter *param, TensorC *in, size_t insize, TensorC *out, size_t outsize, int data_type,
+                          FormatC format);
+
+#ifdef __cplusplus
+}
+#endif
+#endif  // MINDSPORE_NNACL_KERNEL_CONVOLUTION_1X1_H_
@@ -19,14 +19,6 @@
 #include "nnacl/exp_parameter.h"
 #include "nnacl/tensor_c.h"
 #include "nnacl/op_base.h"
-#include "nnacl/fp32/exp_fp32.h"
-#ifdef ENABLE_FP16
-#include "nnacl/fp16/exp_fp16.h"
-#endif
-
-typedef struct ExpStru {
-  KernelBase base;
-} ExpStru;
 
 int exp_resize(struct KernelBase *self) {
   ExpStru *exp = (ExpStru *)self;
@@ -65,14 +57,8 @@ int exp_do_compute(void *param, int task_id, float lhs_scale, float rhs_scale) {
   ExpStru *exp_stru = (ExpStru *)param;
   ExpParameter *exp_param = (ExpParameter *)exp_stru->base.param;
 
-  int ret = NNACL_ERR;
-  if (exp_stru->base.out[0].data_type_ == kNumberTypeFloat32) {
-    ret = ExpFusionFp32(exp_stru->base.in[0].data_, exp_stru->base.out[0].data_, exp_param, task_id);
-#ifdef ENABLE_FP16
-  } else if (exp_stru->base.out[0].data_type_ == kNumberTypeFloat16) {
-    ret = ExpFusionFp16(exp_stru->base.in[0].data_, exp_stru->base.out[0].data_, exp_param, task_id);
-#endif
-  }
+  int ret =
+    exp_stru->base.funcs->ExpFusion(exp_stru->base.in[0].data_, exp_stru->base.out[0].data_, exp_param, task_id);
 
   return ret;
 }
@@ -81,7 +67,8 @@ int exp_compute(struct KernelBase *self) {
   return self->env->parallelLaunch(self->env->threadPool, exp_do_compute, self, self->param->thread_num_);
 }
 
-KernelBase *CreateExp(OpParameter *param, TensorC *in, size_t insize, TensorC *out, size_t outsize) {
+KernelBase *CreateExp(OpParameter *param, TensorC *in, size_t insize, TensorC *out, size_t outsize, int data_type,
+                      FormatC format) {
   ExpStru *exp = (ExpStru *)malloc(sizeof(ExpStru));
   exp->base.param = param;
   exp->base.in = in;
@@ -93,6 +80,7 @@ KernelBase *CreateExp(OpParameter *param, TensorC *in, size_t insize, TensorC *o
   exp->base.resize = exp_resize;
   exp->base.release = exp_release;
   exp->base.compute = exp_compute;
+  exp->base.funcs = GetCoreFuncs(data_type == kNumberTypeFloat16);
 
   return (KernelBase *)exp;
 }
@@ -24,7 +24,12 @@
 extern "C" {
 #endif
 
-KernelBase *CreateExp(OpParameter *param, TensorC *in, size_t insize, TensorC *out, size_t outsize);
+typedef struct ExpStru {
+  KernelBase base;
+} ExpStru;
+
+KernelBase *CreateExp(OpParameter *param, TensorC *in, size_t insize, TensorC *out, size_t outsize, int data_type,
+                      FormatC format);
 
 #ifdef __cplusplus
 }
@@ -103,7 +103,8 @@ int gather_d_compute(struct KernelBase *self) {
   return status;
 }
 
-KernelBase *CreateGatherD(OpParameter *param, TensorC *in, size_t insize, TensorC *out, size_t outsize) {
+KernelBase *CreateGatherD(OpParameter *param, TensorC *in, size_t insize, TensorC *out, size_t outsize, int data_type,
+                          FormatC format) {
   GatherDStru *gather_d = (GatherDStru *)malloc(sizeof(GatherDStru));
   gather_d->base.param = param;
   gather_d->base.in = in;
@@ -24,7 +24,8 @@
 extern "C" {
 #endif
 
-KernelBase *CreateGatherD(OpParameter *param, TensorC *in, size_t insize, TensorC *out, size_t outsize);
+KernelBase *CreateGatherD(OpParameter *param, TensorC *in, size_t insize, TensorC *out, size_t outsize, int data_type,
+                          FormatC format);
 
 #ifdef __cplusplus
 }
@@ -100,7 +100,8 @@ static int groupnorm_compute(struct KernelBase *self) {
   return self->env->parallelLaunch(self->env->threadPool, groupnorm_do_compute, self, self->param->thread_num_);
 }
 
-KernelBase *CreateGroupNorm(OpParameter *param, TensorC *in, size_t insize, TensorC *out, size_t outsize) {
+KernelBase *CreateGroupNorm(OpParameter *param, TensorC *in, size_t insize, TensorC *out, size_t outsize, int data_type,
+                            FormatC format) {
   GroupNormStru *groupnorm = (GroupNormStru *)malloc(sizeof(GroupNormStru));
   if (groupnorm == NULL) {
     return NULL;
@@ -26,7 +26,8 @@
 extern "C" {
 #endif
 
-KernelBase *CreateGroupNorm(OpParameter *param, TensorC *in, size_t insize, TensorC *out, size_t outsize);
+KernelBase *CreateGroupNorm(OpParameter *param, TensorC *in, size_t insize, TensorC *out, size_t outsize, int data_type,
+                            FormatC format);
 
 #ifdef __cplusplus
 }
@@ -0,0 +1,75 @@
+/**
+ * Copyright 2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nnacl/kernel/matmul_experimental.h"
+
+int ExpMatmulRun(void *param, int task_id, float lhs_scale, float rhs_scale) {
+  MatmulExpStru *matmul = (MatmulExpStru *)param;
+  if (matmul == NULL) {
+    return -1;
+  }
+
+  size_t pack_uint = matmul->base->funcs->pack * matmul->base->funcs->byte;
+
+  for (size_t i = task_id; i < matmul->row_unit; i += matmul->thread_num) {
+    int xStart = i * matmul->row_tile;
+    uint8_t *a = matmul->a_ptr + xStart * pack_uint;
+    uint8_t *tmp = matmul->tmp_ptr + matmul->row_tile * matmul->deep * task_id * matmul->base->funcs->byte;
+    matmul->base->funcs->ExpMatmulPackIn(tmp, a, matmul->row_tile, matmul->deep, matmul->row);
+    matmul->base->funcs->ExpMatmulBlock(matmul->c_ptr + xStart * pack_uint, tmp, matmul->b_ptr, matmul->bias,
+                                        matmul->row_tile, matmul->deep, matmul->col,
+                                        matmul->row * matmul->base->funcs->pack, matmul->min, matmul->max);
+  }
+  return 0;
+}
+
+void ExperimentalMatmul(uint8_t *a_ptr, uint8_t *b_ptr, uint8_t *bias, uint8_t *c_ptr, MatmulExpStru *matmul) {
+  if (a_ptr == NULL || b_ptr == NULL || c_ptr == NULL) {
+    return;
+  }
+
+  matmul->a_ptr = a_ptr;
+  matmul->b_ptr = b_ptr;
+  matmul->c_ptr = c_ptr;
+  matmul->bias = bias;
+
+  int byte = matmul->base->funcs->byte;
+  int pack = matmul->base->funcs->pack;
+  int row_tile, deep_tile, col_tile;
+  matmul->base->funcs->ExpMatmulTile(&row_tile, &deep_tile, &col_tile);
+
+  matmul->row_tile = row_tile;
+  if (row_tile == 0) {
+    return;
+  }
+  matmul->row_unit = matmul->row / row_tile;
+
+  size_t tmp_size = matmul->thread_num * UP_ROUND(matmul->deep, deep_tile) * row_tile * byte;
+  matmul->tmp_ptr = (uint8_t *)matmul->base->env->alloc(matmul->base->env->allocator, tmp_size);
+  matmul->base->env->parallelLaunch(matmul->base->env->threadPool, ExpMatmulRun, matmul, matmul->thread_num);
+
+  size_t row_remain = matmul->row - matmul->row_unit * row_tile;
+  if (row_remain != 0) {
+    int32_t start_row = matmul->row_unit * row_tile;
+    uint8_t *a_remain_ptr = a_ptr + start_row * pack * byte;
+    matmul->base->funcs->ExpMatmulPackIn(matmul->tmp_ptr, a_remain_ptr, row_remain, matmul->deep, matmul->row);
+    matmul->base->funcs->ExpMatMulRemain(c_ptr + start_row * pack * byte, matmul->tmp_ptr, b_ptr, bias, row_remain,
+                                         matmul->deep, matmul->col, matmul->row * pack, matmul->min, matmul->max);
+  }
+
+  matmul->base->env->free(matmul->base->env->allocator, matmul->tmp_ptr);
+  return;
+}
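ExpMatmulRun above hands row-tile i to thread i % thread_num, and each task packs into its own row_tile * deep slice of tmp_ptr, so workers never share scratch memory. A standalone sketch of the tile-to-thread assignment, illustrative only:

#include <stdio.h>

int main(void) {
  int row_unit = 7, thread_num = 3;
  for (int task_id = 0; task_id < thread_num; task_id++) {
    printf("thread %d:", task_id);
    for (int i = task_id; i < row_unit; i += thread_num) {
      printf(" tile%d", i); /* same stride as ExpMatmulRun's loop */
    }
    printf("\n");
  }
  return 0; /* thread 0: tiles 0,3,6; thread 1: 1,4; thread 2: 2,5 */
}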
@@ -0,0 +1,49 @@
+/**
+ * Copyright 2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_NNACL_EXPERIMENT_MATMUL_EXPERIMENTAL_H_
+#define MINDSPORE_NNACL_EXPERIMENT_MATMUL_EXPERIMENTAL_H_
+
+#include "nnacl/kernel.h"
+#include "nnacl/experimental/ms_core.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct MatmulExpStru {
+  KernelBase *base;
+  size_t deep;
+  size_t row;
+  size_t col;
+  size_t thread_num;
+  uint8_t *a_ptr;
+  uint8_t *b_ptr;
+  uint8_t *c_ptr;
+  uint8_t *bias;
+  uint8_t *tmp_ptr;
+  float min;
+  float max;
+  size_t row_unit;
+  size_t row_tile;
+} MatmulExpStru;
+
+void ExperimentalMatmul(uint8_t *a_ptr, uint8_t *b_ptr, uint8_t *bias, uint8_t *c_ptr, MatmulExpStru *matmul);
+
+#ifdef __cplusplus
+}
+#endif
+#endif  // MINDSPORE_NNACL_EXPERIMENT_MATMUL_EXPERIMENTAL_H_
@@ -0,0 +1,22 @@
+/**
+ * Copyright 2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nnacl/kernel/matmul_optimize.h"
+
+void MatmulOpt_prepare(MatmulOptStru *matmul) {
+  matmul->base->funcs->OptMatmulTile(&matmul->row_tile, &matmul->col_tile);
+  return;
+}
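matmul_optimize.c is only a stub at this point: MatmulOpt_prepare queries the backend's row/col tiles through the OptMatmulTile entries that the ms_core_*.c files install, and nothing in this commit consumes MatmulOptStru yet. A hypothetical use, assuming an already-initialized KernelBase:

/* Hypothetical call; MatmulOptStru and MatmulOpt_prepare come from this diff. */
#include "nnacl/kernel/matmul_optimize.h"

void example_opt_tiles(KernelBase *base) {
  MatmulOptStru opt;
  opt.base = base;
  MatmulOpt_prepare(&opt); /* x86 InitCore path yields row_tile=12, col_tile=8 */
}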
@@ -13,19 +13,27 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#ifndef MINDSPORE_NNACL_EXPERIMENT_BASE_MATMUL_H_
-#define MINDSPORE_NNACL_EXPERIMENT_BASE_MATMUL_H_
+#ifndef MINDSPORE_NNACL_EXPERIMENT_MATMUL_OPTIMIZE_H_
+#define MINDSPORE_NNACL_EXPERIMENT_MATMUL_OPTIMIZE_H_
 
 #include "nnacl/kernel.h"
+#include "nnacl/matmul_parameter.h"
+#include "nnacl/experimental/ms_core.h"
 
 #ifdef __cplusplus
 extern "C" {
 #endif
+typedef struct MatmulOptStru {
+  KernelBase *base;
+  MatMulParameter param;
+  int row_tile;
+  int col_tile;
+} MatmulOptStru;
 
-void BaseMatmul(uint8_t *a_ptr, uint8_t *b_ptr, uint8_t *bias, uint8_t *c_ptr, int row, int deep, int col,
-                ActType act_type, int thread_num, KernelBase *base);
+void MatmulOpt_prepare(MatmulOptStru *matmul);
 
 #ifdef __cplusplus
 }
 #endif
-#endif  // MINDSPORE_NNACL_EXPERIMENT_BASE_MATMUL_H_
+#endif  // MINDSPORE_NNACL_EXPERIMENT_MATMUL_OPTIMIZE_H_
@@ -26,7 +26,7 @@
 
 namespace mindspore::lite::pass {
 #ifdef ENABLE_RUNTIME_NCX_PASS
-std::set<schema::PrimitiveType> ncxhwx_kernels = {};
+std::set<schema::PrimitiveType> ncxhwx_kernels = {schema::PrimitiveType_Conv2DFusion};
 
 bool RuntimeNCXPassVaild(kernel::SubGraphKernel *subgraph) {
   if (subgraph->subgraph_type() == kernel::kNotSubGraph) {
@@ -1,2 +1,2 @@
 Note: This is the mindspore Lite inference framework size threshold. Offline review is required before modify this value!!!
-1022266
+1300000
@@ -176,6 +176,7 @@ getCommonFile() {
     mindspore/lite/src/expression/ops_utils.h
     mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/tensor_c_utils.h
    mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/tensorlist_c_utils.h
+    mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/experimental/ms_core.h
     mindspore/core/utils/log_adapter.h
     mindspore/core/ir/api_tensor_impl.h
     mindspore/lite/src/runtime/cxx_api/tensor/tensor_impl.h
@@ -227,6 +228,13 @@ getCommonFile() {
     mindspore/lite/src/expression/ops_utils.cc
     mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/tensor_c_utils.c
     mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/tensorlist_c_utils.c
+    mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/experimental/ms_core_x86.c
+    mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/experimental/ms_core_sse.c
+    mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/experimental/ms_core_avx.c
+    mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/experimental/ms_core_avx512.c
+    mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/experimental/ms_core_arm64_fp32.c
+    mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/experimental/ms_core_arm64_fp16.c
+    mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/experimental/ms_core_arm32.c
   )
   all_files=("${src_files[@]}" "${regist_files[@]}" "${common_files[@]}" "${runtime_files_cc[@]}"
              "${others_files_c[@]}" "${assembly_files[@]}" "${mindrt_files[@]}"