conv sw common simd refactor master

This commit is contained in:
greatpan 2022-08-09 14:49:43 +08:00
parent 92eb606c6a
commit ebcabfe836
13 changed files with 1419 additions and 1421 deletions

View File

@ -73,7 +73,7 @@ mindspore/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/int8/conv_int8.c:Conv1x
mindspore/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/int8/pack_int8.c:PackNHWCToNCHWInt8
mindspore/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/fp32/pooling_fp32.c:AvgPooling
mindspore/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/fp32/matmul_fp32.c:MatMul4x1Kernel, MatMul2x1Kernel
mindspore/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/fp32/conv_common_fp32.c:SWConv3x32Kernel, SWConv4x24Kernel, SWConv12x8Kernel, SWConv8x8Kernel, SWConv4x8Kernel, SWConv6x16Kernel, SWConv4x16Kernel
mindspore/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/fp32/conv_sw_avx_fp32.c:SWConv3x32AVXKernel, SWConv4x24AVXKernel, SWConv12x8AVXKernel, SWConv8x8AVXKernel, SWConv4x8AVXKernel, SWConv6x16AVXKernel, SWConv4x16AVXKernel
mindspore/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/fp32/conv_depthwise_fp32.c:DepthwiseSW3x32Kernel, DepthwiseSW4x24Kernel, DepthwiseSW12x8Kernel, DepthwiseSW8x8Kernel, DepthwiseSW4x8Kernel, DepthwiseSW6x16Kernel, DepthwiseSW4x16Kernel
mindspore/mindspore/core/ir/dtype/type.cc:mindspore::ObjectIdLabel
mindspore/mindspore/python/mindspore/ops/_op_impl/_custom_op/dsd_impl.py:dsd_matmul

View File

@ -92,6 +92,14 @@ file(GLOB KERNEL_SRC
${NNACL_DIR}/experimental/*.c
)
# Sources that must be compiled with ISA-specific flags. They are taken out of
# the generic KERNEL_SRC list here; the X86_64_SIMD sections further down in
# this file reference these variables when they assemble the per-ISA source
# lists.
set(KERNEL_AVX512_FILE
    ${NNACL_DIR}/fp32/matmul_avx512_fp32.c)
set(KERNEL_AVX_FILE
    ${NNACL_DIR}/fp32/conv_sw_avx_fp32.c
    ${NNACL_DIR}/fp32/conv_1x1_avx_fp32.c
    ${NNACL_DIR}/fp32/matmul_avx_fp32.c)
list(REMOVE_ITEM KERNEL_SRC ${KERNEL_AVX512_FILE} ${KERNEL_AVX_FILE})

# shape_fusion_infer.c is only built when the runtime pass is enabled.
if(NOT MSLITE_ENABLE_RUNTIME_PASS)
    list(REMOVE_ITEM KERNEL_SRC
        ${NNACL_DIR}/infer/shape_fusion_infer.c)
endif()
@ -149,37 +157,49 @@ if(PLATFORM_ARM32)
set_property(SOURCE ${ASSEMBLY_SRC} PROPERTY LANGUAGE C)
endif()
if("${X86_64_SIMD}" STREQUAL "sse")
file(GLOB ASSEMBLY_SRC ${NNACL_DIR}/intrinsics/sse/*.c)
set_property(SOURCE ${ASSEMBLY_SRC} PROPERTY LANGUAGE C)
# SSE intrinsics are built for every x86_64 SIMD level (sse, avx and avx512).
if("${X86_64_SIMD}" STREQUAL "sse" OR "${X86_64_SIMD}" STREQUAL "avx" OR "${X86_64_SIMD}" STREQUAL "avx512")
    file(GLOB ASSEMBLY_SSE_SRC ${NNACL_DIR}/intrinsics/sse/*.c)
    # NOTE(review): KERNEL_SSE_FILE is not set in the visible part of this file;
    # an unset variable expands to nothing, so only the globbed intrinsics are
    # listed unless it is defined elsewhere - confirm.
    set(MS_X86_SSE_SRC
        ${ASSEMBLY_SSE_SRC}
        ${KERNEL_SSE_FILE})
    # Attach only the ISA switch per source. CMAKE_C_FLAGS is already applied to
    # every C source of the target; repeating it here would duplicate each global
    # flag and place the copy *after* the target's own options. The copy would
    # also be taken before CMAKE_C_FLAGS is rewritten from -fvisibility=hidden to
    # -fvisibility=default in the "build nnacl library" section, so a stale
    # hidden-visibility flag could win for these files.
    set_source_files_properties(${MS_X86_SSE_SRC} PROPERTIES
        LANGUAGE C
        COMPILE_FLAGS "-msse4.1")
    list(APPEND MS_X86_SIMD_SRC ${MS_X86_SSE_SRC})
endif()
if("${X86_64_SIMD}" STREQUAL "avx")
file(GLOB ASSEMBLY_SRC ${NNACL_DIR}/intrinsics/sse/*.c
if("${X86_64_SIMD}" STREQUAL "avx" OR "${X86_64_SIMD}" STREQUAL "avx512")
file(GLOB ASSEMBLY_AVX_SRC
${NNACL_DIR}/intrinsics/avx/*.c
${NNACL_DIR}/assembly/avx/*.S
${NNACL_DIR}/intrinsics/ms_simd_cpu_info.c)
set_property(SOURCE ${ASSEMBLY_SRC} PROPERTY LANGUAGE C)
set_property(SOURCE ${ASSEMBLY_AVX_SRC} PROPERTY LANGUAGE C)
set(MS_X86_AVX_SRC
${ASSEMBLY_AVX_SRC}
${KERNEL_AVX_FILE})
set_source_files_properties(${MS_X86_AVX_SRC} PROPERTIES LANGUAGE C
COMPILE_FLAGS "${CMAKE_C_FLAGS} -mavx -mavx2 -mfma -fPIC")
set(MS_X86_SIMD_SRC ${MS_X86_SIMD_SRC} ${MS_X86_AVX_SRC})
endif()
if("${X86_64_SIMD}" STREQUAL "avx512")
file(GLOB ASSEMBLY_SRC ${NNACL_DIR}/intrinsics/sse/*.c
${NNACL_DIR}/intrinsics/avx/*.c
${NNACL_DIR}/assembly/avx/*.S
${NNACL_DIR}/intrinsics/ms_simd_cpu_info.c)
set_property(SOURCE ${ASSEMBLY_SRC} PROPERTY LANGUAGE C)
if("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
file(GLOB HPC_SRC ${NNACL_DIR}/experimental/HPC-generator/gemm_avx512/*.c)
set_property(SOURCE ${HPC_SRC} PROPERTY LANGUAGE C)
endif()
set(MS_X86_AVX512_SRC ${HPC_SRC}
${NNACL_DIR}/fp32/matmul_avx512_fp32.c)
set(MS_X86_AVX512_SRC
${HPC_SRC}
${KERNEL_AVX512_FILE})
set_source_files_properties(${MS_X86_AVX512_SRC} PROPERTIES LANGUAGE C
COMPILE_FLAGS "${CMAKE_C_FLAGS} -mavx512f -fPIC")
set(MS_X86_SIMD_SRC ${MS_X86_SIMD_SRC} ${MS_X86_AVX512_SRC})
endif()
if(APPLE)
@ -189,7 +209,7 @@ endif()
########################### build nnacl library ########################
string(REPLACE "-fvisibility=hidden" "-fvisibility=default" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
add_library(nnacl_mid OBJECT ${KERNEL_SRC} ${TRAIN_SRC} ${ASSEMBLY_SRC} ${MS_X86_AVX512_SRC})
add_library(nnacl_mid OBJECT ${KERNEL_SRC} ${TRAIN_SRC} ${ASSEMBLY_SRC} ${MS_X86_SIMD_SRC})
if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
target_compile_definitions(nnacl_mid PRIVATE ENABLE_DEBUG)

View File

@ -13,7 +13,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifdef ENABLE_AVX
#include "nnacl/fp32/conv_1x1_avx_fp32.h"
#include "nnacl/intrinsics/ms_simd_avx_instructions.h"
@ -1607,4 +1606,3 @@ void Conv1x1SWOWxOCAVXKernel(float *dst, const float *src, const float *weight,
}
}
#endif
#endif

View File

@ -16,7 +16,6 @@
#ifndef MINDSPORE_NNACL_FP32_CONV_1X1_AVX_FP32_H_
#define MINDSPORE_NNACL_FP32_CONV_1X1_AVX_FP32_H_
#ifdef ENABLE_AVX
#include "nnacl/op_base.h"
#include "nnacl/conv_parameter.h"
@ -35,9 +34,7 @@ void Conv1x1SWOWxOCAVXKernel(float *dst, const float *src, const float *weight,
size_t ow_block, size_t oc_block, size_t oc_align, size_t ic_align, size_t in_sw_step,
size_t dst_flag);
#endif
#endif
#ifdef __cplusplus
}
#endif
#endif // MINDSPORE_NNACL_FP32_CONV_1X1_AVX_FP32_H_
#endif

View File

@ -21,6 +21,7 @@
#include "nnacl/op_base.h"
#include "nnacl/common_func.h"
#include "nnacl/conv_parameter.h"
#include "nnacl/fp32/conv_sw_avx_fp32.h"
#ifdef __cplusplus
extern "C" {
@ -50,78 +51,8 @@ void ConvFp32OutNC4HW4(const float *input_data, float *packed_input, const float
#ifdef ENABLE_AVX
void CommonConv6x16Kernel(float *dst, const float *src, const float *weight, const float *bias, size_t depth,
size_t out_step, size_t act_flag, size_t real_cal_row);
typedef void (*SWConvKernel)(float *dst, const float *src, const float *weight, const float *bias, size_t kernel_h,
size_t kernel_w, size_t act_flag, size_t ow_block, size_t oc_block, size_t oc_algin,
size_t ic_algin, size_t in_kw_step, size_t in_kh_step, size_t in_sw_step,
size_t kw_remainder, size_t write_mode);
void SWBorder(float *dst, const float *src, const float *weight, const float *bias, int top, int bottom, int left,
int right, const ConvParameter *conv_param, const SlidingWindowParam *sw_param, SWConvKernel kernel,
int act_type, int ow_bock, int oc_block, size_t write_mode);
void ConvSWFp32(const float *input_data, const float *packed_weight, const float *bias_data, float *output_data,
int task_id, ConvParameter *conv_param, SlidingWindowParam *sw_param);
void SWCenter(float *dst, const float *src, const float *weight, const float *bias, int height, int width, int kernel_h,
int kernel_w, bool is_relu, bool is_relu6, SlidingWindowParam *sw_param);
#ifdef ENABLE_DEBUG
void SWConvWxKKernel(float *dst, const float *src, const float *weight, const float *bias, size_t kernel_h,
size_t kernel_w, size_t act_flag, size_t ow_block, size_t oc_block, size_t oc_algin,
size_t ic_algin, size_t in_kw_step, size_t in_kh_step, size_t in_sw_step, size_t kw_remainder,
size_t write_mode);
#endif
void SWConv3x32Kernel(float *dst, const float *src, const float *weight, const float *bias, size_t kernel_h,
size_t kernel_w, size_t act_flag, size_t ow_block, size_t oc_block, size_t oc_algin,
size_t ic_algin, size_t in_kw_step, size_t in_kh_step, size_t in_sw_step, size_t kw_remainder,
size_t write_mode);
void SWConv1x32Kernel(float *dst, const float *src, const float *weight, const float *bias, size_t kernel_h,
size_t kernel_w, size_t act_flag, size_t ow_block, size_t oc_block, size_t oc_algin,
size_t ic_algin, size_t in_kw_step, size_t in_kh_step, size_t in_sw_step, size_t kw_remainder,
size_t write_mode);
void SWConv4x24Kernel(float *dst, const float *src, const float *weight, const float *bias, size_t kernel_h,
size_t kernel_w, size_t act_flag, size_t ow_block, size_t oc_block, size_t oc_algin,
size_t ic_algin, size_t in_kw_step, size_t in_kh_step, size_t in_sw_step, size_t kw_remainder,
size_t write_mode);
void SWConv1x24Kernel(float *dst, const float *src, const float *weight, const float *bias, size_t kernel_h,
size_t kernel_w, size_t act_flag, size_t ow_block, size_t oc_block, size_t oc_algin,
size_t ic_algin, size_t in_kw_step, size_t in_kh_step, size_t in_sw_step, size_t kw_remainder,
size_t write_mode);
void SWConv6x16Kernel(float *dst, const float *src, const float *weight, const float *bias, size_t kernel_h,
size_t kernel_w, size_t act_flag, size_t ow_block, size_t oc_block, size_t oc_algin,
size_t ic_algin, size_t in_kw_step, size_t in_kh_step, size_t in_sw_step, size_t kw_remainder,
size_t write_mode);
void SWConv1x16Kernel(float *dst, const float *src, const float *weight, const float *bias, size_t kernel_h,
size_t kernel_w, size_t act_flag, size_t ow_block, size_t oc_block, size_t oc_algin,
size_t ic_algin, size_t in_kw_step, size_t in_kh_step, size_t in_sw_step, size_t kw_remainder,
size_t write_mode);
void SWConv12x8Kernel(float *dst, const float *src, const float *weight, const float *bias, size_t kernel_h,
size_t kernel_w, size_t act_flag, size_t ow_block, size_t oc_block, size_t oc_algin,
size_t ic_algin, size_t in_kw_step, size_t in_kh_step, size_t in_sw_step, size_t kw_remainder,
size_t write_mode);
void SWConv8x8Kernel(float *dst, const float *src, const float *weight, const float *bias, size_t kernel_h,
size_t kernel_w, size_t act_flag, size_t ow_block, size_t oc_block, size_t oc_algin,
size_t ic_algin, size_t in_kw_step, size_t in_kh_step, size_t in_sw_step, size_t kw_remainder,
size_t write_mode);
void SWConv4x8Kernel(float *dst, const float *src, const float *weight, const float *bias, size_t kernel_h,
size_t kernel_w, size_t act_flag, size_t ow_block, size_t oc_block, size_t oc_algin,
size_t ic_algin, size_t in_kw_step, size_t in_kh_step, size_t in_sw_step, size_t kw_remainder,
size_t write_mode);
void SWConv1x8Kernel(float *dst, const float *src, const float *weight, const float *bias, size_t kernel_h,
size_t kernel_w, size_t act_flag, size_t ow_block, size_t oc_block, size_t oc_algin,
size_t ic_algin, size_t in_kw_step, size_t in_kh_step, size_t in_sw_step, size_t kw_remainder,
size_t write_mode);
#endif
#ifdef __cplusplus
}
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,42 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_NNACL_FP32_CONV_SW_AVX_H_
#define MINDSPORE_NNACL_FP32_CONV_SW_AVX_H_
#include "nnacl/pack.h"
#include "nnacl/op_base.h"
#include "nnacl/common_func.h"
#include "nnacl/conv_parameter.h"
#ifdef __cplusplus
extern "C" {
#endif
/**
 * fp32 convolution entry point of the AVX sliding-window ("SW") path.
 *
 * NOTE(review): the descriptions below are taken from the parameter names only;
 * the implementation is not visible from this header - confirm against it.
 *
 * @param input_data     source tensor data (read-only)
 * @param packed_weight  convolution weights, presumably already packed by the caller (read-only)
 * @param bias_data      bias values (read-only)
 * @param output_data    destination buffer written by the function
 * @param task_id        identifier of the calling task; presumably selects this task's share of the work
 * @param conv_param     convolution parameters
 * @param sw_param       sliding-window parameters
 */
void ConvSWAVXFp32(const float *input_data, const float *packed_weight, const float *bias_data, float *output_data,
int task_id, ConvParameter *conv_param, SlidingWindowParam *sw_param);
/* Declared only when ENABLE_DEBUG is defined. */
#ifdef ENABLE_DEBUG
/**
 * Sliding-window convolution kernel taking the output-width block (ow_block) and
 * output-channel block (oc_block) as runtime arguments.
 *
 * NOTE(review): `oc_algin` / `ic_algin` look like misspellings of `oc_align` /
 * `ic_align`; the names are left untouched here so the declaration keeps
 * matching its definition - confirm and rename both together if desired.
 * NOTE(review): units of the *_step arguments and the meaning of act_flag,
 * kw_remainder and write_mode are not visible from this header - see the
 * implementation.
 */
void SWConvWxKAVXKernel(float *dst, const float *src, const float *weight, const float *bias, size_t kernel_h,
size_t kernel_w, size_t act_flag, size_t ow_block, size_t oc_block, size_t oc_algin,
size_t ic_algin, size_t in_kw_step, size_t in_kh_step, size_t in_sw_step, size_t kw_remainder,
size_t write_mode);
#endif
#ifdef __cplusplus
}
#endif
#endif // MINDSPORE_NNACL_FP32_CONV_SW_AVX_H_

View File

@ -13,8 +13,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifdef ENABLE_AVX512
#include "nnacl/fp32/matmul_avx512_fp32.h"
#include "nnacl/op_base.h"
#include "nnacl/intrinsics/ms_simd_instructions.h"
@ -248,5 +246,3 @@ int64_t GemmIsNotPackOptimizeAVX512(int64_t m_index, const float *a, const float
}
return m_index;
}
#endif

View File

@ -15,9 +15,7 @@
*/
#ifndef MINDSPORE_NNACL_FP32_MATMUL_AVX512_H_
#define MINDSPORE_NNACL_FP32_MATMUL_AVX512_H_
#include <stdint.h>
#ifdef ENABLE_AVX512
#include <x86intrin.h>
#include "nnacl/op_base.h"
typedef void (*GemmAvx512Kernel)(float *dst, const float *src, const float *weight, const float *bias,
const size_t act_flag, const size_t row_block, const size_t col_block,
const size_t deep, const size_t src_stride, const size_t dst_stride,
@ -197,5 +195,4 @@ void nnacl_gemm_avx512_1x16_kernel_nhwc_fp32(float *dst, const float *src, const
}
#endif
#endif
#endif // MINDSPORE_NNACL_FP32_MATMUL_AVX512_H_

View File

@ -13,8 +13,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifdef ENABLE_AVX
#include "nnacl/fp32/matmul_avx_fp32.h"
#include "nnacl/intrinsics/ms_simd_avx_instructions.h"
@ -954,4 +952,3 @@ void MatVecMulRowxColKernel(float *dst, const float *src, const float *weight, c
}
}
#endif
#endif

View File

@ -20,8 +20,6 @@
#include <float.h>
#include "nnacl/op_base.h"
#if defined(ENABLE_AVX)
#ifdef __cplusplus
extern "C" {
#endif
@ -62,7 +60,5 @@ void MatVecMulRowxColKernel(float *dst, const float *src, const float *weight, c
#ifdef __cplusplus
}
#endif
#endif
#endif // MINDSPORE_NNACL_FP32_MATMUL_H_

View File

@ -34,8 +34,8 @@ int ConvolutionSWAVXCPUKernel::RunImpl(int task_id) {
Conv1x1SWAVXFp32(input_data_, reinterpret_cast<float *>(packed_weight_), reinterpret_cast<float *>(bias_data_),
output_data_, task_id, conv_param_, slidingWindow_param_);
} else {
ConvSWFp32(input_data_, reinterpret_cast<float *>(packed_weight_), reinterpret_cast<float *>(bias_data_),
output_data_, task_id, conv_param_, slidingWindow_param_);
ConvSWAVXFp32(input_data_, reinterpret_cast<float *>(packed_weight_), reinterpret_cast<float *>(bias_data_),
output_data_, task_id, conv_param_, slidingWindow_param_);
}
return RET_OK;
}