forked from mindspore-Ecosystem/mindspore
conv sw common simd refactor master
This commit is contained in:
parent
92eb606c6a
commit
ebcabfe836
|
@ -73,7 +73,7 @@ mindspore/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/int8/conv_int8.c:Conv1x
|
|||
mindspore/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/int8/pack_int8.c:PackNHWCToNCHWInt8
|
||||
mindspore/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/fp32/pooling_fp32.c:AvgPooling
|
||||
mindspore/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/fp32/matmul_fp32.c:MatMul4x1Kernel, MatMul2x1Kernel
|
||||
mindspore/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/fp32/conv_common_fp32.c:SWConv3x32Kernel, SWConv4x24Kernel, SWConv12x8Kernel, SWConv8x8Kernel, SWConv4x8Kernel, SWConv6x16Kernel, SWConv4x16Kernel
|
||||
mindspore/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/fp32/conv_sw_avx_fp32.c:SWConv3x32AVXKernel, SWConv4x24AVXKernel, SWConv12x8AVXKernel, SWConv8x8AVXKernel, SWConv4x8AVXKernel, SWConv6x16AVXKernel, SWConv4x16AVXKernel
|
||||
mindspore/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/fp32/conv_depthwise_fp32.c:DepthwiseSW3x32Kernel, DepthwiseSW4x24Kernel, DepthwiseSW12x8Kernel, DepthwiseSW8x8Kernel, DepthwiseSW4x8Kernel, DepthwiseSW6x16Kernel, DepthwiseSW4x16Kernel
|
||||
mindspore/mindspore/core/ir/dtype/type.cc:mindspore::ObjectIdLabel
|
||||
mindspore/mindspore/python/mindspore/ops/_op_impl/_custom_op/dsd_impl.py:dsd_matmul
|
||||
|
|
|
@ -92,6 +92,14 @@ file(GLOB KERNEL_SRC
|
|||
${NNACL_DIR}/experimental/*.c
|
||||
)
|
||||
|
||||
set(KERNEL_AVX512_FILE ${NNACL_DIR}/fp32/matmul_avx512_fp32.c)
|
||||
list(REMOVE_ITEM KERNEL_SRC ${KERNEL_AVX512_FILE})
|
||||
|
||||
set(KERNEL_AVX_FILE ${NNACL_DIR}/fp32/conv_sw_avx_fp32.c
|
||||
${NNACL_DIR}/fp32/conv_1x1_avx_fp32.c
|
||||
${NNACL_DIR}/fp32/matmul_avx_fp32.c)
|
||||
list(REMOVE_ITEM KERNEL_SRC ${KERNEL_AVX_FILE})
|
||||
|
||||
if(NOT MSLITE_ENABLE_RUNTIME_PASS)
|
||||
list(REMOVE_ITEM KERNEL_SRC ${NNACL_DIR}/infer/shape_fusion_infer.c)
|
||||
endif()
|
||||
|
@ -149,37 +157,49 @@ if(PLATFORM_ARM32)
|
|||
set_property(SOURCE ${ASSEMBLY_SRC} PROPERTY LANGUAGE C)
|
||||
endif()
|
||||
|
||||
if("${X86_64_SIMD}" STREQUAL "sse")
|
||||
file(GLOB ASSEMBLY_SRC ${NNACL_DIR}/intrinsics/sse/*.c)
|
||||
set_property(SOURCE ${ASSEMBLY_SRC} PROPERTY LANGUAGE C)
|
||||
if("${X86_64_SIMD}" STREQUAL "sse" OR "${X86_64_SIMD}" STREQUAL "avx" OR "${X86_64_SIMD}" STREQUAL "avx512")
|
||||
file(GLOB ASSEMBLY_SSE_SRC ${NNACL_DIR}/intrinsics/sse/*.c)
|
||||
set_property(SOURCE ${ASSEMBLY_SSE_SRC} PROPERTY LANGUAGE C)
|
||||
|
||||
set(MS_X86_SSE_SRC
|
||||
${ASSEMBLY_SSE_SRC}
|
||||
${KERNEL_SSE_FILE})
|
||||
set_source_files_properties(${MS_X86_SSE_SRC} PROPERTIES LANGUAGE C
|
||||
COMPILE_FLAGS "${CMAKE_C_FLAGS} -msse4.1")
|
||||
|
||||
set(MS_X86_SIMD_SRC ${MS_X86_SIMD_SRC} ${MS_X86_SSE_SRC})
|
||||
endif()
|
||||
|
||||
if("${X86_64_SIMD}" STREQUAL "avx")
|
||||
file(GLOB ASSEMBLY_SRC ${NNACL_DIR}/intrinsics/sse/*.c
|
||||
if("${X86_64_SIMD}" STREQUAL "avx" OR "${X86_64_SIMD}" STREQUAL "avx512")
|
||||
file(GLOB ASSEMBLY_AVX_SRC
|
||||
${NNACL_DIR}/intrinsics/avx/*.c
|
||||
${NNACL_DIR}/assembly/avx/*.S
|
||||
${NNACL_DIR}/intrinsics/ms_simd_cpu_info.c)
|
||||
set_property(SOURCE ${ASSEMBLY_SRC} PROPERTY LANGUAGE C)
|
||||
set_property(SOURCE ${ASSEMBLY_AVX_SRC} PROPERTY LANGUAGE C)
|
||||
|
||||
set(MS_X86_AVX_SRC
|
||||
${ASSEMBLY_AVX_SRC}
|
||||
${KERNEL_AVX_FILE})
|
||||
set_source_files_properties(${MS_X86_AVX_SRC} PROPERTIES LANGUAGE C
|
||||
COMPILE_FLAGS "${CMAKE_C_FLAGS} -mavx -mavx2 -mfma -fPIC")
|
||||
|
||||
set(MS_X86_SIMD_SRC ${MS_X86_SIMD_SRC} ${MS_X86_AVX_SRC})
|
||||
endif()
|
||||
|
||||
if("${X86_64_SIMD}" STREQUAL "avx512")
|
||||
file(GLOB ASSEMBLY_SRC ${NNACL_DIR}/intrinsics/sse/*.c
|
||||
${NNACL_DIR}/intrinsics/avx/*.c
|
||||
${NNACL_DIR}/assembly/avx/*.S
|
||||
${NNACL_DIR}/intrinsics/ms_simd_cpu_info.c)
|
||||
set_property(SOURCE ${ASSEMBLY_SRC} PROPERTY LANGUAGE C)
|
||||
|
||||
if("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
|
||||
file(GLOB HPC_SRC ${NNACL_DIR}/experimental/HPC-generator/gemm_avx512/*.c)
|
||||
set_property(SOURCE ${HPC_SRC} PROPERTY LANGUAGE C)
|
||||
endif()
|
||||
|
||||
set(MS_X86_AVX512_SRC ${HPC_SRC}
|
||||
${NNACL_DIR}/fp32/matmul_avx512_fp32.c)
|
||||
set(MS_X86_AVX512_SRC
|
||||
${HPC_SRC}
|
||||
${KERNEL_AVX512_FILE})
|
||||
|
||||
set_source_files_properties(${MS_X86_AVX512_SRC} PROPERTIES LANGUAGE C
|
||||
COMPILE_FLAGS "${CMAKE_C_FLAGS} -mavx512f -fPIC")
|
||||
|
||||
set(MS_X86_SIMD_SRC ${MS_X86_SIMD_SRC} ${MS_X86_AVX512_SRC})
|
||||
endif()
|
||||
|
||||
if(APPLE)
|
||||
|
@ -189,7 +209,7 @@ endif()
|
|||
########################### build nnacl library ########################
|
||||
string(REPLACE "-fvisibility=hidden" "-fvisibility=default" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
|
||||
|
||||
add_library(nnacl_mid OBJECT ${KERNEL_SRC} ${TRAIN_SRC} ${ASSEMBLY_SRC} ${MS_X86_AVX512_SRC})
|
||||
add_library(nnacl_mid OBJECT ${KERNEL_SRC} ${TRAIN_SRC} ${ASSEMBLY_SRC} ${MS_X86_SIMD_SRC})
|
||||
|
||||
if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
|
||||
target_compile_definitions(nnacl_mid PRIVATE ENABLE_DEBUG)
|
||||
|
|
|
@ -13,7 +13,6 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifdef ENABLE_AVX
|
||||
#include "nnacl/fp32/conv_1x1_avx_fp32.h"
|
||||
#include "nnacl/intrinsics/ms_simd_avx_instructions.h"
|
||||
|
||||
|
@ -1607,4 +1606,3 @@ void Conv1x1SWOWxOCAVXKernel(float *dst, const float *src, const float *weight,
|
|||
}
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
|
|
@ -16,7 +16,6 @@
|
|||
#ifndef MINDSPORE_NNACL_FP32_CONV_1X1_AVX_FP32_H_
|
||||
#define MINDSPORE_NNACL_FP32_CONV_1X1_AVX_FP32_H_
|
||||
|
||||
#ifdef ENABLE_AVX
|
||||
#include "nnacl/op_base.h"
|
||||
#include "nnacl/conv_parameter.h"
|
||||
|
||||
|
@ -35,9 +34,7 @@ void Conv1x1SWOWxOCAVXKernel(float *dst, const float *src, const float *weight,
|
|||
size_t ow_block, size_t oc_block, size_t oc_align, size_t ic_align, size_t in_sw_step,
|
||||
size_t dst_flag);
|
||||
#endif
|
||||
|
||||
#endif
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif // MINDSPORE_NNACL_FP32_CONV_1X1_AVX_FP32_H_
|
||||
#endif
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -21,6 +21,7 @@
|
|||
#include "nnacl/op_base.h"
|
||||
#include "nnacl/common_func.h"
|
||||
#include "nnacl/conv_parameter.h"
|
||||
#include "nnacl/fp32/conv_sw_avx_fp32.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
|
@ -50,78 +51,8 @@ void ConvFp32OutNC4HW4(const float *input_data, float *packed_input, const float
|
|||
#ifdef ENABLE_AVX
|
||||
void CommonConv6x16Kernel(float *dst, const float *src, const float *weight, const float *bias, size_t depth,
|
||||
size_t out_step, size_t act_flag, size_t real_cal_row);
|
||||
|
||||
typedef void (*SWConvKernel)(float *dst, const float *src, const float *weight, const float *bias, size_t kernel_h,
|
||||
size_t kernel_w, size_t act_flag, size_t ow_block, size_t oc_block, size_t oc_algin,
|
||||
size_t ic_algin, size_t in_kw_step, size_t in_kh_step, size_t in_sw_step,
|
||||
size_t kw_remainder, size_t write_mode);
|
||||
|
||||
void SWBorder(float *dst, const float *src, const float *weight, const float *bias, int top, int bottom, int left,
|
||||
int right, const ConvParameter *conv_param, const SlidingWindowParam *sw_param, SWConvKernel kernel,
|
||||
int act_type, int ow_bock, int oc_block, size_t write_mode);
|
||||
|
||||
void ConvSWFp32(const float *input_data, const float *packed_weight, const float *bias_data, float *output_data,
|
||||
int task_id, ConvParameter *conv_param, SlidingWindowParam *sw_param);
|
||||
void SWCenter(float *dst, const float *src, const float *weight, const float *bias, int height, int width, int kernel_h,
|
||||
int kernel_w, bool is_relu, bool is_relu6, SlidingWindowParam *sw_param);
|
||||
|
||||
#ifdef ENABLE_DEBUG
|
||||
void SWConvWxKKernel(float *dst, const float *src, const float *weight, const float *bias, size_t kernel_h,
|
||||
size_t kernel_w, size_t act_flag, size_t ow_block, size_t oc_block, size_t oc_algin,
|
||||
size_t ic_algin, size_t in_kw_step, size_t in_kh_step, size_t in_sw_step, size_t kw_remainder,
|
||||
size_t write_mode);
|
||||
#endif
|
||||
|
||||
void SWConv3x32Kernel(float *dst, const float *src, const float *weight, const float *bias, size_t kernel_h,
|
||||
size_t kernel_w, size_t act_flag, size_t ow_block, size_t oc_block, size_t oc_algin,
|
||||
size_t ic_algin, size_t in_kw_step, size_t in_kh_step, size_t in_sw_step, size_t kw_remainder,
|
||||
size_t write_mode);
|
||||
|
||||
void SWConv1x32Kernel(float *dst, const float *src, const float *weight, const float *bias, size_t kernel_h,
|
||||
size_t kernel_w, size_t act_flag, size_t ow_block, size_t oc_block, size_t oc_algin,
|
||||
size_t ic_algin, size_t in_kw_step, size_t in_kh_step, size_t in_sw_step, size_t kw_remainder,
|
||||
size_t write_mode);
|
||||
|
||||
void SWConv4x24Kernel(float *dst, const float *src, const float *weight, const float *bias, size_t kernel_h,
|
||||
size_t kernel_w, size_t act_flag, size_t ow_block, size_t oc_block, size_t oc_algin,
|
||||
size_t ic_algin, size_t in_kw_step, size_t in_kh_step, size_t in_sw_step, size_t kw_remainder,
|
||||
size_t write_mode);
|
||||
|
||||
void SWConv1x24Kernel(float *dst, const float *src, const float *weight, const float *bias, size_t kernel_h,
|
||||
size_t kernel_w, size_t act_flag, size_t ow_block, size_t oc_block, size_t oc_algin,
|
||||
size_t ic_algin, size_t in_kw_step, size_t in_kh_step, size_t in_sw_step, size_t kw_remainder,
|
||||
size_t write_mode);
|
||||
|
||||
void SWConv6x16Kernel(float *dst, const float *src, const float *weight, const float *bias, size_t kernel_h,
|
||||
size_t kernel_w, size_t act_flag, size_t ow_block, size_t oc_block, size_t oc_algin,
|
||||
size_t ic_algin, size_t in_kw_step, size_t in_kh_step, size_t in_sw_step, size_t kw_remainder,
|
||||
size_t write_mode);
|
||||
|
||||
void SWConv1x16Kernel(float *dst, const float *src, const float *weight, const float *bias, size_t kernel_h,
|
||||
size_t kernel_w, size_t act_flag, size_t ow_block, size_t oc_block, size_t oc_algin,
|
||||
size_t ic_algin, size_t in_kw_step, size_t in_kh_step, size_t in_sw_step, size_t kw_remainder,
|
||||
size_t write_mode);
|
||||
|
||||
void SWConv12x8Kernel(float *dst, const float *src, const float *weight, const float *bias, size_t kernel_h,
|
||||
size_t kernel_w, size_t act_flag, size_t ow_block, size_t oc_block, size_t oc_algin,
|
||||
size_t ic_algin, size_t in_kw_step, size_t in_kh_step, size_t in_sw_step, size_t kw_remainder,
|
||||
size_t write_mode);
|
||||
|
||||
void SWConv8x8Kernel(float *dst, const float *src, const float *weight, const float *bias, size_t kernel_h,
|
||||
size_t kernel_w, size_t act_flag, size_t ow_block, size_t oc_block, size_t oc_algin,
|
||||
size_t ic_algin, size_t in_kw_step, size_t in_kh_step, size_t in_sw_step, size_t kw_remainder,
|
||||
size_t write_mode);
|
||||
|
||||
void SWConv4x8Kernel(float *dst, const float *src, const float *weight, const float *bias, size_t kernel_h,
|
||||
size_t kernel_w, size_t act_flag, size_t ow_block, size_t oc_block, size_t oc_algin,
|
||||
size_t ic_algin, size_t in_kw_step, size_t in_kh_step, size_t in_sw_step, size_t kw_remainder,
|
||||
size_t write_mode);
|
||||
|
||||
void SWConv1x8Kernel(float *dst, const float *src, const float *weight, const float *bias, size_t kernel_h,
|
||||
size_t kernel_w, size_t act_flag, size_t ow_block, size_t oc_block, size_t oc_algin,
|
||||
size_t ic_algin, size_t in_kw_step, size_t in_kh_step, size_t in_sw_step, size_t kw_remainder,
|
||||
size_t write_mode);
|
||||
#endif
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,42 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_NNACL_FP32_CONV_SW_AVX_H_
|
||||
#define MINDSPORE_NNACL_FP32_CONV_SW_AVX_H_
|
||||
|
||||
#include "nnacl/pack.h"
|
||||
#include "nnacl/op_base.h"
|
||||
#include "nnacl/common_func.h"
|
||||
#include "nnacl/conv_parameter.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
void ConvSWAVXFp32(const float *input_data, const float *packed_weight, const float *bias_data, float *output_data,
|
||||
int task_id, ConvParameter *conv_param, SlidingWindowParam *sw_param);
|
||||
|
||||
#ifdef ENABLE_DEBUG
|
||||
void SWConvWxKAVXKernel(float *dst, const float *src, const float *weight, const float *bias, size_t kernel_h,
|
||||
size_t kernel_w, size_t act_flag, size_t ow_block, size_t oc_block, size_t oc_algin,
|
||||
size_t ic_algin, size_t in_kw_step, size_t in_kh_step, size_t in_sw_step, size_t kw_remainder,
|
||||
size_t write_mode);
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // MINDSPORE_NNACL_FP32_CONV_SW_AVX_H_
|
|
@ -13,8 +13,6 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifdef ENABLE_AVX512
|
||||
|
||||
#include "nnacl/fp32/matmul_avx512_fp32.h"
|
||||
#include "nnacl/op_base.h"
|
||||
#include "nnacl/intrinsics/ms_simd_instructions.h"
|
||||
|
@ -248,5 +246,3 @@ int64_t GemmIsNotPackOptimizeAVX512(int64_t m_index, const float *a, const float
|
|||
}
|
||||
return m_index;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -15,9 +15,7 @@
|
|||
*/
|
||||
#ifndef MINDSPORE_NNACL_FP32_MATMUL_AVX512_H_
|
||||
#define MINDSPORE_NNACL_FP32_MATMUL_AVX512_H_
|
||||
#include <stdint.h>
|
||||
#ifdef ENABLE_AVX512
|
||||
#include <x86intrin.h>
|
||||
#include "nnacl/op_base.h"
|
||||
typedef void (*GemmAvx512Kernel)(float *dst, const float *src, const float *weight, const float *bias,
|
||||
const size_t act_flag, const size_t row_block, const size_t col_block,
|
||||
const size_t deep, const size_t src_stride, const size_t dst_stride,
|
||||
|
@ -197,5 +195,4 @@ void nnacl_gemm_avx512_1x16_kernel_nhwc_fp32(float *dst, const float *src, const
|
|||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
#endif // MINDSPORE_NNACL_FP32_MATMUL_AVX512_H_
|
||||
|
|
|
@ -13,8 +13,6 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifdef ENABLE_AVX
|
||||
|
||||
#include "nnacl/fp32/matmul_avx_fp32.h"
|
||||
#include "nnacl/intrinsics/ms_simd_avx_instructions.h"
|
||||
|
||||
|
@ -954,4 +952,3 @@ void MatVecMulRowxColKernel(float *dst, const float *src, const float *weight, c
|
|||
}
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
|
|
@ -20,8 +20,6 @@
|
|||
#include <float.h>
|
||||
#include "nnacl/op_base.h"
|
||||
|
||||
#if defined(ENABLE_AVX)
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
@ -62,7 +60,5 @@ void MatVecMulRowxColKernel(float *dst, const float *src, const float *weight, c
|
|||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
#endif // MINDSPORE_NNACL_FP32_MATMUL_H_
|
||||
|
|
|
@ -34,8 +34,8 @@ int ConvolutionSWAVXCPUKernel::RunImpl(int task_id) {
|
|||
Conv1x1SWAVXFp32(input_data_, reinterpret_cast<float *>(packed_weight_), reinterpret_cast<float *>(bias_data_),
|
||||
output_data_, task_id, conv_param_, slidingWindow_param_);
|
||||
} else {
|
||||
ConvSWFp32(input_data_, reinterpret_cast<float *>(packed_weight_), reinterpret_cast<float *>(bias_data_),
|
||||
output_data_, task_id, conv_param_, slidingWindow_param_);
|
||||
ConvSWAVXFp32(input_data_, reinterpret_cast<float *>(packed_weight_), reinterpret_cast<float *>(bias_data_),
|
||||
output_data_, task_id, conv_param_, slidingWindow_param_);
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue