forked from mindspore-Ecosystem/mindspore
!40800 [MSLITE][CPU] conv im2col refactor
Merge pull request !40800 from Greatpan/conv_im2col_refactor
This commit is contained in:
commit
e42ce78f04
|
@ -60,3 +60,4 @@
|
|||
"mindspore/mindspore/lite/examples/quick_start_micro/" "syntaxError"
|
||||
"mindspore/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/experimental" "unreadVariable"
|
||||
"mindspore/mindspore/lite/python/src/pybind_module.cc" "syntaxError"
|
||||
"mindspore/mindspore/lite/src/litert/kernel/cpu/fp32/convolution_im2col_fp32.cc" "knownConditionTrueFalse"
|
||||
|
|
|
@ -29,6 +29,40 @@ else()
|
|||
list(REMOVE_ITEM KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/base/quant_dtype_cast.cc)
|
||||
endif()
|
||||
|
||||
# ARM64-specific kernel sources are dropped from KERNEL_SRC on every other platform.
if(NOT PLATFORM_ARM64)
    set(KERNEL_SRC_ARM64_FILE
        ${CMAKE_CURRENT_SOURCE_DIR}/fp32/convolution_im2col_arm64_fp32.cc
        ${CMAKE_CURRENT_SOURCE_DIR}/fp32/matmul_fp32_arm64.cc
    )
    list(REMOVE_ITEM KERNEL_SRC ${KERNEL_SRC_ARM64_FILE})
endif()
|
||||
|
||||
# ARM32-specific kernel sources are dropped from KERNEL_SRC on every other platform.
if(NOT PLATFORM_ARM32)
    set(KERNEL_SRC_ARM32_FILE
        ${CMAKE_CURRENT_SOURCE_DIR}/fp32/convolution_im2col_arm32_fp32.cc
        ${CMAKE_CURRENT_SOURCE_DIR}/fp32/matmul_fp32_arm32.cc
    )
    list(REMOVE_ITEM KERNEL_SRC ${KERNEL_SRC_ARM32_FILE})
endif()
|
||||
|
||||
# The SSE kernel sources are kept only when X86_64_SIMD is "sse", "avx" or "avx512";
# for any other value they are removed from KERNEL_SRC.
if(NOT "${X86_64_SIMD}" STREQUAL "sse"
   AND NOT "${X86_64_SIMD}" STREQUAL "avx"
   AND NOT "${X86_64_SIMD}" STREQUAL "avx512")
    set(KERNEL_SRC_SSE_FILE
        ${CMAKE_CURRENT_SOURCE_DIR}/fp32/convolution_im2col_sse_fp32.cc
        ${CMAKE_CURRENT_SOURCE_DIR}/fp32/matmul_fp32_sse.cc
    )
    list(REMOVE_ITEM KERNEL_SRC ${KERNEL_SRC_SSE_FILE})
endif()
|
||||
|
||||
# The AVX kernel sources are kept only when X86_64_SIMD is "avx" or "avx512";
# for any other value they are removed from KERNEL_SRC.
if(NOT "${X86_64_SIMD}" STREQUAL "avx" AND NOT "${X86_64_SIMD}" STREQUAL "avx512")
    set(KERNEL_SRC_AVX_FILE
        ${CMAKE_CURRENT_SOURCE_DIR}/fp32/convolution_im2col_avx_fp32.cc
        ${CMAKE_CURRENT_SOURCE_DIR}/fp32/matmul_fp32_avx.cc
        ${CMAKE_CURRENT_SOURCE_DIR}/fp32/convolution_slidewindows_avx_fp32.cc
    )
    list(REMOVE_ITEM KERNEL_SRC ${KERNEL_SRC_AVX_FILE})
endif()
|
||||
|
||||
# The AVX512 matmul source is kept only when X86_64_SIMD is exactly "avx512".
if(NOT "${X86_64_SIMD}" STREQUAL "avx512")
    set(KERNEL_SRC_AVX512_FILE
        ${CMAKE_CURRENT_SOURCE_DIR}/fp32/matmul_fp32_avx512.cc
    )
    list(REMOVE_ITEM KERNEL_SRC ${KERNEL_SRC_AVX512_FILE})
endif()
|
||||
|
||||
if(MSLITE_ENABLE_SPARSE_COMPUTE)
|
||||
file(GLOB SPARSE_KERNEL_SRC
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/fp32_sparse/*.cc
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
|
||||
#include "src/litert/kernel/cpu/fp32/convolution_delegate_fp32.h"
|
||||
#include "src/litert/kernel_registry.h"
|
||||
#include "src/litert/kernel/cpu/fp32/convolution_fp32.h"
|
||||
#include "src/litert/kernel/cpu/fp32/convolution_im2col_fp32.h"
|
||||
#include "src/litert/kernel/cpu/fp32/convolution_1x1_fp32.h"
|
||||
#include "src/litert/kernel/cpu/fp32/convolution_winograd_fp32.h"
|
||||
#include "src/litert/kernel/cpu/fp32/convolution_depthwise_fp32.h"
|
||||
|
@ -189,9 +189,9 @@ kernel::LiteKernel *ConvolutionDelegateCPUKernel::CpuConvFp32NC4KernelSelect() {
|
|||
#endif
|
||||
|
||||
#if defined(ENABLE_ARM64) || defined(ENABLE_AVX)
|
||||
auto kernel = new (std::nothrow) kernel::ConvolutionCPUKernel(
|
||||
op_parameter_, in_tensors_, out_tensors_, static_cast<const lite::InnerContext *>(this->ms_context_),
|
||||
origin_weight_, origin_bias_);
|
||||
auto kernel = CreateConvolutionIm2ColCPUKernel(op_parameter_, in_tensors_, out_tensors_,
|
||||
static_cast<const lite::InnerContext *>(this->ms_context_),
|
||||
origin_weight_, origin_bias_);
|
||||
return kernel;
|
||||
#endif
|
||||
|
||||
|
@ -281,9 +281,9 @@ kernel::LiteKernel *ConvolutionDelegateCPUKernel::CpuConvFp32NHWCKernelSelect()
|
|||
op_parameter_, in_tensors_, out_tensors_, static_cast<const lite::InnerContext *>(this->ms_context_),
|
||||
origin_weight_, origin_bias_);
|
||||
} else {
|
||||
kernel = new (std::nothrow) kernel::ConvolutionCPUKernel(
|
||||
op_parameter_, in_tensors_, out_tensors_, static_cast<const lite::InnerContext *>(this->ms_context_),
|
||||
origin_weight_, origin_bias_);
|
||||
kernel = CreateConvolutionIm2ColCPUKernel(op_parameter_, in_tensors_, out_tensors_,
|
||||
static_cast<const lite::InnerContext *>(this->ms_context_),
|
||||
origin_weight_, origin_bias_);
|
||||
}
|
||||
}
|
||||
return kernel;
|
||||
|
|
|
@ -0,0 +1,26 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/litert/kernel/cpu/fp32/convolution_im2col_arm32_fp32.h"
|
||||
|
||||
namespace mindspore::kernel {
|
||||
// Sets the tiling constants and the weight packing routine for the ARM32 variant:
// output channels are tiled by C4NUM, rows by C12NUM, and weights are packed
// with RowMajor2Col4Major.
void ConvolutionIm2ColARM32CPUKernel::InitGlobalVariable() {
  rowMajor2ColNMajorFunc = RowMajor2Col4Major;

  row_tile_ = C12NUM;
  oc_tile_ = C4NUM;
}
|
||||
} // namespace mindspore::kernel
|
|
@ -0,0 +1,36 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_CPU_FP32_CONVOLUTION_IM2COL_ARM32_FP32_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_CPU_FP32_CONVOLUTION_IM2COL_ARM32_FP32_H_
|
||||
|
||||
#include <vector>
|
||||
#include "src/litert/kernel/cpu/fp32/convolution_im2col_base_fp32.h"
|
||||
|
||||
namespace mindspore::kernel {
|
||||
class ConvolutionIm2ColARM32CPUKernel : public ConvolutionIm2ColBaseCPUKernel {
|
||||
public:
|
||||
ConvolutionIm2ColARM32CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
|
||||
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
|
||||
float *origin_weight, float *origin_bias)
|
||||
: ConvolutionIm2ColBaseCPUKernel(parameter, inputs, outputs, ctx, origin_weight, origin_bias) {}
|
||||
~ConvolutionIm2ColARM32CPUKernel() override {}
|
||||
|
||||
void InitGlobalVariable() override;
|
||||
};
|
||||
} // namespace mindspore::kernel
|
||||
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_CPU_FP32_CONVOLUTION_IM2COL_ARM32_FP32_H_
|
|
@ -0,0 +1,48 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/litert/kernel/cpu/fp32/convolution_im2col_arm64_fp32.h"
|
||||
#include "nnacl/fp32/conv_common_fp32.h"
|
||||
|
||||
using mindspore::lite::RET_ERROR;
|
||||
using mindspore::lite::RET_INFER_INVALID;
|
||||
using mindspore::lite::RET_OK;
|
||||
|
||||
namespace mindspore::kernel {
|
||||
// Sets the tiling constants and the weight packing routine for the ARM64 variant:
// output channels are tiled by C8NUM, rows by C12NUM, and weights are packed
// with RowMajor2Col8Major.
void ConvolutionIm2ColARM64CPUKernel::InitGlobalVariable() {
  rowMajor2ColNMajorFunc = RowMajor2Col8Major;

  row_tile_ = C12NUM;
  oc_tile_ = C8NUM;
}
|
||||
|
||||
int ConvolutionIm2ColARM64CPUKernel::RunImpl(int task_id) {
|
||||
auto ori_input_data = reinterpret_cast<float *>(in_tensors_.at(kInputIndex)->data());
|
||||
if (out_tensors_[0]->format() != NC4HW4) {
|
||||
if (use_batch_cut_flag_) {
|
||||
ConvFp32CutByBatch(ori_input_data, packed_input_, reinterpret_cast<float *>(packed_weight_),
|
||||
reinterpret_cast<float *>(bias_data_), col_major_input_, tmp_output_, task_id, conv_param_);
|
||||
} else {
|
||||
ConvFp32(ori_input_data, packed_input_, reinterpret_cast<float *>(packed_weight_),
|
||||
reinterpret_cast<float *>(bias_data_), col_major_input_, tmp_output_, task_id, conv_param_);
|
||||
}
|
||||
} else {
|
||||
ConvFp32OutNC4HW4(ori_input_data, packed_input_, reinterpret_cast<float *>(packed_weight_),
|
||||
reinterpret_cast<float *>(bias_data_), col_major_input_, tmp_output_, task_id, conv_param_);
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
} // namespace mindspore::kernel
|
|
@ -0,0 +1,37 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_CPU_FP32_CONVOLUTION_IM2COL_ARM64_FP32_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_CPU_FP32_CONVOLUTION_IM2COL_ARM64_FP32_H_
|
||||
|
||||
#include <vector>
|
||||
#include "src/litert/kernel/cpu/fp32/convolution_im2col_base_fp32.h"
|
||||
|
||||
namespace mindspore::kernel {
|
||||
class ConvolutionIm2ColARM64CPUKernel : public ConvolutionIm2ColBaseCPUKernel {
|
||||
public:
|
||||
ConvolutionIm2ColARM64CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
|
||||
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
|
||||
float *origin_weight, float *origin_bias)
|
||||
: ConvolutionIm2ColBaseCPUKernel(parameter, inputs, outputs, ctx, origin_weight, origin_bias) {}
|
||||
~ConvolutionIm2ColARM64CPUKernel() override {}
|
||||
|
||||
void InitGlobalVariable() override;
|
||||
int RunImpl(int task_id) override;
|
||||
};
|
||||
} // namespace mindspore::kernel
|
||||
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_CPU_FP32_CONVOLUTION_IM2COL_ARM64_FP32_H_
|
|
@ -0,0 +1,113 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/litert/kernel/cpu/fp32/convolution_im2col_avx_fp32.h"
|
||||
#include "nnacl/fp32/conv_common_fp32.h"
|
||||
|
||||
using mindspore::lite::RET_ERROR;
|
||||
using mindspore::lite::RET_INFER_INVALID;
|
||||
using mindspore::lite::RET_OK;
|
||||
|
||||
namespace mindspore::kernel {
|
||||
// Sets the tiling constants and the weight packing routine for the AVX variant:
// output channels are tiled by C16NUM, rows by C6NUM, and weights are packed
// with RowMajor2Col16Major.
void ConvolutionIm2ColAVXCPUKernel::InitGlobalVariable() {
  rowMajor2ColNMajorFunc = RowMajor2Col16Major;

  row_tile_ = C6NUM;
  oc_tile_ = C16NUM;
}
|
||||
|
||||
int ConvolutionIm2ColAVXCPUKernel::InitTmpBuffer() {
|
||||
MS_ASSERT(ctx_->allocator != nullptr);
|
||||
CHECK_NULL_RETURN(out_tensors_[0]);
|
||||
CHECK_NULL_RETURN(out_tensors_[0]->MutableData());
|
||||
|
||||
int unit_size =
|
||||
conv_param_->kernel_h_ * conv_param_->kernel_w_ * conv_param_->input_channel_ * row_tile_ * thread_count_;
|
||||
|
||||
packed_input_ = reinterpret_cast<float *>(ctx_->allocator->Malloc(unit_size * sizeof(float)));
|
||||
if (packed_input_ == nullptr) {
|
||||
MS_LOG(ERROR) << "malloc packed input failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
col_major_input_ = reinterpret_cast<float *>(ctx_->allocator->Malloc(unit_size * sizeof(float)));
|
||||
if (col_major_input_ == nullptr) {
|
||||
MS_LOG(ERROR) << "malloc col_major_input_ failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
if (conv_param_->output_channel_ % oc_tile_ != 0 && out_tensors_[0]->format() == NC4HW4) {
|
||||
output_need_align_ = true;
|
||||
int oc_algin = UP_DIV(conv_param_->output_channel_, oc_tile_);
|
||||
int pack_output_size =
|
||||
conv_param_->output_batch_ * conv_param_->output_h_ * conv_param_->output_w_ * oc_tile_ * oc_algin;
|
||||
tmp_output_ = reinterpret_cast<float *>(ms_context_->allocator->Malloc(pack_output_size * sizeof(float)));
|
||||
if (tmp_output_ == nullptr) {
|
||||
MS_LOG(ERROR) << "Malloc tmp_output_ buffer is failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int ConvolutionIm2ColAVXCPUKernel::RunImpl(int task_id) {
|
||||
auto ori_input_data = reinterpret_cast<float *>(in_tensors_.at(kInputIndex)->data());
|
||||
if (out_tensors_[0]->format() != NC4HW4) {
|
||||
if (use_batch_cut_flag_) {
|
||||
ConvFp32CutByBatch(ori_input_data, packed_input_, reinterpret_cast<float *>(packed_weight_),
|
||||
reinterpret_cast<float *>(bias_data_), col_major_input_, tmp_output_, task_id, conv_param_);
|
||||
} else {
|
||||
ConvFp32(ori_input_data, packed_input_, reinterpret_cast<float *>(packed_weight_),
|
||||
reinterpret_cast<float *>(bias_data_), col_major_input_, tmp_output_, task_id, conv_param_);
|
||||
}
|
||||
} else {
|
||||
ConvFp32OutNC4HW4(ori_input_data, packed_input_, reinterpret_cast<float *>(packed_weight_),
|
||||
reinterpret_cast<float *>(bias_data_), col_major_input_, tmp_output_, task_id, conv_param_);
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int ConvolutionIm2ColAVXCPUKernel::Run() {
|
||||
auto ret = InitTmpBuffer();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Init tmp buffer failed.";
|
||||
FreeTmpBuffer();
|
||||
return RET_ERROR;
|
||||
}
|
||||
auto output_addr = reinterpret_cast<float *>(out_tensors_.at(kOutputIndex)->MutableData());
|
||||
if (!output_need_align_) {
|
||||
tmp_output_ = output_addr;
|
||||
}
|
||||
if (RepackWeight() != RET_OK) {
|
||||
FreeTmpBuffer();
|
||||
MS_LOG(ERROR) << "Repack weight failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
ret = ParallelLaunch(this->ms_context_, ConvolutionIm2ColImpl, this, thread_count_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "conv error error_code[" << ret << "]";
|
||||
}
|
||||
|
||||
if (output_need_align_) {
|
||||
PackNC8HW8AlignedToNC8HW8NotAlignedFp32(tmp_output_, output_addr, conv_param_->output_batch_,
|
||||
conv_param_->output_h_ * conv_param_->output_w_,
|
||||
conv_param_->output_channel_);
|
||||
}
|
||||
|
||||
FreeTmpBuffer();
|
||||
return ret;
|
||||
}
|
||||
} // namespace mindspore::kernel
|
|
@ -0,0 +1,40 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_CPU_FP32_CONVOLUTION_IM2COL_AVX_FP32_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_CPU_FP32_CONVOLUTION_IM2COL_AVX_FP32_H_
|
||||
|
||||
#include <vector>
|
||||
#include "src/litert/kernel/cpu/fp32/convolution_im2col_base_fp32.h"
|
||||
|
||||
namespace mindspore::kernel {
|
||||
class ConvolutionIm2ColAVXCPUKernel : public ConvolutionIm2ColBaseCPUKernel {
|
||||
public:
|
||||
ConvolutionIm2ColAVXCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
|
||||
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
|
||||
float *origin_weight, float *origin_bias)
|
||||
: ConvolutionIm2ColBaseCPUKernel(parameter, inputs, outputs, ctx, origin_weight, origin_bias) {}
|
||||
~ConvolutionIm2ColAVXCPUKernel() override {}
|
||||
|
||||
void InitGlobalVariable() override;
|
||||
|
||||
int InitTmpBuffer() override;
|
||||
int Run() override;
|
||||
int RunImpl(int task_id) override;
|
||||
};
|
||||
} // namespace mindspore::kernel
|
||||
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_CPU_FP32_CONVOLUTION_IM2COL_AVX_FP32_H_
|
|
@ -0,0 +1,224 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/litert/kernel/cpu/fp32/convolution_im2col_base_fp32.h"
|
||||
#include "src/litert/pack_weight_manager.h"
|
||||
#include "include/errorcode.h"
|
||||
#include "nnacl/common_func.h"
|
||||
#include "schema/model_generated.h"
|
||||
#include "src/litert/kernel_registry.h"
|
||||
#include "nnacl/fp32/conv_common_fp32.h"
|
||||
#include "nnacl/fp32/matmul_fp32.h"
|
||||
|
||||
using mindspore::lite::RET_ERROR;
|
||||
using mindspore::lite::RET_INFER_INVALID;
|
||||
using mindspore::lite::RET_OK;
|
||||
|
||||
namespace mindspore::kernel {
|
||||
#define CONV_MIN_CALC_BLOCK C1NUM
|
||||
|
||||
// Default tiling setup used when no architecture-specific subclass overrides it:
// output channels are tiled by C8NUM, rows by C12NUM, and weights are packed
// with RowMajor2Col8Major.
void ConvolutionIm2ColBaseCPUKernel::InitGlobalVariable() {
  rowMajor2ColNMajorFunc = RowMajor2Col8Major;

  row_tile_ = C12NUM;
  oc_tile_ = C8NUM;
}
|
||||
|
||||
int ConvolutionIm2ColBaseCPUKernel::InitTmpBuffer() {
|
||||
MS_ASSERT(ctx_->allocator != nullptr);
|
||||
CHECK_NULL_RETURN(out_tensors_[0]);
|
||||
CHECK_NULL_RETURN(out_tensors_[0]->MutableData());
|
||||
|
||||
int unit_size =
|
||||
conv_param_->kernel_h_ * conv_param_->kernel_w_ * conv_param_->input_channel_ * row_tile_ * thread_count_;
|
||||
|
||||
packed_input_ = reinterpret_cast<float *>(ctx_->allocator->Malloc(unit_size * sizeof(float)));
|
||||
if (packed_input_ == nullptr) {
|
||||
MS_LOG(ERROR) << "malloc packed input failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
col_major_input_ = reinterpret_cast<float *>(ctx_->allocator->Malloc(unit_size * sizeof(float)));
|
||||
if (col_major_input_ == nullptr) {
|
||||
MS_LOG(ERROR) << "malloc col_major_input_ failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int ConvolutionIm2ColBaseCPUKernel::Prepare() {
|
||||
CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
|
||||
CHECK_LESS_RETURN(out_tensors_.size(), 1);
|
||||
InitGlobalVariable();
|
||||
if (op_parameter_->is_train_session_) {
|
||||
auto filter_tensor = in_tensors_.at(kWeightIndex);
|
||||
CHECK_NULL_RETURN(filter_tensor);
|
||||
size_t in_channel = filter_tensor->Channel();
|
||||
size_t out_channel = filter_tensor->Batch();
|
||||
size_t oc_block_num = UP_ROUND(out_channel, oc_tile_);
|
||||
size_t kernel_plane = filter_tensor->Height() * filter_tensor->Width();
|
||||
size_t pack_weight_size = oc_block_num * in_channel * kernel_plane;
|
||||
set_workspace_size(pack_weight_size * sizeof(float));
|
||||
}
|
||||
auto ret = InitConvWeightBias();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Init weight bias failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int ConvolutionIm2ColBaseCPUKernel::UpdateThreadNumProcess(int32_t kernel_type, int64_t per_unit_load_num,
|
||||
int64_t per_unit_store_num, int64_t unit_num) {
|
||||
if (conv_param_->input_batch_ % conv_param_->thread_num_ == 0) {
|
||||
use_batch_cut_flag_ = true;
|
||||
return RET_OK;
|
||||
} else {
|
||||
use_batch_cut_flag_ = false;
|
||||
}
|
||||
|
||||
auto output_hw = conv_param_->output_h_ * conv_param_->output_w_;
|
||||
|
||||
conv_param_->thread_num_ =
|
||||
MSMIN(UP_DIV(UP_DIV(output_hw, row_tile_), CONV_MIN_CALC_BLOCK), op_parameter_->thread_num_);
|
||||
thread_count_ = conv_param_->thread_num_;
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int ConvolutionIm2ColBaseCPUKernel::ReSize() {
|
||||
auto ret = ConvolutionBaseCPUKernel::CheckResizeValid();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Resize is invalid.";
|
||||
return ret;
|
||||
}
|
||||
ret = ConvolutionBaseCPUKernel::Prepare();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "conv base init failed.";
|
||||
return ret;
|
||||
}
|
||||
if (UpdateThreadNumPass(TC_PTYPE(type_), 0, 0, 0) != RET_OK) {
|
||||
return RET_ERROR;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int ConvolutionIm2ColBaseCPUKernel::RunImpl(int task_id) {
|
||||
auto ori_input_data = reinterpret_cast<float *>(in_tensors_.at(kInputIndex)->data());
|
||||
|
||||
if (use_batch_cut_flag_) {
|
||||
ConvFp32CutByBatch(ori_input_data, packed_input_, reinterpret_cast<float *>(packed_weight_),
|
||||
reinterpret_cast<float *>(bias_data_), col_major_input_, tmp_output_, task_id, conv_param_);
|
||||
} else {
|
||||
ConvFp32(ori_input_data, packed_input_, reinterpret_cast<float *>(packed_weight_),
|
||||
reinterpret_cast<float *>(bias_data_), col_major_input_, tmp_output_, task_id, conv_param_);
|
||||
}
|
||||
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int ConvolutionIm2ColImpl(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
auto conv = reinterpret_cast<ConvolutionIm2ColBaseCPUKernel *>(cdata);
|
||||
auto error_code = conv->RunImpl(task_id);
|
||||
if (error_code != RET_OK) {
|
||||
MS_LOG(ERROR) << "Convolution Run error task_id[" << task_id << "] error_code[" << error_code << "]";
|
||||
return RET_ERROR;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int ConvolutionIm2ColBaseCPUKernel::Run() {
|
||||
auto ret = InitTmpBuffer();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Init tmp buffer failed.";
|
||||
FreeTmpBuffer();
|
||||
return RET_ERROR;
|
||||
}
|
||||
auto output_addr = reinterpret_cast<float *>(out_tensors_.at(kOutputIndex)->MutableData());
|
||||
if (!output_need_align_) {
|
||||
tmp_output_ = output_addr;
|
||||
}
|
||||
if (RepackWeight() != RET_OK) {
|
||||
FreeTmpBuffer();
|
||||
MS_LOG(ERROR) << "Repack weight failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
ret = ParallelLaunch(this->ms_context_, ConvolutionIm2ColImpl, this, thread_count_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "conv error error_code[" << ret << "]";
|
||||
}
|
||||
|
||||
FreeTmpBuffer();
|
||||
return ret;
|
||||
}
|
||||
|
||||
void ConvolutionIm2ColBaseCPUKernel::PackWeight() {
|
||||
auto filter_tensor = in_tensors_.at(kWeightIndex);
|
||||
int32_t in_channel = filter_tensor->Channel();
|
||||
if (in_channel < 0) {
|
||||
MS_LOG(ERROR) << "get channel from filter_tensor failed.";
|
||||
return;
|
||||
}
|
||||
int32_t out_channel = filter_tensor->Batch();
|
||||
if (out_channel < 0) {
|
||||
MS_LOG(ERROR) << "get batch from filter_tensor failed.";
|
||||
return;
|
||||
}
|
||||
int32_t kernel_plane = filter_tensor->Height() * filter_tensor->Width();
|
||||
if (kernel_plane < 0) {
|
||||
MS_LOG(ERROR) << "get height and width from filter_tensor failed.";
|
||||
return;
|
||||
}
|
||||
void *origin_weight = (op_parameter_->is_train_session_) ? filter_tensor->data() : origin_weight_;
|
||||
MS_ASSERT(origin_weight != nullptr);
|
||||
|
||||
MS_ASSERT(rowMajor2ColNMajorFunc != nullptr);
|
||||
rowMajor2ColNMajorFunc(reinterpret_cast<float *>(origin_weight), reinterpret_cast<float *>(packed_weight_),
|
||||
out_channel, in_channel * kernel_plane);
|
||||
}
|
||||
|
||||
int ConvolutionIm2ColBaseCPUKernel::MallocWeightBiasData() {
|
||||
auto filter_tensor = in_tensors_.at(kWeightIndex);
|
||||
int32_t in_channel = filter_tensor->Channel();
|
||||
int32_t out_channel = filter_tensor->Batch();
|
||||
MS_CHECK_TRUE_RET(in_channel > 0 && out_channel > 0, RET_ERROR);
|
||||
conv_param_->input_channel_ = in_channel;
|
||||
conv_param_->output_channel_ = out_channel;
|
||||
size_t oc_block_num = UP_ROUND(out_channel, oc_tile_);
|
||||
size_t kernel_plane = filter_tensor->Height() * filter_tensor->Width();
|
||||
size_t pack_weight_size = oc_block_num * in_channel * kernel_plane;
|
||||
if (!op_parameter_->is_train_session_) {
|
||||
CHECK_LESS_RETURN(MAX_MALLOC_SIZE, pack_weight_size * sizeof(float));
|
||||
packed_weight_ = lite::PackWeightManager::GetInstance()->GetPackData(
|
||||
in_tensors_[1]->data(), static_cast<size_t>(pack_weight_size) * sizeof(float), &weight_is_packed_);
|
||||
if (packed_weight_ == nullptr) {
|
||||
MS_LOG(ERROR) << "malloc packed weight failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
if (bias_data_ == nullptr) {
|
||||
CHECK_LESS_RETURN(MAX_MALLOC_SIZE, oc_block_num * sizeof(float));
|
||||
bias_data_ = malloc(oc_block_num * sizeof(float));
|
||||
if (bias_data_ == nullptr) {
|
||||
MS_LOG(ERROR) << "malloc bias failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
memset(bias_data_, 0, oc_block_num * sizeof(float));
|
||||
return RET_OK;
|
||||
}
|
||||
} // namespace mindspore::kernel
|
|
@ -0,0 +1,79 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_CPU_FP32_CONVOLUTION_IM2COL_BASE_FP32_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_CPU_FP32_CONVOLUTION_IM2COL_BASE_FP32_H_
|
||||
|
||||
#include <vector>
|
||||
#include "src/litert/lite_kernel.h"
|
||||
#include "nnacl/op_base.h"
|
||||
#include "src/litert/kernel/cpu/base/convolution_base.h"
|
||||
|
||||
namespace mindspore::kernel {
|
||||
// Signature of the weight packing routines (e.g. RowMajor2Col8Major) that a
// kernel stores in rowMajor2ColNMajorFunc and calls from PackWeight().
using RowMajor2ColNMajorFunc = void (*)(const float *src_ptr, float *dst_ptr, int row, int col);
|
||||
|
||||
// Task callback passed to ParallelLaunch by the im2col kernels' Run(): casts
// cdata to ConvolutionIm2ColBaseCPUKernel and calls RunImpl(task_id).
// Returns RET_OK on success and RET_ERROR when RunImpl() fails.
int ConvolutionIm2ColImpl(void *cdata, int task_id, float lhs_scale, float rhs_scale);
|
||||
class ConvolutionIm2ColBaseCPUKernel : public ConvolutionBaseCPUKernel {
|
||||
public:
|
||||
ConvolutionIm2ColBaseCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
|
||||
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
|
||||
float *origin_weight, float *origin_bias)
|
||||
: ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, origin_weight, origin_bias) {}
|
||||
~ConvolutionIm2ColBaseCPUKernel() override {}
|
||||
|
||||
virtual void InitGlobalVariable();
|
||||
int Prepare() override;
|
||||
virtual int InitTmpBuffer();
|
||||
int ReSize() override;
|
||||
int Run() override;
|
||||
virtual int RunImpl(int task_id);
|
||||
|
||||
protected:
|
||||
int MallocWeightBiasData() override;
|
||||
void PackWeight() override;
|
||||
void FreeTmpBuffer() {
|
||||
if (packed_input_ != nullptr) {
|
||||
ctx_->allocator->Free(packed_input_);
|
||||
packed_input_ = nullptr;
|
||||
}
|
||||
if (col_major_input_ != nullptr) {
|
||||
ctx_->allocator->Free(col_major_input_);
|
||||
col_major_input_ = nullptr;
|
||||
}
|
||||
if (output_need_align_ && tmp_output_ != nullptr) {
|
||||
ctx_->allocator->Free(tmp_output_);
|
||||
tmp_output_ = nullptr;
|
||||
output_need_align_ = false;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
int UpdateThreadNumProcess(int32_t kernel_type, int64_t per_unit_load_num, int64_t per_unit_store_num,
|
||||
int64_t unit_num) override;
|
||||
|
||||
protected:
|
||||
float *tmp_output_ = nullptr;
|
||||
float *packed_input_ = nullptr;
|
||||
float *col_major_input_ = nullptr;
|
||||
bool output_need_align_ = false;
|
||||
|
||||
int oc_tile_ = C8NUM; // oc tile is C8NUM in C
|
||||
int row_tile_ = C12NUM; // oc tile is C12NUM in C
|
||||
RowMajor2ColNMajorFunc rowMajor2ColNMajorFunc = nullptr;
|
||||
};
|
||||
} // namespace mindspore::kernel
|
||||
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_CPU_FP32_CONVOLUTION_IM2COL_BASE_FP32_H_
|
|
@ -0,0 +1,68 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/litert/kernel/cpu/fp32/convolution_im2col_fp32.h"
|
||||
#include "src/litert/kernel/cpu/fp32/convolution_im2col_base_fp32.h"
|
||||
#if defined(ENABLE_AVX)
|
||||
#include "src/litert/kernel/cpu/fp32/convolution_im2col_avx_fp32.h"
|
||||
#endif
|
||||
|
||||
#if defined(ENABLE_SSE)
|
||||
#include "src/litert/kernel/cpu/fp32/convolution_im2col_sse_fp32.h"
|
||||
#endif
|
||||
|
||||
#if defined(ENABLE_ARM32)
|
||||
#include "src/litert/kernel/cpu/fp32/convolution_im2col_arm32_fp32.h"
|
||||
#endif
|
||||
|
||||
#if defined(ENABLE_ARM64)
|
||||
#include "src/litert/kernel/cpu/fp32/convolution_im2col_arm64_fp32.h"
|
||||
#endif
|
||||
|
||||
namespace mindspore::kernel {
|
||||
// Creates the im2col convolution kernel best suited to the build target.
// Variants are tried in the order AVX, SSE, ARM64/ARM32 (each only when its
// ENABLE_* macro is defined), then the generic base kernel. A variant is tried
// only while no kernel has been created yet, so an earlier allocation is never
// overwritten.
//
// All arguments are forwarded unchanged to the chosen kernel's constructor.
// Returns the new kernel, or nullptr if every allocation attempt failed.
LiteKernel *CreateConvolutionIm2ColCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                                             const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
                                             float *origin_weight, float *origin_bias) {
  LiteKernel *kernel = nullptr;
#if defined(ENABLE_AVX)
  if (kernel == nullptr) {
    kernel = new (std::nothrow)
      kernel::ConvolutionIm2ColAVXCPUKernel(parameter, inputs, outputs, ctx, origin_weight, origin_bias);
  }
#endif

#if defined(ENABLE_SSE)
  if (kernel == nullptr) {
    kernel = new (std::nothrow)
      kernel::ConvolutionIm2ColSSECPUKernel(parameter, inputs, outputs, ctx, origin_weight, origin_bias);
  }
#endif

#if defined(ENABLE_ARM64)
  // Guarded like the branches above so a kernel created earlier is not leaked.
  if (kernel == nullptr) {
    kernel = new (std::nothrow)
      kernel::ConvolutionIm2ColARM64CPUKernel(parameter, inputs, outputs, ctx, origin_weight, origin_bias);
  }
#elif defined(ENABLE_ARM32)
  if (kernel == nullptr) {
    kernel = new (std::nothrow)
      kernel::ConvolutionIm2ColARM32CPUKernel(parameter, inputs, outputs, ctx, origin_weight, origin_bias);
  }
#endif

  // Generic fallback when no architecture-specific kernel was created.
  if (kernel == nullptr) {
    kernel = new (std::nothrow)
      kernel::ConvolutionIm2ColBaseCPUKernel(parameter, inputs, outputs, ctx, origin_weight, origin_bias);
  }
  return kernel;
}
|
||||
} // namespace mindspore::kernel
|
|
@ -0,0 +1,30 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_CPU_FP32_CONVOLUTION_IM2COL_FP32_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_CPU_FP32_CONVOLUTION_IM2COL_FP32_H_
|
||||
|
||||
#include <vector>
|
||||
#include "src/litert/lite_kernel.h"
|
||||
#include "nnacl/op_base.h"
|
||||
#include "src/litert/kernel/cpu/base/convolution_base.h"
|
||||
|
||||
namespace mindspore::kernel {
|
||||
// Creates the fp32 im2col convolution kernel matching the CPU features enabled at build time.
// The definition tries the AVX, SSE, ARM64 or ARM32 specialisation permitted by the ENABLE_* macros
// and falls back to ConvolutionIm2ColBaseCPUKernel when no specialised kernel was created.
// All arguments are forwarded unchanged to the chosen kernel's constructor.
// Returns nullptr if the final allocation fails (allocations use new (std::nothrow)).
LiteKernel *CreateConvolutionIm2ColCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
|
||||
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
|
||||
float *origin_weight, float *origin_bias);
|
||||
} // namespace mindspore::kernel
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_CPU_FP32_CONVOLUTION_IM2COL_FP32_H_
|
|
@ -0,0 +1,26 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/litert/kernel/cpu/fp32/convolution_im2col_sse_fp32.h"
|
||||
|
||||
namespace mindspore::kernel {
|
||||
// Configures the SSE variant: selects the row-major packing routine and the two tile sizes.
void ConvolutionIm2ColSSECPUKernel::InitGlobalVariable() {
  // Packing routine used by this variant (presumably packs into 8-wide column-major tiles,
  // going by its name -- confirm against the nnacl implementation).
  rowMajor2ColNMajorFunc = RowMajor2Col8Major;

  // Tile sizes for this variant.
  row_tile_ = C4NUM;
  oc_tile_ = C8NUM;
}
|
||||
} // namespace mindspore::kernel
|
|
@ -0,0 +1,36 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_CPU_FP32_CONVOLUTION_IM2COL_SSE_FP32_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_CPU_FP32_CONVOLUTION_IM2COL_SSE_FP32_H_
|
||||
|
||||
#include <vector>
|
||||
#include "src/litert/kernel/cpu/fp32/convolution_im2col_base_fp32.h"
|
||||
|
||||
namespace mindspore::kernel {
|
||||
class ConvolutionIm2ColSSECPUKernel : public ConvolutionIm2ColBaseCPUKernel {
|
||||
public:
|
||||
ConvolutionIm2ColSSECPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
|
||||
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
|
||||
float *origin_weight, float *origin_bias)
|
||||
: ConvolutionIm2ColBaseCPUKernel(parameter, inputs, outputs, ctx, origin_weight, origin_bias) {}
|
||||
~ConvolutionIm2ColSSECPUKernel() override {}
|
||||
|
||||
void InitGlobalVariable() override;
|
||||
};
|
||||
} // namespace mindspore::kernel
|
||||
|
||||
#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_CPU_FP32_CONVOLUTION_IM2COL_SSE_FP32_H_
|
Loading…
Reference in New Issue