forked from mindspore-Ecosystem/mindspore
!39851 [MSLITE][CPU] conv 1x1 slidewindows, runtime part code refactor
Merge pull request !39851 from Greatpan/conv_sw1x1_avx_master
This commit is contained in:
commit
2b35007b0c
|
@ -34,7 +34,7 @@
|
|||
#include "src/litert/kernel/cpu/fp32/convolution_depthwise_indirect_fp32.h"
|
||||
#endif
|
||||
#ifdef ENABLE_AVX
|
||||
#include "src/litert/kernel/cpu/fp32/convolution_slidewindow_fp32.h"
|
||||
#include "src/litert/kernel/cpu/fp32/convolution_slidewindow_avx_fp32.h"
|
||||
#endif
|
||||
|
||||
using mindspore::lite::KernelRegistrar;
|
||||
|
@ -227,7 +227,7 @@ kernel::LiteKernel *ConvolutionDelegateCPUKernel::CpuConvFp32NHWCKernelSelect()
|
|||
|
||||
#ifdef ENABLE_AVX
|
||||
if (kernel == nullptr && CheckAvxUseSWConv(conv_param)) {
|
||||
kernel = new (std::nothrow) kernel::ConvolutionSWCPUKernel(
|
||||
kernel = new (std::nothrow) kernel::ConvolutionSWAVXCPUKernel(
|
||||
op_parameter_, in_tensors_, out_tensors_, static_cast<const lite::InnerContext *>(this->ms_context_),
|
||||
origin_weight_, origin_bias_);
|
||||
}
|
||||
|
|
|
@ -0,0 +1,43 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifdef ENABLE_AVX
|
||||
#include "src/litert/kernel/cpu/fp32/convolution_slidewindow_avx_fp32.h"
|
||||
#include "nnacl/fp32/conv_common_fp32.h"
|
||||
#include "nnacl/fp32/conv_1x1_x86_fp32.h"
|
||||
|
||||
namespace mindspore::kernel {
|
||||
void ConvolutionSWAVXCPUKernel::InitGlobalVariable() {
|
||||
oc_tile_ = C8NUM;
|
||||
oc_res_ = conv_param_->output_channel_ % oc_tile_;
|
||||
if (conv_param_->kernel_h_ == 1 && conv_param_->kernel_w_ == 1) {
|
||||
// 1x1 conv is aligned to C8NUM
|
||||
in_tile_ = C8NUM;
|
||||
ic_res_ = conv_param_->input_channel_ % in_tile_;
|
||||
}
|
||||
}
|
||||
|
||||
int ConvolutionSWAVXCPUKernel::RunImpl(int task_id) {
|
||||
if (conv_param_->kernel_w_ == 1 && conv_param_->kernel_h_ == 1) {
|
||||
Conv1x1SWAVXFp32(input_data_, reinterpret_cast<float *>(packed_weight_), reinterpret_cast<float *>(bias_data_),
|
||||
output_data_, task_id, conv_param_, slidingWindow_param_);
|
||||
} else {
|
||||
ConvSWFp32(input_data_, reinterpret_cast<float *>(packed_weight_), reinterpret_cast<float *>(bias_data_),
|
||||
output_data_, task_id, conv_param_, slidingWindow_param_);
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
} // namespace mindspore::kernel
|
||||
#endif // ENABLE_AVX
|
|
@ -0,0 +1,35 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_CPU_FP32_CONVOLUTION_SLIDEWINDOW_AVX_FP32_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_CPU_FP32_CONVOLUTION_SLIDEWINDOW_AVX_FP32_H_
#ifdef ENABLE_AVX
#include <vector>
#include "src/litert/kernel/cpu/fp32/convolution_slidewindow_fp32.h"

namespace mindspore::kernel {
// AVX specialization of the sliding-window fp32 convolution kernel.
// Overrides the tiling setup (InitGlobalVariable) and the per-task compute
// routine (RunImpl); everything else is inherited from ConvolutionSWCPUKernel.
class ConvolutionSWAVXCPUKernel : public ConvolutionSWCPUKernel {
 public:
  // origin_weight / origin_bias are borrowed pointers owned by the caller
  // (same contract as the base class constructor).
  ConvolutionSWAVXCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                            const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
                            float *origin_weight, float *origin_bias)
      : ConvolutionSWCPUKernel(parameter, inputs, outputs, ctx, origin_weight, origin_bias) {}

  void InitGlobalVariable() override;
  int RunImpl(int task_id) override;
};
}  // namespace mindspore::kernel
#endif  // ENABLE_AVX
// Fixed: closing comment previously named the wrong macro
// (MINDSPORE_LITE_..._SLIDEWINDOW_FP32_H_, missing the AVX segment).
#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_CPU_FP32_CONVOLUTION_SLIDEWINDOW_AVX_FP32_H_
|
|
@ -28,14 +28,19 @@ using mindspore::lite::RET_NULL_PTR;
|
|||
using mindspore::lite::RET_OK;
|
||||
|
||||
namespace mindspore::kernel {
|
||||
int ConvolutionSWCPUKernel::Prepare() {
|
||||
oc_tile_ = C8NUM;
|
||||
void ConvolutionSWCPUKernel::InitGlobalVariable() {
|
||||
oc_tile_ = C1NUM;
|
||||
oc_res_ = conv_param_->output_channel_ % oc_tile_;
|
||||
if (conv_param_->kernel_h_ == 1 && conv_param_->kernel_w_ == 1) {
|
||||
// 1x1 conv is aligned to C8NUM
|
||||
in_tile_ = C8NUM;
|
||||
// 1x1 conv is aligned to C1NUM
|
||||
in_tile_ = C1NUM;
|
||||
ic_res_ = conv_param_->input_channel_ % in_tile_;
|
||||
}
|
||||
}
|
||||
|
||||
int ConvolutionSWCPUKernel::Prepare() {
|
||||
InitGlobalVariable();
|
||||
|
||||
if (op_parameter_->is_train_session_) {
|
||||
auto filter_tensor = in_tensors_.at(kWeightIndex);
|
||||
CHECK_NULL_RETURN(filter_tensor);
|
||||
|
@ -91,14 +96,8 @@ int ConvolutionSWCPUKernel::ReSize() {
|
|||
}
|
||||
|
||||
int ConvolutionSWCPUKernel::RunImpl(int task_id) {
|
||||
if (conv_param_->kernel_w_ == 1 && conv_param_->kernel_h_ == 1) {
|
||||
Conv1x1SWAVXFp32(input_data_, reinterpret_cast<float *>(packed_weight_), reinterpret_cast<float *>(bias_data_),
|
||||
output_data_, task_id, conv_param_, slidingWindow_param_);
|
||||
} else {
|
||||
ConvSWFp32(input_data_, reinterpret_cast<float *>(packed_weight_), reinterpret_cast<float *>(bias_data_),
|
||||
output_data_, task_id, conv_param_, slidingWindow_param_);
|
||||
}
|
||||
return RET_OK;
|
||||
MS_LOG(ERROR) << "new SlidingWindow run fail, do not support slidewindows fp32 implement!";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
int ConvolutionSWImpl(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
|
@ -127,8 +126,8 @@ int ConvolutionSWCPUKernel::InitTmpBuffer() {
|
|||
MS_LOG(ERROR) << "malloc tmp input_data_ failed.";
|
||||
return RET_NULL_PTR;
|
||||
}
|
||||
PackNHWCToNHWC8Fp32(input_data, input_data_, conv_param_->input_batch_,
|
||||
conv_param_->input_w_ * conv_param_->input_h_, conv_param_->input_channel_);
|
||||
PackNHWCToNHWCXFp32(input_data, input_data_, conv_param_->input_batch_,
|
||||
conv_param_->input_w_ * conv_param_->input_h_, conv_param_->input_channel_, oc_tile_);
|
||||
} else {
|
||||
input_data_ = input_data;
|
||||
}
|
||||
|
|
|
@ -36,10 +36,11 @@ class ConvolutionSWCPUKernel : public ConvolutionBaseCPUKernel {
|
|||
}
|
||||
}
|
||||
|
||||
virtual void InitGlobalVariable();
|
||||
int Prepare() override;
|
||||
int ReSize() override;
|
||||
int Run() override;
|
||||
int RunImpl(int task_id);
|
||||
virtual int RunImpl(int task_id);
|
||||
int InitTmpBuffer();
|
||||
|
||||
private:
|
||||
|
@ -55,6 +56,8 @@ class ConvolutionSWCPUKernel : public ConvolutionBaseCPUKernel {
|
|||
input_data_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
int oc_tile_ = C8NUM; // oc tile is C8NUM in avx
|
||||
int in_tile_ = 0; // input channel algin
|
||||
int oc_res_ = 0;
|
||||
|
|
Loading…
Reference in New Issue