!39851 [MSLITE][CPU] conv 1x1 slidewindows, runtime part code refactor

Merge pull request !39851 from Greatpan/conv_sw1x1_avx_master
This commit is contained in:
i-robot 2022-08-08 02:10:58 +00:00 committed by Gitee
commit 2b35007b0c
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
5 changed files with 97 additions and 17 deletions

View File

@ -34,7 +34,7 @@
#include "src/litert/kernel/cpu/fp32/convolution_depthwise_indirect_fp32.h"
#endif
#ifdef ENABLE_AVX
#include "src/litert/kernel/cpu/fp32/convolution_slidewindow_fp32.h"
#include "src/litert/kernel/cpu/fp32/convolution_slidewindow_avx_fp32.h"
#endif
using mindspore::lite::KernelRegistrar;
@ -227,7 +227,7 @@ kernel::LiteKernel *ConvolutionDelegateCPUKernel::CpuConvFp32NHWCKernelSelect()
#ifdef ENABLE_AVX
if (kernel == nullptr && CheckAvxUseSWConv(conv_param)) {
kernel = new (std::nothrow) kernel::ConvolutionSWCPUKernel(
kernel = new (std::nothrow) kernel::ConvolutionSWAVXCPUKernel(
op_parameter_, in_tensors_, out_tensors_, static_cast<const lite::InnerContext *>(this->ms_context_),
origin_weight_, origin_bias_);
}

View File

@ -0,0 +1,43 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifdef ENABLE_AVX
#include "src/litert/kernel/cpu/fp32/convolution_slidewindow_avx_fp32.h"
#include "nnacl/fp32/conv_common_fp32.h"
#include "nnacl/fp32/conv_1x1_x86_fp32.h"
namespace mindspore::kernel {
// Configure the AVX tiling parameters: output channels are tiled in groups
// of C8NUM, and for the 1x1-kernel fast path the input channels are aligned
// to C8NUM as well. The *_res_ members record the leftover (unaligned) tail.
void ConvolutionSWAVXCPUKernel::InitGlobalVariable() {
  oc_tile_ = C8NUM;
  oc_res_ = conv_param_->output_channel_ % oc_tile_;
  const bool is_1x1_kernel = (conv_param_->kernel_h_ == 1) && (conv_param_->kernel_w_ == 1);
  if (is_1x1_kernel) {
    // The 1x1 convolution path packs input channels to the same C8NUM width.
    in_tile_ = C8NUM;
    ic_res_ = conv_param_->input_channel_ % in_tile_;
  }
}
// Run one task's share of the sliding-window convolution. Dispatches to the
// dedicated 1x1 AVX kernel when both spatial kernel dimensions are 1, and to
// the generic sliding-window AVX kernel otherwise. Always returns RET_OK.
int ConvolutionSWAVXCPUKernel::RunImpl(int task_id) {
  auto *weight = reinterpret_cast<float *>(packed_weight_);
  auto *bias = reinterpret_cast<float *>(bias_data_);
  const bool is_1x1_kernel = (conv_param_->kernel_h_ == 1) && (conv_param_->kernel_w_ == 1);
  if (is_1x1_kernel) {
    Conv1x1SWAVXFp32(input_data_, weight, bias, output_data_, task_id, conv_param_, slidingWindow_param_);
  } else {
    ConvSWFp32(input_data_, weight, bias, output_data_, task_id, conv_param_, slidingWindow_param_);
  }
  return RET_OK;
}
} // namespace mindspore::kernel
#endif // ENABLE_AVX

View File

@ -0,0 +1,35 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_CPU_FP32_CONVOLUTION_SLIDEWINDOW_AVX_FP32_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_CPU_FP32_CONVOLUTION_SLIDEWINDOW_AVX_FP32_H_
#ifdef ENABLE_AVX
#include <vector>
#include "src/litert/kernel/cpu/fp32/convolution_slidewindow_fp32.h"
namespace mindspore::kernel {
// AVX specialization of the sliding-window FP32 convolution kernel.
// Reuses the base ConvolutionSWCPUKernel machinery (prepare/resize/run,
// weight packing, tmp buffers) and overrides only the tiling setup and the
// per-task compute so the AVX (C8NUM-tiled) code paths are used.
class ConvolutionSWAVXCPUKernel : public ConvolutionSWCPUKernel {
public:
// Forwards all construction to the base kernel; no extra state is added here.
ConvolutionSWAVXCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
float *origin_weight, float *origin_bias)
: ConvolutionSWCPUKernel(parameter, inputs, outputs, ctx, origin_weight, origin_bias) {}
// Sets the AVX channel tiling (oc_tile_/in_tile_ = C8NUM and their remainders).
void InitGlobalVariable() override;
// Per-task compute: dispatches to the 1x1 AVX kernel or the generic SW AVX kernel.
int RunImpl(int task_id) override;
};
} // namespace mindspore::kernel
#endif // ENABLE_AVX
#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_CPU_FP32_CONVOLUTION_SLIDEWINDOW_AVX_FP32_H_

View File

@ -28,14 +28,19 @@ using mindspore::lite::RET_NULL_PTR;
using mindspore::lite::RET_OK;
namespace mindspore::kernel {
int ConvolutionSWCPUKernel::Prepare() {
oc_tile_ = C8NUM;
void ConvolutionSWCPUKernel::InitGlobalVariable() {
oc_tile_ = C1NUM;
oc_res_ = conv_param_->output_channel_ % oc_tile_;
if (conv_param_->kernel_h_ == 1 && conv_param_->kernel_w_ == 1) {
// 1x1 conv is aligned to C8NUM
in_tile_ = C8NUM;
// 1x1 conv is aligned to C1NUM
in_tile_ = C1NUM;
ic_res_ = conv_param_->input_channel_ % in_tile_;
}
}
int ConvolutionSWCPUKernel::Prepare() {
InitGlobalVariable();
if (op_parameter_->is_train_session_) {
auto filter_tensor = in_tensors_.at(kWeightIndex);
CHECK_NULL_RETURN(filter_tensor);
@ -91,14 +96,8 @@ int ConvolutionSWCPUKernel::ReSize() {
}
int ConvolutionSWCPUKernel::RunImpl(int task_id) {
if (conv_param_->kernel_w_ == 1 && conv_param_->kernel_h_ == 1) {
Conv1x1SWAVXFp32(input_data_, reinterpret_cast<float *>(packed_weight_), reinterpret_cast<float *>(bias_data_),
output_data_, task_id, conv_param_, slidingWindow_param_);
} else {
ConvSWFp32(input_data_, reinterpret_cast<float *>(packed_weight_), reinterpret_cast<float *>(bias_data_),
output_data_, task_id, conv_param_, slidingWindow_param_);
}
return RET_OK;
MS_LOG(ERROR) << "new SlidingWindow run fail, do not support slidewindows fp32 implement!";
return RET_ERROR;
}
int ConvolutionSWImpl(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
@ -127,8 +126,8 @@ int ConvolutionSWCPUKernel::InitTmpBuffer() {
MS_LOG(ERROR) << "malloc tmp input_data_ failed.";
return RET_NULL_PTR;
}
PackNHWCToNHWC8Fp32(input_data, input_data_, conv_param_->input_batch_,
conv_param_->input_w_ * conv_param_->input_h_, conv_param_->input_channel_);
PackNHWCToNHWCXFp32(input_data, input_data_, conv_param_->input_batch_,
conv_param_->input_w_ * conv_param_->input_h_, conv_param_->input_channel_, oc_tile_);
} else {
input_data_ = input_data;
}

View File

@ -36,10 +36,11 @@ class ConvolutionSWCPUKernel : public ConvolutionBaseCPUKernel {
}
}
virtual void InitGlobalVariable();
int Prepare() override;
int ReSize() override;
int Run() override;
int RunImpl(int task_id);
virtual int RunImpl(int task_id);
int InitTmpBuffer();
private:
@ -55,6 +56,8 @@ class ConvolutionSWCPUKernel : public ConvolutionBaseCPUKernel {
input_data_ = nullptr;
}
}
protected:
int oc_tile_ = C8NUM; // oc tile is C8NUM in avx
int in_tile_ = 0; // input channel algin
int oc_res_ = 0;