forked from mindspore-Ecosystem/mindspore
!26573 [MS][LITE][develop] optimize deconv fp16 ram
Merge pull request !26573 from sunsuodong/fix_deconv_winograd_fp16
This commit is contained in:
commit
56e61892bf
|
@ -274,6 +274,7 @@ int PackDeConvWgDataFp16(const float16_t *nhwc_weight, DeConvComputeUnit *unit,
|
|||
DECONV_WINOGRAD_DEFAULT_UNIT, unit->h_size_);
|
||||
if (ret != NNACL_OK) {
|
||||
free(current_unit_weight);
|
||||
current_unit_weight = NULL;
|
||||
return NNACL_ERRCODE_WINOGRAD_GENERATOR_ERROR;
|
||||
}
|
||||
|
||||
|
@ -343,6 +344,9 @@ int PackDeConvWgDataFp16(const float16_t *nhwc_weight, DeConvComputeUnit *unit,
|
|||
|
||||
void DeconvWgFp16(const float16_t *nhwc_input_, float16_t *tile_in, float16_t *tile_out, int start_index,
|
||||
int calculate_count, const ConvParameter *conv_param, DeConvParam *deconv_param, int task_id) {
|
||||
if (deconv_param->in_tile_w_count_ == 0) {
|
||||
return;
|
||||
}
|
||||
/* pack tile input */
|
||||
int tile_in_unit_stride = deconv_param->ic_up_ * DECONV_WINOGRAD_DEFAULT_TILE;
|
||||
float16x4_t zero = vdup_n_f16(0.0f);
|
||||
|
@ -382,6 +386,9 @@ void DeconvWgFp16(const float16_t *nhwc_input_, float16_t *tile_in, float16_t *t
|
|||
deconv_param->oc_up_ * DECONV_WINOGRAD_DEFAULT_TILE;
|
||||
|
||||
/* winograd a buffer */
|
||||
if (unit->winograd_.kh_ >= DECONV_WINOGRAD_BUFFER_COUNT) {
|
||||
return;
|
||||
}
|
||||
DeConvWgABuffer *tmp_a = &deconv_param->a_buffer_[unit->winograd_.kh_];
|
||||
float16_t *mid_a = (float16_t *)tmp_a->middle_buffer_ + task_id * unit->winograd_.kw_ * unit->winograd_.kh_ *
|
||||
DECONV_WINOGRAD_DEFAULT_TILE * deconv_param->ic_up_;
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
#include "src/runtime/kernel/arm/fp16/deconvolution_winograd_fp16.h"
|
||||
|
||||
using mindspore::lite::RET_ERROR;
|
||||
using mindspore::lite::RET_MEMORY_FAILED;
|
||||
using mindspore::lite::RET_NULL_PTR;
|
||||
using mindspore::lite::RET_OK;
|
||||
|
||||
|
@ -64,16 +65,6 @@ void DeConvWinogradFp16CPUKernel::FreeResizeBuf() {
|
|||
free(tile_input_);
|
||||
tile_input_ = nullptr;
|
||||
}
|
||||
|
||||
if (tile_output_ != nullptr) {
|
||||
free(tile_output_);
|
||||
tile_output_ = nullptr;
|
||||
}
|
||||
|
||||
if (nc4hw4_output_ != nullptr) {
|
||||
free(nc4hw4_output_);
|
||||
nc4hw4_output_ = nullptr;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -114,12 +105,6 @@ int DeConvWinogradFp16CPUKernel::InitParameter() {
|
|||
deconv_param_->input_plane_ = conv_param_->input_h_ * conv_param_->input_w_;
|
||||
deconv_param_->output_plane_ = conv_param_->output_h_ * conv_param_->output_w_;
|
||||
|
||||
nc4hw4_output_ =
|
||||
reinterpret_cast<float16_t *>(malloc(deconv_param_->oc_up_ * deconv_param_->output_plane_ * sizeof(float16_t)));
|
||||
if (nc4hw4_output_ == nullptr) {
|
||||
return RET_NULL_PTR;
|
||||
}
|
||||
|
||||
deconv_param_->in_tile_w_count_ = UP_DIV(conv_param_->input_w_, DECONV_WINOGRAD_DEFAULT_UNIT);
|
||||
deconv_param_->in_tile_h_count_ = UP_DIV(conv_param_->input_h_, DECONV_WINOGRAD_DEFAULT_UNIT);
|
||||
|
||||
|
@ -141,12 +126,6 @@ int DeConvWinogradFp16CPUKernel::InitParameter() {
|
|||
|
||||
deconv_param_->out_tile_w_ = (DECONV_WINOGRAD_DEFAULT_UNIT - 1) * conv_param_->stride_w_ + conv_param_->kernel_w_;
|
||||
deconv_param_->out_tile_h_ = (DECONV_WINOGRAD_DEFAULT_UNIT - 1) * conv_param_->stride_h_ + conv_param_->kernel_h_;
|
||||
size = deconv_param_->thread_num_ * deconv_param_->out_tile_w_ * deconv_param_->out_tile_h_ *
|
||||
DECONV_WINOGRAD_DEFAULT_TILE * deconv_param_->oc_up_;
|
||||
tile_output_ = reinterpret_cast<float16_t *>(malloc(size * sizeof(float16_t)));
|
||||
if (tile_output_ == nullptr) {
|
||||
return RET_NULL_PTR;
|
||||
}
|
||||
|
||||
for (int i = 0; i < deconv_param_->compute_size_; i++) {
|
||||
DeConvComputeUnit &unit = deconv_param_->compute_units_[i];
|
||||
|
@ -438,6 +417,37 @@ int DeConvWinogradFp16CPUKernel::Prepare() {
|
|||
return ReSize();
|
||||
}
|
||||
|
||||
int DeConvWinogradFp16CPUKernel::InitRunBuf() {
|
||||
int size = deconv_param_->oc_up_ * deconv_param_->output_plane_;
|
||||
nc4hw4_output_ = reinterpret_cast<float16_t *>(ctx_->allocator->Malloc(size * sizeof(float16_t)));
|
||||
if (nc4hw4_output_ == nullptr) {
|
||||
MS_LOG(ERROR) << "de conv wg Malloc nc4hw4_output_ error!";
|
||||
return RET_MEMORY_FAILED;
|
||||
}
|
||||
|
||||
size = deconv_param_->thread_num_ * deconv_param_->out_tile_w_ * deconv_param_->out_tile_h_ *
|
||||
DECONV_WINOGRAD_DEFAULT_TILE * deconv_param_->oc_up_;
|
||||
tile_output_ = reinterpret_cast<float16_t *>(ctx_->allocator->Malloc(size * sizeof(float16_t)));
|
||||
if (tile_output_ == nullptr) {
|
||||
MS_LOG(ERROR) << "de conv wg Malloc tile_output_ error!";
|
||||
return RET_MEMORY_FAILED;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
void DeConvWinogradFp16CPUKernel::FreeRunBuf() {
|
||||
if (nc4hw4_output_ != nullptr) {
|
||||
ctx_->allocator->Free(nc4hw4_output_);
|
||||
nc4hw4_output_ = nullptr;
|
||||
}
|
||||
|
||||
if (tile_output_ != nullptr) {
|
||||
ctx_->allocator->Free(tile_output_);
|
||||
tile_output_ = nullptr;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
int DeConvWinogradFp16CPUKernel::Run() {
|
||||
auto input_tensor = in_tensors_.at(kInputIndex);
|
||||
auto output_tensor = out_tensors_.at(kOutputIndex);
|
||||
|
@ -446,18 +456,27 @@ int DeConvWinogradFp16CPUKernel::Run() {
|
|||
CHECK_NULL_RETURN(input_ptr);
|
||||
CHECK_NULL_RETURN(output_ptr);
|
||||
|
||||
if (InitRunBuf() != RET_OK) {
|
||||
MS_LOG(ERROR) << "InitRunBuf fail!";
|
||||
FreeRunBuf();
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
if (!valid_weight_shape_) {
|
||||
if (InitComputeParam() != RET_OK) {
|
||||
MS_LOG(ERROR) << "InitDataParam error!";
|
||||
FreeRunBuf();
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (!valid_weight_shape_ || InitParameter() != RET_OK) {
|
||||
MS_LOG(ERROR) << "InitDataParam error!";
|
||||
FreeRunBuf();
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
if (IsRepack() && InitDataParam() != RET_OK) {
|
||||
MS_LOG(ERROR) << "InitDataParam error!";
|
||||
FreeRunBuf();
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
|
@ -470,16 +489,18 @@ int DeConvWinogradFp16CPUKernel::Run() {
|
|||
auto ret = ParallelLaunch(this->ms_context_, DeConvWgFp16Run, this, deconv_param_->thread_num_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "DeConvWgFp16Run failed!";
|
||||
FreeRunBuf();
|
||||
return ret;
|
||||
}
|
||||
// post bias activate and nhwc
|
||||
ret = ParallelLaunch(this->ms_context_, DeConvWgPostFp16Run, this, thread_num_hw_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "DeConvWgPostFp16Run failed!";
|
||||
FreeRunBuf();
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
FreeRunBuf();
|
||||
return RET_OK;
|
||||
}
|
||||
} // namespace mindspore::kernel
|
||||
|
|
|
@ -36,7 +36,6 @@ class DeConvWinogradFp16CPUKernel : public ConvolutionBaseCPUKernel {
|
|||
int Run() override;
|
||||
int ReSize() override;
|
||||
|
||||
public:
|
||||
int DoDeconv(int task_id);
|
||||
int DeDeconvPost(int task_id);
|
||||
|
||||
|
@ -46,8 +45,9 @@ class DeConvWinogradFp16CPUKernel : public ConvolutionBaseCPUKernel {
|
|||
int InitParameter();
|
||||
void FreeDeconvParam();
|
||||
void FreeResizeBuf();
|
||||
int InitRunBuf();
|
||||
void FreeRunBuf();
|
||||
|
||||
private:
|
||||
DeConvParam *deconv_param_ = nullptr;
|
||||
std::mutex nc4hw4_mutex_;
|
||||
std::condition_variable nc4hw4_cond_var_;
|
||||
|
|
|
@ -479,15 +479,18 @@ int DeConvolutionWinogradCPUKernel::Run() {
|
|||
if (!valid_weight_shape_) {
|
||||
if (InitComputeParam() != RET_OK) {
|
||||
MS_LOG(ERROR) << "InitDataParam error!";
|
||||
FreeRunBuf();
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (!valid_weight_shape_ || InitParameter() != RET_OK) {
|
||||
MS_LOG(ERROR) << "InitDataParam error!";
|
||||
FreeRunBuf();
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
if (IsRepack() && InitDataParam() != RET_OK) {
|
||||
MS_LOG(ERROR) << "InitDataParam error!";
|
||||
FreeRunBuf();
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
|
|
|
@ -39,7 +39,6 @@ class DeConvolutionWinogradCPUKernel : public ConvolutionBaseCPUKernel {
|
|||
int Run() override;
|
||||
int ReSize() override;
|
||||
|
||||
public:
|
||||
int DoDeconv(int task_id);
|
||||
int DeDeconvPost(int task_id);
|
||||
|
||||
|
@ -52,7 +51,6 @@ class DeConvolutionWinogradCPUKernel : public ConvolutionBaseCPUKernel {
|
|||
int InitRunBuf();
|
||||
void FreeRunBuf();
|
||||
|
||||
private:
|
||||
DeConvParam *deconv_param_ = nullptr;
|
||||
float *nhwc_input_ = nullptr;
|
||||
float *nhwc_output_ = nullptr;
|
||||
|
|
Loading…
Reference in New Issue