forked from mindspore-Ecosystem/mindspore
!5305 [MS][LITE][Develop]conv1x1 prepare & memory optimize
Merge pull request !5305 from ling/bug
This commit is contained in:
commit
4d3eaa2068
|
@ -41,6 +41,7 @@ void RowMajor2Row12Major(float *src_ptr, float *dst_ptr, int row, int col) {
|
|||
}
|
||||
|
||||
void RowMajor2Col12Major(float *src_ptr, float *dst_ptr, size_t row, size_t col) {
|
||||
size_t row_up_12 = UP_ROUND(row, C12NUM);
|
||||
size_t row12 = row / C12NUM * C12NUM;
|
||||
size_t col4 = col / C4NUM * C4NUM;
|
||||
float *src_r = src_ptr;
|
||||
|
@ -129,12 +130,6 @@ void RowMajor2Col12Major(float *src_ptr, float *dst_ptr, size_t row, size_t col)
|
|||
dst_c[i] = src_c[i * col];
|
||||
}
|
||||
}
|
||||
for (; ci < col4; ci++) {
|
||||
float *dst_c = dst_r + ci * C12NUM;
|
||||
for (size_t i = 0; i < C12NUM; i++) {
|
||||
dst_c[i] = 0;
|
||||
}
|
||||
}
|
||||
src_r += C12NUM * col;
|
||||
dst_r += C12NUM * col;
|
||||
}
|
||||
|
@ -147,7 +142,7 @@ void RowMajor2Col12Major(float *src_ptr, float *dst_ptr, size_t row, size_t col)
|
|||
dst_r += 1;
|
||||
}
|
||||
|
||||
for (; ri < row12; ri++) {
|
||||
for (; ri < row_up_12; ri++) {
|
||||
for (size_t i = 0; i < col; i++) {
|
||||
dst_r[i * C12NUM] = 0;
|
||||
}
|
||||
|
|
|
@ -116,21 +116,19 @@ int Convolution1x1FP16CPUKernel::InitWeightBias() {
|
|||
}
|
||||
|
||||
int Convolution1x1FP16CPUKernel::Init() {
|
||||
if (!InferShapeDone()) {
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
matmul_param_ = new (std::nothrow) MatMulParameter();
|
||||
if (matmul_param_ == nullptr) {
|
||||
MS_LOG(ERROR) << "Init matmul_param_ failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
int ret = InitWeightBias();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Init weight bias failed.";
|
||||
return ret;
|
||||
}
|
||||
if (!InferShapeDone()) {
|
||||
return RET_OK;
|
||||
}
|
||||
return ReSize();
|
||||
}
|
||||
|
||||
|
|
|
@ -125,15 +125,14 @@ void Convolution1x1CPUKernel::Pre1x1Trans(float *src_input, float *src_output) {
|
|||
}
|
||||
|
||||
int Convolution1x1CPUKernel::Init() {
|
||||
if (!InferShapeDone()) {
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int error_code = InitConv1x1BiasWeight();
|
||||
if (error_code != RET_OK) {
|
||||
MS_LOG(ERROR) << "Convolution base init failed.";
|
||||
return error_code;
|
||||
}
|
||||
if (!InferShapeDone()) {
|
||||
return RET_OK;
|
||||
}
|
||||
return ReSize();
|
||||
}
|
||||
|
||||
|
|
|
@ -26,27 +26,17 @@ using mindspore::schema::PrimitiveType_DeConv2D;
|
|||
|
||||
namespace mindspore::kernel {
|
||||
DeConvolutionCPUKernel::~DeConvolutionCPUKernel() {
|
||||
FreeTmpBuffer();
|
||||
if (matmul_param_ != nullptr) {
|
||||
delete matmul_param_;
|
||||
matmul_param_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
void DeConvolutionCPUKernel::FreeTmpBuffer() {
|
||||
if (weight_ptr_ != nullptr) {
|
||||
free(weight_ptr_);
|
||||
weight_ptr_ = nullptr;
|
||||
}
|
||||
if (pack_input_ != nullptr) {
|
||||
free(pack_input_);
|
||||
pack_input_ = nullptr;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
int DeConvolutionCPUKernel::ReSize() {
|
||||
FreeTmpBuffer();
|
||||
ConvolutionBaseCPUKernel::Init();
|
||||
|
||||
int error_code = InitParam();
|
||||
|
@ -54,36 +44,35 @@ int DeConvolutionCPUKernel::ReSize() {
|
|||
MS_LOG(ERROR) << "deconv InitParam error!ret: " << error_code;
|
||||
return error_code;
|
||||
}
|
||||
|
||||
error_code = InitWeightBias();
|
||||
if (error_code != RET_OK) {
|
||||
MS_LOG(ERROR) << "deconv InitWeightBias error!ret: " << error_code;
|
||||
return error_code;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int DeConvolutionCPUKernel::InitWeightBias() {
|
||||
bias_data_ = malloc(UP_ROUND(conv_param_->output_channel_, C4NUM) * sizeof(float));
|
||||
auto weight_tensor = in_tensors_.at(kWeightIndex);
|
||||
auto input_channel = weight_tensor->Batch();
|
||||
auto output_channel = weight_tensor->Channel();
|
||||
auto kernel_h_ = weight_tensor->Height();
|
||||
auto kernel_w_ = weight_tensor->Width();
|
||||
|
||||
bias_data_ = malloc(UP_ROUND(output_channel, C4NUM) * sizeof(float));
|
||||
if (bias_data_ == nullptr) {
|
||||
MS_LOG(ERROR) << "deconv malloc bias_data_ error!";
|
||||
return RET_ERROR;
|
||||
}
|
||||
memset(bias_data_, 0, UP_ROUND(conv_param_->output_channel_, C4NUM) * sizeof(float));
|
||||
memset(bias_data_, 0, UP_ROUND(output_channel, C4NUM) * sizeof(float));
|
||||
if (in_tensors_.size() == 3) {
|
||||
memcpy(bias_data_, in_tensors_[2]->Data(), conv_param_->output_channel_ * sizeof(float));
|
||||
memcpy(bias_data_, in_tensors_[2]->Data(), output_channel * sizeof(float));
|
||||
}
|
||||
|
||||
size_t weight_pack_size = conv_param_->input_channel_ * conv_param_->kernel_w_ * conv_param_->kernel_h_ *
|
||||
UP_ROUND(conv_param_->output_channel_, C8NUM) * sizeof(float);
|
||||
size_t weight_pack_size = input_channel * kernel_w_ * kernel_h_ * UP_ROUND(output_channel, C8NUM) * sizeof(float);
|
||||
weight_ptr_ = reinterpret_cast<float *>(malloc(weight_pack_size));
|
||||
if (weight_ptr_ == nullptr) {
|
||||
MS_LOG(ERROR) << "deconv malloc weight_ptr_ error!";
|
||||
return RET_ERROR;
|
||||
}
|
||||
memset(weight_ptr_, 0, weight_pack_size);
|
||||
PackNHWCToC8HWN8Fp32(reinterpret_cast<float *>(in_tensors_[1]->Data()), weight_ptr_, conv_param_->input_channel_,
|
||||
kernel_plane_, conv_param_->output_channel_);
|
||||
PackNHWCToC8HWN8Fp32(reinterpret_cast<float *>(in_tensors_[1]->Data()), weight_ptr_, input_channel,
|
||||
kernel_w_ * kernel_h_, output_channel);
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
|
@ -100,12 +89,6 @@ int DeConvolutionCPUKernel::InitParam() {
|
|||
|
||||
thread_count_ = MSMIN(op_parameter_->thread_num_, UP_DIV(conv_param_->output_channel_, C8NUM));
|
||||
thread_stride_ = UP_DIV(UP_DIV(conv_param_->output_channel_, C8NUM), thread_count_);
|
||||
|
||||
pack_input_ = reinterpret_cast<float *>(malloc(matmul_param_->row_12_ * matmul_param_->deep_ * sizeof(float)));
|
||||
if (pack_input_ == nullptr) {
|
||||
MS_LOG(ERROR) << "deconv Malloc pack_input_ error!";
|
||||
return RET_ERROR;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
|
@ -138,6 +121,11 @@ int DeConvolutionCPUKernel::DoDeconv(int task_id) {
|
|||
}
|
||||
|
||||
int DeConvolutionCPUKernel::Init() {
|
||||
int error_code = InitWeightBias();
|
||||
if (error_code != RET_OK) {
|
||||
MS_LOG(ERROR) << "deconv InitWeightBias error!ret: " << error_code;
|
||||
return error_code;
|
||||
}
|
||||
if (!InferShapeDone()) {
|
||||
return RET_OK;
|
||||
}
|
||||
|
@ -153,6 +141,10 @@ void DeConvolutionCPUKernel::FreeRunBuf() {
|
|||
ctx_->allocator->Free(tmp_buffer_);
|
||||
tmp_buffer_ = nullptr;
|
||||
}
|
||||
if (pack_input_ != nullptr) {
|
||||
ctx_->allocator->Free(pack_input_);
|
||||
pack_input_ = nullptr;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -170,6 +162,13 @@ int DeConvolutionCPUKernel::InitRunBuf() {
|
|||
MS_LOG(ERROR) << "Conv1x1 Malloc tmp_buffer_ error!";
|
||||
return RET_NULL_PTR;
|
||||
}
|
||||
|
||||
pack_input_ =
|
||||
reinterpret_cast<float *>(ctx_->allocator->Malloc(matmul_param_->row_12_ * matmul_param_->deep_ * sizeof(float)));
|
||||
if (pack_input_ == nullptr) {
|
||||
MS_LOG(ERROR) << "deconv Malloc pack_input_ error!";
|
||||
return RET_ERROR;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
|
|
|
@ -49,7 +49,6 @@ class DeConvolutionCPUKernel : public ConvolutionBaseCPUKernel {
|
|||
void FreeRunBuf();
|
||||
int InitParam();
|
||||
int InitWeightBias();
|
||||
void FreeTmpBuffer();
|
||||
|
||||
private:
|
||||
MatMulParameter *matmul_param_ = nullptr;
|
||||
|
|
Loading…
Reference in New Issue