forked from mindspore-Ecosystem/mindspore
code clean
This commit is contained in:
parent
59bdc993c8
commit
51c1182449
|
@ -271,9 +271,9 @@ void ConvWinogardFp32(float *input_data, float *trans_weight, const float *bias_
|
||||||
int out_h_block = UP_DIV(conv_param->output_h_, out_unit);
|
int out_h_block = UP_DIV(conv_param->output_h_, out_unit);
|
||||||
int output_count = out_w_block * out_h_block;
|
int output_count = out_w_block * out_h_block;
|
||||||
#ifdef ENABLE_ARM32
|
#ifdef ENABLE_ARM32
|
||||||
int tile_num = 4;
|
const int tile_num = 4;
|
||||||
#else
|
#else
|
||||||
int tile_num = 12;
|
const int tile_num = 12;
|
||||||
#endif
|
#endif
|
||||||
int output_tile_count = UP_DIV(output_count, tile_num);
|
int output_tile_count = UP_DIV(output_count, tile_num);
|
||||||
int out_channel = conv_param->output_channel_;
|
int out_channel = conv_param->output_channel_;
|
||||||
|
@ -470,9 +470,9 @@ void Conv3x3Fp32(float *input_data, float *transed_weight, const float *bias_dat
|
||||||
int out_h_block = UP_DIV(conv_param->output_h_, OUPUT_UNIT);
|
int out_h_block = UP_DIV(conv_param->output_h_, OUPUT_UNIT);
|
||||||
int output_count = out_w_block * out_h_block;
|
int output_count = out_w_block * out_h_block;
|
||||||
#ifdef ENABLE_ARM32
|
#ifdef ENABLE_ARM32
|
||||||
int tile_num = 4;
|
const int tile_num = 4;
|
||||||
#else
|
#else
|
||||||
int tile_num = 12;
|
const int tile_num = 12;
|
||||||
#endif
|
#endif
|
||||||
int output_tile_count = UP_DIV(output_count, tile_num);
|
int output_tile_count = UP_DIV(output_count, tile_num);
|
||||||
const int input_unit_square = 4 * 4;
|
const int input_unit_square = 4 * 4;
|
||||||
|
|
|
@ -41,9 +41,9 @@ int DeConvPostFp32C12x8(const float *src, float *tmp, const float *bias, float *
|
||||||
size_t output_plane = conv_param->output_w_ * conv_param->output_h_;
|
size_t output_plane = conv_param->output_w_ * conv_param->output_h_;
|
||||||
int oc8 = UP_ROUND(output_channel, C8NUM);
|
int oc8 = UP_ROUND(output_channel, C8NUM);
|
||||||
#ifdef ENABLE_ARM32
|
#ifdef ENABLE_ARM32
|
||||||
int tile_num = 4;
|
const int tile_num = 4;
|
||||||
#else
|
#else
|
||||||
int tile_num = 12;
|
const int tile_num = 12;
|
||||||
#endif
|
#endif
|
||||||
int in_plane12 = UP_ROUND(input_plane, tile_num);
|
int in_plane12 = UP_ROUND(input_plane, tile_num);
|
||||||
int src_iw_stride = C8NUM;
|
int src_iw_stride = C8NUM;
|
||||||
|
|
|
@ -55,8 +55,8 @@ void DecodeBoxes(const int num_boxes, const float *input_boxes, const float *anc
|
||||||
BboxCorner *decoded_box = (BboxCorner *)(decoded_boxes) + i;
|
BboxCorner *decoded_box = (BboxCorner *)(decoded_boxes) + i;
|
||||||
float y_center = box->y / scaler.y * anchor->h + anchor->y;
|
float y_center = box->y / scaler.y * anchor->h + anchor->y;
|
||||||
float x_center = box->x / scaler.x * anchor->w + anchor->x;
|
float x_center = box->x / scaler.x * anchor->w + anchor->x;
|
||||||
float h_half = 0.5f * expf(box->h / scaler.h) * anchor->h;
|
const float h_half = 0.5f * expf(box->h / scaler.h) * anchor->h;
|
||||||
float w_half = 0.5f * expf(box->w / scaler.w) * anchor->w;
|
const float w_half = 0.5f * expf(box->w / scaler.w) * anchor->w;
|
||||||
decoded_box->ymin = y_center - h_half;
|
decoded_box->ymin = y_center - h_half;
|
||||||
decoded_box->xmin = x_center - w_half;
|
decoded_box->xmin = x_center - w_half;
|
||||||
decoded_box->ymax = y_center + h_half;
|
decoded_box->ymax = y_center + h_half;
|
||||||
|
|
|
@ -68,9 +68,9 @@ void WinogradInputTransform(const float *input_data, float *trans_input, float *
|
||||||
}
|
}
|
||||||
// input transform
|
// input transform
|
||||||
#ifdef ENABLE_ARM32
|
#ifdef ENABLE_ARM32
|
||||||
int tile_num = 4;
|
const int tile_num = 4;
|
||||||
#else
|
#else
|
||||||
int tile_num = 12;
|
const int tile_num = 12;
|
||||||
#endif
|
#endif
|
||||||
int dst_ic4_offset = dst_plane_offset + ic * C4NUM;
|
int dst_ic4_offset = dst_plane_offset + ic * C4NUM;
|
||||||
size_t dst_step = tile_num * ic4 * C4NUM;
|
size_t dst_step = tile_num * ic4 * C4NUM;
|
||||||
|
@ -337,9 +337,9 @@ void Conv3x3Fp32InputTransform(const float *input_data, float *trans_input, floa
|
||||||
|
|
||||||
// input transform
|
// input transform
|
||||||
#ifdef ENABLE_ARM32
|
#ifdef ENABLE_ARM32
|
||||||
int tile_num = 4;
|
const int tile_num = 4;
|
||||||
#else
|
#else
|
||||||
int tile_num = 12;
|
const int tile_num = 12;
|
||||||
#endif
|
#endif
|
||||||
int dst_ic4_offset = dst_plane_offset + ic * C4NUM;
|
int dst_ic4_offset = dst_plane_offset + ic * C4NUM;
|
||||||
size_t dst_step = tile_num * ic4 * C4NUM;
|
size_t dst_step = tile_num * ic4 * C4NUM;
|
||||||
|
|
|
@ -51,6 +51,7 @@ int ConvolutionWinogradFP16CPUKernel::WinogradFilterTransformFp16(const float16_
|
||||||
}
|
}
|
||||||
auto matrix_gt_data_fp16 = reinterpret_cast<float16_t *>(malloc(input_unit_ * kernel_unit_ * sizeof(float16_t)));
|
auto matrix_gt_data_fp16 = reinterpret_cast<float16_t *>(malloc(input_unit_ * kernel_unit_ * sizeof(float16_t)));
|
||||||
if (matrix_gt_data_fp16 == nullptr) {
|
if (matrix_gt_data_fp16 == nullptr) {
|
||||||
|
free(matrix_g_data_fp16);
|
||||||
MS_LOG(ERROR) << "malloc matrix_gt_data_fp16 failed.";
|
MS_LOG(ERROR) << "malloc matrix_gt_data_fp16 failed.";
|
||||||
return RET_ERROR;
|
return RET_ERROR;
|
||||||
}
|
}
|
||||||
|
@ -61,16 +62,25 @@ int ConvolutionWinogradFP16CPUKernel::WinogradFilterTransformFp16(const float16_
|
||||||
// separate into two steps ===> tmp = G*g ===> out = tmp * GT
|
// separate into two steps ===> tmp = G*g ===> out = tmp * GT
|
||||||
auto tmp_weight_data = reinterpret_cast<float16_t *>(malloc(kernel_unit_ * kernel_unit_ * sizeof(float16_t)));
|
auto tmp_weight_data = reinterpret_cast<float16_t *>(malloc(kernel_unit_ * kernel_unit_ * sizeof(float16_t)));
|
||||||
if (tmp_weight_data == nullptr) {
|
if (tmp_weight_data == nullptr) {
|
||||||
|
free(matrix_g_data_fp16);
|
||||||
|
free(matrix_gt_data_fp16);
|
||||||
MS_LOG(ERROR) << "malloc tmp_weight_data failed.";
|
MS_LOG(ERROR) << "malloc tmp_weight_data failed.";
|
||||||
return RET_ERROR;
|
return RET_ERROR;
|
||||||
}
|
}
|
||||||
auto tmp_data = reinterpret_cast<float16_t *>(malloc(input_unit_ * kernel_unit_ * sizeof(float16_t)));
|
auto tmp_data = reinterpret_cast<float16_t *>(malloc(input_unit_ * kernel_unit_ * sizeof(float16_t)));
|
||||||
if (tmp_data == nullptr) {
|
if (tmp_data == nullptr) {
|
||||||
|
free(tmp_weight_data);
|
||||||
|
free(matrix_g_data_fp16);
|
||||||
|
free(matrix_gt_data_fp16);
|
||||||
MS_LOG(ERROR) << "malloc tmp_data failed.";
|
MS_LOG(ERROR) << "malloc tmp_data failed.";
|
||||||
return RET_ERROR;
|
return RET_ERROR;
|
||||||
}
|
}
|
||||||
auto trans_out_data = reinterpret_cast<float16_t *>(malloc(input_unit_ * input_unit_ * sizeof(float16_t)));
|
auto trans_out_data = reinterpret_cast<float16_t *>(malloc(input_unit_ * input_unit_ * sizeof(float16_t)));
|
||||||
if (trans_out_data == nullptr) {
|
if (trans_out_data == nullptr) {
|
||||||
|
free(tmp_data);
|
||||||
|
free(tmp_weight_data);
|
||||||
|
free(matrix_g_data_fp16);
|
||||||
|
free(matrix_gt_data_fp16);
|
||||||
MS_LOG(ERROR) << "malloc trans_out_data failed.";
|
MS_LOG(ERROR) << "malloc trans_out_data failed.";
|
||||||
return RET_ERROR;
|
return RET_ERROR;
|
||||||
}
|
}
|
||||||
|
@ -206,11 +216,14 @@ int ConvolutionWinogradFP16CPUKernel::InitWeightBias() {
|
||||||
}
|
}
|
||||||
auto matrix_gt = reinterpret_cast<float *>(malloc(input_unit_ * kernel_unit_ * sizeof(float)));
|
auto matrix_gt = reinterpret_cast<float *>(malloc(input_unit_ * kernel_unit_ * sizeof(float)));
|
||||||
if (matrix_gt == nullptr) {
|
if (matrix_gt == nullptr) {
|
||||||
|
free(matrix_g);
|
||||||
MS_LOG(ERROR) << "malloc matrix_gt failed.";
|
MS_LOG(ERROR) << "malloc matrix_gt failed.";
|
||||||
return RET_ERROR;
|
return RET_ERROR;
|
||||||
}
|
}
|
||||||
ret = MallocTransformMatrices();
|
ret = MallocTransformMatrices();
|
||||||
if (ret != RET_OK) {
|
if (ret != RET_OK) {
|
||||||
|
free(matrix_g);
|
||||||
|
free(matrix_gt);
|
||||||
MS_LOG(ERROR) << "Malloc transform matrices failed.";
|
MS_LOG(ERROR) << "Malloc transform matrices failed.";
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
@ -221,6 +234,8 @@ int ConvolutionWinogradFP16CPUKernel::InitWeightBias() {
|
||||||
float matrix_bt[MAX_LEN];
|
float matrix_bt[MAX_LEN];
|
||||||
ret = CookToomFilter(matrix_a, matrix_at, matrix_b, matrix_bt, matrix_g, matrix_gt, 0.5f, output_unit_, kernel_unit_);
|
ret = CookToomFilter(matrix_a, matrix_at, matrix_b, matrix_bt, matrix_g, matrix_gt, 0.5f, output_unit_, kernel_unit_);
|
||||||
if (ret != RET_OK) {
|
if (ret != RET_OK) {
|
||||||
|
free(matrix_g);
|
||||||
|
free(matrix_gt);
|
||||||
MS_LOG(ERROR) << "get matrix g from CookToomFilter failed.";
|
MS_LOG(ERROR) << "get matrix g from CookToomFilter failed.";
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
@ -235,6 +250,8 @@ int ConvolutionWinogradFP16CPUKernel::InitWeightBias() {
|
||||||
|
|
||||||
ret = WinogradFilterTransformFp16(execute_weight_, matrix_g, matrix_gt, oc_block);
|
ret = WinogradFilterTransformFp16(execute_weight_, matrix_g, matrix_gt, oc_block);
|
||||||
if (ret != RET_OK) {
|
if (ret != RET_OK) {
|
||||||
|
free(matrix_g);
|
||||||
|
free(matrix_gt);
|
||||||
MS_LOG(ERROR) << "winograd filter transfrom failed.";
|
MS_LOG(ERROR) << "winograd filter transfrom failed.";
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
@ -242,6 +259,8 @@ int ConvolutionWinogradFP16CPUKernel::InitWeightBias() {
|
||||||
// init bias
|
// init bias
|
||||||
bias_data_ = malloc(oc_block_num * oc_block * sizeof(float16_t));
|
bias_data_ = malloc(oc_block_num * oc_block * sizeof(float16_t));
|
||||||
if (bias_data_ == nullptr) {
|
if (bias_data_ == nullptr) {
|
||||||
|
free(matrix_g);
|
||||||
|
free(matrix_gt);
|
||||||
MS_LOG(ERROR) << "malloc bias_data_ failed.";
|
MS_LOG(ERROR) << "malloc bias_data_ failed.";
|
||||||
return RET_ERROR;
|
return RET_ERROR;
|
||||||
}
|
}
|
||||||
|
|
|
@ -200,7 +200,7 @@ int MatmulFP16CPUKernel::Run() {
|
||||||
}
|
}
|
||||||
auto b = reinterpret_cast<float *>(in_tensors_[1]->MutableData());
|
auto b = reinterpret_cast<float *>(in_tensors_[1]->MutableData());
|
||||||
auto out_tensor = out_tensors_[0];
|
auto out_tensor = out_tensors_[0];
|
||||||
float16_t *c_ptr;
|
float16_t *c_ptr = nullptr;
|
||||||
if (out_tensor->data_type() == kNumberTypeFloat32) {
|
if (out_tensor->data_type() == kNumberTypeFloat32) {
|
||||||
c_ptr = output_ptr_;
|
c_ptr = output_ptr_;
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -96,9 +96,9 @@ int Convolution3x3CPUKernel::InitTmpBuffer() {
|
||||||
MS_ASSERT(ctx_->allocator != nullptr);
|
MS_ASSERT(ctx_->allocator != nullptr);
|
||||||
|
|
||||||
#ifdef ENABLE_ARM32
|
#ifdef ENABLE_ARM32
|
||||||
int tile_num = 4;
|
const int tile_num = 4;
|
||||||
#else
|
#else
|
||||||
int tile_num = 12;
|
const int tile_num = 12;
|
||||||
#endif
|
#endif
|
||||||
size_t nhwc4_input_size =
|
size_t nhwc4_input_size =
|
||||||
ic4 * C4NUM * conv_param_->input_batch_ * conv_param_->input_h_ * conv_param_->input_w_ * sizeof(float);
|
ic4 * C4NUM * conv_param_->input_batch_ * conv_param_->input_h_ * conv_param_->input_w_ * sizeof(float);
|
||||||
|
|
|
@ -47,11 +47,14 @@ int ConvolutionWinogradCPUKernel::WinogradFilterTransform(const float *weight_da
|
||||||
}
|
}
|
||||||
auto tmp_data = reinterpret_cast<float *>(malloc(input_unit_ * kernel_unit_ * sizeof(float)));
|
auto tmp_data = reinterpret_cast<float *>(malloc(input_unit_ * kernel_unit_ * sizeof(float)));
|
||||||
if (tmp_data == nullptr) {
|
if (tmp_data == nullptr) {
|
||||||
|
free(tmp_weight_data);
|
||||||
MS_LOG(ERROR) << "malloc tmp_data failed.";
|
MS_LOG(ERROR) << "malloc tmp_data failed.";
|
||||||
return RET_MEMORY_FAILED;
|
return RET_MEMORY_FAILED;
|
||||||
}
|
}
|
||||||
auto trans_out_data = reinterpret_cast<float *>(malloc(input_unit_ * input_unit_ * sizeof(float)));
|
auto trans_out_data = reinterpret_cast<float *>(malloc(input_unit_ * input_unit_ * sizeof(float)));
|
||||||
if (trans_out_data == nullptr) {
|
if (trans_out_data == nullptr) {
|
||||||
|
free(tmp_data);
|
||||||
|
free(tmp_weight_data);
|
||||||
MS_LOG(ERROR) << "malloc trans_out_data failed.";
|
MS_LOG(ERROR) << "malloc trans_out_data failed.";
|
||||||
return RET_MEMORY_FAILED;
|
return RET_MEMORY_FAILED;
|
||||||
}
|
}
|
||||||
|
|
|
@ -149,7 +149,7 @@ int ReduceCPUKernel::Run() {
|
||||||
int ReduceCPUKernel::MallocTmpBuffer() {
|
int ReduceCPUKernel::MallocTmpBuffer() {
|
||||||
data_buffers_.clear();
|
data_buffers_.clear();
|
||||||
for (auto size : buffer_sizes_) {
|
for (auto size : buffer_sizes_) {
|
||||||
void *buffer;
|
void *buffer = nullptr;
|
||||||
if (data_type_ == kDataTypeFloat) {
|
if (data_type_ == kDataTypeFloat) {
|
||||||
buffer = context_->allocator->Malloc(size * sizeof(float));
|
buffer = context_->allocator->Malloc(size * sizeof(float));
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -143,6 +143,7 @@ int SqueezeInt8CPUKernel::Run() {
|
||||||
auto input_size = quant_Squeeze_parm_->input_sizes_[i];
|
auto input_size = quant_Squeeze_parm_->input_sizes_[i];
|
||||||
inputs_array[i] = reinterpret_cast<int8_t *>(malloc(sizeof(int8_t) * input_size));
|
inputs_array[i] = reinterpret_cast<int8_t *>(malloc(sizeof(int8_t) * input_size));
|
||||||
if (inputs_array[i] == nullptr) {
|
if (inputs_array[i] == nullptr) {
|
||||||
|
free(inputs_array);
|
||||||
MS_LOG(ERROR) << "malloc inputs_array[" << i << "]"
|
MS_LOG(ERROR) << "malloc inputs_array[" << i << "]"
|
||||||
<< " failed.";
|
<< " failed.";
|
||||||
return RET_ERROR;
|
return RET_ERROR;
|
||||||
|
|
Loading…
Reference in New Issue