From c43b41a378bde434e02aa1aaf80eb6fe2b5a43d8 Mon Sep 17 00:00:00 2001
From: Haim Moushkatel
Date: Wed, 22 Dec 2021 17:05:24 +0200
Subject: [PATCH] Cropper Support for ToD

---
 cmake/package_lite.cmake                      |   4 +
 .../cpu/nnacl/activation_parameter.h          |  28 ++
 .../cpu/nnacl/fp32/activation_fp32.h          |   9 +-
 .../cpu/nnacl/fp32_grad/batch_norm.h          |   7 +-
 .../nnacl/fp32_grad/batch_norm_parameter.h    |  27 ++
 .../cpu/nnacl/fp32_grad/pack_ext.c            |   1 -
 .../cpu/nnacl/fp32_grad/resize_grad.h         |  14 +-
 .../nnacl/fp32_grad/resize_grad_parameter.h   |  34 +++
 .../softmax_crossentropy_parameter.h          |  36 ++++
 .../cpu/nnacl/fp32_grad/softmax_grad.c        |   1 -
 .../cpu/nnacl/fp32_grad/softmax_grad.h        |  16 +-
 mindspore/lite/src/inner_kernel.h             |   2 +
 .../runtime/kernel/arm/fp32/batchnorm_fp32.cc |  10 ++
 .../runtime/kernel/arm/fp32/batchnorm_fp32.h  |   1 +
 .../src/train/train_populate_parameter.cc     |  10 +-
 .../src/train/train_populate_parameter_v0.cc  |   9 +-
 mindspore/lite/src/train/train_session.cc     |  20 +-
 mindspore/lite/src/train/transfer_session.cc  |  21 +-
 .../tools/cropper/build_cropper_config.sh     | 121 ++++++++++++------
 19 files changed, 253 insertions(+), 118 deletions(-)
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/activation_parameter.h
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/batch_norm_parameter.h
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/resize_grad_parameter.h
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/softmax_crossentropy_parameter.h

diff --git a/cmake/package_lite.cmake b/cmake/package_lite.cmake
index 8a581ba5cc4..838b613e369 100644
--- a/cmake/package_lite.cmake
+++ b/cmake/package_lite.cmake
@@ -599,6 +599,10 @@ else()
                 DESTINATION ${CROPPER_ROOT_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME})
         install(FILES ${TOP_DIR}/mindspore/lite/build/tools/cropper/cropper_mapping_npu.cfg
                 DESTINATION ${CROPPER_ROOT_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME})
+        if(SUPPORT_TRAIN)
+            install(FILES ${TOP_DIR}/mindspore/lite/build/tools/cropper/cropper_mapping_cpu_train.cfg
+                    DESTINATION ${CROPPER_ROOT_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME})
+        endif()
     endif()
 endif()

diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/activation_parameter.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/activation_parameter.h
new file mode 100644
index 00000000000..2b5bae55930
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/activation_parameter.h
@@ -0,0 +1,28 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_NNACL_ACTIVATION_PARAMETER_H_
+#define MINDSPORE_NNACL_ACTIVATION_PARAMETER_H_
+
+#include "nnacl/op_base.h"
+typedef struct ActivationParameter {
+  OpParameter op_parameter_;
+  int type_;
+  float alpha_;
+  float min_val_;
+  float max_val_;
+} ActivationParameter;
+
+#endif  // MINDSPORE_NNACL_ACTIVATION_PARAMETER_H_

diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/activation_fp32.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/activation_fp32.h
index 4090da2a294..288d88755dd 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/activation_fp32.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/activation_fp32.h
@@ -19,14 +19,7 @@
 #include <math.h>
 #include "nnacl/op_base.h"
 #include "nnacl/int8/fixed_point.h"
-
-typedef struct ActivationParameter {
-  OpParameter op_parameter_;
-  int type_;
-  float alpha_;
-  float min_val_;
-  float max_val_;
-} ActivationParameter;
+#include "nnacl/activation_parameter.h"
 
 #ifdef __cplusplus
 extern "C" {

diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/batch_norm.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/batch_norm.h
index 7cdf3f0ee48..2e31b13e4c3 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/batch_norm.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/batch_norm.h
@@ -17,12 +17,7 @@
 #ifndef MINDSPORE_NNACL_FP32_GRAD_BATCH_NORM_H_
 #define MINDSPORE_NNACL_FP32_GRAD_BATCH_NORM_H_
 
-#include "nnacl/op_base.h"
-
-typedef struct BNGradParameter {
-  OpParameter op_parameter_;
-  float epsilon_;
-} BNGradParameter;
+#include "nnacl/fp32_grad/batch_norm_parameter.h"
 
 #ifdef __cplusplus
 extern "C" {

diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/batch_norm_parameter.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/batch_norm_parameter.h
new file mode 100644
index 00000000000..b51acdf5ac9
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/batch_norm_parameter.h
@@ -0,0 +1,27 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_NNACL_FP32_GRAD_BATCH_NORM_PARAMETER_H_
+#define MINDSPORE_NNACL_FP32_GRAD_BATCH_NORM_PARAMETER_H_
+
+#include "nnacl/op_base.h"
+
+typedef struct BNGradParameter {
+  OpParameter op_parameter_;
+  float epsilon_;
+} BNGradParameter;
+
+#endif  // MINDSPORE_NNACL_FP32_GRAD_BATCH_NORM_PARAMETER_H_

diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/pack_ext.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/pack_ext.c
index 75032fb17c0..bc1113b80d8 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/pack_ext.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/pack_ext.c
@@ -16,7 +16,6 @@
 #include <string.h>
 #include "nnacl/fp32_grad/pack_ext.h"
-#include "nnacl/pack.h"
 
 void RollingIm2ColPackDwUnitFp32(const float *in_data, const ConvParameter *conv_param, float *data_col_orig,
                                  int real_cal_num, int start) {

diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/resize_grad.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/resize_grad.h
index 77d8cd6de36..6a21610b488 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/resize_grad.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/resize_grad.h
@@ -17,24 +17,12 @@
 #ifndef MINDSPORE_NNACL_FP32_GRAD_RESIZE_GRAD_H_
 #define MINDSPORE_NNACL_FP32_GRAD_RESIZE_GRAD_H_
 
-#include "nnacl/op_base.h"
+#include "nnacl/fp32_grad/resize_grad_parameter.h"
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-typedef struct ResizeGradParameter {
-  OpParameter op_parameter_;
-  bool align_corners_;
-  int method;
-  size_t in_height_;
-  size_t in_width_;
-  size_t out_height_;
-  size_t out_width_;
-  float height_scale_;
-  float width_scale_;
-} ResizeGradParameter;
-
 int ResizeNearestNeighborGrad(const float *in_addr, float *out_addr, int batch_size, int channel, int format,
                               const ResizeGradParameter *param);
 int ResizeBiLinearGrad(const float *in_addr, float *out_addr, int batch_size, int channel, int format,

diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/resize_grad_parameter.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/resize_grad_parameter.h
new file mode 100644
index 00000000000..b3991dfe69f
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/resize_grad_parameter.h
@@ -0,0 +1,34 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_NNACL_FP32_GRAD_RESIZE_GRAD_PARAMETER_H_
+#define MINDSPORE_NNACL_FP32_GRAD_RESIZE_GRAD_PARAMETER_H_
+
+#include "nnacl/op_base.h"
+
+typedef struct ResizeGradParameter {
+  OpParameter op_parameter_;
+  bool align_corners_;
+  int method;
+  size_t in_height_;
+  size_t in_width_;
+  size_t out_height_;
+  size_t out_width_;
+  float height_scale_;
+  float width_scale_;
+} ResizeGradParameter;
+
+#endif  // MINDSPORE_NNACL_FP32_GRAD_RESIZE_GRAD_PARAMETER_H_

diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/softmax_crossentropy_parameter.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/softmax_crossentropy_parameter.h
new file mode 100644
index 00000000000..4ba26e3afaf
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/softmax_crossentropy_parameter.h
@@ -0,0 +1,36 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_NNACL_FP32_GRAD_SOFTMAX_CROSSENTROPY_PARAMETER_H_
+#define MINDSPORE_NNACL_FP32_GRAD_SOFTMAX_CROSSENTROPY_PARAMETER_H_
+
+#include "nnacl/op_base.h"
+
+typedef struct SoftmaxCrossEntropyParameter {
+  // primitive parameter
+  OpParameter op_parameter_;
+  int n_dim_;
+
+  // shape correlative
+  int input_shape_[5];
+
+  // other parameter
+  int32_t batch_size_;
+  unsigned int number_of_classes_;
+  bool is_grad_;
+} SoftmaxCrossEntropyParameter;
+
+#endif  // MINDSPORE_NNACL_FP32_GRAD_SOFTMAX_CROSSENTROPY_PARAMETER_H_

diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/softmax_grad.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/softmax_grad.c
index d2e7cb53aa4..4886bd596d6 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/softmax_grad.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/softmax_grad.c
@@ -16,7 +16,6 @@
 #include "nnacl/fp32_grad/softmax_grad.h"
 #include <string.h>
-#include "nnacl/fp32_grad/gemm.h"
 
 void SoftmaxGrad(const float *input_ptr, const float *yt_ptr, float *output_ptr, float *sum_data, float *sum_mul,
                  const SoftmaxParameter *parameter) {

diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/softmax_grad.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/softmax_grad.h
index 005a2d47baa..46465b301e7 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/softmax_grad.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/softmax_grad.h
@@ -17,27 +17,13 @@
 #ifndef MINDSPORE_NNACL_FP32_GRAD_SOFTMAX_GRAD_H_
 #define MINDSPORE_NNACL_FP32_GRAD_SOFTMAX_GRAD_H_
 
-#include "nnacl/op_base.h"
 #include "nnacl/fp32/softmax_fp32.h"
+#include "nnacl/fp32_grad/softmax_crossentropy_parameter.h"
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-typedef struct SoftmaxCrossEntropyParameter {
-  // primitive parameter
-  OpParameter op_parameter_;
-  int n_dim_;
-
-  // shape correlative
-  int input_shape_[5];
-
-  // other parameter
-  int32_t batch_size_;
-  unsigned int number_of_classes_;
-  bool is_grad_;
-} SoftmaxCrossEntropyParameter;
-
 void SoftmaxGrad(const float *input_ptr, const float *yt_ptr, float *output_ptr, float *sum_data, float *sum_mul,
                  const SoftmaxParameter *parameter);
 #ifdef __cplusplus

diff --git a/mindspore/lite/src/inner_kernel.h b/mindspore/lite/src/inner_kernel.h
index b1767a1d8e3..2794a962a60 100644
--- a/mindspore/lite/src/inner_kernel.h
+++ b/mindspore/lite/src/inner_kernel.h
@@ -159,6 +159,8 @@ class InnerKernel : public Kernel {
     return mindspore::lite::RET_OK;
   }
 
+  virtual int SetupVirtualBatch(int virtual_batch_multiplier, int param) { return mindspore::lite::RET_OK; }
+
   virtual bool IsEval() const { return !this->train_mode_; }
 
   virtual void SetTrainable(bool trainable = true) { this->trainable_ = trainable; }

diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm_fp32.cc
index ed3dccd9474..8298732a5a4 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm_fp32.cc
@@ -141,5 +141,15 @@ int BatchnormCPUKernel::RestoreDefaultMomentum() {
   return RET_OK;
 }
 
+int BatchnormCPUKernel::SetupVirtualBatch(int virtual_batch_multiplier, int param) {
+  if ((virtual_batch_multiplier > 0)) {
+    float momentum = (param < 0.0f) ? (this->get_momentum() / virtual_batch_multiplier) : param;
+    return this->set_momentum(momentum);
+  } else {
+    return this->RestoreDefaultMomentum();
+  }
+  return RET_OK;
+}
+
 REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_BatchNorm, LiteKernelCreator<BatchnormCPUKernel>)
 }  // namespace mindspore::kernel

diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm_fp32.h
index db550e291ca..6f0aadce4a1 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm_fp32.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm_fp32.h
@@ -36,6 +36,7 @@ class BatchnormCPUKernel : public InnerKernel {
   int Prepare() override;
   int ReSize() override;
   int Run() override;
+  int SetupVirtualBatch(int virtual_batch_multiplier, int param) override;
   virtual int InitConstTensor();
   virtual int DoExecute(int task_id);
   virtual int set_momentum(float momentum);

diff --git a/mindspore/lite/src/train/train_populate_parameter.cc b/mindspore/lite/src/train/train_populate_parameter.cc
index 9b48f72ba54..15800e62745 100644
--- a/mindspore/lite/src/train/train_populate_parameter.cc
+++ b/mindspore/lite/src/train/train_populate_parameter.cc
@@ -17,19 +17,19 @@
 #include
 #include "src/ops/populate/populate_register.h"
 #include "src/ops/populate/default_populate.h"
-#include "src/ops/populate/strided_slice_populate.h"
+#include "nnacl/strided_slice_parameter.h"
 #include "nnacl/arithmetic.h"
 #include "nnacl/conv_parameter.h"
 #include "nnacl/lstm_parameter.h"
 #include "nnacl/pooling_parameter.h"
 #include "nnacl/power_parameter.h"
-#include "nnacl/fp32/activation_fp32.h"
-#include "nnacl/fp32_grad/softmax_grad.h"
+#include "nnacl/activation_parameter.h"
+#include "nnacl/fp32_grad/softmax_crossentropy_parameter.h"
 #include "nnacl/fp32_grad/optimizer.h"
-#include "nnacl/fp32_grad/batch_norm.h"
+#include "nnacl/fp32_grad/batch_norm_parameter.h"
 #include "nnacl/fp32_grad/dropout_parameter.h"
 #include "nnacl/fp32_grad/smooth_l1_loss.h"
-#include "nnacl/fp32_grad/resize_grad.h"
+#include "nnacl/fp32_grad/resize_grad_parameter.h"
 
 using mindspore::lite::Registry;

diff --git a/mindspore/lite/src/train/train_populate_parameter_v0.cc b/mindspore/lite/src/train/train_populate_parameter_v0.cc
index aa52e773b11..d3329399c75 100644
--- a/mindspore/lite/src/train/train_populate_parameter_v0.cc
+++ b/mindspore/lite/src/train/train_populate_parameter_v0.cc
@@ -19,18 +19,15 @@
 #include "src/ops/populate/populate_register.h"
 #include "schema/model_v0_generated.h"
 #include "nnacl/pooling_parameter.h"
-#include "nnacl/fp32_grad/softmax_grad.h"
-#include "nnacl/fp32/activation_fp32.h"
+#include "nnacl/fp32_grad/softmax_crossentropy_parameter.h"
+#include "nnacl/activation_parameter.h"
 #include "nnacl/conv_parameter.h"
 #include "nnacl/power_parameter.h"
 #include "nnacl/arithmetic.h"
 #include "nnacl/fp32_grad/optimizer.h"
-#include "nnacl/fp32_grad/batch_norm.h"
+#include "nnacl/fp32_grad/batch_norm_parameter.h"
 #include "nnacl/fp32_grad/dropout_parameter.h"
 #include "nnacl/fp32_grad/smooth_l1_loss.h"
-#include "nnacl/infer/conv2d_grad_filter_infer.h"
-#include "nnacl/infer/conv2d_grad_input_infer.h"
-#include "nnacl/infer/group_conv2d_grad_input_infer.h"
 
 namespace mindspore::kernel {
 namespace {

diff --git a/mindspore/lite/src/train/train_session.cc b/mindspore/lite/src/train/train_session.cc
index e68fe03b778..b4cef76f459 100644
--- a/mindspore/lite/src/train/train_session.cc
+++ b/mindspore/lite/src/train/train_session.cc
@@ -26,18 +26,13 @@
 #include
 #include
 #include "include/errorcode.h"
-#include "src/executor.h"
 #include "src/lite_model.h"
 #include "src/lite_kernel_util.h"
-#include "src/sub_graph_kernel.h"
 #include "src/tensor.h"
 #include "src/kernel_registry.h"
 #include "src/common/prim_util.h"
 #include "src/common/tensor_util.h"
 #include "src/common/utils.h"
-#include "src/runtime/kernel/arm/fp32_grad/convolution.h"
-#include "src/runtime/kernel/arm/fp32/batchnorm_fp32.h"
-#include "src/train/loss_kernel.h"
 #include "src/train/optimizer_kernel.h"
 #include "src/train/train_utils.h"
 #include "src/train/train_export.h"
@@ -890,8 +885,9 @@ int TrainSession::ApplyGradients(const std::vector<tensor::MSTensor *> &gradients) {
       if (current_gradient->tensor_name() == gradient->tensor_name()) {
         found = true;
         if (current_gradient->Size() == gradient->Size()) {
-          std::copy(static_cast<char *>(gradient->data()), static_cast<char *>(gradient->data()) + gradient->Size(),
-                    static_cast<char *>(current_gradient->MutableData()));
+          std::copy(static_cast<char *>(gradient->data()),
+                    static_cast<char *>(gradient->data()) + gradient->Size(),
+                    static_cast<char *>(current_gradient->MutableData()));
         } else {
           MS_LOG(ERROR) << "gradient tensor " << gradient->tensor_name() << " has wrong size " << gradient->Size()
                         << " instead of " << current_gradient->Size();
@@ -954,14 +950,8 @@ int TrainSession::AdminSetupVirtualBatch(int virtual_batch_multiplier, float lr,
   }
 
   if (IsBN(kernel) && kernel->IsTrainable()) {
-    auto batchnorm = static_cast<kernel::BatchnormCPUKernel *>(kernel->kernel());
-    auto ret = RET_OK;
-    if (mod == kernel::WeightUpdateMode::VIRTUAL_BATCH) {
-      momentum = (momentum < 0.0f) ? (batchnorm->get_momentum() / virtual_batch_multiplier_) : momentum;
-      ret = batchnorm->set_momentum(momentum);
-    } else {
-      ret = batchnorm->RestoreDefaultMomentum();
-    }
+    auto batchnorm = static_cast<kernel::InnerKernel *>(kernel->kernel());
+    auto ret = batchnorm->SetupVirtualBatch(virtual_batch_multiplier_, momentum);
     if (ret != RET_OK) {
       MS_LOG(ERROR) << kernel->name() << " failed to set momentum";
       return RET_ERROR;

diff --git a/mindspore/lite/src/train/transfer_session.cc b/mindspore/lite/src/train/transfer_session.cc
index 28532cfeb43..97cecafff9b 100644
--- a/mindspore/lite/src/train/transfer_session.cc
+++ b/mindspore/lite/src/train/transfer_session.cc
@@ -28,12 +28,8 @@
 #include "src/tensor.h"
 #include "src/train/loss_kernel.h"
 #include "src/train/optimizer_kernel.h"
-#include "src/sub_graph_kernel.h"
 #include "src/train/train_populate_parameter.h"
 #include "src/executor.h"
-#include "src/kernel_registry.h"
-#include "src/runtime/kernel/arm/fp32_grad/convolution.h"
-#include "nnacl/fp32/pack_fp32.h"
 #include "src/train/train_export.h"
 #include "src/train/train_utils.h"
 
@@ -141,13 +137,22 @@ int TransferSession::RunGraph(const KernelCallBack &before, const KernelCallBack &after) {
   for (auto &backbone_head_pair : backbone_head_map_) {
     auto input = backbone_head_pair.first;
     auto output = backbone_head_pair.second;
-    char *input_data = reinterpret_cast<char *>(input->MutableData());
-    char *output_data = reinterpret_cast<char *>(output->MutableData());
+    float *input_data = reinterpret_cast<float *>(input->MutableData());
+    float *output_data = reinterpret_cast<float *>(output->MutableData());
     if (nchw2nhwc_) {
-      int plane = input->shape().at(1) * input->shape().at(2);
       int batch = input->shape().at(0);
+      int plane = input->shape().at(1) * input->shape().at(2);
       int channel = input->shape().at(3);
-      PackNCHWToNHWCFp32(output_data, input_data, batch, plane, channel, 0, 1);
+      int img_size = plane * channel;
+      for (int b = 0; b < batch; b++) {
+        float *in = input_data + b * img_size;
+        float *out = output_data + b * img_size;
+        for (int p = 0; p < plane; p++) {
+          for (int c = 0; c < channel; c++) {
+            in[p * channel + c] = out[c * plane + p];
+          }
+        }
+      }
     } else {
       std::copy(output_data, output_data + output->Size(), input_data);
     }

diff --git a/mindspore/lite/tools/cropper/build_cropper_config.sh b/mindspore/lite/tools/cropper/build_cropper_config.sh
index aed5fec9e71..b82626051aa 100644
--- a/mindspore/lite/tools/cropper/build_cropper_config.sh
+++ b/mindspore/lite/tools/cropper/build_cropper_config.sh
@@ -7,13 +7,18 @@ cd "${MINDSPORE_HOME}" || exit 1
 CROPPER_OUTPUT_DIR=mindspore/lite/build/tools/cropper
 mkdir -p ${CROPPER_OUTPUT_DIR}
 MAPPING_OUTPUT_FILE_NAME_TMP=${CROPPER_OUTPUT_DIR}/cropper_mapping_tmp.cfg
+MAPPING_OUTPUT_FILE_NAME_TRAIN_TMP=${CROPPER_OUTPUT_DIR}/cropper_mapping_train_tmp.cfg
 CPU_MAPPING_OUTPUT_FILE=${CROPPER_OUTPUT_DIR}/cropper_mapping_cpu.cfg
 GPU_MAPPING_OUTPUT_FILE=${CROPPER_OUTPUT_DIR}/cropper_mapping_gpu.cfg
 NPU_MAPPING_OUTPUT_FILE=${CROPPER_OUTPUT_DIR}/cropper_mapping_npu.cfg
+CPU_TRAIN_MAPPING_OUTPUT_FILE=${CROPPER_OUTPUT_DIR}/cropper_mapping_cpu_train.cfg
 [ -n "${MAPPING_OUTPUT_FILE_NAME_TMP}" ] && rm -f ${MAPPING_OUTPUT_FILE_NAME_TMP}
+[ -n "${MAPPING_OUTPUT_FILE_NAME_TRAIN_TMP}" ] && rm -f ${MAPPING_OUTPUT_FILE_NAME_TRAIN_TMP}
 [ -n "${CPU_MAPPING_OUTPUT_FILE}" ] && rm -f ${CPU_MAPPING_OUTPUT_FILE}
 [ -n "${GPU_MAPPING_OUTPUT_FILE}" ] && rm -f ${GPU_MAPPING_OUTPUT_FILE}
 [ -n "${NPU_MAPPING_OUTPUT_FILE}" ] && rm -f ${NPU_MAPPING_OUTPUT_FILE}
+[ -n "${CPU_TRAIN_MAPPING_OUTPUT_FILE}" ] && rm -f ${CPU_TRAIN_MAPPING_OUTPUT_FILE}
+
 ops_list=()
 DEFINE_STR="-DENABLE_ANDROID -DENABLE_ARM -DENABLE_ARM64 -DENABLE_NEON -DNO_DLIB -DUSE_ANDROID_LOG -DANDROID -DENABLE_FP16"
 # get the flatbuffers path
@@ -57,11 +62,17 @@ getDeep() {
     # only add existing files
     if [[ -e ${array_deep_file%h*}cc ]]; then
       file_split=$(echo ${array_deep_file} | awk -F '/' '{print $NF}')
-      echo "${1},${3},${file_split%h*}cc.o" >>${MAPPING_OUTPUT_FILE_NAME_TMP}
+      if [[ "$4" != "train_source" ]] ; then
+        echo "${1},${3},${file_split%h*}cc.o" >>${MAPPING_OUTPUT_FILE_NAME_TMP}
+      fi
+      echo "${1},${3},${file_split%h*}cc.o" >>${MAPPING_OUTPUT_FILE_NAME_TRAIN_TMP}
     fi
     if [[ -e ${array_deep_file%h*}c ]]; then
      file_split=$(echo ${array_deep_file} | awk -F '/' '{print $NF}')
-      echo "${1},${3},${file_split%h*}c.o" >>${MAPPING_OUTPUT_FILE_NAME_TMP}
+      if [[ "$4" != "train_source" ]] ; then
+        echo "${1},${3},${file_split%h*}c.o" >>${MAPPING_OUTPUT_FILE_NAME_TMP}
+      fi
+      echo "${1},${3},${file_split%h*}c.o" >>${MAPPING_OUTPUT_FILE_NAME_TRAIN_TMP}
     fi
   done
 }
@@ -79,6 +90,7 @@ getOpsFile() {
       out_file=$(echo ${file} | awk -F '/' '{print $NF}')
       # concat schemaType + fileType + fileName append to files
       echo "${type},${3},${out_file}.o" >>${MAPPING_OUTPUT_FILE_NAME_TMP}
+      echo "${type},${3},${out_file}.o" >>${MAPPING_OUTPUT_FILE_NAME_TRAIN_TMP}
       map_files=$(gcc -MM ${file} ${DEFINE_STR} ${HEADER_LOCATION})
       # first is *.o second is *.cc
       array_file=()
@@ -91,18 +103,44 @@ getOpsFile() {
        getDeep ${type} ${array_file} ${3} &
        array_file_split=$(echo ${array_file} | awk -F '/' '{print $NF}')
        echo "${type},${3},${array_file_split%h*}cc.o" >>${MAPPING_OUTPUT_FILE_NAME_TMP}
+       echo "${type},${3},${array_file_split%h*}cc.o" >>${MAPPING_OUTPUT_FILE_NAME_TRAIN_TMP}
      fi
      if [[ -e ${array_file%h*}c ]]; then
        getDeep ${type} ${array_file%h*}c ${3} &
        getDeep ${type} ${array_file} ${3} &
        array_file_split=$(echo ${array_file} | awk -F '/' '{print $NF}')
        echo "${type},${3},${array_file_split%h*}c.o" >>${MAPPING_OUTPUT_FILE_NAME_TMP}
+       echo "${type},${3},${array_file_split%h*}c.o" >>${MAPPING_OUTPUT_FILE_NAME_TRAIN_TMP}
      fi
    done
  done
 done
 }
 
+getFilesFromArr() {
+  local -n arr_files=${1}
+  # echo " func parm 1 : ${arr_files[@]}"
+  # echo " func parm 2 : $2"
+  # shellcheck disable=SC2068
+  for file in ${arr_files[@]}; do
+    map_files=$(gcc -MM ${file} ${DEFINE_STR} ${HEADER_LOCATION})
+    # first is *.o second is *.cc
+    # shellcheck disable=SC2207
+    array_runtime=($(echo ${map_files} | awk -F '\' '{for(i=3;i<=NF;i++){print $i}}' | grep -v "flatbuffers" | egrep -v ${REMOVE_LISTS_STR}))
+    # only add existing files
+    for array_runtime_file in "${array_runtime[@]}"; do
+      if [[ -e ${array_runtime_file%h*}cc && ! ${all_files[*]} =~ ${array_runtime_file%h*}cc ]]; then
+        all_files=("${all_files[@]}" "${array_runtime_file%h*}cc")
+        getDeep "CommonFile" ${array_runtime_file%h*}cc "common" $2 &
+      fi
+      if [[ -e ${array_runtime_file%h*}c && ! ${all_files[*]} =~ ${array_runtime_file%h*}c ]]; then
+        all_files=("${all_files[@]}" "${array_runtime_file%h*}c")
+        getDeep "CommonFile" ${array_runtime_file%h*}c "common" $2 &
+      fi
+    done
+  done
+}
+
 getCommonFile() {
   echo "start get common files"
   include_h=()
@@ -115,6 +153,9 @@ getCommonFile() {
   while IFS='' read -r line; do common_files_h+=("$line"); done < <(ls mindspore/lite/src/common/*.h)
   runtime_files_h=()
   while IFS='' read -r line; do runtime_files_h+=("$line"); done < <(ls mindspore/lite/src/runtime/*.h)
+  train_files_h=()
+  while IFS='' read -r line; do train_files_h+=("$line"); done < <(ls mindspore/lite/include/train/*.h)
+  while IFS='' read -r line; do train_files_h+=("$line"); done < <(ls mindspore/lite/src/train/*.h)
   others_files_h=(
     mindspore/lite/src/runtime/infer_manager.h
     mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/infer_register.h
@@ -130,7 +171,9 @@ getCommonFile() {
     mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/tensor_c.h
     mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/errorcode.h
   )
-  all_files_h=("${include_h[@]}" "${regist_include_h[@]}" "${src_files_h[@]}" "${common_files_h[@]}" "${runtime_files_h[@]}" "${others_files_h[@]}")
+  all_files_h=("${include_h[@]}" "${regist_include_h[@]}" "${src_files_h[@]}" "${common_files_h[@]}"
+    "${runtime_files_h[@]}" "${others_files_h[@]}"
+  )
 
   # concat regx
   REMOVE_LISTS_STR="${all_files_h[0]}"
@@ -167,51 +210,43 @@ getCommonFile() {
     mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/infer_register.c
     mindspore/core/utils/status.cc
   )
+  # save train files
+  train_files=()
+  while IFS='' read -r line; do train_files+=("$line"); done < <(ls mindspore/lite/src/train/*.cc)
+  while IFS='' read -r line; do train_files+=("$line"); done < <(ls mindspore/lite/src/cxx_api/callback/*.cc)
+  while IFS='' read -r line; do train_files+=("$line"); done < <(ls mindspore/lite/src/cxx_api/metrics/*.cc)
+  while IFS='' read -r line; do train_files+=("$line"); done < <(ls mindspore/lite/src/cxx_api/train/*.cc)
+  others_train_files=(
+    mindspore/lite/tools/common/storage.cc
+  )
   all_files=("${src_files[@]}" "${regist_files[@]}" "${common_files[@]}" "${runtime_files_cc[@]}"
     "${others_files_c[@]}" "${assembly_files[@]}" "${mindrt_files[@]}" "${cxx_api_files[@]}"
-  )
-  # shellcheck disable=SC2068
-  for file in ${all_files[@]}; do
-    map_files=$(gcc -MM ${file} ${DEFINE_STR} ${HEADER_LOCATION})
-    # first is *.o second is *.cc
-    # shellcheck disable=SC2207
-    array_runtime=($(echo ${map_files} | awk -F '\' '{for(i=3;i<=NF;i++){print $i}}' | grep -v "flatbuffers" | egrep -v ${REMOVE_LISTS_STR}))
-    # only add existing files
-    for array_runtime_file in "${array_runtime[@]}"; do
-      if [[ -e ${array_runtime_file%h*}cc && ! ${all_files[*]} =~ ${array_runtime_file%h*}cc ]]; then
-        all_files=("${all_files[@]}" "${array_runtime_file%h*}cc")
-        getDeep "CommonFile" ${array_runtime_file%h*}cc "common" &
-      fi
-      if [[ -e ${array_runtime_file%h*}c && ! ${all_files[*]} =~ ${array_runtime_file%h*}c ]]; then
-        all_files=("${all_files[@]}" "${array_runtime_file%h*}c")
-        getDeep "CommonFile" ${array_runtime_file%h*}c "common" &
-      fi
-    done
-  done
-  # shellcheck disable=SC2068
-  for file in ${all_files_h[@]}; do
-    map_files=$(gcc -MM ${file} ${DEFINE_STR} ${HEADER_LOCATION})
-    # first is *.o second is *.cc
-    # shellcheck disable=SC2207
-    array_runtime=($(echo ${map_files} | awk -F '\' '{for(i=3;i<=NF;i++){print $i}}' | grep -v "flatbuffers" | egrep -v ${REMOVE_LISTS_STR}))
-    # only add existing files
-    for array_runtime_file in "${array_runtime[@]}"; do
-      if [[ -e ${array_runtime_file%h*}cc && ! ${all_files[*]} =~ ${array_runtime_file%h*}cc ]]; then
-        all_files=("${all_files[@]}" "${array_runtime_file%h*}cc")
-        getDeep "CommonFile" ${array_runtime_file%h*}cc "common" &
-      fi
-      if [[ -e ${array_runtime_file%h*}c && ! ${all_files[*]} =~ ${array_runtime_file%h*}c ]]; then
-        all_files=("${all_files[@]}" "${array_runtime_file%h*}c")
-        getDeep "CommonFile" ${array_runtime_file%h*}c "common" &
-      fi
-    done
-  done
+  )
+  getFilesFromArr all_files
+  getFilesFromArr all_files_h
   # shellcheck disable=SC2068
   for file in ${all_files[@]}; do
     file=$(echo ${file} | awk -F '/' '{print $NF}')
     echo "CommonFile,common,${file}.o" >>${MAPPING_OUTPUT_FILE_NAME_TMP}
   done
+
+  all_files_train=("${all_files[@]}" "${train_files[@]}" "${others_train_files[@]}"
+  )
+  all_files_train_h=("${all_files_h[@]}" "${train_files_h[@]}"
+  )
+  REMOVE_LISTS_STR="${all_files_train_h[0]}"
+  # shellcheck disable=SC2068
+  for val in ${all_files_train_h[@]:1}; do
+    REMOVE_LISTS_STR="$REMOVE_LISTS_STR|$val"
+  done
+  getFilesFromArr all_files_train "train_source"
+  getFilesFromArr all_files_train_h "train_source"
+  # shellcheck disable=SC2068
+  for file in ${all_files_train[@]}; do
+    file=$(echo ${file} | awk -F '/' '{print $NF}')
+    echo "CommonFile,common,${file}.o" >>${MAPPING_OUTPUT_FILE_NAME_TRAIN_TMP}
+  done
 }
 
 # The x86 platform cannot search based on header files, so manually search for the first layer.
@@ -276,6 +311,11 @@ sleep 1
 sort ${MAPPING_OUTPUT_FILE_NAME_TMP} | uniq >${CPU_MAPPING_OUTPUT_FILE}
 chmod 444 ${CPU_MAPPING_OUTPUT_FILE}
 
+sleep 1
+# remove duplicate files
+sort ${MAPPING_OUTPUT_FILE_NAME_TRAIN_TMP} | uniq >${CPU_TRAIN_MAPPING_OUTPUT_FILE}
+chmod 444 ${CPU_TRAIN_MAPPING_OUTPUT_FILE}
+
 # support for gpu
 opencl_files=()
 while IFS='' read -r line; do opencl_files+=("$line"); done < <(ls mindspore/lite/src/runtime/kernel/opencl/*.cc)
@@ -320,4 +360,5 @@ chmod 444 ${NPU_MAPPING_OUTPUT_FILE}
 
 # modify file permissions to read-only
 [ -n "${MAPPING_OUTPUT_FILE_NAME_TMP}" ] && rm -f ${MAPPING_OUTPUT_FILE_NAME_TMP}
+[ -n "${MAPPING_OUTPUT_FILE_NAME_TRAIN_TMP}" ] && rm -f ${MAPPING_OUTPUT_FILE_NAME_TRAIN_TMP}
 echo "Complete all tasks."
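
Reviewer notes (illustrative material below; not part of the patch to apply):

1. Virtual-batch hook. train_session.cc no longer downcasts to BatchnormCPUKernel to tweak
momentum; the logic now lives behind the new InnerKernel::SetupVirtualBatch() virtual, which
is what lets the cropper drop the batch-norm kernel and its headers from models that never
use them. A self-contained sketch of the momentum contract; FakeBatchNorm is a hypothetical
stand-in, not a MindSpore class, and unlike the patch (whose virtual declares `int param`,
so a fractional override is truncated at that boundary) it takes the override as a float:

#include <iostream>

struct FakeBatchNorm {
  float momentum_ = 0.9f;          // what get_momentum() would return
  float default_momentum_ = 0.9f;  // what RestoreDefaultMomentum() restores

  int SetupVirtualBatch(int virtual_batch_multiplier, float param) {
    if (virtual_batch_multiplier > 0) {
      // negative override: derive momentum from the multiplier; otherwise take it as given
      momentum_ = (param < 0.0f) ? (momentum_ / virtual_batch_multiplier) : param;
    } else {
      momentum_ = default_momentum_;  // multiplier <= 0 disables virtual batching
    }
    return 0;  // RET_OK
  }
};

int main() {
  FakeBatchNorm bn;
  bn.SetupVirtualBatch(4, -1.0f);
  std::cout << bn.momentum_ << "\n";  // 0.225: scaled down for 4 accumulated micro-batches
  bn.SetupVirtualBatch(0, -1.0f);
  std::cout << bn.momentum_ << "\n";  // 0.9: default restored
  return 0;
}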
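
2. NCHW to NHWC repack. TransferSession::RunGraph() now open-codes the transpose instead of
calling PackNCHWToNHWCFp32(), which removes the nnacl pack_fp32 dependency from the cropper's
common-file set. The same index math as a standalone sketch (NchwToNhwc is an illustrative
name, not a function introduced by the patch):

#include <cstdio>

// dst is NHWC (plane-major, channel innermost); src is NCHW (channel-major).
// plane collapses H and W into a single spatial index p = h * W + w.
void NchwToNhwc(const float *src, float *dst, int batch, int plane, int channel) {
  int img_size = plane * channel;
  for (int b = 0; b < batch; b++) {
    const float *in = src + b * img_size;
    float *out = dst + b * img_size;
    for (int p = 0; p < plane; p++) {
      for (int c = 0; c < channel; c++) {
        out[p * channel + c] = in[c * plane + p];  // NHWC[b][p][c] = NCHW[b][c][p]
      }
    }
  }
}

int main() {
  // One 2-pixel image with 2 channels; NCHW order is c0p0 c0p1 c1p0 c1p1.
  const float nchw[4] = {1, 2, 3, 4};
  float nhwc[4] = {0};
  NchwToNhwc(nchw, nhwc, 1, 2, 2);
  printf("%g %g %g %g\n", nhwc[0], nhwc[1], nhwc[2], nhwc[3]);  // prints: 1 3 2 4
  return 0;
}

One caveat worth a second look: input_data and output_data are now float pointers, but the
unchanged else branch still copies output->Size() elements; if Size() is in bytes, that
std::copy walks four times the tensor.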
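
3. Cropper config for training. getDeep() gains a fourth argument and getFilesFromArr() a
second one ("train_source"): every discovered object is recorded in the train mapping, while
non-train dependencies keep flowing into the existing cpu/gpu/npu configs, and
package_lite.cmake installs the new cropper_mapping_cpu_train.cfg when SUPPORT_TRAIN is on.
A usage sketch; the library and model file names below are placeholders, and the cropper
flags follow the existing tool's documented conventions, so check them against your release:

bash mindspore/lite/tools/cropper/build_cropper_config.sh
# a training build now also emits cropper_mapping_cpu_train.cfg next to the cpu/gpu/npu ones
./cropper --packageFile=./libmindspore-lite-train.a \
          --configFile=./cropper_mapping_cpu_train.cfg \
          --modelFile=./my_tod_model.ms \
          --outputFile=./libmindspore-lite-train-cropped.a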