forked from mindspore-Ecosystem/mindspore
!27891 [MS][LITE][TOD] Cropper Support for ToD
Merge pull request !27891 from Haim/export_haim
This commit is contained in: commit 0eecd9721f
@@ -646,6 +646,10 @@ else()
             DESTINATION ${CROPPER_ROOT_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME})
     install(FILES ${TOP_DIR}/mindspore/lite/build/tools/cropper/cropper_mapping_npu.cfg
             DESTINATION ${CROPPER_ROOT_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME})
+    if(SUPPORT_TRAIN)
+        install(FILES ${TOP_DIR}/mindspore/lite/build/tools/cropper/cropper_mapping_cpu_train.cfg
+                DESTINATION ${CROPPER_ROOT_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME})
+    endif()
 endif()
 endif()
 
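Note: each cropper mapping .cfg associates a schema operator type and build category with the object files that implement it, which is what lets the cropper tool strip unused operators out of libmindspore-lite; cropper_mapping_cpu_train.cfg extends this to the training (ToD) build. A representative entry, with illustrative values rather than a line from the shipped file:

    Activation,common,activation_fp32.cc.o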
@@ -0,0 +1,28 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_NNACL_ACTIVATION_PARAMETER_H_
+#define MINDSPORE_NNACL_ACTIVATION_PARAMETER_H_
+
+#include "nnacl/op_base.h"
+typedef struct ActivationParameter {
+  OpParameter op_parameter_;
+  int type_;
+  float alpha_;
+  float min_val_;
+  float max_val_;
+} ActivationParameter;
+
+#endif  // MINDSPORE_NNACL_ACTIVATION_PARAMETER_H_
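With the parameter struct extracted into its own header, code that only builds op parameters (such as the train populate routines changed below) can include nnacl/activation_parameter.h without pulling in the fp32 kernel declarations. A minimal sketch of such a consumer, with a hypothetical function name and the flatbuffer-primitive handling elided:

    #include <cstdlib>
    #include <cstring>
    #include "nnacl/activation_parameter.h"

    OpParameter *PopulateActivationParameterSketch() {
      auto *param = static_cast<ActivationParameter *>(malloc(sizeof(ActivationParameter)));
      if (param == nullptr) {
        return nullptr;
      }
      memset(param, 0, sizeof(ActivationParameter));
      param->type_ = 0;      // in real populate code, read from the primitive
      param->alpha_ = 0.0f;  // likewise
      return reinterpret_cast<OpParameter *>(param);
    }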
@@ -19,14 +19,7 @@
 #include <math.h>
 #include "nnacl/op_base.h"
 #include "nnacl/int8/fixed_point.h"
-
-typedef struct ActivationParameter {
-  OpParameter op_parameter_;
-  int type_;
-  float alpha_;
-  float min_val_;
-  float max_val_;
-} ActivationParameter;
+#include "nnacl/activation_parameter.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -17,12 +17,7 @@
 #ifndef MINDSPORE_NNACL_FP32_GRAD_BATCH_NORM_H_
 #define MINDSPORE_NNACL_FP32_GRAD_BATCH_NORM_H_
 
-#include "nnacl/op_base.h"
-
-typedef struct BNGradParameter {
-  OpParameter op_parameter_;
-  float epsilon_;
-} BNGradParameter;
+#include "nnacl/fp32_grad/batch_norm_parameter.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -0,0 +1,27 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_NNACL_FP32_GRAD_BATCH_NORM_PARAMATER_H_
+#define MINDSPORE_NNACL_FP32_GRAD_BATCH_NORM_PARAMATER_H_
+
+#include "nnacl/op_base.h"
+
+typedef struct BNGradParameter {
+  OpParameter op_parameter_;
+  float epsilon_;
+} BNGradParameter;
+
+#endif  // MINDSPORE_NNACL_FP32_GRAD_BATCH_NORM_PARAMATER_H_
@@ -16,7 +16,6 @@

#include <string.h>
#include "nnacl/fp32_grad/pack_ext.h"
#include "nnacl/pack.h"

void RollingIm2ColPackDwUnitFp32(const float *in_data, const ConvParameter *conv_param, float *data_col_orig,
                                 int real_cal_num, int start) {
@@ -17,24 +17,12 @@
 #ifndef MINDSPORE_NNACL_FP32_GRAD_RESIZE_GRAD_H_
 #define MINDSPORE_NNACL_FP32_GRAD_RESIZE_GRAD_H_
 
 #include "nnacl/op_base.h"
+#include "nnacl/fp32_grad/resize_grad_parameter.h"
 
 #ifdef __cplusplus
 extern "C" {
 #endif
-
-typedef struct ResizeGradParameter {
-  OpParameter op_parameter_;
-  bool align_corners_;
-  int method;
-  size_t in_height_;
-  size_t in_width_;
-  size_t out_height_;
-  size_t out_width_;
-  float height_scale_;
-  float width_scale_;
-} ResizeGradParameter;
-
 int ResizeNearestNeighborGrad(const float *in_addr, float *out_addr, int batch_size, int channel, int format,
                               const ResizeGradParameter *param);
 int ResizeBiLinearGrad(const float *in_addr, float *out_addr, int batch_size, int channel, int format,
@@ -0,0 +1,34 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_NNACL_FP32_GRAD_RESIZE_PARAMETER_GRAD_H_
+#define MINDSPORE_NNACL_FP32_GRAD_RESIZE_PARAMETER_GRAD_H_
+
+#include "nnacl/op_base.h"
+
+typedef struct ResizeGradParameter {
+  OpParameter op_parameter_;
+  bool align_corners_;
+  int method;
+  size_t in_height_;
+  size_t in_width_;
+  size_t out_height_;
+  size_t out_width_;
+  float height_scale_;
+  float width_scale_;
+} ResizeGradParameter;
+
+#endif  // MINDSPORE_NNACL_FP32_GRAD_RESIZE_PARAMETER_GRAD_H_
@@ -0,0 +1,36 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_NNACL_FP32_GRAD_SOFTMAX_CROSSENTROPY_PARAMETER_H_
+#define MINDSPORE_NNACL_FP32_GRAD_SOFTMAX_CROSSENTROPY_PARAMETER_H_
+
+#include "nnacl/op_base.h"
+
+typedef struct SoftmaxCrossEntropyParameter {
+  // primitive parameter
+  OpParameter op_parameter_;
+  int n_dim_;
+
+  // shape correlative
+  int input_shape_[5];
+
+  // other parameter
+  int32_t batch_size_;
+  unsigned int number_of_classes_;
+  bool is_grad_;
+} SoftmaxCrossEntropyParameter;
+
+#endif  // MINDSPORE_NNACL_FP32_GRAD_SOFTMAX_CROSSENTROPY_PARAMETER_H_
@@ -16,7 +16,6 @@

#include "nnacl/fp32_grad/softmax_grad.h"
#include <string.h>
#include "nnacl/fp32_grad/gemm.h"

void SoftmaxGrad(const float *input_ptr, const float *yt_ptr, float *output_ptr, float *sum_data, float *sum_mul,
                 const SoftmaxParameter *parameter) {
@@ -17,27 +17,13 @@
 #ifndef MINDSPORE_NNACL_FP32_GRAD_SOFTMAX_GRAD_H_
 #define MINDSPORE_NNACL_FP32_GRAD_SOFTMAX_GRAD_H_
 
 #include "nnacl/op_base.h"
 #include "nnacl/fp32/softmax_fp32.h"
+#include "nnacl/fp32_grad/softmax_crossentropy_parameter.h"
 
 #ifdef __cplusplus
 extern "C" {
 #endif
-
-typedef struct SoftmaxCrossEntropyParameter {
-  // primitive parameter
-  OpParameter op_parameter_;
-  int n_dim_;
-
-  // shape correlative
-  int input_shape_[5];
-
-  // other parameter
-  int32_t batch_size_;
-  unsigned int number_of_classes_;
-  bool is_grad_;
-} SoftmaxCrossEntropyParameter;
-
 void SoftmaxGrad(const float *input_ptr, const float *yt_ptr, float *output_ptr, float *sum_data, float *sum_mul,
                  const SoftmaxParameter *parameter);
 #ifdef __cplusplus
@@ -159,6 +159,8 @@ class InnerKernel : public Kernel {
     return mindspore::lite::RET_OK;
   }
 
+  virtual int SetupVirtualBatch(int virtual_batch_multiplier, int param) { return mindspore::lite::RET_OK; }
+
   virtual bool IsEval() const { return !this->train_mode_; }
 
   virtual void SetTrainable(bool trainable = true) { this->trainable_ = trainable; }
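The new SetupVirtualBatch hook gives every kernel a uniform entry point for virtual-batch configuration: the base implementation is a successful no-op, and only kernels that keep batch statistics (BatchNorm below) override it. A simplified sketch of the dispatch this enables, with hypothetical class names standing in for the real hierarchy:

    #include <vector>

    struct KernelSketch {
      virtual ~KernelSketch() = default;
      // Default: nothing to adjust, report success (RET_OK).
      virtual int SetupVirtualBatch(int virtual_batch_multiplier, int param) { return 0; }
    };

    // The session can now configure all kernels without static_casts to
    // concrete kernel types:
    int SetupAll(std::vector<KernelSketch *> &kernels, int multiplier, int momentum) {
      for (auto *k : kernels) {
        if (k->SetupVirtualBatch(multiplier, momentum) != 0) return -1;  // RET_ERROR
      }
      return 0;
    }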
@@ -141,5 +141,15 @@ int BatchnormCPUKernel::RestoreDefaultMomentum() {
   return RET_OK;
 }
 
+int BatchnormCPUKernel::SetupVirtualBatch(int virtual_batch_multiplier, int param) {
+  if ((virtual_batch_multiplier > 0)) {
+    int momentum = (param < 0.0f) ? (this->get_momentum() / virtual_batch_multiplier) : param;
+    return this->set_momentum(momentum);
+  } else {
+    return this->RestoreDefaultMomentum();
+  }
+  return RET_OK;
+}
+
 REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_BatchNorm, LiteKernelCreator<BatchnormCPUKernel>)
 }  // namespace mindspore::kernel
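The convention here: a negative param asks the kernel to derive its virtual-batch momentum as get_momentum() / virtual_batch_multiplier, while a non-negative param is applied as-is, and a non-positive multiplier restores the default. For example, with get_momentum() == 0.9 and virtual_batch_multiplier == 3 the derived momentum would be 0.3. Note that the local momentum variable above is declared int, so a fractional derived value truncates (0.3 becomes 0) before reaching set_momentum(float); a type-faithful version of that line, shown only as an illustration, would be:

    float momentum = (param < 0) ? (this->get_momentum() / virtual_batch_multiplier) : static_cast<float>(param);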
@@ -36,6 +36,7 @@ class BatchnormCPUKernel : public InnerKernel {
   int Prepare() override;
   int ReSize() override;
   int Run() override;
+  int SetupVirtualBatch(int virtual_batch_multiplier, int param) override;
   virtual int InitConstTensor();
   virtual int DoExecute(int task_id);
   virtual int set_momentum(float momentum);
@@ -17,19 +17,19 @@
 #include <algorithm>
 #include "src/ops/populate/populate_register.h"
 #include "src/ops/populate/default_populate.h"
 #include "src/ops/populate/strided_slice_populate.h"
 #include "nnacl/strided_slice_parameter.h"
 #include "nnacl/arithmetic.h"
 #include "nnacl/conv_parameter.h"
 #include "nnacl/lstm_parameter.h"
 #include "nnacl/pooling_parameter.h"
 #include "nnacl/power_parameter.h"
-#include "nnacl/fp32/activation_fp32.h"
-#include "nnacl/fp32_grad/softmax_grad.h"
+#include "nnacl/activation_parameter.h"
+#include "nnacl/fp32_grad/softmax_crossentropy_parameter.h"
 #include "nnacl/fp32_grad/optimizer.h"
-#include "nnacl/fp32_grad/batch_norm.h"
+#include "nnacl/fp32_grad/batch_norm_parameter.h"
 #include "nnacl/fp32_grad/dropout_parameter.h"
 #include "nnacl/fp32_grad/smooth_l1_loss.h"
-#include "nnacl/fp32_grad/resize_grad.h"
+#include "nnacl/fp32_grad/resize_grad_parameter.h"
 
 using mindspore::lite::Registry;
@@ -19,18 +19,15 @@
 #include "src/ops/populate/populate_register.h"
 #include "schema/model_v0_generated.h"
 #include "nnacl/pooling_parameter.h"
-#include "nnacl/fp32_grad/softmax_grad.h"
-#include "nnacl/fp32/activation_fp32.h"
+#include "nnacl/fp32_grad/softmax_crossentropy_parameter.h"
+#include "nnacl/activation_parameter.h"
 #include "nnacl/conv_parameter.h"
 #include "nnacl/power_parameter.h"
 #include "nnacl/arithmetic.h"
 #include "nnacl/fp32_grad/optimizer.h"
-#include "nnacl/fp32_grad/batch_norm.h"
+#include "nnacl/fp32_grad/batch_norm_parameter.h"
 #include "nnacl/fp32_grad/dropout_parameter.h"
 #include "nnacl/fp32_grad/smooth_l1_loss.h"
-#include "nnacl/infer/conv2d_grad_filter_infer.h"
-#include "nnacl/infer/conv2d_grad_input_infer.h"
-#include "nnacl/infer/group_conv2d_grad_input_infer.h"
 
 namespace mindspore::kernel {
 namespace {
@@ -26,18 +26,13 @@
#include <map>
#include <set>
#include "include/errorcode.h"
#include "src/executor.h"
#include "src/lite_model.h"
#include "src/lite_kernel_util.h"
#include "src/sub_graph_kernel.h"
#include "src/tensor.h"
#include "src/kernel_registry.h"
#include "src/common/prim_util.h"
#include "src/common/tensor_util.h"
#include "src/common/utils.h"
#include "src/runtime/kernel/arm/fp32_grad/convolution.h"
#include "src/runtime/kernel/arm/fp32/batchnorm_fp32.h"
#include "src/train/loss_kernel.h"
#include "src/train/optimizer_kernel.h"
#include "src/train/train_utils.h"
#include "src/train/train_export.h"
@@ -890,8 +885,9 @@ int TrainSession::ApplyGradients(const std::vector<tensor::MSTensor *> &gradient
       if (current_gradient->tensor_name() == gradient->tensor_name()) {
         found = true;
         if (current_gradient->Size() == gradient->Size()) {
-          std::copy(static_cast<char *>(gradient->data()), static_cast<char *>(gradient->data()) + gradient->Size(),
-                    static_cast<char *>(current_gradient->MutableData()));
+          std::copy(static_cast<uint8_t *>(gradient->data()),
+                    static_cast<uint8_t *>(gradient->data()) + gradient->Size(),
+                    static_cast<uint8_t *>(current_gradient->MutableData()));
         } else {
           MS_LOG(ERROR) << "gradient tensor " << gradient->tensor_name() << " has wrong size " << gradient->Size()
                         << " instead of " << current_gradient->Size();
@@ -954,14 +950,8 @@ int TrainSession::AdminSetupVirtualBatch(int virtual_batch_multiplier, float lr,
     }
 
     if (IsBN(kernel) && kernel->IsTrainable()) {
-      auto batchnorm = static_cast<kernel::BatchnormCPUKernel *>(kernel->kernel());
-      auto ret = RET_OK;
-      if (mod == kernel::WeightUpdateMode::VIRTUAL_BATCH) {
-        momentum = (momentum < 0.0f) ? (batchnorm->get_momentum() / virtual_batch_multiplier_) : momentum;
-        ret = batchnorm->set_momentum(momentum);
-      } else {
-        ret = batchnorm->RestoreDefaultMomentum();
-      }
+      auto batchnorm = static_cast<kernel::InnerKernel *>(kernel->kernel());
+      auto ret = batchnorm->SetupVirtualBatch(virtual_batch_multiplier_, momentum);
       if (ret != RET_OK) {
         MS_LOG(ERROR) << kernel->name() << " failed to set momentum";
         return RET_ERROR;
@@ -28,12 +28,8 @@
#include "src/tensor.h"
#include "src/train/loss_kernel.h"
#include "src/train/optimizer_kernel.h"
#include "src/sub_graph_kernel.h"
#include "src/train/train_populate_parameter.h"
#include "src/executor.h"
#include "src/kernel_registry.h"
#include "src/runtime/kernel/arm/fp32_grad/convolution.h"
#include "nnacl/fp32/pack_fp32.h"
#include "src/train/train_export.h"
#include "src/train/train_utils.h"
@@ -141,13 +137,22 @@ int TransferSession::RunGraph(const KernelCallBack &before, const KernelCallBack
     for (auto &backbone_head_pair : backbone_head_map_) {
       auto input = backbone_head_pair.first;
       auto output = backbone_head_pair.second;
-      char *input_data = reinterpret_cast<char *>(input->MutableData());
-      char *output_data = reinterpret_cast<char *>(output->MutableData());
+      float *input_data = reinterpret_cast<float *>(input->MutableData());
+      float *output_data = reinterpret_cast<float *>(output->MutableData());
       if (nchw2nhwc_) {
-        int plane = input->shape().at(1) * input->shape().at(2);
         int batch = input->shape().at(0);
+        int plane = input->shape().at(1) * input->shape().at(2);
         int channel = input->shape().at(3);
-        PackNCHWToNHWCFp32(output_data, input_data, batch, plane, channel, 0, 1);
+        int img_size = plane * channel;
+        for (int b = 0; b < batch; b++) {
+          float *in = input_data + b * img_size;
+          float *out = output_data + b * img_size;
+          for (int p = 0; p < plane; p++) {
+            for (int c = 0; c < channel; c++) {
+              in[p * channel + c] = out[c * plane + p];
+            }
+          }
+        }
       } else {
         std::copy(output_data, output_data + output->Size(), input_data);
       }
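The replacement loop does the NCHW-to-NHWC rearrangement explicitly instead of calling PackNCHWToNHWCFp32: for each of the batch images (img_size = plane * channel elements), NHWC position (p, c) of the head input is filled from NCHW position (c, p) of the backbone output. An illustrative index mapping, assuming shape [N=1, H=2, W=2, C=3]:

    // plane = H * W = 4, channel = 3
    // in[p * channel + c]  <-  out[c * plane + p]
    // e.g. p = 1, c = 2:  in[5]  <-  out[9]

One caveat worth flagging: the else branch still copies output->Size() elements, and if Size() is a byte count (as it was for the original char pointers), the float-typed std::copy now spans four times the intended range.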
@@ -7,13 +7,18 @@ cd "${MINDSPORE_HOME}" || exit 1
 CROPPER_OUTPUT_DIR=mindspore/lite/build/tools/cropper
 mkdir -p ${CROPPER_OUTPUT_DIR}
 MAPPING_OUTPUT_FILE_NAME_TMP=${CROPPER_OUTPUT_DIR}/cropper_mapping_tmp.cfg
+MAPPING_OUTPUT_FILE_NAME_TRAIN_TMP=${CROPPER_OUTPUT_DIR}/cropper_mapping_train_tmp.cfg
 CPU_MAPPING_OUTPUT_FILE=${CROPPER_OUTPUT_DIR}/cropper_mapping_cpu.cfg
 GPU_MAPPING_OUTPUT_FILE=${CROPPER_OUTPUT_DIR}/cropper_mapping_gpu.cfg
 NPU_MAPPING_OUTPUT_FILE=${CROPPER_OUTPUT_DIR}/cropper_mapping_npu.cfg
+CPU_TRAIN_MAPPING_OUTPUT_FILE=${CROPPER_OUTPUT_DIR}/cropper_mapping_cpu_train.cfg
 [ -n "${MAPPING_OUTPUT_FILE_NAME_TMP}" ] && rm -f ${MAPPING_OUTPUT_FILE_NAME_TMP}
+[ -n "${MAPPING_OUTPUT_FILE_NAME_TRAIN_TMP}" ] && rm -f ${MAPPING_OUTPUT_FILE_NAME_TRAIN_TMP}
 [ -n "${CPU_MAPPING_OUTPUT_FILE}" ] && rm -f ${CPU_MAPPING_OUTPUT_FILE}
 [ -n "${GPU_MAPPING_OUTPUT_FILE}" ] && rm -f ${GPU_MAPPING_OUTPUT_FILE}
 [ -n "${NPU_MAPPING_OUTPUT_FILE}" ] && rm -f ${NPU_MAPPING_OUTPUT_FILE}
+[ -n "${CPU_TRAIN_MAPPING_OUTPUT_FILE}" ] && rm -f ${CPU_TRAIN_MAPPING_OUTPUT_FILE}
 
 ops_list=()
 DEFINE_STR="-DENABLE_ANDROID -DENABLE_ARM -DENABLE_ARM64 -DENABLE_NEON -DNO_DLIB -DUSE_ANDROID_LOG -DANDROID -DENABLE_FP16"
 # get the flatbuffers path
@@ -57,11 +62,17 @@ getDeep() {
     # only add existing files
     if [[ -e ${array_deep_file%h*}cc ]]; then
       file_split=$(echo ${array_deep_file} | awk -F '/' '{print $NF}')
-      echo "${1},${3},${file_split%h*}cc.o" >>${MAPPING_OUTPUT_FILE_NAME_TMP}
+      if [[ "$4" != "train_source" ]] ; then
+        echo "${1},${3},${file_split%h*}cc.o" >>${MAPPING_OUTPUT_FILE_NAME_TMP}
+      fi
+      echo "${1},${3},${file_split%h*}cc.o" >>${MAPPING_OUTPUT_FILE_NAME_TRAIN_TMP}
     fi
     if [[ -e ${array_deep_file%h*}c ]]; then
       file_split=$(echo ${array_deep_file} | awk -F '/' '{print $NF}')
-      echo "${1},${3},${file_split%h*}c.o" >>${MAPPING_OUTPUT_FILE_NAME_TMP}
+      if [[ "$4" != "train_source" ]] ; then
+        echo "${1},${3},${file_split%h*}c.o" >>${MAPPING_OUTPUT_FILE_NAME_TMP}
+      fi
+      echo "${1},${3},${file_split%h*}c.o" >>${MAPPING_OUTPUT_FILE_NAME_TRAIN_TMP}
     fi
   done
 }
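The effect of the new fourth argument: every resolved dependency is always recorded in the train tmp mapping, but lands in the inference tmp mapping only when the walk did not start from train-only sources ($4 != "train_source"), so cropper_mapping_cpu_train.cfg ends up as a superset of cropper_mapping_cpu.cfg. An illustrative pair of resulting entries for one common dependency (values hypothetical):

    cropper_mapping_cpu.cfg:        Conv2DFusion,common,pack_fp32.c.o
    cropper_mapping_cpu_train.cfg:  Conv2DFusion,common,pack_fp32.c.o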
@@ -79,6 +90,7 @@ getOpsFile() {
       out_file=$(echo ${file} | awk -F '/' '{print $NF}')
       # concat schemaType + fileType + fileName append to files
       echo "${type},${3},${out_file}.o" >>${MAPPING_OUTPUT_FILE_NAME_TMP}
+      echo "${type},${3},${out_file}.o" >>${MAPPING_OUTPUT_FILE_NAME_TRAIN_TMP}
       map_files=$(gcc -MM ${file} ${DEFINE_STR} ${HEADER_LOCATION})
       # first is *.o second is *.cc
       array_file=()
@@ -91,18 +103,44 @@ getOpsFile() {
         getDeep ${type} ${array_file} ${3} &
         array_file_split=$(echo ${array_file} | awk -F '/' '{print $NF}')
         echo "${type},${3},${array_file_split%h*}cc.o" >>${MAPPING_OUTPUT_FILE_NAME_TMP}
+        echo "${type},${3},${array_file_split%h*}cc.o" >>${MAPPING_OUTPUT_FILE_NAME_TRAIN_TMP}
       fi
       if [[ -e ${array_file%h*}c ]]; then
-        getDeep ${type} ${array_file%h*}c ${3} &
+        getDeep ${type} ${array_file} ${3} &
         array_file_split=$(echo ${array_file} | awk -F '/' '{print $NF}')
         echo "${type},${3},${array_file_split%h*}c.o" >>${MAPPING_OUTPUT_FILE_NAME_TMP}
+        echo "${type},${3},${array_file_split%h*}c.o" >>${MAPPING_OUTPUT_FILE_NAME_TRAIN_TMP}
       fi
     done
   done
 done
 }
 
+getFilesFromArr() {
+  local -n arr_files=${1}
+  # echo " func parm 1 : ${arr_files[@]}"
+  # echo " func parm 2 : $2"
+  # shellcheck disable=SC2068
+  for file in ${arr_files[@]}; do
+    map_files=$(gcc -MM ${file} ${DEFINE_STR} ${HEADER_LOCATION})
+    # first is *.o second is *.cc
+    # shellcheck disable=SC2207
+    array_runtime=($(echo ${map_files} | awk -F '\' '{for(i=3;i<=NF;i++){print $i}}' | grep -v "flatbuffers" | egrep -v ${REMOVE_LISTS_STR}))
+    # only add existing files
+    for array_runtime_file in "${array_runtime[@]}"; do
+      if [[ -e ${array_runtime_file%h*}cc && ! ${all_files[*]} =~ ${array_runtime_file%h*}cc ]]; then
+        all_files=("${all_files[@]}" "${array_runtime_file%h*}cc")
+        getDeep "CommonFile" ${array_runtime_file%h*}cc "common" $2 &
+      fi
+      if [[ -e ${array_runtime_file%h*}c && ! ${all_files[*]} =~ ${array_runtime_file%h*}c ]]; then
+        all_files=("${all_files[@]}" "${array_runtime_file%h*}c")
+        getDeep "CommonFile" ${array_runtime_file%h*}c "common" $2 &
+      fi
+    done
+  done
+}
+
 getCommonFile() {
   echo "start get common files"
   include_h=()
@@ -115,6 +153,9 @@ getCommonFile() {
   while IFS='' read -r line; do common_files_h+=("$line"); done < <(ls mindspore/lite/src/common/*.h)
   runtime_files_h=()
   while IFS='' read -r line; do runtime_files_h+=("$line"); done < <(ls mindspore/lite/src/runtime/*.h)
+  train_files_h=()
+  while IFS='' read -r line; do train_files_h+=("$line"); done < <(ls mindspore/lite/include/train/*.h)
+  while IFS='' read -r line; do train_files_h+=("$line"); done < <(ls mindspore/lite/src/train/*.h)
   others_files_h=(
     mindspore/lite/src/runtime/infer_manager.h
     mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/infer_register.h
@@ -130,7 +171,9 @@
     mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/tensor_c.h
     mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/errorcode.h
   )
-  all_files_h=("${include_h[@]}" "${regist_include_h[@]}" "${src_files_h[@]}" "${common_files_h[@]}" "${runtime_files_h[@]}" "${others_files_h[@]}")
+  all_files_h=("${include_h[@]}" "${regist_include_h[@]}" "${src_files_h[@]}" "${common_files_h[@]}"
+    "${runtime_files_h[@]}" "${others_files_h[@]}"
+  )
 
   # concat regx
   REMOVE_LISTS_STR="${all_files_h[0]}"
@@ -167,51 +210,43 @@ getCommonFile() {
   mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/infer_register.c
   mindspore/core/utils/status.cc
   )
+  # save train files
+  train_files=()
+  while IFS='' read -r line; do train_files+=("$line"); done < <(ls mindspore/lite/src/train/*.cc)
+  while IFS='' read -r line; do train_files+=("$line"); done < <(ls mindspore/lite/src/cxx_api/callback/*.cc)
+  while IFS='' read -r line; do train_files+=("$line"); done < <(ls mindspore/lite/src/cxx_api/metrics/*.cc)
+  while IFS='' read -r line; do train_files+=("$line"); done < <(ls mindspore/lite/src/cxx_api/train/*.cc)
+  others_train_files=(
+    mindspore/lite/tools/common/storage.cc
+  )
   all_files=("${src_files[@]}" "${regist_files[@]}" "${common_files[@]}" "${runtime_files_cc[@]}"
     "${others_files_c[@]}" "${assembly_files[@]}" "${mindrt_files[@]}"
    "${cxx_api_files[@]}"
   )
-  # shellcheck disable=SC2068
-  for file in ${all_files[@]}; do
-    map_files=$(gcc -MM ${file} ${DEFINE_STR} ${HEADER_LOCATION})
-    # first is *.o second is *.cc
-    # shellcheck disable=SC2207
-    array_runtime=($(echo ${map_files} | awk -F '\' '{for(i=3;i<=NF;i++){print $i}}' | grep -v "flatbuffers" | egrep -v ${REMOVE_LISTS_STR}))
-    # only add existing files
-    for array_runtime_file in "${array_runtime[@]}"; do
-      if [[ -e ${array_runtime_file%h*}cc && ! ${all_files[*]} =~ ${array_runtime_file%h*}cc ]]; then
-        all_files=("${all_files[@]}" "${array_runtime_file%h*}cc")
-        getDeep "CommonFile" ${array_runtime_file%h*}cc "common" &
-      fi
-      if [[ -e ${array_runtime_file%h*}c && ! ${all_files[*]} =~ ${array_runtime_file%h*}c ]]; then
-        all_files=("${all_files[@]}" "${array_runtime_file%h*}c")
-        getDeep "CommonFile" ${array_runtime_file%h*}c "common" &
-      fi
-    done
-  done
-  # shellcheck disable=SC2068
-  for file in ${all_files_h[@]}; do
-    map_files=$(gcc -MM ${file} ${DEFINE_STR} ${HEADER_LOCATION})
-    # first is *.o second is *.cc
-    # shellcheck disable=SC2207
-    array_runtime=($(echo ${map_files} | awk -F '\' '{for(i=3;i<=NF;i++){print $i}}' | grep -v "flatbuffers" | egrep -v ${REMOVE_LISTS_STR}))
-    # only add existing files
-    for array_runtime_file in "${array_runtime[@]}"; do
-      if [[ -e ${array_runtime_file%h*}cc && ! ${all_files[*]} =~ ${array_runtime_file%h*}cc ]]; then
-        all_files=("${all_files[@]}" "${array_runtime_file%h*}cc")
-        getDeep "CommonFile" ${array_runtime_file%h*}cc "common" &
-      fi
-      if [[ -e ${array_runtime_file%h*}c && ! ${all_files[*]} =~ ${array_runtime_file%h*}c ]]; then
-        all_files=("${all_files[@]}" "${array_runtime_file%h*}c")
-        getDeep "CommonFile" ${array_runtime_file%h*}c "common" &
-      fi
-    done
-  done
+  getFilesFromArr all_files
+  getFilesFromArr all_files_h
   # shellcheck disable=SC2068
   for file in ${all_files[@]}; do
     file=$(echo ${file} | awk -F '/' '{print $NF}')
    echo "CommonFile,common,${file}.o" >>${MAPPING_OUTPUT_FILE_NAME_TMP}
   done
+
+  all_files_train=("${all_files[@]}" "${train_files[@]}" "${others_train_files[@]}"
+  )
+  all_files_train_h=("${all_files_h[@]}" "${train_files_h[@]}"
+  )
+  REMOVE_LISTS_STR="${all_files_train_h[0]}"
+  # shellcheck disable=SC2068
+  for val in ${all_files_train_h[@]:1}; do
+    REMOVE_LISTS_STR="$REMOVE_LISTS_STR|$val"
+  done
+  getFilesFromArr all_files_train "train_source"
+  getFilesFromArr all_files_train_h "train_source"
+  # shellcheck disable=SC2068
+  for file in ${all_files_train[@]}; do
+    file=$(echo ${file} | awk -F '/' '{print $NF}')
+    echo "CommonFile,common,${file}.o" >>${MAPPING_OUTPUT_FILE_NAME_TRAIN_TMP}
+  done
 }
 
 # The x86 platform cannot search based on header files, so manually search for the first layer.
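This refactor folds the two duplicated dependency-walking loops into the shared getFilesFromArr helper, then runs the walk a second time over the train superset: all_files_train adds the src/train, cxx_api callback/metrics/train sources, and storage.cc on top of the common files, with REMOVE_LISTS_STR widened by the train headers so those are not filtered out of the gcc -MM output. Both passes may emit the same entry more than once; the sort | uniq steps below collapse the duplicates.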
@@ -276,6 +311,11 @@ sleep 1
 sort ${MAPPING_OUTPUT_FILE_NAME_TMP} | uniq >${CPU_MAPPING_OUTPUT_FILE}
 chmod 444 ${CPU_MAPPING_OUTPUT_FILE}
 
+sleep 1
+# remove duplicate files
+sort ${MAPPING_OUTPUT_FILE_NAME_TRAIN_TMP} | uniq >${CPU_TRAIN_MAPPING_OUTPUT_FILE}
+chmod 444 ${CPU_TRAIN_MAPPING_OUTPUT_FILE}
+
 # support for gpu
 opencl_files=()
 while IFS='' read -r line; do opencl_files+=("$line"); done < <(ls mindspore/lite/src/runtime/kernel/opencl/*.cc)
@@ -320,4 +360,5 @@ chmod 444 ${NPU_MAPPING_OUTPUT_FILE}
 
 # modify file permissions to read-only
 [ -n "${MAPPING_OUTPUT_FILE_NAME_TMP}" ] && rm -f ${MAPPING_OUTPUT_FILE_NAME_TMP}
+[ -n "${MAPPING_OUTPUT_FILE_NAME_TRAIN_TMP}" ] && rm -f ${MAPPING_OUTPUT_FILE_NAME_TRAIN_TMP}
 echo "Complete all tasks."