diff --git a/build.sh b/build.sh
index 428743f0ffb..cfa657ff3ed 100755
--- a/build.sh
+++ b/build.sh
@@ -24,7 +24,7 @@ usage()
 {
   echo "Usage:"
   echo "bash build.sh [-d] [-r] [-v] [-c on|off] [-t on|off] [-g on|off] [-h] [-b ge] [-m infer|train] \\"
-  echo "              [-a on|off] [-Q on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|d|cpu] \\"
+  echo "              [-a on|off] [-Q on|off] [-S on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|d|cpu] \\"
   echo "              [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I] [-K] [-B on|off] [-E] [-l on|off]"
   echo ""
   echo "Options:"
@@ -48,6 +48,7 @@ usage()
   echo "    -P Enable dump anf graph to file in ProtoBuffer format, default on"
   echo "    -Q Enable dump memory, default off"
   echo "    -D Enable dumping of function graph ir, default on"
+  echo "    -S Enable async data dump, default off"
   echo "    -z Compile dataset & mindrecord, default on"
   echo "    -M Enable MPI and NCCL for GPU training, gpu default on"
   echo "    -V Specify the minimum required cuda version, default CUDA 10.1"
@@ -88,6 +89,7 @@ checkopts()
   ENABLE_TIMELINE="off"
   ENABLE_DUMP2PROTO="on"
   ENABLE_DUMPE2E="off"
+  ENABLE_DATA_DUMP="off"
   ENABLE_DUMP_IR="on"
   COMPILE_MINDDATA="on"
   ENABLE_MPI="off"
@@ -102,7 +104,7 @@ checkopts()
   ENABLE_PYTHON="on"
 
   # Process the options
-  while getopts 'drvj:c:t:hsb:a:g:p:ie:m:l:I:LRP:Q:D:zM:V:K:sB:E' opt
+  while getopts 'drvj:c:t:hsb:a:g:p:ie:m:l:I:LRP:Q:S:D:zM:V:K:sB:E' opt
   do
     OPTARG=$(echo ${OPTARG} | tr '[A-Z]' '[a-z]')
     case "${opt}" in
@@ -218,6 +220,11 @@ checkopts()
         ENABLE_DUMPE2E="$OPTARG"
         echo "enable dump end to end"
         ;;
+      S)  # -S on|off: build switch for async data dump
+        check_on_off $OPTARG S
+        ENABLE_DATA_DUMP="$OPTARG"
+        echo "enable data dump"  # printed whether the value is on or off
+        ;;
       D)
         check_on_off $OPTARG D
         ENABLE_DUMP_IR="$OPTARG"
@@ -321,6 +328,9 @@ build_mindspore()
     if [[ "X$ENABLE_DUMPE2E" = "Xon" ]]; then
         CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_E2E=ON"
     fi
+    if [[ "X$ENABLE_DATA_DUMP" = "Xon" ]]; then  # set by -S on
+        CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DATA_DUMP=ON"
+    fi
     CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_IR=${ENABLE_DUMP_IR}"
     CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_PYTHON=${ENABLE_PYTHON}"
     if [[ "X$ENABLE_MPI" = "Xon" ]]; then
diff --git a/cmake/options.cmake b/cmake/options.cmake
index b01c623377d..2470c25a90c 100644
--- a/cmake/options.cmake
+++ b/cmake/options.cmake
@@ -116,6 +116,10 @@ if(ENABLE_DUMP_E2E)
     add_compile_definitions(ENABLE_DUMP_E2E)
 endif()
 
+if(ENABLE_DATA_DUMP)
+    add_compile_definitions(ENABLE_DATA_DUMP)  # enables the #ifdef ENABLE_DATA_DUMP sections of the C++ sources
+endif()
+
 if(ENABLE_DEBUGGER)
     add_compile_definitions(ENABLE_DEBUGGER)
 endif()
diff --git a/config/data_dump.json b/config/data_dump.json
new file mode 100644
index 00000000000..fc08f785906
--- /dev/null
+++ b/config/data_dump.json
@@ -0,0 +1,15 @@
+{
+  "DumpSettings": {
+    "net_name": "ResNet50",
+    "mode": 1,
+    "iteration": 0,
+    "kernels": ["Default/Conv2D-op2", "Default/TensorAdd-op10"]
+  },
+
+  "DumpSettingsSpec": {
+    "net_name": "net name eg:ResNet50",
+    "mode": "0: dump all kernels, 1: dump kernels in kernels list",
+    "iteration": "specified iteration",
+    "kernels": "full scope names of the ops which need to be dumped"
+  }
+}
\ No newline at end of file
diff --git a/graphengine b/graphengine
index 1c2672868fd..18cf690152a 160000
--- a/graphengine
+++ b/graphengine
@@ -1 +1 @@
-Subproject commit 1c2672868fda8b1d012c99e5aca73725ac869ba9
+Subproject commit 18cf690152add623ffbddfbbb4674d1b34484ca7
diff --git a/mindspore/ccsrc/CMakeLists.txt b/mindspore/ccsrc/CMakeLists.txt
index 8523475b1f6..58b3ce6881e 100644
--- a/mindspore/ccsrc/CMakeLists.txt
+++ b/mindspore/ccsrc/CMakeLists.txt
@@ -109,8 +109,12 @@ if (ENABLE_D)
     file(GLOB_RECURSE PROTO_INNER RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "predict/proto/*.proto")
     ms_protobuf_generate(PREDICT_PROTOSRCS PREDICT_PROTOHDRS ${PROTO_INNER})
 
+    file(GLOB_RECURSE PROTO_DUMP RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "device/ascend/dump/proto/*.proto")
+    ms_protobuf_generate(DUMP_PROTOSRCS DUMP_PROTOHDRS ${PROTO_DUMP})  # own header list, like PREDICT_PROTOHDRS
+
     list(APPEND MINDSPORE_PROTO_LIST ${PROTOSRCS})
     list(APPEND MINDSPORE_PROTO_LIST ${PREDICT_PROTOSRCS})
+    list(APPEND MINDSPORE_PROTO_LIST ${DUMP_PROTOSRCS})
 
     add_compile_definitions(ENABLE_D)
 endif ()
diff --git a/mindspore/ccsrc/debug/CMakeLists.txt b/mindspore/ccsrc/debug/CMakeLists.txt
index ba0c5e07ac2..37ffcceeaf5 100644
--- a/mindspore/ccsrc/debug/CMakeLists.txt
+++ b/mindspore/ccsrc/debug/CMakeLists.txt
@@ -19,6 +19,15 @@ if (ENABLE_DEBUGGER)
         )
 endif (ENABLE_DEBUGGER)
 
+if (ENABLE_D OR ENABLE_DUMP_E2E)
+    # common.cc holds the path helpers that both e2e_dump.cc and data_dump_parser.cc call.
+    list(APPEND _DEBUG_SRC_LIST "${CMAKE_CURRENT_SOURCE_DIR}/common.cc")
+endif()
+
+if (ENABLE_D AND ENABLE_DATA_DUMP)
+    list(APPEND _DEBUG_SRC_LIST "${CMAKE_CURRENT_SOURCE_DIR}/data_dump_parser.cc")
+endif()
+
 if (ENABLE_DUMP_E2E)
     list(APPEND _DEBUG_SRC_LIST "${CMAKE_CURRENT_SOURCE_DIR}/e2e_dump.cc")
 endif (ENABLE_DUMP_E2E)
diff --git a/mindspore/ccsrc/debug/common.cc b/mindspore/ccsrc/debug/common.cc
new file mode 100644
index 00000000000..6caf7e2c393
--- /dev/null
+++ b/mindspore/ccsrc/debug/common.cc
@@ -0,0 +1,125 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "debug/common.h"
+
+#include <memory>
+#include <optional>
+#include "utils/system/env.h"
+#include "utils/system/file_system.h"
+#include "utils/log_adapter.h"
+#include "utils/context/ms_context.h"
+
+namespace mindspore {
+// Resolves input_path to an absolute path. Missing parent directories are created; the file need not exist.
+// Returns std::nullopt when the path is too long or the parent directory cannot be created or resolved.
+std::optional<std::string> Common::GetRealPath(const std::string &input_path) {
+  auto path_split_pos = input_path.find_last_of('/');
+  if (path_split_pos == std::string::npos) {
+    path_split_pos = input_path.find_last_of('\\');
+  }
+  char real_path[PATH_MAX] = {0};
+  if (path_split_pos == std::string::npos) {
+    // Bare file name: there is no directory part to create.
+    if (input_path.length() >= PATH_MAX) {
+      MS_LOG(ERROR) << "Prefix path is too longer!";
+      return std::nullopt;
+    }
+    if (nullptr == realpath(input_path.c_str(), real_path)) {
+      MS_LOG(ERROR) << "File " << input_path << " does not exit, it will be created.";
+      // realpath() may leave the buffer untouched on failure; never hand an empty path back to the caller.
+      if (real_path[0] == '\0') {
+        return input_path;
+      }
+    }
+    return std::string(real_path);
+  }
+  // Directory part present: make sure it exists, resolve it, then re-attach the file name.
+  std::string prefix_path = input_path.substr(0, path_split_pos);
+  if (prefix_path.length() >= PATH_MAX) {
+    MS_LOG(ERROR) << "Prefix path is too longer!";
+    return std::nullopt;
+  }
+  if (!CreateNotExistDirs(prefix_path)) {
+    MS_LOG(ERROR) << "CreateNotExistDirs Failed!";
+    return std::nullopt;
+  }
+  if (nullptr == realpath(prefix_path.c_str(), real_path)) {
+    MS_LOG(ERROR) << "dir " << prefix_path << " does not exit.";
+    return std::nullopt;
+  }
+  // substr(pos) keeps the separator, so this yields "<resolved dir><sep><file name>".
+  return std::string(real_path) + input_path.substr(path_split_pos);
+}
+
+// Creates every directory along `path` that does not exist yet; '/' and '\\' both count as separators.
+bool Common::CreateNotExistDirs(const std::string &path) {
+  std::shared_ptr<system::FileSystem> fs = system::Env::GetFileSystem();
+  MS_EXCEPTION_IF_NULL(fs);
+  char temp_path[PATH_MAX] = {0};
+  if (path.length() > PATH_MAX) {
+    MS_LOG(ERROR) << "Path lens is max than " << PATH_MAX;
+    return false;
+  }
+  for (uint32_t pos = 0; pos < path.length(); pos++) {
+    const char cur = path[pos];
+    temp_path[pos] = cur;
+    if (pos == 0 || (cur != '\\' && cur != '/')) {
+      continue;
+    }
+    // Terminate the buffer at the separator so it holds exactly the directory prefix seen so far.
+    temp_path[pos] = '\0';
+    std::string path_handle(temp_path);
+    if (!fs->FileExist(temp_path)) {
+      MS_LOG(INFO) << "Dir " << path_handle << " does not exit, creating...";
+      if (!fs->CreateDir(temp_path)) {
+        MS_LOG(ERROR) << "Create " << path_handle << " dir error";
+        return false;
+      }
+    }
+    temp_path[pos] = cur;
+  }
+  if (!fs->FileExist(path)) {
+    MS_LOG(INFO) << "Dir " << path << " does not exit, creating...";
+    if (!fs->CreateDir(path)) {
+      MS_LOG(ERROR) << "Create " << path << " dir error";
+      return false;
+    }
+  }
+  return true;
+}
+
+// Reads a config file path from environment variable `env`; std::nullopt if it is unset or the file is missing.
+std::optional<std::string> Common::GetConfigFile(const std::string &env) {
+  if (env.empty()) {
+    MS_LOG(EXCEPTION) << "Invalid env";
+  }
+  const char *env_value = std::getenv(env.c_str());
+  if (env_value == nullptr) {
+    MS_LOG(ERROR) << "Please export env:" << env;
+    return std::nullopt;
+  }
+  MS_LOG(INFO) << "Async Dump Getenv env:" << env << "=" << env_value;
+  std::string dump_config_file(env_value);
+  std::shared_ptr<system::FileSystem> fs = system::Env::GetFileSystem();
+  MS_EXCEPTION_IF_NULL(fs);
+  if (fs->FileExist(dump_config_file)) {
+    return dump_config_file;
+  }
+  MS_LOG(ERROR) << dump_config_file << " not exist.";
+  return std::nullopt;
+}
+}  // namespace mindspore
diff --git a/mindspore/ccsrc/debug/common.h b/mindspore/ccsrc/debug/common.h
new file mode 100644
index 00000000000..8d4a6cb4674
--- /dev/null
+++ b/mindspore/ccsrc/debug/common.h
@@ -0,0 +1,36 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_MINDSPORE_CCSRC_DEBUG_COMMON_H_
+#define MINDSPORE_MINDSPORE_CCSRC_DEBUG_COMMON_H_
+
+#include <string>
+#include <optional>
+#include "utils/contract.h"
+
+namespace mindspore {
+class Common {
+ public:
+  Common() = default;
+  ~Common() = default;
+  static std::optional<std::string> GetRealPath(const std::string &input_path);  // std::nullopt on failure
+  static std::optional<std::string> GetConfigFile(const std::string &env);  // `env` names an environment variable
+
+ private:
+  static bool CreateNotExistDirs(const std::string &path);  // false if a directory cannot be created
+};
+}  // namespace mindspore
+#endif  // MINDSPORE_MINDSPORE_CCSRC_DEBUG_COMMON_H_
diff --git a/mindspore/ccsrc/debug/data_dump_parser.cc b/mindspore/ccsrc/debug/data_dump_parser.cc
new file mode 100644
index 00000000000..259ec388d32
--- /dev/null
+++ b/mindspore/ccsrc/debug/data_dump_parser.cc
@@ -0,0 +1,152 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "debug/data_dump_parser.h"
+
+#include <fstream>
+#include "utils/context/ms_context.h"
+#include "debug/common.h"
+
+constexpr auto kDataDumpConfigPtah = "DATA_DUMP_CONFIG_PATH";
+constexpr auto kEnableDataDump = "ENABLE_DATA_DUMP";
+constexpr auto kDataDumpPath = "DATA_DUMP_PATH";
+namespace mindspore {
+void DataDumpParser::ResetParam() {
+  enable_ = false;
+  net_name_.clear();
+  dump_mode_ = 0;
+  dump_step_ = 0;
+  kernel_set_.clear();
+}
+
+// True only when ENABLE_DATA_DUMP=1 is exported; logs MS_LOG(EXCEPTION) if dump is requested in PyNative mode.
+bool DataDumpParser::DumpEnabled() const {
+  const char *env_value = std::getenv(kEnableDataDump);
+  if (env_value == nullptr) {
+    MS_LOG(WARNING) << "[DataDump] enable dump is null. Please export ENABLE_DATA_DUMP";
+    return false;
+  }
+  if (std::atoi(env_value) != 1) {
+    MS_LOG(WARNING) << "[DataDump] Please export ENABLE_DATA_DUMP=1";
+    return false;
+  }
+  // Dump was requested; check the execution mode before reporting it as enabled.
+  auto context = MsContext::GetInstance();
+  MS_EXCEPTION_IF_NULL(context);
+  const bool is_pynative = (context->execution_mode() == kPynativeMode);
+  if (is_pynative) {
+    MS_LOG(EXCEPTION) << "[DataDump] PyNative mode not support data dump";
+  }
+  return true;
+}
+
+// Returns the value of DATA_DUMP_PATH, or std::nullopt (after logging an error) when it is not exported.
+std::optional<std::string> DataDumpParser::GetDumpPath() const {
+  const char *dump_path = std::getenv(kDataDumpPath);
+  if (dump_path != nullptr) {
+    return std::string(dump_path);
+  }
+  MS_LOG(ERROR) << "[DataDump] dump path is null. Please export DATA_DUMP_PATH";
+  return std::nullopt;
+}
+
+// Loads the "DumpSettings" object from the JSON file named by DATA_DUMP_CONFIG_PATH into this parser.
+// Old settings are always cleared first; every config error is reported through MS_LOG(EXCEPTION).
+void DataDumpParser::ParseDumpConfig() {
+  std::lock_guard<std::mutex> guard(lock_);
+  MS_LOG(INFO) << "[DataDump] parse start";
+  // Reset first so that enable() never reports settings left over from an earlier parse.
+  ResetParam();
+  if (!DumpEnabled()) {
+    MS_LOG(INFO) << "[DataDump] dump not enable";
+    return;
+  }
+
+  auto dump_config_file = Common::GetConfigFile(kDataDumpConfigPtah);
+  if (!dump_config_file.has_value()) {
+    MS_LOG(EXCEPTION) << "[DataDump] Get config file failed";
+  }
+
+  std::ifstream json_file(dump_config_file.value());
+  if (!json_file.is_open()) {
+    MS_LOG(EXCEPTION) << "[DataDump] " << dump_config_file.value() << " open failed.";
+  }
+
+  // Parse without exceptions so that malformed JSON is reported like every other config error.
+  nlohmann::json j = nlohmann::json::parse(json_file, nullptr, false);
+  if (j.is_discarded()) {
+    MS_LOG(EXCEPTION) << "[DataDump] " << dump_config_file.value() << " is not valid json.";
+  }
+  if (j.find("DumpSettings") == j.end()) {
+    MS_LOG(EXCEPTION) << "[DataDump] DumpSettings is not exist.";
+  }
+
+  nlohmann::json dump_settings = j.at("DumpSettings");
+  MS_LOG(INFO) << "[DataDump] Async dump settings Json: " << dump_settings.dump();
+  if (!IsConfigExist(dump_settings)) {
+    MS_LOG(EXCEPTION) << "[DataDump] Async dump json invalid";
+  }
+  if (!ParseDumpSetting(dump_settings)) {
+    MS_LOG(EXCEPTION) << "[DataDump] Parse dump json failed";
+  }
+}
+
+// Whether `op_full_name` must be dumped: never when dump is off, always in mode 0, else only listed kernels.
+bool DataDumpParser::NeedDump(const std::string &op_full_name) const {
+  if (!DumpEnabled()) {
+    return false;
+  }
+  // Mode 0 selects every kernel; any other mode consults the configured kernel set.
+  const bool dump_all_kernels = (dump_mode_ == 0);
+  const bool listed = kernel_set_.count(op_full_name) > 0;
+  return dump_all_kernels || listed;
+}
+
+bool DataDumpParser::IsConfigExist(const nlohmann::json &dump_settings) const {  // all four keys are mandatory
+  const auto has = [&dump_settings](const char *key) { return dump_settings.find(key) != dump_settings.end(); };
+  if (has("mode") && has("net_name") && has("iteration") && has("kernels")) {
+    return true;
+  }
+  MS_LOG(ERROR) << "[DataDump] DumpSettings keys are not exist.";
+  return false;
+}
+
+// Checks the JSON types in `dump_settings` and stores the values; returns false on any type mismatch.
+bool DataDumpParser::ParseDumpSetting(const nlohmann::json &dump_settings) {
+  const auto &mode = dump_settings.at("mode");
+  const auto &net_name = dump_settings.at("net_name");
+  const auto &iteration = dump_settings.at("iteration");
+  const auto &kernels = dump_settings.at("kernels");
+  enable_ = false;  // stays false unless every value below is accepted
+  if (!(mode.is_number() && net_name.is_string() && iteration.is_number() && kernels.is_array())) {
+    MS_LOG(ERROR) << "[DataDump] Element's type in Dump config json is invalid.";
+    return false;
+  }
+  dump_mode_ = mode.get<uint32_t>();
+  net_name_ = net_name.get<std::string>();
+  dump_step_ = iteration.get<uint32_t>();
+  for (const auto &kernel : kernels) {
+    if (!kernel.is_string()) {
+      MS_LOG(ERROR) << "[DataDump] Kernel name in Dump config json is not a string: " << kernel.dump();
+      return false;
+    }
+    const auto inserted = kernel_set_.insert(kernel.get<std::string>());  // decoded value, no JSON quoting
+    MS_LOG(INFO) << "[DataDump] Need dump kernel:" << *inserted.first;
+  }
+  enable_ = true;
+  return true;
+}
+}  // namespace mindspore
diff --git a/mindspore/ccsrc/debug/data_dump_parser.h b/mindspore/ccsrc/debug/data_dump_parser.h
new file mode 100644
index 00000000000..751c61dd1a1
--- /dev/null
+++ b/mindspore/ccsrc/debug/data_dump_parser.h
@@ -0,0 +1,61 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_MINDSPORE_CCSRC_DEBUG_ASYNC_DUMP_JSON_PARE_H_
+#define MINDSPORE_MINDSPORE_CCSRC_DEBUG_ASYNC_DUMP_JSON_PARE_H_
+
+#include <string>
+#include <set>
+#include <mutex>
+#include <optional>
+#include "nlohmann/json.hpp"
+#include "common/utils.h"
+
+namespace mindspore {
+class DataDumpParser {
+ public:
+  static DataDumpParser &GetInstance() {  // single shared instance (function-local static)
+    static DataDumpParser instance;
+    return instance;
+  }
+  void ParseDumpConfig();  // loads the settings from the JSON config file
+  bool NeedDump(const std::string &op_full_name) const;  // true if this kernel should be dumped
+  bool DumpEnabled() const;  // true when ENABLE_DATA_DUMP=1 is exported
+  std::optional<std::string> GetDumpPath() const;  // value of DATA_DUMP_PATH, if exported
+  bool enable() const { return enable_; }
+  const std::string &net_name() const { return net_name_; }
+  uint32_t dump_mode() const { return dump_mode_; }
+  uint32_t dump_step() const { return dump_step_; }
+  const std::set<std::string> &kernel_set() const { return kernel_set_; }
+
+ private:
+  DataDumpParser() = default;
+  virtual ~DataDumpParser() = default;
+  DISABLE_COPY_AND_ASSIGN(DataDumpParser);
+
+  void ResetParam();  // restores every setting below to its default
+  bool IsConfigExist(const nlohmann::json &dump_settings) const;  // are all required keys present?
+  bool ParseDumpSetting(const nlohmann::json &dump_settings);  // type-checks and stores the settings
+
+  std::mutex lock_;  // held for the whole of ParseDumpConfig()
+  bool enable_{false};  // set to true once a config has been parsed successfully
+  std::string net_name_;  // "net_name" value from the config
+  uint32_t dump_mode_{0};  // 0: dump all kernels, otherwise only those in kernel_set_
+  uint32_t dump_step_{0};  // "iteration" value from the config
+  std::set<std::string> kernel_set_;  // entries of the "kernels" array from the config
+};
+}  // namespace mindspore
+#endif  // MINDSPORE_MINDSPORE_CCSRC_DEBUG_ASYNC_DUMP_JSON_PARE_H_
diff --git a/mindspore/ccsrc/debug/e2e_dump.cc b/mindspore/ccsrc/debug/e2e_dump.cc
index 78a331fc278..9037a6d00b3 100644
--- a/mindspore/ccsrc/debug/e2e_dump.cc
+++ b/mindspore/ccsrc/debug/e2e_dump.cc
@@ -17,12 +17,14 @@
 #include <limits.h>
 #include <fstream>
 #include <string>
+#include <optional>
 #include <nlohmann/json.hpp>
 #include "utils/log_adapter.h"
 #include "utils/system/file_system.h"
 #include "utils/system/env.h"
 #include "utils/convert_utils.h"
 #include "utils/context/ms_context.h"
+#include "debug/common.h"
 
 using json = nlohmann::json;
 
@@ -158,100 +160,19 @@ bool Dump::DumpToFile(const std::string &filename, const void *data, size_t len)
     return false;
   }
 
-  std::string realpath;
-  bool ret = GetRealPath(filename, &realpath);
-  if (!ret) {
+  auto realpath = Common::GetRealPath(filename);
+  if (!realpath.has_value()) {
     MS_LOG(ERROR) << "Get real path failed.";
     return false;
   }
   std::ofstream fd;
-  fd.open(realpath, std::ios::binary | std::ios::out);
+  fd.open(realpath.value(), std::ios::binary | std::ios::out);
   if (!fd.is_open()) {
-    MS_LOG(ERROR) << "Open file " << realpath << " fail.";
+    MS_LOG(ERROR) << "Open file " << realpath.value() << " fail.";
     return false;
   }
   (void)fd.write(reinterpret_cast<const char *>(data), SizeToLong(len));
   fd.close();
   return true;
 }
-
-bool Dump::GetRealPath(const std::string &inpath, std::string *outpath) {
-  MS_EXCEPTION_IF_NULL(outpath);
-  auto path_split_pos = inpath.find_last_of('/');
-  if (path_split_pos == std::string::npos) {
-    path_split_pos = inpath.find_last_of('\\');
-  }
-  // get real path
-  char real_path[PATH_MAX] = {0};
-  if (path_split_pos != std::string::npos) {
-    std::string prefix_path = inpath.substr(0, path_split_pos);
-    if (prefix_path.length() >= PATH_MAX) {
-      MS_LOG(ERROR) << "Prefix path is too longer!";
-      return false;
-    }
-    std::string last_path = inpath.substr(path_split_pos, inpath.length() - path_split_pos);
-    auto ret = CreateNotExistDirs(prefix_path);
-    if (ret == false) {
-      MS_LOG(ERROR) << "CreateNotExistDirs Failed!";
-      return false;
-    }
-
-    if (nullptr == realpath(prefix_path.c_str(), real_path)) {
-      MS_LOG(ERROR) << "dir " << prefix_path << " does not exit.";
-      return false;
-    }
-    *outpath = std::string(real_path) + last_path;
-  }
-
-  if (path_split_pos == std::string::npos) {
-    if (inpath.length() >= PATH_MAX) {
-      MS_LOG(ERROR) << "Prefix path is too longer!";
-      return false;
-    }
-    if (nullptr == realpath(inpath.c_str(), real_path)) {
-      MS_LOG(ERROR) << "File " << inpath << " does not exit, it will be created.";
-    }
-    *outpath = std::string(real_path);
-  }
-
-  return true;
-}
-
-bool Dump::CreateNotExistDirs(const std::string &path) {
-  std::shared_ptr<system::FileSystem> fs = system::Env::GetFileSystem();
-  MS_EXCEPTION_IF_NULL(fs);
-  char temp_path[PATH_MAX] = {0};
-  if (path.length() > PATH_MAX) {
-    MS_LOG(ERROR) << "Path lens is max than " << PATH_MAX;
-    return false;
-  }
-  for (uint32_t i = 0; i < path.length(); i++) {
-    temp_path[i] = path[i];
-    if (temp_path[i] == '\\' || temp_path[i] == '/') {
-      if (i != 0) {
-        char tmp_char = temp_path[i];
-        temp_path[i] = '\0';
-        std::string path_handle(temp_path);
-        if (!fs->FileExist(temp_path)) {
-          MS_LOG(INFO) << "Dir " << path_handle << " does not exit, creating...";
-          if (!fs->CreateDir(temp_path)) {
-            MS_LOG(ERROR) << "Create " << path_handle << " dir error";
-            return false;
-          }
-        }
-        temp_path[i] = tmp_char;
-      }
-    }
-  }
-
-  if (!fs->FileExist(path)) {
-    MS_LOG(INFO) << "Dir " << path << " does not exit, creating...";
-    if (!fs->CreateDir(path)) {
-      MS_LOG(ERROR) << "Create " << path << " dir error";
-      return false;
-    }
-  }
-
-  return true;
-}
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/debug/e2e_dump.h b/mindspore/ccsrc/debug/e2e_dump.h
index 4c3e8308da7..acde1626cb4 100644
--- a/mindspore/ccsrc/debug/e2e_dump.h
+++ b/mindspore/ccsrc/debug/e2e_dump.h
@@ -59,10 +59,6 @@ class Dump {
   uint32_t cur_iter_;
   std::vector<std::string> dump_kernels_;
 
-  static bool GetRealPath(const std::string &inpath, std::string *outpath);
-
-  static bool CreateNotExistDirs(const std::string &path);
-
  private:
   bool ParseDumpConfig(const std::string &dump_config_file);
   bool IsConfigExist(const nlohmann::json &dumpSettings);
diff --git a/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc b/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc
index 8b176af5fc0..42b1d93dd55 100644
--- a/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc
+++ b/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc
@@ -42,6 +42,7 @@
 #include "device/ascend/ascend_memory_manager.h"
 #include "debug/tensor_load.h"
 
+using ge::model_runner::ModelRunner;
 using mindspore::device::ascend::ProfilingManager;
 using mindspore::device::ascend::ProfilingUtils;
 using mindspore::device::ascend::tasksink::TaskGenerator;
@@ -90,9 +91,16 @@ std::string GetRankId() {
 AscendKernelRuntime::~AscendKernelRuntime() { graph_model_map_.clear(); }
 
 void AscendKernelRuntime::ClearGraphModelMap() {
+#ifdef ENABLE_DATA_DUMP
+  for (auto &iter : graph_data_dumper_) {
+    MS_LOG(INFO) << "[DataDump] Unload data dumper:" << iter.first;
+    iter.second->UnloadDumpInfo();
+  }
+  graph_data_dumper_.clear();
+#endif
   for (auto &iter : graph_model_map_) {
     MS_LOG(INFO) << "Ge UnloadModel " << iter.first;
-    auto ret = ge::model_runner::ModelRunner::Instance().UnloadModel(iter.first);
+    auto ret = ModelRunner::Instance().UnloadModel(iter.first);
     if (!ret) {
       MS_LOG(ERROR) << "UnloadModel failed";
     }
@@ -107,7 +115,7 @@ void AscendKernelRuntime::ClearGraphRuntimeResource(uint32_t graph_id) {
     return;
   }
   MS_LOG(DEBUG) << "Ge UnloadModel " << iter->first;
-  auto ret = ge::model_runner::ModelRunner::Instance().UnloadModel(iter->first);
+  auto ret = ModelRunner::Instance().UnloadModel(iter->first);
   if (!ret) {
     MS_LOG(ERROR) << "UnloadModel failed";
   }
@@ -159,6 +167,10 @@ bool AscendKernelRuntime::Init() {
   }
 #endif
 
+#ifdef ENABLE_DATA_DUMP
+  DataDumpParser::GetInstance().ParseDumpConfig();
+#endif
+
   // Start up profiling before rtSetDevice
   ret = ProfilingManager::GetInstance().StartupProfiling(device_id_);
   if (!ret) {
@@ -440,7 +452,7 @@ bool AscendKernelRuntime::GenTask(const session::KernelGraph *graph) {
                << ", wait_active_stream_list size:" << wait_active_stream_list.size()
                << ", force_copy_stream_list size:" << force_copy_stream_list.size();
   std::vector<std::shared_ptr<ge::model_runner::OpInfo>> empty_list;
-  std::shared_ptr<ge::model_runner::DavinciModel> model = std::make_shared<ge::model_runner::DavinciModel>(
+  auto model = std::make_shared<ge::model_runner::DavinciModel>(
     task_info_list, empty_list, empty_list, empty_list, empty_list, wait_active_stream_list, force_copy_stream_list, 0,
     0, 0, 0, 0, 0, resource_manager.get_cur_stream_num(), label_assign_instance.GetLabelNum(NOT_NULL(graph)),
     resource_manager.get_cur_event_num(), 0);
@@ -477,21 +489,45 @@ bool AscendKernelRuntime::LoadTask(const session::KernelGraph *graph) {
 
   std::shared_ptr<ge::ModelListener> listener;
   MS_LOG(INFO) << "LoadDavinciModel mode_id:" << model_iter->first;
-  bool status = ge::model_runner::ModelRunner::Instance().LoadDavinciModel(device_id_, 0, model_iter->first,
-                                                                           model_iter->second, listener);
+  bool status =
+    ModelRunner::Instance().LoadDavinciModel(device_id_, 0, model_iter->first, model_iter->second, listener);
   if (!status) {
     MS_LOG(EXCEPTION) << "Load Task Failed";
   }
   if (ProfilingManager::GetInstance().IsProfiling()) {
-    auto task_ids = ge::model_runner::ModelRunner::Instance().GetTaskIdList(model_iter->first);
-    auto stream_ids = ge::model_runner::ModelRunner::Instance().GetStreamIdList(model_iter->first);
+    auto task_ids = ModelRunner::Instance().GetTaskIdList(model_iter->first);
+    auto stream_ids = ModelRunner::Instance().GetStreamIdList(model_iter->first);
     ProfilingUtils::ReportProfilingData(task_ids, stream_ids, NOT_NULL(graph));
   }
+
+#ifdef ENABLE_DATA_DUMP
+  LaunchDataDump(NOT_NULL(graph));
+#endif
+  if (!ModelRunner::Instance().LoadModelComplete(model_iter->first)) {
+    MS_LOG(ERROR) << "Call ge runtime LoadModelComplete failed";
+    return false;
+  }
   return true;
 }
 
+#ifdef ENABLE_DATA_DUMP
+// Creates a DataDumper for `graph`, loads its dump info and registers it by graph id; no-op when dump is off.
+void AscendKernelRuntime::LaunchDataDump(NotNull<const session::KernelGraph *> graph) {
+  if (!DataDumpParser::GetInstance().DumpEnabled()) {
+    return;
+  }
+  auto runtime_info_map = ModelRunner::Instance().GetRuntimeInfoMap(graph->graph_id());
+  auto data_dumper = std::make_shared<DataDumper>(graph.get(), runtime_info_map);
+  MS_EXCEPTION_IF_NULL(data_dumper);
+  data_dumper->LoadDumpInfo();
+  if (!graph_data_dumper_.try_emplace(graph->graph_id(), data_dumper).second) {
+    MS_LOG(WARNING) << "[DataDump] Insert graphId:" << graph->graph_id() << " data dumper failed";
+  }
+}
+#endif
+
 void AscendKernelRuntime::DebugTaskIdName(GraphId graph_id) {
-  auto task_ids = ge::model_runner::ModelRunner::Instance().GetTaskIdList(graph_id);
+  auto task_ids = ModelRunner::Instance().GetTaskIdList(graph_id);
   auto graph_task_names = ProfilingUtils::graph_kernel_name();
   auto iter = graph_task_names.find(graph_id);
   if (iter != graph_task_names.end()) {
@@ -524,7 +560,7 @@ bool AscendKernelRuntime::RunTask(const session::KernelGraph *graph) {
     return false;
   }
 
-  bool status = ge::model_runner::ModelRunner::Instance().RunModel(graph->graph_id(), input_tensors, output_tensors);
+  bool status = ModelRunner::Instance().RunModel(graph->graph_id(), input_tensors, output_tensors);
   if (!status) {
     MS_LOG(ERROR) << "Run task failed";
     DebugTaskIdName(graph->graph_id());
diff --git a/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.h b/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.h
index 69ba8b295a8..771c3f8c4f8 100644
--- a/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.h
+++ b/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.h
@@ -24,6 +24,10 @@
 #include "framework/ge_runtime/davinci_model.h"
 #include "device/kernel_runtime_manager.h"
 #include "session/session_basic.h"
+#ifdef ENABLE_DATA_DUMP
+#include "debug/data_dump_parser.h"
+#include "device/ascend/dump/data_dumper.h"
+#endif
 
 using ge::model_runner::TaskInfo;
 using std::unordered_map;
@@ -66,6 +70,10 @@ class AscendKernelRuntime : public KernelRuntime {
   bool initialized_{false};
   unordered_map<GraphId, vector<std::shared_ptr<TaskInfo>>> task_map_;
   unordered_map<GraphId, std::shared_ptr<ge::model_runner::DavinciModel>> graph_model_map_;
+#ifdef ENABLE_DATA_DUMP
+  void LaunchDataDump(NotNull<const session::KernelGraph *> graph);
+  unordered_map<GraphId, std::shared_ptr<DataDumper>> graph_data_dumper_;
+#endif
 };
 
 MS_REG_KERNEL_RUNTIME(kAscendDevice, AscendKernelRuntime);
diff --git a/mindspore/ccsrc/device/ascend/dump/data_dumper.cc b/mindspore/ccsrc/device/ascend/dump/data_dumper.cc
new file mode 100644
index 00000000000..57ac0e0947d
--- /dev/null
+++ b/mindspore/ccsrc/device/ascend/dump/data_dumper.cc
@@ -0,0 +1,282 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifdef ENABLE_DATA_DUMP
+#include "device/ascend/dump/data_dumper.h"
+
+#include <map>
+#include <memory>
+#include <string>
+#include "utility"
+#include "session/anf_runtime_algorithm.h"
+#include "runtime/mem.h"
+#include "runtime/kernel.h"
+#include "device/ascend/dump/ge_dump.h"
+#include "proto/op_mapping_info.pb.h"
+#include "utils/context/ms_context.h"
+#include "debug/data_dump_parser.h"
+
+constexpr uint32_t kAicpuLoadFlag = 1;
+constexpr uint32_t kAicpuUnloadFlag = 0;
+constexpr uint32_t kTupleTaskId = 0;
+constexpr uint32_t kTupleStreamId = 1;
+constexpr uint32_t kTupleArgs = 2;
+constexpr uint32_t kCurrentStepTensorIndex = 0;
+constexpr uint32_t kCurrentEpochTensorIndex = 1;
+constexpr uint32_t kStepsPerEpochTensorIndex = 2;
+
+namespace mindspore {
+namespace device {
+namespace ascend {
+void DumpKernelOutput(const CNodePtr &kernel, void *args, NotNull<aicpu::dump::Task *> task);
+void DumpKernelInput(const CNodePtr &kernel, void *args, NotNull<aicpu::dump::Task *> task);
+void RtLoadDumpData(const aicpu::dump::OpMappingInfo &dump_info, void **ptr);
+
+DataDumper::~DataDumper() {
+  ReleaseDevMem(&dev_load_mem_);
+  ReleaseDevMem(&dev_unload_mem_);
+}
+
+void DataDumper::LoadDumpInfo() {
+  MS_LOG(INFO) << "[DataDump] LoadDumpInfo start";
+  MS_EXCEPTION_IF_NULL(kernel_graph_);
+  aicpu::dump::OpMappingInfo dump_info;
+  SetOpMappingInfo(NOT_NULL(&dump_info));
+
+  auto kernels = kernel_graph_->execution_order();
+  for (const auto &kernel : kernels) {
+    MS_EXCEPTION_IF_NULL(kernel);
+    if (!KernelNeedDump(kernel)) {
+      continue;
+    }
+    MS_LOG(INFO) << "[DataDump] LoadDumpInfo kernel:" << kernel->fullname_with_scope();
+    dump_kernel_names_.emplace_back(kernel->fullname_with_scope());
+
+    aicpu::dump::Task task;
+    ConstructDumpTask(NOT_NULL(kernel), NOT_NULL(&task));
+    MS_EXCEPTION_IF_NULL(dump_info.mutable_task());
+    dump_info.mutable_task()->Add(std::move(task));
+  }
+  RtLoadDumpData(dump_info, &dev_load_mem_);
+  load_flag_ = true;
+  MS_LOG(INFO) << "[DataDump] LoadDumpInfo end";
+}
+
+// Fills the load request: dump path, model name/id, dump step, load flag and optional loop-control addresses.
+void DataDumper::SetOpMappingInfo(NotNull<aicpu::dump::OpMappingInfo *> dump_info) const {
+  auto context_ptr = MsContext::GetInstance();
+  MS_EXCEPTION_IF_NULL(context_ptr);
+  MS_EXCEPTION_IF_NULL(kernel_graph_);
+  auto dump_path = DataDumpParser::GetInstance().GetDumpPath();
+  if (!dump_path.has_value()) {
+    MS_LOG(EXCEPTION) << "Dump path invalid";
+  }
+  const std::string device_dump_path = dump_path.value() + "_" + std::to_string(context_ptr->device_id()) + "/";
+  dump_info->set_dump_path(device_dump_path);
+  MS_LOG(INFO) << "[DataDump] dump_path:" << device_dump_path;
+  dump_info->set_model_name(DataDumpParser::GetInstance().net_name() + "_" + std::to_string(kernel_graph_->graph_id()));
+  dump_info->set_dump_step(std::to_string(DataDumpParser::GetInstance().dump_step()));
+  dump_info->set_model_id(kernel_graph_->graph_id());
+  dump_info->set_flag(kAicpuLoadFlag);
+  // Loop-control addresses are optional: unless all three control tensors exist the request goes out without them.
+  const auto &input_ctrl_tensors = kernel_graph_->input_ctrl_tensors();
+  if (input_ctrl_tensors == nullptr || input_ctrl_tensors->size() <= kStepsPerEpochTensorIndex) {
+    MS_LOG(INFO) << "[DataDump] Not data sink mode, input_ctrl_tensor";
+    return;
+  }
+  const auto &current_step_tensor = input_ctrl_tensors->at(kCurrentStepTensorIndex);
+  const auto &current_epoch_tensor = input_ctrl_tensors->at(kCurrentEpochTensorIndex);
+  const auto &steps_per_epoch_tensor = input_ctrl_tensors->at(kStepsPerEpochTensorIndex);
+
+  MS_EXCEPTION_IF_NULL(current_step_tensor);
+  MS_EXCEPTION_IF_NULL(current_epoch_tensor);
+  MS_EXCEPTION_IF_NULL(steps_per_epoch_tensor);
+  MS_EXCEPTION_IF_NULL(current_step_tensor->device_address());
+  MS_EXCEPTION_IF_NULL(current_epoch_tensor->device_address());
+  MS_EXCEPTION_IF_NULL(steps_per_epoch_tensor->device_address());
+
+  void *current_step = current_step_tensor->device_address()->ptr_;
+  void *current_epoch = current_epoch_tensor->device_address()->ptr_;
+  void *steps_per_epoch = steps_per_epoch_tensor->device_address()->ptr_;
+  // Field mapping: step_id <- current epoch, loop_cond <- current step, iterations_per_loop <- steps per epoch.
+  if (current_epoch != nullptr && current_step != nullptr && steps_per_epoch != nullptr) {
+    dump_info->set_step_id_addr(reinterpret_cast<uint64_t>(current_epoch));
+    dump_info->set_loop_cond_addr(reinterpret_cast<uint64_t>(current_step));
+    dump_info->set_iterations_per_loop_addr(reinterpret_cast<uint64_t>(steps_per_epoch));
+  } else {
+    MS_LOG(INFO) << "Invalid ctrl tensor device address";
+  }
+}
+
+bool DataDumper::KernelNeedDump(const CNodePtr &kernel) const {
+  MS_EXCEPTION_IF_NULL(kernel);  // validate before first use; only TBE/AICPU/AKG kernels listed in the config dump
+  const auto kernel_type = AnfAlgo::GetKernelType(kernel);
+  if (kernel_type != TBE_KERNEL && kernel_type != AICPU_KERNEL && kernel_type != AKG_KERNEL) {
+    return false;
+  }
+  const auto &kernel_set = DataDumpParser::GetInstance().kernel_set();
+  return kernel_set.find(kernel->fullname_with_scope()) != kernel_set.end();
+}
+
+void DataDumper::UnloadDumpInfo() {
+  if (!load_flag_) {
+    MS_LOG(WARNING) << "Load not success, no need to unload";
+    return;
+  }
+  MS_EXCEPTION_IF_NULL(kernel_graph_);
+  const auto graph_id = kernel_graph_->graph_id();
+  MS_LOG(INFO) << "[DataDump] UnloadDumpInfo start. graphId:" << graph_id;
+
+  // An unload request carries only the model id, the unload flag and the task id of each recorded kernel.
+  aicpu::dump::OpMappingInfo unload_request;
+  unload_request.set_model_id(graph_id);
+  unload_request.set_flag(kAicpuUnloadFlag);
+  for (const auto &name : dump_kernel_names_) {
+    const auto found = runtime_info_map_.find(name);
+    if (found == runtime_info_map_.end()) {
+      MS_LOG(EXCEPTION) << "[DataDump] kernel name not found in runtime_info_map";
+    }
+    MS_EXCEPTION_IF_NULL(found->second);
+    aicpu::dump::Task unload_task;
+    unload_task.set_task_id(std::get<kTupleTaskId>(*found->second));
+    MS_EXCEPTION_IF_NULL(unload_request.mutable_task());
+    unload_request.mutable_task()->Add(std::move(unload_task));
+  }
+
+  RtLoadDumpData(unload_request, &dev_unload_mem_);
+}
+
+void DataDumper::ReleaseDevMem(void **ptr) const {
+  // Nothing to free when the slot itself or the device pointer stored in it is null.
+  if (ptr == nullptr || *ptr == nullptr) {
+    return;
+  }
+  const rtError_t free_ret = rtFree(*ptr);
+  if (free_ret != RT_ERROR_NONE) {
+    // A failed free is only reported; the slot is still cleared below.
+    MS_LOG(ERROR) << "[DataDump] Call rtFree failed, ret:" << free_ret;
+  }
+  *ptr = nullptr;
+}
+
+void DataDumper::ConstructDumpTask(NotNull<const CNodePtr &> kernel, NotNull<aicpu::dump::Task *> dump_task) const {
+  dump_task->set_end_graph(false);
+  auto iter = runtime_info_map_.find(kernel->fullname_with_scope());
+  if (iter == runtime_info_map_.end()) {
+    MS_LOG(EXCEPTION) << "[DataDump] kernel name not found in runtime_info_map";
+  }
+  MS_EXCEPTION_IF_NULL(iter->second);
+  auto task_id = std::get<kTupleTaskId>(*iter->second);
+  auto stream_id = std::get<kTupleStreamId>(*iter->second);
+  auto args = std::get<kTupleArgs>(*iter->second);
+  MS_LOG(INFO) << "[DataDump] Get runtime info task_id:" << task_id << " stream_id:" << stream_id;
+
+  dump_task->set_task_id(task_id);
+  dump_task->set_stream_id(stream_id);
+  MS_EXCEPTION_IF_NULL(dump_task->mutable_op());
+  dump_task->mutable_op()->set_op_name(kernel->fullname_with_scope());
+  dump_task->mutable_op()->set_op_type(AnfAlgo::GetCNodeName(kernel.get()));
+
+  DumpKernelOutput(kernel, args, dump_task);
+  DumpKernelInput(kernel, args, dump_task);
+}
+
+// Serializes `dump_info`, copies it into newly allocated device memory (*ptr) and passes it to rtDatadumpInfoLoad.
+// Raises an exception on any failure. The buffer stays in *ptr afterwards; releasing it is left to the caller.
+void RtLoadDumpData(const aicpu::dump::OpMappingInfo &dump_info, void **ptr) {
+  MS_EXCEPTION_IF_NULL(ptr);
+  std::string proto_str;
+  size_t proto_size = dump_info.ByteSizeLong();
+  bool ret = dump_info.SerializeToString(&proto_str);
+  if (!ret || proto_size == 0) {
+    MS_LOG(EXCEPTION) << "[DataDump] Protobuf SerializeToString failed, proto size " << proto_size << ".";
+  }
+
+  // rtMalloc stores the device address through ptr, so a null result is treated as an allocation failure too.
+  rtError_t rt_ret = rtMalloc(ptr, proto_size, RT_MEMORY_HBM);
+  if (rt_ret != RT_ERROR_NONE || *ptr == nullptr) {
+    MS_LOG(EXCEPTION) << "[DataDump] Call rtMalloc failed";
+  }
+
+  rt_ret = rtMemcpy(*ptr, proto_size, proto_str.c_str(), proto_size, RT_MEMCPY_HOST_TO_DEVICE);
+  if (rt_ret != RT_ERROR_NONE) {
+    MS_LOG(EXCEPTION) << "[DataDump] Call rtMemcpy failed";
+  }
+
+  MS_LOG(INFO) << "[DataDump] rtDatadumpInfoLoad start";
+  rt_ret = rtDatadumpInfoLoad(*ptr, proto_size);
+  if (rt_ret != RT_ERROR_NONE) {
+    MS_LOG(EXCEPTION) << "[DataDump] Call rtDatadumpInfoLoad failed";
+  }
+}
+
+void DumpKernelOutput(const CNodePtr &kernel, void *args, NotNull<aicpu::dump::Task *> task) {
+  MS_LOG(INFO) << "[DataDump] DumpKernelOutput start. Kernel:" << kernel->fullname_with_scope();
+  auto input_size = AnfAlgo::GetInputTensorNum(kernel);
+  auto output_size = AnfAlgo::GetOutputTensorNum(kernel);
+  uint64_t offset = sizeof(void *) * input_size;
+  for (size_t i = 0; i < output_size; ++i) {
+    auto data_type = AnfAlgo::GetOutputDeviceDataType(kernel, i);
+    auto output_format = AnfAlgo::GetOutputFormat(kernel, i);
+    auto output_shape = AnfAlgo::GetOutputDeviceShape(kernel, i);
+
+    aicpu::dump::Output output;
+    output.set_data_type(GetGeDataType(data_type));
+    output.set_format(GetGeFormat(output_format, output_shape.size()));
+    MS_EXCEPTION_IF_NULL(output.mutable_shape());
+    for (auto dim : output_shape) {
+      output.mutable_shape()->add_dim(dim);
+    }
+    output.set_original_output_format(GetGeFormat(output_format, output_shape.size()));
+    output.set_address(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(args)) + offset);
+    MS_EXCEPTION_IF_NULL(task->mutable_output());
+    task->mutable_output()->Add(std::move(output));
+    offset += sizeof(void *);
+  }
+}
+
+void DumpKernelInput(const CNodePtr &kernel, void *args, NotNull<aicpu::dump::Task *> task) {
+  MS_LOG(INFO) << "[DataDump] DumpKernelInput start. Kernel:" << kernel->fullname_with_scope();
+  auto input_size = AnfAlgo::GetInputTensorNum(kernel);
+  uint64_t offset = 0;
+  for (size_t i = 0; i < input_size; ++i) {
+    aicpu::dump::Input input;
+    auto input_node_with_index = AnfAlgo::GetPrevNodeOutput(kernel, i);
+    auto input_node = input_node_with_index.first;
+    auto input_index = input_node_with_index.second;
+    std::string output_format = AnfAlgo::GetOutputFormat(input_node, input_index);
+    auto output_type = AnfAlgo::GetOutputDeviceDataType(input_node, input_index);
+    if (output_type == kTypeUnknown) {
+      MS_LOG(WARNING) << "[DataDump] It is not suggested to use a lonely weight parameter as the output of graph";
+      output_type = AnfAlgo::GetOutputInferDataType(input_node, input_index);
+    }
+    auto output_shape = AnfAlgo::GetOutputDeviceShape(input_node, input_index);
+
+    input.set_data_type(GetGeDataType(output_type));
+    input.set_format(GetGeFormat(output_format, output_shape.size()));
+    MS_EXCEPTION_IF_NULL(input.mutable_shape());
+    for (auto dim : output_shape) {
+      input.mutable_shape()->add_dim(dim);
+    }
+    input.set_address(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(args)) + offset);
+    MS_EXCEPTION_IF_NULL(task->mutable_input());
+    task->mutable_input()->Add(std::move(input));
+    offset += sizeof(void *);
+  }
+}
+}  // namespace ascend
+}  // namespace device
+}  // namespace mindspore
+#endif
diff --git a/mindspore/ccsrc/device/ascend/dump/data_dumper.h b/mindspore/ccsrc/device/ascend/dump/data_dumper.h
new file mode 100644
index 00000000000..65b01c61c45
--- /dev/null
+++ b/mindspore/ccsrc/device/ascend/dump/data_dumper.h
@@ -0,0 +1,69 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_DUMP_DATADUMP_H_
+#define MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_DUMP_DATADUMP_H_
+#ifdef ENABLE_DATA_DUMP
+#include <tuple>
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+#include "session/kernel_graph.h"
+
+namespace aicpu {
+namespace dump {
+class OpMappingInfo;
+class Task;
+}  // namespace dump
+}  // namespace aicpu
+namespace mindspore {
+namespace device {
+namespace ascend {
+// tuple(task_id, stream_id, args); the op name is the key of the map that stores these tuples
+using RuntimeInfo = std::tuple<uint32_t, uint32_t, void *>;
+class DataDumper {
+ public:
+  DataDumper(const session::KernelGraph *kernel_graph,
+             const std::map<std::string, std::shared_ptr<RuntimeInfo>> &runtime_info_map)
+      : load_flag_(false),
+        dev_load_mem_(nullptr),
+        dev_unload_mem_(nullptr),
+        kernel_graph_(kernel_graph),
+        runtime_info_map_(runtime_info_map) {}
+  ~DataDumper();
+  void LoadDumpInfo();
+
+  void UnloadDumpInfo();
+
+ private:
+  void ReleaseDevMem(void **ptr) const;
+  bool KernelNeedDump(const CNodePtr &kernel) const;
+  void SetOpMappingInfo(NotNull<aicpu::dump::OpMappingInfo *> dump_info) const;
+  void ConstructDumpTask(NotNull<const CNodePtr &> kernel, NotNull<aicpu::dump::Task *> dump_task) const;
+
+  bool load_flag_;
+  void *dev_load_mem_;
+  void *dev_unload_mem_;
+  std::vector<std::string> dump_kernel_names_;
+  const session::KernelGraph *kernel_graph_;
+  std::map<std::string, std::shared_ptr<RuntimeInfo>> runtime_info_map_;
+};
+}  // namespace ascend
+}  // namespace device
+}  // namespace mindspore
+#endif
+#endif  // MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_DUMP_DATADUMP_H_
diff --git a/mindspore/ccsrc/device/ascend/dump/ge_dump.h b/mindspore/ccsrc/device/ascend/dump/ge_dump.h
new file mode 100644
index 00000000000..eae70c4b0b7
--- /dev/null
+++ b/mindspore/ccsrc/device/ascend/dump/ge_dump.h
@@ -0,0 +1,120 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_DUMP_GE_DUMP_H_
+#define MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_DUMP_GE_DUMP_H_
+
+#include <map>
+#include <string>
+#include "proto/ge_dtype.pb.h"
+#include "ir/dtype/type_id.h"
+#include "utils/utils.h"
+
+namespace mindspore {
+namespace device {
+namespace ascend {
+static ge::proto::DataType GetGeDataType(TypeId type_id) {
+  static const std::map<TypeId, ge::proto::DataType> data_type_map = {
+    {TypeId::kTypeUnknown, ge::proto::DT_UNDEFINED},     {TypeId::kNumberTypeFloat32, ge::proto::DT_FLOAT},
+    {TypeId::kNumberTypeFloat16, ge::proto::DT_FLOAT16}, {TypeId::kNumberTypeInt8, ge::proto::DT_INT8},
+    {TypeId::kNumberTypeUInt8, ge::proto::DT_UINT8},     {TypeId::kNumberTypeInt16, ge::proto::DT_INT16},
+    {TypeId::kNumberTypeUInt16, ge::proto::DT_UINT16},   {TypeId::kNumberTypeInt32, ge::proto::DT_INT32},
+    {TypeId::kNumberTypeInt64, ge::proto::DT_INT64},     {TypeId::kNumberTypeUInt32, ge::proto::DT_UINT32},
+    {TypeId::kNumberTypeUInt64, ge::proto::DT_UINT64},   {TypeId::kNumberTypeBool, ge::proto::DT_BOOL},
+    {TypeId::kNumberTypeFloat64, ge::proto::DT_DOUBLE},
+  };
+  MS_LOG(INFO) << "Vm origin type_id:" << type_id;
+  auto iter = data_type_map.find(type_id);
+  if (iter == data_type_map.end()) {
+    MS_LOG(EXCEPTION) << "Invalid data type:" << type_id;
+  }
+  return iter->second;
+}
+
+enum GeFormat {
+  kFormat_NCHW = 0,   // NCHW
+  kFormat_NHWC,       // NHWC
+  kFormat_ND,         // Nd Tensor
+  kFormat_NC1HWC0,    // NC1HWC0
+  kFormat_FRACTAL_Z,  // FRACTAL_Z
+  kFormat_NC1C0HWPAD,
+  kFormat_NHWC1C0,
+  kFormat_FSR_NCHW,
+  kFormat_FRACTAL_DECONV,
+  kFormat_C1HWNC0,
+  kFormat_FRACTAL_DECONV_TRANSPOSE,
+  kFormat_FRACTAL_DECONV_SP_STRIDE_TRANS,
+  kFormat_NC1HWC0_C04,    // NC1HWC0, C0 =4
+  kFormat_FRACTAL_Z_C04,  // FRACZ, C0 =4
+  kFormat_CHWN,
+  kFormat_FRACTAL_DECONV_SP_STRIDE8_TRANS,
+  kFormat_HWCN,
+  kFormat_NC1KHKWHWC0,  // KH,KW kernel h& kernel w maxpooling max output format
+  kFormat_BN_WEIGHT,
+  kFormat_FILTER_HWCK,  // filter input tensor format
+  kFormat_HASHTABLE_LOOKUP_LOOKUPS = 20,
+  kFormat_HASHTABLE_LOOKUP_KEYS,
+  kFormat_HASHTABLE_LOOKUP_VALUE,
+  kFormat_HASHTABLE_LOOKUP_OUTPUT,
+  kFormat_HASHTABLE_LOOKUP_HITS = 24,
+  kFormat_C1HWNCoC0,
+  kFormat_MD,
+  kFormat_NDHWC,
+  kFormat_FRACTAL_ZZ,
+  kFormat_FRACTAL_NZ,
+  kFormat_NCDHW,
+  kFormat_DHWCN,  // 3D filter input tensor format
+  kFormat_NDC1HWC0,
+  kFormat_FRACTAL_Z_3D,
+  kFormat_CN,
+  kFormat_NC,
+  kFormat_DHWNC,
+  kFormat_FRACTAL_Z_3D_TRANSPOSE,  // 3D filter(transpose) input tensor format
+  kFormat_RESERVED,
+  kFormat_ALL
+};
+
+static GeFormat GetGeFormat(const std::string &format, size_t shape_size) {
+  static const std::map<std::string, GeFormat> format_map = {
+    // default format: nchw, fractal_nz?
+    {kOpFormat_DEFAULT, kFormat_NCHW},
+    {kOpFormat_NC1KHKWHWC0, kFormat_NC1KHKWHWC0},
+    {kOpFormat_ND, kFormat_ND},
+    {kOpFormat_NCHW, kFormat_NCHW},
+    {kOpFormat_NHWC, kFormat_NHWC},
+    {kOpFormat_HWCN, kFormat_HWCN},
+    {kOpFormat_NC1HWC0, kFormat_NC1HWC0},
+    {kOpFormat_FRAC_Z, kFormat_FRACTAL_Z},
+    {kOpFormat_FRAC_NZ, kFormat_FRACTAL_NZ},
+    {kOpFormat_C1HWNCoC0, kFormat_C1HWNCoC0},
+    {kOpFormat_NC1HWC0_C04, kFormat_NC1HWC0_C04},
+    {kOpFormat_FRACTAL_Z_C04, kFormat_FRACTAL_Z_C04},
+    {kOpFormat_NDHWC, kFormat_NDHWC},
+  };
+  MS_LOG(INFO) << "GetGeFormat format:" << format << " shape_size:" << shape_size;
+  if (format == kOpFormat_DEFAULT) {
+    return shape_size == 4 ? kFormat_NCHW : kFormat_ND;
+  }
+  auto iter = format_map.find(format);
+  if (iter == format_map.end()) {
+    MS_LOG(EXCEPTION) << "Invalid format:" << format;
+  }
+  return iter->second;
+}
+}  // namespace ascend
+}  // namespace device
+}  // namespace mindspore
+#endif  // MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_DUMP_GE_DUMP_H_
diff --git a/mindspore/ccsrc/device/ascend/dump/proto/ge_dtype.proto b/mindspore/ccsrc/device/ascend/dump/proto/ge_dtype.proto
new file mode 100644
index 00000000000..7c690524d9d
--- /dev/null
+++ b/mindspore/ccsrc/device/ascend/dump/proto/ge_dtype.proto
@@ -0,0 +1,49 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+syntax = "proto3";
+
+package ge.proto;
+
+enum DataType
+{
+  DT_UNDEFINED = 0;  // Used to indicate a DataType field has not been set.
+  DT_FLOAT     = 1;  // float type
+  DT_FLOAT16   = 2;  // fp16 type
+  DT_INT8      = 3;  // int8 type
+  DT_UINT8     = 4;  // uint8 type
+  DT_INT16     = 5;  // int16 type
+  DT_UINT16    = 6;  // uint16 type
+  DT_INT32     = 7;  // int32 type
+  DT_INT64     = 8;  // int64 type
+  DT_UINT32    = 9;  // unsigned int32
+  DT_UINT64    = 10;  // unsigned int64
+  DT_BOOL      = 11;  // bool type
+  DT_DOUBLE    = 12; // double type
+  DT_STRING = 13;            // string type
+  DT_DUAL_SUB_INT8 = 14;    /**< dual output int8 type */
+  DT_DUAL_SUB_UINT8 = 15;    /**< dual output uint8 type */
+  DT_COMPLEX64 = 16;         // complex64 type
+  DT_COMPLEX128 = 17;        // complex128 type
+  DT_QINT8 = 18;             // qint8 type
+  DT_QINT16 = 19;            // qint16 type
+  DT_QINT32 = 20;            // qint32 type
+  DT_QUINT8 = 21;            // quint8 type
+  DT_QUINT16 = 22;           // quint16 type
+  DT_RESOURCE  = 23;         // resource type
+  DT_STRING_REF = 24;        // string_ref type
+  DT_DUAL      = 25;              /**< dual output type */
+}
\ No newline at end of file
diff --git a/mindspore/ccsrc/device/ascend/dump/proto/op_mapping_info.proto b/mindspore/ccsrc/device/ascend/dump/proto/op_mapping_info.proto
new file mode 100644
index 00000000000..d3377c655d7
--- /dev/null
+++ b/mindspore/ccsrc/device/ascend/dump/proto/op_mapping_info.proto
@@ -0,0 +1,78 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+syntax = "proto3";
+package aicpu.dump;
+
+message Shape {
+  repeated uint64 dim = 1;
+}
+
+message Output {
+  int32 data_type = 1;
+  int32 format = 2;
+  Shape shape = 3;
+  uint64 address = 4;
+  string original_name = 5;
+  int32 original_output_index = 6;
+  int32 original_output_data_type = 7;
+  int32 original_output_format = 8;
+  uint64 size = 9;
+};
+
+message Input {
+  int32 data_type = 1;
+  int32 format = 2;
+  Shape shape = 3;
+  uint64 address = 4;
+  uint64 size = 5;
+}
+
+message Op {
+  string op_name = 1;
+  string op_type = 2;
+};
+
+message Task {
+  uint32 task_id = 1;
+  uint32 stream_id = 2;
+  Op op = 3;
+  repeated Output output = 4;
+  bool end_graph = 5;
+  repeated Input input = 6;
+};
+
+message OpMappingInfo {
+  string dump_path = 1;
+  oneof model_name_param {
+    string model_name = 2;
+  }
+  oneof model_id_param {
+    uint32 model_id = 3;
+  }
+  oneof step_id {
+    uint64 step_id_addr = 4;
+  }
+  oneof iterations_per_loop {
+    uint64 iterations_per_loop_addr = 5;
+  }
+  oneof loop_cond {
+    uint64 loop_cond_addr = 6;
+  }
+  uint32 flag = 7; // 0x01 load, 0x00 unload
+  repeated Task task = 8;
+  string dump_step = 9;
+};
diff --git a/mindspore/ccsrc/device/ascend/tasksink/task_generator.cc b/mindspore/ccsrc/device/ascend/tasksink/task_generator.cc
index e026459ae97..00489c72990 100644
--- a/mindspore/ccsrc/device/ascend/tasksink/task_generator.cc
+++ b/mindspore/ccsrc/device/ascend/tasksink/task_generator.cc
@@ -127,6 +127,7 @@ bool TaskGenerator::LaunchKernel(const CNodePtr &anf_node_ptr, uint32_t stream_i
   AddressPtrList kernel_outputs;
   auto kernel_mod = AnfAlgo::GetKernelMod(anf_node_ptr);
   MS_EXCEPTION_IF_NULL(kernel_mod);
+  kernel_mod->set_kernel_name(anf_node_ptr->fullname_with_scope());
   if (AnfAlgo::GetCNodeName(anf_node_ptr) != kAtomicAddrCleanOpName) {
     for (size_t i = 0; i < AnfAlgo::GetInputTensorNum(anf_node_ptr); ++i) {
       auto real_input_index = AnfAlgo::GetRealInputIndex(anf_node_ptr, i);
diff --git a/mindspore/ccsrc/device/device_address.h b/mindspore/ccsrc/device/device_address.h
index f4597f6f460..879caf45fc1 100644
--- a/mindspore/ccsrc/device/device_address.h
+++ b/mindspore/ccsrc/device/device_address.h
@@ -34,6 +34,7 @@ class CPUKernelRuntime;
 namespace ascend {
 class AscendKernelRuntime;
 class AscendMemoryManager;
+class DataDumper;
 namespace tasksink {
 class TaskGenerator;
 }  // namespace tasksink
@@ -90,6 +91,7 @@ class DeviceAddress {
   friend class mindspore::device::gpu::GPUMemoryManager;
   friend class mindspore::device::ascend::AscendKernelRuntime;
   friend class mindspore::device::ascend::AscendMemoryManager;
+  friend class mindspore::device::ascend::DataDumper;
 };
 
 using DeviceAddressPtr = std::shared_ptr<DeviceAddress>;
diff --git a/mindspore/ccsrc/device/kernel_adjust.cc b/mindspore/ccsrc/device/kernel_adjust.cc
index fd0a8eb967b..86dcf2b4498 100644
--- a/mindspore/ccsrc/device/kernel_adjust.cc
+++ b/mindspore/ccsrc/device/kernel_adjust.cc
@@ -34,6 +34,7 @@
 #include "device/ascend/kernel_select_ascend.h"
 #include "runtime/base.h"
 #include "device/ascend/ascend_stream_assign.h"
+
 namespace mindspore {
 namespace device {
 using device::ascend::ProfilingUtils;
@@ -117,6 +118,7 @@ void KernelAdjust::InsertSwitchLoop(const std::shared_ptr<session::KernelGraph>
   std::vector<AnfNodePtr> *mute_inputs = kernel_graph_ptr->MutableInputs();
   MS_EXCEPTION_IF_NULL(mute_inputs);
   mute_inputs->push_back(switch_loop_input[kLoopCountParamName]);
+  mute_inputs->push_back(switch_loop_input[kEpochParamName]);
   mute_inputs->push_back(switch_loop_input[kIterLoopParamName]);
   mute_inputs->push_back(switch_loop_input[kZeroParamName]);
   mute_inputs->push_back(switch_loop_input[kOneParamName]);
@@ -316,6 +318,13 @@ void KernelAdjust::CreateSwitchOpParameters(const std::shared_ptr<session::Kerne
   one->set_abstract(paremeter_abstract_ptr);
   ParameterPtr one_new = kernel_graph_ptr->NewParameter(one);
   (*switch_loop_input)[kOneParamName] = one_new;
+
+  ParameterPtr epoch = std::make_shared<Parameter>(kernel_graph_ptr);
+  MS_EXCEPTION_IF_NULL(epoch);
+  epoch->set_name(kEpochParamName);
+  epoch->set_abstract(paremeter_abstract_ptr);
+  ParameterPtr epoch_new = kernel_graph_ptr->NewParameter(epoch);
+  (*switch_loop_input)[kEpochParamName] = epoch_new;
 }
 
 kernel::KernelBuildInfo::KernelBuildInfoBuilder KernelAdjust::CreateMngKernelBuilder(
@@ -510,6 +519,14 @@ void KernelAdjust::LoadSwitchInputs(std::vector<tensor::TensorPtr> *inputs) {
   *val = 0;
   inputs->push_back(loop_count_tensor);
 
+  // Epoch in device
+  tensor::TensorPtr epoch_tensor = std::make_shared<tensor::Tensor>(kInt32->type_id(), shp);
+  MS_EXCEPTION_IF_NULL(epoch_tensor);
+  val = static_cast<int32_t *>(epoch_tensor->data_c());
+  MS_EXCEPTION_IF_NULL(val);
+  *val = 0;
+  inputs->push_back(epoch_tensor);
+
   tensor::TensorPtr iter_loop_tensor = std::make_shared<tensor::Tensor>(kInt32->type_id(), shp);
   MS_EXCEPTION_IF_NULL(iter_loop_tensor);
   val = static_cast<int32_t *>(iter_loop_tensor->data_c());
@@ -531,6 +548,7 @@ void KernelAdjust::LoadSwitchInputs(std::vector<tensor::TensorPtr> *inputs) {
   MS_EXCEPTION_IF_NULL(val);
   *val = 1;
   inputs->push_back(one_tensor);
+
   MS_LOG(INFO) << "---------------- LoadSwitchInputs End--";
 }
 
diff --git a/mindspore/ccsrc/device/kernel_adjust.h b/mindspore/ccsrc/device/kernel_adjust.h
index bf3ba2acb25..9f59c486bc3 100644
--- a/mindspore/ccsrc/device/kernel_adjust.h
+++ b/mindspore/ccsrc/device/kernel_adjust.h
@@ -37,6 +37,7 @@ constexpr auto kLoopCountParamName = "loop_count";
 constexpr auto kIterLoopParamName = "iter_loop";
 constexpr auto kZeroParamName = "zero";
 constexpr auto kOneParamName = "one";
+constexpr auto kEpochParamName = "loop_epoch";
 constexpr auto kStreamNeedActivedFirst = "stream_need_active_first";
 constexpr uint32_t kSecondStreamSwitchLabel = 2;
 
diff --git a/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_mod.cc b/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_mod.cc
index 2213f176cc4..c6d8a101cde 100644
--- a/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_mod.cc
+++ b/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_mod.cc
@@ -26,6 +26,7 @@
 #include "kernel/aicpu/aicpu_kernel_build.h"
 #include "utils/convert_utils.h"
 #include "kernel/aicpu/aicpu_util.h"
+#include "utils/context/ms_context.h"
 
 using AicpuTaskInfoPtr = std::shared_ptr<ge::model_runner::AicpuTaskInfo>;
 
@@ -144,8 +145,9 @@ std::vector<TaskInfoPtr> AicpuOpKernelMod::GenTask(const std::vector<AddressPtr>
   if (node_name_ == kTopK) {
     node_name_ = kTopKV2;
   }
+
   AicpuTaskInfoPtr task_info_ptr = make_shared<ge::model_runner::AicpuTaskInfo>(
-    stream_id, node_so_, node_name_, node_def_str_, input_data_addrs, output_data_addrs);
+    kernel_name_, stream_id, node_so_, node_name_, node_def_str_, input_data_addrs, output_data_addrs, NeedDump());
 
   MS_LOG(INFO) << "AicpuOpKernelMod GenTask end";
   return {task_info_ptr};
diff --git a/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_mod.cc b/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_mod.cc
index 69fc82aad39..101a9f79b6c 100644
--- a/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_mod.cc
+++ b/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_mod.cc
@@ -26,6 +26,7 @@
 #include "runtime/rt.h"
 #include "utils/log_adapter.h"
 #include "utils/convert_utils.h"
+#include "utils/context/ms_context.h"
 
 namespace mindspore {
 namespace kernel {
@@ -123,8 +124,8 @@ std::vector<TaskInfoPtr> AkgKernelMod::GenTask(const std::vector<AddressPtr> &in
   MS_LOG(DEBUG) << "The block_dim is:" << block_dim;
 
   TbeTaskInfoPtr task_info_ptr = make_shared<ge::model_runner::TbeTaskInfo>(
-    stream_id, stub_func, block_dim, args, args_size, sm_desc, binary, binary_size, meta_data, input_data_addrs,
-    output_data_addrs, workspace_addrs);
+    kernel_name_, stream_id, stub_func, block_dim, args, args_size, sm_desc, binary, binary_size, meta_data,
+    input_data_addrs, output_data_addrs, workspace_addrs, NeedDump());
   return {task_info_ptr};
 }
 }  // namespace kernel
diff --git a/mindspore/ccsrc/kernel/ascend_kernel_mod.h b/mindspore/ccsrc/kernel/ascend_kernel_mod.h
index 0aee881f7d6..1ca1dbacc89 100644
--- a/mindspore/ccsrc/kernel/ascend_kernel_mod.h
+++ b/mindspore/ccsrc/kernel/ascend_kernel_mod.h
@@ -21,6 +21,9 @@
 #include <memory>
 #include "framework/ge_runtime/task_info.h"
 #include "kernel/kernel.h"
+#ifdef ENABLE_DATA_DUMP
+#include "debug/data_dump_parser.h"
+#endif
 
 using TaskInfoPtr = std::shared_ptr<ge::model_runner::TaskInfo>;
 namespace mindspore {
@@ -31,6 +34,13 @@ class AscendKernelMod : public KernelMod {
                                            const std::vector<AddressPtr> &, uint32_t) = 0;
   uint32_t block_dim() { return block_dim_; }
   uint32_t stream_id() { return stream_id_; }
+  virtual bool NeedDump() {
+#ifdef ENABLE_DATA_DUMP
+    return DataDumpParser::GetInstance().NeedDump(kernel_name_);
+#else
+    return false;
+#endif
+  }
 
  protected:
   uint32_t block_dim_{1};
diff --git a/mindspore/ccsrc/kernel/hccl/hccl_kernel.cc b/mindspore/ccsrc/kernel/hccl/hccl_kernel.cc
index 87fb8d743d5..d5d6e556980 100644
--- a/mindspore/ccsrc/kernel/hccl/hccl_kernel.cc
+++ b/mindspore/ccsrc/kernel/hccl/hccl_kernel.cc
@@ -18,6 +18,7 @@
 #include "device/ascend/tasksink/runtime_utils.h"
 #include "session/anf_runtime_algorithm.h"
 #include "utils/utils.h"
+#include "utils/context/ms_context.h"
 
 using HcclTaskInfoPtr = std::shared_ptr<ge::model_runner::HcclTaskInfo>;
 using ge::model_runner::HcclTaskInfo;
@@ -146,10 +147,12 @@ std::vector<TaskInfoPtr> HcclKernel::GenTask(const std::vector<AddressPtr> &inpu
                << ", root_id=" << root_id_ << ", op_type=" << static_cast<int>(op_type_)
                << ", data_type=" << static_cast<int>(data_type);
 
+  auto context_ptr = MsContext::GetInstance();
+  MS_EXCEPTION_IF_NULL(context_ptr);
   HcclTaskInfoPtr task_info_ptr = std::make_shared<HcclTaskInfo>(
-    stream_id, hccl_type, input_data_addr, output_data_addr, workspace_address, workspace_num, 0, private_def, nullptr,
-    hccl_count_, root_id_, op_type_, data_type, group_, RuntimeUtils::HcomBindModel, RuntimeUtils::HcomUnbindModel,
-    RuntimeUtils::HcomDistribute);
+    kernel_name_, stream_id, hccl_type, input_data_addr, output_data_addr, workspace_address, workspace_num, 0,
+    private_def, nullptr, hccl_count_, root_id_, op_type_, data_type, group_, RuntimeUtils::HcomBindModel,
+    RuntimeUtils::HcomUnbindModel, RuntimeUtils::HcomDistribute, NeedDump());
   MS_EXCEPTION_IF_NULL(task_info_ptr);
   return {task_info_ptr};
 }
diff --git a/mindspore/ccsrc/kernel/kernel.h b/mindspore/ccsrc/kernel/kernel.h
index 7bccce49c3a..a15f6b16e7a 100644
--- a/mindspore/ccsrc/kernel/kernel.h
+++ b/mindspore/ccsrc/kernel/kernel.h
@@ -129,6 +129,10 @@ class KernelMod {
   virtual std::vector<size_t> GenParameters() { return {}; }
 
   virtual ~KernelMod() = default;
+  void set_kernel_name(const std::string &kernel_name) { kernel_name_ = kernel_name; }  // keeps its own copy
+
+ protected:
+  std::string kernel_name_;  // empty until set_kernel_name() is called; accessible to derived kernel mods
 };
 using KernelModPtr = std::shared_ptr<KernelMod>;
 }  // namespace kernel
diff --git a/mindspore/ccsrc/kernel/rts/assign.cc b/mindspore/ccsrc/kernel/rts/assign.cc
index 7f214b6e6f6..7038004898d 100644
--- a/mindspore/ccsrc/kernel/rts/assign.cc
+++ b/mindspore/ccsrc/kernel/rts/assign.cc
@@ -58,8 +58,9 @@ std::vector<TaskInfoPtr> AssignKernel::GenTask(const std::vector<AddressPtr> &in
   }
   stream_id_ = stream_id;
 
-  std::shared_ptr<MemcpyAsyncTaskInfo> task_info_ptr = std::make_shared<MemcpyAsyncTaskInfo>(
-    stream_id, inputs[0]->addr, inputs[0]->size, inputs[1]->addr, inputs[1]->size, RT_MEMCPY_DEVICE_TO_DEVICE);
+  std::shared_ptr<MemcpyAsyncTaskInfo> task_info_ptr =
+    std::make_shared<MemcpyAsyncTaskInfo>(kernel_name_, stream_id, inputs[0]->addr, inputs[0]->size, inputs[1]->addr,
+                                          inputs[1]->size, RT_MEMCPY_DEVICE_TO_DEVICE, false);
   MS_EXCEPTION_IF_NULL(task_info_ptr);
   return {task_info_ptr};
 }
diff --git a/mindspore/ccsrc/kernel/rts/label_goto.cc b/mindspore/ccsrc/kernel/rts/label_goto.cc
index 7bcf42a210d..1d29bb4f358 100644
--- a/mindspore/ccsrc/kernel/rts/label_goto.cc
+++ b/mindspore/ccsrc/kernel/rts/label_goto.cc
@@ -55,7 +55,8 @@ std::vector<TaskInfoPtr> LabelGotoKernel::GenTask(const std::vector<AddressPtr>
                                                   const std::vector<AddressPtr> &, uint32_t stream_id) {
   MS_LOG(INFO) << "LabelGotoKernel GenTask label:" << label_ << ", stream id:" << stream_id;
   std::vector<TaskInfoPtr> task_info_list;
-  std::shared_ptr<LabelGotoTaskInfo> task_info_ptr = std::make_shared<LabelGotoTaskInfo>(stream_id, label_);
+  std::shared_ptr<LabelGotoTaskInfo> task_info_ptr =
+    std::make_shared<LabelGotoTaskInfo>(kernel_name_, stream_id, label_);
   MS_EXCEPTION_IF_NULL(task_info_ptr);
   task_info_list.emplace_back(task_info_ptr);
   return task_info_list;
diff --git a/mindspore/ccsrc/kernel/rts/label_set.cc b/mindspore/ccsrc/kernel/rts/label_set.cc
index 5aedd012dc6..4266e2b0af8 100644
--- a/mindspore/ccsrc/kernel/rts/label_set.cc
+++ b/mindspore/ccsrc/kernel/rts/label_set.cc
@@ -55,7 +55,7 @@ std::vector<TaskInfoPtr> LabelSetKernel::GenTask(const std::vector<AddressPtr> &
                                                  const std::vector<AddressPtr> &, uint32_t stream_id) {
   MS_LOG(INFO) << "LabelSetKernel GenTask label:" << label_ << ", stream id:" << stream_id;
   std::vector<TaskInfoPtr> task_info_list;
-  std::shared_ptr<LabelSetTaskInfo> task_info_ptr = std::make_shared<LabelSetTaskInfo>(stream_id, label_);
+  std::shared_ptr<LabelSetTaskInfo> task_info_ptr = std::make_shared<LabelSetTaskInfo>(kernel_name_, stream_id, label_);
   MS_EXCEPTION_IF_NULL(task_info_ptr);
   task_info_list.emplace_back(task_info_ptr);
   return task_info_list;
diff --git a/mindspore/ccsrc/kernel/rts/label_switch.cc b/mindspore/ccsrc/kernel/rts/label_switch.cc
index fb1ad1601a4..bc5282b4af5 100644
--- a/mindspore/ccsrc/kernel/rts/label_switch.cc
+++ b/mindspore/ccsrc/kernel/rts/label_switch.cc
@@ -67,7 +67,7 @@ std::vector<TaskInfoPtr> LabelSwitchKernel::GenTask(const std::vector<AddressPtr
   MS_LOG(INFO) << "LabelSwitchKernel GenTask label size:" << label_size_ << ", stream id:" << stream_id;
   std::vector<TaskInfoPtr> task_info_list;
   cond_ = inputs[0]->addr;
-  auto task_info_ptr = std::make_shared<LabelSwitchTaskInfo>(stream_id, label_size_, label_list_, cond_);
+  auto task_info_ptr = std::make_shared<LabelSwitchTaskInfo>(kernel_name_, stream_id, label_size_, label_list_, cond_);
   MS_EXCEPTION_IF_NULL(task_info_ptr);
   task_info_list.emplace_back(task_info_ptr);
   return task_info_list;
diff --git a/mindspore/ccsrc/kernel/rts/memcpy_async.cc b/mindspore/ccsrc/kernel/rts/memcpy_async.cc
index f5fbec6e56d..ea33c4dd8b5 100644
--- a/mindspore/ccsrc/kernel/rts/memcpy_async.cc
+++ b/mindspore/ccsrc/kernel/rts/memcpy_async.cc
@@ -23,6 +23,7 @@
 #include "common/utils.h"
 #include "session/anf_runtime_algorithm.h"
 #include "common/trans.h"
+#include "utils/context/ms_context.h"
 
 using ge::model_runner::MemcpyAsyncTaskInfo;
 using MemcpyAsyncTaskInfoPtr = std::shared_ptr<MemcpyAsyncTaskInfo>;
@@ -118,8 +119,9 @@ std::vector<TaskInfoPtr> MemCpyAsyncKernel::GenTask(const std::vector<AddressPtr
   }
 
   stream_id_ = stream_id;
-  std::shared_ptr<MemcpyAsyncTaskInfo> task_info_ptr = std::make_shared<MemcpyAsyncTaskInfo>(
-    stream_id, outputs[0]->addr, outputs[0]->size, inputs[0]->addr, inputs[0]->size, RT_MEMCPY_DEVICE_TO_DEVICE);
+  std::shared_ptr<MemcpyAsyncTaskInfo> task_info_ptr =
+    std::make_shared<MemcpyAsyncTaskInfo>(kernel_name_, stream_id, outputs[0]->addr, outputs[0]->size, inputs[0]->addr,
+                                          inputs[0]->size, RT_MEMCPY_DEVICE_TO_DEVICE, NeedDump());
   MS_EXCEPTION_IF_NULL(task_info_ptr);
   return {task_info_ptr};
 }
diff --git a/mindspore/ccsrc/kernel/rts/profiling_kernel_mod.cc b/mindspore/ccsrc/kernel/rts/profiling_kernel_mod.cc
index ff005f399bd..0161e8562a3 100644
--- a/mindspore/ccsrc/kernel/rts/profiling_kernel_mod.cc
+++ b/mindspore/ccsrc/kernel/rts/profiling_kernel_mod.cc
@@ -63,7 +63,7 @@ std::vector<TaskInfoPtr> ProfilingKernelMod::GenTask(const std::vector<AddressPt
                << ", outputs size:" << outputs.size();
   stream_id_ = stream_id;
   std::shared_ptr<ProfilerTraceTaskInfo> task_info_ptr =
-    std::make_shared<ProfilerTraceTaskInfo>(stream_id, log_id_, notify_, flags_);
+    std::make_shared<ProfilerTraceTaskInfo>(kernel_name_, stream_id, log_id_, notify_, flags_);
   return {task_info_ptr};
 }
 }  // namespace kernel
diff --git a/mindspore/ccsrc/kernel/rts/recv.cc b/mindspore/ccsrc/kernel/rts/recv.cc
index c195fd1c92e..3fb2fd6bb57 100644
--- a/mindspore/ccsrc/kernel/rts/recv.cc
+++ b/mindspore/ccsrc/kernel/rts/recv.cc
@@ -60,7 +60,7 @@ std::vector<TaskInfoPtr> RecvKernel::GenTask(const std::vector<AddressPtr> &, co
                                              const std::vector<AddressPtr> &, uint32_t stream_id) {
   MS_LOG(INFO) << "RecvKernel GenTask event_id_:" << event_id_ << ", stream_id_:" << stream_id;
   stream_id_ = stream_id;
-  EventWaitTaskInfoPtr task_info_ptr = std::make_shared<EventWaitTaskInfo>(stream_id, event_id_);
+  EventWaitTaskInfoPtr task_info_ptr = std::make_shared<EventWaitTaskInfo>(kernel_name_, stream_id, event_id_);
   MS_EXCEPTION_IF_NULL(task_info_ptr);
   return {task_info_ptr};
 }
diff --git a/mindspore/ccsrc/kernel/rts/send.cc b/mindspore/ccsrc/kernel/rts/send.cc
index ccdd43ebb61..298d75befda 100644
--- a/mindspore/ccsrc/kernel/rts/send.cc
+++ b/mindspore/ccsrc/kernel/rts/send.cc
@@ -57,7 +57,7 @@ std::vector<TaskInfoPtr> SendKernel::GenTask(const std::vector<AddressPtr> &, co
                                              const std::vector<AddressPtr> &, uint32_t stream_id) {
   MS_LOG(INFO) << "SendKernel GenTask event id:" << event_id_ << ", stream id:" << stream_id;
   stream_id_ = stream_id;
-  EventRecordTaskInfoPtr task_info_ptr = std::make_shared<EventRecordTaskInfo>(stream_id, event_id_);
+  EventRecordTaskInfoPtr task_info_ptr = std::make_shared<EventRecordTaskInfo>(kernel_name_, stream_id, event_id_);
   MS_EXCEPTION_IF_NULL(task_info_ptr);
   return {task_info_ptr};
 }
diff --git a/mindspore/ccsrc/kernel/rts/stream_active.cc b/mindspore/ccsrc/kernel/rts/stream_active.cc
index 4f0895a0be2..b5739648685 100644
--- a/mindspore/ccsrc/kernel/rts/stream_active.cc
+++ b/mindspore/ccsrc/kernel/rts/stream_active.cc
@@ -72,7 +72,8 @@ std::vector<TaskInfoPtr> StreamActiveKernel::GenTask(const std::vector<AddressPt
   stream_id_ = stream_id;
   std::vector<TaskInfoPtr> task_info_list;
   for (auto &index : active_streams_index_) {
-    std::shared_ptr<StreamActiveTaskInfo> task_info_ptr = std::make_shared<StreamActiveTaskInfo>(stream_id, index);
+    std::shared_ptr<StreamActiveTaskInfo> task_info_ptr =
+      std::make_shared<StreamActiveTaskInfo>(kernel_name_, stream_id, index);
     MS_EXCEPTION_IF_NULL(task_info_ptr);
     task_info_list.emplace_back(task_info_ptr);
     MS_LOG(INFO) << "StreamActiveKernel GenTask: streamId:" << stream_id << ", Active streamId:" << index;
diff --git a/mindspore/ccsrc/kernel/rts/stream_switch.cc b/mindspore/ccsrc/kernel/rts/stream_switch.cc
index bab6b043669..44b0a1ef867 100644
--- a/mindspore/ccsrc/kernel/rts/stream_switch.cc
+++ b/mindspore/ccsrc/kernel/rts/stream_switch.cc
@@ -91,8 +91,8 @@ std::vector<TaskInfoPtr> StreamSwitchKernel::GenTask(const std::vector<AddressPt
   auto ites_per_loop = inputs[1]->addr;
   MS_LOG(INFO) << "cond_:" << static_cast<int>(cond_) << ", true_stream_index_:" << true_stream_index_
                << ", stream_id:" << stream_id;
-  std::shared_ptr<StreamSwitchTaskInfo> task_info_ptr =
-    std::make_shared<StreamSwitchTaskInfo>(stream_id, true_stream_index_, loop_cnt, ites_per_loop, cond_, data_type_);
+  std::shared_ptr<StreamSwitchTaskInfo> task_info_ptr = std::make_shared<StreamSwitchTaskInfo>(
+    kernel_name_, stream_id, true_stream_index_, loop_cnt, ites_per_loop, cond_, data_type_);
   MS_EXCEPTION_IF_NULL(task_info_ptr);
   return {task_info_ptr};
 }
diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_mod.cc b/mindspore/ccsrc/kernel/tbe/tbe_kernel_mod.cc
index 0f377940daf..9d5222659ab 100644
--- a/mindspore/ccsrc/kernel/tbe/tbe_kernel_mod.cc
+++ b/mindspore/ccsrc/kernel/tbe/tbe_kernel_mod.cc
@@ -17,7 +17,7 @@
 #include "kernel/tbe/tbe_kernel_mod.h"
 #include <algorithm>
 #include "runtime/rt.h"
-#include "nlohmann/json.hpp"
+#include "utils/context/ms_context.h"
 #include "graphengine/inc/framework/ge_runtime/task_info.h"
 
 namespace mindspore {
@@ -99,9 +99,9 @@ std::vector<TaskInfoPtr> TbeKernelMod::GenTask(const std::vector<AddressPtr> &in
 
   MS_LOG(INFO) << "block_dim is:" << block_dim_;
 
-  TbeTaskInfoPtr task_info_ptr =
-    make_shared<ge::model_runner::TbeTaskInfo>(stream_id, stub_func, block_dim_, args, 0, sm_desc, nullptr, 0,
-                                               meta_data, input_data_addrs, output_data_addrs, workspace_addrs);
+  TbeTaskInfoPtr task_info_ptr = make_shared<ge::model_runner::TbeTaskInfo>(
+    kernel_name_, stream_id, stub_func, block_dim_, args, 0, sm_desc, nullptr, 0, meta_data, input_data_addrs,
+    output_data_addrs, workspace_addrs, NeedDump());
   return {task_info_ptr};
 }
 
diff --git a/mindspore/ccsrc/session/kernel_graph.h b/mindspore/ccsrc/session/kernel_graph.h
index 6861d43de0d..2e46cfa76ad 100644
--- a/mindspore/ccsrc/session/kernel_graph.h
+++ b/mindspore/ccsrc/session/kernel_graph.h
@@ -36,7 +36,7 @@ namespace session {
 using AnfWithOutIndex = std::pair<AnfNodePtr, size_t>;
 class KernelGraph : public FuncGraph {
  public:
-  KernelGraph() : graph_id_(0), start_label_(nullptr), end_goto_(nullptr), null_output_(false) {
+  KernelGraph() : graph_id_(0), start_label_(nullptr), end_goto_(nullptr), null_output_(false), current_epoch_(0) {
     inputs_ = std::make_shared<std::vector<AnfNodePtr>>();
     execution_order_ = {};
     executable_ = true;
@@ -154,6 +154,8 @@ class KernelGraph : public FuncGraph {
   AnfNodePtr GetFrontNodeByInternalOutput(const AnfNodePtr &node) const;
   void AddFinalOutputKernel(const AnfNodePtr &node);
   bool IsFinalOutputKernel(const AnfNodePtr &node) const;
+  uint32_t current_epoch() const { return current_epoch_; }  // counter initialised to 0 by the constructor
+  void set_current_epoch(uint32_t epoch) { current_epoch_ = epoch; }  // overwrites the counter, no validation
 
  private:
   // remove value node form graph
@@ -216,6 +218,7 @@ class KernelGraph : public FuncGraph {
   std::unordered_map<AnfNodePtr, AnfNodePtr> front_to_internal_outputs_map_;
   std::unordered_map<AnfNodePtr, AnfNodePtr> internal_outputs_to_front_map_;
   std::set<AnfNodePtr> final_output_kernels_;
+  uint32_t current_epoch_;
 };
 }  // namespace session
 using KernelGraphPtr = std::shared_ptr<session::KernelGraph>;
diff --git a/mindspore/ccsrc/session/session_basic.cc b/mindspore/ccsrc/session/session_basic.cc
index 4cc01e62a41..9e437673c92 100644
--- a/mindspore/ccsrc/session/session_basic.cc
+++ b/mindspore/ccsrc/session/session_basic.cc
@@ -187,6 +187,18 @@ size_t LoadCtrlInputTensor(const std::shared_ptr<KernelGraph> &graph, std::vecto
   // set loop_count to zero
   MS_EXCEPTION_IF_NULL(inputs);
   inputs->push_back(tensor);
+
+  auto epoch_tensor = (*inputs_params)[1];
+  MS_EXCEPTION_IF_NULL(epoch_tensor);
+  auto *epoch_val = static_cast<int32_t *>(epoch_tensor->data_c());
+  MS_EXCEPTION_IF_NULL(epoch_val);
+  *epoch_val = graph->current_epoch();
+  epoch_tensor->set_dirty(true);
+  inputs->push_back(epoch_tensor);
+  MS_LOG(INFO) << "Load epoch_val:" << *epoch_val;
+
+  graph->set_current_epoch(graph->current_epoch() + 1);
+
   return inputs_params->size();
 }
 
@@ -814,13 +826,13 @@ void SessionBasic::AddParameterToGraphInputs(const std::vector<AnfNodePtr> &para
 void SessionBasic::LoadInputData(const std::shared_ptr<KernelGraph> &kernel_graph,
                                  const std::vector<tensor::TensorPtr> &inputs_const) const {
   std::vector<tensor::TensorPtr> inputs(inputs_const);
-  size_t input_ctrl_size = 1;
+  size_t input_ctrl_size = 2;
   MS_EXCEPTION_IF_NULL(kernel_graph);
   if (kernel_graph->input_ctrl_tensors()) {
     input_ctrl_size = LoadCtrlInputTensor(kernel_graph, &inputs);
   }
   auto input_nodes = kernel_graph->inputs();
-  if ((inputs.size() + input_ctrl_size) - 1 != input_nodes.size()) {
+  if ((inputs.size() + input_ctrl_size) - 2 != input_nodes.size()) {
     MS_LOG(EXCEPTION) << "Tensor input:" << inputs.size() << " is not equal graph inputs:" << input_nodes.size()
                       << ", input_ctrl_size:" << input_ctrl_size;
   }
diff --git a/tests/ut/cpp/stub/ge/ge_task_launch_stub.cc b/tests/ut/cpp/stub/ge/ge_task_launch_stub.cc
index a3a991247cc..9b48adb574c 100644
--- a/tests/ut/cpp/stub/ge/ge_task_launch_stub.cc
+++ b/tests/ut/cpp/stub/ge/ge_task_launch_stub.cc
@@ -32,6 +32,8 @@ bool ModelRunner::LoadDavinciModel(uint32_t device_id, uint64_t session_id, uint
 
 bool ModelRunner::UnloadModel(uint32_t model_id) { return true; }
 
+bool ModelRunner::LoadModelComplete(uint32_t model_id) { return true; }  // UT stub: always reports success
+
 bool ModelRunner::RunModel(uint32_t model_id, const ge::InputData &input_data, ge::OutputData *output_data) {
   return true;
 }
@@ -45,6 +47,11 @@ const std::vector<uint32_t> &ModelRunner::GetStreamIdList(uint32_t model_id) con
   static std::vector<uint32_t> stream_id_list;
   return stream_id_list;
 }
+
+const std::map<std::string, std::shared_ptr<RuntimeInfo>> &ModelRunner::GetRuntimeInfoMap(uint32_t model_id) const {
+  static const std::map<std::string, std::shared_ptr<RuntimeInfo>> empty_runtime_info{};
+  return empty_runtime_info;  // UT stub: the same empty map is returned for every model_id
+}
 }  // namespace model_runner
 }  // namespace ge
 
diff --git a/tests/ut/cpp/stub/tasksink/ascend_stream_assign_stub.cc b/tests/ut/cpp/stub/tasksink/ascend_stream_assign_stub.cc
index a6ec3a50b5c..8c00e518c3b 100755
--- a/tests/ut/cpp/stub/tasksink/ascend_stream_assign_stub.cc
+++ b/tests/ut/cpp/stub/tasksink/ascend_stream_assign_stub.cc
@@ -15,7 +15,6 @@
  */
 #include "device/ascend/ascend_stream_assign.h"
 #include "device/ascend/ascend_label_assign.h"
-#include "device/ascend/tasksink/task_generator.h"
 #include "device/kernel_adjust.h"
 
 namespace mindspore {
@@ -31,13 +30,6 @@ void AscendStreamAssign::AssignStream(const NotNull<KernelGraphPtr> &graph_ptr)
 void AscendStreamAssign::GetWaitStreams(vector<uint32_t> *wait_active_stream_list) { return; }
 
 void AscendStreamAssign::GetHcomStreams(std::vector<uint32_t> *streams) { return; }
-
-namespace tasksink {
-bool TaskGenerator::GenTasks(const std::vector<CNodePtr> &anf_node_list, std::vector<TaskInfoPtr> *const task_info_list,
-                             uint32_t graph_id) {
-  return true;
-}
-}  // namespace tasksink
 }  // namespace ascend
 void KernelAdjust::InsertSwitchLoop(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr) { return; }
 bool KernelAdjust::StepLoadCtrlInputs(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr) { return true; }
diff --git a/tests/ut/cpp/stub/tasksink/task_sink_stub.cc b/tests/ut/cpp/stub/tasksink/task_sink_stub.cc
new file mode 100644
index 00000000000..b4318488c0c
--- /dev/null
+++ b/tests/ut/cpp/stub/tasksink/task_sink_stub.cc
@@ -0,0 +1,30 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "device/ascend/tasksink/task_generator.h"
+
+namespace mindspore {
+namespace device {
+namespace ascend {
+namespace tasksink {
+bool TaskGenerator::GenTasks(const std::vector<CNodePtr> &anf_node_list, std::vector<TaskInfoPtr> *const task_info_list,
+                             uint32_t graph_id) {
+  return true;  // UT stub: generates no tasks, leaves task_info_list untouched and reports success
+}
+}  // namespace tasksink
+}  // namespace ascend
+}  // namespace device
+}  // namespace mindspore