memory reuse code clean
parent 8301489439
commit d4d6fb940d

@@ -55,6 +55,7 @@ endif()
if(DEBUG_MODE)
    set(CMAKE_BUILD_TYPE "Debug")
    add_compile_definitions(MEM_REUSE_DEBUG)
else()
    set(CMAKE_BUILD_TYPE "Release")
endif()
@@ -111,6 +111,26 @@ class MemReuseUtil {
  std::unordered_map<AnfNodePtr, session::KernelWithIndex> visit_kernel_with_return_type_in0pos_skip_nop_cache_;
};
using MemReuseUtilPtr = std::shared_ptr<MemReuseUtil>;

enum Status { kUnused, kReused };
enum MemType { kNew, kInStreamReuse, kBetweenStreamReuse, kKernelDependenceReuse };
class Membuf {
 public:
  Membuf() = default;
  Membuf(Status status, size_t size, size_t offset, int index, MemType type, const KernelDefPtr &used_kernel)
      : status_(status), size_(size), offset_(offset), index_(index), type_(type), used_kernel_(used_kernel) {}
  ~Membuf() = default;
  // Memory block status flags
  Status status_ = kUnused;
  size_t size_{0};
  size_t offset_{0};
  // Store the tensor index stored in this memory block at a certain moment
  int index_{0};
  MemType type_{kNew};
  KernelDefPtr used_kernel_;
};
using MembufPtr = std::shared_ptr<Membuf>;

} // namespace memreuse
} // namespace mindspore
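For orientation, each Membuf describes one contiguous slice of the planned reuse region; the sketch below is illustrative only (the values and the `kernel` handle are assumed, not taken from this diff):

// Hypothetical record: a 1024-byte block at offset 0, currently holding tensor #3,
// reused in-stream by `kernel` (a KernelDefPtr assumed to exist in the caller).
auto block = std::make_shared<Membuf>(kReused, 1024, 0, 3, kInStreamReuse, kernel);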
@@ -1,536 +0,0 @@
/**
 * Copyright 2019-2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "backend/optimizer/mem_reuse/mem_reuse_allocator.h"
#include "backend/optimizer/mem_reuse/mem_reuse.h"
#include "backend/optimizer/mem_reuse/mem_reuse_checker.h"
#ifdef ENABLE_D
#include "runtime/device/ascend/ascend_stream_assign.h"
#endif
#ifdef ENABLE_DEBUGGER
#include "debug/debug_services.h"
#include "debug/debugger/debugger.h"
#endif

namespace mindspore {
namespace memreuse {
void BestFitMemReuse::InitMemReuseInfo(const MemReuseUtil *mem_reuse_util_ptr) {
  MS_EXCEPTION_IF_NULL(mem_reuse_util_ptr);
  set_tensor_ptr_list(mem_reuse_util_ptr->total_refs_list());
  set_workspace_ptr_list(mem_reuse_util_ptr->total_wk_ref_list());
  set_op_ptr_list(mem_reuse_util_ptr->kernel_def_ptr_list());
  // check info Correctness
  for (auto &tensor : tensor_ptr_list_) {
    tensor->size_ = AlignCommonMemorySize(tensor->size_);
  }
  // align wk size to 512 && refcount == 1
  for (auto &wk : wk_tensor_list_) {
    wk->size_ = AlignCommonMemorySize(wk->size_);
    wk->ref_count_ = 1;
  }
#ifdef ENABLE_D
  stream_groups_ = device::ascend::AscendStreamAssign::GetInstance().get_stream_group();
#endif
}

void BestFitMemReuse::InitKernelDependence() {
  for (const auto &kernel : op_ptr_list_) {
    std::set<KernelDefPtr> front;
    std::queue<KernelDefPtr> to_visit;
    to_visit.push(kernel);
    // find all kernels before current kernel
    while (!to_visit.empty()) {
      auto curr = to_visit.front();
      to_visit.pop();
      if (front.count(curr)) {
        continue;
      }
      front.insert(curr);
      auto iter = kernel_front_map_.find(curr);
      if (iter != kernel_front_map_.end()) {
        auto visited_front = iter->second;
        front.insert(visited_front.begin(), visited_front.end());
        continue;
      }
      for (const auto &input : curr->input_kernels()) {
        to_visit.push(input);
      }
    }
    kernel_front_map_[kernel] = front;
  }
}

bool BestFitMemReuse::IsUsable(const KernelDefPtr &kernel_curr, const MembufPtr &mem_buf) {
  // determine whether the kernel_curr can reuse kernel_prev's output tensor membuf
  MS_EXCEPTION_IF_NULL(kernel_curr);
  MS_EXCEPTION_IF_NULL(mem_buf);
  auto kernel_prev = mem_buf->used_kernel_;
  MS_EXCEPTION_IF_NULL(kernel_prev);
#ifdef ENABLE_DEBUGGER
  auto debugger_ = mindspore::Debugger::GetInstance();
  if (debugger_->DebuggerBackendEnabled()) {
    std::string current_kernel_name = kernel_curr->scope_full_name();
    if (debugger_->DebugServicesIsWatchPoint(current_kernel_name)) {
      return false;
    }
  }
#endif
  auto curr_stream_id = kernel_curr->stream_id();
  auto prev_stream_id = kernel_prev->stream_id();
  if (curr_stream_id == prev_stream_id) {
    mem_buf->type_ = kInStreamReuse;
    return true;
  }

  bool reuse_between_streams = true;
  for (auto &stream_group : stream_groups_) {
    size_t cur_index = UINT32_MAX;
    size_t prev_index = UINT32_MAX;
    for (size_t index = 0; index < stream_group.size(); index++) {
      if (curr_stream_id == stream_group[index]) {
        cur_index = index;
        continue;
      }
      if (prev_stream_id == stream_group[index]) {
        prev_index = index;
        continue;
      }
    }
    if ((prev_index != UINT32_MAX) && (cur_index == UINT32_MAX || (prev_index > cur_index))) {
      // if the previous stream and the current stream are not in the same group, the membuf can't be reused
      // if the previous stream is behind the current stream, the membuf can't be reused
      reuse_between_streams = false;
      break;
    }
  }

  if (reuse_between_streams) {
    mem_buf->type_ = kBetweenStreamReuse;
    return true;
  }

  auto iter = kernel_front_map_.find(kernel_curr);
  if (iter == kernel_front_map_.end()) {
    MS_LOG(EXCEPTION) << kernel_curr->scope_full_name() << " is not init.";
  }
  auto kernel_curr_front = iter->second;
  auto depend_count = kernel_curr_front.count(kernel_prev);
  if (depend_count) {
    mem_buf->type_ = kKernelDependenceReuse;
    return true;
  }

  return false;
}

void BestFitMemReuse::AssignCommonNodeOutputOffset() {
  MS_EXCEPTION_IF_NULL(current_kernel_);
  for (const auto &tensor_idx : current_kernel_->GetOutputRefIndexs()) {
    size_t index = GetTensorIndex(tensor_idx);
    auto tensor_desc = tensor_ptr_list_[index];
    MS_EXCEPTION_IF_NULL(tensor_desc);
    if (tensor_desc->type_ == kRefNodeInput) {
      total_refinput_size += tensor_desc->size_;
    } else if (tensor_desc->type_ == kRefNodeOutput) {
      total_refoutput_size += tensor_desc->size_;
      // no need to alloc refnode output's memory
      continue;
    } else if (tensor_desc->type_ == kCommNotReuse) {
      total_comm_not_reuse_size += tensor_desc->size_;
    } else if (tensor_desc->type_ == kCommReuse) {
      // get align size for communication op's single input
      tensor_desc->size_ = AlignCommunicationMemorySize(tensor_desc->size_);
      total_comm_reuse_size += tensor_desc->size_;
    }

    auto reusable_membuf_map = GetReusableMembufMap(tensor_desc->size_);
    if (!reusable_membuf_map.empty()) {
      auto membuf_index = reusable_membuf_map.begin()->second;
      // find the best suitable membuf in membuf list, and reuse it
      ReuseExistMembuf(tensor_desc.get(), membuf_index, kDynamicMem);
    } else {
      // no membuf can reuse, add new membuf after the membuf_ptr_list
      AddNewMembufPtr(tensor_desc.get(), kDynamicMem);
#ifdef MEM_REUSE_DEBUG
      MemReuseChecker::GetInstance().IsAddNewMembuf_ = true;
#endif
    }
    // skip left align border for communication op single input to used
    if (tensor_desc->type_ == kCommReuse) {
      tensor_desc->offset_ += kDefaultMemAlignSize;
    }
  }
}

void BestFitMemReuse::AssignCommunicationNodeOutputOffset() {
  size_t total_kernel_output_size = 0;
  // get all output size
  MS_EXCEPTION_IF_NULL(current_kernel_);
  for (const auto &tensor_idx : current_kernel_->GetOutputRefIndexs()) {
    size_t index = GetTensorIndex(tensor_idx);
    auto tensor_desc = tensor_ptr_list_[index];
    MS_EXCEPTION_IF_NULL(tensor_desc);
    if (tensor_desc->type_ == kCommReuse) {
      total_comm_reuse_size += tensor_desc->size_;
      total_comm_output_reuse_size += tensor_desc->size_;
      total_kernel_output_size += tensor_desc->size_;
    } else {
      MS_LOG(ERROR) << "All communication op's outputs should be memory reuse, Kernel:"
                    << current_kernel_->scope_full_name() << " output index:" << tensor_idx
                    << " tensor_type:" << tensor_desc->type_;
      continue;
    }
  }
  total_kernel_output_size = AlignCommunicationMemorySize(total_kernel_output_size);

  // add left align border for the first output and right align border for the last output to alloc align border memory
  size_t output_index = 0;
  auto output_ref_indexes = current_kernel_->GetOutputRefIndexs();
  for (const auto &out_index : output_ref_indexes) {
    size_t index = GetTensorIndex(out_index);
    auto descption = tensor_ptr_list_[index];
    MS_EXCEPTION_IF_NULL(descption);
    if (output_index == 0) {
      descption->size_ += kDefaultMemAlignSize;
    }

    if ((output_index == 0) && (output_ref_indexes.size() == 1)) {
      // add right align border for single output
      descption->size_ += kDefaultMemAlignSize;
    }

    output_index++;
  }

  auto reusable_membuf_map = GetReusableMembufMap(total_kernel_output_size);
  if (!reusable_membuf_map.empty()) {
    auto membuf_index = reusable_membuf_map.begin()->second;
    output_index = 0;
    for (const auto &idx : current_kernel_->GetOutputRefIndexs()) {
      size_t index = GetTensorIndex(idx);
      auto desc = tensor_ptr_list_[index];
      MS_EXCEPTION_IF_NULL(desc);
      ReuseExistMembuf(desc.get(), membuf_index + output_index, kDynamicMem);
      // skip left align border for communication op's first output
      if (output_index == 0) {
        desc->offset_ += kDefaultMemAlignSize;
      }
      output_index++;
    }
  } else {
    // no membuf can reuse, add new membuf after the membuf_ptr_list
    output_index = 0;
    for (const auto &tensor_index : current_kernel_->GetOutputRefIndexs()) {
      size_t index = GetTensorIndex(tensor_index);
      auto desc = tensor_ptr_list_[index];
      MS_EXCEPTION_IF_NULL(desc);
      AddNewMembufPtr(desc.get(), kDynamicMem);
      // skip align size offset for first output to used
      if (output_index == 0) {
        desc->offset_ += kDefaultMemAlignSize;
      }
      output_index++;
#ifdef MEM_REUSE_DEBUG
      MemReuseChecker::GetInstance().IsAddNewMembuf_ = true;
#endif
    }
  }
}

void BestFitMemReuse::AssignNodeOutputOffset() {
  if (current_kernel_->type_ == kCommunicationNode) {
    AssignCommunicationNodeOutputOffset();
  } else {
    AssignCommonNodeOutputOffset();
  }
}

void BestFitMemReuse::AssignNodeWorkspaceOffset() {
  for (auto &wk_idx : current_kernel_->GetWorkspaceRefIndexs()) {
    size_t index = GetWorkspaceIndex(wk_idx);
    auto wk_ref = wk_tensor_list_[index];
    MS_EXCEPTION_IF_NULL(wk_ref);
    auto re_wk_membuf_map = GetReusableMembufMap(wk_ref->size_);
    if (!re_wk_membuf_map.empty()) {
      auto membuf_index = re_wk_membuf_map.begin()->second;
      ReuseExistMembuf(wk_ref.get(), membuf_index, kWorkspaceMem);
    } else {
      AddNewMembufPtr(wk_ref.get(), kWorkspaceMem);
    }
  }
}

void BestFitMemReuse::ReuseExistMembuf(KernelRefCount *tensor_desc, size_t membuf_index, int flag) {
  MS_EXCEPTION_IF_NULL(tensor_desc);
  CheckMembufIndx(membuf_index);
  auto membuf = membuf_ptr_list_[membuf_index];
  MS_EXCEPTION_IF_NULL(membuf);
  // first to split && then update membuf_info
  if (IsSplit(tensor_desc->size_, membuf->size_)) {
    // split the membuf, and insert a new membuf after this membuf
    SplitMembuf(tensor_desc, membuf_index);
  }
  // update membuf status, and set tensor offset
  UpdateMembufInfo(tensor_desc, membuf.get(), flag);
}

std::map<size_t, size_t> BestFitMemReuse::GetReusableMembufMap(size_t tensor_size) {
  std::map<size_t, size_t> size_map;
  for (size_t i = 0; i < membuf_ptr_list_.size(); ++i) {
    auto membuf = membuf_ptr_list_[i];
    auto index = i;
    bool is_membuf_ok = membuf->status_ == kUnused && membuf->size_ >= tensor_size;
    if (is_membuf_ok && IsUsable(current_kernel_, membuf)) {
      (void)size_map.insert(std::make_pair(membuf->size_, index));
      break;
    }
  }
  return size_map;
}

void BestFitMemReuse::UpdateMembufInfo(KernelRefCount *tensor_desc, Membuf *membuf, int flag) {
  MS_EXCEPTION_IF_NULL(tensor_desc);
  MS_EXCEPTION_IF_NULL(membuf);
  auto real_index = GetRealIndex(IntToSize(tensor_desc->index_), flag);
  membuf->status_ = kReused;
  membuf->index_ = real_index;
  membuf->used_kernel_ = current_kernel_;
  tensor_desc->offset_ = membuf->offset_;
}

bool BestFitMemReuse::IsSplit(size_t tensor_size, size_t membuf_size) const { return tensor_size < membuf_size; }

void BestFitMemReuse::SplitMembuf(const KernelRefCount *tensor_desc, size_t membuf_index) {
  MS_EXCEPTION_IF_NULL(tensor_desc);
  CheckMembufIndx(membuf_index);
  auto membuf = membuf_ptr_list_[membuf_index];
  MS_EXCEPTION_IF_NULL(membuf);
  auto bias = membuf->size_ - tensor_desc->size_;
  membuf->size_ = tensor_desc->size_;
  // to check if the split membuf can be merged
  auto new_membuf = std::make_shared<Membuf>(kUnused, bias, membuf->offset_ + membuf->size_, kInvalidIndex,
                                             membuf->type_, current_kernel_);
  (void)membuf_ptr_list_.insert(membuf_ptr_list_.begin() + SizeToInt(membuf_index + 1), new_membuf);
}

void BestFitMemReuse::AddNewMembufPtr(KernelRefCount *tensor_desc, int flag) {
  MS_EXCEPTION_IF_NULL(tensor_desc);
  size_t membuf_offset = 0;
  if (!membuf_ptr_list_.empty()) {
    membuf_offset = membuf_ptr_list_.back()->offset_ + membuf_ptr_list_.back()->size_;
  }
  auto membuf_size = tensor_desc->size_;
  auto real_index = GetRealIndex(IntToSize(tensor_desc->index_), flag);
  auto membuf = std::make_shared<Membuf>(kReused, membuf_size, membuf_offset, real_index, kNew, current_kernel_);
  membuf_ptr_list_.push_back(membuf);
  tensor_desc->offset_ = membuf_offset;
}

void BestFitMemReuse::UpdateNodeInputAndMembuf() {
  // process node input tensor
  for (const auto &tensor_idx : current_kernel_->GetInputRefIndexs()) {
    size_t tensor_index = GetTensorIndex(tensor_idx);
    auto tensor_desc = tensor_ptr_list_[tensor_index];
    MS_EXCEPTION_IF_NULL(tensor_desc);
    tensor_desc->ref_count_--;
    if (tensor_desc->ref_count_ == 0) {
      ReleaseMembuf(tensor_index, kDynamicMem);
    } else if (tensor_desc->ref_count_ < 0) {
      MS_LOG(EXCEPTION) << "tensor: " << tensor_desc->index_ << " refcount: " << tensor_desc->ref_count_
                        << " check error";
    }
  }
}

void BestFitMemReuse::ReleaseNodeUnusedOutput() {
  for (const auto &tensor_idx : current_kernel_->GetOutputRefIndexs()) {
    size_t tensor_index = GetTensorIndex(tensor_idx);
    auto tensor_desc = tensor_ptr_list_[tensor_index];
    MS_EXCEPTION_IF_NULL(tensor_desc);
    if (tensor_desc->ref_count_ == 0) {
      ReleaseMembuf(tensor_index, kDynamicMem);
    } else if (tensor_desc->ref_count_ < 0) {
      MS_LOG(EXCEPTION) << "tensor: " << tensor_desc->index_ << " refcount: " << tensor_desc->ref_count_
                        << " check error";
    }
  }
}

void BestFitMemReuse::ReleasePreNodeWorkspace(const KernelDef *kernel_def_ptr) {
  for (auto &workspace_index : kernel_def_ptr->GetWorkspaceRefIndexs()) {
    size_t index = GetWorkspaceIndex(workspace_index);
    auto wk_tensor = wk_tensor_list_[index];
    wk_tensor->ref_count_--;
    if (wk_tensor->ref_count_ == 0) {
      ReleaseMembuf(index, kWorkspaceMem);
    } else if (wk_tensor->ref_count_ < 0) {
      MS_LOG(EXCEPTION) << "tensor: " << wk_tensor->index_ << " refcount: " << wk_tensor->ref_count_ << " check error";
    }
  }
}

void BestFitMemReuse::ReleaseMembuf(size_t tensor_index, int flag) {
  if (membuf_ptr_list_.empty()) {
    return;
  }
  auto real_index = GetRealIndex(tensor_index, flag);
  auto membuf_iter = std::find_if(membuf_ptr_list_.begin(), membuf_ptr_list_.end(),
                                  [real_index](const MembufPtr &membuf) { return membuf->index_ == real_index; });
  if (membuf_iter == membuf_ptr_list_.end()) {
    return;
  }
  auto membuf = (*membuf_iter);
  MS_EXCEPTION_IF_NULL(membuf);
  membuf->status_ = kUnused;
  if (membuf_iter != membuf_ptr_list_.end() - 1) {
    auto next_iter = membuf_iter + 1;
    auto membuf_next = (*next_iter);
    MS_EXCEPTION_IF_NULL(membuf_next);
    if (membuf_next->status_ == kUnused) {
      bool is_merge = IsUsable(current_kernel_, membuf_next);
      if (is_merge) {
        membuf->size_ += membuf_next->size_;
        (void)membuf_ptr_list_.erase(next_iter);
      }
    }
  }
  if (membuf_iter != membuf_ptr_list_.begin()) {
    auto prev_iter = membuf_iter - 1;
    auto membuf_prev = (*prev_iter);
    MS_EXCEPTION_IF_NULL(membuf_prev);
    if (membuf_prev->status_ == kUnused) {
      bool is_merge = IsUsable(current_kernel_, membuf_prev);
      if (is_merge) {
        membuf->size_ += membuf_prev->size_;
        membuf->offset_ = membuf_prev->offset_;
        (void)membuf_ptr_list_.erase(prev_iter);
      }
    }
  }
}

size_t BestFitMemReuse::AlignCommonMemorySize(size_t size) const {
  // memory size 512 align
  return (size + kDefaultMemAlignSize + kAttAlignSize) / kDefaultMemAlignSize * kDefaultMemAlignSize;
}

size_t BestFitMemReuse::AlignCommunicationMemorySize(size_t size) const {
  // memory size 512 align and add communication memory: left align border memory - data - right align border memory
  return kDefaultMemAlignSize + (size + kDefaultMemAlignSize - 1) / kDefaultMemAlignSize * kDefaultMemAlignSize +
         kDefaultMemAlignSize;
}
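A quick numeric check of the two alignment helpers above, assuming kDefaultMemAlignSize is 512 (the value implied by the comments here and by the align unit test near the end of this diff); `allocator` is a hypothetical BestFitMemReuse instance:

// Illustrative only, not part of the original file.
size_t a = allocator.AlignCommonMemorySize(510);          // 1024, as asserted by the UT below
size_t b = allocator.AlignCommunicationMemorySize(1000);  // 512 + 1024 + 512 = 2048 (left border + aligned data + right border)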

size_t BestFitMemReuse::GetAllocatedSize() {
  size_t AllocatedSize = kTotalSize;
  if (membuf_ptr_list_.empty()) {
    return AllocatedSize;
  }
  AllocatedSize = membuf_ptr_list_.back()->offset_ + membuf_ptr_list_.back()->size_;
  MS_LOG(INFO) << "MemReuse Allocated Dynamic Size: " << AllocatedSize;
  return AllocatedSize;
}

bool BestFitMemReuse::IsRelease() {
  // unable_used_node lists the node types whose output tensors cannot be released,
  // even if their refcount is equal to zero.
  std::unordered_set<std::string> unable_used_node = {
    prim::kPrimBatchNorm->name(),
    prim::kPrimBatchNormGrad->name(),
  };
  return unable_used_node.find(current_kernel_->kernel_name()) == unable_used_node.end();
}

size_t BestFitMemReuse::GetTensorIndex(int index) const {
  if (index < 0 || IntToSize(index) >= tensor_ptr_list_.size()) {
    MS_LOG(WARNING) << "current cnode: " << current_kernel_->scope_full_name();
    MS_LOG(EXCEPTION) << "invalid tensor index";
  }
  return IntToSize(index);
}

size_t BestFitMemReuse::GetWorkspaceIndex(int index) const {
  if (index < 0 || IntToSize(index) >= wk_tensor_list_.size()) {
    MS_LOG(WARNING) << "current cnode: " << current_kernel_->scope_full_name();
    MS_LOG(EXCEPTION) << "invalid tensor index";
  }
  return IntToSize(index);
}

int BestFitMemReuse::GetRealIndex(size_t index, int flag) const {
  if (flag == kDynamicMem) {
    return SizeToInt(index);
  } else if (flag == kWorkspaceMem) {
    return kWorkspaceIndexFactor * SizeToInt(index + 1);
  } else {
    MS_LOG(EXCEPTION) << "flag " << flag << " is invalid";
  }
}
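The negative kWorkspaceIndexFactor (declared as -1000 in the header below) keeps workspace entries from colliding with tensor indices inside the shared membuf list; a small, assumed example with the same hypothetical `allocator`:

// Illustrative only, not part of the original file.
int t = allocator.GetRealIndex(2, kDynamicMem);    // 2: dynamic tensors keep their index
int w = allocator.GetRealIndex(2, kWorkspaceMem);  // -1000 * (2 + 1) == -3000: workspace slots are encoded as negative indices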

void BestFitMemReuse::CheckMembufIndx(size_t membuf_index) const {
  if (membuf_index >= membuf_ptr_list_.size()) {
    MS_LOG(WARNING) << "current cnode: " << current_kernel_->scope_full_name();
    MS_LOG(EXCEPTION) << "invalid membuf index: " << membuf_index << ", real size: " << membuf_ptr_list_.size();
  }
}

void BestFitMemReuse::Reuse(const MemReuseUtil *mem_reuse_util_ptr) {
  MS_EXCEPTION_IF_NULL(mem_reuse_util_ptr);
  InitMemReuseInfo(mem_reuse_util_ptr);
  InitKernelDependence();
  KernelDefPtr pre_op = nullptr;
#ifdef MEM_REUSE_DEBUG
  size_t op_num = 0;
#endif
  for (const auto &op_def_ptr : op_ptr_list_) {
    current_kernel_ = op_def_ptr;
    // release pre_op_def
    if (pre_op != nullptr) {
      ReleasePreNodeWorkspace(pre_op.get());
    }
    MemReuseChecker::GetInstance().IsAddNewMembuf_ = false;
    // process node output tensor
    AssignNodeOutputOffset();
#ifdef MEM_REUSE_DEBUG
    if (MemReuseChecker::GetInstance().IsAddNewMembuf_) {
      MemReuseChecker::GetInstance().SetAddNewMembuInfos(op_def_ptr.get(), membuf_ptr_list_, op_num);
    }
#endif
    // deal with current op's workspace
    AssignNodeWorkspaceOffset();
    pre_op = op_def_ptr;
    // update node input tensor refcount, and membuf list status
    UpdateNodeInputAndMembuf();
    // check node output tensor which refcount is equal to zero
    if (IsRelease()) {
      ReleaseNodeUnusedOutput();
    }
#ifdef MEM_REUSE_DEBUG
    MemReuseChecker::GetInstance().SetMembuInfos(op_def_ptr.get(), membuf_ptr_list_);
    ++op_num;
#endif
  }
  MS_LOG(INFO) << "Special Tensor total size: RefInput: " << total_refinput_size
               << " RefOutput: " << total_refoutput_size << " CommReuse: " << total_comm_reuse_size
               << " CommOutputReuse: " << total_comm_output_reuse_size
               << " CommNotReuse: " << total_comm_not_reuse_size;
#ifdef MEM_REUSE_DEBUG
  MemReuseChecker::GetInstance().ExportMembufInfoIR();
  MemReuseChecker::GetInstance().ExportAddNewMmebufIR();
  MemReuseChecker::GetInstance().set_kernel_front_map(kernel_front_map_);
  MemReuseChecker::GetInstance().ExportKernelDependence();
#endif
}
} // namespace memreuse
} // namespace mindspore
@@ -1,174 +0,0 @@
/**
 * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_MEM_REUSE_MEM_REUSE_ALLOCATOR_H_
#define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_MEM_REUSE_MEM_REUSE_ALLOCATOR_H_
#include <cmath>
#include <map>
#include <list>
#include <memory>
#include <vector>
#include <numeric>
#include <algorithm>
#include <utility>
#include <fstream>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <set>
#include <queue>
#include "backend/optimizer/mem_reuse/kernel_refcount.h"
#include "backend/optimizer/mem_reuse/mem_reuse.h"

namespace mindspore {
namespace memreuse {
static constexpr int kWorkspaceIndexFactor = -1000;
static constexpr int kDynamicMem = -1;
static constexpr int kWorkspaceMem = 1;
static constexpr size_t kTotalSize = 0;
enum Status { kUnused, kReused };
enum MemType { kNew, kInStreamReuse, kBetweenStreamReuse, kKernelDependenceReuse };
class Membuf {
 public:
  Membuf() = default;
  Membuf(Status status, size_t size, size_t offset, int index, MemType type, const KernelDefPtr &used_kernel)
      : status_(status), size_(size), offset_(offset), index_(index), type_(type), used_kernel_(used_kernel) {}
  ~Membuf() = default;
  // Memory block status flags
  Status status_ = kUnused;
  size_t size_{0};
  size_t offset_{0};
  // Store the tensor index stored in this memory block at a certain moment
  int index_{0};
  MemType type_{kNew};
  KernelDefPtr used_kernel_;
};
using MembufPtr = std::shared_ptr<Membuf>;

class BestFitMemReuse {
 public:
  BestFitMemReuse() = default;
  ~BestFitMemReuse() { membuf_ptr_list_.clear(); }
  /**
   * Init all information need by memory reuse
   * @param mem_reuse_util_ptr, initialize in the memreuse.cc
   */
  void InitMemReuseInfo(const MemReuseUtil *mem_reuse_util_ptr);
  void CheckMembufIndx(size_t check_idx) const;
  void AssignNodeWorkspaceOffset();
  void ReleasePreNodeWorkspace(const KernelDef *kernel_def_ptr);
  /**
   * Assign output tensor memory offset of current kernel
   */
  void AssignNodeOutputOffset();
  /**
   * Assign output tensor memory offset of common kernel
   */
  void AssignCommonNodeOutputOffset();
  /**
   * Assign output tensor memory offset of communication kernel
   */
  void AssignCommunicationNodeOutputOffset();
  /**
   * Update input tensor's status of current kernel, and the status of membuf used by current kernel
   */
  void UpdateNodeInputAndMembuf();
  /**
   * Check whether to release the kernel output tensor which refcount is equal to zero
   */
  void ReleaseNodeUnusedOutput();
  /**
   * Reuse the exist membuf if possible
   * @param tensor_desc, the output tensor of current kernel
   * @param membuf_index, the index of membuf to be reused
   * @param flag
   */
  void ReuseExistMembuf(KernelRefCount *tensor_desc, size_t membuf_index, int flag);
  /**
   * Get the membuf that can be reused
   * @param tensor_size, the size of the tensor ready to assign memory offset
   * @return membuf map, key: the membuf size, value: the membuf index
   */
  std::map<size_t, size_t> GetReusableMembufMap(size_t tensor_size);
  /**
   * Update the status of the reused memory block
   * @param tensor_desc, the tensor ready to assign memory
   * @param membuf, the membuf to be reused
   * @param flag, distinguish dynamic memory and workspace
   */
  void UpdateMembufInfo(KernelRefCount *tensor_desc, Membuf *membuf, int flag);
  // If the size of the memory block is greater than the size of the tensor, split the extra memory
  void SplitMembuf(const KernelRefCount *tensor_desc, size_t membuf_index);
  // Determine if the memory block needs to be split
  bool IsSplit(size_t tensor_size, size_t membuf_size) const;
  // If there is no memory block that can be reused, add a new memory block at the end
  void AddNewMembufPtr(KernelRefCount *tensor_desc, int flag);
  // Merge unused membuf
  void ReleaseMembuf(size_t tensor_index, int flag);
  // Memory address alignment for common memory
  size_t AlignCommonMemorySize(size_t size) const;
  // Memory address alignment for communication used memory
  size_t AlignCommunicationMemorySize(size_t size) const;
  int GetRealIndex(size_t index, int flag = kDynamicMem) const;
  size_t GetTensorIndex(int index) const;
  size_t GetWorkspaceIndex(int index) const;
  // Memory reuse main program entry
  void Reuse(const MemReuseUtil *mem_reuse_util_ptr);
  // Get the total memory that needs to be applied eventually
  size_t GetAllocatedSize();
  // return false, when the node output cannot be released
  bool IsRelease();
  /**
   * determine if the kernel_curr can reuse the output tensor add of kernel_prev
   * @param kernel_curr, current kernel
   * @param mem_buf, the membuf
   * @return bool
   */
  bool IsUsable(const KernelDefPtr &kernel_curr, const MembufPtr &mem_buf);
  /**
   * init the dependence of all kernels in the graph
   */
  void InitKernelDependence();
  // set tensor_def and op_def
  void set_tensor_ptr_list(const std::vector<KernelRefCountPtr> &tensor_ptr_list) {
    tensor_ptr_list_ = tensor_ptr_list;
  }
  void set_workspace_ptr_list(const std::vector<KernelRefCountPtr> &workspace_ptr_list) {
    wk_tensor_list_ = workspace_ptr_list;
  }
  void set_op_ptr_list(const std::vector<KernelDefPtr> &op_ptr_list) { op_ptr_list_ = op_ptr_list; }

 private:
  KernelDefPtr current_kernel_;
  // Save all tensor information
  std::vector<KernelRefCountPtr> tensor_ptr_list_;
  std::vector<KernelRefCountPtr> wk_tensor_list_;
  // Save all op information, including input and output tensor index
  std::vector<KernelDefPtr> op_ptr_list_;
  // Memory block information sequence, temporary variables
  std::vector<MembufPtr> membuf_ptr_list_;
  // kernel_front_map_, key: the kernel_def, value: kernels before this kernel_def
  std::map<KernelDefPtr, std::set<KernelDefPtr>> kernel_front_map_;
  std::vector<std::vector<uint32_t>> stream_groups_;
  size_t total_refinput_size{0};
  size_t total_refoutput_size{0};
  size_t total_comm_reuse_size{0};
  size_t total_comm_output_reuse_size{0};
  size_t total_comm_not_reuse_size{0};
};
} // namespace memreuse
} // namespace mindspore
#endif // #define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_MEM_REUSE_MEM_REUSE_ALLOCATOR_H_
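For context, the deleted allocator was driven by MemoryManager::MallocReusedDynamicMem (removed further down in this diff); a condensed sketch of that call sequence, with `graph` assumed to be a prepared session::KernelGraph*:

// Condensed from the deleted MemoryManager::MallocReusedDynamicMem; error checks omitted.
auto mem_reuse_util_ptr = std::make_shared<memreuse::MemReuseUtil>();
mem_reuse_util_ptr->SetAllInfo(graph);                                 // collect tensor/workspace/op info
auto bestfit_mem_reuse = std::make_shared<memreuse::BestFitMemReuse>();
bestfit_mem_reuse->Reuse(mem_reuse_util_ptr.get());                    // plan best-fit offsets
size_t total_allocated_size = bestfit_mem_reuse->GetAllocatedSize();   // single contiguous block to malloc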
@@ -26,7 +26,6 @@
#include "backend/session/anf_runtime_algorithm.h"
#include "backend/optimizer/mem_reuse/mem_reuse.h"
#include "backend/kernel_compiler/common_utils.h"
#include "backend/optimizer/mem_reuse/mem_reuse_allocator.h"
namespace mindspore {
namespace memreuse {
constexpr auto kSplitC = '/';
@@ -47,6 +47,9 @@ void AscendMemoryManager::MallocDeviceMemory() {
    } else {
      MS_EXCEPTION(DeviceProcessError) << "rtMalloc mem size[" << device_mem_size_ << "] fail, ret[" << ret << "]";
    }
  } else {
    MS_LOG(INFO) << "Call rtMalloc to allocate device memory Success, size : " << device_mem_size_
                 << " bytes , address : " << reinterpret_cast<void *>(device_mem_base_);
  }
  AscendMemoryPool::GetInstance().Init(device_mem_base_, device_mem_size_, dynamic_mem_offset_);
}
@@ -107,6 +110,12 @@ uint8_t *AscendMemoryManager::MallocStaticMem(size_t size, bool communication_me
  } else {
    align_size = GetCommonAlignSize(size);
  }
  auto device_mem_pool_offset = AscendMemoryPool::GetInstance().device_mem_pool_offset();
  MS_LOG(INFO) << "Malloc Memory for Static: size[" << align_size << "], Memory statistics: total[" << device_mem_size_
               << "] dynamic [" << total_dynamic_size_ << "] static [" << device_mem_size_ - device_mem_pool_offset
               << "], Pool statistics: pool total size [" << AscendMemoryPool::GetInstance().total_mem_statistics()
               << "] used [" << AscendMemoryPool::GetInstance().used_mem_statistics()
               << "] communication_mem:" << communication_mem;

  if (MemoryProfiling::GetInstance().IsMemoryProfilingEnable() && graph_id != kInvalidGraphId) {
    auto node = MemoryProfiling::GetInstance().GetGraphMemoryNode(graph_id);
@@ -136,9 +145,9 @@ uint8_t *AscendMemoryManager::MallocDynamicMem(size_t size, bool communication_m
  }

  auto device_mem_pool_offset = AscendMemoryPool::GetInstance().device_mem_pool_offset();
  MS_LOG(INFO) << "Malloc Memory: Dynamic, total[" << device_mem_size_ << "] (dynamic[" << total_dynamic_size_
               << "] memory pool[" << device_mem_size_ - device_mem_pool_offset << "])"
               << " malloc [" << align_size << "] communication_mem: " << communication_mem;
  MS_LOG(INFO) << "Malloc Memory for Dynamic: size[" << align_size << "], Memory statistics: total[" << device_mem_size_
               << "] dynamic[" << total_dynamic_size_ << "] static[" << device_mem_size_ - device_mem_pool_offset
               << "] communication_mem: " << communication_mem;
  auto offset = dynamic_mem_offset_;
  auto new_offset = dynamic_mem_offset_ + align_size;
  if (new_offset >= device_mem_pool_offset) {
@@ -22,6 +22,7 @@
#include <string>
#include <unordered_map>
#include <vector>
#include <fstream>
#include "runtime/device/kernel_runtime.h"
#include "ir/anf.h"
#include "backend/kernel_compiler/ascend_kernel_mod.h"
@@ -29,11 +29,13 @@
#include "runtime/device/kernel_runtime.h"
#include "runtime/device/kernel_runtime_manager.h"
#include "backend/optimizer/mem_reuse/mem_swap_manager.h"
#include "backend/optimizer/mem_reuse/mem_reuse.h"

namespace mindspore {
namespace device {
namespace gpu {
using mindspore::device::memswap::MemSwapManagerPtr;
using mindspore::memreuse::MemReuseUtilPtr;
class GPUKernelRuntime : public KernelRuntime {
 public:
  GPUKernelRuntime() = default;
@@ -276,7 +276,7 @@ void KernelRuntime::RunOpAssignOutputNodeMemory(const ValuePtr &pre_output_value
void KernelRuntime::AssignStaticMemoryInput(const session::KernelGraph *graph) {
  MS_EXCEPTION_IF_NULL(graph);
  MS_EXCEPTION_IF_NULL(mem_manager_);
  MS_LOG(INFO) << "AssignStaticMemoryInput start";
  MS_LOG(INFO) << "AssignStaticMemoryInput start for graph " << graph->graph_id();
  auto graph_inputs = graph->inputs();
  auto graph_valid_input = graph->valid_inputs();
  graph_inputs.insert(graph_inputs.end(), graph->child_graph_result().begin(), graph->child_graph_result().end());
@@ -342,8 +342,8 @@ void KernelRuntime::AssignStaticMemoryInput(const session::KernelGraph *graph) {
#endif
    auto tensor_size = AnfAlgo::GetOutputTensorMemSize(item, index);
    device_address = CreateDeviceAddress(nullptr, tensor_size, AnfAlgo::GetOutputFormat(item, index), output_type_id);
    MS_LOG(INFO) << "Malloc Input for graph " << graph->graph_id() << ", node: " << item->fullname_with_scope()
                 << " index: " << index << " size: " << tensor_size;
    MS_LOG(INFO) << "Assign Static Memory for Input node, size:" << tensor_size
                 << " node:" << item->fullname_with_scope() << " index: " << index;
    if (mem_manager_->MallocMem(kStaticMem, tensor_size, device_address, graph->graph_id()) == nullptr) {
      MS_LOG(EXCEPTION) << "Cannot alloc address when flag is: " << kStaticMem << ", tensor size is: " << tensor_size;
    }
@@ -355,7 +355,7 @@ void KernelRuntime::AssignStaticMemoryInput(const session::KernelGraph *graph) {

void KernelRuntime::AssignStaticMemoryOutput(const session::KernelGraph *graph) {
  MS_EXCEPTION_IF_NULL(graph);
  MS_LOG(INFO) << "AssignStaticMemoryOutput start";
  MS_LOG(INFO) << "AssignStaticMemoryOutput start for graph " << graph->graph_id();
  auto nodes = AnfAlgo::GetAllOutput(graph->output(), {prim::kPrimTupleGetItem});
  std::vector<session::KernelWithIndex> non_communication_op;
  // Assign Communicate Op Memory firstly.
@@ -500,12 +500,7 @@ void KernelRuntime::AssignCommunicationNodeOutputMem(MemType type, const AnfNode
    return;
  }

  if (type == kReuseDynamicMem) {
    // reuse communication op's all outputs' memory
    type = kReuseDynamicCommMem;
  }

  if (type == kReuseDynamicCommMem || type == kSomasReuseDynamicMem) {
    if (type == kSomasReuseDynamicMem) {
      bool not_reuse = KernelMemNotReuse(node);
      if (not_reuse) {
        type = kDynamicMem;
@@ -588,7 +583,7 @@ void KernelRuntime::AssignCommunicationNodeInputMem(MemType type, const AnfNodeP
    return;
  }

  if (type == kReuseDynamicMem || type == kSomasReuseDynamicMem) {
    if (type == kSomasReuseDynamicMem) {
      bool not_reuse = KernelMemNotReuse(node);
      if (not_reuse) {
        type = kDynamicMem;
@@ -616,20 +611,8 @@
void KernelRuntime::AssignNodeOutputMem(MemType type, const AnfNodePtr &node, int index) {
  MS_EXCEPTION_IF_NULL(node);
  MS_EXCEPTION_IF_NULL(mem_manager_);
  if (AnfAlgo::IsGetNext(NOT_NULL(node)) && type == kReuseDynamicMem) {
    MS_LOG(INFO) << "GetNext disable mem_reuse";
    type = kDynamicMem;
  }

  if (node->isa<CNode>()) {
    bool independent = AnfAlgo::IsIndependentNode(node->cast<CNodePtr>());
    if (independent && (type == kReuseDynamicMem)) {
      MS_LOG(INFO) << "Independent node " << node->fullname_with_scope() << " disable memory reuse";
      type = kDynamicMem;
    }
  }

  if (type == kReuseDynamicMem || type == kSomasReuseDynamicMem) {
    if (type == kSomasReuseDynamicMem) {
      bool not_reuse = KernelMemNotReuse(node);
      if (not_reuse) {
        type = kDynamicMem;
@@ -652,6 +635,10 @@ void KernelRuntime::AssignNodeOutputMem(MemType type, const AnfNodePtr &node, in
      continue;
    }
    MS_LOG(DEBUG) << "Assign Node:" << node->fullname_with_scope() << " output memory size:" << output_sizes[i];
    if (type == kStaticMem) {
      MS_LOG(INFO) << "Assign Static Memory for Output node, size:" << output_sizes[i]
                   << " node:" << node->fullname_with_scope();
    }
    std::string output_format = AnfAlgo::GetOutputFormat(node, i);
    auto output_type = AnfAlgo::GetOutputDeviceDataType(node, i);
    auto device_address = CreateDeviceAddress(nullptr, output_sizes[i], output_format, output_type);
@@ -699,8 +686,12 @@ void KernelRuntime::AssignValueNodeTensor(const ValueNodePtr &value_node, const
    if (ms_context->get_param<bool>(MS_CTX_ENABLE_PYNATIVE_INFER) &&
        !mem_manager_->MallocMemFromMemPool(address, node_size)) {
      MS_LOG(EXCEPTION) << "Device memory isn't enough and alloc failed, alloc size:" << node_size;
    } else if (mem_manager_->MallocMem(kStaticMem, node_size, address, graph_id) == nullptr) {
      MS_LOG(EXCEPTION) << "Cannot alloc address when flag is: " << kStaticMem << ", tensor size is: " << node_size;
    } else {
      MS_LOG(INFO) << "Assign Static Memory for Value node, size:" << node_size
                   << " node:" << value_node->fullname_with_scope();
      if (mem_manager_->MallocMem(kStaticMem, node_size, address, graph_id) == nullptr) {
        MS_LOG(EXCEPTION) << "Cannot alloc address when flag is: " << kStaticMem << ", tensor size is: " << node_size;
      }
    }
    AnfAlgo::SetOutputAddr(address, output_idx, value_node.get());
    if (!address->SyncHostToDevice(trans::GetRuntimePaddingShape(value_node, 0), tensor_size, tensor->data_type(),
@@ -717,7 +708,7 @@ void KernelRuntime::AssignValueNodeTensor(const ValueNodePtr &value_node, const
void KernelRuntime::AssignStaticMemoryValueNode(session::KernelGraph *graph) {
  MS_EXCEPTION_IF_NULL(graph);
  MS_EXCEPTION_IF_NULL(mem_manager_);
  MS_LOG(DEBUG) << "AssignStaticMemoryValueNode start";
  MS_LOG(DEBUG) << "AssignStaticMemoryValueNode start for graph " << graph->graph_id();
  auto ms_context = MsContext::GetInstance();
  MS_EXCEPTION_IF_NULL(ms_context);
  // order the value nodes
@@ -747,8 +738,13 @@ void KernelRuntime::AssignStaticMemoryValueNode(session::KernelGraph *graph) {
    if (ms_context->get_param<bool>(MS_CTX_ENABLE_PYNATIVE_INFER) &&
        !mem_manager_->MallocMemFromMemPool(address, tensor_size)) {
      MS_LOG(EXCEPTION) << "Device memory isn't enough and alloc failed, alloc size:" << tensor_size;
    } else if (mem_manager_->MallocMem(kStaticMem, tensor_size, address, graph->graph_id()) == nullptr) {
      MS_LOG(EXCEPTION) << "Cannot alloc address when flag is: " << kStaticMem << ", tensor size is: " << tensor_size;
    } else {
      MS_LOG(INFO) << "Assign Static Memory for Value node, size:" << tensor_size
                   << " node:" << value_node->fullname_with_scope();
      if (mem_manager_->MallocMem(kStaticMem, tensor_size, address, graph->graph_id()) == nullptr) {
        MS_LOG(EXCEPTION) << "Cannot alloc address when flag is: " << kStaticMem
                          << ", tensor size is: " << tensor_size;
      }
    }
    AnfAlgo::SetOutputAddr(address, 0, value_node.get());
    ShapeVector shape = {1, SizeToLong(tensor_size)};
@@ -776,13 +772,8 @@ void KernelRuntime::AssignDynamicMemory(session::KernelGraph *graph) {

  if (is_enable_mem_reuse) {
    MS_LOG(INFO) << "Memory Reuse is enable...";
#ifdef MEM_REUSE_DEBUG
    mem_manager_->MallocReusedDynamicMem(graph);
    mem_type = kReuseDynamicMem;
#else
    mem_manager_->MallocSomasDynamicMem(graph);
    mem_type = kSomasReuseDynamicMem;
#endif
  } else {
    MS_LOG(INFO) << "Memory Reuse is disable...";
  }
@@ -973,8 +964,8 @@ bool KernelRuntime::LaunchKernelMod(const session::KernelGraph &graph) {
    MS_EXCEPTION_IF_NULL(kernel_mod);

    // Skip transpose kernel with "nop_op" attr which is not hidden or removed in PyNative infer scenario. Transpose
    // kernel, which is not supposed to be executed, is generated in TransDataSplit to support specific Transdata. And
    // hard code here should be removed after new Transdata programme is implemented in the foreseeable future.
    // kernel, which is not supposed to be executed, is generated in TransDataSplit to support specific Transdata.
    // And hard code here should be removed after new Transdata programme is implemented in the foreseeable future.
    if (AnfAlgo::HasNodeAttr("nop_op", kernel)) {
      for (size_t idx = 0; idx < AnfAlgo::GetOutputTensorNum(kernel); idx += 1) {
        auto real_input = AnfAlgo::GetRealInputIndex(kernel, idx);
@@ -22,9 +22,6 @@
#endif
#include "utils/ms_context.h"

using mindspore::memreuse::BestFitMemReuse;
using mindspore::memreuse::MemReuseUtilPtr;

namespace mindspore {
namespace device {
constexpr size_t kAlignBytes = 32;
@@ -37,24 +34,6 @@ size_t MemoryManager::GetCommunicationAlignSize(size_t input_size) const {
  return (input_size + kMemAlignSize - 1) / kMemAlignSize * kMemAlignSize + 2 * kMemAlignSize;
}

void MemoryManager::MallocReusedDynamicMem(const session::KernelGraph *graph) {
  MS_EXCEPTION_IF_NULL(graph);
  MemReuseUtilPtr mem_reuse_util_ptr = std::make_shared<memreuse::MemReuseUtil>();
  MS_EXCEPTION_IF_NULL(mem_reuse_util_ptr);
  // set all infos
  mem_reuse_util_ptr->SetAllInfo(graph);
  auto bestfit_mem_reuse = std::make_shared<BestFitMemReuse>();
  MS_EXCEPTION_IF_NULL(bestfit_mem_reuse);
  bestfit_mem_reuse->Reuse(mem_reuse_util_ptr.get());
  size_t total_allocated_size = bestfit_mem_reuse->GetAllocatedSize();
  MS_LOG(INFO) << "TotalReuseDynamicSize [" << total_allocated_size << "]";
  mem_reuse_util_ptr_ = mem_reuse_util_ptr;
  auto base_ptr = MallocDynamicMem(total_allocated_size, false);
  MS_LOG(INFO) << "Reuse Memory from [" << reinterpret_cast<void *>(base_ptr) << "] to ["
               << reinterpret_cast<void *>(base_ptr + total_allocated_size) << "]";
  mem_reuse_util_ptr_->set_mem_base(base_ptr);
}

void MemoryManager::MallocSomasDynamicMem(const session::KernelGraph *graph) {
  MS_EXCEPTION_IF_NULL(graph);
  SomasPtr somas_reuse_util_ptr = std::make_shared<somas::Somas>();
@@ -117,9 +96,6 @@ uint8_t *MemoryManager::MallocOutputMem(const AnfNodePtr &node, size_t index, Me
    if (communication_mem) {
      address->communication_ptr_ = ptr - kMemAlignSize;
    }
  } else if (type == kReuseDynamicCommMem) {
    MS_EXCEPTION_IF_NULL(mem_reuse_util_ptr_);
    ptr = mem_reuse_util_ptr_->GetNodeOutputPtr(node, index);
  } else if (type == kSomasReuseDynamicMem) {
    MS_EXCEPTION_IF_NULL(somas_reuse_util_ptr_);
    ptr = somas_reuse_util_ptr_->GetNodeOutputPtr(node, index);
@@ -135,9 +111,6 @@ uint8_t *MemoryManager::MallocOutputMem(const AnfNodePtr &node, size_t index, Me
    address->from_mem_pool_ = true;
  } else if (type == kDynamicMem) {
    ptr = MallocDynamicMem(size, false);
  } else if (type == kReuseDynamicMem) {
    MS_EXCEPTION_IF_NULL(mem_reuse_util_ptr_);
    ptr = mem_reuse_util_ptr_->GetNodeOutputPtr(node, index);
  } else if (type == kSomasReuseDynamicMem) {
    MS_EXCEPTION_IF_NULL(somas_reuse_util_ptr_);
    ptr = somas_reuse_util_ptr_->GetNodeOutputPtr(node, index);
@@ -147,10 +120,7 @@
}

uint8_t *MemoryManager::MallocWorkSpaceMem(const AnfNodePtr &node, size_t index, MemType type, size_t size) {
  if (type == kReuseDynamicMem) {
    MS_EXCEPTION_IF_NULL(mem_reuse_util_ptr_);
    return mem_reuse_util_ptr_->GetNodeWorkSpacePtr(node, index);
  } else if (type == kSomasReuseDynamicMem) {
  if (type == kSomasReuseDynamicMem) {
    MS_EXCEPTION_IF_NULL(somas_reuse_util_ptr_);
    return somas_reuse_util_ptr_->GetNodeWorkSpacePtr(node, index);
  }
@@ -20,14 +20,12 @@
#include <utility>
#include <vector>
#include "backend/optimizer/mem_reuse/mem_reuse.h"
#include "backend/optimizer/mem_reuse/mem_reuse_allocator.h"
#include "backend/optimizer/somas/somas.h"
namespace mindspore {
namespace device {
enum MemType { kStaticMem, kDynamicMem, kReuseDynamicMem, kSomasReuseDynamicMem, kReuseDynamicCommMem };
enum MemType { kStaticMem, kDynamicMem, kSomasReuseDynamicMem };
const int kGetAllOuts = -1;
const uint64_t kMemAlignSize = 512;
using MemReuseUtilPtr = mindspore::memreuse::MemReuseUtilPtr;
using SomasPtr = mindspore::somas::SomasPtr;

class MemoryManager {
@@ -43,7 +41,6 @@ class MemoryManager {
  }
  virtual void ClearGlobalIdleMem() {}

  void MallocReusedDynamicMem(const session::KernelGraph *graph);
  virtual void MallocSomasDynamicMem(const session::KernelGraph *graph);
  uint8_t *MallocOutputMem(const AnfNodePtr &node, size_t index, MemType type, size_t size,
                           const DeviceAddressPtr &address, bool comm_mem);
@@ -72,7 +69,6 @@ class MemoryManager {
  uint64_t static_mem_offset_{0};
  size_t total_static_size_ = 0;
  size_t total_dynamic_size_ = 0;
  MemReuseUtilPtr mem_reuse_util_ptr_{nullptr};
  SomasPtr somas_reuse_util_ptr_{nullptr};
};
} // namespace device
@@ -1,153 +0,0 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <memory>
#include <vector>
#include <string>
#include "frontend/operator/ops.h"
#include "backend/optimizer/mem_reuse/mem_reuse.h"
#include "backend/optimizer/mem_reuse/mem_reuse_allocator.h"

#include "common/common_test.h"
#include "common/py_func_graph_fetcher.h"

using mindspore::memreuse::BestFitMemReuse;
using mindspore::memreuse::KernelDef;
using mindspore::memreuse::KernelDefPtr;
using mindspore::memreuse::KernelRefCount;
using mindspore::memreuse::KernelRefCountPtr;
using mindspore::memreuse::MemReuseUtil;
using mindspore::memreuse::MemReuseUtilPtr;
using mindspore::memreuse::RefCountType;
using MembufPtr = std::shared_ptr<mindspore::memreuse::Membuf>;

namespace mindspore {
namespace memreuse {
class TestMemReuseAllocator : public UT::Common {
 public:
  TestMemReuseAllocator() : getPyFun_("gtest_input.mem_reuse.TestMemReuseAllocator", true) {}
  void SetUp() {}

 public:
  UT::PyFuncGraphFetcher getPyFun_;
};

KernelDefPtr GetNewKernelDef(const std::vector<KernelRefCountPtr> &inputs,
                             const std::vector<KernelRefCountPtr> &outputs, uint32_t stream_id) {
  auto kernel_def = std::make_shared<KernelDef>();
  kernel_def->set_input_refs(inputs);
  kernel_def->set_output_refs(outputs);
  kernel_def->set_stream_id(stream_id);
  return kernel_def;
}

void InitMemReuseUtils(MemReuseUtil *mem_reuse_util_ptr) {
  // tensor params: ref_count, offset, size, index,
  auto tensor_0 = std::make_shared<KernelRefCount>();
  tensor_0->index_ = 0;
  tensor_0->size_ = 512;
  tensor_0->ref_count_ = 999;
  ASSERT_NE(tensor_0, nullptr);
  auto tensor_1 = std::make_shared<KernelRefCount>();
  tensor_1->index_ = 1;
  tensor_1->size_ = 1024;
  tensor_1->ref_count_ = 1;
  auto tensor_2 = std::make_shared<KernelRefCount>();
  tensor_2->index_ = 2;
  tensor_2->size_ = 1024;
  tensor_2->ref_count_ = 2;
  auto tensor_3 = std::make_shared<KernelRefCount>();
  tensor_3->index_ = 3;
  tensor_3->size_ = 32;
  tensor_3->ref_count_ = 1;
  auto tensor_4 = std::make_shared<KernelRefCount>();
  tensor_4->index_ = 4;
  tensor_4->size_ = 2048;
  tensor_4->ref_count_ = 1;
  auto tensor_5 = std::make_shared<KernelRefCount>();
  tensor_5->index_ = 5;
  tensor_5->size_ = 256;
  tensor_5->ref_count_ = 1;
  MS_LOG(INFO) << "init all tensor info success.";

  std::vector<KernelRefCountPtr> inputs;
  std::vector<KernelRefCountPtr> outputs;
  inputs = {tensor_0};
  outputs = {tensor_1};
  auto kernel0 = GetNewKernelDef(inputs, outputs, 0);
  inputs = {tensor_1};
  outputs = {tensor_2};
  auto kernel1 = GetNewKernelDef(inputs, outputs, 0);
  inputs = {tensor_2};
  outputs = {tensor_3};
  auto kernel2 = GetNewKernelDef(inputs, outputs, 0);
  inputs = {tensor_2, tensor_3};
  outputs = {tensor_4};
  auto kernel3 = GetNewKernelDef(inputs, outputs, 0);
  inputs = {tensor_4};
  outputs = {tensor_5};
  auto kernel4 = GetNewKernelDef(inputs, outputs, 1);
  MS_LOG(INFO) << "init all op info success.";
  std::vector<KernelRefCountPtr> tensor_ptr_list{tensor_0, tensor_1, tensor_2, tensor_3, tensor_4, tensor_5};
  std::vector<KernelDefPtr> op_ptr_list{kernel0, kernel1, kernel2, kernel3, kernel4};

  mem_reuse_util_ptr->set_total_refs_list(tensor_ptr_list);
  mem_reuse_util_ptr->set_kernel_def_ptr_list(op_ptr_list);
}

TEST_F(TestMemReuseAllocator, mem_reuse_allocator) {
  MS_LOG(INFO) << "mem_resue_allocator UT";
  auto mem_reuse_util_ptr = std::make_shared<MemReuseUtil>();
  InitMemReuseUtils(mem_reuse_util_ptr.get());
  auto best_fit_mem_reuse = std::make_shared<BestFitMemReuse>();
  best_fit_mem_reuse->Reuse(mem_reuse_util_ptr.get());
  MS_LOG(INFO) << "run mem reuse success";
  size_t total_allocated_size = best_fit_mem_reuse->GetAllocatedSize();
  ASSERT_NE(total_allocated_size, 0);
}

TEST_F(TestMemReuseAllocator, mem_reuse_allocator_add_membuf) {
  auto best_fit_mem_reuse = std::make_shared<BestFitMemReuse>();
  auto tensor_desc = std::make_shared<KernelRefCount>();
  tensor_desc->SetKernelRefCountInfo(0, 1024, kDynamicRefCount);
  best_fit_mem_reuse->AddNewMembufPtr(tensor_desc.get(), kDynamicMem);
  auto allocated_size = best_fit_mem_reuse->GetAllocatedSize();
  ASSERT_EQ(allocated_size, 1024);
}

TEST_F(TestMemReuseAllocator, mem_reuse_allocator_split_membuf) {
  auto best_fit_mem_reuse = std::make_shared<BestFitMemReuse>();
  auto tensor_0 = std::make_shared<KernelRefCount>();
  tensor_0->SetKernelRefCountInfo(0, 2048, kDynamicRefCount);
  best_fit_mem_reuse->AddNewMembufPtr(tensor_0.get(), kDynamicMem);

  auto tensor_1 = std::make_shared<KernelRefCount>();
  tensor_1->SetKernelRefCountInfo(1, 800, kDynamicRefCount);
  auto is_split = best_fit_mem_reuse->IsSplit(tensor_1->size_, tensor_0->size_);
  ASSERT_EQ(is_split, true);

  best_fit_mem_reuse->SplitMembuf(tensor_1.get(), 0);
  auto allocated_size = best_fit_mem_reuse->GetAllocatedSize();
  ASSERT_EQ(allocated_size, 2048);
}

TEST_F(TestMemReuseAllocator, mem_reuse_allocator_align) {
  auto best_fit_mem_reuse = std::make_shared<BestFitMemReuse>();
  auto size = best_fit_mem_reuse->AlignCommonMemorySize(510);
  ASSERT_EQ(size, 1024);
}
} // namespace memreuse
} // namespace mindspore
@@ -20,7 +20,6 @@
#include "backend/session/session_basic.h"
#include "backend/session/ascend_session.h"
#include "backend/optimizer/mem_reuse/kernel_refcount.h"
#include "backend/optimizer/mem_reuse/mem_reuse_allocator.h"
#include "runtime/device/kernel_info.h"
#include "backend/kernel_compiler/tbe/tbe_kernel_mod.h"
#include "frontend/operator/ops.h"
@@ -229,19 +228,6 @@ TEST_F(TestMemReuseWithPy, KernelRef) {
  ASSERT_NE(membuf_ptr, nullptr);
}

TEST_F(TestMemReuseWithPy, ReuseAssignDynamicMemory) {
  MemReuseUtilPtr mem_reuse_util_ptr = std::make_shared<MemReuseUtil>();
  ASSERT_NE(mem_reuse_util_ptr, nullptr);
  auto bestfit_mem_reuse = std::make_shared<BestFitMemReuse>();
  ASSERT_NE(bestfit_mem_reuse, nullptr);
  bestfit_mem_reuse->Reuse(mem_reuse_util_ptr.get());
  auto total_size = bestfit_mem_reuse->GetAllocatedSize();
  ASSERT_EQ(total_size, 0);
  KernelGraphPtr kernel_graph = std::make_shared<KernelGraph>();
  bool ret = mem_reuse_util_ptr->InitDynamicKernelRef(kernel_graph.get());
  ASSERT_EQ(ret, true);
}

TEST_F(TestMemReuseWithPy, TestSetInfo) {
  KernelGraphPtr g = CreateKernelGraph();
  ASSERT_NE(g, nullptr);