add mem analyzer for swap

kswang 2023-01-19 10:52:53 +08:00
parent e12c672e95
commit 0a7533fe35
10 changed files with 462 additions and 10 deletions

View File

@@ -1,4 +1,4 @@
file(GLOB_RECURSE DEVICE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "common/*.cc"
file(GLOB_RECURSE DEVICE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "common/*.cc" "gsm/*.cc"
"kernel_info.cc" "executor/dynamic_kernel.cc" "executor/executor_callback.cc" "kernel_runtime.cc"
"memory_manager.cc" "kernel_runtime_manager.cc" "convert_tensor_utils.cc" "memory_scheduler.cc"
"memory_offload_strategy.cc" "launch_kernel.cc" "launch_mul.cc" "tensor_array.cc"

View File

@@ -0,0 +1,202 @@
/**
* Copyright 2023 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "runtime/device/gsm/mem_usage_analyzer.h"
#include <memory>
#include "backend/common/session/anf_runtime_algorithm.h"
#include "include/common/utils/anfalgo.h"
namespace mindspore {
namespace device {
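// Register the tensor behind (node, index) and return its tensor id. Value-node, parameter,
// output and workspace tensors are deduplicated through separate lookup maps so that the same
// device address is only recorded once.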
size_t MemUsageAnalyzer::AddTensorInfo(const AnfNodePtr &node, size_t index, bool is_workspace) {
auto add_to_container = [this](const AnfNodePtr &node, size_t index,
std::map<AnfNodePtr, std::map<size_t, size_t>> *container, bool is_workspace) {
MS_EXCEPTION_IF_NULL(node);
MS_EXCEPTION_IF_NULL(container);
auto iter_node = container->find(node);
if (iter_node != container->end()) {
auto iter_tid = iter_node->second.find(index);
if (iter_tid == iter_node->second.end()) {
iter_node->second[index] = tensor_num_;
} else {
return iter_tid->second;
}
} else {
(*container)[node] = std::map<size_t, size_t>({{index, tensor_num_}});
}
DeviceAddressPtr address = nullptr;
if (is_workspace) {
address = AnfAlgo::GetMutableWorkspaceAddr(node, index);
} else {
address = AnfAlgo::GetMutableOutputAddr(node, index, true);
}
MS_EXCEPTION_IF_NULL(address);
auto info = std::make_shared<MemUsageTensorInfo>();
info->tensor_id_ = tensor_num_;
info->real_tensor_id_ = tensor_num_;
info->tensor_size_ = address->GetSize();
info->node_ = node;
info->index_ = index;
info->is_workspace_ = is_workspace;
info->is_graph_input_ = !(node->isa<CNode>());
info->is_graph_output_ = IsGraphOutput(node, index);
(void)tensor_infos_.emplace_back(info);
++tensor_num_;
return info->tensor_id_;
};
MS_EXCEPTION_IF_NULL(node);
size_t tensor_id = 0;
if (node->isa<ValueNode>()) {
tensor_id = add_to_container(node, index, &kernel_input_value_tid_, false);
} else if (node->isa<Parameter>()) {
tensor_id = add_to_container(node, index, &kernel_input_param_tid_, false);
} else if (is_workspace) {
tensor_id = add_to_container(node, index, &kernel_workspace_tid_, true);
} else {
tensor_id = add_to_container(node, index, &kernel_output_tid_, false);
}
return tensor_id;
}
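// Entry point: collect graph outputs first, then per-kernel tensor usage, and finally the
// fused tensors of communication kernels.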
void MemUsageAnalyzer::Analyze(const KernelGraphPtr &graph) {
AddOutputNodeInfo(graph);
AddKernelAndTensorInfo(graph);
AddFusedTensorInfo();
}
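// Record which (node, output index) pairs are graph outputs, resolving nop nodes to the real
// producer of the data.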
void MemUsageAnalyzer::AddOutputNodeInfo(const KernelGraphPtr &graph) {
MS_EXCEPTION_IF_NULL(graph);
auto outputs = common::AnfAlgo::GetAllOutputWithIndex(graph->output());
for (const auto &output : outputs) {
const auto &output_with_index = common::AnfAlgo::FetchRealNodeSkipMonadControl(output);
auto output_node = output_with_index.first;
MS_EXCEPTION_IF_NULL(output_node);
auto output_index = output_with_index.second;
if (common::AnfAlgo::IsNopNode(output_node)) {
auto real_node_with_index = common::AnfAlgo::GetPrevNodeOutput(output_node, output_index, true);
output_node = real_node_with_index.first;
output_index = real_node_with_index.second;
}
(void)graph_output_nodes_[output_node].insert(output_index);
}
}
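// Whether the tensor produced at (node, index) is a graph output.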
bool MemUsageAnalyzer::IsGraphOutput(const AnfNodePtr &node, size_t index) {
auto iter = graph_output_nodes_.find(node);
if (iter == graph_output_nodes_.end()) {
return false;
}
if (iter->second.find(index) == iter->second.end()) {
return false;
}
return true;
}
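// Group the inputs and the outputs of every communication kernel into an extra fused tensor
// whose size is the sum of its members, so that the group can be swapped as one unit.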
void MemUsageAnalyzer::AddFusedTensorInfo() {
auto add_fused_tensor = [this](const std::vector<size_t> &tensors, size_t kernel_id) {
if (tensors.size() <= 1) {
return;
}
auto info = std::make_shared<MemUsageTensorInfo>();
info->tensor_id_ = tensor_num_;
info->real_tensor_id_ = tensor_num_;
info->tensor_size_ = 0;
info->node_ = nullptr;
info->index_ = 0;
(void)tensor_infos_.emplace_back(info);
++tensor_num_;
for (auto tensor_id : tensors) {
auto tensor_info = GetMemUsageTensorInfo(tensor_id);
tensor_info->real_tensor_id_ = info->tensor_id_;
info->tensor_size_ += tensor_info->tensor_size_;
(void)info->fused_tensor_ids_.emplace_back(tensor_info->tensor_id_);
(void)info->used_by_kernels_.emplace_back(kernel_id);
}
};
for (size_t i = 0; i < kernel_infos_.size(); ++i) {
auto &info = kernel_infos_[i];
MS_EXCEPTION_IF_NULL(info);
if (!info->is_comm_) {
continue;
}
add_fused_tensor(info->input_tensors_, i);
add_fused_tensor(info->output_tensors_, i);
}
}
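// Walk the execution order, register the input/output/workspace tensors of every kernel and
// keep the largest single-kernel footprint as the least device memory required.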
void MemUsageAnalyzer::AddKernelAndTensorInfo(const KernelGraphPtr &graph) {
MS_EXCEPTION_IF_NULL(graph);
auto &exec_order = graph->execution_order();
auto real_kernel_num = exec_order.size();
kernel_infos_.resize(real_kernel_num);
auto add_tensor_usage = [this](size_t tensor_id, size_t kernel_id, size_t *kernel_mem) {
auto tensor_info = GetMemUsageTensorInfo(tensor_id);
(void)tensor_info->used_by_kernels_.emplace_back(kernel_id);
*kernel_mem += tensor_info->tensor_size_;
};
for (size_t i = 0; i < real_kernel_num; ++i) {
const auto &node = exec_order[i];
auto kernel_mod = AnfAlgo::GetKernelMod(node);
MS_EXCEPTION_IF_NULL(kernel_mod);
auto kernel_info = std::make_shared<MemUsageKernelInfo>();
kernel_info->is_comm_ = common::AnfAlgo::IsCommunicationOp(node);
kernel_info->update_input_ = common::AnfAlgo::IsUpdateParameterKernel(node);
// Memory used by this kernel
size_t kernel_mem = 0;
// Add input tensors
const auto input_num = kernel_mod->GetInputSizeList().size();
for (size_t index = 0; index < input_num; ++index) {
const auto &prev_node_output = common::AnfAlgo::GetPrevNodeOutput(node, index, true);
auto tensor_id = AddTensorInfo(prev_node_output.first, prev_node_output.second);
(void)kernel_info->input_tensors_.emplace_back(tensor_id);
add_tensor_usage(tensor_id, i, &kernel_mem);
}
// Add output tensors
const auto output_num = kernel_mod->GetOutputSizeList().size();
for (size_t index = 0; index < output_num; ++index) {
auto tensor_id = AddTensorInfo(node, index);
(void)kernel_info->output_tensors_.emplace_back(tensor_id);
add_tensor_usage(tensor_id, i, &kernel_mem);
}
// Add workspace tensors
const auto workspace_num = kernel_mod->GetWorkspaceSizeList().size();
for (size_t index = 0; index < workspace_num; ++index) {
auto tensor_id = AddTensorInfo(node, index, true);
(void)kernel_info->workspace_tensors_.emplace_back(tensor_id);
add_tensor_usage(tensor_id, i, &kernel_mem);
}
if (kernel_mem > least_mem_) {
least_mem_ = kernel_mem;
}
kernel_infos_[i] = kernel_info;
}
}
} // namespace device
} // namespace mindspore

View File

@@ -0,0 +1,71 @@
/**
* Copyright 2023 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_RUNTIME_DEVICE_GSM_MEM_USAGE_ANALYZER_H_
#define MINDSPORE_CCSRC_RUNTIME_DEVICE_GSM_MEM_USAGE_ANALYZER_H_
#include <memory>
#include <vector>
#include <map>
#include <set>
#include "backend/common/session/kernel_graph.h"
#include "runtime/device/gsm/swap_strategy.h"
namespace mindspore {
namespace device {
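// Analyzes a kernel graph and produces the per-kernel and per-tensor memory usage
// information consumed when building a swap strategy.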
class MemUsageAnalyzer {
public:
MemUsageAnalyzer() = default;
~MemUsageAnalyzer() = default;
void Analyze(const KernelGraphPtr &graph);
const std::vector<std::shared_ptr<MemUsageKernelInfo>> &GetMemUsageKernelInfos() const { return kernel_infos_; }
const std::vector<std::shared_ptr<MemUsageTensorInfo>> &GetMemUsageTensorInfos() const { return tensor_infos_; }
size_t LeastMemNeeded() const { return least_mem_; }
const std::shared_ptr<MemUsageKernelInfo> GetMemUsageKernelInfo(size_t kid) const {
if (kid >= kernel_infos_.size()) {
MS_LOG(EXCEPTION) << "Invalid kernel id!!!";
}
return kernel_infos_[kid];
}
const std::shared_ptr<MemUsageTensorInfo> GetMemUsageTensorInfo(size_t tid) const {
if (tid >= tensor_infos_.size()) {
MS_LOG(EXCEPTION) << "Invalid tensor id!!!";
}
return tensor_infos_[tid];
}
private:
void AddOutputNodeInfo(const KernelGraphPtr &graph);
void AddKernelAndTensorInfo(const KernelGraphPtr &graph);
size_t AddTensorInfo(const AnfNodePtr &node, size_t index, bool is_workspace = false);
void AddFusedTensorInfo();
bool IsGraphOutput(const AnfNodePtr &node, size_t index);
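  // Deduplication maps from node to (output/workspace index -> tensor id).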
std::map<AnfNodePtr, std::map<size_t, size_t>> kernel_input_value_tid_;
std::map<AnfNodePtr, std::map<size_t, size_t>> kernel_input_param_tid_;
std::map<AnfNodePtr, std::map<size_t, size_t>> kernel_output_tid_;
std::map<AnfNodePtr, std::map<size_t, size_t>> kernel_workspace_tid_;
std::map<AnfNodePtr, std::set<size_t>> graph_output_nodes_;
std::vector<std::shared_ptr<MemUsageTensorInfo>> tensor_infos_;
std::vector<std::shared_ptr<MemUsageKernelInfo>> kernel_infos_;
size_t tensor_num_{0};
size_t least_mem_{0};
};
} // namespace device
} // namespace mindspore
#endif // MINDSPORE_CCSRC_RUNTIME_DEVICE_GSM_MEM_USAGE_ANALYZER_H_

View File

@@ -0,0 +1,86 @@
/**
* Copyright 2023 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_RUNTIME_DEVICE_GSM_SWAP_STRATEGY_H_
#define MINDSPORE_CCSRC_RUNTIME_DEVICE_GSM_SWAP_STRATEGY_H_
#include <vector>
#include <map>
#include <memory>
#include "ir/anf.h"
namespace mindspore {
namespace device {
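// Memory usage of a single tensor (graph input, kernel output, workspace or fused tensor).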
struct MemUsageTensorInfo {
size_t tensor_id_{0};
size_t real_tensor_id_{0};
size_t tensor_size_{0};
AnfNodePtr node_{nullptr};
size_t index_{0};
bool is_workspace_{false};
bool is_graph_output_{false};
bool is_graph_input_{false};
std::vector<size_t> used_by_kernels_;
std::vector<size_t> fused_tensor_ids_;
};
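// Memory usage of a single kernel, referencing its tensors by id.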
struct MemUsageKernelInfo {
bool is_comm_{false};
bool update_input_{false};
std::vector<size_t> input_tensors_;
std::vector<size_t> output_tensors_;
std::vector<size_t> workspace_tensors_;
};
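// Direction of a swap between device memory (HBM), host memory (DDR) and disk.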
enum class SwapActionType {
kUnDefined,
kHBM2DDR,
kHBM2DISK,
kDDR2HBM,
kDISK2HBM,
kDDR2DISK,
kDISK2DDR,
kAllocHBM,
};
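// A single swap operation applied to one tensor.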
struct TensorAction {
SwapActionType action_;
size_t tensor_id_{0};
// Avoid copying if the data already exists in the target storage and is not updated by the kernel
bool avoid_copy_{false};
};
struct SwapAction {
std::vector<std::shared_ptr<TensorAction>> actions_;
};
struct SwapLink {
SwapLink(size_t from, size_t to) : from_(from), to_(to) {}
~SwapLink() = default;
size_t from_{0};
size_t to_{0};
};
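// Overall swap plan: indexed graph nodes plus the swap actions inserted between them,
// connected by from/to links.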
struct SwapStrategy {
size_t kernel_num_{0};
size_t virtual_node_num_{0};
std::map<size_t, AnfNodePtr> nodes_;
std::map<size_t, std::shared_ptr<SwapAction>> actions_;
std::vector<std::shared_ptr<SwapLink>> links_;
std::vector<std::shared_ptr<MemUsageTensorInfo>> tensor_infos_;
std::vector<std::shared_ptr<MemUsageKernelInfo>> kernel_infos_;
};
} // namespace device
} // namespace mindspore
#endif // MINDSPORE_CCSRC_RUNTIME_DEVICE_GSM_SWAP_STRATEGY_H_

View File

@@ -96,6 +96,7 @@ if(MSLITE_ENABLE_CLOUD_FUSION_INFERENCE OR MSLITE_ENABLE_CLOUD_INFERENCE)
${CCSRC_DIR}/runtime/device/memory_offload_strategy.cc
${CCSRC_DIR}/runtime/device/memory_manager.cc
${CCSRC_DIR}/runtime/device/auto_mem_offload.cc
${CCSRC_DIR}/runtime/device/gsm/mem_usage_analyzer.cc
${CCSRC_DIR}/runtime/device/common_somas_allocator.cc
${CCSRC_DIR}/runtime/pynative/op_runtime_info.cc
${CCSRC_DIR}/runtime/hardware/device_type.cc

View File

@@ -77,6 +77,7 @@ if(ENABLE_MINDDATA)
./tbe/*.cc
./mindapi/*.cc
./runtime/graph_scheduler/*.cc
./runtime/device/gsm/*.cc
./plugin/device/cpu/hal/*.cc
./place/*.cc
./ops/test_ops_fake_quant_param.cc

View File

@@ -134,7 +134,7 @@ std::shared_ptr<session::KernelGraph> BackendCommon::Compile(const FuncGraphPtr
func_graph->set_manager(new_manager);
const std::string kDefaultDeviceName = "CPU";
auto graph_partition = std::make_shared<compile::GraphPartition>(compile::GetMsNonlinearOps(), kDefaultDeviceName);
auto graph_partition = std::make_shared<compile::GraphPartition>(compile::GetMsNonlinearOps(), kMsConvert);
bool multi_target = false;
auto segments = graph_partition->Partition(func_graph, &multi_target);
if (segments.empty()) {

View File

@@ -80,7 +80,7 @@ class TestDeviceResManager : public device::DeviceResManager {
virtual DeviceAddressPtr CreateDeviceAddress(void *const device_ptr, size_t device_size, const string &format,
TypeId type_id, const ShapeVector &shape,
const UserDataPtr &user_data = nullptr) const {
return std::make_shared<TestDeviceAddress>(nullptr, 0);
return std::make_shared<TestDeviceAddress>(device_ptr, device_size);
}
};
@@ -128,13 +128,13 @@ class TestKernelExecutor : public device::KernelExecutor {
std::vector<size_t> output_size_list;
size_t input_num = common::AnfAlgo::GetInputTensorNum(node);
for (size_t input_index = 0; input_index < input_num; ++input_index) {
TypeId type_id = AnfAlgo::GetInputDeviceDataType(node, input_index);
size_t type_size = GetTypeByte(TypeIdToType(type_id));
auto shape = AnfAlgo::GetInputDeviceShape(node, input_index);
size_t tensor_size =
shape.empty() ? type_size : std::accumulate(shape.begin(), shape.end(), type_size, std::multiplies<size_t>());
tensor_size = std::max(tensor_size, type_size);
auto [input_node, index] = common::AnfAlgo::GetPrevNodeOutput(node, input_index, true);
size_t tensor_size = AnfAlgo::GetOutputTensorMemSize(input_node, index);
(void)input_size_list.emplace_back(tensor_size);
if (AnfAlgo::OutputAddrExist(input_node, index)) {
continue;
}
AnfAlgo::SetOutputAddr(std::make_shared<TestDeviceAddress>(nullptr, tensor_size), index, input_node.get());
}
size_t output_num = AnfAlgo::GetOutputTensorNum(node);
for (size_t output_index = 0; output_index < output_num; ++output_index) {
@@ -143,11 +143,13 @@ class TestKernelExecutor : public device::KernelExecutor {
AnfAlgo::SetOutputAddr(std::make_shared<TestDeviceAddress>(nullptr, tensor_size), output_index, node.get());
}
const size_t kDefaultWorkSpaceSize = 4;
auto kernel_mod_ptr = std::make_shared<TestKernelMod>();
kernel_mod_ptr->SetInputSizeList(input_size_list);
kernel_mod_ptr->SetOutputSizeList(output_size_list);
kernel_mod_ptr->SetWorkspaceSizeList({4});
kernel_mod_ptr->SetWorkspaceSizeList({kDefaultWorkSpaceSize});
AnfAlgo::SetKernelMod(kernel_mod_ptr, node.get());
AnfAlgo::SetWorkspaceAddr(std::make_shared<TestDeviceAddress>(nullptr, kDefaultWorkSpaceSize), 0, node.get());
}
}
};

View File

@@ -0,0 +1,27 @@
# Copyright 2023 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
from mindspore.ops import operations as P
add = P.Add()
addn = P.AddN()
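# Network consumed by the MemUsageAnalyzer unit test: four chained Add ops followed by an AddN.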
def add_net(x1, x2, x3, x4, x5):
sum1 = add(x1, x2)
sum2 = add(sum1, x3)
sum3 = add(sum2, x4)
sum4 = add(sum3, x5)
ret = addn((sum4, sum1, sum2))
return ret

View File

@@ -0,0 +1,62 @@
/**
* Copyright 2023 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <vector>
#include <map>
#include "common/common_test.h"
#include "common/backend_common_test.h"
#include "common/py_func_graph_fetcher.h"
#include "runtime/device/gsm/mem_usage_analyzer.h"
namespace mindspore::device {
class TestMemUsageAnalyzer : public BackendCommon {
public:
TestMemUsageAnalyzer() : get_py_func_("gtest_input.runtime.device.gsm.mem_usage_analyzer_test", true) {}
UT::PyFuncGraphFetcher get_py_func_;
};
/// Feature: MemUsageAnalyzer
/// Description: Test MemUsageAnalyzer interface
/// Expectation: Pass all interface tests
TEST_F(TestMemUsageAnalyzer, test_mem_usage_analyzer) {
auto net = get_py_func_("add_net");
EXPECT_NE(net, nullptr);
std::vector<int64_t> shp_x{1, 2, 2, 2};
auto x_abstract = std::make_shared<abstract::AbstractTensor>(kFloat32, shp_x);
AbstractBasePtrList args_spec_list{x_abstract, x_abstract, x_abstract, x_abstract, x_abstract};
auto func_graph = GetFuncGraph(net, args_spec_list);
auto kernel_graph = Compile(func_graph);
auto analyzer = std::make_shared<MemUsageAnalyzer>();
analyzer->Analyze(kernel_graph);
auto kernel_infos = analyzer->GetMemUsageKernelInfos();
auto tensor_infos = analyzer->GetMemUsageTensorInfos();
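  // 5 kernels (4 Add + 1 AddN); 15 tensors = 5 graph inputs + 5 kernel outputs + 5 workspaces.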
ASSERT_EQ(5, kernel_infos.size());
ASSERT_EQ(15, tensor_infos.size());
for (size_t i = 0; i < kernel_infos.size(); ++i) {
ASSERT_NE(nullptr, analyzer->GetMemUsageKernelInfo(i));
}
for (size_t i = 0; i < tensor_infos.size(); ++i) {
ASSERT_NE(nullptr, analyzer->GetMemUsageTensorInfo(i));
}
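  // The largest single-kernel footprint comes from AddN: (3 inputs + 1 output) * 32 bytes of
  // float32 data plus the 4-byte default workspace = 132 bytes.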
ASSERT_EQ(132, analyzer->LeastMemNeeded());
}
} // namespace mindspore::device