forked from mindspore-Ecosystem/mindspore
add mem analyzer for swap
This commit is contained in:
parent e12c672e95
commit 0a7533fe35

@@ -1,4 +1,4 @@
-file(GLOB_RECURSE DEVICE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "common/*.cc"
+file(GLOB_RECURSE DEVICE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "common/*.cc" "gsm/*.cc"
     "kernel_info.cc" "executor/dynamic_kernel.cc" "executor/executor_callback.cc" "kernel_runtime.cc"
     "memory_manager.cc" "kernel_runtime_manager.cc" "convert_tensor_utils.cc" "memory_scheduler.cc"
     "memory_offload_strategy.cc" "launch_kernel.cc" "launch_mul.cc" "tensor_array.cc"

@@ -0,0 +1,202 @@
/**
 * Copyright 2023 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "runtime/device/gsm/mem_usage_analyzer.h"
#include <memory>
#include "backend/common/session/anf_runtime_algorithm.h"
#include "include/common/utils/anfalgo.h"

namespace mindspore {
namespace device {
size_t MemUsageAnalyzer::AddTensorInfo(const AnfNodePtr &node, size_t index, bool is_workspace) {
  auto add_to_container = [this](const AnfNodePtr &node, size_t index,
                                 std::map<AnfNodePtr, std::map<size_t, size_t>> *container, bool is_workspace) {
    MS_EXCEPTION_IF_NULL(node);
    MS_EXCEPTION_IF_NULL(container);
    auto iter_node = container->find(node);
    if (iter_node != container->end()) {
      auto iter_tid = iter_node->second.find(index);
      if (iter_tid == iter_node->second.end()) {
        iter_node->second[index] = tensor_num_;
      } else {
        return iter_tid->second;
      }
    } else {
      (*container)[node] = std::map<size_t, size_t>({{index, tensor_num_}});
    }

    DeviceAddressPtr address = nullptr;
    if (is_workspace) {
      address = AnfAlgo::GetMutableWorkspaceAddr(node, index);
    } else {
      address = AnfAlgo::GetMutableOutputAddr(node, index, true);
    }

    MS_EXCEPTION_IF_NULL(address);
    auto info = std::make_shared<MemUsageTensorInfo>();
    info->tensor_id_ = tensor_num_;
    info->real_tensor_id_ = tensor_num_;
    info->tensor_size_ = address->GetSize();
    info->node_ = node;
    info->index_ = index;
    info->is_workspace_ = is_workspace;
    info->is_graph_input_ = !(node->isa<CNode>());
    info->is_graph_output_ = IsGraphOutput(node, index);
    (void)tensor_infos_.emplace_back(info);
    ++tensor_num_;
    return info->tensor_id_;
  };

  MS_EXCEPTION_IF_NULL(node);
  size_t tensor_id = 0;
  if (node->isa<ValueNode>()) {
    tensor_id = add_to_container(node, index, &kernel_input_value_tid_, false);
  } else if (node->isa<Parameter>()) {
    tensor_id = add_to_container(node, index, &kernel_input_param_tid_, false);
  } else if (is_workspace) {
    tensor_id = add_to_container(node, index, &kernel_workspace_tid_, true);
  } else {
    tensor_id = add_to_container(node, index, &kernel_output_tid_, false);
  }
  return tensor_id;
}
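
// Analyze() makes three passes: record the graph's real output nodes, walk the
// execution order collecting per-kernel tensor usage, then aggregate the
// operands of communication kernels into fused tensors.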
void MemUsageAnalyzer::Analyze(const KernelGraphPtr &graph) {
  AddOutputNodeInfo(graph);
  AddKernelAndTensorInfo(graph);
  AddFusedTensorInfo();
}

void MemUsageAnalyzer::AddOutputNodeInfo(const KernelGraphPtr &graph) {
  MS_EXCEPTION_IF_NULL(graph);
  auto outputs = common::AnfAlgo::GetAllOutputWithIndex(graph->output());
  for (const auto &output : outputs) {
    const auto &output_with_index = common::AnfAlgo::FetchRealNodeSkipMonadControl(output);
    auto output_node = output_with_index.first;
    MS_EXCEPTION_IF_NULL(output_node);
    auto output_index = output_with_index.second;
    if (common::AnfAlgo::IsNopNode(output_node)) {
      auto real_node_with_index = common::AnfAlgo::GetPrevNodeOutput(output_node, output_index, true);
      output_node = real_node_with_index.first;
      output_index = real_node_with_index.second;
    }
    (void)graph_output_nodes_[output_node].insert(output_index);
  }
}

bool MemUsageAnalyzer::IsGraphOutput(const AnfNodePtr &node, size_t index) {
  auto iter = graph_output_nodes_.find(node);
  if (iter == graph_output_nodes_.end()) {
    return false;
  }

  if (iter->second.find(index) == iter->second.end()) {
    return false;
  }

  return true;
}
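
// Communication kernels consume and produce their operands together, so each
// multi-tensor input or output list of a comm kernel is modeled as one fused
// tensor: the members keep their own ids, but their real_tensor_id_ is
// redirected to the aggregate, whose size is the sum of the member sizes.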
void MemUsageAnalyzer::AddFusedTensorInfo() {
  auto add_fused_tensor = [this](const std::vector<size_t> &tensors, size_t kernel_id) {
    if (tensors.size() <= 1) {
      return;
    }

    auto info = std::make_shared<MemUsageTensorInfo>();
    info->tensor_id_ = tensor_num_;
    info->real_tensor_id_ = tensor_num_;
    info->tensor_size_ = 0;
    info->node_ = nullptr;
    info->index_ = 0;
    (void)tensor_infos_.emplace_back(info);
    ++tensor_num_;

    for (auto tensor_id : tensors) {
      auto tensor_info = GetMemUsageTensorInfo(tensor_id);
      tensor_info->real_tensor_id_ = info->tensor_id_;
      info->tensor_size_ += tensor_info->tensor_size_;
      (void)info->fused_tensor_ids_.emplace_back(tensor_info->tensor_id_);
      (void)info->used_by_kernels_.emplace_back(kernel_id);
    }
  };

  for (size_t i = 0; i < kernel_infos_.size(); ++i) {
    auto &info = kernel_infos_[i];
    MS_EXCEPTION_IF_NULL(info);
    if (!info->is_comm_) {
      continue;
    }
    add_fused_tensor(info->input_tensors_, i);
    add_fused_tensor(info->output_tensors_, i);
  }
}
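
// Per-kernel pass: register every input, output, and workspace tensor, and
// track the largest single-kernel footprint in least_mem_.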
void MemUsageAnalyzer::AddKernelAndTensorInfo(const KernelGraphPtr &graph) {
  MS_EXCEPTION_IF_NULL(graph);
  auto &exec_order = graph->execution_order();
  auto real_kernel_num = exec_order.size();
  kernel_infos_.resize(real_kernel_num);

  auto add_tensor_usage = [this](size_t tensor_id, size_t kernel_id, size_t *kernel_mem) {
    auto tensor_info = GetMemUsageTensorInfo(tensor_id);
    (void)tensor_info->used_by_kernels_.emplace_back(kernel_id);
    *kernel_mem += tensor_info->tensor_size_;
  };

  for (size_t i = 0; i < real_kernel_num; ++i) {
    const auto &node = exec_order[i];
    auto kernel_mod = AnfAlgo::GetKernelMod(node);
    MS_EXCEPTION_IF_NULL(kernel_mod);
    auto kernel_info = std::make_shared<MemUsageKernelInfo>();
    kernel_info->is_comm_ = common::AnfAlgo::IsCommunicationOp(node);
    kernel_info->update_input_ = common::AnfAlgo::IsUpdateParameterKernel(node);

    // Memory used by this kernel
    size_t kernel_mem = 0;

    // Add input tensors
    const auto input_num = kernel_mod->GetInputSizeList().size();
    for (size_t index = 0; index < input_num; ++index) {
      const auto &prev_node_output = common::AnfAlgo::GetPrevNodeOutput(node, index, true);
      auto tensor_id = AddTensorInfo(prev_node_output.first, prev_node_output.second);
      (void)kernel_info->input_tensors_.emplace_back(tensor_id);
      add_tensor_usage(tensor_id, i, &kernel_mem);
    }

    // Add output tensors
    const auto output_num = kernel_mod->GetOutputSizeList().size();
    for (size_t index = 0; index < output_num; ++index) {
      auto tensor_id = AddTensorInfo(node, index);
      (void)kernel_info->output_tensors_.emplace_back(tensor_id);
      add_tensor_usage(tensor_id, i, &kernel_mem);
    }

    // Add workspace tensors
    const auto workspace_num = kernel_mod->GetWorkspaceSizeList().size();
    for (size_t index = 0; index < workspace_num; ++index) {
      auto tensor_id = AddTensorInfo(node, index, true);
      (void)kernel_info->workspace_tensors_.emplace_back(tensor_id);
      add_tensor_usage(tensor_id, i, &kernel_mem);
    }

    if (kernel_mem > least_mem_) {
      least_mem_ = kernel_mem;
    }

    kernel_infos_[i] = kernel_info;
  }
}
}  // namespace device
}  // namespace mindspore
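
The least_mem_ value computed above is the floor for swapping: a graph can still execute as long as, for each kernel taken alone, its inputs, outputs, and workspace fit in device memory at once. A standalone sketch of that rule (not part of the commit; KernelFootprint is a made-up type for illustration):

#include <algorithm>
#include <cstddef>
#include <numeric>
#include <vector>

struct KernelFootprint {  // hypothetical stand-in for one kernel's tensor sizes
  std::vector<size_t> input_sizes;
  std::vector<size_t> output_sizes;
  std::vector<size_t> workspace_sizes;
};

// Mirrors the accounting in AddKernelAndTensorInfo: sum each kernel's tensor
// sizes, then keep the maximum over all kernels.
size_t LeastMemNeeded(const std::vector<KernelFootprint> &kernels) {
  size_t least = 0;
  for (const auto &k : kernels) {
    size_t mem = std::accumulate(k.input_sizes.begin(), k.input_sizes.end(), static_cast<size_t>(0));
    mem = std::accumulate(k.output_sizes.begin(), k.output_sizes.end(), mem);
    mem = std::accumulate(k.workspace_sizes.begin(), k.workspace_sizes.end(), mem);
    least = std::max(least, mem);
  }
  return least;
}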

@@ -0,0 +1,71 @@
/**
 * Copyright 2023 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_CCSRC_RUNTIME_DEVICE_GSM_MEM_USAGE_ANALYZER_H_
#define MINDSPORE_CCSRC_RUNTIME_DEVICE_GSM_MEM_USAGE_ANALYZER_H_
#include <memory>
#include <vector>
#include <map>
#include <set>
#include "backend/common/session/kernel_graph.h"
#include "runtime/device/gsm/swap_strategy.h"

namespace mindspore {
namespace device {
class MemUsageAnalyzer {
 public:
  MemUsageAnalyzer() = default;
  ~MemUsageAnalyzer() = default;
  void Analyze(const KernelGraphPtr &graph);

  const std::vector<std::shared_ptr<MemUsageKernelInfo>> &GetMemUsageKernelInfos() const { return kernel_infos_; }

  const std::vector<std::shared_ptr<MemUsageTensorInfo>> &GetMemUsageTensorInfos() const { return tensor_infos_; }

  size_t LeastMemNeeded() const { return least_mem_; }

  const std::shared_ptr<MemUsageKernelInfo> GetMemUsageKernelInfo(size_t kid) const {
    if (kid >= kernel_infos_.size()) {
      MS_LOG(EXCEPTION) << "Invalid kernel id: " << kid;
    }
    return kernel_infos_[kid];
  }

  const std::shared_ptr<MemUsageTensorInfo> GetMemUsageTensorInfo(size_t tid) const {
    if (tid >= tensor_infos_.size()) {
      MS_LOG(EXCEPTION) << "Invalid tensor id: " << tid;
    }
    return tensor_infos_[tid];
  }

 private:
  void AddOutputNodeInfo(const KernelGraphPtr &graph);
  void AddKernelAndTensorInfo(const KernelGraphPtr &graph);
  size_t AddTensorInfo(const AnfNodePtr &node, size_t index, bool is_workspace = false);
  void AddFusedTensorInfo();
  bool IsGraphOutput(const AnfNodePtr &node, size_t index);
  std::map<AnfNodePtr, std::map<size_t, size_t>> kernel_input_value_tid_;
  std::map<AnfNodePtr, std::map<size_t, size_t>> kernel_input_param_tid_;
  std::map<AnfNodePtr, std::map<size_t, size_t>> kernel_output_tid_;
  std::map<AnfNodePtr, std::map<size_t, size_t>> kernel_workspace_tid_;
  std::map<AnfNodePtr, std::set<size_t>> graph_output_nodes_;
  std::vector<std::shared_ptr<MemUsageTensorInfo>> tensor_infos_;
  std::vector<std::shared_ptr<MemUsageKernelInfo>> kernel_infos_;

  size_t tensor_num_{0};
  size_t least_mem_{0};
};
}  // namespace device
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_RUNTIME_DEVICE_GSM_MEM_USAGE_ANALYZER_H_
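
A minimal usage sketch against this interface (not part of the commit; it assumes a KernelGraphPtr already compiled by the session, as in the unit test at the bottom of this change, and SummarizeMemUsage is a hypothetical helper):

#include <memory>
#include <utility>
#include "runtime/device/gsm/mem_usage_analyzer.h"

namespace mindspore::device {
// Run the analyzer on a compiled graph and pull out two numbers a swap planner
// needs early: total bytes of graph-output tensors and the single-kernel
// memory floor.
std::pair<size_t, size_t> SummarizeMemUsage(const KernelGraphPtr &graph) {
  auto analyzer = std::make_shared<MemUsageAnalyzer>();
  analyzer->Analyze(graph);
  size_t output_bytes = 0;
  for (const auto &tensor_info : analyzer->GetMemUsageTensorInfos()) {
    // For real (non-fused) tensors, used_by_kernels_ is filled in execution
    // order, so its first and last entries bound the tensor's live interval.
    if (tensor_info->is_graph_output_) {
      output_bytes += tensor_info->tensor_size_;
    }
  }
  return {output_bytes, analyzer->LeastMemNeeded()};
}
}  // namespace mindspore::device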

@@ -0,0 +1,86 @@
/**
 * Copyright 2023 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_CCSRC_RUNTIME_DEVICE_GSM_SWAP_STRATEGY_H_
#define MINDSPORE_CCSRC_RUNTIME_DEVICE_GSM_SWAP_STRATEGY_H_
#include <vector>
#include <map>
#include <memory>
#include "ir/anf.h"

namespace mindspore {
namespace device {
struct MemUsageTensorInfo {
  size_t tensor_id_{0};
  size_t real_tensor_id_{0};
  size_t tensor_size_{0};
  AnfNodePtr node_{nullptr};
  size_t index_{0};
  bool is_workspace_{false};
  bool is_graph_output_{false};
  bool is_graph_input_{false};
  std::vector<size_t> used_by_kernels_;
  std::vector<size_t> fused_tensor_ids_;
};

struct MemUsageKernelInfo {
  bool is_comm_{false};
  bool update_input_{false};
  std::vector<size_t> input_tensors_;
  std::vector<size_t> output_tensors_;
  std::vector<size_t> workspace_tensors_;
};

enum class SwapActionType {
  kUnDefined,
  kHBM2DDR,
  kHBM2DISK,
  kDDR2HBM,
  kDISK2HBM,
  kDDR2DISK,
  kDISK2DDR,
  kAllocHBM,
};

struct TensorAction {
  SwapActionType action_{SwapActionType::kUnDefined};
  size_t tensor_id_{0};
  // Avoid the copy if the data already exists in the target storage and is not
  // updated by a kernel
  bool avoid_copy_{false};
};

struct SwapAction {
  std::vector<std::shared_ptr<TensorAction>> actions_;
};

struct SwapLink {
  SwapLink(size_t from, size_t to) : from_(from), to_(to) {}
  ~SwapLink() = default;
  size_t from_{0};
  size_t to_{0};
};

struct SwapStrategy {
  size_t kernel_num_{0};
  size_t virtual_node_num_{0};
  std::map<size_t, AnfNodePtr> nodes_;
  std::map<size_t, std::shared_ptr<SwapAction>> actions_;
  std::vector<std::shared_ptr<SwapLink>> links_;
  std::vector<std::shared_ptr<MemUsageTensorInfo>> tensor_infos_;
  std::vector<std::shared_ptr<MemUsageKernelInfo>> kernel_infos_;
};
}  // namespace device
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_RUNTIME_DEVICE_GSM_SWAP_STRATEGY_H_
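
To make the data model concrete, here is a hand-built toy strategy (illustrative only: the commit ships no strategy builder, and the convention that swap actions occupy virtual node ids above the kernel ids is an assumption of this sketch). It swaps tensor 0 out to DDR after kernel 0 and back to HBM before kernel 2:

#include <memory>
#include "runtime/device/gsm/swap_strategy.h"

namespace mindspore::device {
std::shared_ptr<SwapStrategy> MakeToyStrategy() {
  auto strategy = std::make_shared<SwapStrategy>();
  strategy->kernel_num_ = 3;        // kernels occupy node ids 0..2
  strategy->virtual_node_num_ = 2;  // swap actions occupy node ids 3..4

  auto swap_out = std::make_shared<TensorAction>();
  swap_out->action_ = SwapActionType::kHBM2DDR;
  swap_out->tensor_id_ = 0;
  auto out_action = std::make_shared<SwapAction>();
  out_action->actions_.push_back(swap_out);
  strategy->actions_[3] = out_action;  // virtual node 3: swap-out

  auto swap_in = std::make_shared<TensorAction>();
  swap_in->action_ = SwapActionType::kDDR2HBM;
  swap_in->tensor_id_ = 0;
  auto in_action = std::make_shared<SwapAction>();
  in_action->actions_.push_back(swap_in);
  strategy->actions_[4] = in_action;  // virtual node 4: swap-in

  // Links order execution: kernel 0 -> swap-out -> swap-in -> kernel 2.
  strategy->links_.push_back(std::make_shared<SwapLink>(0, 3));
  strategy->links_.push_back(std::make_shared<SwapLink>(3, 4));
  strategy->links_.push_back(std::make_shared<SwapLink>(4, 2));
  return strategy;
}
}  // namespace mindspore::device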

@@ -96,6 +96,7 @@ if(MSLITE_ENABLE_CLOUD_FUSION_INFERENCE OR MSLITE_ENABLE_CLOUD_INFERENCE)
         ${CCSRC_DIR}/runtime/device/memory_offload_strategy.cc
         ${CCSRC_DIR}/runtime/device/memory_manager.cc
         ${CCSRC_DIR}/runtime/device/auto_mem_offload.cc
+        ${CCSRC_DIR}/runtime/device/gsm/mem_usage_analyzer.cc
         ${CCSRC_DIR}/runtime/device/common_somas_allocator.cc
         ${CCSRC_DIR}/runtime/pynative/op_runtime_info.cc
         ${CCSRC_DIR}/runtime/hardware/device_type.cc

@@ -77,6 +77,7 @@ if(ENABLE_MINDDATA)
         ./tbe/*.cc
         ./mindapi/*.cc
         ./runtime/graph_scheduler/*.cc
+        ./runtime/device/gsm/*.cc
         ./plugin/device/cpu/hal/*.cc
         ./place/*.cc
         ./ops/test_ops_fake_quant_param.cc

@@ -134,7 +134,7 @@ std::shared_ptr<session::KernelGraph> BackendCommon::Compile(const FuncGraphPtr
   func_graph->set_manager(new_manager);

   const std::string kDefaultDeviceName = "CPU";
-  auto graph_partition = std::make_shared<compile::GraphPartition>(compile::GetMsNonlinearOps(), kDefaultDeviceName);
+  auto graph_partition = std::make_shared<compile::GraphPartition>(compile::GetMsNonlinearOps(), kMsConvert);
   bool multi_target = false;
   auto segments = graph_partition->Partition(func_graph, &multi_target);
   if (segments.empty()) {

@@ -80,7 +80,7 @@ class TestDeviceResManager : public device::DeviceResManager {
   virtual DeviceAddressPtr CreateDeviceAddress(void *const device_ptr, size_t device_size, const string &format,
                                                TypeId type_id, const ShapeVector &shape,
                                                const UserDataPtr &user_data = nullptr) const {
-    return std::make_shared<TestDeviceAddress>(nullptr, 0);
+    return std::make_shared<TestDeviceAddress>(device_ptr, device_size);
   }
 };


@@ -128,13 +128,13 @@ class TestKernelExecutor : public device::KernelExecutor {
       std::vector<size_t> output_size_list;
       size_t input_num = common::AnfAlgo::GetInputTensorNum(node);
       for (size_t input_index = 0; input_index < input_num; ++input_index) {
-        TypeId type_id = AnfAlgo::GetInputDeviceDataType(node, input_index);
-        size_t type_size = GetTypeByte(TypeIdToType(type_id));
-        auto shape = AnfAlgo::GetInputDeviceShape(node, input_index);
-        size_t tensor_size =
-          shape.empty() ? type_size : std::accumulate(shape.begin(), shape.end(), type_size, std::multiplies<size_t>());
-        tensor_size = std::max(tensor_size, type_size);
+        auto [input_node, index] = common::AnfAlgo::GetPrevNodeOutput(node, input_index, true);
+        size_t tensor_size = AnfAlgo::GetOutputTensorMemSize(input_node, index);
         (void)input_size_list.emplace_back(tensor_size);
+        if (AnfAlgo::OutputAddrExist(input_node, index)) {
+          continue;
+        }
+        AnfAlgo::SetOutputAddr(std::make_shared<TestDeviceAddress>(nullptr, tensor_size), index, input_node.get());
       }
       size_t output_num = AnfAlgo::GetOutputTensorNum(node);
       for (size_t output_index = 0; output_index < output_num; ++output_index) {

@@ -143,11 +143,13 @@ class TestKernelExecutor : public device::KernelExecutor {
         AnfAlgo::SetOutputAddr(std::make_shared<TestDeviceAddress>(nullptr, tensor_size), output_index, node.get());
       }

+      const size_t kDefaultWorkSpaceSize = 4;
       auto kernel_mod_ptr = std::make_shared<TestKernelMod>();
       kernel_mod_ptr->SetInputSizeList(input_size_list);
       kernel_mod_ptr->SetOutputSizeList(output_size_list);
-      kernel_mod_ptr->SetWorkspaceSizeList({4});
+      kernel_mod_ptr->SetWorkspaceSizeList({kDefaultWorkSpaceSize});
       AnfAlgo::SetKernelMod(kernel_mod_ptr, node.get());
+      AnfAlgo::SetWorkspaceAddr(std::make_shared<TestDeviceAddress>(nullptr, kDefaultWorkSpaceSize), 0, node.get());
     }
   }
 };

@@ -0,0 +1,27 @@
# Copyright 2023 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
from mindspore.ops import operations as P

add = P.Add()
addn = P.AddN()


def add_net(x1, x2, x3, x4, x5):
    sum1 = add(x1, x2)
    sum2 = add(sum1, x3)
    sum3 = add(sum2, x4)
    sum4 = add(sum3, x5)
    ret = addn((sum4, sum1, sum2))
    return ret

@@ -0,0 +1,62 @@
/**
 * Copyright 2023 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <vector>
#include <map>
#include "common/common_test.h"
#include "common/backend_common_test.h"
#include "common/py_func_graph_fetcher.h"
#include "runtime/device/gsm/mem_usage_analyzer.h"

namespace mindspore::device {
class TestMemUsageAnalyzer : public BackendCommon {
 public:
  TestMemUsageAnalyzer() : get_py_func_("gtest_input.runtime.device.gsm.mem_usage_analyzer_test", true) {}

  UT::PyFuncGraphFetcher get_py_func_;
};

/// Feature: MemUsageAnalyzer
/// Description: Test MemUsageAnalyzer interface
/// Expectation: Pass all interface tests
TEST_F(TestMemUsageAnalyzer, test_mem_usage_analyzer) {
  auto net = get_py_func_("add_net");
  EXPECT_NE(net, nullptr);
  std::vector<int64_t> shp_x{1, 2, 2, 2};
  auto x_abstract = std::make_shared<abstract::AbstractTensor>(kFloat32, shp_x);
  AbstractBasePtrList args_spec_list{x_abstract, x_abstract, x_abstract, x_abstract, x_abstract};

  auto func_graph = GetFuncGraph(net, args_spec_list);
  auto kernel_graph = Compile(func_graph);

  auto analyzer = std::make_shared<MemUsageAnalyzer>();
  analyzer->Analyze(kernel_graph);
  auto kernel_infos = analyzer->GetMemUsageKernelInfos();
  auto tensor_infos = analyzer->GetMemUsageTensorInfos();
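
  // add_net compiles to 5 kernels (4 Add + 1 AddN); with 5 parameter inputs,
  // 5 kernel outputs, and 5 workspaces that gives 15 tensors in total.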
  ASSERT_EQ(5, kernel_infos.size());
  ASSERT_EQ(15, tensor_infos.size());
  for (size_t i = 0; i < kernel_infos.size(); ++i) {
    ASSERT_NE(nullptr, analyzer->GetMemUsageKernelInfo(i));
  }

  for (size_t i = 0; i < tensor_infos.size(); ++i) {
    ASSERT_NE(nullptr, analyzer->GetMemUsageTensorInfo(i));
  }
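
  // The largest footprint is the AddN kernel: three 1x2x2x2 float32 inputs
  // plus one output (4 x 32 bytes) plus the stub 4-byte workspace = 132 bytes.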
  ASSERT_EQ(132, analyzer->LeastMemNeeded());
}
}  // namespace mindspore::device