forked from OSSInnovation/mindspore

[MemOpt] Safe Optimized Memory Allocation Solver

parent 5e039bfaad
commit 5452e711b5
@@ -47,7 +47,6 @@ endif()
 if (DEBUG_MODE)
     set(CMAKE_BUILD_TYPE "Debug")
-    add_compile_definitions(MEM_REUSE_DEBUG)
 else()
     set(CMAKE_BUILD_TYPE "Release")
 endif()
@@ -2,6 +2,7 @@ file(GLOB_RECURSE _PREACTIVATE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
     "common/*.cc"
     "mem_reuse/*.cc"
     "pass/*.cc"
+    "somas/*.cc"
 )

 if (ENABLE_D)
File diff suppressed because it is too large
mindspore/ccsrc/backend/optimizer/somas/somas.h (new file)
@@ -0,0 +1,132 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_SOMAS_SOMAS_H_
#define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_SOMAS_SOMAS_H_

#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

#include "backend/kernel_compiler/tbe/tbe_utils.h"
#include "backend/optimizer/somas/somas_node.h"
#include "backend/optimizer/somas/somas_solver_pre.h"
#include "backend/optimizer/somas/somas_stream.h"
#include "backend/session/anf_runtime_algorithm.h"
#include "backend/session/kernel_graph.h"

namespace mindspore {
namespace somas {
class Somas {
 public:
  // Constructors/Destructors
  Somas() = default;
  Somas(const Somas &) = delete;
  Somas &operator=(const Somas &) = delete;
  ~Somas() = default;

  bool Allocate(const session::KernelGraph *graph);
  size_t GetTotalMemSize() { return mem_offset_; }
  void set_mem_base_addr(uint8_t *mem_base_addr) { mem_base_addr_ = mem_base_addr; }
  uint8_t *GetNodeOutputPtr(const AnfNodePtr &node, size_t index) const;
  uint8_t *GetNodeWorkSpacePtr(const AnfNodePtr &node, size_t index) const;

  void DumpSomasBasicIR(const string filename);
  void DumpSomasMemoryIR(const string filename);

 private:
  // Maps
  std::unordered_map<size_t, SomasTensorPtr> tensors_map_;
  std::map<void *, SomasNodePtr> nodes_map_;

  // Vectors
  std::vector<SomasNodePtr> nodes_list_;
  std::vector<SomasStreamPtr> streams_list_;
  std::vector<SomasTensorPtr> tensors_list_;

  // Stream groups
  std::vector<vector<uint32_t>> streams_groups_;

  // Solver
  std::unordered_map<size_t, SomasSolverTensorDescPtr> solver_tensor_desc_list_;
  SomasSolverPrePtr somas_solver_;

  // Constraints
  std::shared_ptr<Array> cannot_reuse_;

  // Contiguous list
  std::vector<vector<size_t>> contiguous_tensors_list_;

  // Ref lists
  std::vector<vector<size_t>> ref_node_constraints_;
  std::vector<vector<size_t>> ref_overlap_constraints_;

  // Total offset
  size_t mem_offset_;

  // GetNext op output size
  size_t get_next_size_;

  // Memory base address
  uint8_t *mem_base_addr_{nullptr};

  // Save debug info
  bool save_graphs_{false};
  std::string save_graphs_path_;

  // Statistics
  size_t upper_bound_{0};
  size_t lower_bound_{0};
  size_t workspace_total_size_{0};
  size_t comm_input_total_size_{0};
  size_t comm_output_total_size_{0};
  size_t lifelong_all_total_size_{0};
  size_t lifelong_start_total_size_{0};
  size_t lifelong_end_total_size_{0};

  bool InitSomasTensors(const session::KernelGraph *graph);
  void InitBasicInfo(const session::KernelGraph *graph);
  void InitSomasStreamAndNode(const session::KernelGraph *graph);
  void InitSomasOutputAndWorkspaceTensors(const session::KernelGraph *graph);
  void InitSomasInputTensors(const session::KernelGraph *graph);
  void GetNextOutputProcess(const session::KernelGraph *graph);
  void IndependentNodeOutputProcess(const session::KernelGraph *graph);
  void SummaryInputProcess(const session::KernelGraph *graph);
  void RefNodeProcess(const session::KernelGraph *graph);
  void UnReuseNodeProcess(const session::KernelGraph *graph);
  SomasTensorPtr CreateGapTensor(size_t gap_tensor_id);
  void GenContiguousList(const session::KernelGraph *graph);

  void PreprocessingConflicts();
  void ComputeConflictPairs();

  bool Assign(const session::KernelGraph *graph);

  void DumpOfflineIR(const string filename);
  void DumpSomasMemoryPoolInfoIR(const string filename);
  std::string GetSplitName(const string &scope_name) const;
  size_t CalcLowerBound() const;
  void GenStatisticInfo();
};

using SomasPtr = std::shared_ptr<Somas>;
}  // namespace somas
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_SOMAS_SOMAS_H_
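Note: a minimal caller sketch for the interface above, assuming a valid kernel graph and a device buffer of at least GetTotalMemSize() bytes. The function name RunSomas and the call order (Allocate, then set_mem_base_addr, then per-node queries) are illustrative inferences from the declarations, not taken from this commit.

```cpp
#include "backend/optimizer/somas/somas.h"

// Hypothetical driver (not part of this commit): solve, bind a base
// address, then query per-node pointers during kernel launch.
void RunSomas(const mindspore::session::KernelGraph *graph, uint8_t *device_base) {
  auto somas = std::make_shared<mindspore::somas::Somas>();
  if (!somas->Allocate(graph)) {
    return;  // solving failed; the caller falls back to its default allocator
  }
  // GetTotalMemSize() is the solver's upper bound; device_base must cover it.
  somas->set_mem_base_addr(device_base);
  // Example query for the first output of some node in the graph:
  // uint8_t *out0 = somas->GetNodeOutputPtr(node, 0);
}
```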
mindspore/ccsrc/backend/optimizer/somas/somas_node.cc (new file)
@@ -0,0 +1,46 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "backend/optimizer/somas/somas_node.h"
#include <algorithm>

namespace mindspore {
namespace somas {
void SomasNode::ComputeAncestorNodes() {
  // Fast algorithm: assumes nodes execute this function in the received topological order
  int64_t thisId = this->GetStream()->GetId();

  for (SomasNodePtr node : ancestor_nodes_) {
    int64_t ancestorId = node->GetStream()->GetId();
    // Map improvement for max_ancestor_order
    if (thisId != ancestorId) {
      this->anc_stream_max_order_[ancestorId] = std::max(this->anc_stream_max_order_[ancestorId], node->GetId());
    }
    for (SomasStreamPtr stream : node->GetStream()->ancestor_streams_) {
      int64_t streamId = stream->GetId();
      this->anc_stream_max_order_[streamId] =
        std::max(this->anc_stream_max_order_[streamId], node->anc_stream_max_order_[streamId]);
    }
  }
}

void SomasNode::PresetAncestorStreams(const std::vector<SomasStreamPtr> stream_vector) {
  for (SomasStreamPtr stream : stream_vector) {
    anc_stream_max_order_[stream->GetId()] = 0;
  }
}
}  // namespace somas
}  // namespace mindspore
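A toy model of the propagation above may help: each node keeps, per foreign stream, the largest id among the ancestors it must execute after, and visiting nodes in topological order lets every node fold in tables that are already final. This sketch uses plain ids instead of the Somas classes and, for brevity, folds in every stream the ancestor has recorded, where the code above restricts itself to the ancestor stream's ancestor_streams_ list.

```cpp
#include <algorithm>
#include <cstdint>
#include <map>

struct ToyNode {
  int64_t stream_id;
  size_t id;
  std::map<int64_t, size_t> anc_stream_max_order;  // stream id -> max ancestor node id
};

// Fold one direct ancestor into node's table (node is visited after ancestor,
// so ancestor.anc_stream_max_order is already complete).
void Combine(ToyNode *node, const ToyNode &ancestor) {
  if (node->stream_id != ancestor.stream_id) {
    size_t &slot = node->anc_stream_max_order[ancestor.stream_id];
    slot = std::max(slot, ancestor.id);
  }
  // Inherit whatever the ancestor already knows about earlier streams.
  for (const auto &kv : ancestor.anc_stream_max_order) {
    size_t &slot = node->anc_stream_max_order[kv.first];
    slot = std::max(slot, kv.second);
  }
}
```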
mindspore/ccsrc/backend/optimizer/somas/somas_node.h (new file)
@@ -0,0 +1,78 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_SOMAS_SOMAS_NODE_H_
#define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_SOMAS_SOMAS_NODE_H_

#include "backend/optimizer/somas/somas_stream.h"
#include "backend/optimizer/somas/somas_tensor.h"

#include <memory>
#include <set>
#include <string>
#include <unordered_map>
#include <vector>

namespace mindspore {
namespace somas {
class SomasStream;
class SomasTensor;

enum NodeType { kCommonNode, kCommunicationNode };

using SomasStreamPtr = std::shared_ptr<SomasStream>;
using SomasTensorPtr = std::shared_ptr<SomasTensor>;

class SomasNode {
 public:
  using SomasNodePtr = std::shared_ptr<SomasNode>;
  // Public attributes (mutated in code)
  std::string scope_full_name_;

  std::set<SomasNodePtr>
    ancestor_nodes_;  // keeping only distance-one ancestor nodes; enough for ComputeAncestorNodes()
  std::set<SomasTensorPtr> tensors_;

  std::vector<SomasTensorPtr> input_tensors_;
  std::vector<SomasTensorPtr> output_tensors_;
  std::vector<SomasTensorPtr> workspace_tensors_;

  std::unordered_map<int64_t, size_t> anc_stream_max_order_;

  // Constructors/Destructors
  SomasNode(size_t id, NodeType type, SomasStreamPtr stream) : id_(id), stream_(stream), type_(type) {}
  SomasNode(const SomasNode &) = delete;
  SomasNode &operator=(const SomasNode &) = delete;
  ~SomasNode() = default;

  // Accessors
  const size_t &GetId() { return id_; }
  SomasStreamPtr GetStream() { return stream_; }
  const NodeType &GetType() { return type_; }

  // Computing ancestors
  void PresetAncestorStreams(const std::vector<SomasStreamPtr> stream_vector);
  void ComputeAncestorNodes();

 private:
  const size_t id_{0};
  SomasStreamPtr const stream_;
  const NodeType type_;
};
}  // namespace somas
}  // namespace mindspore

#endif  // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_SOMAS_SOMAS_NODE_H_
mindspore/ccsrc/backend/optimizer/somas/somas_solver_alg.cc (new file)
@@ -0,0 +1,260 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "backend/optimizer/somas/somas_solver_alg.h"

#include <algorithm>
#include <stack>
#include <utility>

namespace mindspore {
namespace somas {
// offset-picking heuristics
bool SmallestFit(const pair<size_t, size_t> &a, const pair<size_t, size_t> &b) {
  return a.first < b.first || (a.first == b.first && a.second < b.second);
}
bool LargestFit(const pair<size_t, size_t> &a, const pair<size_t, size_t> &b) {
  return a.first > b.first || (a.first == b.first && a.second < b.second);
}
bool BestFit(const pair<size_t, size_t> &a, const pair<size_t, size_t> &b) {
  return a.second < b.second || (a.second == b.second && a.first < b.first);
}
bool WorstFit(const pair<size_t, size_t> &a, const pair<size_t, size_t> &b) {
  return a.second > b.second || (a.second == b.second && a.first < b.first);
}
size_t SharedObjects(FootPrint *p) { return p->Next()->getOffset(); }
size_t SingleObject(FootPrint *p) { return SIZE_MAX; }

bool (*g_pBranching[kNumFittingTypes])(const pair<size_t, size_t> &a, const pair<size_t, size_t> &b) = {
  BestFit, SmallestFit
#ifdef SOMAS_DEBUG
  ,
  LargestFit, WorstFit
#endif
};
size_t (*algorithm[kNumAlgorithmTypes])(FootPrint *p) = {SharedObjects, SingleObject};

size_t FootPrint::Result() {
  std::shared_ptr<FootPrint> foot_print = shared_from_this();
  size_t upperbound = 0;
  uint32_t total_footprints = 0;
  while (NULL != foot_print) {
    foot_print->printStats();

    upperbound = foot_print->getOffset();
    foot_print = foot_print->Next();
    total_footprints++;
  }

  MS_LOG(DEBUG) << total_footprints << " footprints allocated";

  return upperbound;
}

bool FootPrint::findFirst(stack<Interval> *merged, const BlockTensor &block, size_t *offset) {
  MS_EXCEPTION_IF_NULL(merged);
  MS_EXCEPTION_IF_NULL(offset);
  bool bfound = false;
  std::set<pair<size_t, size_t>, bool (*)(const pair<size_t, size_t> &a, const pair<size_t, size_t> &b)>
    offsetcandidates(g_pBranching[m_branching_strategy_]);
  size_t gap = 0;

  Interval a;
  Interval it;

  a.ub() = algorithm[m_algorithm_](this);
  while (!(*merged).empty()) {
    it = (*merged).top();
    (*merged).pop();
    a.lb() = it.ub();
    if (a.contains(block.m_size_)) {
      gap = a.ub() - a.lb() - block.m_size_;
      offsetcandidates.emplace(pair<size_t, size_t>(a.lb(), gap));
    }
    a.ub() = it.lb();
  }
  a.lb() = m_offset_;
  gap = a.ub() - a.lb() - block.m_size_;
  if (a.contains(block.m_size_)) offsetcandidates.emplace(pair<size_t, size_t>(a.lb(), gap));

  if (offsetcandidates.size() > 0) {
    *offset = (*offsetcandidates.begin()).first;
    m_foot_print_next_->m_offset_ = std::max(m_foot_print_next_->m_offset_, *offset + block.m_size_);
    offsetcandidates.erase(offsetcandidates.begin());
    bfound = true;
  }

  return bfound;
}

void FootPrint::Merge(vector<Interval> *interval_v, stack<Interval> *s) {
  MS_EXCEPTION_IF_NULL(s);
  sort((*interval_v).begin(), (*interval_v).end(),
       [](Interval &i1, Interval &i2) { return (i1.lb() < i2.lb()) || (i1.lb() == i2.lb() && i1.ub() < i2.ub()); });
  (*s).push((*interval_v)[0]);

  for (size_t i = 1; i < (*interval_v).size(); i++) {
    Interval &top = (*s).top();
    Interval &b = (*interval_v)[i];
    if (top.ub() < b.lb())
      (*s).push(b);
    else if (top.ub() < b.ub())
      top.ub() = b.ub();
  }

  return;
}

void FootPrint::ConstrainedBLocks(const std::shared_ptr<Array> &constraints, const BlockTensor &b1,
                                  const BlockTensor &b2, vector<Interval> *oInterval) {
  MS_EXCEPTION_IF_NULL(oInterval);
  // propagate
  size_t acum = m_offset_;

  for (SomasSolverTensorDescPtr p1 = b1.m_start_tensor_; NULL != p1; p1 = p1->right_) {
    for (SomasSolverTensorDescPtr p2 = b2.m_start_tensor_; NULL != p2; p2 = p2->right_) {
      if ((*constraints)(p1->index_, p2->index_) == 1) {
        Interval a = Interval(acum, acum + p1->size_);
        Interval b = Interval(p2);
        if (a.lb() < b.ub()) {
          (*oInterval).emplace_back(b);
        }
      }
    }
    acum += p1->size_;
  }
}

bool FootPrint::findOffset(const std::shared_ptr<Array> &constraints, const BlockTensor &block, size_t *offset) {
  MS_EXCEPTION_IF_NULL(offset);
  bool bretval = true;
  vector<Interval> l_interval;

  const size_t intervals_estimation = 1000;
  l_interval.reserve(intervals_estimation * sizeof(Interval));

  *offset = m_offset_;
  bretval = true;

  // transform constrained tensors into non-eligible intervals
  for (size_t i = 0; i < m_starts_.size(); i++) {
    if (block.Alone() && m_starts_[i]->Alone() &&
        (*constraints)(block.m_start_tensor_->index_, m_starts_[i]->m_start_tensor_->index_)) {
      if (m_algorithm_ != 1 && i == 0) return false;
      Interval It = Interval(m_starts_[i]->m_start_tensor_);
      l_interval.emplace_back(It);
    } else {
      ConstrainedBLocks(constraints, block, *m_starts_[i], &l_interval);  // solve multiple tensor blocks
    }
  }
  // merge non-eligible intervals and find a slot to allocate the tensor block
  if (!l_interval.empty()) {
    stack<Interval> l_mergedIntervals;
    Merge(&l_interval, &l_mergedIntervals);
    bretval = findFirst(&l_mergedIntervals, block, offset);
  }

  return bretval;
}

void FootPrint::addElem(BlockTensor *block, const size_t &offset) {
  if (m_foot_print_next_ == NULL) {
    m_foot_print_next_ = std::make_shared<FootPrint>();
    size_t newoffset = m_offset_ + block->m_size_;
    m_foot_print_next_->setOffset(newoffset);
    m_foot_print_next_->setAlignment(m_alignment_);
    m_foot_print_next_->m_solId_ = m_solId_;
    m_starts_.clear();
    MS_LOG(DEBUG) << "Creating footprint at offset: " << m_offset_;
  }

  addStart(block);
  size_t offset1 = offset;
  SomasSolverTensorDescPtr tensor = block->m_start_tensor_;
  MS_LOG(DEBUG) << "Allocating block: " << tensor->index_ << " in offset: " << offset;
  pair<uint32_t, size_t> sol_offset;
  sol_offset.first = block->m_current_sol_;
  sol_offset.second = offset;
  if (block->offsets_.count(sol_offset.first))
    MS_LOG(WARNING) << "Warning addElem: Offset overwritten at solution " << block->m_current_sol_ << " for block "
                    << block->m_start_tensor_->index_;
  block->offsets_.insert(sol_offset);
  while (tensor) {
    tensor->offset_ = offset1;
    offset1 += tensor->size_;

    MS_LOG(DEBUG) << tensor->index_ << " " << tensor->size_ << " " << tensor->offset_;
    tensor = tensor->right_;
  }
}

void FootPrint::printStats() {
  MS_LOG(DEBUG) << "Footprint blocks: " << m_starts_.size() << " \toffset: " << m_offset_;
}

bool FastHeuristic::Eval(  // unordered_map<size_t, SomasSolverTensorDescPtr> &tensors_m,
  vector<BlockTensor> *block_tensors_v, std::shared_ptr<FootPrint> foot_print,
  const std::shared_ptr<Array> &pConstraints) {
  MS_EXCEPTION_IF_NULL(foot_print);
  auto start = std::chrono::system_clock::now();

  std::shared_ptr<FootPrint> p = foot_print;
  bool bpushed = false;
  uint32_t startscount = 0;
  size_t offset = foot_print->getOffset();
  m_tensors_allocated_ = 0;
  SomasSolverTensorDescPtr tensor = NULL;

  for (size_t i = 0; i < (*block_tensors_v).size(); i++) {
    BlockTensor &block = (*block_tensors_v)[i];
    if (block.m_bre_allocate_ == false) {
      offset = block.m_start_tensor_->offset_;
      pair<uint32_t, size_t> aux;
      aux.first = foot_print->m_solId_;
      aux.second = block.m_start_tensor_->offset_;
      if (block.offsets_.count(aux.first)) {
        MS_LOG(WARNING) << "Warning: Offset overwritten at solution " << aux.first << " for block "
                        << block.m_start_tensor_->index_;
      }
      block.offsets_.insert(aux);
      continue;
    }
    bpushed = false;
    p = foot_print;
    block.m_current_sol_ = foot_print->m_solId_;
    while (!bpushed) {
      if (p->findOffset(pConstraints, block, &offset)) {
        p->addElem(&block, offset);
        startscount++;
        tensor = block.m_start_tensor_;
        while (tensor) {
          m_tensors_allocated_++;
          tensor = tensor->right_;
        }
        bpushed = true;
        break;
      }
      // go to the next footprint slot
      if (NULL != p->Next()) {
        p = p->Next();
      } else if (bpushed == false) {  // something went wrong
        MS_LOG(WARNING) << "Could not allocate memory for tensor: " << tensor->index_;
        return false;
      }
    }
  }

  MS_LOG(DEBUG)
    << "\nElapsed time of Fast Heuristic search: "
    << std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() - start).count()
    << " ms";
  return true;
}
}  // namespace somas
}  // namespace mindspore
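The candidate set in findFirst() orders (offset, gap) pairs with one of the comparators above and takes begin() as the winner. A standalone sanity check of the BestFit ordering (the comparator body is copied from this file; the rest is scaffolding):

```cpp
#include <cstddef>
#include <cstdio>
#include <set>
#include <utility>

using Cand = std::pair<size_t, size_t>;  // (offset, leftover gap)

// Same ordering as BestFit above: smallest gap wins, ties go to smaller offset.
static bool BestFitDemo(const Cand &a, const Cand &b) {
  return a.second < b.second || (a.second == b.second && a.first < b.first);
}

int main() {
  std::set<Cand, bool (*)(const Cand &, const Cand &)> candidates(BestFitDemo);
  candidates.insert({0, 96});     // big hole at the bottom
  candidates.insert({512, 32});   // tighter hole higher up
  candidates.insert({1024, 32});  // same gap, larger offset
  const Cand &picked = *candidates.begin();
  std::printf("picked offset %zu (gap %zu)\n", picked.first, picked.second);  // 512 (gap 32)
  return 0;
}
```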
mindspore/ccsrc/backend/optimizer/somas/somas_solver_alg.h (new file)
@@ -0,0 +1,177 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_SOMAS_SOMAS_SOLVER_ALG_H_
#define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_SOMAS_SOMAS_SOLVER_ALG_H_

#include <algorithm>
#include <cassert>
#include <chrono>
#include <cstddef>
#include <cstring>
#include <list>
#include <memory>
#include <numeric>
#include <set>
#include <stack>
#include <unordered_map>
#include <utility>
#include <vector>

#include "backend/optimizer/somas/somas_solver_pre.h"
#include "utils/ms_context.h"

using std::pair;
using std::set;
using std::stack;
using std::unordered_map;
using std::vector;

namespace mindspore {
namespace somas {
class Interval {
 public:
  Interval() { m_a_ = m_b_ = 0; }
  explicit Interval(SomasSolverTensorDescPtr t) {
    m_a_ = t->offset_;
    m_b_ = m_a_ + t->size_;
  }
  Interval(const size_t &a, const size_t &b) {
    m_a_ = a;
    m_b_ = b;
  }
  bool intersect(const Interval &i) { return (in(i.m_a_) || in(i.m_b_)); }
  bool in(const size_t &a) { return ((a > m_a_) && (a < m_b_)); }
  Interval intersection(const Interval &i) {
    if (m_a_ < i.m_a_)
      return Interval(m_a_, i.m_b_);
    else
      return Interval(i.m_a_, m_b_);
  }
  void merge(const Interval &i) {
    m_a_ = std::min(m_a_, i.m_a_);
    m_b_ = std::max(m_b_, i.m_b_);
  }
  size_t &lb() { return m_a_; }
  size_t &ub() { return m_b_; }
  bool contains(size_t width) { return (m_b_ - m_a_) >= width; }
  bool contains(const Interval &a) { return ((a.m_a_ >= m_a_) && (a.m_b_ <= m_b_)); }
  Interval &operator=(const Interval &in) {
    m_a_ = in.m_a_;
    m_b_ = in.m_b_;
    return *this;
  }

 private:
  size_t m_a_;
  size_t m_b_;
};

class BlockTensor {
 public:
  SomasSolverTensorDescPtr m_start_tensor_;
  unordered_map<uint32_t,
                std::set<pair<size_t, size_t>, bool (*)(const pair<size_t, size_t> &, const pair<size_t, size_t> &)>>
    offsets_candidates_;
  uint32_t m_current_sol_;
  bool m_bre_allocate_;
  unordered_map<uint32_t, size_t> offsets_;
  size_t m_size_;
  BlockTensor()
      : m_start_tensor_(NULL),
        offsets_candidates_(),
        m_current_sol_(0),
        m_bre_allocate_(true),
        offsets_(),
        m_size_(0) {}

  BlockTensor &operator=(const BlockTensor &bt) {
    m_bre_allocate_ = bt.m_bre_allocate_;
    m_current_sol_ = 0;
    m_start_tensor_ = bt.m_start_tensor_;
    offsets_candidates_ = bt.offsets_candidates_;
    offsets_ = bt.offsets_;
    m_size_ = bt.m_size_;
    return *this;
  }
  void log() {
    SomasSolverTensorDescPtr p = m_start_tensor_;
    MS_LOG(DEBUG) << "Block of Tensors [" << m_start_tensor_->index_ << "]\nsize: " << m_size_ << "Tensors:";
    while (p) {
      MS_LOG(DEBUG) << "[" << p->index_ << "," << p->size_ << "]";
      p = p->right_;
    }
  }
  bool Alone() const { return ((NULL == m_start_tensor_->right_) && (NULL == m_start_tensor_->left_)); }
};

class FootPrint : public std::enable_shared_from_this<FootPrint> {
 public:
  uint32_t m_solId_;

  FootPrint()
      : m_offset_(0),
        m_starts_(),
        m_foot_print_next_(NULL),
        m_alignment_(0),
        m_branching_strategy_(0),
        m_algorithm_(0) {}
  void setAlignment(const size_t a) { m_alignment_ = a; }
  void setBranchingStrategy(uint32_t bs) { m_branching_strategy_ = bs; }
  void setCurrentSol(uint32_t solId) { m_solId_ = solId; }
  void setAlgorithm(uint32_t algorithm) { m_algorithm_ = algorithm; }
  void addStart(BlockTensor *elemIndex) { m_starts_.push_back(elemIndex); }
  void addElem(BlockTensor *block, const size_t &offset);
  std::shared_ptr<FootPrint> &Next() { return m_foot_print_next_; }
  vector<BlockTensor *> &getStarts() { return m_starts_; }
  void Destroy();
  const size_t getOffset() { return m_offset_; }
  void setOffset(const size_t &offset) { m_offset_ = offset; }
  bool findOffset(const std::shared_ptr<Array> &constraints, const BlockTensor &block, size_t *offset);
  void ConstrainedBLocks(const std::shared_ptr<Array> &constraints, const BlockTensor &b1, const BlockTensor &b2,
                         vector<Interval> *oInterval_l);
  void Merge(vector<Interval> *l_interval, stack<Interval> *l_merged);
  bool findFirst(stack<Interval> *merged, const BlockTensor &block, size_t *offset);
  size_t Result();
  void printStats();

 private:
  size_t m_offset_;
  vector<BlockTensor *> m_starts_;
  std::shared_ptr<FootPrint> m_foot_print_next_;
  size_t m_alignment_;
  uint32_t m_branching_strategy_;
  uint32_t m_algorithm_;
};

class FastHeuristic {
 public:
  FastHeuristic() : m_alignment_(512), m_tensors_allocated_(0) {}

  void setAlignment(const size_t &a) { m_alignment_ = a; }
  void Destroy();
  bool Eval(  // unordered_map<size_t, SomasSolverTensorDescPtr> &tensors_m,
    vector<BlockTensor> *block_tensors_v, std::shared_ptr<FootPrint> foot_print,
    const std::shared_ptr<Array> &pConstraints);

 private:
  size_t m_alignment_;
  size_t m_tensors_allocated_;
};
}  // namespace somas
}  // namespace mindspore

#endif  // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_SOMAS_SOMAS_SOLVER_ALG_H_
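A quick worked example of the Interval arithmetic above, with the member logic inlined so the snippet stands alone: [8, 24) and [16, 40) intersect, merge to [8, 40), and the merged hole then fits any block of width up to 32.

```cpp
#include <cassert>
#include <cstddef>

int main() {
  // Mirroring Interval(a, b): [8, 24) and [16, 40).
  size_t a_lb = 8, a_ub = 24;
  size_t b_lb = 16, b_ub = 40;
  // intersect() above tests whether either endpoint of b lies strictly inside a.
  bool intersect = (b_lb > a_lb && b_lb < a_ub) || (b_ub > a_lb && b_ub < a_ub);
  assert(intersect);
  // merge(): lower bound = min, upper bound = max.
  size_t m_lb = a_lb < b_lb ? a_lb : b_lb;  // 8
  size_t m_ub = a_ub > b_ub ? a_ub : b_ub;  // 40
  assert(m_lb == 8 && m_ub == 40);
  // contains(width): the merged interval fits a block of 32 bytes.
  assert((m_ub - m_lb) >= 32);
  return 0;
}
```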
mindspore/ccsrc/backend/optimizer/somas/somas_solver_core.cc (new file)
@@ -0,0 +1,405 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <algorithm>
#include <chrono>
#include <cstdio>
#include <ctime>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

#include "backend/optimizer/somas/somas_solver_alg.h"
#include "backend/optimizer/somas/somas_solver_core.h"
#include "backend/optimizer/somas/somas_solver_pre.h"

using std::sort;
using std::unordered_map;
using std::vector;

namespace mindspore {
namespace somas {
Status SomasSolverCore::MemoryAllocationSolver() {
  auto start = std::chrono::system_clock::now();
  Status retval = SUCCESS;
  size_t best = SIZE_MAX;
  size_t best_timing = SIZE_MAX;
  if (all_) {  // loop over all heuristics
    FittingType best_branching = kBest;
    SortingType best_sorting = kGreaterSizeSmallerIndex;
    AlgorithmType best_algorithm = kManyObjects;
    uint32_t best_sol = 0;
    size_t worst = 0;
    BuildBlocks();
    Clean();
    MS_LOG(INFO) << "time\tSol#\tResult\t\t\t\tAlgorithm\tSorting Strategy\tOffset Strategy";
    for (size_t algorithm = 0; algorithm < kNumAlgorithmTypes; algorithm++) {
      algorithm_ = static_cast<AlgorithmType>(algorithm);
      for (size_t sort_strategy = 0; sort_strategy < kNumSortingTypes; sort_strategy++) {
        sort_strategy_ = static_cast<SortingType>(sort_strategy);
        SortTensors();
        for (size_t branching_strategy = 0; branching_strategy < kNumFittingTypes; branching_strategy++) {
          branching_strategy_ = static_cast<FittingType>(branching_strategy);
          Clean();
          MS_LOG(DEBUG) << "Timing Start " << tensors_.size() << " Tensors";
          start = std::chrono::system_clock::now();
          upperbound_ = FindSolutions();
          MS_LOG(DEBUG)
            << "\nElapsed time of upper bound testing: "
            << std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() - start).count()
            << " ms";
          start = std::chrono::system_clock::now();

          if (upperbound_ > worst) {
            worst = upperbound_;
          }
          if (upperbound_ <= best) {
            best = upperbound_;
            best_algorithm = algorithm_;
            best_branching = branching_strategy_;
            best_sorting = sort_strategy_;
            best_sol = sol_count_;
            best_timing = timing_;
          }
          Verify();
          sol_count_++;
        }
      }
    }
    upperbound_ = best;
    auto end = std::chrono::system_clock::now();
    size_t total_time = std::chrono::duration_cast<std::chrono::milliseconds>((end - start)).count();
    const double giga = 1024. * 1024. * 1024.;
    const double cent = 100.;
    MS_LOG(INFO) << "SOMAS SOLVER SUMMARY:";
    MS_LOG(INFO) << "Best Solution:[" << 1 + best_sol << "/" << sol_count_ << "] ";
    MS_LOG(INFO) << "Best result:" << best << " Bytes " << (best) / (giga) << " GB ("
                 << (best - lifelongmemory_) / (giga) << " GB + " << lifelongmemory_ / (giga)
                 << " GB from lifelong tensors)";

    MS_LOG(INFO) << "Best timing:" << best_timing << " ms";
    MS_LOG(INFO) << "Best algorithm: " << algorithm_type_[best_algorithm].c_str();
    MS_LOG(INFO) << "Best sorting strategy: " << sorting_[best_sorting].c_str();
    MS_LOG(INFO) << "Best offset strategy: " << branching_[best_branching].c_str();
    MS_LOG(INFO) << "Time elapsed: " << total_time << " ms";
    MS_LOG(INFO) << "Spread: " << (worst - best) / static_cast<double>(best) * cent << " %";
    best_sol_ = best_sol;
    SetBestSolution();
  } else {
    MS_LOG(INFO) << "Algorithm strategy: " << algorithm_type_[algorithm_].c_str();
    MS_LOG(INFO) << "Sorting strategy: " << sorting_[sort_strategy_].c_str();
    MS_LOG(INFO) << "Offset strategy: " << branching_[branching_strategy_].c_str();
    BuildBlocks();
    SortTensors();
    upperbound_ = FindSolutions();
    Verify();
  }
  return retval;
}

Status SomasSolverCore::Verify() {
  Status retval = SUCCESS;
  if (verify_) {
    MS_LOG(INFO) << "Verifying solution..";

    if (!Verify(upperbound_)) {
      MS_LOG(WARNING) << "Solver Allocation Memory Check FAILS";
      retval = FAILED;
    } else {
      const double giga = 1024. * 1024. * 1024.;
      MS_LOG(INFO) << "Solver Allocation Memory Check SUCCESS!";
      MS_LOG(INFO) << "Result: " << upperbound_ << " (" << (upperbound_) / (giga) << " GB)";
      retval = SUCCESS;
    }
  }

  return retval;
}

Status SomasSolverCore::Verify(unordered_map<size_t, SomasSolverTensorDescPtr> *pTensor_map) {
  Status retval = SUCCESS;
  if (NULL == pTensor_map) return retval;
  MS_LOG(INFO) << "Verifying HQ Solution..";
  MS_LOG(INFO) << "Checking tensors id, sizes..";

  for (auto ptensor : *pTensor_map) {
    if (tensors_.count(ptensor.first) == 0) {
      MS_LOG(WARNING) << "HQ Tensor id " << ptensor.first << " does not exist";
    } else if (tensors_[ptensor.first]->size_ != ptensor.second->size_) {
      size_t HQ_index = ptensor.first;
      size_t HQ_size = ptensor.second->size_;
      size_t index = ptensor.first;
      size_t size = tensors_[ptensor.first]->size_;
      MS_LOG(WARNING) << "HQ Tensor Id: " << HQ_index << " with size: " << HQ_size
                      << " is different from Tensor Id: " << index << " size: " << size;
    }
  }

  MS_LOG(INFO) << "Checking HQ Solution..";
  tensors_ = *pTensor_map;
  retval = Verify(upperbound_) == 0 ? FAILED : SUCCESS;
  return retval;
}

bool SomasSolverCore::Verify(const size_t &upperbound) {
  auto start = std::chrono::system_clock::now();
  bool retval = true;
  size_t result = 0;
  SomasSolverTensorDescPtr t1;
  SomasSolverTensorDescPtr t2;

  for (auto t1_ : tensors_) {
    // check alignment
    result = std::max(result, t1_.second->size_ + t1_.second->offset_);
    for (auto t2_ : tensors_) {
      t1 = t1_.second;
      t2 = t2_.second;
      if (t1->index_ == t2->index_) continue;
      bool blifelong = (t1->lifelong_ || t2->lifelong_) && (t1->index_ != t2->index_);
      const size_t continuous = 2;
      const size_t conflict = 1;
      if ((*constraints_)(t1->index_, t2->index_) == continuous) {  // continuous constraint
        // t1 must be continuous to t2
        bool bcontinuous = t1->offset_ == (t2->offset_ + t2->size_);
        if (!bcontinuous) {
          MS_LOG(WARNING) << "Continuous constraint violation in tensors " << t1->index_ << " and " << t2->index_;
          retval = false;
        }
      } else if (blifelong || (*constraints_)(t1->index_, t2->index_) == conflict) {  // conflict constraint
        size_t t1_ub = t1->offset_ + t1->size_;
        size_t t2_ub = t2->offset_ + t2->size_;
        bool b_overlap_lb = ((t2->offset_ >= t1->offset_) && (t2->offset_ < t1_ub));
        bool b_overlap_ub = ((t2_ub > t1->offset_) && (t2_ub < t1_ub));
        bool b_overlap = b_overlap_lb || b_overlap_ub;
        bool biszerosized = t1->size_ == 0 || t2->size_ == 0;
        if (b_overlap && !biszerosized) {
          MS_LOG(WARNING) << "Non-overlap constraint violation in tensors " << t1->index_ << " and " << t2->index_;
          retval = false;
        }
      }
    }
  }
  if (upperbound != result) {
    MS_LOG(WARNING) << "ERROR Invalid upperbound result --> Footprint Result: " << upperbound_
                    << " Tensor Result: " << result + lifelongmemory_;
    retval = false;
  }
  MS_LOG(DEBUG)
    << "\nElapsed time of Fast Heuristic Check: "
    << std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() - start).count()
    << " ms";
  return retval;
}

void SomasSolverCore::BuildBlocks() {
  MS_LOG(DEBUG) << "Building block of tensors";

  lifelongmemory_ = 0;
  uint64_t tensors_block_count = 0;
  for (auto tensor : tensors_) {
    SomasSolverTensorDescPtr pTensor = tensor.second;
    if (pTensor->blocked_) continue;
    if (pTensor->lifelong_) {
      lifelongmemory_ += pTensor->size_;
      continue;
    }
    // move to the left
    while (pTensor->left_) pTensor = pTensor->left_;

    // set start tensor
    BlockTensor bTensor;
    bTensor.m_bre_allocate_ = true;
    bTensor.m_start_tensor_ = pTensor;
    // find size
    bTensor.m_size_ = 0;

    do {
      bTensor.m_size_ += pTensor->size_;
      pTensor->blocked_ = true;
      pTensor = pTensor->right_;
      tensors_block_count++;
    } while (NULL != pTensor);

    // add to the list
    this->block_tensors_.emplace_back(bTensor);
  }

  if (tensors_block_count != tensors_.size())
    MS_LOG(INFO) << static_cast<int>(tensors_.size() - tensors_block_count) << " lifelong tensors found";

  // for debug
  for (auto &b : block_tensors_) b.log();
}

void SomasSolverCore::Clean() {
  for (auto &block : block_tensors_) {
    block.m_bre_allocate_ = true;
    auto pTensor = block.m_start_tensor_;
    while (pTensor) {
      pTensor->offset_ = 0;
      pTensor = pTensor->right_;
    }
  }
  upperbound_ = SIZE_MAX;
}

void SomasSolverCore::SortTensors() {  // need to sort the tensors for Fast Heuristic
  MS_LOG(DEBUG) << "Sorting Blocks of tensor, strategy: " << sorting_[sort_strategy_].c_str();
  switch (sort_strategy_) {
    case kGreaterSizeSmallerIndex: {  // size(>), index(<)
      sort(block_tensors_.begin(), block_tensors_.end(), [](const BlockTensor &t1, const BlockTensor &t2) {
        return t1.m_size_ > t2.m_size_ ||
               (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->index_ < t2.m_start_tensor_->index_);
      });
      break;
    }
#ifdef SOMAS_DEBUG
    case kGreaterSizeGreaterIndex: {  // size(>), index(>)
      sort(block_tensors_.begin(), block_tensors_.end(), [](const BlockTensor &t1, const BlockTensor &t2) {
        return t1.m_size_ > t2.m_size_ ||
               (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->index_ > t2.m_start_tensor_->index_);
      });
      break;
    }
    case kGreaterSizeSmallerConstraintsSmallerIndex: {  // size(>), constraints(<), index(<)
      sort(block_tensors_.begin(), block_tensors_.end(), [](const BlockTensor &t1, const BlockTensor &t2) {
        return t1.m_size_ > t2.m_size_ ||
               (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ < t2.m_start_tensor_->constraints_) ||
               (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ == t2.m_start_tensor_->constraints_ &&
                t1.m_start_tensor_->index_ < t2.m_start_tensor_->index_);
      });
      break;
    }
    case kGreaterSizeSmallerConstraintsGreaterIndex: {  // size(>), constraints(<), index(>)
      sort(block_tensors_.begin(), block_tensors_.end(), [](const BlockTensor &t1, const BlockTensor &t2) {
        return t1.m_size_ > t2.m_size_ ||
               (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ < t2.m_start_tensor_->constraints_) ||
               (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ == t2.m_start_tensor_->constraints_ &&
                t1.m_start_tensor_->index_ > t2.m_start_tensor_->index_);
      });
      break;
    }
    case kGreaterSizeGreaterConstraintsSmallerIndex: {  // size(>), constraints(>), index(<)
      sort(block_tensors_.begin(), block_tensors_.end(), [](const BlockTensor &t1, const BlockTensor &t2) {
        return t1.m_size_ > t2.m_size_ ||
               (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ > t2.m_start_tensor_->constraints_) ||
               (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ == t2.m_start_tensor_->constraints_ &&
                t1.m_start_tensor_->index_ < t2.m_start_tensor_->index_);
      });
      break;
    }
    case kGreaterSizeGreaterConstraintsGreaterIndex: {  // size(>), constraints(>), index(>)
      sort(block_tensors_.begin(), block_tensors_.end(), [](const BlockTensor &t1, const BlockTensor &t2) {
        return t1.m_size_ > t2.m_size_ ||
               (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ > t2.m_start_tensor_->constraints_) ||
               (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ == t2.m_start_tensor_->constraints_ &&
                t1.m_start_tensor_->index_ > t2.m_start_tensor_->index_);
      });
      break;
    }
#endif
    case kNumSortingTypes: {  // no sorting case
      break;
    }
  }
  // log for debug purposes
  for (auto &block : block_tensors_) block.log();
}

void SomasSolverCore::RestoreSolution(uint32_t sol_id) {
  for (auto block : block_tensors_) {
    if (block.offsets_.count(sol_id) == 0) assert(0);
    size_t bestOffset = block.offsets_[sol_id];
    size_t offset = bestOffset;
    SomasSolverTensorDescPtr pTensor = block.m_start_tensor_;

    while (pTensor) {
      pTensor->offset_ = offset;
      offset += pTensor->size_;
      pTensor = pTensor->right_;
    }
  }
}

size_t SomasSolverCore::Search(const std::shared_ptr<FootPrint> &pFootprint) {
  size_t result = 0;
  FastHeuristic fh;
  MS_LOG(INFO) << "Calling FastSolver Search for " << block_tensors_.size() << " tensors ";
  auto start = std::chrono::system_clock::now();
  if (fh.Eval(&block_tensors_, pFootprint, constraints_)) {
    result = pFootprint->Result();
    auto end = std::chrono::system_clock::now();
    timing_ = std::chrono::duration_cast<std::chrono::milliseconds>((end - start)).count();
    if (all_) {
      const double giga = 1073741824.;
      MS_LOG(INFO) << timing_ << " ms\t" << sol_count_ + 1 << "/"
                   << kNumFittingTypes * kNumAlgorithmTypes * kNumSortingTypes << "\t" << result << " Bytes ("
                   << result / giga << " GB)\t" << algorithm_type_[algorithm_].c_str() << "\t"
                   << sorting_[sort_strategy_].c_str() << "\t" << branching_[branching_strategy_].c_str();
    }
  } else {
    MS_LOG(INFO) << "FastSolver could not find solution";
  }

  if (result < upperbound_) {
    upperbound_ = result;
    best_sol_ = pFootprint->m_solId_;
    best_branching_ = branching_strategy_;
    best_sort_ = sort_strategy_;
  }

  return upperbound_;
}

void SomasSolverCore::AppendLifelongTensors() {
  MS_LOG(DEBUG) << "Appending lifelong tensors to solution";
  size_t offset = upperbound_;
  for (auto t_ : tensors_) {
    SomasSolverTensorDescPtr pTensor = t_.second;
    if (pTensor->lifelong_) {
      pTensor->offset_ = offset;
      offset += pTensor->size_;
    }
  }
  upperbound_ += lifelongmemory_;
  MS_LOG(DEBUG) << lifelongmemory_ << " bytes from lifelong tensors added to solution";
}

size_t SomasSolverCore::FindSolutions() {
  MS_LOG(DEBUG) << "Start allocating blocks, offset strategy: " << branching_[branching_strategy_].c_str();

  std::shared_ptr<FootPrint> pFootprint = std::make_shared<FootPrint>();
  pFootprint->setBranchingStrategy(branching_strategy_);
  pFootprint->setCurrentSol(sol_count_);
  pFootprint->setAlgorithm(algorithm_);
  Search(pFootprint);
  AppendLifelongTensors();
  Destroy(pFootprint);
  return upperbound_;
}

void SomasSolverCore::Destroy(std::shared_ptr<FootPrint> &pFootprint) {
  while (NULL != pFootprint) {
    if (NULL != pFootprint->Next()) {
      std::shared_ptr<FootPrint> &p = pFootprint;
      pFootprint = pFootprint->Next();
      p = NULL;
    } else {
      pFootprint = NULL;
    }
  }
}
}  // namespace somas
}  // namespace mindspore
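One subtlety in Verify(const size_t &) worth spelling out: the overlap predicate only tests whether t2's endpoints fall inside t1, so "t2 strictly contains t1" is not detected directly; it is caught because the double loop also visits the swapped pair. A standalone check (the predicate body mirrors the one above; everything else is scaffolding):

```cpp
#include <cassert>
#include <cstddef>

// One-sided test from Verify: does either endpoint of [off2, off2+size2)
// fall inside [off1, off1+size1)?
static bool OneSidedOverlap(size_t off1, size_t size1, size_t off2, size_t size2) {
  size_t ub1 = off1 + size1, ub2 = off2 + size2;
  bool lb_inside = (off2 >= off1) && (off2 < ub1);
  bool ub_inside = (ub2 > off1) && (ub2 < ub1);
  return lb_inside || ub_inside;
}

int main() {
  // t1 = [100, 200), t2 = [0, 400): t2 strictly contains t1.
  assert(!OneSidedOverlap(100, 100, 0, 400));  // missed in this direction...
  assert(OneSidedOverlap(0, 400, 100, 100));   // ...caught when the pair is swapped
  return 0;
}
```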
mindspore/ccsrc/backend/optimizer/somas/somas_solver_core.h (new file)
@@ -0,0 +1,101 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_SOMAS_SOMAS_SOLVER_CORE_H_
#define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_SOMAS_SOMAS_SOLVER_CORE_H_

#include <algorithm>
#include <chrono>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

#include "backend/optimizer/somas/somas_solver_alg.h"
#include "backend/optimizer/somas/somas_solver_pre.h"

namespace mindspore {
namespace somas {
class SomasSolverCore {
 public:
  /// Interface function: receives parameters, creates the model to solve, and then saves the result
  SomasSolverCore(const std::unordered_map<size_t, SomasSolverTensorDescPtr> &tensors,
                  const std::shared_ptr<Array> &constraints)
      : tensors_(tensors),
        constraints_(constraints),
        upperbound_(SIZE_MAX),
        timing_(0),
        lifelongmemory_(0),
        verify_(false),
        all_(true),
        best_sol_(0),
        sort_strategy_(kGreaterSizeSmallerIndex),
        branching_strategy_(kBest),
        sol_count_(0),
        algorithm_(kManyObjects) {}
  ~SomasSolverCore() = default;

  Status MemoryAllocationSolver();
  Status Verify();
  bool Verify(const size_t &);
  Status Verify(unordered_map<size_t, SomasSolverTensorDescPtr> *);
  void VerifySolution(const bool verify) { verify_ = verify; }
  void SortTensors();
  void BuildBlocks();
  void Clean();
  void SetBestSolution() { RestoreSolution(best_sol_); }
  void RestoreSolution(uint32_t sol_id);
  void SetSortingStrategy(SortingType sort_strategy) { sort_strategy_ = sort_strategy; }
  void SetFittingStrategy(FittingType branching_strategy) { branching_strategy_ = branching_strategy; }
  void SetAlgorithmStrategy(AlgorithmType algorithm_strategy) { algorithm_ = algorithm_strategy; }
  void SetAllStrategies(bool all) { all_ = all; }
  const size_t &GetUpperbound() const { return upperbound_; }

 private:
  std::unordered_map<size_t, SomasSolverTensorDescPtr> tensors_;
  vector<BlockTensor> block_tensors_;
  std::shared_ptr<Array> constraints_;
  size_t upperbound_{0};
  size_t timing_{0};
  size_t lifelongmemory_{0};
  bool verify_{false};
  bool all_{false};
  uint32_t best_sol_{0};
  SortingType best_sort_;
  FittingType best_branching_;
  SortingType sort_strategy_;
  FittingType branching_strategy_;
  uint32_t sol_count_{0};
  AlgorithmType algorithm_;

  size_t FindSolutions();
  size_t Search(const std::shared_ptr<FootPrint> &pFootprint);
  void AppendLifelongTensors();
  void Destroy(std::shared_ptr<FootPrint> &);

  const std::string sorting_[6] = {"size(>), index(<)",
                                   "size(>), index(>)",
                                   "size(>), constraints(<), index(<)",
                                   "size(>), constraints(<), index(>)",
                                   "size(>), constraints(>), index(<)",
                                   "size(>), constraints(>), index(>)"};
  const std::string branching_[4] = {"bestfit", "smallest", "largest", "worstfit"};
  const std::string algorithm_type_[2] = {"Shared Objects", "Single Object"};
};
}  // namespace somas
}  // namespace mindspore

#endif  // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_SOMAS_SOMAS_SOLVER_CORE_H_
@ -0,0 +1,209 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <cstdio>
|
||||||
|
#include <fstream>
|
||||||
|
#include <memory>
|
||||||
|
#include <string>
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
|
#include "backend/optimizer/somas/somas_solver_core.h"
|
||||||
|
#include "backend/optimizer/somas/somas_solver_pre.h"
|
||||||
|
|
||||||
|
namespace mindspore {
namespace somas {
Status SomasSolverPre::Solving(const session::KernelGraph *graph,
                               std::unordered_map<size_t, SomasSolverTensorDescPtr> *ptensors,
                               std::shared_ptr<Array> pConstraints, const vector<vector<size_t>> &continuous_v,
                               bool bVerifySolution, bool ball, SortingType sorting, FittingType fitting,
                               AlgorithmType algorithm) {
  Status retval = SUCCESS;

  try {
    size_t maxIndex = 0;
    std::unordered_map<size_t, SomasSolverTensorDescPtr> &tensors = *ptensors;
    std::unordered_map<size_t, SomasSolverTensorDescPtr>::iterator max =
      std::max_element(tensors.begin(), tensors.end(),
                       [](const std::pair<size_t, SomasSolverTensorDescPtr> &a,
                          const std::pair<size_t, SomasSolverTensorDescPtr> &b) { return a.first < b.first; });
    maxIndex = max->first;
    if (maxIndex > pConstraints->Rows() - 1) {
      MS_LOG(WARNING) << "ERROR: MaxIndex invalid, MaxIndex " << maxIndex << ", Rows " << pConstraints->Rows();
      return FAILED;
    }
    MS_LOG(INFO) << "Filling in constraints matrix..";
    uint32_t continuous_cnt = 0;

    // creating S Lists
    for (auto &aux : continuous_v) {
      for (uint32_t i = 0; i < aux.size() - 1; i++) {
        uint32_t index1 = aux[i];
        uint32_t index2 = aux[i + 1];
        if (NULL == tensors[index1]) {
          MS_LOG(WARNING) << "NULL tensor received in continuous constraint (tensor index " << index1 << ")";
          return FAILED;
        }
        if (NULL == tensors[index2]) {
          MS_LOG(WARNING) << "NULL tensor received in continuous constraint (tensor index " << index2 << ")";
          return FAILED;
        }

        const size_t continuous = 2;
        (*pConstraints)(index2, index1) = continuous;
        if (tensors[index1]->right_)
          MS_LOG(WARNING) << "Warning: tensor " << index1
                          << " already has a right tensor (id: " << tensors[index1]->right_->index_;
        if (tensors[index2]->left_)
          MS_LOG(WARNING) << "Warning: tensor " << index2
                          << " already has a left tensor (id: " << tensors[index2]->left_->index_;

        tensors[index1]->right_ = tensors[index2];
        tensors[index2]->left_ = tensors[index1];
        continuous_cnt++;
      }
    }
    continuous_cnt++;

    std::shared_ptr<SomasSolverCore> pSolver = std::make_shared<SomasSolverCore>(tensors, pConstraints);
    pSolver->SetAlgorithmStrategy(algorithm);
    pSolver->SetSortingStrategy(sorting);
    pSolver->SetFittingStrategy(fitting);
    pSolver->SetAllStrategies(ball);
    pSolver->VerifySolution(bVerifySolution);

    if (SUCCESS == (pSolver->MemoryAllocationSolver())) {
      max_offset_ = pSolver->GetUpperbound();
      const double giga = 1024. * 1024. * 1024.;
      MS_LOG(INFO) << "SomasSolver::Solving SUCCESS";
      MS_LOG(INFO) << "SomasSolver::Solving RESULT: " << max_offset_ << " (" << max_offset_ / (giga) << " GB)";
    }
    auto context_ptr = MsContext::GetInstance();
    MS_EXCEPTION_IF_NULL(context_ptr);
    bool save_graphs = context_ptr->get_param<bool>(MS_CTX_SAVE_GRAPHS_FLAG);
    if (save_graphs) {
      Log(graph, tensors, pConstraints, continuous_v);
    }
  } catch (const std::exception &e) {
    MS_LOG(EXCEPTION) << "SomasSolver::Solving FAILED: " << e.what();
    retval = FAILED;
  }
  return retval;
}

void SomasSolverPre::Log(const session::KernelGraph *graph,
                         const unordered_map<size_t, SomasSolverTensorDescPtr> &tensors,
                         const std::shared_ptr<Array> &pConstraints, const vector<vector<size_t>> &continuous_v) {
  MS_LOG(INFO) << "SomasSolver::Log Writing somas-input.txt..";

  auto context_ptr = MsContext::GetInstance();
  MS_EXCEPTION_IF_NULL(context_ptr);
  auto save_graphs_path = context_ptr->get_param<std::string>(MS_CTX_SAVE_GRAPHS_PATH);
  std::string filename = save_graphs_path + "/" + "somas_solver_input_" + std::to_string(graph->graph_id()) + ".ir";
  if (filename.size() > PATH_MAX) {
    MS_LOG(ERROR) << "File path " << filename << " is too long.";
    return;
  }
  char real_path[PATH_MAX] = {0};
#if defined(_WIN32) || defined(_WIN64)
  if (_fullpath(real_path, filename.c_str(), PATH_MAX) == nullptr) {
    MS_LOG(DEBUG) << "Dir " << filename << " does not exist.";
  }
#else
  if (realpath(filename.c_str(), real_path) == nullptr) {
    MS_LOG(DEBUG) << "Dir " << filename << " does not exist.";
  }
#endif

  std::string path_string = real_path;
  ChangeFileMode(path_string, S_IRWXU);
  std::ofstream ofs_1(real_path);

  if (!ofs_1.is_open()) {
    MS_LOG(ERROR) << "Open log file '" << real_path << "' failed!";
    return;
  }

  for (auto &t : tensors) {
    ofs_1 << "T " << t.second->index_ << " " << t.second->size_ << " " << t.second->lifelong_ << std::endl;
  }

  for (auto &t1 : tensors) {
    for (auto &t2 : tensors) {
      size_t idx1 = t1.first;
      size_t idx2 = t2.first;
      if ((idx1 != idx2) && (*pConstraints)(idx1, idx2) == 1) {
        ofs_1 << "C " << idx1 << " " << idx2 << std::endl;
      }
    }
  }
  for (auto &s : continuous_v) {
    ofs_1 << "S";
    for (auto idx : s) {
      ofs_1 << " " << idx;
    }
    ofs_1 << std::endl;
  }
  ofs_1.close();

  MS_LOG(INFO) << "SomasSolver::Log Writing somas-output.txt..";
  std::string out_filename =
    save_graphs_path + "/" + "somas_solver_output_" + std::to_string(graph->graph_id()) + ".ir";
  if (out_filename.size() > PATH_MAX) {
    MS_LOG(ERROR) << "File path " << out_filename << " is too long.";
    return;
  }
#if defined(_WIN32) || defined(_WIN64)
  if (_fullpath(real_path, out_filename.c_str(), PATH_MAX) == nullptr) {
    MS_LOG(DEBUG) << "Dir " << out_filename << " does not exist.";
  }
#else
  if (realpath(out_filename.c_str(), real_path) == nullptr) {
    MS_LOG(DEBUG) << "Dir " << out_filename << " does not exist.";
  }
#endif
  path_string = real_path;
  ChangeFileMode(path_string, S_IRWXU);
  std::ofstream ofs_2(real_path);

  if (!ofs_2.is_open()) {
    MS_LOG(ERROR) << "Open log file '" << real_path << "' failed!";
    return;
  }

  for (auto &t : tensors) {
    SomasSolverTensorDescPtr tensor = t.second;
    int continuous = 0;
    if (tensor->left_ == NULL && tensor->right_ != NULL)
      continuous = 1;
    else if (tensor->left_ != NULL && tensor->right_ != NULL)
      continuous = 2;
    else if (tensor->left_ != NULL && tensor->right_ == NULL)
      continuous = 3;
    const size_t alignment = 512;
    bool size_aligned = tensor->size_ % alignment == 0;
    bool offset_aligned = tensor->offset_ % alignment == 0;

    ofs_2 << std::endl
          << "tensor_id=" << tensor->index_ << "\tsize=" << tensor->size_ << "\toffset=" << tensor->offset_
          << "\tcontinuous=" << continuous << "\tsize_aligned=" << size_aligned
          << "\toffset_aligned=" << offset_aligned;
  }
  ofs_2.close();

  MS_LOG(INFO) << "SomasSolver::Log done";
}
}  // namespace somas
}  // namespace mindspore
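As a worked illustration of the input format `Log` writes (an assumed example, not captured from a real run): each tensor becomes a `T index size lifelong` line, each pair whose constraints-matrix cell is still 1 becomes a `C i j` conflict line, and each continuous set becomes an `S` line. Three 1024-byte tensors where tensors 1 and 2 are continuous (so only cell (2, 1) was overwritten with 2) would dump, up to unordered_map iteration order, as:

T 0 1024 0
T 1 1024 0
T 2 1024 0
C 0 1
C 0 2
C 1 0
C 1 2
C 2 0
S 1 2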
@ -0,0 +1,159 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_SOMAS_SOMAS_SOLVER_PRE_H_
#define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_SOMAS_SOMAS_SOLVER_PRE_H_

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstring>
#include <iostream>
#include <map>
#include <memory>
#include <stack>
#include <unordered_map>
#include <vector>
#include "backend/session/kernel_graph.h"

using std::unordered_map;
using std::vector;

namespace mindspore {
namespace somas {
enum Status { FAILED, SUCCESS };
enum AlgorithmType { kManyObjects = 0, kSingleObject, kNumAlgorithmTypes };
enum SortingType {
  kGreaterSizeSmallerIndex = 0,
#ifdef SOMAS_DEBUG
  kGreaterSizeGreaterIndex,
  kGreaterSizeSmallerConstraintsSmallerIndex,
  kGreaterSizeSmallerConstraintsGreaterIndex,
  kGreaterSizeGreaterConstraintsSmallerIndex,
  kGreaterSizeGreaterConstraintsGreaterIndex,
#endif
  kNumSortingTypes
};
enum FittingType {
  kBest = 0,
  kSmallest,
#ifdef SOMAS_DEBUG
  kLargest,
  kWorst,
#endif
  kNumFittingTypes
};

class Array {
 public:
  Array(const size_t &rows, const size_t &cols) : rows_(rows), cols_(cols) {
    conflicts_array_ = std::make_unique<int[]>(rows * cols);
    for (uint32_t i = 0; i < rows * cols; i++) {
      conflicts_array_[i] = 1;
    }
  }

  Array(const Array &array) : rows_(array.rows_), cols_(array.cols_) {
    conflicts_array_ = std::make_unique<int[]>(array.rows_ * array.cols_);
    for (uint32_t i = 0; i < array.rows_ * array.cols_; i++) {
      conflicts_array_[i] = array.conflicts_array_[i];
    }
  }

  Array &operator=(const Array &array) { return *this; }

  int &operator()(const size_t &i, const size_t &j) {
    assert((i * cols_ + j) < (rows_ * cols_));
    return conflicts_array_[i * cols_ + j];
  }

  const size_t &Rows() { return rows_; }
  const size_t &Cols() { return cols_; }

 private:
  const size_t rows_;
  const size_t cols_;
  std::unique_ptr<int[]> conflicts_array_;
};

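// Editor's note (not part of the original commit): Array is a dense row-major
// conflict matrix. Every cell is initialized to 1, Solving() overwrites cell
// (index2, index1) with 2 for continuous pairs, and Log() treats a remaining 1
// as a conflict ("C" line). A sketch of the intended usage:
//   Array constraints(3, 3);    // 3 tensors, all pairs start as conflicts (1)
//   constraints(2, 1) = 2;      // tensors 1 and 2 must be continuous
//   int c = constraints(0, 1);  // row-major lookup at [0 * cols_ + 1]
// Also note the copy-assignment operator above returns *this without copying:
// rows_ and cols_ are const, so no state can be reassigned through it.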
struct SomasSolverTensorDesc {
  size_t index_;
  size_t size_;
  size_t offset_;
  bool lifelong_;
  size_t constraints_;
  using SomasSolverTensorDescPtr = std::shared_ptr<SomasSolverTensorDesc>;
  SomasSolverTensorDescPtr right_;
  SomasSolverTensorDescPtr left_;
  bool blocked_;

  SomasSolverTensorDesc() = default;

  SomasSolverTensorDesc(size_t index, size_t size, size_t offset, bool blifelong)
      : index_(index), size_(size), offset_(offset), lifelong_(blifelong) {
    constraints_ = 0;
    right_ = NULL;
    left_ = NULL;
    blocked_ = false;
  }

  void Update(size_t index, size_t size, size_t offset, bool blifelong, size_t constraints) {
    index_ = index;
    size_ = size;
    offset_ = offset;
    lifelong_ = blifelong;
    constraints_ = constraints;
  }

  friend std::ostream &operator<<(std::ostream &out, const SomasSolverTensorDescPtr n) {
    out << n->index_ << " " << n->size_ << " " << n->offset_ << "\n";
    return out;
  }
  friend std::istream &operator>>(std::istream &in, SomasSolverTensorDescPtr n) {
    in >> n->index_ >> n->size_ >> n->offset_;
    return in;
  }
};
using SomasSolverTensorDescPtr = std::shared_ptr<SomasSolverTensorDesc>;

class SomasSolverPre {
 public:
  SomasSolverPre() = default;
  ~SomasSolverPre() = default;

  SomasSolverPre(const SomasSolverPre &) = delete;
  SomasSolverPre &operator=(const SomasSolverPre &) = delete;

  size_t GetMaxOffset() { return max_offset_; }

  Status Solving(const session::KernelGraph *graph, std::unordered_map<size_t, SomasSolverTensorDescPtr> *tensors,
                 std::shared_ptr<Array> pConstraints, const vector<vector<size_t>> &continuous_v,
                 bool bVerifySolution,  // true -> Check continuous and non overlapping constraints solution
                 bool ball = true,      // true -> run full set of heuristics, false -> run single heuristic specified
                 SortingType sorting = kGreaterSizeSmallerIndex, FittingType fitting = kBest,
                 AlgorithmType algorithm = kManyObjects);

  void Log(const session::KernelGraph *graph, const unordered_map<size_t, SomasSolverTensorDescPtr> &tensors,
           const std::shared_ptr<Array> &pConstraints_v, const vector<vector<size_t>> &continuous_v);

 private:
  size_t max_offset_;
};
using SomasSolverPrePtr = std::shared_ptr<SomasSolverPre>;
}  // namespace somas
}  // namespace mindspore

#endif  // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_SOMAS_SOMAS_SOLVER_PRE_H_
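To make the interface above concrete, here is a minimal hypothetical driver for the pre-solver (a sketch, not part of the commit): the kernel-graph pointer `graph` is assumed to exist and the tensor sizes are made up, while everything else follows the declarations in this header.

std::unordered_map<size_t, SomasSolverTensorDescPtr> tensors;
for (size_t i = 0; i < 3; i++) {
  // three 1024-byte, non-lifelong tensors with unassigned offsets
  tensors[i] = std::make_shared<SomasSolverTensorDesc>(i, 1024, 0, false);
}
auto constraints = std::make_shared<Array>(3, 3);  // all pairs start as conflicts
vector<vector<size_t>> continuous = {{1, 2}};      // tensors 1 and 2 must be contiguous
SomasSolverPre solver_pre;
Status status = solver_pre.Solving(graph, &tensors, constraints, continuous,
                                   /*bVerifySolution=*/true);  // defaults: all heuristics, best fit
if (status == SUCCESS) {
  size_t arena_bytes = solver_pre.GetMaxOffset();  // size of the single reused arena
}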
@ -0,0 +1,53 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "backend/optimizer/somas/somas_stream.h"

namespace mindspore {
namespace somas {
void SomasStream::ComputeAncestorStreams() {
  // (Naive) algorithm: for a given stream, compute its ancestors assuming only distance 1 ancestors are known
  // (handles cycles between streams)
  std::set<SomasStreamPtr> current_level, temp_level, already_visited;
  auto thisPtr = std::make_shared<SomasStream>(id_);
  already_visited.insert(thisPtr);
  // Initialize current level to distance 2 ancestors
  for (auto stream1 : ancestor_streams_) {
    already_visited.insert(stream1);
    for (auto stream2 : stream1->ancestor_streams_) {
      if (std::find(already_visited.begin(), already_visited.end(), stream2) == already_visited.end())
        current_level.insert(stream2);
    }
  }

  while (!current_level.empty()) {
    // Push current level into ancestors
    for (auto stream1 : current_level) {
      ancestor_streams_.insert(stream1);
      already_visited.insert(stream1);
      // Keep next level of this ancestor
      for (auto stream2 : stream1->ancestor_streams_) {
        if (std::find(already_visited.begin(), already_visited.end(), stream2) == already_visited.end())
          temp_level.insert(stream2);
      }
    }
    current_level.clear();
    current_level = temp_level;
    temp_level.clear();
  }
}
}  // namespace somas
}  // namespace mindspore
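The loop above is a breadth-first transitive closure: each pass promotes one more level of ancestors into `ancestor_streams_` until no unvisited stream remains. A worked trace on an assumed three-stream cycle A → B → C → A (each stream's only distance-1 ancestor is the next one), computed for A:

  start:       ancestor_streams_ = {B}, already_visited = {copy-of-A, B}
  level init:  current_level = {C}  (B's ancestor, not yet visited)
  iteration 1: ancestors = {B, C}, next level = {A}  (C's ancestor)
  iteration 2: ancestors = {A, B, C}, next level empty, loop ends

Note that `thisPtr` is a fresh `SomasStream` sharing only the id, not a pointer to the current stream, so on a cycle the stream ends up in its own ancestor set, as the trace shows.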
@ -0,0 +1,61 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_SOMAS_SOMAS_STREAM_H_
#define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_SOMAS_SOMAS_STREAM_H_

#include "backend/optimizer/somas/somas_node.h"
#include "backend/optimizer/somas/somas_tensor.h"

#include <memory>
#include <set>
#include <vector>

namespace mindspore {
namespace somas {
class SomasNode;
class SomasTensor;

using SomasTensorPtr = std::shared_ptr<SomasTensor>;

class SomasStream {
 public:
  using SomasStreamPtr = std::shared_ptr<SomasStream>;

  // Attributes mutated in code
  std::vector<SomasTensorPtr> tensors_;  // vector needed for same-stream loop in ConflictComputing()
  std::set<SomasStreamPtr> ancestor_streams_;
  std::set<SomasStreamPtr> ancestor_streams_group_;

  // Constructors/Destructors
  explicit SomasStream(int64_t id) : id_(id) {}
  SomasStream(const SomasStream &) = delete;
  SomasStream &operator=(const SomasStream &) = delete;
  ~SomasStream() = default;

  // Accessors
  const int64_t &GetId() const { return id_; }

  // Ancestor Computing
  void ComputeAncestorStreams();  // Given "ancestors at distance one" information, compute "ancestors at any distance"

 private:
  const int64_t id_{0};
};
}  // namespace somas
}  // namespace mindspore

#endif  // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_SOMAS_SOMAS_STREAM_H_
@ -0,0 +1,64 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "backend/optimizer/somas/somas_tensor.h"
#include "backend/optimizer/somas/somas_node.h"
#include "backend/optimizer/somas/somas_stream.h"
#include "backend/optimizer/somas/somas.h"

namespace mindspore {
namespace somas {
SomasTensor::SomasTensor(size_t id, SomasNodePtr source_node, SomasStreamPtr source_stream, size_t real_size,
                         LifeLongType lifelong_value)
    : lifelong_value_(lifelong_value),
      type_(kUnknown),
      offset_(0),
      id_(id),
      source_node_(source_node),
      source_stream_(source_stream),
      original_size_(real_size) {
  const size_t alignment = 512;
  const size_t alignment_complement = 31;
  aligned_size_ = (real_size > 0) ? (real_size + alignment + alignment_complement) / alignment * alignment : 0;

  solver_tensor_desc_ = std::make_shared<SomasSolverTensorDesc>(id_, aligned_size_, offset_, false);

  ref_overlap_ = false;
  between_streams_ = false;
  num_constraints_ = 0;
}

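// Editor's note: worked examples of the rounding above (alignment = 512,
// alignment_complement = 31, integer division truncates):
//   real_size = 1    -> (1 + 543) / 512 * 512    = 512
//   real_size = 512  -> (512 + 543) / 512 * 512  = 1024  (one extra block even when already aligned)
//   real_size = 1000 -> (1000 + 543) / 512 * 512 = 1536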
SomasSolverTensorDescPtr SomasTensor::GetSolverTensorDesc() {
  if (type_ == kGap) {  // ignore lifelong_ value for gaps given to solver, and pass with original_size_
    solver_tensor_desc_->Update(id_, original_size_, offset_, false, num_constraints_);
  } else {
    solver_tensor_desc_->Update(id_, aligned_size_, offset_, lifelong_value_ == kLifeLongGraphAll, num_constraints_);
  }
  if (aligned_size_ == 0) {  // ignore zero-size tensors for solver
    return nullptr;
  } else {
    return solver_tensor_desc_;
  }
}

void SomasTensor::ComputeMaxDestinationId() {
  for (SomasStreamPtr stream : destinationStreams_) max_destination_id_[stream] = 0;

  for (SomasNodePtr node : destinations_)
    if (node->GetId() > max_destination_id_[node->GetStream()]) max_destination_id_[node->GetStream()] = node->GetId();
}
}  // namespace somas
}  // namespace mindspore
@ -0,0 +1,129 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_SOMAS_SOMAS_TENSOR_H_
#define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_SOMAS_SOMAS_TENSOR_H_

#include <memory>
#include <set>
#include <unordered_map>
#include <vector>

#include "backend/optimizer/somas/somas_node.h"
#include "backend/optimizer/somas/somas_solver_pre.h"
#include "backend/optimizer/somas/somas_stream.h"

namespace mindspore {
namespace somas {
class SomasNode;
class SomasStream;

// Lifetime type
struct Lifetime {
  size_t start_;
  size_t end_;

  explicit Lifetime(size_t start = 0, size_t end = 0) : start_(start), end_(end) {}
};
using lifetime_t = struct Lifetime;

// Tensor type
enum TensorType {
  kCommon,
  kOutputOnly,
  kWorkspace,
  kGetNextOutput,
  kSummaryInput,
  kRefNodeInput,
  kRefNodeOutput,
  kGap,
  kUnknown
};

enum LifeLongType {
  kLifeLongNone,        // life time is from tensor start to tensor end
  kLifeLongGraphAll,    // life time is from graph start to graph end
  kLifeLongGraphStart,  // life time is from graph start to tensor end
  kLifeLongGraphEnd     // life time is from tensor start to graph end
};

using SomasNodePtr = std::shared_ptr<SomasNode>;
using SomasStreamPtr = std::shared_ptr<SomasStream>;

class SomasTensor {
 public:
  using SomasTensorPtr = std::shared_ptr<SomasTensor>;

  size_t aligned_size_{0};
  LifeLongType lifelong_value_;

  bool ref_overlap_;
  bool between_streams_;

  lifetime_t lifetime_;
  TensorType type_;

  size_t offset_{0};
  size_t num_constraints_{0};

  std::set<SomasNodePtr> destinations_;
  std::set<SomasStreamPtr> destinationStreams_;
  unordered_map<SomasStreamPtr, size_t> max_destination_id_;

  // Constructors/Destructors
  explicit SomasTensor(size_t id, SomasNodePtr source_node, SomasStreamPtr source_stream, size_t real_size,
                       LifeLongType lifelong_value = kLifeLongNone);
  SomasTensor(const SomasTensor &) = delete;
  SomasTensor &operator=(const SomasTensor &) = delete;
  ~SomasTensor() = default;

  // Accessors
  const size_t &GetId() { return id_; }
  SomasNodePtr GetSourceNode() const { return source_node_; }
  SomasStreamPtr GetSourceStream() const { return source_stream_; }
  const size_t &GetOriginalSize() { return original_size_; }
  const size_t &GetAlignedSize() { return aligned_size_; }
  bool IsLifelong() { return lifelong_value_ == kLifeLongGraphAll; }
  bool IsWorkspace() { return type_ == kWorkspace; }
  bool IsOutputOnly() { return type_ == kOutputOnly; }
  size_t GetOffset() { return offset_; }
  bool IsBetweenStreams() { return between_streams_; }
  bool IsSemiLifelongStart() { return lifelong_value_ == kLifeLongGraphStart; }
  bool IsSemiLifelongEnd() { return lifelong_value_ == kLifeLongGraphEnd; }
  bool IsRefOverlap() { return ref_overlap_; }
  bool IsGap() { return type_ == kGap; }

  // Computing functions
  void SetOffset(size_t start_offset = 0) {
    if (aligned_size_ != 0 && type_ != kGetNextOutput) {
      offset_ = start_offset + solver_tensor_desc_->offset_;
    }
  }
  SomasSolverTensorDescPtr GetSolverTensorDesc();
  void ComputeMaxDestinationId();

 private:
  const size_t id_{0};
  const SomasNodePtr source_node_;
  SomasStreamPtr const source_stream_;
  const size_t original_size_{0};

  SomasSolverTensorDescPtr solver_tensor_desc_;
};
}  // namespace somas
}  // namespace mindspore

#endif  // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_SOMAS_SOMAS_TENSOR_H_
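Putting the tensor interface together, a minimal hypothetical flow (the `node`, `stream`, and `base` values are assumptions; the calls follow the declarations above):

auto tensor = std::make_shared<SomasTensor>(0, node, stream, 1000);  // aligned_size_ becomes 1536
SomasSolverTensorDescPtr desc = tensor->GetSolverTensorDesc();       // nullptr if aligned_size_ == 0
// ... solver runs and fills desc->offset_ ...
tensor->SetOffset(base);  // offset_ = base + solver-assigned offset (skipped for kGetNextOutput)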
@ -1,3 +1,4 @@
 /**
  * Copyright 2019 Huawei Technologies Co., Ltd
  *
@ -15,21 +16,21 @@
  */

 #include "runtime/device/kernel_runtime.h"
-#include <vector>
-#include <utility>
-#include <numeric>
 #include <functional>
-#include "utils/ms_utils.h"
-#include "common/trans.h"
-#include "utils/utils.h"
-#include "utils/ms_context.h"
-#include "frontend/operator/ops.h"
-#include "backend/session/kernel_graph.h"
-#include "backend/session/anf_runtime_algorithm.h"
+#include <numeric>
+#include <utility>
+#include <vector>
 #include "backend/optimizer/common/helper.h"
+#include "backend/session/anf_runtime_algorithm.h"
+#include "backend/session/kernel_graph.h"
+#include "common/trans.h"
 #include "debug/data_dump/dump_json_parser.h"
+#include "frontend/operator/ops.h"
 #include "ir/value.h"
+#include "utils/ms_context.h"
+#include "utils/ms_utils.h"
 #include "utils/shape_utils.h"
+#include "utils/utils.h"
 using mindspore::kernel::Address;
 using mindspore::kernel::AddressPtr;

@ -440,6 +441,9 @@ void KernelRuntime::AssignCommunicationNodeOutputMem(MemType type, const AnfNode
   if (type == kReuseDynamicMem) {
     // reuse communication op's all outputs' memory
     type = kReuseDynamicCommMem;
+  }
+
+  if (type == kReuseDynamicCommMem || type == kSomasReuseDynamicMem) {
     bool not_reuse = KernelMemNotReuse(node);
     if (not_reuse) {
       type = kDynamicMem;

@ -504,7 +508,7 @@ void KernelRuntime::AssignCommunicationNodeInputMem(MemType type, const AnfNodeP
     return;
   }

-  if (type == kReuseDynamicMem) {
+  if (type == kReuseDynamicMem || type == kSomasReuseDynamicMem) {
     bool not_reuse = KernelMemNotReuse(node);
     if (not_reuse) {
       type = kDynamicMem;

@ -530,13 +534,13 @@ void KernelRuntime::AssignNodeOutputMem(MemType type, const AnfNodePtr &node, in

   if (node->isa<CNode>()) {
     bool independent = AnfAlgo::IsIndependentNode(node->cast<CNodePtr>());
-    if (independent && type == kReuseDynamicMem) {
-      MS_LOG(INFO) << "Independent disable mem_reuse";
+    if (independent && (type == kReuseDynamicMem || type == kSomasReuseDynamicMem)) {
+      MS_LOG(INFO) << "Independent node " << node->fullname_with_scope() << " disable memory reuse";
       type = kDynamicMem;
     }
   }

-  if (type == kReuseDynamicMem) {
+  if (type == kReuseDynamicMem || type == kSomasReuseDynamicMem) {
     bool not_reuse = KernelMemNotReuse(node);
     if (not_reuse) {
       type = kDynamicMem;

@ -671,8 +675,13 @@ void KernelRuntime::AssignDynamicMemory(session::KernelGraph *graph) {

   if (is_enable_mem_reuse) {
     MS_LOG(INFO) << "Memory Reuse is enable...";
+#ifdef MEM_REUSE_DEBUG
     mem_manager_->MallocReusedDynamicMem(graph);
     mem_type = kReuseDynamicMem;
+#else
+    mem_manager_->MallocSomasDynamicMem(graph);
+    mem_type = kSomasReuseDynamicMem;
+#endif
   } else {
     MS_LOG(INFO) << "Memory Reuse is disable...";
   }
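Net effect of the hunk above: the preprocessor switch inverts the old default. When memory reuse is enabled, builds compiled with `MEM_REUSE_DEBUG` keep the original best-fit path (`MallocReusedDynamicMem` with `kReuseDynamicMem`), while all other builds now route through the SOMAS allocator (`MallocSomasDynamicMem` with `kSomasReuseDynamicMem`).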
@ -15,6 +15,7 @@
  */

 #include "runtime/device/memory_manager.h"
+#include <string>
 #include "backend/session/anf_runtime_algorithm.h"
 #include "utils/ms_context.h"
 using mindspore::memreuse::BestFitMemReuse;

@ -47,6 +48,40 @@ void MemoryManager::MallocReusedDynamicMem(const session::KernelGraph *graph) {
   mem_reuse_util_ptr_->set_mem_base(base_ptr);
 }

+void MemoryManager::MallocSomasDynamicMem(const session::KernelGraph *graph) {
+  MS_EXCEPTION_IF_NULL(graph);
+  SomasPtr somas_reuse_util_ptr = std::make_shared<somas::Somas>();
+  MS_EXCEPTION_IF_NULL(somas_reuse_util_ptr);
+  somas_reuse_util_ptr_ = somas_reuse_util_ptr;
+
+  if (!(somas_reuse_util_ptr->Allocate(graph))) {
+    MS_LOG(EXCEPTION) << "Somas Allocate Failed.";
+  }
+
+  size_t total_allocated_size = somas_reuse_util_ptr->GetTotalMemSize();
+  MS_LOG(INFO) << "Graph " << graph->graph_id() << ": TotalSomasReuseDynamicSize [" << total_allocated_size << "]";
+  auto base_ptr = MallocDynamicMem(total_allocated_size, false);
+  MS_LOG(INFO) << "Somas Reuse Memory Base Address [" << static_cast<void *>(base_ptr) << "], End Address ["
+               << static_cast<void *>(base_ptr + total_allocated_size) << "]";
+  somas_reuse_util_ptr->set_mem_base_addr(base_ptr);
+
+  auto context_ptr = MsContext::GetInstance();
+  MS_EXCEPTION_IF_NULL(context_ptr);
+  bool save_graphs = context_ptr->get_param<bool>(MS_CTX_SAVE_GRAPHS_FLAG);
+  auto save_graphs_path = context_ptr->get_param<std::string>(MS_CTX_SAVE_GRAPHS_PATH);
+  if (save_graphs_path.empty()) {
+    save_graphs_path = ".";
+  }
+  if (save_graphs) {
+    std::string file_path =
+      save_graphs_path + "/" + "somas_after_allocate_" + std::to_string(graph->graph_id()) + ".ir";
+    somas_reuse_util_ptr_->DumpSomasBasicIR(file_path);
+
+    std::string mem_file_path = save_graphs_path + "/" + "somas_mem_info_" + std::to_string(graph->graph_id()) + ".ir";
+    somas_reuse_util_ptr_->DumpSomasMemoryIR(mem_file_path);
+  }
+}
+
 uint8_t *MemoryManager::MallocOutputMem(const AnfNodePtr &node, size_t index, MemType type, size_t size,
                                         const DeviceAddressPtr &address) {
   MS_EXCEPTION_IF_NULL(node);

@ -68,6 +103,9 @@ uint8_t *MemoryManager::MallocOutputMem(const AnfNodePtr &node, size_t index, Me
   } else if (type == kReuseDynamicCommMem) {
     MS_EXCEPTION_IF_NULL(mem_reuse_util_ptr_);
     ptr = mem_reuse_util_ptr_->GetNodeOutputPtr(node, index);
+  } else if (type == kSomasReuseDynamicMem) {
+    MS_EXCEPTION_IF_NULL(somas_reuse_util_ptr_);
+    ptr = somas_reuse_util_ptr_->GetNodeOutputPtr(node, index);
   } else {
     ptr = MallocDynamicMem(size, communication_mem);
   }

@ -83,6 +121,9 @@ uint8_t *MemoryManager::MallocOutputMem(const AnfNodePtr &node, size_t index, Me
   } else if (type == kReuseDynamicMem) {
     MS_EXCEPTION_IF_NULL(mem_reuse_util_ptr_);
     ptr = mem_reuse_util_ptr_->GetNodeOutputPtr(node, index);
+  } else if (type == kSomasReuseDynamicMem) {
+    MS_EXCEPTION_IF_NULL(somas_reuse_util_ptr_);
+    ptr = somas_reuse_util_ptr_->GetNodeOutputPtr(node, index);
   }
   address->ptr_ = ptr;
   return ptr;

@ -92,6 +133,9 @@ uint8_t *MemoryManager::MallocWorkSpaceMem(const AnfNodePtr &node, size_t index,
   if (type == kReuseDynamicMem) {
     MS_EXCEPTION_IF_NULL(mem_reuse_util_ptr_);
     return mem_reuse_util_ptr_->GetNodeWorkSpacePtr(node, index);
+  } else if (type == kSomasReuseDynamicMem) {
+    MS_EXCEPTION_IF_NULL(somas_reuse_util_ptr_);
+    return somas_reuse_util_ptr_->GetNodeWorkSpacePtr(node, index);
   }
   return MallocDynamicMem(size, false);
 }
@ -17,16 +17,18 @@
 #ifndef MINDSPORE_CCSRC_RUNTIME_DEVICE_MEMORY_MANAGER_H_
 #define MINDSPORE_CCSRC_RUNTIME_DEVICE_MEMORY_MANAGER_H_
 #include <memory>
-#include <vector>
 #include <utility>
+#include <vector>
 #include "backend/optimizer/mem_reuse/mem_reuse.h"
 #include "backend/optimizer/mem_reuse/mem_reuse_allocator.h"
+#include "backend/optimizer/somas/somas.h"
 namespace mindspore {
 namespace device {
-enum MemType { kStaticMem, kDynamicMem, kReuseDynamicMem, kReuseDynamicCommMem };
+enum MemType { kStaticMem, kDynamicMem, kReuseDynamicMem, kSomasReuseDynamicMem, kReuseDynamicCommMem };
 const int kGetAllOuts = -1;
 const uint64_t kMemAlignSize = 512;
 using MemReuseUtilPtr = mindspore::memreuse::MemReuseUtilPtr;
+using SomasPtr = mindspore::somas::SomasPtr;

 class MemoryManager {
  public:

@ -42,6 +44,7 @@ class MemoryManager {
   virtual void ClearGlobalIdleMem() {}

   void MallocReusedDynamicMem(const session::KernelGraph *graph);
+  void MallocSomasDynamicMem(const session::KernelGraph *graph);
   uint8_t *MallocOutputMem(const AnfNodePtr &node, size_t index, MemType type, size_t size,
                            const DeviceAddressPtr &address);
   uint8_t *MallocWorkSpaceMem(const AnfNodePtr &node, size_t index, MemType type, size_t size);

@ -68,6 +71,7 @@ class MemoryManager {
   size_t total_static_size_ = 0;
   size_t total_dynamic_size_ = 0;
   MemReuseUtilPtr mem_reuse_util_ptr_{nullptr};
+  SomasPtr somas_reuse_util_ptr_{nullptr};
 };
 }  // namespace device
 }  // namespace mindspore
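Taken together with the `memory_manager.cc` hunks, the new `kSomasReuseDynamicMem` value threads one solved arena through the existing malloc entry points. A hypothetical call sequence (names come from this diff; `node`, `size`, and `address` are assumed to be valid):

mem_manager_->MallocSomasDynamicMem(graph);  // solve once, reserve a single arena for the graph
MemType mem_type = kSomasReuseDynamicMem;    // selected in KernelRuntime::AssignDynamicMemory
uint8_t *out = mem_manager_->MallocOutputMem(node, 0, mem_type, size, address);
// internally dispatches to somas_reuse_util_ptr_->GetNodeOutputPtr(node, 0)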