!47731 Adjust parallel log for strategy

Merge pull request !47731 from liuluobin/master_refactor
This commit is contained in:
i-robot 2023-01-13 08:56:21 +00:00 committed by Gitee
commit 66b258841e
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
13 changed files with 109 additions and 101 deletions

View File

@ -339,14 +339,14 @@ CostPtr Edge::GetCostByStrategyPair(const CostPtrKey &stra_pair) {
}
auto cost_vec = cost_map_[stra_pair];
if (cost_vec.empty()) {
PrintStrategy(stra_pair.first);
PrintStrategy(stra_pair.second);
MS_LOG(EXCEPTION) << "No available cost under current strategy pair of the edge: " << edge_name_;
MS_LOG(EXCEPTION) << "stra_pair.first: " << stra_pair.first->ToString() << ", "
<< "stra_pair.second: " << stra_pair.second->ToString() << ". "
<< "No available cost under current strategy pair of the edge: " << edge_name_;
}
if (cost_vec.size() > 1) {
PrintStrategy(stra_pair.first);
PrintStrategy(stra_pair.second);
MS_LOG(INFO) << "Multiple costs available under the stratey pair of the edge: " << edge_name_;
MS_LOG(INFO) << "stra_pair.first: " << stra_pair.first->ToString() << ", "
<< "stra_pair.second: " << stra_pair.second->ToString() << ". "
<< "Multiple costs available under the stratey pair of the edge: " << edge_name_;
}
return cost_vec[0];
}
@ -374,30 +374,32 @@ StrategyPtr Edge::GetNextOpStrategyByPrevOpStrategyWithMiniComm(const StrategyPt
return nullptr;
}
MS_LOG(WARNING) << "Inconsistency occurred at edge: " << edge_name();
std::sort(next_stras.begin(), next_stras.end(),
[this](const std::pair<StrategyPtr, double> &a, const std::pair<StrategyPtr, double> &b) {
return !IsDoubleEqual(a.second, b.second) ? a.second < b.second : a.first->Compare(b.first);
});
return next_stras[0].first;
auto min_stra =
std::min_element(next_stras.begin(), next_stras.end(),
[this](const std::pair<StrategyPtr, double> &a, const std::pair<StrategyPtr, double> &b) {
return !IsDoubleEqual(a.second, b.second) ? a.second < b.second : a.first->Compare(b.first);
});
return min_stra->first;
}
if (next_op_stras.size() > 1) {
MS_LOG(INFO) << "There are multiple strategies for edge: " << edge_name_
<< " with zero communication cost, choose the one with minimum computation costs.";
}
auto next_op = next_op_;
std::sort(next_op_stras.begin(), next_op_stras.end(),
[this, &next_op](const std::pair<StrategyPtr, double> &a, const std::pair<StrategyPtr, double> &b) {
if (!IsDoubleEqual(a.second, b.second)) {
return a.second < b.second;
}
auto cost_a = next_op->GetCostByStrategyPtr(a.first)[0]->communication_without_parameter_;
auto cost_b = next_op->GetCostByStrategyPtr(b.first)[0]->communication_without_parameter_;
if (!IsDoubleEqual(cost_a, cost_b)) {
return cost_a < cost_b;
}
return a.first->Compare(b.first);
});
return next_op_stras[0].first;
auto min_next_op_stra = std::min_element(
next_op_stras.begin(), next_op_stras.end(),
[this, &next_op](const std::pair<StrategyPtr, double> &a, const std::pair<StrategyPtr, double> &b) {
if (!IsDoubleEqual(a.second, b.second)) {
return a.second < b.second;
}
auto cost_a = next_op->GetCostByStrategyPtr(a.first)[0]->communication_without_parameter_;
auto cost_b = next_op->GetCostByStrategyPtr(b.first)[0]->communication_without_parameter_;
if (!IsDoubleEqual(cost_a, cost_b)) {
return cost_a < cost_b;
}
return a.first->Compare(b.first);
});
return min_next_op_stra->first;
}
StrategyPtr Edge::GetPrevOpStrategyByNextOpStrategyWithMiniComm(const StrategyPtr &next_op_stra) {
@ -423,30 +425,32 @@ StrategyPtr Edge::GetPrevOpStrategyByNextOpStrategyWithMiniComm(const StrategyPt
return nullptr;
}
MS_LOG(WARNING) << "Inconsistency occurred at edge: " << edge_name();
std::sort(prev_stras.begin(), prev_stras.end(),
[this](const std::pair<StrategyPtr, double> &a, const std::pair<StrategyPtr, double> &b) {
return !IsDoubleEqual(a.second, b.second) ? a.second < b.second : a.first->Compare(b.first);
});
return prev_stras[0].first;
auto min_prev_stra =
std::min_element(prev_stras.begin(), prev_stras.end(),
[this](const std::pair<StrategyPtr, double> &a, const std::pair<StrategyPtr, double> &b) {
return !IsDoubleEqual(a.second, b.second) ? a.second < b.second : a.first->Compare(b.first);
});
return min_prev_stra->first;
}
if (prev_op_stras.size() > 1) {
MS_LOG(INFO) << "There are multiple strategies for edge: " << edge_name_
<< " with zero communication costs, choose the one with minimum computation costs.";
}
auto prev_op = prev_op_;
std::sort(prev_op_stras.begin(), prev_op_stras.end(),
[this, &prev_op](const std::pair<StrategyPtr, double> &a, const std::pair<StrategyPtr, double> &b) {
if (!IsDoubleEqual(a.second, b.second)) {
return a.second < b.second;
}
auto cost_a = prev_op->GetCostByStrategyPtr(a.first)[0]->communication_without_parameter_;
auto cost_b = prev_op->GetCostByStrategyPtr(b.first)[0]->communication_without_parameter_;
if (!IsDoubleEqual(cost_a, cost_b)) {
return cost_a < cost_b;
}
return a.first->Compare(b.first);
});
return prev_op_stras[0].first;
auto min_prev_op_stra = std::min_element(
prev_op_stras.begin(), prev_op_stras.end(),
[this, &prev_op](const std::pair<StrategyPtr, double> &a, const std::pair<StrategyPtr, double> &b) {
if (!IsDoubleEqual(a.second, b.second)) {
return a.second < b.second;
}
auto cost_a = prev_op->GetCostByStrategyPtr(a.first)[0]->communication_without_parameter_;
auto cost_b = prev_op->GetCostByStrategyPtr(b.first)[0]->communication_without_parameter_;
if (!IsDoubleEqual(cost_a, cost_b)) {
return cost_a < cost_b;
}
return a.first->Compare(b.first);
});
return min_prev_op_stra->first;
}
int64_t Edge::GetReshapeSWCIndexByNextOpStrategy(const StrategyPtr &next_op_stra) {
@ -540,9 +544,8 @@ bool Edge::CheckStrategyConsistency(StrategyPtr prev_stra, StrategyPtr next_stra
}
auto cost = GetCostByStrategyPair({prev_stra, next_stra});
if (cost == nullptr || cost->communication_cost_ > 0.0) {
MS_LOG(INFO) << "The edge " << edge_name_ << "'s strategy: ";
PrintStrategy(prev_stra);
PrintStrategy(next_stra);
MS_LOG(INFO) << "The edge " << edge_name_ << "'s strategy: prev_stra is " << prev_stra->ToString()
<< ", next_stra is " << next_stra->ToString();
if (prev_op_->IsTmpIdentity()) {
MS_LOG(ERROR) << "The parameter: " << prev_op_->refkey_parameter_name()
<< " has been used by operators with "

View File

@ -217,7 +217,7 @@ void CostGraph::BFS(const OperatorInfoPtr &op, const StrategyPtr &op_stra,
} else {
const auto &next_op_stra = edge->GetNextOpStrategyByPrevOpStrategyWithMiniComm(curr_op->selected_strategy());
if (next_op_stra == nullptr) {
PrintStrategy(curr_op->selected_strategy());
MS_LOG(INFO) << "The strategy is: " << curr_op->selected_strategy()->ToString();
MS_LOG(EXCEPTION) << next_op->name() << "'s strategy is null in the edge: " << edge->edge_name();
}
(void)next_level.emplace(std::make_pair(next_op, std::make_pair(next_op_stra, -1)), curr_depth + 1);
@ -245,8 +245,9 @@ void CostGraph::BFS(const OperatorInfoPtr &op, const StrategyPtr &op_stra,
} else {
const auto &prev_op_stra = edge->GetPrevOpStrategyByNextOpStrategyWithMiniComm(curr_op->selected_strategy());
if (prev_op_stra == nullptr) {
PrintStrategy(curr_op->selected_strategy());
MS_LOG(EXCEPTION) << prev_op->name() << "'s strategy is null in the edge: " << edge->edge_name();
MS_LOG(EXCEPTION) << "Current op " << curr_op->name() << "'s strategy is "
<< curr_op->selected_strategy()->ToString() << ". " << prev_op->name()
<< "'s strategy is null in the edge: " << edge->edge_name();
}
(void)next_level.emplace(std::make_pair(prev_op, std::make_pair(prev_op_stra, -1)), curr_depth + 1);
}

View File

@ -311,9 +311,7 @@ std::map<size_t, size_t> GetRealIndexToSeg(const std::vector<size_t> &split_segm
}
// Check whether the vector of indices is valid.
std::vector<size_t> tmp = split_segment;
std::sort(tmp.begin(), tmp.end());
if (split_segment != tmp) {
if (!std::is_sorted(split_segment.begin(), split_segment.end())) {
MS_LOG(EXCEPTION) << "Indices of segments is not in a ascending order: " << split_segment;
}

View File

@ -30,6 +30,7 @@
#include "ir/value.h"
#include "frontend/parallel/auto_parallel/edge_costmodel.h"
#include "frontend/parallel/auto_parallel/graph_costmodel.h"
#include "frontend/parallel/step_parallel_utils.h"
#include "include/common/utils/parallel_context.h"
#include "utils/log_adapter.h"
#include "include/common/debug/anf_dump_utils.h"
@ -1177,27 +1178,6 @@ std::shared_ptr<Strategies> OperatorInfo::GenerateBatchStrategies() {
return GenerateBatchStrategiesBySplitFlag(inputs_shape_, split_flag_list_);
}
// Log the strategy's input partitions in the form "[a, b], [c, d]".
// A null strategy is silently ignored (callers log around unset strategies).
void PrintStrategy(const StrategyPtr &strategy) {
  if (strategy == nullptr) {
    return;
  }
  // Hoist the getters out of the loops: GetInputDim() may return by value,
  // and the original re-fetched it for every single element.
  const auto &input_dims = strategy->GetInputDim();
  const size_t input_num = strategy->GetInputNumber();
  std::string all_strategy;
  for (size_t i = 0; i < input_num; ++i) {
    all_strategy += "[";
    const auto &dims = input_dims[i];
    for (size_t j = 0; j < dims.size(); ++j) {
      all_strategy += std::to_string(dims[j]);
      if (j + 1 != dims.size()) {
        all_strategy += ", ";
      }
    }
    all_strategy += "]";
    if (i + 1 != input_num) {
      all_strategy += ", ";
    }
  }
  MS_LOG(INFO) << "The strategy is: " << all_strategy;
}
// generate strategies for that each dimension of input0 and input1 is relevant, such as: ([a, b, c, d], [a, b, c, d])
Status GenerateStrategiesForTwoEqualInputs(int64_t stage_id, const Shapes &inputs_shape,
const Shapes &splittable_inputs, std::vector<StrategyPtr> *const sp_vector) {
@ -2027,8 +2007,7 @@ void OperatorInfo::SetSelectedStrategy(const StrategyPtr &s_strategy, size_t cur
MS_LOG(INFO) << name_ << " has already been set strategy.";
return;
}
MS_LOG(INFO) << name_ << ": Set strategy";
PrintStrategy(s_strategy);
MS_LOG(INFO) << name_ << ": Set strategy " << s_strategy->ToString();
selected_strategy_ = s_strategy;
selected_strategy_depth_ = SizeToLong(curr_depth);
}
@ -2048,10 +2027,9 @@ double OperatorInfo::GetForwardMemoryCostFromCNode() {
void OperatorInfo::CheckSelectedStrategy(const StrategyPtr &s_strategy) {
MS_EXCEPTION_IF_NULL(s_strategy);
if (!s_strategy->IsEqual(selected_strategy_)) {
MS_LOG(INFO) << name_ << "'s strategy may cause suboptimal, the determined strategy:";
PrintStrategy(selected_strategy_);
MS_LOG(INFO) << name_ << ": The minimal strategy:";
PrintStrategy(s_strategy);
MS_LOG(INFO) << name_
<< "'s strategy may cause suboptimal, the determined strategy: " << selected_strategy_->ToString()
<< "The minimal strategy: " << s_strategy->ToString();
}
}
@ -2069,11 +2047,12 @@ Status OperatorInfo::GenerateStrategies(int64_t stage_id) {
size_t success = 0;
for (auto &sp : sp_vector) {
PrintStrategy(sp);
if (SetCostUnderStrategy(sp) == SUCCESS) {
success++;
MS_LOG(INFO) << name_ << ": Successfully generated " << success << " strategy.";
PrintStrategy(sp);
MS_LOG(INFO) << name_ << ": Successfully generated the " << GetSerialNumberString(success)
<< " strategy: " << sp->ToString();
} else {
MS_LOG(INFO) << name_ << ": SetCostUnderStrategy failed, the strategy is " << sp->ToString();
}
}
return SUCCESS;

View File

@ -362,7 +362,6 @@ int64_t ComputeRepeatDeviceNumByTensorMap(const Shape &dev_matrix_shape, const S
std::shared_ptr<Strategies> GenerateBatchStrategiesBySplitFlag(const Shapes &shapes,
const std::vector<bool> &split_flag_list);
std::string StrategyToString(const Strategies &strategy);
void PrintStrategy(const StrategyPtr &strategy);
Status GenerateStrategiesForIndependentInputsBase(int64_t stage_id, size_t dev_num, const Shapes &inputs_shape,
const Shapes &splittable_inputs, std::vector<StrategyPtr> *sp_vector);
// generate strategies for that all inputs' dimensions are independent, such as: ([a, b, c, d])

View File

@ -24,6 +24,7 @@
#include "frontend/parallel/device_matrix.h"
#include "frontend/parallel/dynamic_creator.h"
#include "frontend/parallel/step_parallel.h"
#include "frontend/parallel/step_parallel_utils.h"
#include "frontend/parallel/auto_parallel/graph_costmodel.h"
#include "include/common/utils/convert_utils.h"
#include "utils/log_adapter.h"
@ -391,8 +392,8 @@ void ReshapeInfo::SetCostForReshapeWithParameter() {
for (auto &sp : sp_vector_) {
if (SetCostUnderStrategy(sp) == SUCCESS) {
success++;
MS_LOG(INFO) << name_ << ": Successfully generated " << success << " strategy.";
PrintStrategy(sp);
MS_LOG(INFO) << name_ << ": Successfully generated the " << GetSerialNumberString(success)
<< " strategy: " << sp->ToString();
}
}
}
@ -500,8 +501,7 @@ Status ReshapeInfo::GenerateStrategyCosts(
}
MS_LOG(INFO) << "Print " << name() << "'s 'strategy_cost':";
for (auto &swc : strategy_cost_) {
MS_LOG(INFO) << name() << "'s strategy:";
PrintStrategy(swc->strategy_ptr);
MS_LOG(INFO) << name() << "'s strategy: " << swc->strategy_ptr->ToString();
MS_LOG(INFO) << "The corresponding cost: " << swc->cost_list[0]->computation_cost_ << ", "
<< swc->cost_list[0]->communication_cost_ << ", "
<< swc->cost_list[0]->communication_without_parameter_;

View File

@ -93,7 +93,7 @@ Status UniqueInfo::SetCostUnderStrategy(const StrategyPtr &strategy) { return Se
std::vector<StrategyPtr> UniqueInfo::GenerateOpStrategies(int64_t stage_id) {
Shape input0_split;
input0_split.emplace_back(0);
(void)input0_split.emplace_back(0);
Shapes splittable_inputs = {input0_split};
std::vector<StrategyPtr> sp_vector;
if (GenerateStrategiesForIndependentInputs(stage_id, inputs_shape_, splittable_inputs, &sp_vector) != SUCCESS) {

View File

@ -1055,8 +1055,7 @@ Status ParallelStrategySearch(const std::vector<AnfNodePtr> &all_nodes, const Fu
// print the selected strategy
for (auto &op : entire_costgraph->GetOperators()) {
StrategyPtr s_strategy = op->selected_strategy();
MS_LOG(INFO) << op->name() << " : The strategy is:";
PrintStrategy(s_strategy);
MS_LOG(INFO) << op->name() << ": The strategy is: " << s_strategy->ToString();
}
// Remove some operatorInfo from the CNODEs
(void)IgnoreOperatorsInCostGraph();
@ -1210,8 +1209,7 @@ Status ParallelStrategyRecSearch(const std::vector<AnfNodePtr> &all_nodes, const
// print the selected strategy
for (auto &op : entire_costgraph->GetOperators()) {
StrategyPtr s_strategy = op->selected_strategy();
MS_LOG(INFO) << op->name() << " : The strategy is:";
PrintStrategy(s_strategy);
MS_LOG(INFO) << op->name() << ": The strategy is: " << s_strategy->ToString();
}
(void)IgnoreOperatorsInCostGraph();

View File

@ -1404,5 +1404,19 @@ TensorLayout GetInputLayoutFromCNode(const std::pair<AnfNodePtr, int64_t> &node_
TensorLayout tensorlayout_in = tensorinfo_in.tensor_layout();
return tensorlayout_in;
}
// Convert a count to its English ordinal string: 1 -> "1st", 2 -> "2nd",
// 3 -> "3rd", 4 -> "4th", 21 -> "21st", 11/12/13 -> "11th"/"12th"/"13th".
// The original only special-cased 1, 2 and 3, producing "21th", "22th", etc.
std::string GetSerialNumberString(size_t number) {
  std::string suffix = "th";
  const size_t last_two = number % 100;
  // 11, 12 and 13 always take "th"; otherwise the suffix follows the last digit.
  if (last_two < 11 || last_two > 13) {
    const size_t last_digit = number % 10;
    if (last_digit == 1) {
      suffix = "st";
    } else if (last_digit == 2) {
      suffix = "nd";
    } else if (last_digit == 3) {
      suffix = "rd";
    }
  }
  std::ostringstream oss;
  oss << number << suffix;
  return oss.str();
}
} // namespace parallel
} // namespace mindspore

View File

@ -105,6 +105,9 @@ void SetSharedParameterFlag(const FuncGraphPtr &root, const AnfNodePtr &paramete
StrategyPtr GenerateBatchParallelStrategy(const OperatorInfoPtr operator_, const PrimitivePtr prim);
bool IsInsertVirtualOutput(const FuncGraphPtr &root);
TensorLayout GetInputLayoutFromCNode(const std::pair<AnfNodePtr, int64_t> &node_pair);
// Transfer number to serial number string
std::string GetSerialNumberString(size_t number);
} // namespace parallel
} // namespace mindspore

View File

@ -111,6 +111,24 @@ class Strategy {
internal_size_++;
}
// Render the strategy as one bracketed group per input, e.g. "[2, 4], [1]".
std::string ToString() {
  // Bind the getter result once; a const& safely extends the lifetime even
  // if GetInputDim() returns by value.
  const auto &inputs = this->GetInputDim();
  const size_t input_num = this->GetInputNumber();
  std::string repr;
  for (size_t i = 0; i < input_num; ++i) {
    if (i != 0) {
      repr += ", ";
    }
    repr += "[";
    for (size_t j = 0; j < inputs[i].size(); ++j) {
      if (j != 0) {
        repr += ", ";
      }
      repr += std::to_string(inputs[i][j]);
    }
    repr += "]";
  }
  return repr;
}
private:
const int64_t stage_;

View File

@ -141,16 +141,11 @@ std::shared_ptr<std::vector<Arrangement>> Map::ReMapVector(const std::vector<Arr
return std::make_shared<std::vector<Arrangement>>(out);
}
bool Map::CheckNoneByIdxList(std::vector<size_t> idx_list) const {
for (auto &value : idx_list) {
if (GetDimByIdx(value) != MAP_NONE) {
return false;
}
}
return true;
// True when every index in idx_list maps to MAP_NONE.
bool Map::CheckNoneByIdxList(const std::vector<size_t> &idx_list) const {
  return std::none_of(idx_list.begin(), idx_list.end(),
                      [this](size_t idx) { return GetDimByIdx(idx) != MAP_NONE; });
}
Map Map::SqueezeMapByIdxList(std::vector<size_t> idx_list) const {
Map Map::SqueezeMapByIdxList(const std::vector<size_t> &idx_list) const {
Shape out_shape;
for (size_t i = 0; i < GetDimSize(); i++) {
auto it = std::find(idx_list.begin(), idx_list.end(), i);

View File

@ -40,8 +40,8 @@ class Map : public Array {
std::shared_ptr<Map> ExpandMapByNone(const Arrangement &expand_num_list) const;
std::shared_ptr<Map> ExpandMapByDecreaseNumber(const Arrangement &expand_num_list) const;
std::shared_ptr<std::vector<Arrangement>> ReMapVector(const std::vector<Arrangement> &input_vector) const;
bool CheckNoneByIdxList(std::vector<size_t> idx_list) const;
Map SqueezeMapByIdxList(std::vector<size_t> idx_list) const;
bool CheckNoneByIdxList(const std::vector<size_t> &idx_list) const;
Map SqueezeMapByIdxList(const std::vector<size_t> &idx_list) const;
private:
bool IsValidMap();