forked from mindspore-Ecosystem/mindspore
merge master code to r0.5
This commit is contained in:
parent
fe1d4ca3bd
commit
300dd2971c
@@ -28,7 +28,6 @@
 
 namespace mindspore {
 namespace parallel {
-#define DOUBLE_MAX (std::numeric_limits<double>::max)()
 
 // Compute redistributed cost
 double CostRedis(const Graph::NodeType &node,
@@ -621,75 +620,50 @@ StrategyRec CostCommon::ChoseStr(const std::vector<double> &cost_op, StrategyRec
       break;
 
     default:
-      MS_LOG(EXCEPTION) << "Failure: CostBiasAdd failed.";
+      MS_LOG(EXCEPTION) << "Failure: Common failed.";
   }
   return str;
 }
 
-// Get weight for BN
-double CostBatchNorm::GetMinCostIn(const OperatorRec &op) {
-  int tensor = static_cast<int>(op.arguments[0].tensor_shape.shape_h * op.arguments[0].tensor_str.str_h) *
-               static_cast<int>(op.arguments[0].tensor_shape.shape_n * op.arguments[0].tensor_str.str_n) *
-               static_cast<int>(op.arguments[0].tensor_shape.shape_w * op.arguments[0].tensor_str.str_w) *
-               static_cast<int>(op.arguments[0].tensor_shape.shape_c * op.arguments[0].tensor_str.str_c);
-
-  std::vector<double> cost_in;
-  cost_in.push_back(StrDimB(tensor) * 1.2);
-  cost_in.push_back(DOUBLE_MAX);
-  cost_in.push_back(StrDimH(tensor) * 1.2);
-  cost_in.push_back(StrDimW(tensor) * 1.2);
-
-  return *min_element(cost_in.begin(), cost_in.end());
-}
-
-// Get optimal strategy for BN
-StrategyRec CostBatchNorm::GetOptimalStr(const Graph::NodeType &node,
-                                         const std::vector<std::pair<std::string, StrategyRec>> &node_name_to_strategy,
-                                         const Graph &graph) {
+// Get optimal strategy for BatchParallel OPs
+StrategyRec CostBatchParallel::GetOptimalStr(const Graph::NodeType &node) {
   const OperatorRec &op = node.apply;
-  int tensor_filter_n = static_cast<int>(op.arguments[1].tensor_shape.shape_n * op.arguments[1].tensor_str.str_n);
-  int tensor_filter_c = static_cast<int>(op.arguments[1].tensor_shape.shape_c * op.arguments[1].tensor_str.str_c);
-  int tensor_filter_h = static_cast<int>(op.arguments[1].tensor_shape.shape_h * op.arguments[1].tensor_str.str_h);
-  int tensor_filter_w = static_cast<int>(op.arguments[1].tensor_shape.shape_w * op.arguments[1].tensor_str.str_w);
-
-  int tensor_filter = tensor_filter_h * tensor_filter_w * tensor_filter_n * tensor_filter_c;
-
-  int output_tensor_h = static_cast<int>(node.tensor_parm.tensor_shape.shape_h * node.tensor_parm.tensor_str.str_h);
-  int output_tensor_w = static_cast<int>(node.tensor_parm.tensor_shape.shape_w * node.tensor_parm.tensor_str.str_w);
-  int output_tensor_n = static_cast<int>(node.tensor_parm.tensor_shape.shape_n * node.tensor_parm.tensor_str.str_n);
+  int tensor_n = static_cast<int>(op.arguments[0].tensor_shape.shape_n * op.arguments[0].tensor_str.str_n);
+  int tensor_c = static_cast<int>(op.arguments[0].tensor_shape.shape_c * op.arguments[0].tensor_str.str_c);
+  int tensor_h = static_cast<int>(op.arguments[0].tensor_shape.shape_h * op.arguments[0].tensor_str.str_h);
+  int tensor_w = static_cast<int>(op.arguments[0].tensor_shape.shape_w * op.arguments[0].tensor_str.str_w);
 
   std::vector<double> cost_op;
-  std::vector<std::vector<float>> mode;
 
-  if (output_tensor_n < 2 || output_tensor_n % 2 != 0) {
+  if (tensor_n < 2 || tensor_n % 2 != 0) {
     cost_op.push_back(DOUBLE_MAX);
   } else {
-    cost_op.push_back(StrDimB(tensor_filter) + CostRedis(node, node_name_to_strategy,
-                      mode = {{0.5, 1, 1, 1}, {1, 1, 1, 1}, {0.5, 1, 1, 1}}, graph));
+    cost_op.push_back(cost_in_);
   }
 
-  cost_op.push_back(DOUBLE_MAX);
+  if (tensor_c < 2 || tensor_c % 2 != 0) {
+    cost_op.push_back(DOUBLE_MAX);
+  } else {
+    cost_op.push_back(cost_in_);
+  }
 
-  if (output_tensor_h < 2 || output_tensor_h % 2 != 0) {
+  if (tensor_h < 2 || tensor_h % 2 != 0) {
     cost_op.push_back(DOUBLE_MAX);
   } else {
-    cost_op.push_back(StrDimH(tensor_filter) + CostRedis(node, node_name_to_strategy,
-                      mode = {{1, 1, 0.5, 1}, {1, 1, 1, 1}, {1, 1, 0.5, 1}}, graph));
+    cost_op.push_back(cost_in_);
   }
 
-  if (output_tensor_w < 2 || output_tensor_w % 2 != 0) {
+  if (tensor_w < 2 || tensor_w % 2 != 0) {
     cost_op.push_back(DOUBLE_MAX);
   } else {
-    cost_op.push_back(StrDimW(tensor_filter) + CostRedis(node, node_name_to_strategy,
-                      mode = {{1, 1, 1, 0.5}, {1, 1, 1, 1}, {1, 1, 1, 0.5}}, graph));
+    cost_op.push_back(cost_in_);
   }
 
   return ChoseStr(cost_op, node.apply.str);
 }
 
-// Chose strategy for BatchNorm
-StrategyRec CostBatchNorm::ChoseStr(const std::vector<double> &cost_op, StrategyRec str) {
+// Chose strategy for BatchParallel op
+StrategyRec CostBatchParallel::ChoseStr(const std::vector<double> &cost_op, StrategyRec str) {
   uint64_t min_position = min_element(cost_op.begin(), cost_op.end()) - cost_op.begin();
   if (cost_op[min_position] > (DOUBLE_MAX - 0.1)) {
     return str;
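Note: the pattern above is worth spelling out: for each dimension, GetOptimalStr pushes DOUBLE_MAX when the dimension cannot be halved (size below 2 or odd), otherwise the real cost; ChoseStr then takes the position of the cheapest entry. A minimal self-contained sketch of that selection step, with hypothetical names (not the MindSpore API):

    #include <algorithm>
    #include <iostream>
    #include <limits>
    #include <vector>

    // Sentinel marking a dimension that must not be split further.
    constexpr double kDoubleMax = (std::numeric_limits<double>::max)();

    // Returns the index of the cheapest cuttable dimension, or -1 if none is cuttable.
    // Mirrors the min_element + sentinel check in CostBatchParallel::ChoseStr.
    int PickCutDimension(const std::vector<double> &cost_op) {
      auto min_it = std::min_element(cost_op.begin(), cost_op.end());
      if (*min_it > kDoubleMax - 0.1) {
        return -1;  // every candidate was the sentinel: leave the strategy unchanged
      }
      return static_cast<int>(min_it - cost_op.begin());
    }

    int main() {
      // Dimension order: N, C, H, W. C and W are odd or too small, so they got the sentinel.
      std::vector<double> cost_op = {8.0, kDoubleMax, 4.0, kDoubleMax};
      std::cout << PickCutDimension(cost_op) << "\n";  // prints 2: cut along H
    }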
@@ -700,36 +674,75 @@ StrategyRec CostBatchNorm::ChoseStr(const std::vector<double> &cost_op, Strategy
       str.inputTensor[0].str_n /= 2.0;
       str.outputTensor.str_n /= 2.0;
       str.cut_counter += 1;
-      str.cost = str.cost + cost_in_b_;
+      str.cost = str.cost + cost_in_;
       break;
 
     case 1:
       str.inputTensor[0].str_c /= 2.0;
-      str.inputTensor[1].str_c /= 2.0;
-      str.inputTensor[2].str_c /= 2.0;
-      str.inputTensor[3].str_c /= 2.0;
-      str.inputTensor[4].str_c /= 2.0;
       str.outputTensor.str_c /= 2.0;
       str.cut_counter += 1;
-      str.cost = str.cost + cost_in_c_;
+      str.cost = str.cost + cost_in_;
       break;
 
     case 2:
       str.inputTensor[0].str_h /= 2.0;
       str.outputTensor.str_h /= 2.0;
       str.cut_counter += 1;
-      str.cost = str.cost + cost_in_h_;
+      str.cost = str.cost + cost_in_;
       break;
 
     case 3:
       str.inputTensor[0].str_w /= 2.0;
       str.outputTensor.str_w /= 2.0;
       str.cut_counter += 1;
-      str.cost = str.cost + cost_in_w_;
+      str.cost = str.cost + cost_in_;
       break;
 
     default:
-      MS_LOG(EXCEPTION) << "Failure: CostBatchNorm failed.";
+      MS_LOG(EXCEPTION) << "Failure: CostBatchParallel failed.";
+  }
+  return str;
+}
+
+// Chose strategy for CostSoftmaxCrossEntropyWithLogits
+StrategyRec CostSoftmaxCrossEntropyWithLogits::ChoseStr(const std::vector<double> &cost_op, StrategyRec str) {
+  uint64_t min_position = min_element(cost_op.begin(), cost_op.end()) - cost_op.begin();
+  if (cost_op[min_position] > (DOUBLE_MAX - 0.1)) {
+    return str;
+  }
+
+  switch (min_position) {
+    case 0:
+      str.inputTensor[0].str_n /= 2.0;
+      str.inputTensor[1].str_n /= 2.0;
+      str.cut_counter += 1;
+      str.cost = str.cost + cost_in_;
+      break;
+
+    case 1:
+      str.inputTensor[0].str_c /= 2.0;
+      str.inputTensor[1].str_c /= 2.0;
+      str.cut_counter += 1;
+      str.cost = str.cost + cost_in_;
+      break;
+
+    case 2:
+      str.inputTensor[0].str_h /= 2.0;
+      str.inputTensor[1].str_h /= 2.0;
+      str.outputTensor.str_w /= 2.0;
+      str.cut_counter += 1;
+      str.cost = str.cost + cost_in_;
+      break;
+
+    case 3:
+      str.inputTensor[0].str_w /= 2.0;
+      str.inputTensor[1].str_w /= 2.0;
+      str.cut_counter += 1;
+      str.cost = str.cost + cost_in_;
+      break;
+
+    default:
+      MS_LOG(EXCEPTION) << "Failure: CostSoftmax failed.";
   }
   return str;
 }
@@ -28,6 +28,8 @@
 
 namespace mindspore {
 namespace parallel {
+#define DOUBLE_MAX (std::numeric_limits<double>::max)()
+
 double CostRedis(const Graph::NodeType &node,
                  const std::vector<std::pair<std::string, StrategyRec>> &node_name_to_strategy,
                  const std::vector<std::vector<float>> &mode, const Graph &graph);
@@ -195,7 +197,6 @@ class CostTensorAdd : public CostCommon {
 };
 
 // all the following operation are element-wise and have the same cost
-class CostOneHot : public CostCommon {};
 class CostReLU : public CostCommon {};
 class CostLog : public CostCommon {};
 class CostExp : public CostCommon {};
@@ -206,50 +207,27 @@ class CostDiv : public CostCommon {};
 class CostSqueeze : public CostCommon {};
 class CostCast : public CostCommon {};
 
-// class BatchNorm is used to compute the cost of BatchNorm operator.
-class CostBatchNorm {
+// class BatchParallel is used to compute the cost of BatchParallel operator.
+class CostBatchParallel {
  public:
-  StrategyRec GetOptimalStr(const Graph::NodeType &node,
-                            const std::vector<std::pair<std::string, StrategyRec>> &node_name_to_strategy,
-                            const Graph &graph);
+  virtual StrategyRec GetOptimalStr(const Graph::NodeType &node);
 
-  double GetMinCostIn(const OperatorRec &op);
+  virtual double GetMaxCostIn() const { return DOUBLE_MAX; }
 
- private:
-  double StrDimB(int32_t Tensor) {
-    cost_in_b_ = (static_cast<double>(Tensor) * 4.0) / 2.0;
-
-    return cost_in_b_;
-  }
-
-  double StrDimC() {
-    cost_in_c_ = 0.0;
-
-    return cost_in_c_;
-  }
-
-  double StrDimH(int32_t Tensor) {
-    cost_in_h_ = (static_cast<double>(Tensor) * 4.0) / 2.0;
-
-    return cost_in_h_;
-  }
-
-  double StrDimW(int32_t Tensor) {
-    cost_in_w_ = (static_cast<double>(Tensor) * 4.0) / 2.0;
-
-    return cost_in_w_;
-  }
-
+ protected:
+  virtual StrategyRec ChoseStr(const std::vector<double> &cost_op, StrategyRec str);
+
+  double cost_in_ = 0;
+};  // class BatchParallel is used to compute the cost of BatchParallel operator.
+
+class CostBatchNorm : public CostBatchParallel {};
+class CostOneHot : public CostBatchParallel {};
+class CostPRelu : public CostBatchParallel {};
+class CostSoftmax : public CostBatchParallel {};
+
+class CostSoftmaxCrossEntropyWithLogits : public CostBatchParallel {
   StrategyRec ChoseStr(const std::vector<double> &cost_op, StrategyRec str);
-
-  double cost_in_b_ = 0;
-
-  double cost_in_c_ = 0;
-
-  double cost_in_h_ = 0;
-
-  double cost_in_w_ = 0;
-};  // class BatchNorm is used to compute the cost of BatchNorm operator.
+};
 
 }  // namespace parallel
 }  // namespace mindspore
 #endif  // PARALLEL_AUTO_PARALLEL_REC_COST_H_
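Note: the header refactor collapses four per-dimension cost fields into a single cost_in_ and routes all batch-parallel operators (BatchNorm, OneHot, PRelu, Softmax) through one virtual base, with only SoftmaxCrossEntropyWithLogits overriding ChoseStr. A minimal sketch of that dispatch shape, using hypothetical stand-in types rather than the real classes:

    #include <iostream>
    #include <memory>
    #include <vector>

    struct Strategy { int cut_counter = 0; double cost = 0; };

    // Skeleton of the CostBatchParallel hierarchy: one shared cost field,
    // one virtual strategy chooser that subclasses may override.
    class CostBatchParallelSketch {
     public:
      virtual ~CostBatchParallelSketch() = default;
      virtual Strategy ChoseStr(const std::vector<double> &cost_op, Strategy str) {
        (void)cost_op;
        str.cut_counter += 1;
        str.cost += cost_in_;  // every dimension now charges the same cost_in_
        return str;
      }
     protected:
      double cost_in_ = 1.0;
    };

    // Only the logits loss needs special handling, so only it overrides.
    class CostSCEWithLogitsSketch : public CostBatchParallelSketch {
     public:
      Strategy ChoseStr(const std::vector<double> &cost_op, Strategy str) override {
        str = CostBatchParallelSketch::ChoseStr(cost_op, str);
        str.cost += 0.5;  // stand-in for the logits-specific extra work
        return str;
      }
    };

    int main() {
      std::unique_ptr<CostBatchParallelSketch> cost = std::make_unique<CostSCEWithLogitsSketch>();
      Strategy s = cost->ChoseStr({1.0, 2.0}, Strategy{});
      std::cout << s.cost << "\n";  // 1.5, resolved through the override
    }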
@@ -28,10 +28,10 @@
 
 namespace mindspore {
 namespace parallel {
-void GenerateStrategy(std::shared_ptr<Graph> graph, const std::vector<std::shared_ptr<OperatorInfo>> &ops,
-                      const std::shared_ptr<std::vector<std::vector<size_t>>> eli_list,
+void GenerateStrategy(const std::shared_ptr<Graph> &graph, const std::vector<std::shared_ptr<OperatorInfo>> &ops,
+                      const std::shared_ptr<std::vector<std::vector<size_t>>> &eli_list,
                       const std::vector<std::vector<std::string>> &input_tensor_names,
-                      const std::shared_ptr<std::vector<size_t>> index_list) {
+                      const std::shared_ptr<std::vector<size_t>> &index_list) {
   MS_EXCEPTION_IF_NULL(graph);
   MS_EXCEPTION_IF_NULL(eli_list);
   MS_EXCEPTION_IF_NULL(index_list);
@@ -127,25 +127,6 @@ std::vector<std::vector<int32_t>> PrepareMatMul(const std::shared_ptr<Graph> &gr
   return strategies;
 }
 
-std::vector<std::vector<int32_t>> PreparePReLU(const std::shared_ptr<Graph> &graph,
-                                               const std::vector<std::shared_ptr<OperatorInfo>> &ops,
-                                               const size_t iter_graph, const size_t iter_ops) {
-  std::vector<std::vector<int32_t>> strategies = MakeDataParallelStrategy(graph, ops, iter_graph, iter_ops);
-  strategies[1][0] = 1;
-  return strategies;
-}
-
-std::vector<std::vector<int32_t>> PrepareBatchNorm(const std::shared_ptr<Graph> &graph,
-                                                   const std::vector<std::shared_ptr<OperatorInfo>> &ops,
-                                                   const size_t iter_graph, const size_t iter_ops) {
-  std::vector<std::vector<int32_t>> strategies = MakeDataParallelStrategy(graph, ops, iter_graph, iter_ops);
-  for (size_t i = 1; i < strategies.size(); i++) {
-    strategies[i][0] = strategies[0][1];
-  }
-  strategies[1][0] = 1;
-  return strategies;
-}
-
 std::vector<std::vector<int32_t>> PrepareBiasAdd(const std::shared_ptr<std::vector<int32_t>> &s) {
   std::vector<std::vector<int32_t>> strategies;
   strategies.push_back(*s);
@@ -155,10 +136,29 @@ std::vector<std::vector<int32_t>> PrepareBiasAdd(const std::shared_ptr<std::vect
   return strategies;
 }
 
-std::vector<std::vector<int32_t>> PrepareOneHot(const std::shared_ptr<std::vector<int32_t>> &s) {
-  std::vector<std::vector<int32_t>> strategies;
+std::vector<std::vector<int32_t>> PrepareOneHot(const std::shared_ptr<Graph> &graph,
+                                                const std::vector<std::shared_ptr<OperatorInfo>> &ops,
+                                                const size_t iter_graph, const size_t iter_ops) {
+  std::vector<std::vector<int32_t>> strategies = MakeRecSearchStrategy(graph, ops, iter_graph, iter_ops);
+
+  int32_t axis = -1;
+  auto iter = ops[iter_ops]->attrs().find(AXIS);
+  if (iter != ops[iter_ops]->attrs().end()) {
+    MS_EXCEPTION_IF_NULL(iter->second);
+    if (iter->second->isa<Int32Imm>()) {
+      axis = iter->second->cast<Int32ImmPtr>()->value();
+    } else {
+      MS_LOG(EXCEPTION) << ops[iter_ops]->name() << ": The value of axis is not int.";
+    }
+  }
+  if (axis == -1) {
+    strategies[0][0] = strategies[0][1];
+    strategies[0][1] = 1;
+    graph->nodes[iter_graph].tensor_parm.tensor_str.str_h = graph->nodes[iter_graph].tensor_parm.tensor_str.str_w;
+    graph->nodes[iter_graph].tensor_parm.tensor_str.str_w = 1.0;
+  }
+
   std::vector<int32_t> s_empty = {};
-  strategies.push_back(*s);
   strategies.push_back(s_empty);
   strategies.push_back(s_empty);
   return strategies;
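Note: in the axis == -1 branch, the last dimension of a OneHot output is the depth axis and must stay whole, so the split planned for it is moved to dimension 0 and the node's stride record is updated to match. The swap in isolation, as a tiny hypothetical check:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    int main() {
      // OneHot with axis == -1: the depth axis (last dim) must not be split,
      // so its planned cut moves to dimension 0 - the same swap PrepareOneHot does.
      std::vector<int32_t> strategy0 = {1, 8};  // [batch, depth] cut counts
      strategy0[0] = strategy0[1];
      strategy0[1] = 1;
      assert((strategy0 == std::vector<int32_t>{8, 1}));
    }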
@@ -170,6 +170,32 @@ std::vector<std::vector<int32_t>> PrepareGatherV2(const std::shared_ptr<std::vec
   return strategies;
 }
 
+std::vector<std::vector<int32_t>> PrepareL2Normalize(const std::vector<std::shared_ptr<OperatorInfo>> &ops,
+                                                     const size_t iter_ops, std::vector<int32_t> s) {
+  int32_t axis = 0;
+  auto iter = ops[iter_ops]->attrs().find(AXIS);
+  if (iter != ops[iter_ops]->attrs().end()) {
+    MS_EXCEPTION_IF_NULL(iter->second);
+    if (iter->second->isa<Int32Imm>()) {
+      axis = iter->second->cast<Int32ImmPtr>()->value();
+    } else {
+      MS_LOG(EXCEPTION) << ops[iter_ops]->name() << " : The value of axis is not int.";
+    }
+  }
+
+  int32_t axis_index = axis;
+  if (axis < 0) {
+    size_t input_dim = ops[iter_ops]->inputs_tensor_info()[0].shape().size();
+    axis_index = static_cast<int32_t>(input_dim) + axis;
+  }
+
+  s[IntToSize(axis_index)] = 1;
+
+  std::vector<std::vector<int32_t>> strategies;
+  strategies.push_back(s);
+  return strategies;
+}
+
 std::vector<std::vector<int32_t>> MakeRecSearchStrategy(const std::shared_ptr<Graph> &graph,
                                                         const std::vector<std::shared_ptr<OperatorInfo>> &ops,
                                                         const size_t iter_graph, const size_t iter_ops) {
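Note: PrepareL2Normalize normalizes a negative axis before forcing that dimension's split to 1; for a rank-r input, axis -1 maps to r - 1. A tiny standalone check of that arithmetic:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Same normalization PrepareL2Normalize performs: negative axes count from the end.
    int32_t NormalizeAxis(int32_t axis, size_t input_dim) {
      return axis < 0 ? static_cast<int32_t>(input_dim) + axis : axis;
    }

    int main() {
      std::vector<int32_t> s = {8, 1, 1, 1};  // per-dimension cut counts for a 4-D tensor
      int32_t axis_index = NormalizeAxis(-1, s.size());
      assert(axis_index == 3);
      s[axis_index] = 1;  // the normalized axis must stay unsplit
    }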
@@ -209,7 +235,7 @@ std::vector<std::vector<int32_t>> MakeRecSearchStrategy(const std::shared_ptr<Gr
     } else if (output_size == 0) {
       s = {};
     } else {
-      MS_LOG(ERROR) << "Tensor's output size is unexcepted.";
+      MS_LOG(EXCEPTION) << ops[iter_ops]->name() << ": Tensor's output size is unexcepted.";
     }
     strategies.push_back(s);
   }
@@ -229,7 +255,7 @@ std::vector<std::vector<int32_t>> MakeDataParallelStrategy(const std::shared_ptr
   StrategyPtr origin_strategy = ops[iter_ops]->strategy();
   std::vector<std::vector<int32_t>> strategies;
   size_t max_device_num = g_device_manager->DeviceNum();
-  size_t target_tensor_batch = ops[iter_ops]->outputs_tensor_info()[0].shape()[0];
+  size_t target_tensor_batch = ops[iter_ops]->inputs_tensor_info()[0].shape()[0];
   for (size_t iter_op_inputs = 0; iter_op_inputs < ops[iter_ops]->inputs_tensor_info().size(); iter_op_inputs++) {
     if (iter_op_inputs >= origin_strategy->GetInputDim().size()) {
       MS_LOG(EXCEPTION) << "Failure: Strategy's InputDim out of range.";
@@ -244,8 +270,10 @@ std::vector<std::vector<int32_t>> MakeDataParallelStrategy(const std::shared_ptr
       } else {
         s.push_back(1);
       }
+    } else if (input_size == 0) {
+      s = {};
     } else {
-      MS_LOG(ERROR) << "Tensor's shape is unknown.";
+      MS_LOG(EXCEPTION) << ops[iter_ops]->name() << ": Tensor's shape is unknown.";
     }
   }
   strategies.push_back(s);
@@ -285,25 +313,20 @@ std::vector<std::vector<int32_t>> PrepareStrategy(const std::shared_ptr<Graph> &
 
   if (type == MATMUL) {
     return PrepareMatMul(graph, ops, iter_graph, iter_ops);
-  } else if (type == PRELU) {
-    return PreparePReLU(graph, ops, iter_graph, iter_ops);
-  } else if (type == BATCH_NORM) {
-    return PrepareBatchNorm(graph, ops, iter_graph, iter_ops);
-  } else if (type == SOFTMAX || type == LOG_SOFTMAX || type == SPARSE_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS ||
-             type == SOFTMAX_CROSS_ENTROPY_WITH_LOGITS) {
-    return MakeDataParallelStrategy(graph, ops, iter_graph, iter_ops);
+  } else if (type == ONEHOT) {
+    return PrepareOneHot(graph, ops, iter_graph, iter_ops);
   } else {
     return MakeRecSearchStrategy(graph, ops, iter_graph, iter_ops);
   }
 }
 
-void GeneratePartitionedOperatorStrategy(const std::shared_ptr<Graph> graph,
+void GeneratePartitionedOperatorStrategy(const std::shared_ptr<Graph> &graph,
                                          const std::vector<std::shared_ptr<OperatorInfo>> &ops,
-                                         const std::shared_ptr<std::vector<size_t>> index_list) {
+                                         const std::shared_ptr<std::vector<size_t>> &index_list) {
   for (size_t iter_ops = 0; iter_ops < (size_t)index_list->size(); iter_ops++) {
     std::vector<std::vector<int32_t>> strategies;
     size_t iter_graph = index_list->at(iter_ops);
-    if (iter_graph != SIZE_MAX) {
+    if (iter_graph != SIZE_MAX && ops[iter_ops]->type() != GET_NEXT) {
       strategies = PrepareStrategy(graph, ops, iter_graph, iter_ops);
     }
     StrategyPtr sp = std::make_shared<Strategy>(0, strategies);
@@ -328,7 +351,7 @@ size_t FindIndexOfOperatorIncoming(const std::vector<std::vector<std::string>> &
   return incoming_op_index;
 }
 
-std::vector<int32_t> CopyIncomingOperatorOutputStrategy(const std::shared_ptr<Graph> graph,
+std::vector<int32_t> CopyIncomingOperatorOutputStrategy(const std::shared_ptr<Graph> &graph,
                                                         const std::vector<std::shared_ptr<OperatorInfo>> &ops,
                                                         const size_t iter_ops, const size_t iter_graph) {
   std::vector<int32_t> s;
@@ -348,7 +371,7 @@ std::vector<int32_t> CopyIncomingOperatorOutputStrategy(const std::shared_ptr<Gr
         s.push_back(1 / graph->nodes[iter_graph].tensor_parm.tensor_str.str_h);
         s.push_back(1 / graph->nodes[iter_graph].tensor_parm.tensor_str.str_w);
       } else {
-        MS_LOG(ERROR) << "Tensor's shape is unknown.";
+        MS_LOG(EXCEPTION) << ops[iter_ops]->name() << ": Tensor's shape is unknown.";
       }
       break;
     }
@@ -358,7 +381,8 @@ std::vector<int32_t> CopyIncomingOperatorOutputStrategy(const std::shared_ptr<Gr
 std::vector<int32_t> PrepareIncomingOperatorInputStrategy(const std::vector<std::shared_ptr<OperatorInfo>> &ops,
                                                           const size_t incoming_op_index) {
   std::vector<int32_t> s;
-  if (ops[incoming_op_index]->type() == RESHAPE || ops[incoming_op_index]->type() == GATHERV2) {
+  if (ops[incoming_op_index]->type() == RESHAPE || ops[incoming_op_index]->type() == GATHERV2 ||
+      ops[incoming_op_index]->type() == TRANSPOSE) {
     return s;
   }
   auto strategy = ops[incoming_op_index]->selected_strategy();
@@ -426,13 +450,23 @@ std::vector<int32_t> ModifyStrategyIfSqueezeIncoming(const std::vector<std::shar
   return s_Squeeze;
 }
 
+bool GetKeepDims(const std::vector<std::shared_ptr<OperatorInfo>> &ops, const size_t iter_ops) {
+  bool keepdims = false;
+  auto keep_dims_iter = ops[iter_ops]->attrs().find(KEEP_DIMS);
+  if (keep_dims_iter == ops[iter_ops]->attrs().end()) {
+    MS_LOG(EXCEPTION) << ops[iter_ops]->name() << ": Don't have attr keep_dims.";
+  }
+  MS_EXCEPTION_IF_NULL(keep_dims_iter->second);
+  if (!keep_dims_iter->second->isa<BoolImm>()) {
+    MS_LOG(EXCEPTION) << ops[iter_ops]->name() << ": Keep_dims is not a bool.";
+  }
+  keepdims = keep_dims_iter->second->cast<BoolImmPtr>()->value();
+  return keepdims;
+}
+
 std::vector<int32_t> GetDimList(const std::vector<std::shared_ptr<OperatorInfo>> &ops, const size_t iter_ops) {
   std::vector<int32_t> dim_list;
-  bool keep_dims;
-  if (!ops[iter_ops]->attrs().find(KEEP_DIMS)->second->isa<BoolImm>()) {
-    MS_LOG(EXCEPTION) << "Failure: Parameter keep_dims is not a boolean value." << std::endl;
-  }
-  keep_dims = ops[iter_ops]->attrs().find(KEEP_DIMS)->second->cast<BoolImmPtr>()->value();
+  bool keep_dims = GetKeepDims(ops, iter_ops);
   if (keep_dims != false) {
     return dim_list;
   }
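Note: GetKeepDims factors the attrs() lookup into one place: find the attribute, fail loudly if it is missing, type-check, then cast. The old code dereferenced find() without checking for absence. A sketch of the same lookup discipline over a plain map, with hypothetical types standing in for MindSpore's ValuePtr:

    #include <iostream>
    #include <map>
    #include <stdexcept>
    #include <string>
    #include <variant>

    using AttrValue = std::variant<bool, int>;

    // Look up a bool attribute; throw (as MS_LOG(EXCEPTION) would) on absence or wrong type.
    bool GetKeepDimsSketch(const std::map<std::string, AttrValue> &attrs, const std::string &op_name) {
      auto it = attrs.find("keep_dims");
      if (it == attrs.end()) {
        throw std::runtime_error(op_name + ": Don't have attr keep_dims.");
      }
      if (!std::holds_alternative<bool>(it->second)) {
        throw std::runtime_error(op_name + ": Keep_dims is not a bool.");
      }
      return std::get<bool>(it->second);
    }

    int main() {
      std::map<std::string, AttrValue> attrs = {{"keep_dims", true}};
      std::cout << GetKeepDimsSketch(attrs, "ReduceSum") << "\n";  // prints 1
    }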
@@ -478,6 +512,62 @@ std::vector<int32_t> ModifyStrategyIfReduceIncoming(const std::vector<std::share
   return s_Reduce;
 }
 
+std::vector<int32_t> GetDimListFromAttrs(const std::vector<std::shared_ptr<OperatorInfo>> &ops, const size_t iter_ops) {
+  std::vector<int32_t> dim_list;
+  auto iter = ops[iter_ops]->attrs().find(AXIS);
+  if (iter == ops[iter_ops]->attrs().end()) {
+    MS_LOG(EXCEPTION) << ops[iter_ops]->name() << ": Don't have attr axis.";
+  }
+  auto input_dim = ops[iter_ops]->inputs_tensor_info()[0].shape().size();
+  MS_EXCEPTION_IF_NULL(iter->second);
+  if (iter->second->isa<ValueTuple>()) {
+    auto attr_axis = GetValue<std::vector<int>>(iter->second);
+    if (attr_axis.empty()) {
+      for (size_t i = 0; i < input_dim; ++i) {
+        dim_list.push_back(SizeToInt(i));
+      }
+    } else {
+      for (auto &axis : attr_axis) {
+        axis < 0 ? dim_list.push_back(axis + SizeToInt(input_dim)) : dim_list.push_back(axis);
+      }
+    }
+  } else if (iter->second->isa<Int32Imm>()) {
+    int axis = GetValue<int>(iter->second);
+    axis < 0 ? dim_list.push_back(axis + SizeToInt(input_dim)) : dim_list.push_back(axis);
+  } else {
+    MS_LOG(EXCEPTION) << "Axis type is invalid.";
+  }
+  return dim_list;
+}
+
+std::vector<int32_t> ModifyStrategyIfArgIncoming(const std::vector<std::shared_ptr<OperatorInfo>> &ops,
+                                                 const size_t incoming_op_index, std::vector<int32_t> s) {
+  bool keepdims = GetKeepDims(ops, incoming_op_index);
+  if (keepdims) {
+    return s;
+  }
+
+  std::vector<int32_t> s_Arg;
+  std::vector<int32_t> axis_list;
+  for (size_t i = 0; i < s.size(); i++) {
+    axis_list.push_back(i);
+  }
+
+  auto dim_list = GetDimListFromAttrs(ops, incoming_op_index);
+  for (auto axis : dim_list) {
+    auto it = find(axis_list.begin(), axis_list.end(), axis);
+    if (it == axis_list.end()) {
+      MS_LOG(EXCEPTION) << "Failure: Can not find dimension indexes in Axis." << std::endl;
+    }
+    axis_list.erase(it);
+  }
+
+  for (size_t i = 0; i < (size_t)axis_list.size(); i++) {
+    s_Arg.push_back(s[axis_list[i]]);
+  }
+  return s_Arg;
+}
+
 std::vector<int32_t> CopyIncomingOperatorInputStrategy(const std::vector<std::shared_ptr<OperatorInfo>> &ops,
                                                        const size_t iter_ops, const size_t incoming_op_index) {
   std::vector<int32_t> s;
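Note: ModifyStrategyIfArgIncoming drops the reduced axes out of the incoming strategy when keep_dims is false, so the strategy's rank matches the Arg op's output rank. The axis bookkeeping in isolation, as a hypothetical helper with a worked example:

    #include <algorithm>
    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Remove the entries of s that correspond to reduced axes (dim_list),
    // keeping the survivors in order - the core of ModifyStrategyIfArgIncoming.
    std::vector<int32_t> SqueezeStrategy(const std::vector<int32_t> &s, const std::vector<int32_t> &dim_list) {
      std::vector<int32_t> axis_list;
      for (size_t i = 0; i < s.size(); i++) {
        axis_list.push_back(static_cast<int32_t>(i));
      }
      for (auto axis : dim_list) {
        auto it = std::find(axis_list.begin(), axis_list.end(), axis);
        if (it != axis_list.end()) {
          axis_list.erase(it);
        }
      }
      std::vector<int32_t> result;
      for (auto axis : axis_list) {
        result.push_back(s[axis]);
      }
      return result;
    }

    int main() {
      // 4-D strategy {4, 1, 2, 1}; ArgMaxWithValue reduced axis 2 with keep_dims=false.
      std::vector<int32_t> out = SqueezeStrategy({4, 1, 2, 1}, {2});
      assert((out == std::vector<int32_t>{4, 1, 1}));
    }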
@@ -490,6 +580,9 @@ std::vector<int32_t> CopyIncomingOperatorInputStrategy(const std::vector<std::sh
         ops[incoming_op_index]->type() == REDUCE_MIN || ops[incoming_op_index]->type() == REDUCE_MEAN) {
       s = ModifyStrategyIfReduceIncoming(ops, incoming_op_index, s);
     }
+    if (ops[incoming_op_index]->type() == ARGMAXWITHVALUE || ops[incoming_op_index]->type() == ARGMINWITHVALUE) {
+      s = ModifyStrategyIfArgIncoming(ops, incoming_op_index, s);
+    }
   }
   return s;
 }
@@ -513,12 +606,12 @@ std::vector<std::vector<int32_t>> GenerateStrategiesFromStrategy(const std::vect
   if (ops[iter_ops]->type() == BIAS_ADD) {
     return PrepareBiasAdd(s_ptr);
   }
-  if (ops[iter_ops]->type() == ONEHOT) {
-    return PrepareOneHot(s_ptr);
-  }
   if (ops[iter_ops]->type() == GATHERV2) {
     return PrepareGatherV2(s_ptr);
   }
+  if (ops[iter_ops]->type() == L2_NORMALIZE) {
+    return PrepareL2Normalize(ops, iter_ops, basic_stra);
+  }
+
   for (size_t iter_op_inputs = 0; iter_op_inputs < (size_t)ops[iter_ops]->inputs_tensor_info().size();
        iter_op_inputs++) {
@@ -544,11 +637,11 @@ std::vector<std::vector<int32_t>> GenerateStrategiesFromStrategy(const std::vect
   return stra;
 }
 
-void GenerateEliminatedOperatorStrategyForward(const std::shared_ptr<Graph> graph,
+void GenerateEliminatedOperatorStrategyForward(const std::shared_ptr<Graph> &graph,
                                                const std::vector<std::shared_ptr<OperatorInfo>> &ops,
                                                const std::vector<std::vector<std::string>> &input_tensor_names,
-                                               const std::shared_ptr<std::vector<size_t>> index_list,
-                                               const std::shared_ptr<std::vector<size_t>> no_stra_op_list) {
+                                               const std::shared_ptr<std::vector<size_t>> &index_list,
+                                               const std::shared_ptr<std::vector<size_t>> &no_stra_op_list) {
   if (no_stra_op_list->size() == 0) {
     return;
   }
@@ -559,7 +652,7 @@ void GenerateEliminatedOperatorStrategyForward(const std::shared_ptr<Graph> grap
     std::vector<std::vector<int32_t>> stra;
     std::vector<int32_t> s;
     size_t incoming_op_index = FindIndexOfOperatorIncoming(input_tensor_names, iter_ops);
-    if (incoming_op_index != SIZE_MAX && ops[iter_ops]->type() != ONEHOT) {
+    if (incoming_op_index != SIZE_MAX) {
      auto iter_graph = index_list->at(incoming_op_index);
      if (iter_graph != SIZE_MAX) {
        s = CopyIncomingOperatorOutputStrategy(graph, ops, iter_ops, iter_graph);
@@ -617,7 +710,8 @@ std::vector<int32_t> CopyOutgoingOperatorInputStrategy(const std::vector<std::sh
   std::vector<int32_t> s;
   if (ops[iter_ops]->type() == REDUCE_MAX || ops[iter_ops]->type() == REDUCE_MIN ||
       ops[iter_ops]->type() == REDUCE_SUM || ops[iter_ops]->type() == REDUCE_MEAN || ops[iter_ops]->type() == RESHAPE ||
-      ops[iter_ops]->type() == GATHERV2) {
+      ops[iter_ops]->type() == GATHERV2 || ops[iter_ops]->type() == TRANSPOSE ||
+      ops[iter_ops]->type() == ARGMAXWITHVALUE || ops[iter_ops]->type() == ARGMINWITHVALUE) {
     return s;
   }
 
@@ -640,7 +734,7 @@ std::vector<int32_t> CopyOutgoingOperatorInputStrategy(const std::vector<std::sh
   }
 
   if (outgoing_op_index != SIZE_MAX && iter_op_inputs != SIZE_MAX) {
-    for (size_t k = 0; k < ops[outgoing_op_index]->selected_strategy()->GetInputDim()[iter_op_inputs].size(); ++k) {
+    for (size_t k = 0; k < ops[iter_ops]->outputs_tensor_info()[0].shape().size(); ++k) {
       s.push_back(ops[outgoing_op_index]->selected_strategy()->GetInputDim()[iter_op_inputs][k]);
     }
   }
@@ -649,7 +743,7 @@ std::vector<int32_t> CopyOutgoingOperatorInputStrategy(const std::vector<std::sh
 
 void GenerateEliminatedOperatorStrategyBackward(const std::vector<std::shared_ptr<OperatorInfo>> &ops,
                                                 const std::vector<std::vector<std::string>> &input_tensor_names,
-                                                const std::shared_ptr<std::vector<size_t>> no_stra_op_list) {
+                                                const std::shared_ptr<std::vector<size_t>> &no_stra_op_list) {
   if (no_stra_op_list->size() == 0) {
     return;
   }
@@ -679,16 +773,16 @@ void GenerateEliminatedOperatorStrategyBackward(const std::vector<std::shared_pt
   }
 }
 
-void GenerateRemainingOperatorStrategy(const std::shared_ptr<Graph> graph,
+void GenerateRemainingOperatorStrategy(const std::shared_ptr<Graph> &graph,
                                        const std::vector<std::shared_ptr<OperatorInfo>> &ops,
                                        const std::vector<std::vector<std::string>> &input_tensor_names,
-                                       const std::shared_ptr<std::vector<size_t>> index_list,
-                                       const std::shared_ptr<std::vector<size_t>> no_stra_op_list) {
+                                       const std::shared_ptr<std::vector<size_t>> &index_list,
+                                       const std::shared_ptr<std::vector<size_t>> &no_stra_op_list) {
   if (no_stra_op_list->size() == 0) {
     return;
   }
 
-  size_t no_stra_op_list_size;
+  size_t no_stra_op_list_size = no_stra_op_list->size();
   do {
     no_stra_op_list_size = no_stra_op_list->size();
     GenerateEliminatedOperatorStrategyForward(graph, ops, input_tensor_names, index_list, no_stra_op_list);
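Note: GenerateRemainingOperatorStrategy appears to be a fixed-point loop: rerun the forward/backward elimination passes until no_stra_op_list stops shrinking (the loop tail is not shown in this hunk, so the exit condition is assumed). The control shape in isolation, as a sketch:

    #include <iostream>
    #include <vector>

    // Fixed-point driver: repeat a pass until it stops making progress,
    // the same shape as GenerateRemainingOperatorStrategy's do/while.
    int main() {
      std::vector<int> pending = {5, 4, 3, 2, 1};
      size_t before;
      do {
        before = pending.size();
        // Stand-in for one elimination pass: here it resolves one element per round.
        if (!pending.empty()) {
          pending.pop_back();
        }
      } while (pending.size() < before);  // progress was made; try again
      std::cout << pending.size() << "\n";  // 0: the loop ran to a fixed point
    }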
@@ -27,22 +27,20 @@
 
 namespace mindspore {
 namespace parallel {
-void GenerateStrategy(std::shared_ptr<Graph> graph, const std::vector<std::shared_ptr<OperatorInfo>> &ops,
-                      const std::shared_ptr<std::vector<std::vector<size_t>>> eli_list,
+void GenerateStrategy(const std::shared_ptr<Graph> &graph, const std::vector<std::shared_ptr<OperatorInfo>> &ops,
+                      const std::shared_ptr<std::vector<std::vector<size_t>>> &eli_list,
                       const std::vector<std::vector<std::string>> &input_tensor_names,
-                      const std::shared_ptr<std::vector<size_t>> index_list);
+                      const std::shared_ptr<std::vector<size_t>> &index_list);
 std::vector<std::vector<int32_t>> PrepareMatMul(const std::shared_ptr<Graph> &graph,
                                                 const std::vector<std::shared_ptr<OperatorInfo>> &ops,
                                                 const size_t iter_graph, const size_t iter_ops);
-std::vector<std::vector<int32_t>> PreparePReLU(const std::shared_ptr<Graph> &graph,
-                                               const std::vector<std::shared_ptr<OperatorInfo>> &ops,
-                                               const size_t iter_graph, const size_t iter_ops);
-std::vector<std::vector<int32_t>> PrepareBatchNorm(const std::shared_ptr<Graph> &graph,
-                                                   const std::vector<std::shared_ptr<OperatorInfo>> &ops,
-                                                   const size_t iter_graph, const size_t iter_ops);
 std::vector<std::vector<int32_t>> PrepareBiasAdd(const std::shared_ptr<std::vector<int32_t>> &s);
-std::vector<std::vector<int32_t>> PrepareOneHot(const std::shared_ptr<std::vector<int32_t>> &s);
+std::vector<std::vector<int32_t>> PrepareOneHot(const std::shared_ptr<Graph> &graph,
+                                                const std::vector<std::shared_ptr<OperatorInfo>> &ops,
+                                                const size_t iter_graph, const size_t iter_ops);
 std::vector<std::vector<int32_t>> PrepareGatherV2(const std::shared_ptr<std::vector<int32_t>> &s);
+std::vector<std::vector<int32_t>> PrepareL2Normalize(const std::vector<std::shared_ptr<OperatorInfo>> &ops,
+                                                     const size_t iter_ops, std::vector<int32_t> s);
 std::vector<std::vector<int32_t>> MakeRecSearchStrategy(const std::shared_ptr<Graph> &graph,
                                                         const std::vector<std::shared_ptr<OperatorInfo>> &ops,
                                                         const size_t iter_graph, const size_t iter_ops);
@@ -52,12 +50,12 @@ std::vector<std::vector<int32_t>> MakeDataParallelStrategy(const std::shared_ptr
 std::vector<std::vector<int32_t>> PrepareStrategy(const std::shared_ptr<Graph> &graph,
                                                   const std::vector<std::shared_ptr<OperatorInfo>> &ops,
                                                   const size_t iter_graph, const size_t iter_ops);
-void GeneratePartitionedOperatorStrategy(const std::shared_ptr<Graph> graph,
+void GeneratePartitionedOperatorStrategy(const std::shared_ptr<Graph> &graph,
                                          const std::vector<std::shared_ptr<OperatorInfo>> &ops,
-                                         const std::shared_ptr<std::vector<size_t>> index_list);
+                                         const std::shared_ptr<std::vector<size_t>> &index_list);
 size_t FindIndexOfOperatorIncoming(const std::vector<std::vector<std::string>> &input_tensor_names,
                                    const size_t iter_ops);
-std::vector<int32_t> CopyIncomingOperatorOutputStrategy(const std::shared_ptr<Graph> graph,
+std::vector<int32_t> CopyIncomingOperatorOutputStrategy(const std::shared_ptr<Graph> &graph,
                                                         const std::vector<std::shared_ptr<OperatorInfo>> &ops,
                                                         const size_t iter_ops, const size_t iter_graph);
 std::vector<int32_t> PrepareIncomingOperatorInputStrategy(const std::vector<std::shared_ptr<OperatorInfo>> &ops,
@@ -65,19 +63,23 @@ std::vector<int32_t> PrepareIncomingOperatorInputStrategy(const std::vector<std:
 std::vector<int32_t> GetAxisList(const std::vector<std::shared_ptr<OperatorInfo>> &ops, const int iter_ops);
 std::vector<int32_t> ModifyStrategyIfSqueezeIncoming(const std::vector<std::shared_ptr<OperatorInfo>> &ops,
                                                      const size_t incoming_op_index, std::vector<int32_t> s);
+bool GetKeepDims(const std::vector<std::shared_ptr<OperatorInfo>> &ops, const size_t iter_ops);
 std::vector<int32_t> GetDimList(const std::vector<std::shared_ptr<OperatorInfo>> &ops, const size_t iter_ops);
 std::vector<int32_t> ModifyStrategyIfReduceIncoming(const std::vector<std::shared_ptr<OperatorInfo>> &ops,
                                                     const size_t incoming_op_index, std::vector<int32_t> s);
+std::vector<int32_t> GetDimListFromAttrs(const std::vector<std::shared_ptr<OperatorInfo>> &ops, const size_t iter_ops);
+std::vector<int32_t> ModifyStrategyIfArgIncoming(const std::vector<std::shared_ptr<OperatorInfo>> &ops,
+                                                 const size_t incoming_op_index, std::vector<int32_t> s);
 std::vector<int32_t> CopyIncomingOperatorInputStrategy(const std::vector<std::shared_ptr<OperatorInfo>> &ops,
                                                        const size_t iter_ops, const size_t incoming_op_index);
 std::vector<std::vector<int32_t>> GenerateStrategiesFromStrategy(const std::vector<std::shared_ptr<OperatorInfo>> &ops,
                                                                  const size_t iter_ops,
                                                                  std::vector<int32_t> basic_stra);
-void GenerateEliminatedOperatorStrategyForward(std::shared_ptr<Graph> graph,
+void GenerateEliminatedOperatorStrategyForward(const std::shared_ptr<Graph> &graph,
                                                const std::vector<std::shared_ptr<OperatorInfo>> &ops,
                                                const std::vector<std::vector<std::string>> &input_tensor_names,
-                                               const std::shared_ptr<std::vector<size_t>> index_list,
-                                               const std::shared_ptr<std::vector<size_t>> no_stra_op_list);
+                                               const std::shared_ptr<std::vector<size_t>> &index_list,
+                                               const std::shared_ptr<std::vector<size_t>> &no_stra_op_list);
 std::vector<int32_t> ModifyStrategyIfSqueezeOutgoing(const std::vector<std::shared_ptr<OperatorInfo>> &ops,
                                                      const size_t iter_ops, std::vector<int32_t> s);
 std::vector<int32_t> CopyOutgoingOperatorInputStrategy(const std::vector<std::shared_ptr<OperatorInfo>> &ops,
@@ -85,12 +87,12 @@ std::vector<int32_t> CopyOutgoingOperatorInputStrategy(const std::vector<std::sh
                                                        const size_t iter_ops);
 void GenerateEliminatedOperatorStrategyBackward(const std::vector<std::shared_ptr<OperatorInfo>> &ops,
                                                 const std::vector<std::vector<std::string>> &input_tensor_names,
-                                                const std::shared_ptr<std::vector<size_t>> no_stra_op_list);
-void GenerateRemainingOperatorStrategy(const std::shared_ptr<Graph> graph,
+                                                const std::shared_ptr<std::vector<size_t>> &no_stra_op_list);
+void GenerateRemainingOperatorStrategy(const std::shared_ptr<Graph> &graph,
                                        const std::vector<std::shared_ptr<OperatorInfo>> &ops,
                                        const std::vector<std::vector<std::string>> &input_tensor_names,
-                                       const std::shared_ptr<std::vector<size_t>> index_list,
-                                       const std::shared_ptr<std::vector<size_t>> no_stra_op_list);
+                                       const std::shared_ptr<std::vector<size_t>> &index_list,
+                                       const std::shared_ptr<std::vector<size_t>> &no_stra_op_list);
 }  // namespace parallel
 }  // namespace mindspore
 #endif  // PARALLEL_AUTO_PARALLEL_REC_GENERATE_STRATEGY_H_
@@ -38,6 +38,7 @@ enum OperatorType {
   kRecBiasAdd,
   kRecSoftmax,
   kRecSparseSoftmaxCrossEntropyWithLogits,
+  kRecSoftmaxCrossEntropyWithLogits,
   kRecOneHot,
   kRecLog,
   kRecExp,
@@ -49,7 +50,8 @@ enum OperatorType {
   kRecCast,
   kRecReduce,
   kRecPReLU,
-  kRecGatherV2
+  kRecGatherV2,
+  kRecArgWithValue
 };
 
 enum InfoType { kApplication, kConstant };
@@ -40,7 +40,7 @@ const TensorParam MakeTensor(int n, int c, int h, int w) {
   return tensor;
 }
 
-Graph::NodeType MakeNewOperator(std::vector<std::shared_ptr<OperatorInfo>> ops, size_t iter_ops) {
+Graph::NodeType MakeNewOperator(const std::vector<std::shared_ptr<OperatorInfo>> &ops, size_t iter_ops) {
   Graph::NodeType NewOp;
   NewOp.name = ops[iter_ops]->name();
   NewOp.info = InfoType::kApplication;
@@ -140,7 +140,7 @@ std::shared_ptr<Graph> ParseGraph(const std::vector<std::shared_ptr<OperatorInfo
   return graph;
 }
 
-void MakeEdge(const std::vector<std::vector<std::string>> &input_tensor_names, std::shared_ptr<Graph> graph) {
+void MakeEdge(const std::vector<std::vector<std::string>> &input_tensor_names, const std::shared_ptr<Graph> &graph) {
   for (size_t iter_i = 0; iter_i < input_tensor_names.size(); iter_i++) {
     for (size_t iter_j = 1; iter_j < input_tensor_names[iter_i].size(); iter_j++) {
       size_t head_node_index = GetIndexInInputTensorNames(input_tensor_names, input_tensor_names[iter_i][iter_j]);
@@ -163,8 +163,8 @@ size_t GetIndexInInputTensorNames(const std::vector<std::vector<std::string>> &i
   return SIZE_MAX;
 }
 
-void Eliminate_Aux(const size_t node_index, const std::shared_ptr<Graph> graph,
-                   const std::shared_ptr<std::vector<std::vector<size_t>>> eli_list) {
+void Eliminate_Aux(const size_t node_index, const std::shared_ptr<Graph> &graph,
+                   const std::shared_ptr<std::vector<std::vector<size_t>>> &eli_list) {
   std::vector<size_t> eli;
   eli.push_back(node_index);
   for (size_t i = 0; i < (size_t)graph->nodes[node_index].node_out.size(); i++) {
@@ -211,18 +211,18 @@ void Eliminate_Aux(const size_t node_index, const std::shared_ptr<Graph> graph,
   }
 }
 
-std::shared_ptr<Graph> EliminateGraph(const std::shared_ptr<Graph> graph,
-                                      const std::shared_ptr<std::vector<std::vector<size_t>>> eli_list,
-                                      const std::shared_ptr<std::vector<size_t>> index_list) {
+std::shared_ptr<Graph> EliminateGraph(const std::shared_ptr<Graph> &graph,
+                                      const std::shared_ptr<std::vector<std::vector<size_t>>> &eli_list,
+                                      const std::shared_ptr<std::vector<size_t>> &index_list) {
   MS_EXCEPTION_IF_NULL(graph);
-  const std::set<OperatorType> type_list = {
-    OperatorType::kRecOneHot, OperatorType::kRecReLU, OperatorType::kRecLog, OperatorType::kRecExp,
-    OperatorType::kRecAdd, OperatorType::kRecElmWiseOp, OperatorType::kRecBiasAdd, OperatorType::kRecSub,
-    OperatorType::kRecMul, OperatorType::kRecDiv, OperatorType::kRecSqueeze, OperatorType::kRecReduce,
-    OperatorType::kRecCast, OperatorType::kRecReshape, OperatorType::kRecGatherV2};
+  static const std::set<OperatorType> elementwise_type = {
+    OperatorType::kRecReLU, OperatorType::kRecLog, OperatorType::kRecExp, OperatorType::kRecAdd,
+    OperatorType::kRecElmWiseOp, OperatorType::kRecBiasAdd, OperatorType::kRecSub, OperatorType::kRecMul,
+    OperatorType::kRecDiv, OperatorType::kRecSqueeze, OperatorType::kRecReduce, OperatorType::kRecCast,
+    OperatorType::kRecReshape, OperatorType::kRecGatherV2, OperatorType::kRecArgWithValue};
   for (size_t node_index = 0; node_index < (size_t)graph->nodes.size(); node_index++) {
     auto type = graph->nodes[node_index].apply.op_type;
-    if (type_list.find(type) != type_list.end()) {
+    if (elementwise_type.find(type) != elementwise_type.end()) {
       Eliminate_Aux(node_index, graph, eli_list);
     }
   }
@@ -250,12 +250,22 @@ std::shared_ptr<Graph> EliminateGraph(const std::shared_ptr<Graph> graph,
 
     new_graph->nodes.push_back(graph->nodes[i]);
     auto *node_in = &new_graph->nodes[index_list->at(i)].node_in;
-    for (size_t j = 0; j < node_in->size(); j++) {
-      node_in->at(j) = index_list->at(node_in->at(j));
+    for (size_t j = node_in->size(); j > 0; j--) {
+      bool IsEliminated = (index_list->at(node_in->at(j - 1)) == SIZE_MAX);
+      if (IsEliminated) {
+        node_in->erase(node_in->begin() + j - 1);
+      } else {
+        node_in->at(j - 1) = index_list->at(node_in->at(j - 1));
+      }
     }
     auto *node_out = &new_graph->nodes[index_list->at(i)].node_out;
-    for (size_t j = 0; j < node_out->size(); j++) {
-      node_out->at(j) = index_list->at(node_out->at(j));
+    for (size_t j = node_out->size(); j > 0; j--) {
+      bool IsEliminated = (index_list->at(node_out->at(j - 1)) == SIZE_MAX);
+      if (IsEliminated) {
+        node_out->erase(node_out->begin() + j - 1);
+      } else {
+        node_out->at(j - 1) = index_list->at(node_out->at(j - 1));
+      }
     }
   }
   return new_graph;
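Note: the loops now walk node_in/node_out from the back so that erasing an eliminated edge does not shift the indices of entries still to be visited; iterating forward while erasing would skip elements (and the old code never dropped SIZE_MAX entries at all). The pattern in isolation:

    #include <cassert>
    #include <vector>

    int main() {
      // A sentinel (standing in for SIZE_MAX) marks entries to drop; others are remapped.
      const int kEliminated = -1;
      std::vector<int> edges = {3, kEliminated, 7, kEliminated, 9};

      // Reverse iteration: erasing at j - 1 only shifts elements already handled.
      for (size_t j = edges.size(); j > 0; j--) {
        if (edges[j - 1] == kEliminated) {
          edges.erase(edges.begin() + j - 1);
        } else {
          edges[j - 1] = edges[j - 1] * 2;  // stand-in for the index_list remapping
        }
      }
      assert((edges == std::vector<int>{6, 14, 18}));
    }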
@@ -47,6 +47,8 @@ const std::map<std::string, OperatorType> DictOpType{
   {REDUCE_MIN, OperatorType::kRecReduce},
   {REDUCE_MEAN, OperatorType::kRecReduce},
   {GATHERV2, OperatorType::kRecGatherV2},
+  {ARGMAXWITHVALUE, OperatorType::kRecArgWithValue},
+  {ARGMINWITHVALUE, OperatorType::kRecArgWithValue},
 
   {RELU, OperatorType::kRecReLU},
   {"ReLU6", OperatorType::kRecReLU},
@@ -59,6 +61,7 @@ const std::map<std::string, OperatorType> DictOpType{
 
   {PRELU, OperatorType::kRecPReLU},
 
+  {TRANSPOSE, OperatorType::kRecElmWiseOp},
   {L2_NORMALIZE, OperatorType::kRecElmWiseOp},
   {TENSOR_ADD, OperatorType::kRecElmWiseOp},
   {SUB, OperatorType::kRecElmWiseOp},
@@ -67,7 +70,7 @@ const std::map<std::string, OperatorType> DictOpType{
   {REAL_DIV, OperatorType::kRecElmWiseOp},
   {SOFTMAX, OperatorType::kRecSoftmax},
   {LOG_SOFTMAX, OperatorType::kRecSoftmax},
-  {SOFTMAX_CROSS_ENTROPY_WITH_LOGITS, OperatorType::kRecSoftmax},
+  {SOFTMAX_CROSS_ENTROPY_WITH_LOGITS, OperatorType::kRecSoftmaxCrossEntropyWithLogits},
   {SQRT, OperatorType::kRecElmWiseOp},
   {NEG, OperatorType::kRecElmWiseOp},
   {POW, OperatorType::kRecElmWiseOp},
@@ -107,7 +110,7 @@ const std::map<std::string, OperatorType> DictOpType{
 
 const TensorParam MakeTensor(int n, int c, int h, int w);
 
-Graph::NodeType MakeNewOperator(std::vector<std::shared_ptr<OperatorInfo>> ops, size_t iter_ops);
+Graph::NodeType MakeNewOperator(const std::vector<std::shared_ptr<OperatorInfo>> &ops, size_t iter_ops);
 
 OperatorRec CompleteOperatorInputs(const std::vector<std::shared_ptr<OperatorInfo>> &ops, const size_t iter_ops,
                                    Graph::NodeType NewTensor);
@@ -118,17 +121,17 @@ TensorParam Complete2DInputs(const std::vector<std::shared_ptr<OperatorInfo>> &o
 std::shared_ptr<Graph> ParseGraph(const std::vector<std::shared_ptr<OperatorInfo>> &ops,
                                   const std::vector<std::vector<std::string>> &input_tensor_names);
 
-void MakeEdge(const std::vector<std::vector<std::string>> &input_tensor_names, std::shared_ptr<Graph> graph);
+void MakeEdge(const std::vector<std::vector<std::string>> &input_tensor_names, const std::shared_ptr<Graph> &graph);
 
 size_t GetIndexInInputTensorNames(const std::vector<std::vector<std::string>> &input_tensor_names,
                                   const std::string &input_name);
 
-void Eliminate_Aux(const size_t node_index, const std::shared_ptr<Graph> graph,
-                   const std::shared_ptr<std::vector<std::vector<size_t>>> eli_list);
+void Eliminate_Aux(const size_t node_index, const std::shared_ptr<Graph> &graph,
+                   const std::shared_ptr<std::vector<std::vector<size_t>>> &eli_list);
 
-std::shared_ptr<Graph> EliminateGraph(const std::shared_ptr<Graph> graph,
-                                      const std::shared_ptr<std::vector<std::vector<size_t>>> eli_list,
-                                      const std::shared_ptr<std::vector<size_t>> index_list);
+std::shared_ptr<Graph> EliminateGraph(const std::shared_ptr<Graph> &graph,
+                                      const std::shared_ptr<std::vector<std::vector<size_t>>> &eli_list,
+                                      const std::shared_ptr<std::vector<size_t>> &index_list);
 }  // namespace parallel
 }  // namespace mindspore
 #endif  // PARALLEL_AUTO_PARALLEL_REC_PARSE_GRAPH_H_
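Note: every signature change in this header follows one rule: a callee that only observes the graph takes the std::shared_ptr by const reference, so no atomic reference-count increment and decrement happen on each call; the matching .cc hunks below change the definitions in step. A minimal sketch of the difference, with a hypothetical Graph stand-in rather than the real type:

    #include <memory>

    struct Graph {};  // hypothetical stand-in for the real Graph

    // Copies the shared_ptr: bumps and drops the atomic refcount per call.
    void ByValue(std::shared_ptr<Graph> graph) { /* use *graph */ }

    // Borrows the shared_ptr: no refcount traffic; the caller keeps ownership.
    void ByConstRef(const std::shared_ptr<Graph> &graph) { /* use *graph */ }

    int main() {
      auto g = std::make_shared<Graph>();
      ByValue(g);     // use_count temporarily becomes 2
      ByConstRef(g);  // use_count stays 1
      return 0;
    }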
@@ -68,19 +68,24 @@ double GetWeights(const Graph::NodeType &node) {
     auto cost_ptr = std::make_shared<CostBiasAdd>();
 
     return cost_ptr->GetMinCostIn();
-  } else if (op.op_type == OperatorType::kRecOneHot || op.op_type == OperatorType::kRecLog ||
-             op.op_type == OperatorType::kRecExp || op.op_type == OperatorType::kRecAdd ||
-             op.op_type == OperatorType::kRecSub || op.op_type == OperatorType::kRecMul ||
-             op.op_type == OperatorType::kRecDiv || op.op_type == OperatorType::kRecSqueeze ||
-             op.op_type == OperatorType::kRecCast) {
+  } else if (op.op_type == OperatorType::kRecLog || op.op_type == OperatorType::kRecExp ||
+             op.op_type == OperatorType::kRecAdd || op.op_type == OperatorType::kRecSub ||
+             op.op_type == OperatorType::kRecMul || op.op_type == OperatorType::kRecDiv ||
+             op.op_type == OperatorType::kRecSqueeze || op.op_type == OperatorType::kRecCast) {
     // For element-wise op
     auto cost_ptr = std::make_shared<CostCommon>();
 
     return cost_ptr->GetMinCostIn();
-  } else if (op.op_type == OperatorType::kRecUnkownType || op.op_type == OperatorType::kRecPReLU ||
-             op.op_type == OperatorType::kRecBatchNorm || op.op_type == OperatorType::kRecSoftmax ||
-             op.op_type == OperatorType::kRecSparseSoftmaxCrossEntropyWithLogits) {
-    // For unprocessed type
+  } else if (op.op_type == OperatorType::kRecBatchNorm || op.op_type == OperatorType::kRecOneHot ||
+             op.op_type == OperatorType::kRecPReLU || op.op_type == OperatorType::kRecSoftmax ||
+             op.op_type == OperatorType::kRecSparseSoftmaxCrossEntropyWithLogits ||
+             op.op_type == OperatorType::kRecSoftmaxCrossEntropyWithLogits) {
+    // For BatchParallel op
+    auto cost_ptr = std::make_shared<CostBatchParallel>();
+
+    return cost_ptr->GetMaxCostIn();
+  } else if (op.op_type == OperatorType::kRecUnkownType) {
+    // For Unknown type
     return 0.0;
   } else {
     MS_LOG(EXCEPTION) << "Failure: GetOperatorWeight failed.";
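Note: after this hunk GetWeights distinguishes four families: operators with a dedicated cost model (e.g. CostBiasAdd), element-wise operators (CostCommon::GetMinCostIn), BatchParallel operators (CostBatchParallel::GetMaxCostIn), and the unknown type, which weighs 0 so it sorts last. A condensed sketch of that dispatch shape, with hypothetical stand-in cost classes (the real ones derive their numbers from the node's tensor shapes and strategies):

    #include <memory>
    #include <stdexcept>

    enum class OpType { kElmWise, kBatchParallel, kUnknown };

    // Hypothetical stand-ins for the real cost classes.
    struct CostCommon        { double GetMinCostIn() const { return 1.0; } };
    struct CostBatchParallel { double GetMaxCostIn() const { return 4.0; } };

    double GetWeight(OpType type) {
      if (type == OpType::kElmWise) {
        auto cost_ptr = std::make_shared<CostCommon>();
        return cost_ptr->GetMinCostIn();
      } else if (type == OpType::kBatchParallel) {
        auto cost_ptr = std::make_shared<CostBatchParallel>();
        return cost_ptr->GetMaxCostIn();  // pessimistic bound for batch-parallel ops
      } else if (type == OpType::kUnknown) {
        return 0.0;  // unknown ops carry no weight in the sort
      }
      throw std::runtime_error("GetOperatorWeight failed");
    }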
@@ -88,7 +93,7 @@ double GetWeights(const Graph::NodeType &node) {
 }
 
 // Sort all the nodes by their weights
-std::vector<size_t> SortByWeight(const std::shared_ptr<Graph> graph) {
+std::vector<size_t> SortByWeight(const std::shared_ptr<Graph> &graph) {
   MS_EXCEPTION_IF_NULL(graph);
 
   std::vector<std::pair<double, size_t>> weight_to_node_index;
@@ -119,7 +124,7 @@ std::vector<size_t> SortByWeight(const std::shared_ptr<Graph> graph) {
 // Get optimal strategy to partition the target node
 StrategyRec PartitionNode(const Graph::NodeType &node,
                           const std::vector<std::pair<std::string, StrategyRec>> &node_name_to_strategy,
-                          std::shared_ptr<Graph> graph) {
+                          const std::shared_ptr<Graph> &graph) {
   bool enable_conv_chw_partition = false;
   MS_EXCEPTION_IF_NULL(graph);
 
|
@ -158,19 +163,26 @@ StrategyRec PartitionNode(const Graph::NodeType &node,
|
||||||
auto cost_ptr = std::make_shared<CostBiasAdd>();
|
auto cost_ptr = std::make_shared<CostBiasAdd>();
|
||||||
|
|
||||||
return cost_ptr->GetOptimalStr(node, node_name_to_strategy, *graph);
|
return cost_ptr->GetOptimalStr(node, node_name_to_strategy, *graph);
|
||||||
} else if (node.apply.op_type == OperatorType::kRecOneHot || node.apply.op_type == OperatorType::kRecLog ||
|
} else if (node.apply.op_type == OperatorType::kRecLog || node.apply.op_type == OperatorType::kRecExp ||
|
||||||
node.apply.op_type == OperatorType::kRecExp || node.apply.op_type == OperatorType::kRecAdd ||
|
node.apply.op_type == OperatorType::kRecAdd || node.apply.op_type == OperatorType::kRecSub ||
|
||||||
node.apply.op_type == OperatorType::kRecSub || node.apply.op_type == OperatorType::kRecMul ||
|
node.apply.op_type == OperatorType::kRecMul || node.apply.op_type == OperatorType::kRecDiv ||
|
||||||
node.apply.op_type == OperatorType::kRecDiv || node.apply.op_type == OperatorType::kRecSqueeze ||
|
node.apply.op_type == OperatorType::kRecSqueeze || node.apply.op_type == OperatorType::kRecCast) {
|
||||||
node.apply.op_type == OperatorType::kRecCast) {
|
|
||||||
// For element-wise op
|
// For element-wise op
|
||||||
auto cost_ptr = std::make_shared<CostCommon>();
|
auto cost_ptr = std::make_shared<CostCommon>();
|
||||||
|
|
||||||
return cost_ptr->GetOptimalStr(node, node_name_to_strategy, *graph);
|
return cost_ptr->GetOptimalStr(node, node_name_to_strategy, *graph);
|
||||||
} else if (node.apply.op_type == OperatorType::kRecUnkownType || node.apply.op_type == OperatorType::kRecPReLU ||
|
} else if (node.apply.op_type == OperatorType::kRecBatchNorm || node.apply.op_type == OperatorType::kRecOneHot ||
|
||||||
node.apply.op_type == OperatorType::kRecBatchNorm || node.apply.op_type == OperatorType::kRecSoftmax ||
|
node.apply.op_type == OperatorType::kRecPReLU || node.apply.op_type == kRecSoftmax ||
|
||||||
node.apply.op_type == OperatorType::kRecSparseSoftmaxCrossEntropyWithLogits) {
|
node.apply.op_type == OperatorType::kRecSparseSoftmaxCrossEntropyWithLogits) {
|
||||||
// For unprocessed type
|
// For BatchParallel type
|
||||||
|
auto cost_ptr = std::make_shared<CostBatchParallel>();
|
||||||
|
return cost_ptr->GetOptimalStr(node);
|
||||||
|
} else if (node.apply.op_type == OperatorType::kRecSoftmaxCrossEntropyWithLogits) {
|
||||||
|
// For SoftmaxCrossEntropyWithLogits type
|
||||||
|
auto cost_ptr = std::make_shared<CostSoftmaxCrossEntropyWithLogits>();
|
||||||
|
return cost_ptr->GetOptimalStr(node);
|
||||||
|
} else if (node.apply.op_type == OperatorType::kRecUnkownType) {
|
||||||
|
// For Unkown type
|
||||||
StrategyRec default_strategy;
|
StrategyRec default_strategy;
|
||||||
return default_strategy;
|
return default_strategy;
|
||||||
} else {
|
} else {
|
||||||
|
@@ -179,7 +191,8 @@ StrategyRec PartitionNode(const Graph::NodeType &node,
 }
 
 // Partition graph into all devices.
-Status PartitionForAllDevices(const size_t num_device, const double device_memory, std::shared_ptr<Graph> graph) {
+Status PartitionForAllDevices(const size_t num_device, const double device_memory,
+                              const std::shared_ptr<Graph> &graph) {
   if (num_device < 1) {
     MS_LOG(EXCEPTION) << "ERROR: Number of devices can't be " << num_device << ".";
   }
|
@ -249,7 +262,7 @@ Graph::NodeType ApplyStrToTensor(Graph::NodeType Node) {
|
||||||
return Node;
|
return Node;
|
||||||
}
|
}
|
||||||
|
|
||||||
Status DevicesMemoryControl(const size_t num_device, const double device_memory, std::shared_ptr<Graph> graph) {
|
Status DevicesMemoryControl(const size_t num_device, const double device_memory, const std::shared_ptr<Graph> &graph) {
|
||||||
MS_EXCEPTION_IF_NULL(graph);
|
MS_EXCEPTION_IF_NULL(graph);
|
||||||
if (num_device == 0) {
|
if (num_device == 0) {
|
||||||
MS_LOG(EXCEPTION) << "Failure: device number is 0.";
|
MS_LOG(EXCEPTION) << "Failure: device number is 0.";
|
||||||
|
|
|
@@ -32,19 +32,19 @@
 
 namespace mindspore {
 namespace parallel {
-std::vector<size_t> SortByWeight(const std::shared_ptr<Graph> graph);
+std::vector<size_t> SortByWeight(const std::shared_ptr<Graph> &graph);
 
 double GetWeights(const Graph::NodeType &node);
 
 StrategyRec PartitionNode(const Graph::NodeType &node,
                           const std::vector<std::pair<std::string, StrategyRec>> &node_name_to_strategy,
-                          std::shared_ptr<Graph> graph);
+                          const std::shared_ptr<Graph> &graph);
 
-Status PartitionForAllDevices(const size_t num_device, const double device_memory, std::shared_ptr<Graph> graph);
+Status PartitionForAllDevices(const size_t num_device, const double device_memory, const std::shared_ptr<Graph> &graph);
 
 Graph::NodeType ApplyStrToTensor(Graph::NodeType Node);
 
-Status DevicesMemoryControl(const size_t num_device, const double device_memory, std::shared_ptr<Graph> graph);
+Status DevicesMemoryControl(const size_t num_device, const double device_memory, const std::shared_ptr<Graph> &graph);
 
 size_t GetDataTypeSize(const TensorType &type);
 }  // namespace parallel