forked from mindspore-Ecosystem/mindspore
!31899 [Auto-Par] [D-Rec] Add Mem & Redis coefficient on D-Rec cost model for Pangu-alpha
Merge pull request !31899 from FRHW-WANG/D-Rec-deliver
Commit 9810fa53cb
@@ -117,9 +117,9 @@ double CostRedisWithAdjacentNode(const std::vector<std::pair<std::string, StrategyRec
     }
   }
 
-  if (counter >= 2) {
-    new_redis_cost = tensor_size / 4.0;
-  } else if (counter == 0 || counter == 1) {
+  if (counter >= 1) {
+    new_redis_cost = tensor_size * REDIS_COEF;
+  } else if (counter == 0) {
     new_redis_cost = 0;
   } else {
     MS_LOG(EXCEPTION) << "Failure: CostRedis failed.";
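
In effect, the redistribution penalty now both triggers earlier and weighs far more heavily: a strategy mismatch with even one adjacent node costs tensor_size * REDIS_COEF (16x the tensor size), where the old code charged tensor_size / 4.0 and only from two mismatches up. A minimal standalone sketch of the new rule, assuming counter counts adjacent nodes whose strategies force a redistribution (NewRedisCost is a hypothetical helper name, not part of this PR):

#include <stdexcept>

constexpr double REDIS_COEF = 16;  // new coefficient introduced by this PR

// Hypothetical distillation of the updated branch in CostRedisWithAdjacentNode.
double NewRedisCost(int counter, double tensor_size) {
  if (counter >= 1) {
    return tensor_size * REDIS_COEF;  // was tensor_size / 4.0, and only for counter >= 2
  } else if (counter == 0) {
    return 0.0;
  }
  // Stands in for MS_LOG(EXCEPTION) in the real code.
  throw std::runtime_error("Failure: CostRedis failed.");
}
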
@@ -166,7 +166,7 @@ StrategyRec CostMatMul::GetOptimalStr(const Graph::NodeType &node,
 }
 
 // Get weight for MatMul
-double CostMatMul::GetMinCostIn(const OperatorRec &op) {
+double CostMatMul::GetMaxCostIn(const OperatorRec &op) {
   int64_t edge_i = static_cast<int64_t>(op.arguments[0].tensor_shape.shape_h * op.arguments[0].tensor_str.str_h);
   int64_t edge_j = static_cast<int64_t>(op.arguments[1].tensor_shape.shape_w * op.arguments[1].tensor_str.str_w);
   int64_t edge_k = static_cast<int64_t>(op.arguments[0].tensor_shape.shape_w * op.arguments[0].tensor_str.str_w);
@@ -176,7 +176,7 @@ double CostMatMul::GetMinCostIn(const OperatorRec &op) {
   cost_in.push_back(StrConcatDimJ(edge_i, edge_k));
   cost_in.push_back(StrReduceDimK(edge_i, edge_j));
 
-  return *min_element(cost_in.begin(), cost_in.end());
+  return *max_element(cost_in.begin(), cost_in.end());
 }
 
 // Chose strategy for MatMul
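
Renaming GetMinCostIn to GetMaxCostIn, together with the min_element-to-max_element swap, changes what a MatMul's weight means: it is now the cost of the most expensive candidate partitioning dimension rather than the cheapest. A minimal sketch of the new selection, with the candidate costs passed in directly (GetMaxCostInSketch is a hypothetical standalone stand-in for the member function):

#include <algorithm>
#include <vector>

// Collect the per-dimension strategy costs (i, j, k for MatMul) and return
// the largest one; the old code returned the smallest via min_element.
double GetMaxCostInSketch(const std::vector<double> &cost_in) {
  return *std::max_element(cost_in.begin(), cost_in.end());
}
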
@@ -29,6 +29,8 @@
 namespace mindspore {
 namespace parallel {
 #define DOUBLE_MAX (std::numeric_limits<double>::max)()
+#define MATMUL_MEM_COEF 0.25
+#define REDIS_COEF 16
 
 double CostRedis(const Graph::NodeType &node,
                  const std::vector<std::pair<std::string, StrategyRec>> &node_name_to_strategy,
@@ -45,11 +47,12 @@ class CostMatMul {
                     const std::vector<std::pair<std::string, StrategyRec>> &node_name_to_strategy,
                     const Graph &graph);
 
-  double GetMinCostIn(const OperatorRec &op);
+  double GetMaxCostIn(const OperatorRec &op);
 
  private:
   double StrConcatDimI(int64_t a, int64_t b) {
     cost_in_i_ = (static_cast<double>(a) * static_cast<double>(b)) / 2.0;
+    cost_in_i_ = cost_in_i_ * MATMUL_MEM_COEF;
 
     return cost_in_i_;
   }
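
The new MATMUL_MEM_COEF = 0.25 damps the inlined MatMul partitioning costs, shown here for the dimension-i concat cost. A sketch of the updated computation, assuming a and b are the strategy-scaled edge lengths (StrConcatDimISketch is a hypothetical free-function version of the class member):

#include <cstdint>

constexpr double MATMUL_MEM_COEF = 0.25;  // new memory coefficient from this PR

// The raw cost (a * b) / 2 is now additionally scaled by the memory
// coefficient before being reported to the cost model.
double StrConcatDimISketch(int64_t a, int64_t b) {
  double cost = (static_cast<double>(a) * static_cast<double>(b)) / 2.0;
  return cost * MATMUL_MEM_COEF;
}
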
@@ -35,7 +35,7 @@ double GetWeights(const Graph::NodeType &node) {
     // For MatMul
     auto cost_ptr = std::make_shared<CostMatMul>();
 
-    return cost_ptr->GetMinCostIn(op);
+    return cost_ptr->GetMaxCostIn(op);
   } else if (op.op_type == OperatorType::kRecConvolution) {
     // For Convolution
     auto cost_ptr = std::make_shared<CostConvolution>();
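
GetWeights feeds the partitioner's ranking of operators, so switching MatMul to GetMaxCostIn means a MatMul node's weight now reflects its worst-case dimension cost. A hedged sketch of that downstream use, under the assumption (not shown in this diff) that nodes are sorted by descending weight before partitioning; the types here are hypothetical stand-ins for Graph::NodeType:

#include <algorithm>
#include <cstddef>
#include <utility>
#include <vector>

// Pair each node index with its weight and sort descending, so the most
// expensive operators are partitioned first.
std::vector<std::size_t> SortByWeightSketch(const std::vector<double> &weights) {
  std::vector<std::pair<double, std::size_t>> order;
  for (std::size_t i = 0; i < weights.size(); ++i) {
    order.emplace_back(weights[i], i);
  }
  std::sort(order.rbegin(), order.rend());  // descending by weight
  std::vector<std::size_t> result;
  for (const auto &p : order) {
    result.push_back(p.second);
  }
  return result;
}
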
@@ -29,7 +29,8 @@ namespace parallel {
 
 class TestPartition : public UT::Common {
  public:
-  void Create(std::shared_ptr<Graph> graph, int node_num, std::vector<int64_t> edge_head, std::vector<int64_t> edge_tail);
+  void Create(std::shared_ptr<Graph> graph, int node_num, std::vector<int64_t> edge_head,
+              std::vector<int64_t> edge_tail);
   void InitEdge(std::shared_ptr<Graph> graph, int vHead, int vTail);
   void InitNode(std::shared_ptr<Graph> graph, int num_node);
   TensorParam *MakeTensor(int n, int c, int h, int w);
@@ -85,9 +86,10 @@ TensorParam *TestPartition::MakeTensor(int n, int c, int h, int w) {
 std::shared_ptr<Graph> TestPartition::MakeMatMulData(int numNode) {
   // Build Edges
   int edgeNum = 0;
-  if (0 == numNode % 2 && numNode != 0) {
-    edgeNum = numNode - 2;
-  } else if (1 == numNode % 2) {
+  constexpr int INTERVAL = 2;
+  if (numNode % INTERVAL == 0 && numNode != 0) {
+    edgeNum = numNode - INTERVAL;
+  } else if (numNode % INTERVAL == 1) {
     edgeNum = numNode - 1;
   } else {
     edgeNum = 0;
@@ -98,14 +100,14 @@ std::shared_ptr<Graph> TestPartition::MakeMatMulData(int numNode) {
 
   for (int i = 0; i < edgeNum; i++) {
     edgeHead[i] = i;
-    if (0 == i % 2) {
-      edgeTail[i] = i + 2;
+    if (i % INTERVAL == 0) {
+      edgeTail[i] = i + INTERVAL;
     } else {
       edgeTail[i] = i + 1;
     };
   };
 
-  // Creat graph
+  // Create graph
   std::shared_ptr<Graph> graph(new Graph);
   TestPartition::Create(graph, numNode, edgeHead, edgeTail);
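
Replacing the literal 2 with INTERVAL makes the test's edge topology explicit: even-indexed heads skip a node (i -> i + INTERVAL) while odd-indexed heads connect to their neighbor (i -> i + 1). A small self-contained sketch that prints the edges MakeMatMulData would build for an odd numNode (the value 9 matches the numNode used later in these tests):

#include <cstdio>
#include <vector>

int main() {
  constexpr int INTERVAL = 2;
  const int numNode = 9;            // odd, so edgeNum = numNode - 1
  const int edgeNum = numNode - 1;
  std::vector<int> edgeHead(edgeNum), edgeTail(edgeNum);
  for (int i = 0; i < edgeNum; i++) {
    edgeHead[i] = i;
    edgeTail[i] = (i % INTERVAL == 0) ? i + INTERVAL : i + 1;
    std::printf("%d -> %d\n", edgeHead[i], edgeTail[i]);  // 0->2, 1->2, 2->4, ...
  }
  return 0;
}
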
@@ -221,8 +223,8 @@ TEST_F(TestPartition, test_PartitionNode) {
   Graph::NodeType node2 = graph->nodes[2];
   std::vector<std::pair<std::string, StrategyRec>> nameToStrategy;
   StrategyRec str = PartitionNode(node2, nameToStrategy, graph);
-  ASSERT_EQ(str.outputTensor.str_h, 1);
-  ASSERT_EQ(str.outputTensor.str_w, 0.5);
+  ASSERT_EQ(str.outputTensor.str_h, 0.5);
+  ASSERT_EQ(str.outputTensor.str_w, 1);
 }
 
 TEST_F(TestPartition, test_PartitionForAllDevices) {
@@ -237,7 +239,7 @@ TEST_F(TestPartition, test_PartitionForAllDevices2) {
   ASSERT_EQ(PartitionForAllDevices(2, device_memory, graph), SUCCESS);
 }
 
-// Negative case: parition on 0 device
+// Negative case: partition on 0 device
 TEST_F(TestPartition, test_PartitionForAllDevices0) {
   std::shared_ptr<Graph> graph = MakeMatMulData(9);
   double device_memory = 1024.0 * 1024.0 * 1024.0 * 16.0;
@@ -248,9 +250,9 @@ TEST_F(TestPartition, test_PartitionForAllDevices0) {
 TEST_F(TestPartition, test_ApplyStrToTensor) {
   std::shared_ptr<Graph> graph = MakeMatMulData(9);
   std::vector<std::pair<std::string, StrategyRec>> nameToStrategy;
-  StrategyRec str = PartitionNode(graph->nodes[4], nameToStrategy, graph);
-  auto h_str = str.outputTensor.str_h;
-  auto w_str = str.outputTensor.str_w;
+  graph->nodes[4].apply.str = PartitionNode(graph->nodes[4], nameToStrategy, graph);
+  auto h_str = graph->nodes[4].apply.str.outputTensor.str_h;
+  auto w_str = graph->nodes[4].apply.str.outputTensor.str_w;
 
   Graph::NodeType n_node = ApplyStrToTensor(graph->nodes[4]);
   auto h_node = n_node.tensor_parm.tensor_str.str_h;
@@ -100,6 +100,6 @@ def test_rec_shared_param_strmodif():
     stras = _cell_graph_executor._get_shard_strategy(model._train_network)
     for (k, v) in stras.items():
         if re.search("Gather", k) is not None:
-            assert v == [[4, 1], [2, 1]]
+            assert v == [[2, 1], [4, 1]]
     context.reset_auto_parallel_context()