forked from mindspore-Ecosystem/mindspore
!304 [Auto parallel] Change 'NOT_FULLY_USE_DEVICES' to 'FULLY_USE_DEVICES' and make ALL-1 user-specified-strategy valid in auto-parallel
Merge pull request !304 from Xiaoda/modify-not-fully-use-devices-and-strategy-checking
Commit: ba55a8ed0b
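The change flips the sense of a double-negative knob: what was `not_fully_use_devices = false` (the default) becomes `fully_use_devices = true`, so the default behavior, that generated strategies must occupy every device, is unchanged. For user scripts only the keyword changes; a minimal migration sketch (assuming this era's `set_algo_parameters` API; the exact import path may differ across versions):

    # before this commit: set_algo_parameters(not_fully_use_devices=False)
    # after this commit:
    from mindspore.parallel import set_algo_parameters
    set_algo_parameters(fully_use_devices=True)  # same default behavior, positive name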
@@ -85,10 +85,10 @@ Status Edge::InitEdgeCost() {
     }
   }
   if (!has_available_cost) {
-    if (!NOT_FULLY_USE_DEVICES) {
+    if (FULLY_USE_DEVICES) {
       MS_LOG(EXCEPTION) << "Generating cost for edge: " << edge_name_
-                        << " failed, it may be caused by setting 'not_fully_use_devices' false. Try to set "
-                           "'not_fully_use_devices' true.";
+                        << " failed, it may be caused by setting 'fully_use_devices' true. Try to set "
+                           "'fully_use_devices' false.";
     } else if (ELEMENTWISE_OP_STRA_FOLLOW) {
       MS_LOG(EXCEPTION) << "Generating cost for edge: " << edge_name_
                         << " failed, it may be caused by setting 'elementwise_op_strategy_follow' true. "

@@ -36,7 +36,7 @@ double COST_MODEL_COMMUNI_CONST = DEFAULT_COST_MODEL_COMMUNI_CONST;
 double COST_MODEL_COMMUNI_BIAS = DEFAULT_COST_MODEL_COMMUNI_BIAS;
 bool TENSOR_SLICE_ALIGNMENT_ENABLE = DEFAULT_TENSOR_SLICE_ALIGNMENT_ENABLE;
 size_t TENSOR_SLICE_ALIGNMENT_SIZE = DEFAULT_TENSOR_SLICE_ALIGNMENT_SIZE;
-bool NOT_FULLY_USE_DEVICES = DEFAULT_NOT_FULLY_USE_DEVICES;
+bool FULLY_USE_DEVICES = DEFAULT_FULLY_USE_DEVICES;
 bool ELEMENTWISE_OP_STRA_FOLLOW = DEFAULT_ELEMENTWISE_OP_STRA_FOLLOW;

 void CostGraph::SetDeviceMemoryAndCostParameter() {

@@ -125,13 +125,13 @@ void CostGraph::SetDeviceMemoryAndCostParameter() {
   TENSOR_SLICE_ALIGNMENT_SIZE = align_size;
   MS_LOG(INFO) << "tensor_slice_align_size: " << TENSOR_SLICE_ALIGNMENT_SIZE << ".";

-  // NOT_FULLY_USE_DEVICES
-  auto not_fully_devices = CostModelContext::GetInstance()->not_fully_use_device();
-  NOT_FULLY_USE_DEVICES = not_fully_devices;
-  if (NOT_FULLY_USE_DEVICES) {
-    MS_LOG(INFO) << "not_fully_use_devices: true.";
+  // FULLY_USE_DEVICES
+  auto fully_devices = CostModelContext::GetInstance()->fully_use_device();
+  FULLY_USE_DEVICES = fully_devices;
+  if (FULLY_USE_DEVICES) {
+    MS_LOG(INFO) << "fully_use_devices: true.";
   } else {
-    MS_LOG(INFO) << "not_fully_use_devices: false.";
+    MS_LOG(INFO) << "fully_use_devices: false.";
   }

   // ELEMENTWISE_OP_STRA_FOLLOW

@@ -42,7 +42,7 @@ namespace parallel {
 #define DEFAULT_COST_MODEL_COMMUNI_BIAS 1024.0
 #define DEFAULT_TENSOR_SLICE_ALIGNMENT_ENABLE false
 #define DEFAULT_TENSOR_SLICE_ALIGNMENT_SIZE 16
-#define DEFAULT_NOT_FULLY_USE_DEVICES false
+#define DEFAULT_FULLY_USE_DEVICES true
 #define DEFAULT_ELEMENTWISE_OP_STRA_FOLLOW false

 class CostGraph;

@@ -57,7 +57,7 @@ extern double COST_MODEL_COMMUNI_CONST;
 extern double COST_MODEL_COMMUNI_BIAS;
 extern bool TENSOR_SLICE_ALIGNMENT_ENABLE;
 extern size_t TENSOR_SLICE_ALIGNMENT_SIZE;
-extern bool NOT_FULLY_USE_DEVICES;
+extern bool FULLY_USE_DEVICES;
 extern bool ELEMENTWISE_OP_STRA_FOLLOW;

 class CostGraph {

@@ -60,7 +60,7 @@ void CostModelContext::ResetAlgoParameters() {
   costmodel_simplify_cal_ = DEFAULT_COST_MODEL_SIMPLIFY_CALCULATION;
   tensor_slice_alignment_enable_ = DEFAULT_TENSOR_SLICE_ALIGNMENT_ENABLE;
   tensor_slice_alignment_size_ = DEFAULT_TENSOR_SLICE_ALIGNMENT_SIZE;
-  not_fully_use_device_ = DEFAULT_NOT_FULLY_USE_DEVICES;
+  fully_use_device_ = DEFAULT_FULLY_USE_DEVICES;
   elementwise_stra_follow_ = DEFAULT_ELEMENTWISE_OP_STRA_FOLLOW;
 }

@@ -118,7 +118,7 @@ void CostModelContext::set_tensor_slice_alignment_size(size_t ts_align_size) {
   tensor_slice_alignment_size_ = ts_align_size;
 }

-void CostModelContext::set_not_fully_use_device(bool not_fully_use) { not_fully_use_device_ = not_fully_use; }
+void CostModelContext::set_fully_use_device(bool fully_use) { fully_use_device_ = fully_use; }

 void CostModelContext::set_elementwise_stra_follow(bool elementwise_follow) {
   elementwise_stra_follow_ = elementwise_follow;

@@ -102,9 +102,9 @@ class CostModelContext {
   void set_tensor_slice_alignment_size(size_t);
   size_t tensor_slice_alignment_size() const { return tensor_slice_alignment_size_; }

-  // NOT_FULLY_USE_DEVICES
-  void set_not_fully_use_device(bool);
-  bool not_fully_use_device() const { return not_fully_use_device_; }
+  // FULLY_USE_DEVICES
+  void set_fully_use_device(bool);
+  bool fully_use_device() const { return fully_use_device_; }

   // ELEMENTWISE_OP_STRA_FOLLOW
   void set_elementwise_stra_follow(bool);

@@ -158,8 +158,8 @@ class CostModelContext {
   // TENSOR_SLICE_ALIGNMENT_SIZE
   size_t tensor_slice_alignment_size_;

-  // NOT_FULLY_USE_DEVICES
-  bool not_fully_use_device_;
+  // FULLY_USE_DEVICES
+  bool fully_use_device_;

   // ELEMENTWISE_OP_STRA_FOLLOW
   bool elementwise_stra_follow_;

@@ -465,7 +465,7 @@ Status MatMulBase::PrepareStrategy(int32_t stage_id, size_t dev_num,
                                    mindspore::parallel::Dimensions combined_partitions, size_t input0_shape_size,
                                    size_t input1_shape_size, mindspore::parallel::StrategyPtr* const sp) {
   int32_t product = std::accumulate(combined_partitions.begin(), combined_partitions.end(), 1, std::multiplies<int>());
-  if (NOT_FULLY_USE_DEVICES) {
+  if (!FULLY_USE_DEVICES) {
     if (IntToSize(product) > dev_num) {
       return FAILED;
     }

@@ -675,7 +675,7 @@ Status PrepareStrategyBase(int32_t stage_id, size_t dev_num, const Shapes& input
   for (auto& input_partition : inputs_partitions) {
     product *= std::accumulate(input_partition.begin(), input_partition.end(), 1, std::multiplies<int>());
   }
-  if (NOT_FULLY_USE_DEVICES) {
+  if (!FULLY_USE_DEVICES) {
     if (IntToSize(product) > dev_num) {
       return FAILED;
     }

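Both `PrepareStrategy` sites above keep the same guard, only renamed: when the user has opted out of full device use, a candidate strategy is rejected if it would cut the tensors into more slices than there are devices. A minimal sketch of that arithmetic (Python, function and parameter names are mine for illustration, not MindSpore API):

    from functools import reduce
    from operator import mul

    def candidate_ok(inputs_partitions, dev_num, fully_use_devices=True):
        # product over every cut of every input, as in PrepareStrategyBase
        product = 1
        for partition in inputs_partitions:
            product *= reduce(mul, partition, 1)
        if not fully_use_devices and product > dev_num:
            return False  # corresponds to `return FAILED` in the C++
        return True

    # a ((2, 4), (4, 1)) MatMul strategy cuts 2*4*4*1 = 32 ways; with 8 devices
    # and fully_use_devices=False it is rejected:
    assert candidate_ok([(2, 4), (4, 1)], dev_num=8, fully_use_devices=False) is False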
@@ -110,8 +110,6 @@ std::vector<std::string> splittable_op_ = {MATMUL,
 std::vector<std::string> elementwise_op_ = {ACTIVATION, GELU, TANH, SOFTMAX, LOG_SOFTMAX, RELU, SQRT,
                                             CAST, POW, EXP, LOG, COS, ACOS, LOGICALNOT};

-std::vector<std::string> ignore_manual_strategy_op_ = {BATCH_NORM};
-
 bool StepAutoParallel(const FuncGraphPtr &root, const opt::OptimizerPtr &) {
   MS_EXCEPTION_IF_NULL(root);
   MS_EXCEPTION_IF_NULL(ParallelContext::GetInstance());

@@ -308,16 +306,6 @@ std::vector<TypePtr> ExtractOutputTypeByNode(const CNodePtr &node) {
   return outputs_type;
 }

-// Be careful the argument is cnode_full_name, not the op_name
-bool IsIgnoreStrategyOperator(const std::string &cnode_full_name) {
-  for (auto &ignore_op : ignore_manual_strategy_op_) {
-    if (cnode_full_name.find(ignore_op) != std::string::npos) {
-      return true;
-    }
-  }
-  return false;
-}
-
 bool IsElementWiseOperator(const std::string &op_name) {
   auto iter = std::find(elementwise_op_.begin(), elementwise_op_.end(), op_name);
   return (iter != elementwise_op_.end());

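With `ignore_manual_strategy_op_` gone, no operator (previously only BATCH_NORM) is exempt from the user-strategy device check any more. For reference, the deleted helper was a substring scan over the cnode's fully scoped name, roughly equivalent to this sketch (a Python rendering of the removed C++, not a current API; the "BatchNorm" string is my assumption for the BATCH_NORM constant):

    IGNORE_MANUAL_STRATEGY_OPS = ["BatchNorm"]  # the C++ list held BATCH_NORM only

    def is_ignore_strategy_operator(cnode_full_name):
        # note: this matched the cnode's full scoped name, not the bare op name
        return any(op in cnode_full_name for op in IGNORE_MANUAL_STRATEGY_OPS)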
@@ -414,18 +402,20 @@ OperatorInfoPtr CreateTheOperatorInfo(const PrimitivePtr &prim, const CNodePtr &
     // Set cost for this configured strategy
     if (operator_info->SetCostUnderStrategy(strategyPtr) != SUCCESS) {
       MS_LOG(EXCEPTION) << "Failure: operator " << prim->name() << " SetCostUnderStrategy failed";
-    } else if (!NOT_FULLY_USE_DEVICES) {
-      if (!IsIgnoreStrategyOperator(cnode->fullname_with_scope())) {
-        // If configured to fully use devices, then checking for the user-specified strategy
-        int32_t used_devices = operator_info->used_devices();
-        MS_EXCEPTION_IF_NULL(g_device_manager);
-        auto total_device_num = g_device_manager->GetDeviceListByStageId(0).size();
-        // 'used_devices == -1' means that 'used_devices_' is not set
-        if ((used_devices == -1) || IntToSize(used_devices) != total_device_num) {
-          MS_LOG(EXCEPTION) << "In configuration 'NOT_FULLY_USE_DEVICES' = False, "
-                            << "but the specified strategy uses device: " << used_devices
-                            << ", total devices: " << total_device_num;
-        }
-      }
+    } else if (FULLY_USE_DEVICES) {
+      // If configured to fully use devices, then checking for the user-specified strategy
+      int32_t used_devices = operator_info->used_devices();
+      MS_EXCEPTION_IF_NULL(g_device_manager);
+      auto total_device_num = g_device_manager->GetDeviceListByStageId(0).size();
+      // 'used_devices == 1' means the ALL-1 strategy, which is valid in auto-parallel
+      if (used_devices == 1) {
+        return operator_info;
+      }
+      // 'used_devices == -1' means that 'used_devices_' is not set
+      if ((used_devices == -1) || IntToSize(used_devices) != total_device_num) {
+        MS_LOG(EXCEPTION) << "In configuration 'FULLY_USE_DEVICES' = True, "
+                          << "but the specified strategy uses device: " << used_devices
+                          << ", total devices: " << total_device_num;
+      }
     }
   }

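This hunk carries the second half of the PR title: under `FULLY_USE_DEVICES`, a user strategy whose `used_devices_` equals 1 (an ALL-1 strategy, every dimension cut 1 way) now returns the operator info early instead of raising the "uses device: 1, total devices: N" exception. A sketch of a configuration this legalizes (hypothetical net; assumes the `set_strategy` primitive API of this era):

    import mindspore.nn as nn
    from mindspore.ops import operations as P

    class AllOnesNet(nn.Cell):
        def __init__(self):
            super(AllOnesNet, self).__init__()
            # ALL-1 strategy: nothing is sliced, the op is effectively replicated;
            # before this commit auto-parallel rejected it whenever
            # fully_use_devices was in force.
            self.matmul = P.MatMul().set_strategy(((1, 1), (1, 1)))

        def construct(self, x, y):
            return self.matmul(x, y)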
@@ -261,10 +261,10 @@ PYBIND11_MODULE(_c_expression, m) {
          "Set the parameter tensor_slice_size in strategy generation.")
     .def("get_tensor_slice_align_size", &CostModelContext::tensor_slice_alignment_size,
          "Get the parameter tensor_slice_size in strategy generation.")
-    .def("set_not_fully_use_devices", &CostModelContext::set_not_fully_use_device,
-         "Set the parameter not_fully_use_devices in the DP algorithm.")
-    .def("get_not_fully_use_devices", &CostModelContext::not_fully_use_device,
-         "Get the parameter not_fully_use_devices in the DP algorithm.")
+    .def("set_fully_use_devices", &CostModelContext::set_fully_use_device,
+         "Set the parameter fully_use_devices in the DP algorithm.")
+    .def("get_fully_use_devices", &CostModelContext::fully_use_device,
+         "Get the parameter fully_use_devices in the DP algorithm.")
     .def("set_elementwise_op_strategy_follow", &CostModelContext::set_elementwise_stra_follow,
          "Set the parameter elementwise_op_strategy_follow in the DP algorithm.")
     .def("get_elementwise_op_strategy_follow", &CostModelContext::elementwise_stra_follow,

@@ -53,13 +53,13 @@ class _AlgoParameterConfig():
         self.check_config_handle()
         return self._config_handle.get_simplify_cal()

-    def set_not_fully_use_devices(self, not_fully):
+    def set_fully_use_devices(self, not_fully):
         self.check_config_handle()
-        self._config_handle.set_not_fully_use_devices(not_fully)
+        self._config_handle.set_fully_use_devices(not_fully)

-    def get_not_fully_use_devices(self):
+    def get_fully_use_devices(self):
         self.check_config_handle()
-        return self._config_handle.get_not_fully_use_devices()
+        return self._config_handle.get_fully_use_devices()

     def set_elementwise_op_strategy_follow(self, element_strategy_follow):
         self.check_config_handle()

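The Python wrapper is a thin pass-through (note the argument name `not_fully` survives the rename unchanged in this commit), so the new name threads through every layer touched by this PR. Traced as a sketch, with handle creation left to `_algo_parameter_config()`:

    # set_algo_parameters(fully_use_devices=False)
    #   -> _AlgoParameterConfig.set_fully_use_devices(False)    (Python wrapper)
    #   -> self._config_handle.set_fully_use_devices(False)     (pybind11 binding)
    #   -> CostModelContext::set_fully_use_device(false)        (C++ setter)
    #   -> FULLY_USE_DEVICES, read back in SetDeviceMemoryAndCostParameter()
    config = _algo_parameter_config()       # module-level singleton accessor
    config.set_fully_use_devices(False)     # delegates down the chain above
    assert config.get_fully_use_devices() is False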
@@ -119,7 +119,7 @@ def _algo_parameter_config():

 set_algo_parameters_config_func_map = {
     "simplify_cal": _algo_parameter_config().set_simplify_cal,
-    "not_fully_use_devices": _algo_parameter_config().set_not_fully_use_devices,
+    "fully_use_devices": _algo_parameter_config().set_fully_use_devices,
     "elementwise_op_strategy_follow": _algo_parameter_config().set_elementwise_op_strategy_follow,
     "tensor_slice_align_enable": _algo_parameter_config().set_tensor_slice_align_enable,
     "tensor_slice_align_size": _algo_parameter_config().set_tensor_slice_align_size}

@@ -127,14 +127,14 @@ set_algo_parameters_config_func_map = {

 get_algo_parameters_config_func_map = {
     "simplify_cal": _algo_parameter_config().get_simplify_cal,
-    "not_fully_use_devices": _algo_parameter_config().get_not_fully_use_devices,
+    "fully_use_devices": _algo_parameter_config().get_fully_use_devices,
     "elementwise_op_strategy_follow": _algo_parameter_config().get_elementwise_op_strategy_follow,
     "tensor_slice_align_enable": _algo_parameter_config().get_tensor_slice_align_enable,
     "tensor_slice_align_size": _algo_parameter_config().get_tensor_slice_align_size}


 @args_type_check(simplify_cal=bool, tensor_slice_align_enable=bool, tensor_slice_align_size=int,
-                 not_fully_use_devices=bool, elementwise_op_strategy_follow=bool)
+                 fully_use_devices=bool, elementwise_op_strategy_follow=bool)
 def set_algo_parameters(**kwargs):
     """
     Set algo parameter config.

@@ -146,7 +146,7 @@ def set_algo_parameters(**kwargs):
         simplify_cal (bool): Whether simplifying calculations in strategy-searching algorithm. Default: True
         tensor_slice_align_enable (bool): Whether checking tensor slice shape. Default: False
         tensor_slice_align_size (int): The minimum tensor slice shape, the value must be in [1, 1024]. Default: 16
-        not_fully_use_devices (bool): Whether generating strategies that not fully use devices. Default: False
+        fully_use_devices (bool): Whether generating strategies that fully use all available devices. Default: True
         elementwise_op_strategy_follow (bool): Whether the elementwise operator has the same strategies as its
             subsequent operators. Default: False

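The updated docstring is the user-facing contract; a round trip through the renamed kwarg mirrors the tests below (a sketch; the import path may vary by version):

    from mindspore.parallel import set_algo_parameters, get_algo_parameters

    set_algo_parameters(fully_use_devices=False)   # permit strategies that idle some devices
    assert get_algo_parameters("fully_use_devices") is False
    # the old kwarg is gone from the maps and from @args_type_check, so
    # set_algo_parameters(not_fully_use_devices=True) is no longer accepted.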
@@ -100,7 +100,7 @@ def test_two_matmul():
     set_algo_parameters(simplify_cal=True,
                         tensor_slice_align_enable=False,
                         tensor_slice_align_size=32,
-                        not_fully_use_devices=True,
+                        fully_use_devices=False,
                         elementwise_op_strategy_follow=False)
     para_simplify_cal = get_algo_parameters("simplify_cal")
     assert para_simplify_cal == True

@@ -108,8 +108,8 @@ def test_two_matmul():
     assert para_slice_align_enable == False
     para_slice_align_size = get_algo_parameters("tensor_slice_align_size")
     assert para_slice_align_size == 32
-    not_fully_use_devices = get_algo_parameters("not_fully_use_devices")
-    assert not_fully_use_devices == True
+    fully_use_devices = get_algo_parameters("fully_use_devices")
+    assert fully_use_devices == False
     elementwise_op_strategy_follow = get_algo_parameters("elementwise_op_strategy_follow")
     assert elementwise_op_strategy_follow == False

@@ -120,8 +120,8 @@ def test_two_matmul():
     assert para_slice_align_enable == False
     para_slice_align_size = get_algo_parameters("tensor_slice_align_size")
     assert para_slice_align_size == 16
-    not_fully_use_devices = get_algo_parameters("not_fully_use_devices")
-    assert not_fully_use_devices == False
+    fully_use_devices = get_algo_parameters("fully_use_devices")
+    assert fully_use_devices == True
     elementwise_op_strategy_follow = get_algo_parameters("elementwise_op_strategy_follow")
     assert elementwise_op_strategy_follow == False

@@ -576,7 +576,7 @@ def test_flatten_reshape2(parallel_mode="auto_parallel"):
     epoch_size = 2
     context.reset_auto_parallel_context()
     context.set_auto_parallel_context(parallel_mode=parallel_mode, device_num=8)
-    set_algo_parameters(not_fully_use_devices=True)
+    set_algo_parameters(fully_use_devices=False)
     net = ParallelReduceMeanNet(conv_in_channel=3, conv_out_channel=64, reducemean_axis=(2, 3), strategy=((4, 1, 1, 1),))
     loss = CrossEntropyLoss()
     predict = Tensor(np.ones([batch_size, 3, 32, 32]), dtype=ms.float32)

@@ -617,7 +617,7 @@ def test_flatten_reshape3(parallel_mode="auto_parallel"):
     epoch_size = 2
     context.reset_auto_parallel_context()
     context.set_auto_parallel_context(parallel_mode=parallel_mode, device_num=8)
-    set_algo_parameters(not_fully_use_devices=True)
+    set_algo_parameters(fully_use_devices=False)
     net = ParallelReshapeNet(dense_in_channel=2048, dense_out_channel=1000, shape=(128, 1000), strategy=((16, 1),))
     loss = CrossEntropyLoss()
     predict = Tensor(np.ones([batch_size, 1, 2, 1024]), dtype=ms.float32)

@@ -646,7 +646,7 @@ def test_flatten_reshape4(parallel_mode="semi_auto_parallel"):
     epoch_size = 2
     context.reset_auto_parallel_context()
     context.set_auto_parallel_context(parallel_mode=parallel_mode, device_num=8)
-    set_algo_parameters(not_fully_use_devices=True)
+    set_algo_parameters(fully_use_devices=False)
     net = ParallelReduceMeanNet(conv_in_channel=3, conv_out_channel=64, reducemean_keep_dims=True, strategy=((4, 1, 1, 1),))
     loss = CrossEntropyLoss2()
     predict = Tensor(np.ones([batch_size, 3, 32, 32]), dtype=ms.float32)