From ffb2cb03a45812cf7e864ea4d32f302111e74981 Mon Sep 17 00:00:00 2001 From: Xiaoda Zhang Date: Tue, 14 Apr 2020 17:06:37 +0800 Subject: [PATCH] Change 'NOT_FULLY_USE_DEVICES' to 'FULLY_USE_DEVICES' and make ALL-1 user-specified-strategy valid in auto-parallel --- .../parallel/auto_parallel/edge_costmodel.cc | 6 +-- .../parallel/auto_parallel/graph_costmodel.cc | 14 +++---- .../parallel/auto_parallel/graph_costmodel.h | 4 +- mindspore/ccsrc/parallel/costmodel_context.cc | 4 +- mindspore/ccsrc/parallel/costmodel_context.h | 10 ++--- .../ccsrc/parallel/ops_info/matmul_info.cc | 2 +- .../ccsrc/parallel/ops_info/operator_info.cc | 2 +- .../ccsrc/parallel/step_auto_parallel.cc | 38 +++++++------------ mindspore/ccsrc/pipeline/init.cc | 8 ++-- mindspore/parallel/algo_parameter_config.py | 16 ++++---- .../parallel/test_auto_parallel_two_matmul.py | 10 ++--- tests/ut/python/parallel/test_reshape.py | 6 +-- 12 files changed, 55 insertions(+), 65 deletions(-) diff --git a/mindspore/ccsrc/parallel/auto_parallel/edge_costmodel.cc b/mindspore/ccsrc/parallel/auto_parallel/edge_costmodel.cc index 895646f409f..21e67f9f7b2 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/edge_costmodel.cc +++ b/mindspore/ccsrc/parallel/auto_parallel/edge_costmodel.cc @@ -85,10 +85,10 @@ Status Edge::InitEdgeCost() { } } if (!has_available_cost) { - if (!NOT_FULLY_USE_DEVICES) { + if (FULLY_USE_DEVICES) { MS_LOG(EXCEPTION) << "Generating cost for edge: " << edge_name_ - << " failed, it may be caused by setting 'not_fully_use_devices' false. Try to set " - "'not_fully_use_devices' true."; + << " failed, it may be caused by setting 'fully_use_devices' true. Try to set " + "'fully_use_devices' false."; } else if (ELEMENTWISE_OP_STRA_FOLLOW) { MS_LOG(EXCEPTION) << "Generating cost for edge: " << edge_name_ << " failed, it may be caused by setting 'elementwise_op_strategy_follow' true. " diff --git a/mindspore/ccsrc/parallel/auto_parallel/graph_costmodel.cc b/mindspore/ccsrc/parallel/auto_parallel/graph_costmodel.cc index 82dd7230390..c56d3a6fbd7 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/graph_costmodel.cc +++ b/mindspore/ccsrc/parallel/auto_parallel/graph_costmodel.cc @@ -36,7 +36,7 @@ double COST_MODEL_COMMUNI_CONST = DEFAULT_COST_MODEL_COMMUNI_CONST; double COST_MODEL_COMMUNI_BIAS = DEFAULT_COST_MODEL_COMMUNI_BIAS; bool TENSOR_SLICE_ALIGNMENT_ENABLE = DEFAULT_TENSOR_SLICE_ALIGNMENT_ENABLE; size_t TENSOR_SLICE_ALIGNMENT_SIZE = DEFAULT_TENSOR_SLICE_ALIGNMENT_SIZE; -bool NOT_FULLY_USE_DEVICES = DEFAULT_NOT_FULLY_USE_DEVICES; +bool FULLY_USE_DEVICES = DEFAULT_FULLY_USE_DEVICES; bool ELEMENTWISE_OP_STRA_FOLLOW = DEFAULT_ELEMENTWISE_OP_STRA_FOLLOW; void CostGraph::SetDeviceMemoryAndCostParameter() { @@ -125,13 +125,13 @@ void CostGraph::SetDeviceMemoryAndCostParameter() { TENSOR_SLICE_ALIGNMENT_SIZE = align_size; MS_LOG(INFO) << "tensor_slice_align_size: " << TENSOR_SLICE_ALIGNMENT_SIZE << "."; - // NOT_FULLY_USE_DEVICES - auto not_fully_devices = CostModelContext::GetInstance()->not_fully_use_device(); - NOT_FULLY_USE_DEVICES = not_fully_devices; - if (NOT_FULLY_USE_DEVICES) { - MS_LOG(INFO) << "not_fully_use_devices: true."; + // FULLY_USE_DEVICES + auto fully_devices = CostModelContext::GetInstance()->fully_use_device(); + FULLY_USE_DEVICES = fully_devices; + if (FULLY_USE_DEVICES) { + MS_LOG(INFO) << "fully_use_devices: true."; } else { - MS_LOG(INFO) << "not_fully_use_devices: false."; + MS_LOG(INFO) << "fully_use_devices: false."; } // ELEMENTWISE_OP_STRA_FOLLOW diff --git a/mindspore/ccsrc/parallel/auto_parallel/graph_costmodel.h b/mindspore/ccsrc/parallel/auto_parallel/graph_costmodel.h index 65aeb210ea0..b6591c07417 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/graph_costmodel.h +++ b/mindspore/ccsrc/parallel/auto_parallel/graph_costmodel.h @@ -42,7 +42,7 @@ namespace parallel { #define DEFAULT_COST_MODEL_COMMUNI_BIAS 1024.0 #define DEFAULT_TENSOR_SLICE_ALIGNMENT_ENABLE false #define DEFAULT_TENSOR_SLICE_ALIGNMENT_SIZE 16 -#define DEFAULT_NOT_FULLY_USE_DEVICES false +#define DEFAULT_FULLY_USE_DEVICES true #define DEFAULT_ELEMENTWISE_OP_STRA_FOLLOW false class CostGraph; @@ -57,7 +57,7 @@ extern double COST_MODEL_COMMUNI_CONST; extern double COST_MODEL_COMMUNI_BIAS; extern bool TENSOR_SLICE_ALIGNMENT_ENABLE; extern size_t TENSOR_SLICE_ALIGNMENT_SIZE; -extern bool NOT_FULLY_USE_DEVICES; +extern bool FULLY_USE_DEVICES; extern bool ELEMENTWISE_OP_STRA_FOLLOW; class CostGraph { diff --git a/mindspore/ccsrc/parallel/costmodel_context.cc b/mindspore/ccsrc/parallel/costmodel_context.cc index 0ebbd2c626a..82b260f9670 100644 --- a/mindspore/ccsrc/parallel/costmodel_context.cc +++ b/mindspore/ccsrc/parallel/costmodel_context.cc @@ -60,7 +60,7 @@ void CostModelContext::ResetAlgoParameters() { costmodel_simplify_cal_ = DEFAULT_COST_MODEL_SIMPLIFY_CALCULATION; tensor_slice_alignment_enable_ = DEFAULT_TENSOR_SLICE_ALIGNMENT_ENABLE; tensor_slice_alignment_size_ = DEFAULT_TENSOR_SLICE_ALIGNMENT_SIZE; - not_fully_use_device_ = DEFAULT_NOT_FULLY_USE_DEVICES; + fully_use_device_ = DEFAULT_FULLY_USE_DEVICES; elementwise_stra_follow_ = DEFAULT_ELEMENTWISE_OP_STRA_FOLLOW; } @@ -118,7 +118,7 @@ void CostModelContext::set_tensor_slice_alignment_size(size_t ts_align_size) { tensor_slice_alignment_size_ = ts_align_size; } -void CostModelContext::set_not_fully_use_device(bool not_fully_use) { not_fully_use_device_ = not_fully_use; } +void CostModelContext::set_fully_use_device(bool fully_use) { fully_use_device_ = fully_use; } void CostModelContext::set_elementwise_stra_follow(bool elementwise_follow) { elementwise_stra_follow_ = elementwise_follow; diff --git a/mindspore/ccsrc/parallel/costmodel_context.h b/mindspore/ccsrc/parallel/costmodel_context.h index 04782fa3660..23c9f7cc8d1 100644 --- a/mindspore/ccsrc/parallel/costmodel_context.h +++ b/mindspore/ccsrc/parallel/costmodel_context.h @@ -102,9 +102,9 @@ class CostModelContext { void set_tensor_slice_alignment_size(size_t); size_t tensor_slice_alignment_size() const { return tensor_slice_alignment_size_; } - // NOT_FULLY_USE_DEVICES - void set_not_fully_use_device(bool); - bool not_fully_use_device() const { return not_fully_use_device_; } + // FULLY_USE_DEVICES + void set_fully_use_device(bool); + bool fully_use_device() const { return fully_use_device_; } // ELEMENTWISE_OP_STRA_FOLLOW void set_elementwise_stra_follow(bool); @@ -158,8 +158,8 @@ class CostModelContext { // TENSOR_SLICE_ALIGNMENT_SIZE size_t tensor_slice_alignment_size_; - // NOT_FULLY_USE_DEVICES - bool not_fully_use_device_; + // FULLY_USE_DEVICES + bool fully_use_device_; // ELEMENTWISE_OP_STRA_FOLLOW bool elementwise_stra_follow_; diff --git a/mindspore/ccsrc/parallel/ops_info/matmul_info.cc b/mindspore/ccsrc/parallel/ops_info/matmul_info.cc index e617ae6c240..8d1264482b1 100644 --- a/mindspore/ccsrc/parallel/ops_info/matmul_info.cc +++ b/mindspore/ccsrc/parallel/ops_info/matmul_info.cc @@ -465,7 +465,7 @@ Status MatMulBase::PrepareStrategy(int32_t stage_id, size_t dev_num, mindspore::parallel::Dimensions combined_partitions, size_t input0_shape_size, size_t input1_shape_size, mindspore::parallel::StrategyPtr* const sp) { int32_t product = std::accumulate(combined_partitions.begin(), combined_partitions.end(), 1, std::multiplies()); - if (NOT_FULLY_USE_DEVICES) { + if (!FULLY_USE_DEVICES) { if (IntToSize(product) > dev_num) { return FAILED; } diff --git a/mindspore/ccsrc/parallel/ops_info/operator_info.cc b/mindspore/ccsrc/parallel/ops_info/operator_info.cc index 23b6a5190a8..5842a9149f9 100644 --- a/mindspore/ccsrc/parallel/ops_info/operator_info.cc +++ b/mindspore/ccsrc/parallel/ops_info/operator_info.cc @@ -675,7 +675,7 @@ Status PrepareStrategyBase(int32_t stage_id, size_t dev_num, const Shapes& input for (auto& input_partition : inputs_partitions) { product *= std::accumulate(input_partition.begin(), input_partition.end(), 1, std::multiplies()); } - if (NOT_FULLY_USE_DEVICES) { + if (!FULLY_USE_DEVICES) { if (IntToSize(product) > dev_num) { return FAILED; } diff --git a/mindspore/ccsrc/parallel/step_auto_parallel.cc b/mindspore/ccsrc/parallel/step_auto_parallel.cc index a42ce612fb5..495c3a8d391 100644 --- a/mindspore/ccsrc/parallel/step_auto_parallel.cc +++ b/mindspore/ccsrc/parallel/step_auto_parallel.cc @@ -110,8 +110,6 @@ std::vector splittable_op_ = {MATMUL, std::vector elementwise_op_ = {ACTIVATION, GELU, TANH, SOFTMAX, LOG_SOFTMAX, RELU, SQRT, CAST, POW, EXP, LOG, COS, ACOS, LOGICALNOT}; -std::vector ignore_manual_strategy_op_ = {BATCH_NORM}; - bool StepAutoParallel(const FuncGraphPtr &root, const opt::OptimizerPtr &) { MS_EXCEPTION_IF_NULL(root); MS_EXCEPTION_IF_NULL(ParallelContext::GetInstance()); @@ -308,16 +306,6 @@ std::vector ExtractOutputTypeByNode(const CNodePtr &node) { return outputs_type; } -// Be careful the argument is cnode_full_name, not the op_name -bool IsIgnoreStrategyOperator(const std::string &cnode_full_name) { - for (auto &ignore_op : ignore_manual_strategy_op_) { - if (cnode_full_name.find(ignore_op) != std::string::npos) { - return true; - } - } - return false; -} - bool IsElementWiseOperator(const std::string &op_name) { auto iter = std::find(elementwise_op_.begin(), elementwise_op_.end(), op_name); return (iter != elementwise_op_.end()); @@ -414,18 +402,20 @@ OperatorInfoPtr CreateTheOperatorInfo(const PrimitivePtr &prim, const CNodePtr & // Set cost for this configured strategy if (operator_info->SetCostUnderStrategy(strategyPtr) != SUCCESS) { MS_LOG(EXCEPTION) << "Failure: operator " << prim->name() << " SetCostUnderStrategy failed"; - } else if (!NOT_FULLY_USE_DEVICES) { - if (!IsIgnoreStrategyOperator(cnode->fullname_with_scope())) { - // If configured to fully use devices, then checking for the user-specified strategy - int32_t used_devices = operator_info->used_devices(); - MS_EXCEPTION_IF_NULL(g_device_manager); - auto total_device_num = g_device_manager->GetDeviceListByStageId(0).size(); - // 'used_devices == -1' means that 'used_devices_' is not set - if ((used_devices == -1) || IntToSize(used_devices) != total_device_num) { - MS_LOG(EXCEPTION) << "In configuration 'NOT_FULLY_USE_DEVICES' = False, " - << "but the specified strategy uses device: " << used_devices - << ", total devices: " << total_device_num; - } + } else if (FULLY_USE_DEVICES) { + // If configured to fully use devices, then checking for the user-specified strategy + int32_t used_devices = operator_info->used_devices(); + MS_EXCEPTION_IF_NULL(g_device_manager); + auto total_device_num = g_device_manager->GetDeviceListByStageId(0).size(); + // 'used_devices == 1' means that ALL-1 strategy, which is valid in auto-parallel + if (used_devices == 1) { + return operator_info; + } + // 'used_devices == -1' means that 'used_devices_' is not set + if ((used_devices == -1) || IntToSize(used_devices) != total_device_num) { + MS_LOG(EXCEPTION) << "In configuration 'FULLY_USE_DEVICES' = True, " + << "but the specified strategy uses device: " << used_devices + << ", total devices: " << total_device_num; } } } diff --git a/mindspore/ccsrc/pipeline/init.cc b/mindspore/ccsrc/pipeline/init.cc index 24ead047d3f..4aab6e2a5e7 100644 --- a/mindspore/ccsrc/pipeline/init.cc +++ b/mindspore/ccsrc/pipeline/init.cc @@ -261,10 +261,10 @@ PYBIND11_MODULE(_c_expression, m) { "Set the parameter tensor_slice_size in strategy generation.") .def("get_tensor_slice_align_size", &CostModelContext::tensor_slice_alignment_size, "Get the parameter tensor_slice_size in strategy generation.") - .def("set_not_fully_use_devices", &CostModelContext::set_not_fully_use_device, - "Set the parameter not_fully_use_devices in the DP algorithm.") - .def("get_not_fully_use_devices", &CostModelContext::not_fully_use_device, - "Get the parameter not_fully_use_devices in the DP algorithm.") + .def("set_fully_use_devices", &CostModelContext::set_fully_use_device, + "Set the parameter fully_use_devices in the DP algorithm.") + .def("get_fully_use_devices", &CostModelContext::fully_use_device, + "Get the parameter fully_use_devices in the DP algorithm.") .def("set_elementwise_op_strategy_follow", &CostModelContext::set_elementwise_stra_follow, "Set the parameter elementwise_op_strategy_follow in the DP algorithm.") .def("get_elementwise_op_strategy_follow", &CostModelContext::elementwise_stra_follow, diff --git a/mindspore/parallel/algo_parameter_config.py b/mindspore/parallel/algo_parameter_config.py index aafc02367f1..d1e4aa87a96 100644 --- a/mindspore/parallel/algo_parameter_config.py +++ b/mindspore/parallel/algo_parameter_config.py @@ -53,13 +53,13 @@ class _AlgoParameterConfig(): self.check_config_handle() return self._config_handle.get_simplify_cal() - def set_not_fully_use_devices(self, not_fully): + def set_fully_use_devices(self, not_fully): self.check_config_handle() - self._config_handle.set_not_fully_use_devices(not_fully) + self._config_handle.set_fully_use_devices(not_fully) - def get_not_fully_use_devices(self): + def get_fully_use_devices(self): self.check_config_handle() - return self._config_handle.get_not_fully_use_devices() + return self._config_handle.get_fully_use_devices() def set_elementwise_op_strategy_follow(self, element_strategy_follow): self.check_config_handle() @@ -119,7 +119,7 @@ def _algo_parameter_config(): set_algo_parameters_config_func_map = { "simplify_cal": _algo_parameter_config().set_simplify_cal, - "not_fully_use_devices": _algo_parameter_config().set_not_fully_use_devices, + "fully_use_devices": _algo_parameter_config().set_fully_use_devices, "elementwise_op_strategy_follow": _algo_parameter_config().set_elementwise_op_strategy_follow, "tensor_slice_align_enable": _algo_parameter_config().set_tensor_slice_align_enable, "tensor_slice_align_size": _algo_parameter_config().set_tensor_slice_align_size} @@ -127,14 +127,14 @@ set_algo_parameters_config_func_map = { get_algo_parameters_config_func_map = { "simplify_cal": _algo_parameter_config().get_simplify_cal, - "not_fully_use_devices": _algo_parameter_config().get_not_fully_use_devices, + "fully_use_devices": _algo_parameter_config().get_fully_use_devices, "elementwise_op_strategy_follow": _algo_parameter_config().get_elementwise_op_strategy_follow, "tensor_slice_align_enable": _algo_parameter_config().get_tensor_slice_align_enable, "tensor_slice_align_size": _algo_parameter_config().get_tensor_slice_align_size} @args_type_check(simplify_cal=bool, tensor_slice_align_enable=bool, tensor_slice_align_size=int, - not_fully_use_devices=bool, elementwise_op_strategy_follow=bool) + fully_use_devices=bool, elementwise_op_strategy_follow=bool) def set_algo_parameters(**kwargs): """ Set algo parameter config. @@ -146,7 +146,7 @@ def set_algo_parameters(**kwargs): simplify_cal (bool): Whether simplifying calculations in strategy-searching algorithm. Default: True tensor_slice_align_enable (bool): Whether checking tensor slice shape. Default: False tensor_slice_align_size (int): The minimum tensor slice shape, the value must be in [1, 1024]. Default: 16 - not_fully_use_devices (bool): Whether generating strategies that not fully use devices. Default: False + fully_use_devices (bool): Whether generating strategies that fully use all available devices. Default: True elementwise_op_strategy_follow (bool): Whether the elementwise operator have the same strategies as its subsequent operators. Default: False diff --git a/tests/ut/python/parallel/test_auto_parallel_two_matmul.py b/tests/ut/python/parallel/test_auto_parallel_two_matmul.py index bd6639a5019..db6190ab897 100644 --- a/tests/ut/python/parallel/test_auto_parallel_two_matmul.py +++ b/tests/ut/python/parallel/test_auto_parallel_two_matmul.py @@ -100,7 +100,7 @@ def test_two_matmul(): set_algo_parameters(simplify_cal=True, tensor_slice_align_enable=False, tensor_slice_align_size=32, - not_fully_use_devices=True, + fully_use_devices=False, elementwise_op_strategy_follow=False) para_simplify_cal = get_algo_parameters("simplify_cal") assert para_simplify_cal == True @@ -108,8 +108,8 @@ def test_two_matmul(): assert para_slice_align_enable == False para_slice_align_size = get_algo_parameters("tensor_slice_align_size") assert para_slice_align_size == 32 - not_fully_use_devices = get_algo_parameters("not_fully_use_devices") - assert not_fully_use_devices == True + fully_use_devices = get_algo_parameters("fully_use_devices") + assert fully_use_devices == False elementwise_op_strategy_follow = get_algo_parameters("elementwise_op_strategy_follow") assert elementwise_op_strategy_follow == False @@ -120,8 +120,8 @@ def test_two_matmul(): assert para_slice_align_enable == False para_slice_align_size = get_algo_parameters("tensor_slice_align_size") assert para_slice_align_size == 16 - not_fully_use_devices = get_algo_parameters("not_fully_use_devices") - assert not_fully_use_devices == False + fully_use_devices = get_algo_parameters("fully_use_devices") + assert fully_use_devices == True elementwise_op_strategy_follow = get_algo_parameters("elementwise_op_strategy_follow") assert elementwise_op_strategy_follow == False diff --git a/tests/ut/python/parallel/test_reshape.py b/tests/ut/python/parallel/test_reshape.py index 43906aec238..f72e5f909b0 100644 --- a/tests/ut/python/parallel/test_reshape.py +++ b/tests/ut/python/parallel/test_reshape.py @@ -576,7 +576,7 @@ def test_flatten_reshape2(parallel_mode="auto_parallel"): epoch_size = 2 context.reset_auto_parallel_context() context.set_auto_parallel_context(parallel_mode=parallel_mode, device_num=8) - set_algo_parameters(not_fully_use_devices=True) + set_algo_parameters(fully_use_devices=False) net = ParallelReduceMeanNet(conv_in_channel=3, conv_out_channel=64, reducemean_axis=(2, 3), strategy=((4, 1, 1, 1),)) loss = CrossEntropyLoss() predict = Tensor(np.ones([batch_size, 3, 32, 32]), dtype=ms.float32) @@ -617,7 +617,7 @@ def test_flatten_reshape3(parallel_mode="auto_parallel"): epoch_size = 2 context.reset_auto_parallel_context() context.set_auto_parallel_context(parallel_mode=parallel_mode, device_num=8) - set_algo_parameters(not_fully_use_devices=True) + set_algo_parameters(fully_use_devices=False) net = ParallelReshapeNet(dense_in_channel=2048, dense_out_channel=1000, shape=(128, 1000), strategy=((16, 1),)) loss = CrossEntropyLoss() predict = Tensor(np.ones([batch_size, 1, 2, 1024]), dtype=ms.float32) @@ -646,7 +646,7 @@ def test_flatten_reshape4(parallel_mode="semi_auto_parallel"): epoch_size = 2 context.reset_auto_parallel_context() context.set_auto_parallel_context(parallel_mode=parallel_mode, device_num=8) - set_algo_parameters(not_fully_use_devices=True) + set_algo_parameters(fully_use_devices=False) net = ParallelReduceMeanNet(conv_in_channel=3, conv_out_channel=64, reducemean_keep_dims=True, strategy=((4, 1, 1, 1),)) loss = CrossEntropyLoss2() predict = Tensor(np.ones([batch_size, 3, 32, 32]), dtype=ms.float32)