diff --git a/mindspore/ccsrc/backend/common/optimizer/helper.cc b/mindspore/ccsrc/backend/common/optimizer/helper.cc
index 27630bff07f..6394e7429ab 100644
--- a/mindspore/ccsrc/backend/common/optimizer/helper.cc
+++ b/mindspore/ccsrc/backend/common/optimizer/helper.cc
@@ -57,18 +57,6 @@ void UpdateDumpFlagAndDebugInfo(const CNodePtr &node, const std::vector<AnfNodePtr> &orig_nodes)
-std::vector<int> Convert2Int(const std::vector<size_t> &v) {
-  std::vector<int> result;
-  (void)std::transform(v.begin(), v.end(), std::back_inserter(result), SizeToInt);
-  return result;
-}
-
-std::vector<int64_t> Convert2Long(const std::vector<size_t> &v) {
-  std::vector<int64_t> result;
-  (void)std::transform(v.begin(), v.end(), std::back_inserter(result), SizeToLong);
-  return result;
-}
-
 bool IsDepend(const FuncGraph &graph, const AnfNodePtr &node, const std::vector<AnfNodePtr> &nodes) {
   mindspore::HashSet<AnfNodePtr> visited_nodes;
   return IsDepend(graph, node, nodes, &visited_nodes);
@@ -950,7 +938,6 @@ kernel::KernelBuildInfoPtr GenerateKernelBuildInfo(const std::vector<AnfNodePtr> &node_list)
   std::vector<std::string> outputs_device_format;
   std::vector<TypeId> inputs_device_type;
   std::vector<TypeId> outputs_device_type;
-  std::vector<std::vector<size_t>> outputs_shape;
   kernel::KernelBuildInfo::KernelBuildInfoBuilder builder;
   for (size_t idx = 0; idx < node_list.size(); ++idx) {
     auto cnode = utils::cast<CNodePtr>(node_list[idx]);
@@ -964,7 +951,6 @@ kernel::KernelBuildInfoPtr GenerateKernelBuildInfo(const std::vector<AnfNodePtr> &node_list)
     for (size_t output_index = 0; output_index < output_num; ++output_index) {
       (void)outputs_device_format.emplace_back(kOpFormat_DEFAULT);
       (void)outputs_device_type.emplace_back(common::AnfAlgo::GetOutputInferDataType(cnode, output_index));
-      (void)outputs_shape.emplace_back(common::AnfAlgo::GetOutputInferShape(cnode, output_index));
     }
   }
   builder.SetInputsFormat(inputs_device_format);
diff --git a/mindspore/ccsrc/backend/common/optimizer/helper.h b/mindspore/ccsrc/backend/common/optimizer/helper.h
index 96820c7e63b..5e7a0ec15b8 100644
--- a/mindspore/ccsrc/backend/common/optimizer/helper.h
+++ b/mindspore/ccsrc/backend/common/optimizer/helper.h
@@ -127,10 +127,6 @@ enum ConvBn1Output {
   kMean,
 };
 
-std::vector<int> Convert2Int(const std::vector<size_t> &v);
-
-std::vector<int64_t> Convert2Long(const std::vector<size_t> &v);
-
 // check whether node depends on either of nodes or not
 bool IsDepend(const FuncGraph &graph, const AnfNodePtr &node, const std::vector<AnfNodePtr> &nodes);
 bool IsDepend(const FuncGraph &graph, const AnfNodePtr &node, const std::vector<AnfNodePtr> &nodes,
diff --git a/mindspore/ccsrc/backend/common/optimizer/pass_manager.cc b/mindspore/ccsrc/backend/common/optimizer/pass_manager.cc
index a75f6ab0369..f31585ffe7d 100644
--- a/mindspore/ccsrc/backend/common/optimizer/pass_manager.cc
+++ b/mindspore/ccsrc/backend/common/optimizer/pass_manager.cc
@@ -63,7 +63,7 @@ TypeId CacheManager::GetOutputType(const AnfNodePtr &node, size_t index) {
   return result;
 }
 
-std::vector<size_t> CacheManager::GetOutputShape(const AnfNodePtr &node, size_t index) {
+ShapeVector CacheManager::GetOutputShape(const AnfNodePtr &node, size_t index) {
   MS_EXCEPTION_IF_NULL(node);
   auto iter = shape_map_.find(node);
   if (iter != shape_map_.end()) {
@@ -75,8 +75,8 @@ std::vector<size_t> CacheManager::GetOutputShape(const AnfNodePtr &node, size_t index) {
     return {};
   }
   auto output_nums = common::AnfAlgo::GetOutputTensorNum(node);
-  std::map<size_t, std::vector<size_t>> index_to_shapes;
-  std::vector<size_t> result = {};
+  std::map<size_t, ShapeVector> index_to_shapes;
+  ShapeVector result = {};
   for (size_t i = 0; i < output_nums; i++) {
     auto output_shape = common::AnfAlgo::GetOutputInferShape(node, i);
     (void)index_to_shapes.emplace(i, output_shape);
diff --git a/mindspore/ccsrc/backend/common/optimizer/pass_manager.h b/mindspore/ccsrc/backend/common/optimizer/pass_manager.h
index 395232c1f84..09cbf9949e9 100644
--- a/mindspore/ccsrc/backend/common/optimizer/pass_manager.h
+++ b/mindspore/ccsrc/backend/common/optimizer/pass_manager.h
@@ -34,11 +34,11 @@ class CacheManager {
   ~CacheManager() = default;
   void Update(const AnfNodePtr &node);
   TypeId GetOutputType(const AnfNodePtr &node, size_t index);
-  std::vector<size_t> GetOutputShape(const AnfNodePtr &node, size_t index);
+  ShapeVector GetOutputShape(const AnfNodePtr &node, size_t index);
 
  private:
   std::map<AnfNodePtr, std::map<size_t, TypeId>> type_map_;
-  std::map<AnfNodePtr, std::map<size_t, std::vector<size_t>>> shape_map_;
+  std::map<AnfNodePtr, std::map<size_t, ShapeVector>> shape_map_;
 };
 
 // @brief For optimization passes management
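Note on the recurring type change in this patch: `ShapeVector` is `std::vector<int64_t>`, where a negative dimension marks an unknown (dynamic) size, so one signed type can carry both static and dynamic shapes. A minimal self-contained sketch of the semantics the passes rely on; the real alias and `IsDynamic` live in utils/shape_utils.h, and `main` here is illustrative only:

    #include <algorithm>
    #include <cstdint>
    #include <iostream>
    #include <vector>

    using ShapeVector = std::vector<int64_t>;  // one signed entry per dimension, -1 = unknown

    // Mirrors the semantics the passes rely on: a shape is dynamic iff any dim is negative.
    bool IsDynamic(const ShapeVector &shape) {
      return std::any_of(shape.begin(), shape.end(), [](int64_t dim) { return dim < 0; });
    }

    int main() {
      ShapeVector static_shape = {32, 3, 224, 224};
      ShapeVector dynamic_shape = {-1, 3, 224, 224};  // unknown batch dimension
      std::cout << IsDynamic(static_shape) << " " << IsDynamic(dynamic_shape) << "\n";  // prints: 0 1
      return 0;
    }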
diff --git a/mindspore/ccsrc/backend/common/pass/communication_op_fusion.cc b/mindspore/ccsrc/backend/common/pass/communication_op_fusion.cc
index 2806d9ea7c9..1d03e04d646 100644
--- a/mindspore/ccsrc/backend/common/pass/communication_op_fusion.cc
+++ b/mindspore/ccsrc/backend/common/pass/communication_op_fusion.cc
@@ -46,7 +46,6 @@ kernel::KernelBuildInfoPtr GenerateKernelBuildInfo(const CommunicationOpInfo &communication_op_info,
   std::vector<std::string> outputs_device_format;
   std::vector<TypeId> inputs_device_type;
   std::vector<TypeId> outputs_device_type;
-  std::vector<std::vector<size_t>> outputs_shape;
   kernel::KernelBuildInfo::KernelBuildInfoBuilder builder;
   for (size_t idx = start_index; idx <= end_index; ++idx) {
     auto cnode = communication_op_info.communication_op_nodes[idx];
@@ -55,8 +54,7 @@ kernel::KernelBuildInfoPtr GenerateKernelBuildInfo(const CommunicationOpInfo &communication_op_info,
         common::AnfAlgo::GetCNodeName(cnode) == kAllGatherOpName) {
       rank_size = common::AnfAlgo::GetNodeAttr<int64_t>(cnode, kAttrRankSize);
     }
-    size_t rank_size_t = LongToSize(rank_size);
-    if (rank_size_t == 0) {
+    if (rank_size == 0) {
      MS_LOG(EXCEPTION) << "Rank size should not be zero.";
     }
     MS_EXCEPTION_IF_NULL(cnode);
@@ -65,16 +63,11 @@ kernel::KernelBuildInfoPtr GenerateKernelBuildInfo(const CommunicationOpInfo &communication_op_info,
       inputs_device_format.push_back(AnfAlgo::GetInputFormat(cnode, input_index));
       inputs_device_type.push_back(AnfAlgo::GetInputDeviceDataType(cnode, input_index));
     }
-    for (size_t rank_index = 0; rank_index < rank_size_t; ++rank_index) {
+    for (int64_t rank_index = 0; rank_index < rank_size; ++rank_index) {
       size_t output_num = common::AnfAlgo::GetOutputTensorNum(cnode);
       for (size_t output_index = 0; output_index < output_num; ++output_index) {
         outputs_device_format.push_back(AnfAlgo::GetOutputFormat(cnode, output_index));
         outputs_device_type.push_back(AnfAlgo::GetOutputDeviceDataType(cnode, output_index));
-        std::vector<size_t> shape = common::AnfAlgo::GetOutputInferShape(cnode, output_index);
-        if (!shape.empty()) {
-          shape[0] /= rank_size_t;
-        }
-        outputs_shape.push_back(common::AnfAlgo::GetOutputInferShape(cnode, output_index));
       }
     }
     builder.SetFusionType(AnfAlgo::GetFusionType(cnode));
@@ -373,21 +366,21 @@ AnfNodePtr CommunicationOpFusion::CreateFusedCommunicationOp(const FuncGraphPtr &func_graph,
       common::AnfAlgo::GetCNodeName(final_node) == kAllGatherOpName) {
     rank_size = common::AnfAlgo::GetNodeAttr<int64_t>(final_node, kAttrRankSize);
   }
-  size_t rank_size_t = LongToSize(rank_size);
-  if (rank_size_t == 0) {
+
+  if (rank_size == 0) {
     MS_LOG(EXCEPTION) << "Rank size should not be zero.";
   }
-  size_t output_num = node_num * rank_size_t;
+  size_t output_num = node_num * LongToSize(rank_size);
   std::vector<TypeId> dtypes(output_num, common::AnfAlgo::GetOutputInferDataType(final_node, 0));
-  std::vector<std::vector<size_t>> shapes;
+  std::vector<ShapeVector> shapes;
   int64_t fusion_total_size = 0;
-  for (size_t i = 0; i < rank_size_t; ++i) {
+  for (int64_t i = 0; i < rank_size; ++i) {
     for (size_t idx = start_index; idx <= end_index; ++idx) {
       auto input_node = communication_op_info.communication_op_nodes[idx];
       MS_EXCEPTION_IF_NULL(input_node);
-      std::vector<size_t> shape = common::AnfAlgo::GetOutputInferShape(input_node, 0);
+      auto shape = common::AnfAlgo::GetOutputInferShape(input_node, 0);
       if (!shape.empty()) {
-        shape[0] /= rank_size_t;
+        shape[0] /= rank_size;
       }
       shapes.push_back(shape);
       size_t tensor_size = AnfAlgo::GetOutputTensorMemSize(input_node, 0);
diff --git a/mindspore/ccsrc/backend/common/pass/replace_node_by_proxy.cc b/mindspore/ccsrc/backend/common/pass/replace_node_by_proxy.cc
index 85215461527..4a56cd8417e 100644
--- a/mindspore/ccsrc/backend/common/pass/replace_node_by_proxy.cc
+++ b/mindspore/ccsrc/backend/common/pass/replace_node_by_proxy.cc
@@ -29,7 +29,6 @@ kernel::KernelBuildInfoPtr ReplaceNodeByProxy::GenerateKernelBuildInfo(const CNodePtr &cnode) {
   std::vector<std::string> outputs_device_format;
   std::vector<TypeId> inputs_device_type;
   std::vector<TypeId> outputs_device_type;
-  std::vector<std::vector<size_t>> outputs_shape;
   kernel::KernelBuildInfo::KernelBuildInfoBuilder builder;
   size_t input_num = common::AnfAlgo::GetInputTensorNum(cnode);
   for (size_t input_index = 0; input_index < input_num; ++input_index) {
@@ -40,7 +39,6 @@ kernel::KernelBuildInfoPtr ReplaceNodeByProxy::GenerateKernelBuildInfo(const CNodePtr &cnode) {
   for (size_t output_index = 0; output_index < output_num; ++output_index) {
     outputs_device_format.push_back(AnfAlgo::GetOutputFormat(cnode, output_index));
     outputs_device_type.push_back(AnfAlgo::GetOutputDeviceDataType(cnode, output_index));
-    outputs_shape.push_back(common::AnfAlgo::GetOutputInferShape(cnode, output_index));
   }
   builder.SetFusionType(AnfAlgo::GetFusionType(cnode));
   builder.SetProcessor(AnfAlgo::GetProcessor(cnode));
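The fused-AllGather bookkeeping above keeps dividing each input's first dimension by rank_size to recover the per-rank slice, now in signed arithmetic with no size_t round-trip. A hedged sketch of that shape math (the function name is illustrative, not the real pass):

    #include <cstdint>
    #include <iostream>
    #include <vector>

    using ShapeVector = std::vector<int64_t>;

    // An AllGather output stacks rank_size per-rank slices along dim 0, so the
    // per-rank slice is the gathered shape with dim 0 divided by rank_size.
    ShapeVector PerRankSlice(ShapeVector shape, int64_t rank_size) {
      if (!shape.empty()) {
        shape[0] /= rank_size;
      }
      return shape;
    }

    int main() {
      ShapeVector gathered = {16, 128};  // output of an AllGather over 4 ranks
      ShapeVector slice = PerRankSlice(gathered, 4);
      std::cout << slice[0] << "x" << slice[1] << "\n";  // 4x128
      return 0;
    }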
diff --git a/mindspore/ccsrc/backend/common/session/anf_runtime_algorithm.cc b/mindspore/ccsrc/backend/common/session/anf_runtime_algorithm.cc
index c6f01060277..8a6c64331b1 100644
--- a/mindspore/ccsrc/backend/common/session/anf_runtime_algorithm.cc
+++ b/mindspore/ccsrc/backend/common/session/anf_runtime_algorithm.cc
@@ -83,7 +83,7 @@ static std::map<std::string, std::pair<std::map<size_t, size_t>, std::map<size_t, size_t>>> spec_node_list = {
   {prim::kPrimConv2DBackpropInput->name(),
    {{{0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 0}}, {{1, 0}, {2, 1}, {3, 2}, {4, 3}, {0, 4}}}}};
 
-std::string PrintKernelFormatAndType(const std::string &fmt, const TypeId &type, const std::vector<size_t> &shape) {
+std::string PrintKernelFormatAndType(const std::string &fmt, const TypeId &type, const std::vector<int64_t> &shape) {
   std::ostringstream buffer;
   buffer << "<" << TypeIdLabel(type);
   if (!fmt.empty()) {
@@ -170,7 +170,17 @@ size_t AnfRuntimeAlgorithm::GetOutputTensorMemSize(const AnfNodePtr &node, size_t output_index) {
     output_type_id = common::AnfAlgo::GetOutputInferDataType(node, output_index);
   }
   size_t type_size = GetTypeByte(TypeIdToType(output_type_id));
-  std::vector<size_t> shape = AnfAlgo::GetOutputDeviceShape(node, output_index);
+  auto shape = AnfAlgo::GetOutputDeviceShape(node, output_index);
+  if (IsDynamic(shape)) {
+    auto max_shape = common::AnfAlgo::GetOutputMaxShape(node, output_index);
+    if (!max_shape.empty()) {
+      shape = max_shape;
+      MS_LOG(DEBUG) << "shape[" << shape << "] is dynamic, using max_shape[" << max_shape << "] instead.";
+    } else {
+      shape = {1};
+      MS_LOG(DEBUG) << "shape[" << shape << "] is dynamic, set default to {1}";
+    }
+  }
   auto format = AnfAlgo::GetOutputFormat(node, output_index);
   auto dtype = AnfAlgo::GetOutputDeviceDataType(node, output_index);
   if (shape.empty() && format != kOpFormat_DEFAULT) {
@@ -178,7 +188,7 @@ size_t AnfRuntimeAlgorithm::GetOutputTensorMemSize(const AnfNodePtr &node, size_t output_index) {
     shape = trans::TransShapeToDevice(shape, format, node, output_index, dtype);
   }
   // scalar's output shape is a empty vector
-  size_t tensor_size = std::accumulate(shape.begin(), shape.end(), type_size, std::multiplies<size_t>());
+  size_t tensor_size = type_size * SizeOf(shape);
   return tensor_size;
 }
@@ -331,12 +341,29 @@ std::vector<int64_t> AnfRuntimeAlgorithm::GetOutputDeviceShapeForTbeBuild(const AnfNodePtr &node,
   return trans::TransShapeToDevice(infer_shape, format, node, output_idx, dtype);
 }
 
-std::vector<size_t> AnfRuntimeAlgorithm::GetOutputDeviceShape(const AnfNodePtr &node, size_t output_idx) {
+bool AnfRuntimeAlgorithm::IsShapesDynamic(const std::vector<ShapeVector> &shapes) {
+  for (const auto &shape : shapes) {
+    if (IsDynamic(shape)) {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+ShapeVector AnfRuntimeAlgorithm::GetOutputDeviceShape(const AnfNodePtr &node, size_t output_idx) {
   auto format = GetOutputFormat(node, output_idx);
   auto infer_shape = common::AnfAlgo::GetOutputInferShape(node, output_idx);
   if (infer_shape.empty()) {
     return infer_shape;
   }
+
+  if (IsDynamic(infer_shape)) {
+    auto max_shape = common::AnfAlgo::GetOutputMaxShape(node, output_idx);
+    if (!max_shape.empty()) {
+      infer_shape = max_shape;
+    }
+  }
   // if format is default_format or NC1KHKWHWC0,device shape = original shape
   if (trans::IsNeedPadding(format, infer_shape.size())) {
     infer_shape = trans::PaddingShape(infer_shape, format, GetOutputReshapeType(node, output_idx), node);
@@ -366,7 +393,7 @@ std::vector<int64_t> AnfRuntimeAlgorithm::GetInputDeviceShapeForTbeBuild(const AnfNodePtr &node,
   return trans::TransShapeToDevice(infer_shape, format, node, input_idx, dtype, false);
 }
 
-std::vector<size_t> AnfRuntimeAlgorithm::GetInputDeviceShape(const AnfNodePtr &node, size_t input_idx) {
+std::vector<int64_t> AnfRuntimeAlgorithm::GetInputDeviceShape(const AnfNodePtr &node, size_t input_idx) {
   auto format = GetInputFormat(node, input_idx);
   auto infer_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(node, input_idx);
   if (infer_shape.empty()) {
@@ -930,9 +957,9 @@ bool AnfRuntimeAlgorithm::IsIndependentNode(const CNodePtr &node) {
   return true;
 }
 
-static inline void GetMaxOrDefaultShape(const std::vector<int64_t> &max_shape, std::vector<size_t> *device_shape) {
-  constexpr size_t kDefaultValueForDynamicDim = 16;
-  auto ConvertNegOneToDefault = [&kDefaultValueForDynamicDim](size_t size) {
+static inline void GetMaxOrDefaultShape(const std::vector<int64_t> &max_shape, ShapeVector *device_shape) {
+  constexpr int64_t kDefaultValueForDynamicDim = 16;
+  auto ConvertNegOneToDefault = [&kDefaultValueForDynamicDim](int64_t size) {
     return static_cast<int64_t>(size) < 0 ? kDefaultValueForDynamicDim : size;
   };
   if (!max_shape.empty()) {
@@ -940,7 +967,7 @@ static inline void GetMaxOrDefaultShape(const std::vector<int64_t> &max_shape, std::vector<size_t> *device_shape) {
       (void)std::transform(max_shape.begin(), max_shape.end(), std::back_inserter(*device_shape),
                            ConvertNegOneToDefault);
     } else {
-      (void)std::transform(max_shape.begin(), max_shape.end(), device_shape->begin(), IntToSize);
+      *device_shape = max_shape;
     }
   } else {
     auto tmp_shape = *device_shape;
@@ -954,10 +981,10 @@ static inline void GetMaxOrDefaultShape(const std::vector<int64_t> &max_shape, std::vector<size_t> *device_shape) {
 // why do we do this? Because in dynamic shape case, the input shape is unknown when the `init`
 // function executes at the very first time, but we still need to some helpful shape to make
 // sure the `init` executes correctly.
-std::vector<size_t> AnfRuntimeAlgorithm::GetInputDeviceShapeAdaptively(const AnfNodePtr &anf_node, size_t index) {
+ShapeVector AnfRuntimeAlgorithm::GetInputDeviceShapeAdaptively(const AnfNodePtr &anf_node, size_t index) {
   auto device_shape = GetInputDeviceShape(anf_node, index);
   // Initialize GPUKernel with max shape to fit 'InitDynamicOutputKernelRef()' for memory reuse.
-  if (AnfUtils::IsShapeDynamic(device_shape) || device_shape.empty()) {
+  if (IsDynamic(device_shape) || device_shape.empty()) {
     auto max_shape = common::AnfAlgo::GetInputMaxShape(anf_node, index);
     GetMaxOrDefaultShape(max_shape, &device_shape);
     auto format = GetInputFormat(anf_node, index);
@@ -967,11 +994,11 @@ std::vector<size_t> AnfRuntimeAlgorithm::GetInputDeviceShapeAdaptively(const AnfNodePtr &anf_node, size_t index) {
 
   if (device_shape.empty()) {
     KernelWithIndex kernel_with_index = common::AnfAlgo::GetPrevNodeOutput(anf_node, index);
-    auto shape = common::AnfAlgo::GetOutputInferShapeSigned(kernel_with_index.first, kernel_with_index.second);
-    std::vector<size_t> ret_shape;
-    constexpr size_t kDefaultValueForDynamicDim = 1;
+    auto shape = common::AnfAlgo::GetOutputInferShape(kernel_with_index.first, kernel_with_index.second);
+    ShapeVector ret_shape;
+    constexpr int64_t kDefaultValueForDynamicDim = 1;
     auto ConvertNegOneToDefault = [&kDefaultValueForDynamicDim](int64_t size) {
-      return size < 0 ? kDefaultValueForDynamicDim : LongToSize(size);
+      return size < 0 ? kDefaultValueForDynamicDim : size;
     };
     std::transform(shape.begin(), shape.end(), std::back_inserter(ret_shape), ConvertNegOneToDefault);
     auto format = GetInputFormat(anf_node, index);
@@ -984,10 +1011,10 @@ std::vector<size_t> AnfRuntimeAlgorithm::GetInputDeviceShapeAdaptively(const AnfNodePtr &anf_node, size_t index) {
 }
 
 // The same to GetInputDeviceShapeAdaptively
-std::vector<size_t> AnfRuntimeAlgorithm::GetOutputDeviceShapeAdaptively(const AnfNodePtr &anf_node, size_t index) {
+ShapeVector AnfRuntimeAlgorithm::GetOutputDeviceShapeAdaptively(const AnfNodePtr &anf_node, size_t index) {
   auto device_shape = GetOutputDeviceShape(anf_node, index);
   // Initialize GPUKernel with max shape to fit 'InitDynamicOutputKernelRef()' for memory reuse.
-  if (AnfUtils::IsShapeDynamic(device_shape) || device_shape.empty()) {
+  if (IsDynamic(device_shape) || device_shape.empty()) {
     auto max_shape = common::AnfAlgo::GetOutputMaxShape(anf_node, index);
     GetMaxOrDefaultShape(max_shape, &device_shape);
     auto format = GetOutputFormat(anf_node, index);
@@ -997,10 +1024,10 @@ std::vector<size_t> AnfRuntimeAlgorithm::GetOutputDeviceShapeAdaptively(const AnfNodePtr &anf_node, size_t index) {
 
   if (device_shape.empty()) {
     auto shape = common::AnfAlgo::GetOutputInferShapeSigned(anf_node, index);
-    std::vector<size_t> ret_shape;
-    constexpr size_t kDefaultValueForDynamicDim = 1;
+    ShapeVector ret_shape;
+    constexpr int64_t kDefaultValueForDynamicDim = 1;
     auto ConvertNegOneToOne = [&kDefaultValueForDynamicDim](int64_t size) {
-      return size < 0 ? kDefaultValueForDynamicDim : LongToSize(size);
+      return size < 0 ? kDefaultValueForDynamicDim : size;
     };
     std::transform(shape.begin(), shape.end(), std::back_inserter(ret_shape), ConvertNegOneToOne);
     auto format = GetOutputFormat(anf_node, index);
@@ -1285,7 +1312,7 @@ bool AnfRuntimeAlgorithm::IsDynamicShapeSkipExecute(const CNodePtr &cnode) {
   MS_EXCEPTION_IF_NULL(axes_abs);
   auto axes_shape = AnfAlgo::GetInputDeviceShape(cnode, axes_index);
   if (axes_abs->isa<abstract::AbstractTensor>()) {
-    if (std::any_of(axes_shape.begin(), axes_shape.end(), [](ssize_t shape) { return shape == 0; })) {
+    if (std::any_of(axes_shape.begin(), axes_shape.end(), [](int64_t shape) { return shape == 0; })) {
       return true;
     }
   }
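A condensed sketch of the fallback rule the adaptive getters above apply: prefer the max shape, substituting a fixed default (16) for any dimension that is still unknown. Names mirror the diff, but this is a simplification; when the max shape is empty, the real `GetMaxOrDefaultShape` derives a fallback from the existing device shape instead of returning it untouched:

    #include <algorithm>
    #include <cstdint>
    #include <iterator>
    #include <vector>

    using ShapeVector = std::vector<int64_t>;

    // Replace every still-unknown (-1) dim of the max shape with a usable default,
    // so kernel `init` always sees a concrete shape.
    ShapeVector MaxOrDefault(const ShapeVector &max_shape, const ShapeVector &device_shape) {
      constexpr int64_t kDefaultValueForDynamicDim = 16;
      if (max_shape.empty()) {
        return device_shape;  // simplification; see the note above
      }
      ShapeVector result;
      (void)std::transform(max_shape.begin(), max_shape.end(), std::back_inserter(result),
                           [](int64_t d) { return d < 0 ? kDefaultValueForDynamicDim : d; });
      return result;
    }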
diff --git a/mindspore/ccsrc/backend/common/session/anf_runtime_algorithm.h b/mindspore/ccsrc/backend/common/session/anf_runtime_algorithm.h
index 02c6ba93414..aad747618fc 100644
--- a/mindspore/ccsrc/backend/common/session/anf_runtime_algorithm.h
+++ b/mindspore/ccsrc/backend/common/session/anf_runtime_algorithm.h
@@ -71,9 +71,9 @@ class BACKEND_EXPORT AnfRuntimeAlgorithm {
   // get reshape_type of from the output of input node.
   static std::string GetPrevNodeOutputReshapeType(const AnfNodePtr &node, size_t input_idx);
   // get output shapes which will built and run in device
-  static std::vector<size_t> GetOutputDeviceShape(const AnfNodePtr &node, size_t output_idx);
+  static std::vector<int64_t> GetOutputDeviceShape(const AnfNodePtr &node, size_t output_idx);
   // get input shapes which will built and run in device
-  static std::vector<size_t> GetInputDeviceShape(const AnfNodePtr &node, size_t input_idx);
+  static std::vector<int64_t> GetInputDeviceShape(const AnfNodePtr &node, size_t input_idx);
   // get output shapes for tbe build
   static std::vector<int64_t> GetOutputDeviceShapeForTbeBuild(const AnfNodePtr &node, const size_t output_idx,
                                                               const std::string &format);
@@ -160,8 +160,8 @@ class BACKEND_EXPORT AnfRuntimeAlgorithm {
   static std::vector<KernelGraphPtr> GetCallSwitchKernelGraph(const CNodePtr &cnode);
   static bool IsIndependentNode(const CNodePtr &node);
   static void InferShape(const CNodePtr &node, std::map<uint32_t, tensor::TensorPtr> *depend_tensors = nullptr);
-  static std::vector<size_t> GetInputDeviceShapeAdaptively(const AnfNodePtr &anf_node, size_t index);
-  static std::vector<size_t> GetOutputDeviceShapeAdaptively(const AnfNodePtr &anf_node, size_t index);
+  static ShapeVector GetInputDeviceShapeAdaptively(const AnfNodePtr &anf_node, size_t index);
+  static ShapeVector GetOutputDeviceShapeAdaptively(const AnfNodePtr &anf_node, size_t index);
   static KernelGraphPtr FetchKernelGraph(const AnfNodePtr &node);
   static AnfNodePtr FetchFrontNodeByBackendNode(const AnfNodePtr &backend_node, const KernelGraph &graph);
   static void InsertMakeTupleForOutput(const NotNull<KernelGraphPtr> &root_graph);
@@ -180,6 +180,7 @@ class BACKEND_EXPORT AnfRuntimeAlgorithm {
   // Update the shape of internal parameter in the sub graph.
   static void UpdateInternalParameterShape(const std::map<size_t, std::vector<AnfNodeWeakPtr>> &internal_parameters,
                                            const CNodePtr &cnode);
+  static bool IsShapesDynamic(const std::vector<ShapeVector> &shapes);
 };
 }  // namespace session
 using AnfAlgo = session::AnfRuntimeAlgorithm;
diff --git a/mindspore/ccsrc/backend/common/session/ascend_inference_session.cc b/mindspore/ccsrc/backend/common/session/ascend_inference_session.cc
index c5a24408adf..64e0ad3e6a5 100644
--- a/mindspore/ccsrc/backend/common/session/ascend_inference_session.cc
+++ b/mindspore/ccsrc/backend/common/session/ascend_inference_session.cc
@@ -147,10 +147,10 @@ bool AscendInferenceSession::CompareInput(const tensor::TensorPtr &input, const ParameterPtr &parameter) {
   // compare shape
   auto input_shape = input->shape();
-  vector<size_t> trans_input;
+  vector<int64_t> trans_input;
   (void)std::transform(input_shape.begin(), input_shape.end(), std::back_inserter(trans_input),
                        [](const int64_t dim) { return static_cast<size_t>(dim); });
-  auto is_scalar_shape = [](const vector<size_t> &shape) {
+  auto is_scalar_shape = [](const vector<int64_t> &shape) {
     return shape.empty() || (shape.size() == 1 && shape[0] == 1);
   };
   if ((!is_scalar_shape(trans_input) || !is_scalar_shape(parameter_shape)) && (trans_input != parameter_shape)) {
diff --git a/mindspore/ccsrc/backend/common/session/ascend_session.cc b/mindspore/ccsrc/backend/common/session/ascend_session.cc
index 939c86768d1..77d059929f6 100644
--- a/mindspore/ccsrc/backend/common/session/ascend_session.cc
+++ b/mindspore/ccsrc/backend/common/session/ascend_session.cc
@@ -338,11 +338,9 @@ void AscendSession::LoadInputData(const std::shared_ptr<KernelGraph> &kernel_graph,
       continue;
     } else if (input_param->has_dynamic_shape()) {
       auto tensor_shape = tensor->shape();
-      std::vector<size_t> shape_tmp;
-      (void)std::transform(tensor_shape.begin(), tensor_shape.end(), std::back_inserter(shape_tmp), LongToSize);
-      common::AnfAlgo::SetOutputInferTypeAndShape({common::AnfAlgo::GetOutputInferDataType(input_node, 0)}, {shape_tmp},
-                                                  input_node.get());
-      size = abstract::ShapeSize(shape_tmp) * abstract::TypeIdSize(tensor->data_type());
+      common::AnfAlgo::SetOutputInferTypeAndShape({common::AnfAlgo::GetOutputInferDataType(input_node, 0)},
+                                                  {tensor_shape}, input_node.get());
+      size = abstract::ShapeSize(tensor_shape) * abstract::TypeIdSize(tensor->data_type());
     }
     if (AnfAlgo::OutputAddrExist(input_node, 0) &&
         TensorNeedSync(kernel_graph, input_node, tensor, &device_memcpy_nums)) {
@@ -1768,9 +1766,7 @@ void AscendSession::UpdateOutputTensors(const VectorRef *outputs,
       if (common::AnfAlgo::IsDynamicShape(node)) {
         const auto &updated_shape = common::AnfAlgo::GetOutputInferShape(node, output_index);
-        ShapeVector int_shape;
-        (void)std::transform(updated_shape.begin(), updated_shape.end(), std::back_inserter(int_shape), SizeToInt);
-        (void)tensor->set_shape(int_shape);
+        (void)tensor->set_shape(updated_shape);
       }
     }
     if (tensor->NeedSyncDeviceToHostImmediately()) {
diff --git a/mindspore/ccsrc/backend/common/session/cpu_session.cc b/mindspore/ccsrc/backend/common/session/cpu_session.cc
index fa2d01054c3..286639165c9 100644
--- a/mindspore/ccsrc/backend/common/session/cpu_session.cc
+++ b/mindspore/ccsrc/backend/common/session/cpu_session.cc
@@ -257,10 +257,8 @@ void CPUSession::UpdateDynamicOutputShape(const std::map<tensor::TensorPtr, session::KernelWithIndex> &tensor_to_node) {
-      std::vector<int64_t> refresh_shape;
-      (void)std::copy(shape.begin(), shape.end(), std::back_inserter(refresh_shape));
       MS_EXCEPTION_IF_NULL(tensor_node.first);
-      tensor_node.first->set_shape(refresh_shape);
+      tensor_node.first->set_shape(shape);
     }
   }
 }
diff --git a/mindspore/ccsrc/backend/common/session/gpu_inference_session.cc b/mindspore/ccsrc/backend/common/session/gpu_inference_session.cc
index ec4055f0a9a..6079ada753f 100644
--- a/mindspore/ccsrc/backend/common/session/gpu_inference_session.cc
+++ b/mindspore/ccsrc/backend/common/session/gpu_inference_session.cc
@@ -146,10 +146,10 @@ bool GpuInferenceSession::CompareInput(const tensor::TensorPtr &input, const ParameterPtr &parameter) {
   // compare shape
   auto input_shape = input->shape();
-  vector<size_t> trans_input;
+  vector<int64_t> trans_input;
   (void)std::transform(input_shape.begin(), input_shape.end(), std::back_inserter(trans_input),
                        [](const int64_t dim) { return static_cast<size_t>(dim); });
-  auto is_scalar_shape = [](const vector<size_t> &shape) {
+  auto is_scalar_shape = [](const vector<int64_t> &shape) {
     return shape.empty() || (shape.size() == 1 && shape[0] == 1);
   };
   if ((!is_scalar_shape(trans_input) || !is_scalar_shape(parameter_shape)) && (trans_input != parameter_shape)) {
diff --git a/mindspore/ccsrc/backend/common/session/gpu_session.cc b/mindspore/ccsrc/backend/common/session/gpu_session.cc
index 7ad7c18353a..1a9c6122491 100644
--- a/mindspore/ccsrc/backend/common/session/gpu_session.cc
+++ b/mindspore/ccsrc/backend/common/session/gpu_session.cc
@@ -308,11 +308,9 @@ size_t UpdateGraphInputAbstract(const AnfNodePtr input_node, const tensor::TensorPtr tensor) {
   auto input_param = input_node->cast<ParameterPtr>();
   if (input_param != nullptr && input_param->has_dynamic_shape()) {
     auto tensor_shape = tensor->shape();
-    std::vector<size_t> shape_tmp;
-    (void)std::transform(tensor_shape.begin(), tensor_shape.end(), std::back_inserter(shape_tmp), LongToSize);
-    common::AnfAlgo::SetOutputInferTypeAndShape({common::AnfAlgo::GetOutputInferDataType(input_node, 0)}, {shape_tmp},
-                                                input_node.get());
-    size = abstract::ShapeSize(shape_tmp) * abstract::TypeIdSize(tensor->data_type());
+    common::AnfAlgo::SetOutputInferTypeAndShape({common::AnfAlgo::GetOutputInferDataType(input_node, 0)},
+                                                {tensor_shape}, input_node.get());
+    size = abstract::ShapeSize(tensor_shape) * abstract::TypeIdSize(tensor->data_type());
   }
   return size;
 }
@@ -631,9 +629,7 @@ void GPUSession::UpdateOutputTensors(const VectorRef *outputs,
       if (common::AnfAlgo::IsDynamicShape(node)) {
         const auto &updated_shape = common::AnfAlgo::GetOutputInferShape(node, output_index);
-        ShapeVector int_shape;
-        std::transform(updated_shape.begin(), updated_shape.end(), std::back_inserter(int_shape), SizeToInt);
-        tensor->set_shape(int_shape);
+        tensor->set_shape(updated_shape);
       }
     }
     if (tensor->NeedSyncDeviceToHostImmediately()) {
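Both session hunks above now feed the tensor's own int64 shape straight into the parameter's abstract and recompute the host buffer size from it. As a standalone sketch, what `abstract::ShapeSize(...) * abstract::TypeIdSize(...)` amounts to (assumes a static shape, which is what these call sites see once the real input arrives; the helper name is illustrative):

    #include <cstdint>
    #include <numeric>
    #include <vector>

    using ShapeVector = std::vector<int64_t>;

    // byte size = product of dims * bytes per element
    size_t TensorBytes(const ShapeVector &shape, size_t type_byte) {
      return std::accumulate(shape.begin(), shape.end(), type_byte,
                             [](size_t acc, int64_t dim) { return acc * static_cast<size_t>(dim); });
    }

    // e.g. TensorBytes({8, 128}, sizeof(float)) == 4096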
diff --git a/mindspore/ccsrc/backend/common/session/kernel_graph.cc b/mindspore/ccsrc/backend/common/session/kernel_graph.cc
index e8e7451aa7a..281734310d2 100644
--- a/mindspore/ccsrc/backend/common/session/kernel_graph.cc
+++ b/mindspore/ccsrc/backend/common/session/kernel_graph.cc
@@ -713,7 +713,7 @@ AnfNodePtr KernelGraph::CreatTupleGetItemNode(const AnfNodePtr &node, size_t output_idx) {
   AnfNodePtr tuple_getitem = NewCNode({mindspore::NewValueNode(prim::kPrimTupleGetItem), node, idx});
   MS_EXCEPTION_IF_NULL(tuple_getitem);
   tuple_getitem->set_scope(node->scope());
-  std::vector<size_t> origin_shape = common::AnfAlgo::GetOutputInferShape(node, output_idx);
+  auto origin_shape = common::AnfAlgo::GetOutputInferShape(node, output_idx);
   TypeId origin_type = common::AnfAlgo::GetOutputInferDataType(node, output_idx);
   common::AnfAlgo::SetOutputInferTypeAndShape({origin_type}, {origin_shape}, tuple_getitem.get());
   return tuple_getitem;
@@ -722,7 +722,7 @@ AnfNodePtr KernelGraph::TransCNodeTuple(const CNodePtr &node) {
   MS_EXCEPTION_IF_NULL(node);
   std::vector<TypeId> types;
-  std::vector<std::vector<size_t>> shapes;
+  std::vector<ShapeVector> shapes;
   std::vector<AnfNodePtr> make_tuple_inputs_list = {mindspore::NewValueNode(prim::kPrimMakeTuple)};
   size_t output_num = common::AnfAlgo::GetOutputTensorNum(node);
   for (size_t tuple_out_index = 0; tuple_out_index < output_num; ++tuple_out_index) {
diff --git a/mindspore/ccsrc/backend/common/session/session_basic.cc b/mindspore/ccsrc/backend/common/session/session_basic.cc
index 68e10db488b..3bc3254fa50 100644
--- a/mindspore/ccsrc/backend/common/session/session_basic.cc
+++ b/mindspore/ccsrc/backend/common/session/session_basic.cc
@@ -80,12 +80,7 @@ MS_REG_SESSION(kSessionBasic, SessionBasic);
 namespace {
 const int kSummaryGetItem = 2;
 const size_t max_depth = 128;
-bool IsShapeDynamic(const abstract::ShapePtr &shape) {
-  if (shape == nullptr) {
-    return false;
-  }
-  return std::any_of(shape->shape().begin(), shape->shape().end(), [](int64_t s) { return s < 0; });
-}
+
 bool RecursiveCheck(const FuncGraphManagerPtr &manager, const std::pair<AnfNodePtr, int64_t> &kernel, size_t *idx) {
   auto node = kernel.first;
   MS_EXCEPTION_IF_NULL(manager);
@@ -203,23 +198,24 @@ BaseRef CreateNodeOutputTensor(const session::KernelWithIndex &node_output_pair,
   if (type_id == kTypeUnknown) {
     type_id = common::AnfAlgo::GetOutputInferDataType(node, output_index);
   }
-  std::vector<int64_t> temp_shape;
+
   auto shape = common::AnfAlgo::GetOutputInferShape(node, output_index);
-  (void)std::copy(shape.begin(), shape.end(), std::back_inserter(temp_shape));
   if (common::AnfAlgo::IsDynamicShape(node)) {
     auto max_shape = common::AnfAlgo::GetOutputMaxShape(node, output_index);
-    temp_shape = abstract::ShapeSize(max_shape) > abstract::ShapeSize(temp_shape) ? max_shape : temp_shape;
+    if (abstract::ShapeSize(max_shape) > abstract::ShapeSize(shape)) {
+      shape = max_shape;
+    }
   }
   tensor::TensorPtr tensor;
   bool is_internal_output = graph->IsInternalOutput(node, output_index);
   if (is_internal_output) {
     tensor = graph->GetInternalOutputTensor(node, output_index);
     if (tensor == nullptr) {
-      tensor = std::make_shared<tensor::Tensor>(type_id, temp_shape);
+      tensor = std::make_shared<tensor::Tensor>(type_id, shape);
       graph->AddInternalOutputTensor(node, output_index, tensor);
     }
   } else {
-    tensor = std::make_shared<tensor::Tensor>(type_id, temp_shape);
+    tensor = std::make_shared<tensor::Tensor>(type_id, shape);
   }
   MS_EXCEPTION_IF_NULL(tensor);
   tensor->set_padding_type(AnfAlgo::GetOutputReshapeType(node, output_index));
@@ -477,7 +473,7 @@ void CheckInputTensorShape(const TensorPtr &tensor, const CNodePtr &kernel, size_t input_index) {
                       << "] of kernel: " << common::AnfAlgo::GetCNodeName(kernel) << trace::DumpSourceLines(kernel);
   }
   for (size_t i = 0; i < tensor_shape.size(); i++) {
-    if (tensor_shape[i] < 0 || static_cast<size_t>(tensor_shape[i]) != input_shape[i]) {
+    if (tensor_shape[i] < 0 || tensor_shape[i] != input_shape[i]) {
       MS_LOG(EXCEPTION) << "The input tensor's shape: " << tensor_shape
                         << " is not equal to expected shape: " << input_shape << " for input[" << input_index
                         << "] of kernel: " << common::AnfAlgo::GetCNodeName(kernel) << trace::DumpSourceLines(kernel);
@@ -1292,7 +1288,7 @@ void SessionBasic::SetInputNodeUsage(const KernelGraphPtr &graph, const FuncGraphManagerPtr &manager) {
       node_ptr->SetNotUsedByRealKernelInGraph(graph->graph_id());
     }
     auto shape = node_ptr->Shape();
-    if (IsShapeDynamic(shape->cast<abstract::ShapePtr>())) {
+    if (AnfUtils::IsShapeDynamic(shape->cast<abstract::ShapePtr>())) {
       node_ptr->set_has_dynamic_shape(true);
     }
   }
@@ -1442,8 +1438,8 @@ void SessionBasic::GetParameterIndex(const KernelGraph *graph, const std::vector<tensor::TensorPtr> &inputs,
                         << ") are different, input index: " << index << ", parameter: " << param->DebugString();
     }
     for (size_t i = 0; i < input_shape.size(); i += 1) {
-      if (input_shape[i] < 0 || (!is_parallel_forward_ms_function &&
-                                 static_cast<size_t>(input_shape[i]) != param_shape[i] && !is_dynamic)) {
+      if (input_shape[i] < 0 ||
+          (!is_parallel_forward_ms_function && input_shape[i] != param_shape[i] && !is_dynamic)) {
         MS_LOG(EXCEPTION) << "Input tensor shape(" << input_shape << ") and parameter shape(" << param_shape
                           << ") are different, input index: " << index << ", parameter: " << param->DebugString();
       }
@@ -1916,9 +1912,7 @@ void SessionBasic::UpdateOutputs(const std::shared_ptr<KernelGraph> &kernel_graph,
                   << ", device address " << tensor->device_address().get();
     if (common::AnfAlgo::IsDynamicShape(node)) {
       const auto &updated_shape = common::AnfAlgo::GetOutputInferShape(node, output_index);
-      ShapeVector int_shape;
-      (void)std::transform(updated_shape.begin(), updated_shape.end(), std::back_inserter(int_shape), SizeToInt);
-      (void)tensor->set_shape(int_shape);
+      (void)tensor->set_shape(updated_shape);
     }
     if (ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) != kPynativeMode) {
       tensor->data_sync(false);
@@ -2011,9 +2005,7 @@ void SessionBasic::UpdateOutputTensors(const VectorRef *outputs,
       if (common::AnfAlgo::IsDynamicShape(node)) {
         const auto &updated_shape = common::AnfAlgo::GetOutputInferShape(node, output_index);
-        ShapeVector int_shape;
-        (void)std::transform(updated_shape.begin(), updated_shape.end(), std::back_inserter(int_shape), SizeToInt);
-        (void)tensor->set_shape(int_shape);
+        (void)tensor->set_shape(updated_shape);
       }
     }
     if (tensor->NeedSyncDeviceToHostImmediately()) {
@@ -2041,10 +2033,7 @@ void SessionBasic::GetModelInputsInfo(uint32_t graph_id, std::vector<tensor::TensorPtr> *inputs,
     auto parameter = node->cast<ParameterPtr>();
     if (!common::AnfAlgo::IsParameterWeight(parameter)) {
-      vector<int64_t> input_shape;
-      auto parameter_shape = AnfAlgo::GetOutputDeviceShape(parameter, 0);
-      (void)std::transform(parameter_shape.begin(), parameter_shape.end(), std::back_inserter(input_shape),
-                           [](const size_t dim) { return SizeToLong(dim); });
+      vector<int64_t> input_shape = AnfAlgo::GetOutputDeviceShape(parameter, 0);
       auto kernel_build_info = AnfAlgo::GetSelectKernelBuildInfo(parameter);
       auto data_type = kernel_build_info->GetOutputDeviceType(0);
       auto ms_tensor = std::make_shared<tensor::Tensor>(data_type, input_shape);
@@ -2168,9 +2157,7 @@ void SessionBasic::Summary(KernelGraph *graph) {
       auto address = AnfAlgo::GetOutputAddr(node, index, false);
       auto shape = common::AnfAlgo::GetOutputInferShape(node, index);
       TypeId type_id = common::AnfAlgo::GetOutputInferDataType(node, index);
-      std::vector<int64_t> temp_shape;
-      (void)std::copy(shape.begin(), shape.end(), std::back_inserter(temp_shape));
-      tensor::TensorPtr tensor = std::make_shared<tensor::Tensor>(type_id, temp_shape);
+      tensor::TensorPtr tensor = std::make_shared<tensor::Tensor>(type_id, shape);
       MS_EXCEPTION_IF_NULL(address);
       if (!address->GetPtr()) {
         continue;
@@ -2435,7 +2422,7 @@ void SessionBasic::CreateOutputNode(const CNodePtr &cnode, const std::shared_ptr<KernelGraph> &graph) {
       idx->set_abstract(std::make_shared<abstract::AbstractScalar>(imm));
       auto getitem = graph->NewCNode({NewValueNode(prim::kPrimTupleGetItem), cnode, idx});
       std::vector<TypeId> types = {common::AnfAlgo::GetOutputInferDataType(cnode, output_index)};
-      std::vector<std::vector<size_t>> shapes = {common::AnfAlgo::GetOutputInferShape(cnode, output_index)};
+      auto shapes = {common::AnfAlgo::GetOutputInferShape(cnode, output_index)};
       common::AnfAlgo::SetOutputInferTypeAndShape(types, shapes, getitem.get());
       make_tuple_inputs.push_back(getitem);
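The CreateNodeOutputTensor hunk above keeps the rule that a dynamic-shape output is allocated with whichever of the inferred shape and the max shape covers more elements. A sketch under the assumption that `abstract::ShapeSize` is an element count; `NumElements` below only approximates it (using |dim| to tolerate -1 placeholders) and is not the real helper:

    #include <cstdint>
    #include <cstdlib>
    #include <numeric>
    #include <vector>

    using ShapeVector = std::vector<int64_t>;

    // Approximate element count, tolerating -1 placeholder dims.
    int64_t NumElements(const ShapeVector &shape) {
      return std::accumulate(shape.begin(), shape.end(), int64_t{1},
                             [](int64_t acc, int64_t dim) { return acc * std::abs(dim); });
    }

    // Allocate with the larger of the two candidate shapes so a dynamic output
    // never under-allocates its host tensor.
    ShapeVector ChooseAllocShape(const ShapeVector &inferred, const ShapeVector &max_shape) {
      return NumElements(max_shape) > NumElements(inferred) ? max_shape : inferred;
    }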
diff --git a/mindspore/ccsrc/backend/common/session/single_kernel_graph.cc b/mindspore/ccsrc/backend/common/session/single_kernel_graph.cc
index b77a236ba49..03953a10872 100644
--- a/mindspore/ccsrc/backend/common/session/single_kernel_graph.cc
+++ b/mindspore/ccsrc/backend/common/session/single_kernel_graph.cc
@@ -22,7 +22,7 @@ namespace mindspore {
 namespace session {
 std::shared_ptr<session::KernelGraph> SingleKernelGraph::ConstructKernelGraphBasedOnSingleOp(
     const std::string &op_name, const std::vector<TypeId> &input_dtypes, const std::vector<ShapeVector> &input_shapes,
-    const std::vector<TypeId> &output_dtypes, const std::vector<std::vector<size_t>> &output_shapes) {
+    const std::vector<TypeId> &output_dtypes, const std::vector<ShapeVector> &output_shapes) {
   auto graph = std::make_shared<session::KernelGraph>();
   MS_EXCEPTION_IF_NULL(graph);
   std::vector<AnfNodePtr> inputs;
diff --git a/mindspore/ccsrc/backend/common/session/single_kernel_graph.h b/mindspore/ccsrc/backend/common/session/single_kernel_graph.h
index d7d09010174..c050400beb2 100644
--- a/mindspore/ccsrc/backend/common/session/single_kernel_graph.h
+++ b/mindspore/ccsrc/backend/common/session/single_kernel_graph.h
@@ -31,7 +31,7 @@ class SingleKernelGraph {
   static std::shared_ptr<session::KernelGraph> ConstructKernelGraphBasedOnSingleOp(
     const std::string &op_name, const std::vector<TypeId> &input_dtypes, const std::vector<ShapeVector> &input_shapes,
-    const std::vector<TypeId> &output_dtypes, const std::vector<std::vector<size_t>> &output_shapes);
+    const std::vector<TypeId> &output_dtypes, const std::vector<ShapeVector> &output_shapes);
 };
 }  // namespace session
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/backend/graph_compiler/backend.cc b/mindspore/ccsrc/backend/graph_compiler/backend.cc
index 4dbdabd626a..575a6e31d39 100644
--- a/mindspore/ccsrc/backend/graph_compiler/backend.cc
+++ b/mindspore/ccsrc/backend/graph_compiler/backend.cc
@@ -308,10 +308,8 @@ TensorPtr CreateOutputTensor(const AnfNodePtr &output_node, size_t output_index) {
   // Create host tensor, the output tensor should use the infer type, it will be handed correctly by tensor data sync
   // when infer type is not equal to device type.
   auto type_id = common::AnfAlgo::GetOutputInferDataType(output_node, output_index);
-  std::vector<int64_t> temp_shape;
   const auto &shape = common::AnfAlgo::GetOutputInferShape(output_node, output_index);
-  (void)std::copy(shape.begin(), shape.end(), std::back_inserter(temp_shape));
-  auto tensor = std::make_shared<tensor::Tensor>(type_id, temp_shape);
+  auto tensor = std::make_shared<tensor::Tensor>(type_id, shape);
   tensor->set_padding_type(AnfAlgo::GetOutputReshapeType(output_node, output_index));
 
   // Put device tensor into host tensor.
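The backend.cc hunk is representative of many deletions in this series: once `common::AnfAlgo::GetOutputInferShape` returns `ShapeVector`, which is the type `tensor::Tensor`'s constructor accepts, the element-wise copy into a temporary is dead code. A toy illustration of the pattern (`FakeTensor` stands in for `tensor::Tensor` and is not a real type):

    #include <cstdint>
    #include <vector>

    using ShapeVector = std::vector<int64_t>;

    struct FakeTensor {
      int type_id;
      ShapeVector shape;
    };

    FakeTensor CreateOutputTensor(int type_id, const ShapeVector &infer_shape) {
      // before: copy infer_shape element-by-element into temp_shape, then construct;
      // after: construct straight from the inferred shape.
      return FakeTensor{type_id, infer_shape};
    }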
diff --git a/mindspore/ccsrc/common/graph_kernel/adapter/callback_impl.cc b/mindspore/ccsrc/common/graph_kernel/adapter/callback_impl.cc
index 87108fb25c9..f5b2219b18d 100644
--- a/mindspore/ccsrc/common/graph_kernel/adapter/callback_impl.cc
+++ b/mindspore/ccsrc/common/graph_kernel/adapter/callback_impl.cc
@@ -34,31 +34,19 @@ namespace mindspore::graphkernel {
 GRAPH_KERNEL_CALLBACK_REGISTER(CallbackImpl);
 ShapeVector CallbackImpl::GetInputShape(const AnfNodePtr &node, size_t i) {
-  auto vec = AnfAlgo::GetInputDeviceShape(node, i);
-  ShapeVector ret;
-  (void)std::transform(vec.begin(), vec.end(), std::back_inserter(ret), SizeToLong);
-  return ret;
+  return AnfAlgo::GetInputDeviceShape(node, i);
 }
 
 ShapeVector CallbackImpl::GetOutputShape(const AnfNodePtr &node, size_t i) {
-  auto vec = AnfAlgo::GetOutputDeviceShape(node, i);
-  ShapeVector ret;
-  (void)std::transform(vec.begin(), vec.end(), std::back_inserter(ret), SizeToLong);
-  return ret;
+  return AnfAlgo::GetOutputDeviceShape(node, i);
 }
 
 ShapeVector CallbackImpl::GetInputInferShape(const AnfNodePtr &node, size_t i) {
-  auto vec = common::AnfAlgo::GetPrevNodeOutputInferShape(node, i);
-  ShapeVector ret;
-  (void)std::transform(vec.begin(), vec.end(), std::back_inserter(ret), SizeToLong);
-  return ret;
+  return common::AnfAlgo::GetPrevNodeOutputInferShape(node, i);
 }
 
 ShapeVector CallbackImpl::GetOutputInferShape(const AnfNodePtr &node, size_t i) {
-  auto vec = common::AnfAlgo::GetOutputInferShape(node, i);
-  ShapeVector ret;
-  (void)std::transform(vec.begin(), vec.end(), std::back_inserter(ret), SizeToLong);
-  return ret;
+  return common::AnfAlgo::GetOutputInferShape(node, i);
 }
 
 TypeId CallbackImpl::GetInputType(const AnfNodePtr &node, size_t i) { return AnfAlgo::GetInputDeviceDataType(node, i); }
@@ -235,7 +223,7 @@ ShapeVector CallbackImplWithInferShape::GetInputShape(const AnfNodePtr &node, size_t i) {
 }
 
 ShapeVector CallbackImplWithInferShape::GetOutputShape(const AnfNodePtr &node, size_t i) {
-  return CallbackImpl::GetOutputInferShape(node, i);
+  return common::AnfAlgo::GetOutputInferShape(node, i);
 }
 
 TypeId CallbackImplWithInferShape::GetInputType(const AnfNodePtr &node, size_t i) {
diff --git a/mindspore/ccsrc/common/graph_kernel/decrease_compute_precision.cc b/mindspore/ccsrc/common/graph_kernel/decrease_compute_precision.cc
index 160bf456e57..a2af3c8d79d 100644
--- a/mindspore/ccsrc/common/graph_kernel/decrease_compute_precision.cc
+++ b/mindspore/ccsrc/common/graph_kernel/decrease_compute_precision.cc
@@ -34,7 +34,7 @@ namespace mindspore::graphkernel {
 // Add CastCNode
 CNodePtr AddCastCNode(const FuncGraphPtr &func_graph, const AnfNodePtr &input, const std::string &format,
-                      const TypeId &input_type, const TypeId &output_type, const std::vector<size_t> &origin_shape,
+                      const TypeId &input_type, const TypeId &output_type, const ShapeVector &origin_shape,
                       const TypeId &origin_type) {
   MS_EXCEPTION_IF_NULL(func_graph);
   CNodePtr cast = func_graph->NewCNode({NewValueNode(std::make_shared<Primitive>(prim::kPrimCast->name())), input});
diff --git a/mindspore/ccsrc/common/graph_kernel/graph_kernel_helper.cc b/mindspore/ccsrc/common/graph_kernel/graph_kernel_helper.cc
index 37e56cde3bf..c38150c7f39 100644
--- a/mindspore/ccsrc/common/graph_kernel/graph_kernel_helper.cc
+++ b/mindspore/ccsrc/common/graph_kernel/graph_kernel_helper.cc
@@ -293,14 +293,8 @@ ShapeVector GetShape(const AnfNodePtr &node) {
 }
 
 ShapeVector GetDeviceShape(const AnfNodePtr &node) {
-  ShapeVector res_device_shape;
-  auto device_shape = AnfAlgo::GetOutputDeviceShape(node, 0);
-  if (device_shape.empty()) {
-    res_device_shape.push_back(1);
-  } else {
-    (void)std::transform(device_shape.begin(), device_shape.end(), std::back_inserter(res_device_shape), SizeToLong);
-  }
-  return res_device_shape;
+  ShapeVector res_device_shape = AnfAlgo::GetOutputDeviceShape(node, 0);
+  return res_device_shape.empty() ? ShapeVector({1}) : res_device_shape;
 }
 
 std::vector<int64_t> GetReduceAxis(const AnfNodePtr &node) {
diff --git a/mindspore/ccsrc/common/graph_kernel/insert_pad.cc b/mindspore/ccsrc/common/graph_kernel/insert_pad.cc
index 3d460e0dab4..6fba4b6eb76 100644
--- a/mindspore/ccsrc/common/graph_kernel/insert_pad.cc
+++ b/mindspore/ccsrc/common/graph_kernel/insert_pad.cc
@@ -28,7 +28,7 @@ GVAR_DEF(PrimitivePtr, kPrimPadAkg, std::make_shared<Primitive>("PadAkg"));
 }  // namespace prim
 namespace graphkernel {
 namespace {
-using vec = std::vector<size_t>;
+using vec = std::vector<int64_t>;
 constexpr size_t MAX_PER_DIM_SHAPE = 4096;
 constexpr int64_t MAX_ALL_SHAPE = static_cast<int64_t>(3e10);
diff --git a/mindspore/ccsrc/common/graph_kernel/reshape_reduce_for_cse.cc b/mindspore/ccsrc/common/graph_kernel/reshape_reduce_for_cse.cc
index be4196de343..c3f0ef9e341 100644
--- a/mindspore/ccsrc/common/graph_kernel/reshape_reduce_for_cse.cc
+++ b/mindspore/ccsrc/common/graph_kernel/reshape_reduce_for_cse.cc
@@ -36,28 +36,26 @@ namespace {
  * after: keep_dims=True, axis=(1) ,out_shape=(a,1,b,c)
  */
 void ResetReduceAttrAndShape(const AnfNodePtr &node, const std::vector<TypeId> &target_output_types,
-                             const std::vector<std::vector<size_t>> &target_output_shapes) {
+                             const std::vector<ShapeVector> &target_output_shapes) {
   common::AnfAlgo::SetNodeAttr(kAttrKeepDims, MakeValue(true), node);
   common::AnfAlgo::SetOutputInferTypeAndShape(target_output_types, target_output_shapes, node.get());
 }
 
 size_t ProcessTupleGetItem(const AnfNodePtr &node, const std::vector<TypeId> &target_output_types,
-                           const std::vector<std::vector<size_t>> &target_output_shapes) {
+                           const std::vector<ShapeVector> &target_output_shapes) {
   size_t index = common::AnfAlgo::GetTupleGetItemOutIndex(node->cast<CNodePtr>());
   common::AnfAlgo::SetOutputInferTypeAndShape({target_output_types[index]}, {target_output_shapes[index]}, node.get());
   return index;
 }
 
 void InsertReshape(const FuncGraphPtr &graph, const AnfNodePtr &node, const TypeId &infer_type,
-                   const std::vector<size_t> &infer_shape, const TypeId &device_type) {
+                   const ShapeVector &infer_shape, const TypeId &device_type) {
   auto manager = graph->manager();
   MS_EXCEPTION_IF_NULL(manager);
   std::vector<AnfNodePtr> inputs = {NewValueNode(std::make_shared<Primitive>(prim::kPrimReshape->name())), node};
   auto reshape = graph->NewCNode(inputs);
   MS_EXCEPTION_IF_NULL(reshape);
-  std::vector<int64_t> reshape_size;
-  (void)std::transform(infer_shape.begin(), infer_shape.end(), std::back_inserter(reshape_size), SizeToLong);
-  common::AnfAlgo::SetNodeAttr(kAttrShape, MakeValue(reshape_size), reshape);
+  common::AnfAlgo::SetNodeAttr(kAttrShape, MakeValue(infer_shape), reshape);
   common::AnfAlgo::SetOutputInferTypeAndShape({infer_type}, {infer_shape}, reshape.get());
   auto graph_sel_info =
     BuildSelectKernelBuildInfo({kOpFormat_DEFAULT}, {device_type}, {kOpFormat_DEFAULT}, {device_type});
@@ -66,8 +64,8 @@ void InsertReshape(const FuncGraphPtr &graph, const AnfNodePtr &node, const TypeId &infer_type,
 }
 
 void InsertReshapeForMultiOutputs(const FuncGraphPtr &graph, const AnfNodePtr &node,
-                                  const std::vector<std::vector<size_t>> &origin_output_shapes,
-                                  const std::vector<std::vector<size_t>> &target_output_shapes,
+                                  const std::vector<ShapeVector> &origin_output_shapes,
+                                  const std::vector<ShapeVector> &target_output_shapes,
                                   const std::vector<TypeId> &target_output_types, const AnfNodePtr &target) {
   auto used_node_list = opt::GetRealNodeUsedList(graph, node);
   MS_EXCEPTION_IF_NULL(used_node_list);
@@ -154,12 +152,12 @@ bool ReshapeReduceForCSE::Run(const FuncGraphPtr &graph) {
                         << target_output_num << ".";
     }
 
-    std::vector<std::vector<size_t>> origin_output_shapes;
+    std::vector<ShapeVector> origin_output_shapes;
     for (size_t i = 0; i < output_num; i++) {
       (void)origin_output_shapes.emplace_back(common::AnfAlgo::GetOutputInferShape(node, i));
     }
 
-    std::vector<std::vector<size_t>> target_output_shapes;
+    std::vector<ShapeVector> target_output_shapes;
     std::vector<TypeId> target_output_types;
     for (size_t i = 0; i < target_output_num; i++) {
       (void)target_output_shapes.emplace_back(common::AnfAlgo::GetOutputInferShape(target, i));
diff --git a/mindspore/ccsrc/common/graph_kernel/substitute_dropout.cc b/mindspore/ccsrc/common/graph_kernel/substitute_dropout.cc
index e3dd3d38e2b..428da79b6ed 100644
--- a/mindspore/ccsrc/common/graph_kernel/substitute_dropout.cc
+++ b/mindspore/ccsrc/common/graph_kernel/substitute_dropout.cc
@@ -47,8 +47,6 @@ AnfNodePtr DropoutExpanderDeco::Run(const AnfNodePtr &node) {
   auto func_graph = node->func_graph();
   CheckCNodeInputSize(cnode, kDropoutInputTensorNum);
   auto shape = AnfAlgo::GetInputDeviceShape(cnode, 0);
-  ShapeVector shape_i64;
-  (void)std::transform(shape.begin(), shape.end(), std::back_inserter(shape_i64), SizeToLong);
   // Get seed from original dropout's attrs, rather than set seed by time.
   // Only seed0 and seed1 are all equal to 0, then set seed = time.
   auto node_prim = GetCNodePrimitive(node);
@@ -69,7 +67,7 @@ AnfNodePtr DropoutExpanderDeco::Run(const AnfNodePtr &node) {
   auto uniform_real_node = func_graph->NewCNode(uniform_real_input);
   SetNodeAttrSafely("seed", MakeValue(seed), uniform_real_node);
   common::AnfAlgo::SetNodeAttr("seed2", MakeValue(static_cast<int64_t>(0)), uniform_real_node);
-  uniform_real_node->set_abstract(std::make_shared<abstract::AbstractTensor>(kFloat32, shape_i64));
+  uniform_real_node->set_abstract(std::make_shared<abstract::AbstractTensor>(kFloat32, shape));
   // Set kernel_info for uniform_real node
   auto uniform_real_kernel_info_builder = std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>();
   uniform_real_kernel_info_builder->SetInputsFormat({kOpFormat_DEFAULT});
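GetDeviceShape above now normalizes a scalar (rank-0) output to {1} in a single expression, so downstream buffer-size math never sees an empty shape. The contract, as a tiny sketch:

    #include <cstdint>
    #include <vector>

    using ShapeVector = std::vector<int64_t>;

    // Device code treats a scalar output as shape {1}.
    ShapeVector NormalizeScalarShape(ShapeVector shape) {
      return shape.empty() ? ShapeVector({1}) : shape;
    }

    // NormalizeScalarShape({})     -> {1}
    // NormalizeScalarShape({2, 3}) -> {2, 3}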
diff --git a/mindspore/ccsrc/common/mem_reuse/mem_reuse_checker.cc b/mindspore/ccsrc/common/mem_reuse/mem_reuse_checker.cc
index 586a2660b35..411701d1fe0 100644
--- a/mindspore/ccsrc/common/mem_reuse/mem_reuse_checker.cc
+++ b/mindspore/ccsrc/common/mem_reuse/mem_reuse_checker.cc
@@ -71,9 +71,8 @@ int64_t MemReuseChecker::CalculOriInput(const KernelGraph *graph) const {
       ou_type = common::AnfAlgo::GetOutputInferDataType(item, index);
     }
     size_t type_size = GetTypeByte(TypeIdToType(ou_type));
-    std::vector<size_t> shape = AnfAlgo::GetOutputDeviceShape(item, index);
-    size_t tensor_size =
-      shape.empty() ? type_size : std::accumulate(shape.begin(), shape.end(), type_size, std::multiplies<size_t>());
+    auto shape = AnfAlgo::GetOutputDeviceShape(item, index);
+    size_t tensor_size = type_size * SizeOf(shape);
     auto checker_size = SizeToLong(tensor_size);
     static_input_size += checker_size;
   }
diff --git a/mindspore/ccsrc/debug/data_dump/dump_utils.cc b/mindspore/ccsrc/debug/data_dump/dump_utils.cc
index a0ac65b8268..6d2c70b52fa 100644
--- a/mindspore/ccsrc/debug/data_dump/dump_utils.cc
+++ b/mindspore/ccsrc/debug/data_dump/dump_utils.cc
@@ -89,9 +89,7 @@ void GetDumpIntShape(const AnfNodePtr &node, size_t index, NotNull<ShapeVector *> const int_shapes,
diff --git a/mindspore/ccsrc/fl/server/kernel/fed_avg_kernel.h b/mindspore/ccsrc/fl/server/kernel/fed_avg_kernel.h
@@ ... @@
-      std::vector<size_t> weight_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, cnode_weight_idx_);
+      auto weight_shape = Convert2SizeT(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, cnode_weight_idx_));
       size_t weight_size =
         std::accumulate(weight_shape.begin(), weight_shape.end(), sizeof(T), std::multiplies<size_t>());
       size_t new_weight_size = weight_size;
type_size : std::accumulate(shape.begin(), shape.end(), type_size, std::multiplies()); output_size_list_.emplace_back(tensor_size); diff --git a/mindspore/ccsrc/frontend/optimizer/irpass/ge/sparse_softmax_cross_entropy_with_logits_split.cc b/mindspore/ccsrc/frontend/optimizer/irpass/ge/sparse_softmax_cross_entropy_with_logits_split.cc index 8aebcdfa99a..8b02a88b7bb 100644 --- a/mindspore/ccsrc/frontend/optimizer/irpass/ge/sparse_softmax_cross_entropy_with_logits_split.cc +++ b/mindspore/ccsrc/frontend/optimizer/irpass/ge/sparse_softmax_cross_entropy_with_logits_split.cc @@ -79,11 +79,11 @@ CNodePtr CreateOneHot(const FuncGraphPtr &graph, const CNodePtr &sparse_softmax_ MS_EXCEPTION_IF_NULL(graph); MS_EXCEPTION_IF_NULL(sparse_softmax_node); - std::vector logits_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(sparse_softmax_node, 0); + auto logits_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(sparse_softmax_node, 0); int64_t depth = 0; if (!logits_shape.empty()) { size_t index = logits_shape.size() - 1; - depth = SizeToLong(logits_shape[index]); + depth = logits_shape[index]; } else { MS_LOG(EXCEPTION) << "Logits's shape of node [" << sparse_softmax_node->DebugString() << "] is empty" << trace::DumpSourceLines(sparse_softmax_node); @@ -114,18 +114,17 @@ CNodePtr CreateOneHot(const FuncGraphPtr &graph, const CNodePtr &sparse_softmax_ auto one_hot_node = graph->NewCNode(one_hot_inputs); MS_EXCEPTION_IF_NULL(one_hot_node); one_hot_node->set_scope(sparse_softmax_node->scope()); - std::vector labels_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(sparse_softmax_node, 1); + auto labels_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(sparse_softmax_node, 1); labels_shape.emplace_back(depth); - if (AnfUtils::IsShapeDynamic(labels_shape)) { + if (IsDynamic(labels_shape)) { auto kernel_info = common::AnfAlgo::GetPrevNodeOutput(sparse_softmax_node, 1); auto min_shape = common::AnfAlgo::GetOutputMinShape(kernel_info.first, kernel_info.second); auto max_shape = common::AnfAlgo::GetOutputMaxShape(kernel_info.first, kernel_info.second); - std::vector shape_tmp; - std::transform(labels_shape.begin(), labels_shape.end(), std::back_inserter(shape_tmp), SizeToLong); min_shape.emplace_back(depth); max_shape.emplace_back(depth); common::AnfAlgo::SetOutputTypeAndDetailShape( - {kNumberTypeFloat32}, {std::make_shared(shape_tmp, min_shape, max_shape)}, one_hot_node.get()); + {kNumberTypeFloat32}, {std::make_shared(labels_shape, min_shape, max_shape)}, + one_hot_node.get()); } else { common::AnfAlgo::SetOutputInferTypeAndShape({kNumberTypeFloat32}, {labels_shape}, one_hot_node.get()); } @@ -144,8 +143,8 @@ CNodePtr CreateSoftmaxCrossEntropyWithLogits(const FuncGraphPtr &graph, const CN MS_EXCEPTION_IF_NULL(softmax_node); softmax_node->set_scope(sparse_softmax_node->scope()); - std::vector labels_shape = common::AnfAlgo::GetOutputInferShape(one_hot_node, 0); - std::vector loss_shape; + auto labels_shape = common::AnfAlgo::GetOutputInferShape(one_hot_node, 0); + ShapeVector loss_shape; if (!labels_shape.empty()) { loss_shape.emplace_back(labels_shape[0]); } else { @@ -154,19 +153,18 @@ CNodePtr CreateSoftmaxCrossEntropyWithLogits(const FuncGraphPtr &graph, const CN auto data_types = common::AnfAlgo::GetOutputInferDataType(one_hot_node, 0); auto types = {data_types, data_types}; - if (AnfUtils::IsShapeDynamic(labels_shape)) { - ShapeVector shape_tmp = {static_cast(labels_shape[0])}; + if (IsDynamic(labels_shape)) { auto min_shape = common::AnfAlgo::GetOutputMinShape(one_hot_node, 0); auto 
max_shape = common::AnfAlgo::GetOutputMaxShape(one_hot_node, 0); std::vector shapes; if (!min_shape.empty() && !max_shape.empty()) { - shapes = {std::make_shared(shape_tmp, ShapeVector(min_shape[0]), ShapeVector(max_shape[0])), + shapes = {std::make_shared(ShapeVector({labels_shape[0]}), ShapeVector(min_shape[0]), + ShapeVector(max_shape[0])), common::AnfAlgo::GetOutputDetailShape(one_hot_node, 0)}; } else { - shapes = {std::make_shared(shape_tmp, ShapeVector({}), ShapeVector({})), + shapes = {std::make_shared(ShapeVector({labels_shape[0]})), common::AnfAlgo::GetOutputDetailShape(one_hot_node, 0)}; } - common::AnfAlgo::SetOutputTypeAndDetailShape(types, shapes, softmax_node.get()); } else { // Loss shape:(N,) labels shape:(N,C) @@ -201,7 +199,7 @@ void CreateMultiOutputsOfAnfNode(const FuncGraphPtr &func_graph, const AnfNodePt std::vector GetAxis(const AnfNodePtr &node) { MS_EXCEPTION_IF_NULL(node); - std::vector output_shape = common::AnfAlgo::GetOutputInferShape(node, 0); + auto output_shape = common::AnfAlgo::GetOutputInferShape(node, 0); if (output_shape.empty()) { MS_LOG(EXCEPTION) << node->fullname_with_scope() << "'s output shape is empty" << trace::DumpSourceLines(node); } @@ -297,10 +295,7 @@ CNodePtr CreateTile(const FuncGraphPtr &graph, const CNodePtr &sparse_softmax_no CheckCNodeInputSize(sparse_softmax_node, kSparseSoftmaxCrossEntropyWithLogitsInputTensorsNum); CheckCNodeInputSize(mul_node, kMulInputTensorNum); - auto labels_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(sparse_softmax_node, 1); - std::vector multiple_value; - std::transform(labels_shape.begin(), labels_shape.end(), std::back_inserter(multiple_value), - [](size_t label) { return static_cast(label); }); + auto multiple_value = common::AnfAlgo::GetPrevNodeOutputInferShape(sparse_softmax_node, 1); if (std::all_of(multiple_value.begin(), multiple_value.end(), [](int64_t value) { return value == 1; })) { return nullptr; } @@ -337,7 +332,7 @@ CNodePtr CreateRealDiv(const FuncGraphPtr &graph, const CNodePtr &sparse_softmax MS_EXCEPTION_IF_NULL(sparse_softmax_node); MS_EXCEPTION_IF_NULL(tile_node); CheckCNodeInputSize(sparse_softmax_node, kSparseSoftmaxCrossEntropyWithLogitsInputTensorsNum); - std::vector labels_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(sparse_softmax_node, 1); + auto labels_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(sparse_softmax_node, 1); if (labels_shape.size() != 1) { MS_LOG(EXCEPTION) << "Label's shape should be 1-D, but got " << labels_shape.size() << trace::DumpSourceLines(sparse_softmax_node); @@ -388,17 +383,15 @@ CNodePtr CreateExpandDims(const FuncGraphPtr &graph, const CNodePtr &real_div_no MS_EXCEPTION_IF_NULL(expand_dims_node); expand_dims_node->set_scope(real_div_node->scope()); - std::vector y_shape = common::AnfAlgo::GetOutputInferShape(real_div_node, 0); + auto y_shape = common::AnfAlgo::GetOutputInferShape(real_div_node, 0); y_shape.emplace_back(1); - if (AnfUtils::IsShapeDynamic(y_shape)) { + if (IsDynamic(y_shape)) { auto min_shape = common::AnfAlgo::GetOutputMinShape(real_div_node, 0); auto max_shape = common::AnfAlgo::GetOutputMaxShape(real_div_node, 0); min_shape.emplace_back(1); max_shape.emplace_back(1); - std::vector shape_tmp; - std::transform(y_shape.begin(), y_shape.end(), std::back_inserter(shape_tmp), SizeToLong); common::AnfAlgo::SetOutputTypeAndDetailShape({common::AnfAlgo::GetOutputInferDataType(real_div_node, 0)}, - {std::make_shared(shape_tmp, min_shape, max_shape)}, + {std::make_shared(y_shape, min_shape, max_shape)}, 
expand_dims_node.get()); } else { common::AnfAlgo::SetOutputInferTypeAndShape({common::AnfAlgo::GetOutputInferDataType(real_div_node, 0)}, {y_shape}, diff --git a/mindspore/ccsrc/frontend/parallel/cache_embedding/ps_embedding_cache_inserter.cc b/mindspore/ccsrc/frontend/parallel/cache_embedding/ps_embedding_cache_inserter.cc index 49beb881bfe..be2b1400f88 100644 --- a/mindspore/ccsrc/frontend/parallel/cache_embedding/ps_embedding_cache_inserter.cc +++ b/mindspore/ccsrc/frontend/parallel/cache_embedding/ps_embedding_cache_inserter.cc @@ -116,7 +116,7 @@ void PsEmbeddingCacheInserter::GetEmbeddingLookupNodes() { int64_t rank_id_attr = GetValue(prim->GetAttr(distributed::kOpLabelRankId)); std::string node_role_attr = GetValue(prim->GetAttr(distributed::kOpLabelRole)); if (rank_id_attr == rank_id_ && node_role_attr == node_role_) { - std::vector shape = common::AnfAlgo::GetPrevNodeOutputInferShape(node, 0); + auto shape = common::AnfAlgo::GetPrevNodeOutputInferShape(node, 0); shapes_to_nodes_[shape] = node; } }); @@ -329,12 +329,12 @@ FuncGraphPtr PsEmbeddingCacheInserter::ConstructUpdateEmbeddingSubGraph(const Pa ParameterPtr update_values = graph->add_parameter(); MS_EXCEPTION_IF_NULL(update_values); - std::vector emb_shape = common::AnfAlgo::GetOutputInferShape(param, 0); + auto emb_shape = common::AnfAlgo::GetOutputInferShape(param, 0); if (emb_shape.size() != kEmbeddingTableDims) { MS_LOG(EXCEPTION) << "Embedding table should be 2 dims for embedding cache mode, but got: " << emb_shape.size() << " dims"; } - int64_t emb_dim = SizeToLong(emb_shape.back()); + int64_t emb_dim = emb_shape.back(); ShapeVector update_values_shape = {-1, emb_dim}; ShapeVector update_values_min_shape = {1, emb_dim}; ShapeVector update_values_max_shape = {1, emb_dim}; diff --git a/mindspore/ccsrc/frontend/parallel/cache_embedding/ps_embedding_cache_inserter.h b/mindspore/ccsrc/frontend/parallel/cache_embedding/ps_embedding_cache_inserter.h index e60a16e3229..2985449a378 100644 --- a/mindspore/ccsrc/frontend/parallel/cache_embedding/ps_embedding_cache_inserter.h +++ b/mindspore/ccsrc/frontend/parallel/cache_embedding/ps_embedding_cache_inserter.h @@ -23,6 +23,7 @@ #include "ir/anf.h" #include "distributed/constants.h" +#include "utils/shape_utils.h" namespace mindspore { namespace parallel { @@ -107,7 +108,7 @@ class PsEmbeddingCacheInserter { // Record EmbeddingLookup nodes which are executed on server from origin function graph. // Key: shape of EmbeddingLookup node, Value: EmbeddingLookup AnfNodePtr. - std::map, AnfNodePtr> shapes_to_nodes_; + std::map shapes_to_nodes_; }; } // namespace parallel } // namespace mindspore diff --git a/mindspore/ccsrc/include/common/utils/anfalgo.h b/mindspore/ccsrc/include/common/utils/anfalgo.h index d38350c5940..5638eec2857 100644 --- a/mindspore/ccsrc/include/common/utils/anfalgo.h +++ b/mindspore/ccsrc/include/common/utils/anfalgo.h @@ -25,6 +25,7 @@ #include #include #include +#include #include #include "ir/anf.h" #include "ir/func_graph.h" @@ -111,11 +112,11 @@ class COMMON_EXPORT AnfAlgo { // from std::vector to ShapeVector static ShapeVector GetOutputInferShapeSigned(const AnfNodePtr &node, size_t output_idx); // get output shapes inferred by ME from input nodes. 
diff --git a/mindspore/ccsrc/include/common/utils/anfalgo.h b/mindspore/ccsrc/include/common/utils/anfalgo.h
index d38350c5940..5638eec2857 100644
--- a/mindspore/ccsrc/include/common/utils/anfalgo.h
+++ b/mindspore/ccsrc/include/common/utils/anfalgo.h
@@ -25,6 +25,7 @@
 #include <memory>
 #include <set>
 #include <string>
+#include <numeric>
 #include <utility>
 #include "ir/anf.h"
 #include "ir/func_graph.h"
@@ -111,11 +112,11 @@ class COMMON_EXPORT AnfAlgo {
   // from std::vector<size_t> to ShapeVector
   static ShapeVector GetOutputInferShapeSigned(const AnfNodePtr &node, size_t output_idx);
   // get output shapes inferred by ME from input nodes.
-  static std::vector<size_t> GetOutputInferShape(const AnfNodePtr &node, size_t output_idx);
-  static std::vector<size_t> GetOutputInferShape(const AnfNodePtr &node, const abstract::BaseShapePtr &base_shape,
-                                                 size_t output_idx);
+  static ShapeVector GetOutputInferShape(const AnfNodePtr &node, size_t output_idx);
+  static ShapeVector GetOutputInferShape(const AnfNodePtr &node, const abstract::BaseShapePtr &base_shape,
+                                         size_t output_idx);
   // get input shapes inferred by ME from input nodes.
-  static std::vector<size_t> GetPrevNodeOutputInferShape(const AnfNodePtr &node, size_t input_idx);
+  static ShapeVector GetPrevNodeOutputInferShape(const AnfNodePtr &node, size_t input_idx);
   // get output data type inferred by ME of anf node
   static TypeId GetOutputInferDataType(const AnfNodePtr &node, size_t output_idx);
   static TypeId GetOutputInferDataType(const TypePtr &type_ptr, size_t output_idx);
@@ -125,8 +126,8 @@ class COMMON_EXPORT AnfAlgo {
   // get output original data type from prev node,input_index is the input index of current node related to prev node
   static TypeId GetPrevNodeOutputInferDataType(const AnfNodePtr &node, size_t input_idx);
   // set infer shapes and types of anf node
-  static void SetOutputInferTypeAndShape(const std::vector<TypeId> &types,
-                                         const std::vector<std::vector<size_t>> &shapes, AnfNode *node);
+  static void SetOutputInferTypeAndShape(const std::vector<TypeId> &types, const std::vector<ShapeVector> &shapes,
+                                         AnfNode *node);
   // get and set output shape ptr
   static abstract::BaseShapePtr GetOutputDetailShape(const AnfNodePtr &node, size_t output_idx);
   static abstract::BaseShapePtr GetPrevNodeOutputDetailShape(const AnfNodePtr &node, size_t input_idx);
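// Illustrative aside, not part of the patch: the ShapeVector type these
// signatures migrate to is MindSpore's alias for std::vector<int64_t> (declared
// in utils/shape_utils.h). Moving from unsigned size_t to a signed element type
// is what lets a dimension carry the -1 "unknown" marker used by dynamic shapes.
// Minimal self-contained model of that convention (the alias is assumed here):
#include <cassert>
#include <cstdint>
#include <vector>

using ShapeVector = std::vector<int64_t>;  // assumed alias, mirroring upstream

int main() {
  ShapeVector static_shape = {32, 128};
  ShapeVector dynamic_shape = {-1, 128};  // batch dimension unknown until runtime
  assert(static_shape[0] > 0);
  assert(dynamic_shape[0] < 0);  // representable only with signed dims
  return 0;
}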
@@ -184,15 +185,31 @@ class COMMON_EXPORT AnfAlgo {
   // Get node real inputs, skip `MakeTuple`, `TupleGetItem`, `Depend`, `Load`, `UpdateState` etc.
   static void GetRealInputs(const AnfNodePtr &anf_node, std::vector<KernelWithIndex> *inputs);
   // Check whether tensors need broadcast or not.
-  static bool IsTensorBroadcast(const std::vector<size_t> &lhs, const std::vector<size_t> &rhs);
+  template <typename T>
+  static inline bool IsTensorBroadcast(const std::vector<T> &lhs, const std::vector<T> &rhs) {
+    if (lhs.size() != rhs.size()) {
+      return true;
+    }
+    for (size_t i = 0; i < lhs.size(); i++) {
+      if (lhs[i] != rhs[i]) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  // Calc tensor size in byte.
+  template <typename T>
+  static size_t TensorSizeInByte(const ShapeVector &shape) {
+    return sizeof(T) * SizeOf(shape);
+  }
 
   template <typename T>
   static size_t TensorSizeInByte(const std::vector<size_t> &shape) {
-    size_t result = sizeof(T);
-    for (size_t i = 0; i < shape.size(); i++) {
-      result *= shape[i];
-    }
-    return result;
+    size_t res = sizeof(T);
+    res = std::accumulate(shape.begin(), shape.end(), res, std::multiplies<size_t>());
+
+    return res;
   }
 
   // Judge a control operator need be compiled into kernel graph rather than be cut into single op and
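// Illustrative aside, not part of the patch: a standalone check that the two
// helpers added above behave as described. IsTensorBroadcast is copied from the
// hunk; TensorSizeInByte reproduces the std::accumulate rewrite. MindSpore's
// SizeOf(shape) (used by the new int64 overload) is simply the product of dims.
#include <cassert>
#include <cstdint>
#include <functional>
#include <numeric>
#include <vector>

template <typename T>
bool IsTensorBroadcast(const std::vector<T> &lhs, const std::vector<T> &rhs) {
  if (lhs.size() != rhs.size()) {
    return true;
  }
  for (size_t i = 0; i < lhs.size(); i++) {
    if (lhs[i] != rhs[i]) {
      return true;
    }
  }
  return false;
}

template <typename T>
size_t TensorSizeInByte(const std::vector<size_t> &shape) {
  size_t res = sizeof(T);
  res = std::accumulate(shape.begin(), shape.end(), res, std::multiplies<size_t>());
  return res;
}

int main() {
  // One template now serves both legacy size_t shapes and int64_t ShapeVector.
  assert((IsTensorBroadcast<int64_t>({2, 3}, {2, 1})));    // dim mismatch
  assert((IsTensorBroadcast<size_t>({2, 3}, {2, 3, 1})));  // rank mismatch
  assert((!IsTensorBroadcast<int64_t>({2, 3}, {2, 3})));   // identical: no broadcast
  // 2 * 3 * 4 float elements -> 96 bytes; an empty shape degenerates to sizeof(T).
  assert((TensorSizeInByte<float>({2, 3, 4}) == 96));
  assert((TensorSizeInByte<float>({}) == 4));
  return 0;
}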
diff --git a/mindspore/ccsrc/kernel/common_utils.cc b/mindspore/ccsrc/kernel/common_utils.cc
index a52210bd7d2..ca4f268e831 100644
--- a/mindspore/ccsrc/kernel/common_utils.cc
+++ b/mindspore/ccsrc/kernel/common_utils.cc
@@ -119,7 +119,7 @@ abstract::BaseShapePtr GetValidShapeFromAbstract(const abstract::AbstractBasePtr
 }
 
 KernelTensorPtr CreateKernelTensor(const abstract::AbstractBasePtr &cur_abstract, const TypeId &real_type, size_t idx,
-                                   const std::vector<size_t> &device_shape_adaptively, const std::string &format_str) {
+                                   const ShapeVector &device_shape_adaptively, const std::string &format_str) {
   auto tag_abstract = cur_abstract->Clone();
   if (cur_abstract->isa<abstract::AbstractTuple>()) {
     auto abs_tuple = cur_abstract->Clone()->cast<abstract::AbstractTuplePtr>();
@@ -130,12 +130,9 @@ KernelTensorPtr CreateKernelTensor(const abstract::AbstractBasePtr &cur_abstract
   }
 
   TypePtr tag_type_ptr = TypeIdToType(real_type);
-  ShapeVector tag_shape;
-  (void)std::transform(device_shape_adaptively.begin(), device_shape_adaptively.end(), std::back_inserter(tag_shape),
-                       SizeToLong);
   auto abstract_shape_ptr = GetValidShapeFromAbstract(tag_abstract);
   auto new_abstract = std::make_shared<abstract::AbstractTensor>(tag_type_ptr, abstract_shape_ptr);
-  TensorInfo tensor_info{GetFormatFromStrToEnum(format_str), new_abstract, tag_shape};
+  TensorInfo tensor_info{GetFormatFromStrToEnum(format_str), new_abstract, device_shape_adaptively};
   KernelTensorPtr res_tensor = std::make_shared<KernelTensor>();
   res_tensor->SetTensorInfo(tensor_info);
   return res_tensor;
@@ -553,19 +550,7 @@ std::string GetProcessor(const AnfNodePtr &anf_node) {
   return device;
 }
 
-bool IsSameShape(const std::vector<size_t> &shape_a, const std::vector<size_t> &shape_b) {
-  if (shape_a.size() != shape_b.size()) {
-    return false;
-  }
-  for (size_t i = 0; i < shape_a.size(); ++i) {
-    if (shape_a[i] != shape_b[i]) {
-      return false;
-    }
-  }
-  return true;
-}
-
-bool IsSameShape(const std::vector<int64_t> &shape_a, const std::vector<int64_t> &shape_b) {
+bool IsSameShape(const ShapeVector &shape_a, const ShapeVector &shape_b) {
   if (shape_a.size() != shape_b.size()) {
     return false;
   }
@@ -696,11 +681,11 @@ std::vector<int64_t> GetReduceAttrAxis(const CNodePtr &cnode) {
 }
 
 void FillEmptyDims(const CNodePtr &kernel_node, std::vector<int64_t> *begin, std::vector<int64_t> *end,
-                   std::vector<int64_t> *stride, std::vector<size_t> *input_shape) {
+                   std::vector<int64_t> *stride, ShapeVector *input_shape) {
   std::vector<int64_t> &_begin = *begin;
   std::vector<int64_t> &_end = *end;
   std::vector<int64_t> &_stride = *stride;
-  std::vector<size_t> &_input_shape = *input_shape;
+  auto &_input_shape = *input_shape;
   if (_begin.size() != _end.size() || _begin.size() != _stride.size() || _begin.size() > _input_shape.size()) {
     MS_LOG(EXCEPTION) << "For '" << common::AnfAlgo::GetCNodeName(kernel_node)
                       << "', the length of 'begin', 'stride' and 'end' should be equal "
@@ -716,17 +701,17 @@ void FillEmptyDims(const CNodePtr &kernel_node, std::vector<int64_t> *begin, std
     }
 
     if (i < _begin.size()) {
-      int64_t dim = SizeToLong(_input_shape[i]);
+      int64_t dim = _input_shape[i];
       _begin[i] = std::min(_begin[i] < 0 ? std::max(_begin[i] + dim, static_cast<int64_t>(0)) : _begin[i], dim - 1);
     } else {
       _begin.push_back(0);
     }
 
     if (i < _end.size()) {
-      int64_t dim = SizeToLong(_input_shape[i]);
+      int64_t dim = _input_shape[i];
       _end[i] = std::max(_end[i] < 0 ? _end[i] + dim : std::min(_end[i], dim), static_cast<int64_t>(-1));
     } else {
-      _end.push_back(i < _input_shape.size() ? SizeToLong(_input_shape[i]) : 1);
+      _end.push_back(i < _input_shape.size() ? _input_shape[i] : 1);
     }
 
     if (i >= _stride.size()) {
@@ -749,31 +734,31 @@ std::vector<bool> Dec2Bin(const int64_t &mask) {
 }
 
 void ComputeBeginMask(const CNodePtr &kernel_node, std::vector<int64_t> *begin, const std::vector<int64_t> &stride,
-                      const std::vector<size_t> &input_shape) {
+                      const ShapeVector &input_shape) {
   std::vector<int64_t> &_begin = *begin;
   auto begin_mask_int = common::AnfAlgo::GetNodeAttr<int64_t>(kernel_node, kAttrBeginMask);
   auto begin_mask = Dec2Bin(begin_mask_int);
   for (size_t i = 0; i < begin_mask.size(); i++) {
     if (i < kStridedSliceMaxDims && begin_mask[i]) {
-      _begin[i] = stride[i] < 0 ? SizeToLong(input_shape[i]) - 1 : 0;
+      _begin[i] = stride[i] < 0 ? input_shape[i] - 1 : 0;
     }
   }
 }
 
 void ComputeEndMask(const CNodePtr &kernel_node, std::vector<int64_t> *end, const std::vector<int64_t> &stride,
-                    const std::vector<size_t> &input_shape) {
+                    const ShapeVector &input_shape) {
   std::vector<int64_t> &_end = *end;
   auto end_mask_int = common::AnfAlgo::GetNodeAttr<int64_t>(kernel_node, kAttrEndMask);
   auto end_mask = Dec2Bin(end_mask_int);
   for (size_t j = 0; j < end_mask.size(); j++) {
     if (j < kStridedSliceMaxDims && end_mask[j]) {
-      _end[j] = stride[j] < 0 ? -1 : SizeToLong(input_shape[j]);
+      _end[j] = stride[j] < 0 ? -1 : input_shape[j];
     }
   }
 }
 
 void ComputeEllipsisMask(const CNodePtr &kernel_node, std::vector<int64_t> *begin, std::vector<int64_t> *end,
-                         std::vector<int64_t> *stride, const std::vector<size_t> &input_shape) {
+                         std::vector<int64_t> *stride, const ShapeVector &input_shape) {
   std::vector<int64_t> &_begin = *begin;
   std::vector<int64_t> &_end = *end;
   std::vector<int64_t> &_stride = *stride;
@@ -782,14 +767,14 @@ void ComputeEllipsisMask(const CNodePtr &kernel_node, std::vector<int64_t> *begi
   for (size_t k = 0; k < ellipsis_mask.size(); k++) {
     if (k < kStridedSliceMaxDims && ellipsis_mask[k]) {
       _begin[k] = 0;
-      _end[k] = SizeToLong(input_shape[k]);
+      _end[k] = input_shape[k];
       _stride[k] = 1;
     }
   }
 }
 
 void ComputNewAxisMask(const CNodePtr &kernel_node, std::vector<int64_t> *begin, std::vector<int64_t> *end,
-                       std::vector<int64_t> *stride, const std::vector<size_t> &input_shape) {
+                       std::vector<int64_t> *stride, const ShapeVector &input_shape) {
   std::vector<int64_t> &_begin = *begin;
   std::vector<int64_t> &_end = *end;
   std::vector<int64_t> &_stride = *stride;
@@ -798,7 +783,7 @@ void ComputNewAxisMask(const CNodePtr &kernel_node, std::vector<int64_t> *begin,
   for (size_t l = 0; l < new_axis_mask.size(); l++) {
     if (l < kStridedSliceMaxDims && new_axis_mask[l]) {
       _begin[l] = 0;
-      _end[l] = SizeToLong(input_shape[l]);
+      _end[l] = input_shape[l];
       _stride[l] = 1;
     }
   }
@@ -819,7 +804,7 @@ void ComputShrinkAxisMask(const CNodePtr &kernel_node, const std::vector<int64_t
 
 void ParseStrideSliceMasks(const CNodePtr &kernel_node, std::vector<int64_t> *begin, std::vector<int64_t> *end,
-                           std::vector<int64_t> *stride, const std::vector<size_t> &input_shape) {
+                           std::vector<int64_t> *stride, const ShapeVector &input_shape) {
   ComputeBeginMask(kernel_node, begin, *stride, input_shape);
   ComputeEndMask(kernel_node, end, *stride, input_shape);
   ComputeEllipsisMask(kernel_node, begin, end, stride, input_shape);
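// Illustrative aside, not part of the patch: the idea behind the Dec2Bin helper
// the mask functions above share. Bit i of the integer mask attribute decides
// whether dimension i of the strided slice is overridden. The implementation
// below is a plausible sketch, not the upstream one, assuming non-negative masks
// and at most kStridedSliceMaxDims consumed bits.
#include <cstdint>
#include <iostream>
#include <vector>

constexpr size_t kStridedSliceMaxDims = 8;

std::vector<bool> Dec2Bin(const int64_t &mask) {
  std::vector<bool> bits(kStridedSliceMaxDims, false);
  for (size_t i = 0; i < kStridedSliceMaxDims; ++i) {
    bits[i] = ((mask >> i) & 1) != 0;
  }
  return bits;
}

int main() {
  // begin_mask = 5 (binary 101): dims 0 and 2 ignore the provided `begin` value.
  for (bool b : Dec2Bin(5)) {
    std::cout << b;  // prints 10100000
  }
  std::cout << std::endl;
  return 0;
}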
@@ -906,24 +891,24 @@ void ComputeInterpolationWeights(const size_t out_size, const size_t in_size, co
   }
 }
 
-bool GetShapeSize(const std::vector<size_t> &shape, const TypePtr &type_ptr, int64_t *size_i) {
+bool GetShapeSize(const ShapeVector &shape, const TypePtr &type_ptr, int64_t *size_i) {
   MS_EXCEPTION_IF_NULL(type_ptr);
   size_t type_byte = GetTypeByte(type_ptr);
   if (type_byte == 0) {
     return false;
   }
   for (size_t j = 0; j < shape.size(); j++) {
-    size_i[0] = LongMulWithOverflowCheck(size_i[0], static_cast<int64_t>(shape[j]));
+    if (shape[j] <= 0) {
+      MS_LOG(DEBUG) << "shape[" << shape << "] has invalid value (less than or equal to 0), set size to 0";
+      size_i[0] = 0;
+      return true;
+    }
+    size_i[0] = LongMulWithOverflowCheck(size_i[0], shape[j]);
   }
   size_i[0] = LongMulWithOverflowCheck(size_i[0], SizeToInt(type_byte));
   return true;
 }
 
-void CastShapeSizeToLong(const std::vector<size_t> &shape, std::vector<int64_t> *long_shape) {
-  MS_EXCEPTION_IF_NULL(long_shape);
-  (void)std::transform(shape.begin(), shape.end(), std::back_inserter(*long_shape), SizeToLong);
-}
-
 void CheckSliceValid(const std::vector<int64_t> &start, const std::vector<int64_t> &stop,
                      const std::vector<int64_t> &step, const std::vector<int64_t> &input_shape) {
   if (start.size() != stop.size() || start.size() != step.size() || start.size() > input_shape.size()) {
@@ -1337,14 +1322,12 @@ void UpdateNodeShape(const CNodePtr &cnode) {
     return;
   }
   std::vector<TypeId> type_ids;
-  std::vector<std::vector<size_t>> shapes;
+  std::vector<ShapeVector> shapes;
   size_t output_num = output_tensor.size();
   for (size_t i = 0; i < output_num; ++i) {
     MS_EXCEPTION_IF_NULL(output_tensor[i]);
     auto out_shape = output_tensor[i]->GetShapeVector();
-    std::vector<size_t> u_out_shape;
-    std::transform(out_shape.begin(), out_shape.end(), std::back_inserter(u_out_shape), LongToSize);
-    shapes.emplace_back(std::move(u_out_shape));
+    shapes.emplace_back(std::move(out_shape));
     type_ids.emplace_back(output_tensor[i]->GetDtype());
   }
   common::AnfAlgo::SetOutputInferTypeAndShape(type_ids, shapes, cnode.get());
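// Illustrative aside, not part of the patch: a minimal model of the new early-out
// in GetShapeSize above. With signed ShapeVector dims, a -1 (unknown/dynamic) or
// 0 dim would poison the product, so the size is pinned to 0 and the call still
// succeeds. GetTypeByte is replaced by a fixed 4-byte element for brevity.
#include <cassert>
#include <cstdint>
#include <vector>

bool GetShapeSize(const std::vector<int64_t> &shape, int64_t *size_i) {
  *size_i = 1;
  for (size_t j = 0; j < shape.size(); j++) {
    if (shape[j] <= 0) {
      *size_i = 0;  // dynamic or empty dim: defer the real size to runtime
      return true;
    }
    *size_i *= shape[j];
  }
  *size_i *= 4;  // stand-in for GetTypeByte(type_ptr)
  return true;
}

int main() {
  int64_t size = 0;
  assert((GetShapeSize({2, 3}, &size) && size == 24));
  assert((GetShapeSize({2, -1}, &size) && size == 0));  // dynamic dim
  return 0;
}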
diff --git a/mindspore/ccsrc/kernel/common_utils.h b/mindspore/ccsrc/kernel/common_utils.h
index 8f630b10413..863d9ba2cac 100644
--- a/mindspore/ccsrc/kernel/common_utils.h
+++ b/mindspore/ccsrc/kernel/common_utils.h
@@ -87,7 +87,7 @@ class KernelMeta {
 
 class MatrixInfo {
  public:
-  explicit MatrixInfo(size_t max_index, const std::vector<size_t> &matrix_shapes)
+  explicit MatrixInfo(size_t max_index, const ShapeVector &matrix_shapes)
       : max_index_(max_index), shapes_(matrix_shapes) {
     current_indexes_.resize(shapes_.size(), 0);
   }
@@ -101,8 +101,8 @@ class MatrixInfo {
     int last_rank = SizeToInt(current_indexes_.size()) - 1;
     for (int i = last_rank; i >= 0; --i) {
       size_t position = IntToSize(i);
-      current_indexes_[position] = start % shapes_.at(position);
-      start = start / shapes_.at(position);
+      current_indexes_[position] = start % LongToSize(shapes_.at(position));
+      start = start / LongToSize(shapes_.at(position));
       if (start == 0) {
         break;
       }
@@ -116,7 +116,7 @@ class MatrixInfo {
     }
     size_t last_rank = current_indexes_.size() - 1;
     current_indexes_[last_rank]++;
-    for (size_t i = last_rank; current_indexes_.at(i) >= shapes_.at(i) && i > 0; --i) {
+    for (size_t i = last_rank; current_indexes_.at(i) >= LongToSize(shapes_.at(i)) && i > 0; --i) {
       current_indexes_[i] = 0;
       current_indexes_[i - 1] += 1;
     }
@@ -128,7 +128,7 @@ class MatrixInfo {
   bool is_first_iterator_{true};
   size_t min_index{0};
   size_t max_index_{1};
-  std::vector<size_t> shapes_;
+  ShapeVector shapes_;
   std::vector<size_t> current_indexes_;
 };
 using MatrixInfoPtr = std::shared_ptr<MatrixInfo>;
@@ -143,14 +143,13 @@ KernelPackPtr InsertCache(const std::string &kernel_name, const std::string &pro
 TypeId DtypeToTypeId(const std::string &dtypes);
 std::string Dtype2ShortType(const std::string &dtypes);
 size_t GetDtypeNbyte(const std::string &dtypes);
-bool GetShapeSize(const std::vector<size_t> &shape, const TypePtr &type_ptr, int64_t *size_i);
+bool GetShapeSize(const ShapeVector &shape, const TypePtr &type_ptr, int64_t *size_i);
 bool ParseMetadata(const CNodePtr &kernel_node, const std::shared_ptr<const OpInfo> &op_info_ptr, Processor processor,
                    std::vector<std::shared_ptr<KernelBuildInfo>> *const kernel_info_list);
 void SaveJsonInfo(const std::string &json_name, const std::string &info, const std::string &base_path);
 std::string GetProcessor(const AnfNodePtr &anf_node);
 Processor GetProcessor(const string &processor);
-bool IsSameShape(const std::vector<size_t> &shape_a, const std::vector<size_t> &shape_b);
-bool IsSameShape(const std::vector<int64_t> &shape_a, const std::vector<int64_t> &shape_b);
+bool IsSameShape(const ShapeVector &shape_a, const ShapeVector &shape_b);
 std::vector<std::pair<AnfNodePtr, size_t>> GetOutputIndex(const std::vector<AnfNodePtr> &node_list,
                                                           const std::vector<AnfNodePtr> &input_list,
                                                           const std::vector<AnfNodePtr> &output_list);
@@ -170,9 +169,9 @@ FusionType GetFusionTypeByName(const std::string &name);
 std::string GetFusionNameByType(const kernel::FusionType &type);
 std::vector<bool> Dec2Bin(const int64_t &mask);
 void FillEmptyDims(const CNodePtr &kernel_node, std::vector<int64_t> *begin, std::vector<int64_t> *end,
-                   std::vector<int64_t> *stride, std::vector<size_t> *input_shape);
+                   std::vector<int64_t> *stride, ShapeVector *input_shape);
 void ParseStrideSliceMasks(const CNodePtr &kernel_node, std::vector<int64_t> *begin, std::vector<int64_t> *end,
-                           std::vector<int64_t> *stride, const std::vector<size_t> &input_shape);
+                           std::vector<int64_t> *stride, const ShapeVector &input_shape);
 struct CachedInterpolation {
   size_t lower;
   size_t upper;
@@ -246,7 +245,6 @@ inline T ComputeLerp(T top_left, T top_right, T bottom_left, T bottom_right, T x
   return top + (bottom - top) * y_lerp;
 }
 
-void CastShapeSizeToLong(const std::vector<size_t> &shape, std::vector<int64_t> *long_shape);
 void CheckSliceValid(const std::vector<int64_t> &start, const std::vector<int64_t> &stop,
                      const std::vector<int64_t> &step, const std::vector<int64_t> &input_shape);
 size_t CalOffset(const std::vector<int64_t> &start, const std::vector<int64_t> &stop,
diff --git a/mindspore/ccsrc/kernel/environ_manager.cc b/mindspore/ccsrc/kernel/environ_manager.cc
index e546775fc15..e94d851f8ed 100644
--- a/mindspore/ccsrc/kernel/environ_manager.cc
+++ b/mindspore/ccsrc/kernel/environ_manager.cc
@@ -100,7 +100,7 @@ bool EnvironMgr::CheckEnvInput(const CNodePtr &kernel_node) const {
   return true;
 }
 
-bool EnvironMgr::IsScalarTensor(TypeId type, const std::vector<size_t> &shape) const {
+bool EnvironMgr::IsScalarTensor(TypeId type, const std::vector<int64_t> &shape) const {
   if (type == kObjectTypeTensorType) {
     MS_LOG(ERROR) << "The type is invalid: " << type;
     return false;
diff --git a/mindspore/ccsrc/kernel/environ_manager.h b/mindspore/ccsrc/kernel/environ_manager.h
index 7ade4b3b033..5cbbb211491 100644
--- a/mindspore/ccsrc/kernel/environ_manager.h
+++ b/mindspore/ccsrc/kernel/environ_manager.h
@@ -43,7 +43,7 @@ class EnvironMgr {
   // Check whether the inputs of EnvironGet kernel or EnvironSet kernel are valid.
   bool CheckEnvInput(const CNodePtr &kernel_node) const;
   // Check whether is scalar tensor. Environ handle and env key only support scalar tensor currently.
-  bool IsScalarTensor(TypeId type, const std::vector<size_t> &shape) const;
+  bool IsScalarTensor(TypeId type, const std::vector<int64_t> &shape) const;
 
  private:
   EnvironMgr() = default;
diff --git a/mindspore/ccsrc/kernel/kernel.h b/mindspore/ccsrc/kernel/kernel.h
index d2874872f43..583e260ee57 100644
--- a/mindspore/ccsrc/kernel/kernel.h
+++ b/mindspore/ccsrc/kernel/kernel.h
@@ -200,13 +200,13 @@ class KernelTensor {
   // If real type is not a list or tuple tensor, it will return kTypeUnknown.
   std::vector<TypeId> GetListOrTupleDtype() const;
   // If real type is not a single shape vector, it will return empty.
-  std::vector<int64_t> GetShapeVector() const;
+  ShapeVector GetShapeVector() const;
   // If real type is not a list or tuple shape vector, it will return empty.
-  std::vector<std::vector<int64_t>> GetListOrTupleShapeVector() const;
+  std::vector<ShapeVector> GetListOrTupleShapeVector() const;
   void SetData(const AddressPtr &data) { data_ = data; }
   void SetDtype(const TypePtr &dtype);
   void SetFormat(mindspore::Format format) { tensor_info_.format = format; }
-  void SetShapeVector(const std::vector<int64_t> &shape);
+  void SetShapeVector(const ShapeVector &shape);
 
   abstract::BaseShapePtr GetBaseShape() const;
   // If the shape need to be List or Tuple, `SetBaseShape` should be called.
@@ -215,8 +215,8 @@ class KernelTensor {
   void SetTensorInfo(const TensorInfo &tensor_info) { tensor_info_ = tensor_info; }
 
   // deprecated field for dynamic shape
-  const std::vector<int64_t> &GetDeviceShapeAdaptively() const;
-  void SetDeviceShapeAdaptively(const std::vector<int64_t> &device_shape_adaptively);
+  const ShapeVector &GetDeviceShapeAdaptively() const;
+  void SetDeviceShapeAdaptively(const ShapeVector &device_shape_adaptively);
 
  private:
   TensorInfo tensor_info_;
@@ -293,7 +293,7 @@ class KernelMod {
 protected:
  virtual void SyncData() {}
  virtual std::vector<KernelTensorPtr> GetOutputs() { return {}; }
-  bool IsValidShape(const std::vector<int64_t> &shape) {
+  bool IsValidShape(const ShapeVector &shape) {
    if (std::any_of(shape.begin(), shape.end(), [](int64_t dim) { return dim < 0; })) {
      return false;
    }
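// Illustrative aside, not part of the patch: a standalone copy of the
// IsValidShape logic above. With signed ShapeVector dims, "unknown" is encoded
// as a negative value, so one std::any_of separates fully-resolved shapes from
// still-dynamic ones.
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

using ShapeVector = std::vector<int64_t>;

bool IsValidShape(const ShapeVector &shape) {
  return !std::any_of(shape.begin(), shape.end(), [](int64_t dim) { return dim < 0; });
}

int main() {
  assert((IsValidShape({8, 16})));
  assert((!IsValidShape({8, -1})));  // still dynamic: wait for the real shape
  return 0;
}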
diff --git a/mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_device_address.cc b/mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_device_address.cc
index 2d6da36ce52..d0c38679443 100644
--- a/mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_device_address.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_device_address.cc
@@ -283,7 +283,7 @@ ShapeVector AscendDeviceAddress::GetDeviceShape(ShapeVector *host_shape) const {
   return device_shape;
 }
 
-std::shared_ptr<LaunchKernel> AscendDeviceAddress::CreateLaunchTransData(const std::vector<size_t> &host_shape,
+std::shared_ptr<LaunchKernel> AscendDeviceAddress::CreateLaunchTransData(const ShapeVector &host_shape,
                                                                          const std::string &ori_format,
                                                                          const std::string &dst_format) const {
   auto runtime_instance = device::KernelRuntimeManager::Instance().GetCurrentKernelRuntime();
@@ -300,8 +300,8 @@ std::shared_ptr<LaunchKernel> AscendDeviceAddress::CreateLaunchTransData(const s
   return launch_trans_data;
 }
 
-bool AscendDeviceAddress::SyncDeviceToHostAndConvertFormatBasedOnTransData(const std::vector<size_t> &host_shape,
-                                                                           size_t size, mindspore::TypeId type,
+bool AscendDeviceAddress::SyncDeviceToHostAndConvertFormatBasedOnTransData(const ShapeVector &host_shape, size_t size,
+                                                                           mindspore::TypeId type,
                                                                            void *host_ptr) const {
   bool sync_ok = true;
   const std::string dst_format = kOpFormat_NCHW;
@@ -325,7 +325,7 @@ bool AscendDeviceAddress::SyncDeviceToHostAndConvertFormatBasedOnTransData(const
     auto host = std::vector<uint8_t>(size);
     SyncMemory(host.data(), output_addr_vec[0], size, ACL_MEMCPY_DEVICE_TO_HOST);
     auto shape_size = abstract::ShapeSize(host_shape);
-    const trans::TypeIdArgs type_args{host.data(), SizeToLong(shape_size), type_id_, type, size};
+    const trans::TypeIdArgs type_args{host.data(), shape_size, type_id_, type, size};
     sync_ok = trans::TransDataType(type_args, host_ptr);
     if (!sync_ok) {
       MS_LOG(ERROR) << "Trans data type failed.";
@@ -353,9 +353,7 @@ bool AscendDeviceAddress::SyncDeviceToHostAndConvertFormat(const ShapeVector &sh
       type_id_name_map.find(type_id_) != type_id_name_map.end()) {
     std::pair<std::string, std::string> type_format = std::make_pair(type_id_name_map.at(type_id_), format_);
     if (use_trans_data.find(type_format) != use_trans_data.end()) {
-      std::vector<size_t> st_shape;
-      (void)std::transform(host_shape.begin(), host_shape.end(), std::back_inserter(st_shape), LongToSize);
-      sync_ok = SyncDeviceToHostAndConvertFormatBasedOnTransData(st_shape, size, type, host_ptr);
+      sync_ok = SyncDeviceToHostAndConvertFormatBasedOnTransData(host_shape, size, type, host_ptr);
       return sync_ok;
     }
   }
diff --git a/mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_device_address.h b/mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_device_address.h
index f2ad13b3bd6..05440a6f5fb 100644
--- a/mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_device_address.h
+++ b/mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_device_address.h
@@ -74,15 +74,14 @@ class AscendDeviceAddress : public DeviceAddress {
  private:
   bool SyncDeviceToHostAndConvertFormat(const ShapeVector &shape, size_t size, TypeId type, void *host_ptr) const;
   bool ConvertFormatAndSyncHostToDevice(const ShapeVector &shape, size_t size, TypeId type, const void *host_ptr) const;
-  bool SyncDeviceToHostAndConvertFormatBasedOnTransData(const std::vector<size_t> &host_shape, size_t size,
+  bool SyncDeviceToHostAndConvertFormatBasedOnTransData(const ShapeVector &host_shape, size_t size,
                                                         mindspore::TypeId type, void *host_ptr) const;
   bool SyncDeviceToDeviceWithSameFormatType(const ShapeVector &shape, size_t size, TypeId type, const void *src_ptr,
                                             const std::string &format) const;
   bool SyncDeviceToDeviceWithDiffFormatType(const DeviceSync *src_device_addr) const;
   void SyncStream() const;
   ShapeVector GetDeviceShape(ShapeVector *host_shape) const;
-  std::shared_ptr<LaunchKernel> CreateLaunchTransData(const std::vector<size_t> &host_shape,
-                                                      const std::string &ori_format,
+  std::shared_ptr<LaunchKernel> CreateLaunchTransData(const ShapeVector &host_shape, const std::string &ori_format,
                                                       const std::string &dst_format) const;
   mutable std::shared_ptr<AscendLaunchTransData> launch_transdata_{nullptr};
   void BindDevice() const;
diff --git a/mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_launch_atomic_clean.cc b/mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_launch_atomic_clean.cc
index 45d99e58cca..cd1ff463f60 100644
--- a/mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_launch_atomic_clean.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_launch_atomic_clean.cc
@@ -85,7 +85,7 @@ std::shared_ptr<session::KernelGraph> AscendLaunchAtomicClean::ObtainAtomicClean
   }
   auto shape = total_size_ / dtype_size;
   std::vector<ShapeVector> input_shapes = {{static_cast<int64_t>(shape)}};
-  std::vector<std::vector<size_t>> output_shapes = {};
+  std::vector<ShapeVector> output_shapes = {};
   auto atomic_clean_graph = session::SingleKernelGraph::ConstructKernelGraphBasedOnSingleOp(
     kAtomicAddrCleanOpName, input_dtypes, input_shapes, output_dtypes, output_shapes);
   MS_EXCEPTION_IF_NULL(atomic_clean_graph);
diff --git a/mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_launch_transdata.cc b/mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_launch_transdata.cc
index a428991ffa7..f01cafd6419 100644
--- a/mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_launch_transdata.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_launch_transdata.cc
@@ -81,10 +81,8 @@ std::shared_ptr<session::KernelGraph> AscendLaunchTransData::ObtainTransDataKern
   std::vector<TypeId> input_dtypes = {dtype_};
   std::vector<TypeId> output_dtypes = {dtype_};
   // obtain input & output shape
-  std::vector<int64_t> input_shape;
-  std::transform(shape_.begin(), shape_.end(), std::back_inserter(input_shape), SizeToLong);
-  std::vector<std::vector<int64_t>> input_shapes = {{input_shape}};
-  std::vector<std::vector<size_t>> output_shapes = {{shape_}};
+  std::vector<ShapeVector> input_shapes = {{shape_}};
+  std::vector<ShapeVector> output_shapes = {{shape_}};
   auto transdata_graph = session::SingleKernelGraph::ConstructKernelGraphBasedOnSingleOp(
     kTransDataOpName, input_dtypes, input_shapes, output_dtypes, output_shapes);
   MS_EXCEPTION_IF_NULL(transdata_graph);
diff --git a/mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_launch_transdata.h b/mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_launch_transdata.h
index 472ee07ce9e..37f4755f8ce 100644
--- a/mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_launch_transdata.h
+++ b/mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_launch_transdata.h
@@ -27,7 +27,7 @@ namespace mindspore::device::ascend {
 class AscendLaunchTransData : public AscendLaunchKernel {
  public:
   AscendLaunchTransData(void *stream, TypeId dtype, size_t total_size, std::string src_format, std::string dst_format,
-                        std::vector<size_t> host_shape, int64_t groups)
+                        ShapeVector host_shape, int64_t groups)
       : AscendLaunchKernel(stream),
         dtype_(dtype),
         total_size_(total_size),
@@ -57,7 +57,7 @@ class AscendLaunchTransData : public AscendLaunchKernel {
   uint8_t *input_addr_;
   std::string src_format_;
   std::string dst_format_;
-  std::vector<size_t> shape_;
+  ShapeVector shape_;
   int64_t groups_;
 
  private:
diff --git a/mindspore/ccsrc/plugin/device/ascend/hal/device/dump/data_dumper.cc b/mindspore/ccsrc/plugin/device/ascend/hal/device/dump/data_dumper.cc
index 5bb87752eb4..308baac8ebd 100644
--- a/mindspore/ccsrc/plugin/device/ascend/hal/device/dump/data_dumper.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/hal/device/dump/data_dumper.cc
@@ -409,7 +409,7 @@ void DataDumper::RtLoadDumpData(const aicpu::dump::OpMappingInfo &dump_info, voi
   }
 }
 
-void SetDumpShape(const std::vector<size_t> &ms_shape, NotNull<aicpu::dump::Shape *> dump_shape) {
+void SetDumpShape(const ShapeVector &ms_shape, NotNull<aicpu::dump::Shape *> dump_shape) {
   for (auto &dim : ms_shape) {
     dump_shape->add_dim(dim);
   }
diff --git a/mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_graph_kernel.cc b/mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_graph_kernel.cc
index 64f23bf886b..341fc9f12e6 100644
--- a/mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_graph_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_graph_kernel.cc
@@ -103,7 +103,7 @@ void UpdateKernelInfo(const std::vector<AnfNodePtr> &node_list) {
   }
 }
 
-bool CanConvertDefaultShapeToNZ(const std::vector<size_t> &shape) {
+bool CanConvertDefaultShapeToNZ(const ShapeVector &shape) {
   for (size_t i = 1; i <= shape.size(); ++i) {
     if (i > 2) {
       break;
@@ -115,7 +115,7 @@ bool CanConvertDefaultShapeToNZ(const std::vector<size_t> &shape) {
   return true;
 }
 
-std::vector<int64_t> DefaultToFracNZAxis(const std::vector<size_t> &ori_shape, const std::vector<int64_t> &axis) {
+std::vector<int64_t> DefaultToFracNZAxis(const ShapeVector &ori_shape, const std::vector<int64_t> &axis) {
   std::vector<int64_t> frac_nz_axis = axis;
   auto shape_len = ori_shape.size();
   for (size_t i = 0; i < axis.size(); ++i) {
@@ -133,23 +133,6 @@ std::vector<int64_t> DefaultToFracNZAxis(const std::vector<size_t> &ori_shape, c
   return frac_nz_axis;
 }
 
-std::vector<size_t> GetReducedFracNZShape(const std::vector<size_t> &ori_shape, const std::vector<int64_t> &axis,
-                                          bool keep_dims) {
-  std::vector<size_t> result;
-  std::set<size_t> positive_idx;
-  for (const auto &a : axis) {
-    (void)positive_idx.insert(a >= 0 ? LongToSize(a) : ori_shape.size() + LongToSize(a));
-  }
-  for (size_t i = 0; i < ori_shape.size(); ++i) {
-    if (positive_idx.count(i) == 0) {
-      result.push_back(ori_shape[i]);
-    } else if (keep_dims) {
-      result.push_back(1);
-    }
-  }
-  return result;
-}
-
 void UpdateFracNZReduceOp(const CNodePtr &cnode) {
   MS_EXCEPTION_IF_NULL(cnode);
   auto input_format = AnfAlgo::GetPrevNodeOutputFormat(cnode, 0);
diff --git a/mindspore/ccsrc/plugin/device/ascend/hal/device/profiling/profiling_reporter.cc b/mindspore/ccsrc/plugin/device/ascend/hal/device/profiling/profiling_reporter.cc
index 200a2abb2b6..653ef055ebe 100644
--- a/mindspore/ccsrc/plugin/device/ascend/hal/device/profiling/profiling_reporter.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/hal/device/profiling/profiling_reporter.cc
@@ -261,7 +261,7 @@ void ProfilingReporter::BuildTensorData(MsprofGeTensorData *tensor_data, const C
                                         uint32_t tensor_type) {
   MS_EXCEPTION_IF_NULL(tensor_data);
   tensor_data->tensorType = tensor_type;
-  std::vector<size_t> shape;
+  std::vector<int64_t> shape;
   string data_format;
   uint32_t vm_data_type;
   if (tensor_type == MSPROF_GE_TENSOR_TYPE_INPUT) {
diff --git a/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc b/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc
index cd5d6c70ef6..bc48fa934ca 100644
--- a/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc
@@ -302,10 +302,10 @@ void ReorderInputsAsFrontGraph(const KernelGraphPtr &kernel_graph, const FuncGra
 }
 
 void UpdateOutputNodeShape(const std::vector<KernelWithIndex> &outputs, const std::vector<TypeId> &outputs_type,
-                           const std::vector<std::vector<size_t>> &shapes) {
+                           const std::vector<ShapeVector> &shapes) {
   AnfNodePtr cur_node = nullptr;
   std::vector<TypeId> cur_types = {};
-  std::vector<std::vector<size_t>> cur_shapes = {};
+  std::vector<ShapeVector> cur_shapes = {};
   for (size_t i = 0; i < outputs.size(); ++i) {
     const auto &node = outputs[i].first;
     if (node != cur_node && cur_node != nullptr) {
@@ -349,7 +349,7 @@ void GeGraphExecutor::AllocInputHostMemory(const KernelGraphPtr &kernel_graph) c
       continue;
     }
     TypeId output_type_id = common::AnfAlgo::GetOutputInferDataType(input_node, 0);
-    std::vector<size_t> shape = common::AnfAlgo::GetOutputInferShape(input_node, 0);
+    std::vector<size_t> shape = Convert2SizeT(common::AnfAlgo::GetOutputInferShape(input_node, 0));
     size_t type_size = GetTypeByte(TypeIdToType(output_type_id));
     size_t tensor_size = std::accumulate(shape.begin(), shape.end(), type_size, std::multiplies<size_t>());
     auto device_address_ptr =
@@ -364,7 +364,7 @@ void GeGraphExecutor::AllocOutputHostMemory(const KernelGraphPtr &kernel_graph)
   auto outputs = common::AnfAlgo::GetAllOutputWithIndex(kernel_graph->output());
   for (const auto &[output_node, i] : outputs) {
     TypeId output_type_id = common::AnfAlgo::GetOutputInferDataType(output_node, i);
-    std::vector<size_t> shape = common::AnfAlgo::GetOutputInferShape(output_node, i);
+    std::vector<size_t> shape = Convert2SizeT(common::AnfAlgo::GetOutputInferShape(output_node, i));
     size_t type_size = GetTypeByte(TypeIdToType(output_type_id));
     size_t tensor_size = std::accumulate(shape.begin(), shape.end(), type_size, std::multiplies<size_t>());
     auto device_address_ptr =
@@ -446,7 +446,7 @@ bool GeGraphExecutor::RunGraph(const FuncGraphPtr &graph, const std::vector<tens
-  std::vector<std::vector<size_t>> output_shapes;
+  std::vector<ShapeVector> output_shapes;
   for (size_t i = 0; i < outputs.size(); ++i) {
     const auto &[output_node, idx] = outputs[i];
     const auto &tensor = ge_outputs[i];
@@ -462,9 +462,7 @@ bool GeGraphExecutor::RunGraph(const FuncGraphPtr &graph, const std::vector<tens
     ... tensor->GetMutablePtr(), tensor->GetData(), tensor->GetSize());
     auto actual_shapes = tensor->GetTensorDesc().GetShape().GetDims();
-    std::vector<size_t> u_out_shape;
-    std::transform(actual_shapes.begin(), actual_shapes.end(), std::back_inserter(u_out_shape), LongToSize);
-    output_shapes.emplace_back(std::move(u_out_shape));
+    output_shapes.emplace_back(std::move(actual_shapes));
   }
   UpdateOutputNodeShape(outputs, me_types, output_shapes);
   MS_LOG(INFO) << "GE run graph end.";
diff --git a/mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/all_to_all_v_calc_param.cc b/mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/all_to_all_v_calc_param.cc
index 8d9a652e9b7..5d694b37b40 100644
--- a/mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/all_to_all_v_calc_param.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/all_to_all_v_calc_param.cc
@@ -65,7 +65,7 @@ void AllToAllvCalcParam::CalcOpParam() {
     if (type_size == 0) {
       MS_LOG(EXCEPTION) << "Invalid type_size 0 of node: " << cnode->fullname_with_scope();
     }
-    size_t origin_mem_size = std::accumulate(ms_shape.begin(), ms_shape.end(), type_size, std::multiplies<size_t>());
+    size_t origin_mem_size = type_size * SizeOf(ms_shape);
     size_t aligned_mem_size = device::MemoryManager::GetCommonAlignSize(origin_mem_size);
     input_aligned_mem_size[i] = aligned_mem_size / type_size;
     input_real_mem_size[i] = origin_mem_size / type_size;
@@ -76,7 +76,7 @@ void AllToAllvCalcParam::CalcOpParam() {
     if (type_size == 0) {
       MS_LOG(EXCEPTION) << "Invalid type_size 0 of node: " << cnode->fullname_with_scope();
     }
-    size_t origin_mem_size = std::accumulate(ms_shape.begin(), ms_shape.end(), type_size, std::multiplies<size_t>());
+    size_t origin_mem_size = type_size * SizeOf(ms_shape);
     size_t aligned_mem_size = device::MemoryManager::GetCommonAlignSize(origin_mem_size);
     output_aligned_mem_size[i] = aligned_mem_size / type_size;
     output_real_mem_size[i] = origin_mem_size / type_size;
diff --git a/mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/converter.cc b/mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/converter.cc
index fd20068b7a5..4c7f3735933 100644
--- a/mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/converter.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/converter.cc
@@ -176,18 +176,12 @@ std::tuple<...> GenerateStubGeNode(const AnfNodePtr
   size_t input_num = common::AnfAlgo::GetInputTensorNum(cnode);
   size_t output_num = common::AnfAlgo::GetOutputTensorNum(cnode);
   for (size_t i = 0; i < input_num; ++i) {
-    std::vector<int64_t> ge_shape;
-    auto ms_shape = AnfAlgo::GetInputDeviceShape(cnode, i);
-    std::transform(ms_shape.begin(), ms_shape.end(), std::back_inserter(ge_shape),
-                   [](size_t in) { return static_cast<int64_t>(in); });
+    auto ge_shape = AnfAlgo::GetInputDeviceShape(cnode, i);
     op_desc->AddInputDesc(ge::GeTensorDesc(ge::GeShape(ge_shape), ge::Format::FORMAT_NCHW,
                                            transform::ConvertDataType(AnfAlgo::GetInputDeviceDataType(cnode, i))));
   }
   for (size_t i = 0; i < output_num; ++i) {
-    std::vector<int64_t> ge_shape;
-    auto ms_shape = AnfAlgo::GetOutputDeviceShape(cnode, i);
-    std::transform(ms_shape.begin(), ms_shape.end(), std::back_inserter(ge_shape),
-                   [](size_t in) { return static_cast<int64_t>(in); });
+    auto ge_shape = AnfAlgo::GetOutputDeviceShape(cnode, i);
     op_desc->AddOutputDesc(ge::GeTensorDesc(ge::GeShape(ge_shape), ge::Format::FORMAT_NCHW,
                                             transform::ConvertDataType(AnfAlgo::GetOutputDeviceDataType(cnode, i))));
   }
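// Illustrative aside, not part of the patch: a plausible standalone equivalent
// of the utils/shape_utils.h SizeOf(ms_shape) call adopted above, i.e. the
// element count of a shape, so that byte size = type_size * SizeOf(shape). The
// exact upstream implementation may differ (e.g. in its overflow handling).
#include <cassert>
#include <cstdint>
#include <functional>
#include <numeric>
#include <vector>

using ShapeVector = std::vector<int64_t>;

size_t SizeOf(const ShapeVector &shape) {
  // Product of dims; assumes all dims are non-negative here.
  int64_t elements = std::accumulate(shape.begin(), shape.end(), int64_t{1}, std::multiplies<int64_t>());
  return static_cast<size_t>(elements);
}

int main() {
  const size_t type_size = sizeof(float);
  // 4 * 6 floats -> 96 bytes, matching the former std::accumulate expression.
  assert((type_size * SizeOf({4, 6}) == 96));
  return 0;
}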
diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ext_info_handle.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ext_info_handle.cc
index a7589af1407..f7a989907f6 100644
--- a/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ext_info_handle.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ext_info_handle.cc
@@ -154,18 +154,16 @@ bool AicpuExtInfoHandler::UpdateInputShapeAndType(uint32_t input_index, const No
   }
 
   auto input_shape = AnfAlgo::GetInputDeviceShape(anf_node, input_index);
-  std::vector<int64_t> tmp_shape;
-  (void)std::transform(input_shape.begin(), input_shape.end(), std::back_inserter(tmp_shape), SizeToLong);
   if (input_index >= input_shape_and_type_.size()) {
     MS_LOG(ERROR) << "Invalid input_index: " << input_index
                   << " the size of input_shape_and_type_ is: " << input_shape_and_type_.size();
     return false;
   }
 
-  if (tmp_shape.empty()) {
-    tmp_shape = {1};
+  if (input_shape.empty()) {
+    input_shape = {1};
   }
-  return UpdateShapeAndType(tmp_shape, NOT_NULL(input_shape_and_type_[input_index]));
+  return UpdateShapeAndType(input_shape, NOT_NULL(input_shape_and_type_[input_index]));
 }
 
 bool AicpuExtInfoHandler::UpdateOutputShapeAndType(uint32_t output_index, const NotNull<AnfNodePtr> &anf_node) {
@@ -177,23 +175,21 @@ bool AicpuExtInfoHandler::UpdateOutputShapeAndType(uint32_t output_index, const
   auto shape = AnfAlgo::GetOutputDeviceShape(anf_node, output_index);
   auto max_shape = common::AnfAlgo::GetOutputMaxShape(anf_node, output_index);
   for (size_t i = 0; i < shape.size(); ++i) {
-    if (i < max_shape.size() && shape[i] == SIZE_MAX) {
-      MS_LOG(INFO) << "Node:" << node_name_ << " update shape from SIZE_MAX to " << max_shape[i];
-      shape[i] = LongToSize(max_shape[i]);
+    if (i < max_shape.size() && shape[i] == abstract::Shape::SHP_ANY) {
+      MS_LOG(INFO) << "Node:" << node_name_ << " update shape from SHP_ANY to " << max_shape[i];
+      shape[i] = max_shape[i];
     }
   }
 
-  std::vector<int64_t> tmp_shape;
-  (void)std::transform(shape.begin(), shape.end(), std::back_inserter(tmp_shape), SizeToLong);
   if (output_index >= output_shape_and_type_.size()) {
     MS_LOG(ERROR) << "Invalid output_index: " << output_index
                   << " the size of output_shape_and_type_ is: " << output_shape_and_type_.size();
     return false;
   }
 
-  if (tmp_shape.empty()) {
-    tmp_shape = {1};
+  if (shape.empty()) {
+    shape = {1};
   }
-  return UpdateShapeAndType(tmp_shape, NOT_NULL(output_shape_and_type_[output_index]));
+  return UpdateShapeAndType(shape, NOT_NULL(output_shape_and_type_[output_index]));
 }
 
 bool AicpuExtInfoHandler::GetOutputShapeAndType(uint32_t output_index, NotNull<std::vector<int64_t> *> shape,
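// Illustrative aside, not part of the patch: a toy model of the SHP_ANY handling
// above. abstract::Shape::SHP_ANY is the -1 placeholder for an unknown dim;
// before packing the ext-info buffer, any such dim is widened to its known
// maximum. kShpAny and WidenDynamicDims are illustrative stand-ins.
#include <cassert>
#include <cstdint>
#include <vector>

constexpr int64_t kShpAny = -1;  // stand-in for abstract::Shape::SHP_ANY

void WidenDynamicDims(std::vector<int64_t> *shape, const std::vector<int64_t> &max_shape) {
  for (size_t i = 0; i < shape->size(); ++i) {
    if (i < max_shape.size() && (*shape)[i] == kShpAny) {
      (*shape)[i] = max_shape[i];  // fall back to the max possible extent
    }
  }
}

int main() {
  std::vector<int64_t> shape = {kShpAny, 128};
  WidenDynamicDims(&shape, {1024, 128});
  assert(shape[0] == 1024 && shape[1] == 128);
  return 0;
}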
diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_kernel_build.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_kernel_build.cc
index 04df5bdd681..9f01e6e4677 100644
--- a/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_kernel_build.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_kernel_build.cc
@@ -47,7 +47,7 @@ bool SetIOIputSize(const std::shared_ptr<AnfNode> &anf_node, const size_t &input
   MS_EXCEPTION_IF_NULL(anf_node);
   MS_EXCEPTION_IF_NULL(input_size_list);
   for (size_t i = 0; i < input_num; i++) {
-    std::vector<size_t> shape_i = AnfAlgo::GetInputDeviceShape(anf_node, i);
+    auto shape_i = AnfAlgo::GetInputDeviceShape(anf_node, i);
     if (AnfAlgo::GetInputDeviceDataType(anf_node, i) == kObjectTypeString) {
       if (!anf_node->isa<CNode>()) {
         MS_LOG(EXCEPTION) << "anf_node is not CNode.";
@@ -93,7 +93,7 @@ bool SetIOSize(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<
-    std::vector<size_t> shape_i = AnfAlgo::GetOutputDeviceShape(anf_node, i);
+    auto shape_i = AnfAlgo::GetOutputDeviceShape(anf_node, i);
     TypePtr type_ptr = TypeIdToType(AnfAlgo::GetOutputDeviceDataType(anf_node, i));
     int64_t size_i = 1;
     if (!GetShapeSize(shape_i, type_ptr, &size_i)) {
@@ -222,7 +222,7 @@ void SetNodeInputs(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef
     ::mindspore::Tensor *node_inputs = proto->add_inputs();
     MS_EXCEPTION_IF_NULL(node_inputs);
     TypeId input_type = AnfAlgo::GetInputDeviceDataType(anf_node, input_index);
-    std::vector<size_t> input_shape;
+    std::vector<int64_t> input_shape;
     int32_t input_data_type;
     if (input_type == kObjectTypeString) {
       auto cnode = anf_node->cast<CNodePtr>();
@@ -232,7 +232,7 @@ void SetNodeInputs(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef
       MS_EXCEPTION_IF_NULL(value_ptr);
       auto value = GetValue<std::string>(value_ptr);
       input_shape.push_back(1);
-      input_shape.push_back(value.size());
+      input_shape.push_back(static_cast<int64_t>(value.size()));
       input_data_type = AicpuOpUtil::MsTypeToProtoType(kTypeUnknown);
     } else {
       input_shape = AnfAlgo::GetInputDeviceShape(anf_node, input_index);
@@ -266,7 +266,7 @@ void SetNodeOutputs(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef
   for (size_t output_index = 0; output_index < output_num; output_index++) {
     ::mindspore::Tensor *node_outputs = proto->add_outputs();
     MS_EXCEPTION_IF_NULL(node_outputs);
-    std::vector<size_t> output_shape = AnfAlgo::GetOutputDeviceShape(anf_node, output_index);
+    auto output_shape = AnfAlgo::GetOutputDeviceShape(anf_node, output_index);
     mindspore::TensorShape *tensorShape = node_outputs->mutable_tensor_shape();
     MS_EXCEPTION_IF_NULL(tensorShape);
     for (auto item : output_shape) {
@@ -350,7 +350,7 @@ uint64_t SetExtInfoInputShapeType(char *ext_info_buf, uint64_t ext_info_offset,
   auto *inputs = reinterpret_cast<AicpuShapeAndType *>(ext_info_buf + ext_info_offset);
   for (size_t input_index = 0; input_index < input_num; input_index++) {
     TypeId input_type = AnfAlgo::GetInputDeviceDataType(anf_node, input_index);
-    std::vector<size_t> input_shape;
+    std::vector<int64_t> input_shape;
     int32_t input_data_type;
     if (input_type == kObjectTypeString) {
       auto cnode = anf_node->cast<CNodePtr>();
@@ -359,7 +359,7 @@ uint64_t SetExtInfoInputShapeType(char *ext_info_buf, uint64_t ext_info_offset,
       auto value_ptr = GetValueNode(input_node);
       auto value = GetValue<std::string>(value_ptr);
       input_shape.push_back(1);
-      input_shape.push_back(value.size());
+      input_shape.push_back(static_cast<int64_t>(value.size()));
       input_data_type = AicpuOpUtil::MsTypeToProtoType(kTypeUnknown);
     } else {
       input_shape = AnfAlgo::GetInputDeviceShape(anf_node, input_index);
@@ -369,7 +369,7 @@ uint64_t SetExtInfoInputShapeType(char *ext_info_buf, uint64_t ext_info_offset,
 
     size_t input_shape_index = 0;
     for (; input_shape_index < input_shape.size(); input_shape_index++) {
-      inputs[input_index].dims[input_shape_index] = SizeToLong(input_shape[input_shape_index]);
+      inputs[input_index].dims[input_shape_index] = input_shape[input_shape_index];
     }
     if (input_shape.size() < kMaxShapeDims) {
       inputs[input_index].dims[input_shape_index] = LLONG_MIN;
@@ -389,14 +389,14 @@ uint64_t SetExtInfoOutputShapeType(char *ext_info_buf, uint64_t ext_info_offset,
   auto *outputs = reinterpret_cast<AicpuShapeAndType *>(ext_info_buf + ext_info_offset);
   for (size_t output_index = 0; output_index < output_num; output_index++) {
-    std::vector<size_t> output_shape = AnfAlgo::GetOutputDeviceShape(anf_node, output_index);
+    auto output_shape = AnfAlgo::GetOutputDeviceShape(anf_node, output_index);
     TypeId output_type = AnfAlgo::GetOutputDeviceDataType(anf_node, output_index);
     int32_t output_data_type = AicpuOpUtil::MsTypeToProtoType(output_type);
     outputs[output_index].type = output_data_type;
 
     size_t output_shape_index = 0;
     for (; output_shape_index < output_shape.size(); output_shape_index++) {
-      outputs[output_index].dims[output_shape_index] = SizeToLong(output_shape[output_shape_index]);
+      outputs[output_index].dims[output_shape_index] = output_shape[output_shape_index];
     }
     if (output_shape_index < kMaxShapeDims) {
       outputs[output_index].dims[output_shape_index] = LLONG_MIN;
diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/dynamic_aicpu_kernel_mod.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/dynamic_aicpu_kernel_mod.cc
index 3c3f873bb61..771c2dd6ebb 100644
--- a/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/dynamic_aicpu_kernel_mod.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/dynamic_aicpu_kernel_mod.cc
@@ -212,16 +212,14 @@ bool DynamicAicpuOpKernelMod::UpdateOutputShapeFromExtInfo(const CNodePtr &cnode
   MS_EXCEPTION_IF_NULL(ext_info_handler_);
 
   std::vector<TypeId> type_ids;
-  std::vector<std::vector<size_t>> shapes;
+  std::vector<ShapeVector> shapes;
   auto output_num = common::AnfAlgo::GetOutputTensorNum(cnode);
   for (size_t i = 0; i < output_num; ++i) {
     std::vector<int64_t> shape;
     TypeId type_id;
     (void)ext_info_handler_->GetOutputShapeAndType(SizeToUint(i), NOT_NULL(&shape), NOT_NULL(&type_id));
     type_ids.emplace_back(type_id);
-    std::vector<size_t> size_t_shape;
-    std::transform(shape.begin(), shape.end(), std::back_inserter(size_t_shape), LongToSize);
-    shapes.emplace_back(size_t_shape);
+    shapes.emplace_back(shape);
   }
 
   common::AnfAlgo::SetOutputInferTypeAndShape(type_ids, shapes, cnode.get());
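// Illustrative aside, not part of the patch: a sketch of the dims-array
// convention the aicpu ext-info writers above rely on: a fixed-size
// dims[kMaxShapeDims] array whose first unused slot is set to LLONG_MIN as an
// end-of-shape sentinel. Struct layout and names are illustrative stand-ins.
#include <cassert>
#include <climits>
#include <cstdint>
#include <vector>

constexpr size_t kMaxShapeDims = 8;

struct ShapeAndType {
  int32_t type;
  int64_t dims[kMaxShapeDims];
};

void PackDims(const std::vector<int64_t> &shape, ShapeAndType *out) {
  size_t i = 0;
  for (; i < shape.size(); ++i) {
    out->dims[i] = shape[i];  // with int64 shapes, no SizeToLong is needed
  }
  if (i < kMaxShapeDims) {
    out->dims[i] = LLONG_MIN;  // sentinel: the reader stops here
  }
}

int main() {
  ShapeAndType packed{};
  PackDims({2, 3, 4}, &packed);
  assert(packed.dims[2] == 4 && packed.dims[3] == LLONG_MIN);
  return 0;
}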
Node info: " << cnode->DebugString(); } diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hccl_kernel.h b/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hccl_kernel.h index 487aba461f2..0fcbc313ca7 100644 --- a/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hccl_kernel.h +++ b/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hccl_kernel.h @@ -56,8 +56,8 @@ class HcclKernel : public AscendKernelMod { const std::map &inputsOnHost = std::map()) override; protected: - std::vector> hccl_kernel_input_shape_list_; - std::vector> hccl_kernel_output_shape_list_; + std::vector> hccl_kernel_input_shape_list_; + std::vector> hccl_kernel_output_shape_list_; std::vector hccl_data_type_list_; std::vector hccl_format_list_; uint64_t hccl_count_; diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc index b0951f8b1b7..5c3f188901c 100644 --- a/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc +++ b/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.cc @@ -32,24 +32,24 @@ bool IsPyNativeMode() { } } // namespace -bool HcomUtil::GetKernelInputShape(const AnfNodePtr &anf_node, vector> *hccl_kernel_intput_shape_list) { +bool HcomUtil::GetKernelInputShape(const AnfNodePtr &anf_node, vector *hccl_kernel_intput_shape_list) { MS_EXCEPTION_IF_NULL(anf_node); MS_EXCEPTION_IF_NULL(hccl_kernel_intput_shape_list); size_t input_num = common::AnfAlgo::GetInputTensorNum(anf_node); for (size_t i = 0; i < input_num; ++i) { - std::vector shape_i = AnfAlgo::GetInputDeviceShape(anf_node, i); + auto shape_i = AnfAlgo::GetInputDeviceShape(anf_node, i); hccl_kernel_intput_shape_list->emplace_back(shape_i); } return true; } -bool HcomUtil::GetKernelOutputShape(const AnfNodePtr &anf_node, vector> *hccl_kernel_output_shape_list) { +bool HcomUtil::GetKernelOutputShape(const AnfNodePtr &anf_node, vector *hccl_kernel_output_shape_list) { MS_EXCEPTION_IF_NULL(anf_node); MS_EXCEPTION_IF_NULL(hccl_kernel_output_shape_list); size_t output_num = common::AnfAlgo::GetOutputTensorNum(anf_node); for (size_t i = 0; i < output_num; ++i) { - std::vector shape_i = AnfAlgo::GetOutputDeviceShape(anf_node, i); + auto shape_i = AnfAlgo::GetOutputDeviceShape(anf_node, i); (void)hccl_kernel_output_shape_list->emplace_back(shape_i); } @@ -91,19 +91,19 @@ bool HcomUtil::GetHcomDataType(const AnfNodePtr &anf_node, vector return true; } -bool HcomUtil::GetHcclOpSize(const HcclDataType &data_type, const vector &shape, size_t *size) { +bool HcomUtil::GetHcclOpSize(const HcclDataType &data_type, const ShapeVector &shape, size_t *size) { MS_EXCEPTION_IF_NULL(size); - size_t tmp_size = 1; + int64_t tmp_size = 1; uint32_t type_size = 4; for (size_t i = 0; i < shape.size(); i++) { - tmp_size = SizetMulWithOverflowCheck(tmp_size, shape[i]); + tmp_size = LongMulWithOverflowCheck(tmp_size, shape[i]); } if (!GetHcomTypeSize(data_type, &type_size)) { return false; } - *size = SizetMulWithOverflowCheck(tmp_size, type_size); + *size = SizetMulWithOverflowCheck(LongToSize(tmp_size), type_size); MS_LOG(DEBUG) << "size[" << *size << "]"; return true; @@ -121,7 +121,7 @@ bool HcomUtil::GetHcomTypeSize(const HcclDataType &data_type, uint32_t *size) { } bool HcomUtil::GetHcomCount(const AnfNodePtr &anf_node, const vector &data_type_list, - const vector> &shape_list, uint64_t *total_count) { + const vector &shape_list, uint64_t *total_count) { MS_EXCEPTION_IF_NULL(anf_node); MS_EXCEPTION_IF_NULL(total_count); const uint32_t align_size = 512; diff --git 
diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.h b/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.h
index a68631ea90b..9cc966094bb 100644
--- a/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.h
+++ b/mindspore/ccsrc/plugin/device/ascend/kernel/hccl/hcom_util.h
@@ -25,6 +25,7 @@
 #include "hccl/base.h"
 #include "include/common/utils/contract.h"
 #include "hccl/hccl_types.h"
+#include "utils/shape_utils.h"
 
 namespace mindspore {
 using std::map;
@@ -57,14 +58,14 @@ static map<HcclDataType, uint32_t> kConstOpHcomDataTypeSizeMap = {
 
 class HcomUtil {
  public:
-  static bool GetKernelInputShape(const AnfNodePtr &anf_node, vector<vector<size_t>> *hccl_kernel_shape_list);
-  static bool GetKernelOutputShape(const AnfNodePtr &anf_node, vector<vector<size_t>> *hccl_kernel_shape_list);
+  static bool GetKernelInputShape(const AnfNodePtr &anf_node, vector<ShapeVector> *hccl_kernel_shape_list);
+  static bool GetKernelOutputShape(const AnfNodePtr &anf_node, vector<ShapeVector> *hccl_kernel_shape_list);
   static ::HcclDataType ConvertHcclType(TypeId type_id);
   static bool GetHcomDataType(const AnfNodePtr &anf_node, vector<HcclDataType> *data_type_list);
-  static bool GetHcclOpSize(const HcclDataType &data_type, const vector<size_t> &shape, size_t *size);
+  static bool GetHcclOpSize(const HcclDataType &data_type, const ShapeVector &shape, size_t *size);
   static bool GetHcomTypeSize(const HcclDataType &data_type, uint32_t *size);
   static bool GetHcomCount(const AnfNodePtr &anf_node, const vector<HcclDataType> &data_type_list,
-                           const vector<vector<size_t>> &shape_list, uint64_t *total_count);
+                           const vector<ShapeVector> &shape_list, uint64_t *total_count);
   static bool GetHcomOperationType(const AnfNodePtr &anf_node, HcclReduceOp *op_type);
   static bool GetHcomRootId(const AnfNodePtr &anf_node, uint32_t *root_id);
   static bool GetHcomSrcRank(const AnfNodePtr &anf_node, uint32_t *src_rank);
diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/host/dynamic_broadcast_gradient_args_kernel.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/host/dynamic_broadcast_gradient_args_kernel.cc
index 3178ab3987f..b6a13c9637f 100644
--- a/mindspore/ccsrc/plugin/device/ascend/kernel/host/dynamic_broadcast_gradient_args_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/kernel/host/dynamic_broadcast_gradient_args_kernel.cc
@@ -134,8 +134,8 @@ std::vector<int64_t> GetInputShape(const CNodePtr &cnode, size_t index) {
                       << trace::DumpSourceLines(cnode);
   }
 
-  size_t x_num = shape_x[0];
-  std::vector<int64_t> x{SizeToLong(x_num)};
+  auto x_num = shape_x[0];
+  std::vector<int64_t> x{x_num};
   auto x_shape_value = std::make_shared<tensor::Tensor>(type_x, x);
 
   // The second parameter must be false, otherwise the device address cannot be released and allocated, and the
@@ -208,8 +208,8 @@ void DynamicBroadcastGradientArgsKernelMod::Execute() {
   auto r0_size = SetOutputValue(cnode, grad_reduce_idx, 0, input_shapes[0].size());
   auto r1_size = SetOutputValue(cnode, grad_reduce_idx, 1, input_shapes[1].size());
 
-  std::vector<int64_t> r0_shp{r0_size};
-  std::vector<int64_t> r1_shp{r1_size};
+  ShapeVector r0_shp{SizeToLong(r0_size)};
+  ShapeVector r1_shp{SizeToLong(r1_size)};
   auto output_type = TypeId::kNumberTypeInt64;
   common::AnfAlgo::SetOutputInferTypeAndShape({output_type, output_type}, {r0_shp, r1_shp}, cnode.get());
   MS_LOG(INFO) << "Execute DynamicBroadcastGradientArgsKernel End";
diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/host/dynamic_shape_kernel.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/host/dynamic_shape_kernel.cc
index 12f92bc9fb2..a59df563349 100644
--- a/mindspore/ccsrc/plugin/device/ascend/kernel/host/dynamic_shape_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/kernel/host/dynamic_shape_kernel.cc
@@ -45,7 +45,7 @@ void TensorShapeKernelMod::Execute() {
   auto data_ptr = static_cast<int64_t *>(output_tensor_for_sync->data_c());
   for (size_t i = 0; i < prev_output_shape.size(); ++i) {
     MS_LOG(INFO) << "DEBUG prev_output_shape[" << i << "]:" << prev_output_shape[i];
-    *(data_ptr + i) = SizeToLong(prev_output_shape[i]);
+    *(data_ptr + i) = prev_output_shape[i];
   }
 
   auto output_addr = AnfAlgo::GetOutputAddr(cnode, 0);
@@ -94,7 +94,7 @@ void TensorShapeKernelMod::Execute(const std::vector<AddressPtr> &inputs, const
   auto data_ptr = static_cast<int64_t *>(output_tensor_for_sync->data_c());
   for (size_t i = 0; i < prev_output_shape.size(); ++i) {
     MS_LOG(INFO) << "DEBUG prev_output_shape[" << i << "]:" << prev_output_shape[i];
-    *(data_ptr + i) = SizeToLong(prev_output_shape[i]);
+    *(data_ptr + i) = prev_output_shape[i];
   }
 
   if (outputs.empty()) {
diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/host/host_kernel_mod.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/host/host_kernel_mod.cc
index 9a5b6562f79..07ba0d04c0b 100644
--- a/mindspore/ccsrc/plugin/device/ascend/kernel/host/host_kernel_mod.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/kernel/host/host_kernel_mod.cc
@@ -50,7 +50,7 @@ bool HostKernelMod::Init(const AnfNodePtr &anf_node) {
   size_t output_num = common::AnfAlgo::GetOutputTensorNum(anf_node);
 
   for (size_t i = 0; i < input_num; i++) {
-    std::vector<size_t> shape_i = AnfAlgo::GetInputDeviceShape(anf_node, i);
+    auto shape_i = AnfAlgo::GetInputDeviceShape(anf_node, i);
     TypePtr type_ptr = TypeIdToType(AnfAlgo::GetInputDeviceDataType(anf_node, i));
     int64_t size_i = 1;
     if (!GetShapeSize(shape_i, type_ptr, &size_i)) {
@@ -60,7 +60,7 @@ bool HostKernelMod::Init(const AnfNodePtr &anf_node) {
 
   for (size_t i = 0; i < output_num; i++) {
-    std::vector<size_t> shape_i = AnfAlgo::GetOutputDeviceShape(anf_node, i);
+    auto shape_i = AnfAlgo::GetOutputDeviceShape(anf_node, i);
     TypePtr type_ptr = TypeIdToType(AnfAlgo::GetOutputDeviceDataType(anf_node, i));
     MS_EXCEPTION_IF_NULL(type_ptr);
     int64_t size_i = 1;
diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/host/reshape_kernel.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/host/reshape_kernel.cc
index 8b1764592e7..4fb1aee64e6 100644
--- a/mindspore/ccsrc/plugin/device/ascend/kernel/host/reshape_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/kernel/host/reshape_kernel.cc
@@ -45,7 +45,7 @@ std::vector<int64_t> GetInputValue(const CNodePtr &cnode, size_t index) {
                       << trace::DumpSourceLines(cnode);
   }
 
-  size_t x_num = shape_x[0];
+  auto x_num = shape_x[0];
   std::vector<int64_t> x{SizeToLong(x_num)};
   auto x_shape_value = std::make_shared<tensor::Tensor>(type_x, x);
@@ -60,7 +60,7 @@ std::vector<int64_t> GetInputValue(const CNodePtr &cnode, size_t index) {
   } else {
     auto x_value = reinterpret_cast<int64_t *>(x_shape_value->data_c());
     MS_EXCEPTION_IF_NULL(x_value);
-    for (size_t i = 0; i < x_num; i++) {
+    for (int64_t i = 0; i < x_num; i++) {
       input_shape.push_back(static_cast<int64_t>(*x_value));
       ++x_value;
     }
diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/rts/memcpy_async.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/rts/memcpy_async.cc
index bd3a4b300db..790175a75a2 100644
--- a/mindspore/ccsrc/plugin/device/ascend/kernel/rts/memcpy_async.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/kernel/rts/memcpy_async.cc
@@ -92,10 +92,10 @@ void MemCpyAsyncKernel::GetInputOutputTotalCount(const AnfNodePtr &anf_node) {
     MS_LOG(EXCEPTION) << "MemCpyAsync input size is not 1, got " << input_size;
   }
   size_t type_size = abstract::TypeIdSize(input_type_id_);
-  std::vector<size_t> shape_i = AnfAlgo::GetInputDeviceShape(anf_node, 0);
+  auto shape_i = AnfAlgo::GetInputDeviceShape(anf_node, 0);
   size_t total_size = 1;
   for (size_t i = 0; i < shape_i.size(); i++) {
-    total_size = SizetMulWithOverflowCheck(total_size, shape_i[i]);
+    total_size = SizetMulWithOverflowCheck(total_size, static_cast<size_t>(shape_i[i]));
   }
   total_size = SizetMulWithOverflowCheck(total_size, type_size);
   MS_LOG(INFO) << "MemCpyAsync size[" << total_size << "]";
diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/rts/tensor_copy_slices.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/rts/tensor_copy_slices.cc
index ee12bec1d68..6b5ffb3f28c 100644
--- a/mindspore/ccsrc/plugin/device/ascend/kernel/rts/tensor_copy_slices.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/kernel/rts/tensor_copy_slices.cc
@@ -99,12 +99,9 @@ void TensorCopySlices::GetInputOutputInfo(const AnfNodePtr &anf_node) {
                       << " output_type_id_:" << output_type_id_;
   }
 
-  auto input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(anf_node, 0);
-  auto update_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(anf_node, 1);
-  auto output_shape = AnfAlgo::GetOutputDeviceShape(anf_node, 0);
-  CastShapeSizeToLong(input_shape, &input_shape_);
-  CastShapeSizeToLong(update_shape, &update_shape_);
-  CastShapeSizeToLong(output_shape, &output_shape_);
+  input_shape_ = common::AnfAlgo::GetPrevNodeOutputInferShape(anf_node, 0);
+  update_shape_ = common::AnfAlgo::GetPrevNodeOutputInferShape(anf_node, 1);
+  output_shape_ = AnfAlgo::GetOutputDeviceShape(anf_node, 0);
 }
 
 void *TensorCopySlices::VoidPointerOffset(void *ptr, size_t offset) const {
diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_dynaminc_shape_util.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_dynaminc_shape_util.cc
index 3d20897a5fb..598a579a899 100644
--- a/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_dynaminc_shape_util.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_dynaminc_shape_util.cc
@@ -121,6 +121,7 @@ RangePair TbeDynamicShapeUtil::GetInputDynamicRange(const AnfNodePtr &anf_node,
   std::string reshape_type = AnfAlgo::GetInputReshapeType(anf_node, index);
   trans::ShapeRangeTransfer shapeRangeTransfer;
   RangePair ret;
+
   if (input_range_min.empty() && input_range_max.empty()) {
     auto prev_node = common::AnfAlgo::GetPrevNodeOutput(anf_node, index);
     MS_EXCEPTION_IF_NULL(prev_node.first);
diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_kernel_select/tbe_kernel_broadcast_selecter.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_kernel_select/tbe_kernel_broadcast_selecter.cc
index 7279ae91b59..12546d12270 100644
--- a/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_kernel_select/tbe_kernel_broadcast_selecter.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_kernel_select/tbe_kernel_broadcast_selecter.cc
@@ -103,9 +103,9 @@ bool TbeKernelBroadCastSelecter::IsBroadCastSupport5HD(SupportFormat *support_fo
     }
   }
   auto shape_tmp = input_shapes_[0];
-  auto broadcast_c_axis = std::any_of(
-    input_shapes_.begin(), input_shapes_.end(),
-    [&shape_tmp](const std::vector<size_t> &elem) { return shape_tmp.at(kChannelC) != elem.at(kChannelC); });
+  auto broadcast_c_axis =
+    std::any_of(input_shapes_.begin(), input_shapes_.end(),
+                [&shape_tmp](const ShapeVector &elem) { return shape_tmp.at(kChannelC) != elem.at(kChannelC); });
   if (broadcast_c_axis) {
     MS_LOG(INFO) << "This node broadcast c channel.";
     return false;
@@ -186,7 +186,7 @@ bool TbeKernelBroadCastSelecter::IsBroadCastSupportC1HWNCoC0(SupportFormat *supp
   }
   auto shape_tmp = input_shapes_[0];
input_shapes_[0]; auto broadcast_nc_axis = - std::any_of(input_shapes_.begin(), input_shapes_.end(), [&shape_tmp](const std::vector &elem) { + std::any_of(input_shapes_.begin(), input_shapes_.end(), [&shape_tmp](const ShapeVector &elem) { return (shape_tmp.at(kChannelC) != elem.at(kChannelC) || shape_tmp.at(kChannelN) != elem.at(kChannelN)); }); if (broadcast_nc_axis) { @@ -230,7 +230,7 @@ bool TbeKernelBroadCastSelecter::IsBroadCastSupportFracNZ(SupportFormat *support } } else { auto less_2dims = std::any_of(input_shapes_.begin(), input_shapes_.end(), - [](const std::vector &elem) { return elem.size() < kShape2dDims; }); + [](const ShapeVector &elem) { return elem.size() < kShape2dDims; }); if (less_2dims) { MS_LOG(INFO) << "This node dim less 2."; return false; @@ -238,7 +238,7 @@ bool TbeKernelBroadCastSelecter::IsBroadCastSupportFracNZ(SupportFormat *support auto shape_tmp = input_shapes_[0]; auto broadcast_last_dim = - std::any_of(input_shapes_.begin(), input_shapes_.end(), [&shape_tmp](const std::vector &elem) { + std::any_of(input_shapes_.begin(), input_shapes_.end(), [&shape_tmp](const ShapeVector &elem) { return (shape_tmp.at(shape_tmp.size() - 1) != elem.at(elem.size() - 1)) || (shape_tmp.at(shape_tmp.size() - 2) != elem.at(elem.size() - 2)); }); @@ -285,9 +285,9 @@ bool TbeKernelBroadCastSelecter::IsBroadCastSupportNDC1HWC0(SupportFormat *suppo } } auto shape_tmp = input_shapes_[0]; - auto broadcast_c_axis = std::any_of( - input_shapes_.begin(), input_shapes_.end(), - [&shape_tmp](const std::vector &elem) { return shape_tmp.at(kChannelC) != elem.at(kChannelC); }); + auto broadcast_c_axis = + std::any_of(input_shapes_.begin(), input_shapes_.end(), + [&shape_tmp](const ShapeVector &elem) { return shape_tmp.at(kChannelC) != elem.at(kChannelC); }); if (broadcast_c_axis) { MS_LOG(INFO) << "This node broadcast c channel."; return false; @@ -300,13 +300,9 @@ bool TbeKernelBroadCastSelecter::IsBroadCastSupportNDC1HWC0(SupportFormat *suppo return true; } -bool TbeKernelBroadCastSelecter::Is4DShape(const std::vector &shape) const { - return shape.size() == kShape4dDims; -} +bool TbeKernelBroadCastSelecter::Is4DShape(const ShapeVector &shape) const { return shape.size() == kShape4dDims; } -bool TbeKernelBroadCastSelecter::Is5DShape(const std::vector &shape) const { - return shape.size() == kShape5dDims; -} +bool TbeKernelBroadCastSelecter::Is5DShape(const ShapeVector &shape) const { return shape.size() == kShape5dDims; } bool TbeKernelBroadCastSelecter::IsSameShape() const { auto shape = input_shapes_.begin(); @@ -323,14 +319,14 @@ bool TbeKernelBroadCastSelecter::IsSameShape() const { return true; } -void TbeKernelBroadCastSelecter::PadScalarShape(std::vector *shape) const { +void TbeKernelBroadCastSelecter::PadScalarShape(ShapeVector *shape) const { MS_EXCEPTION_IF_NULL(shape); if (shape->empty()) { shape->emplace_back(1); } } -bool TbeKernelBroadCastSelecter::IsScalarShape(const std::vector &shape) const { +bool TbeKernelBroadCastSelecter::IsScalarShape(const ShapeVector &shape) const { return (shape.size() == 1 && shape[0] == 1); } diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_kernel_select/tbe_kernel_broadcast_selecter.h b/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_kernel_select/tbe_kernel_broadcast_selecter.h index aa99888e6f0..21ffca3b97d 100644 --- a/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_kernel_select/tbe_kernel_broadcast_selecter.h +++ b/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_kernel_select/tbe_kernel_broadcast_selecter.h @@ 
-38,10 +38,10 @@ class TbeKernelBroadCastSelecter {
 
  private:
   bool IsSameShape() const;
-  void PadScalarShape(std::vector<size_t> *shape) const;
-  bool Is4DShape(const std::vector<size_t> &shape) const;
-  bool Is5DShape(const std::vector<size_t> &shape) const;
-  bool IsScalarShape(const std::vector<size_t> &shape) const;
+  void PadScalarShape(ShapeVector *shape) const;
+  bool Is4DShape(const ShapeVector &shape) const;
+  bool Is5DShape(const ShapeVector &shape) const;
+  bool IsScalarShape(const ShapeVector &shape) const;
   bool HasScalarInput() const;
   void GenOutputSupportFormat(const std::string &support_format, SupportFormatItem *output_support_item) const;
   void AssignSupportFormat(const std::string &support_format_str, SupportFormat *support_format) const;
@@ -49,8 +49,8 @@ class TbeKernelBroadCastSelecter {
   CNodePtr cnode_ptr_;
   size_t input_num_{};
   size_t output_num_{};
-  std::vector<std::vector<size_t>> input_shapes_;
-  std::vector<std::vector<size_t>> output_shapes_;
+  std::vector<ShapeVector> input_shapes_;
+  std::vector<ShapeVector> output_shapes_;
 };
 }  // namespace kernel
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_kernel_select/tbe_kernel_reduce_selecter.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_kernel_select/tbe_kernel_reduce_selecter.cc
index d06ebed9d4e..b6883d9fd9b 100644
--- a/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_kernel_select/tbe_kernel_reduce_selecter.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_kernel_select/tbe_kernel_reduce_selecter.cc
@@ -153,11 +153,11 @@ void TbeKernelReduceSelecter::AssignSupportFormat(const std::string &support_for
   (void)support_format->output_format.emplace_back(output_support_format);
 }
 
-bool TbeKernelReduceSelecter::Is4DShape(const std::vector<size_t> &shape) const { return shape.size() == kShape4dDims; }
+bool TbeKernelReduceSelecter::Is4DShape(const ShapeVector &shape) const { return shape.size() == kShape4dDims; }
 
-bool TbeKernelReduceSelecter::Is5DShape(const std::vector<size_t> &shape) const { return shape.size() == kShape5dDims; }
+bool TbeKernelReduceSelecter::Is5DShape(const ShapeVector &shape) const { return shape.size() == kShape5dDims; }
 
-void TbeKernelReduceSelecter::PadScalarShape(std::vector<size_t> *shape) const {
+void TbeKernelReduceSelecter::PadScalarShape(ShapeVector *shape) const {
   MS_EXCEPTION_IF_NULL(shape);
   if (shape->empty()) {
     (void)shape->emplace_back(1);
diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_kernel_select/tbe_kernel_reduce_selecter.h b/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_kernel_select/tbe_kernel_reduce_selecter.h
index 185689d63f4..47b1f66e6c4 100644
--- a/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_kernel_select/tbe_kernel_reduce_selecter.h
+++ b/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_kernel_select/tbe_kernel_reduce_selecter.h
@@ -38,12 +38,12 @@ class TbeKernelReduceSelecter {
   bool IsFracZAndC1HWNCoC0Common(const std::string &format, SupportFormat *support_format) const;
   void GetReduceAttrKeepDim();
   void AssignSupportFormat(const std::string &support_format_str, SupportFormat *support_format) const;
-  bool Is4DShape(const std::vector<size_t> &shape) const;
-  bool Is5DShape(const std::vector<size_t> &shape) const;
-  void PadScalarShape(std::vector<size_t> *shape) const;
+  bool Is4DShape(const ShapeVector &shape) const;
+  bool Is5DShape(const ShapeVector &shape) const;
+  void PadScalarShape(ShapeVector *shape) const;
   CNodePtr cnode_ptr_;
-  std::vector<size_t> input_shape_{};
-  std::vector<size_t> output_shape_{};
+  ShapeVector input_shape_{};
+  ShapeVector output_shape_{};
   std::vector<int64_t> axis_{};
   bool keep_dims_ = false;
 };
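The selecter classes keep their logic untouched; only the container type in the signatures changes. A hedged sketch of the migrated helpers (ShapeVector alias and the 4D constant are assumptions):

#include <cstdint>
#include <vector>

using ShapeVector = std::vector<int64_t>;

// The predicates are element-type-agnostic, so only the parameter type moves;
// PadScalarShape keeps its rank-0 -> {1} rule unchanged.
struct ShapeChecks {
  bool Is4DShape(const ShapeVector &shape) const { return shape.size() == 4; }
  void PadScalarShape(ShapeVector *shape) const {
    if (shape->empty()) {
      (void)shape->emplace_back(1);  // treat a scalar as shape {1}
    }
  }
};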
diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_kernel_select/tbe_kernel_select.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_kernel_select/tbe_kernel_select.cc
index 414594dec51..966358a677a 100644
--- a/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_kernel_select/tbe_kernel_select.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_kernel_select/tbe_kernel_select.cc
@@ -337,7 +337,7 @@ bool TbeKernelSelect::FilterInVaildShape(const KernelBuildInfoIter &kernel_build
   return true;
 }
 
-bool TbeKernelSelect::IsShapeMatchFormat(const std::vector<size_t> &shape, const std::string &format) {
+bool TbeKernelSelect::IsShapeMatchFormat(const ShapeVector &shape, const std::string &format) {
   if (format == kOpFormat_DEFAULT) {
     return true;
   }
diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_kernel_select/tbe_kernel_select.h b/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_kernel_select/tbe_kernel_select.h
index 1ec1ccf4b71..c88c55c482d 100644
--- a/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_kernel_select/tbe_kernel_select.h
+++ b/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_kernel_select/tbe_kernel_select.h
@@ -49,7 +49,7 @@ class TbeKernelSelect {
   void GetReducePatternKernelInfo(const OpInfo &op_info);
   void FilterInVaildKernelInfo(const OpInfo &op_info);
   bool FilterInVaildShape(const KernelBuildInfoIter &kernel_build_info_iter, bool is_dynamic_input);
-  static bool IsShapeMatchFormat(const std::vector<size_t> &shape, const std::string &format);
+  static bool IsShapeMatchFormat(const ShapeVector &shape, const std::string &format);
   bool TbeCheckSupported(const KernelBuildInfoIter &kernel_build_info_iter);
   static void SetTbeBuildCommonInfo(const OpInfo &op_info, KernelBuildInfo::KernelBuildInfoBuilder *builder);
   std::vector<int64_t> GetNodeDynamicInputs();
diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tiling/op_tiling_adapter.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tiling/op_tiling_adapter.cc
index 3b71ebcc67b..21f19e4fc7b 100644
--- a/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tiling/op_tiling_adapter.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tiling/op_tiling_adapter.cc
@@ -108,21 +108,15 @@ void OpTilingCalculateAdapter::ConvertInputShapeAndType(const CNodePtr &node, ::
   auto ms_dtype = AnfAlgo::GetOutputDeviceDataType(input_node, input_index);
 
   // ge info
-  std::vector<int64_t> ge_shape;
-  std::vector<int64_t> ge_ori_shape;
   ::ge::DataType ge_dtype = ascend::GeTypesConvert::TransTypeIdToGeDataType(ms_dtype);
   ::ge::Format ge_format = ascend::GeTypesConvert::GetGeFormat(ms_format, ms_shape.size());
-  std::transform(ms_shape.begin(), ms_shape.end(), std::back_inserter(ge_shape),
-                 [](size_t s) { return static_cast<int64_t>(s); });
-  std::transform(ms_ori_shape.begin(), ms_ori_shape.end(), std::back_inserter(ge_ori_shape),
-                 [](size_t s) { return static_cast<int64_t>(s); });
   auto input_name = GetInputName(node, real_index);
   ::ge::GeTensorDesc ge_tensor_desc;
   ge_tensor_desc.SetFormat(ge_format);
   ge_tensor_desc.SetDataType(ge_dtype);
-  ge_tensor_desc.SetShape(::ge::GeShape(ge_shape));
-  ge_tensor_desc.SetOriginShape(::ge::GeShape(ge_ori_shape));
+  ge_tensor_desc.SetShape(::ge::GeShape(ms_shape));
+  ge_tensor_desc.SetOriginShape(::ge::GeShape(ms_ori_shape));
   ge_tensor_desc.SetName(input_name);
   (void)(*op_desc)->AddInputDesc(input_name, ge_tensor_desc);
 }
@@ -138,20 +132,15 @@ void OpTilingCalculateAdapter::ConvertOutputShapeAndType(const CNodePtr &node, :
   auto ms_format = AnfAlgo::GetOutputFormat(node, i);
   auto ms_dtype =
AnfAlgo::GetOutputDeviceDataType(node, i); - std::vector ge_shape; - std::vector ge_ori_shape; ::ge::DataType ge_dtype = ascend::GeTypesConvert::TransTypeIdToGeDataType(ms_dtype); ::ge::Format ge_format = ascend::GeTypesConvert::GetGeFormat(ms_format, ms_shape.size()); - std::transform(ms_shape.begin(), ms_shape.end(), std::back_inserter(ge_shape), - [](size_t s) { return static_cast(s); }); - std::transform(ms_ori_shape.begin(), ms_ori_shape.end(), std::back_inserter(ge_ori_shape), - [](size_t s) { return static_cast(s); }); + auto output_name = GetOutputName(node, i); ::ge::GeTensorDesc ge_tensor_desc; ge_tensor_desc.SetFormat(ge_format); ge_tensor_desc.SetDataType(ge_dtype); - ge_tensor_desc.SetShape(::ge::GeShape(ge_shape)); - ge_tensor_desc.SetOriginShape(::ge::GeShape(ge_ori_shape)); + ge_tensor_desc.SetShape(::ge::GeShape(ms_shape)); + ge_tensor_desc.SetOriginShape(::ge::GeShape(ms_ori_shape)); ge_tensor_desc.SetName(output_name); (void)(*op_desc)->AddOutputDesc(output_name, ge_tensor_desc); } @@ -224,18 +213,12 @@ void OpTilingCalculateAdapter::ConvertAtomicCompileInfo(const CNodePtr &node, :: MS_EXCEPTION_IF_NULL(tensor_data); ::ge::OpDescPtr op_desc = std::make_shared<::ge::OpDesc>(name, CONSTANTOP); auto ms_format = AnfAlgo::GetPrevNodeOutputFormat(node, index); - auto ms_ori_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(node, index); + auto ge_ori_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(node, index); auto ms_dtype = AnfAlgo::GetPrevNodeOutputDeviceDataType(node, index); - auto ms_shape = AnfAlgo::GetInputDeviceShape(node, index); + auto ge_shape = AnfAlgo::GetInputDeviceShape(node, index); - std::vector ge_shape; - std::vector ge_ori_shape; - std::transform(ms_shape.begin(), ms_shape.end(), std::back_inserter(ge_shape), - [](size_t s) { return static_cast(s); }); - std::transform(ms_ori_shape.begin(), ms_ori_shape.end(), std::back_inserter(ge_ori_shape), - [](size_t s) { return static_cast(s); }); ::ge::DataType ge_dtype = ascend::GeTypesConvert::TransTypeIdToGeDataType(ms_dtype); - ::ge::Format ge_format = ascend::GeTypesConvert::GetGeFormat(ms_format, ms_shape.size()); + ::ge::Format ge_format = ascend::GeTypesConvert::GetGeFormat(ms_format, ge_shape.size()); ::ge::GeTensorDesc ge_tensor_desc; ge_tensor_desc.SetFormat(ge_format); ge_tensor_desc.SetDataType(ge_dtype); diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/ascend_helper.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/ascend_helper.cc index 43e50847cdc..977fd689a0d 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/ascend_helper.cc +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/ascend_helper.cc @@ -35,7 +35,7 @@ namespace mindspore { namespace opt { using KernelBuildInfoBuilder = kernel::KernelBuildInfo::KernelBuildInfoBuilder; namespace { -bool NeedInsertTransData(const std::vector &origin_shape, const std::string &format) { +bool NeedInsertTransData(const ShapeVector &origin_shape, const std::string &format) { bool shape_check = origin_shape.size() > 1 || (origin_shape.size() == 1 && origin_shape[0] % kCubeSize != 0); return kCommonFormatSet.find(format) == kCommonFormatSet.end() && (shape_check || format == kOpFormat_ND_RNN_BIAS); } @@ -72,10 +72,8 @@ CNodePtr CreateReshapeNode(const FuncGraphPtr &func_graph, const AnfNodePtr &inp common::AnfAlgo::SetNodeAttr(kAttrReshapePaddingAxis, MakeValue(padding_axis), reshape); } } else { - std::vector shape_size_t; - std::transform(dst_shape->shape().begin(), dst_shape->shape().end(), std::back_inserter(shape_size_t), 
LongToSize); common::AnfAlgo::SetOutputInferTypeAndShape({common::AnfAlgo::GetOutputInferDataType(input_node, 0)}, - {shape_size_t}, reshape.get()); + {dst_shape->shape()}, reshape.get()); } common::AnfAlgo::SetNodeAttr(kAttrVisited, MakeValue(true), reshape); @@ -157,7 +155,7 @@ AnfNodePtr GetTransInputNodePtr(const FuncGraphPtr &func_graph, const CNodePtr & MS_EXCEPTION_IF_NULL(input_node); common::AnfAlgo::SetNodeInput(node, input_node, index); } - std::vector origin_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(node, index); + ShapeVector origin_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(node, index); std::string dest_format = AnfAlgo::GetInputFormat(node, index); if (NeedInsertTransData(origin_shape, dest_format)) { MS_LOG(DEBUG) << node->DebugString() << "Insert transdata " << AnfAlgo::GetInputFormat(node, index) @@ -176,7 +174,7 @@ AnfNodePtr InsertTransOpForSingleOutput(const FuncGraphPtr &func_graph, const An MS_EXCEPTION_IF_NULL(node); MS_EXCEPTION_IF_NULL(func_graph); std::string output_format = AnfAlgo::GetOutputFormat(node, 0); - std::vector origin_shape = common::AnfAlgo::GetOutputInferShape(node, 0); + auto origin_shape = common::AnfAlgo::GetOutputInferShape(node, 0); if (output_format == kOpFormat_NC1KHKWHWC0) { MS_LOG(EXCEPTION) << "Got the hw format " << output_format << "when insert the transdata node " << node->DebugString() << trace::DumpSourceLines(node); @@ -211,7 +209,7 @@ AnfNodePtr InsertTransOpForMultipleOutput(const FuncGraphPtr &func_graph, const << node->DebugString() << trace::DumpSourceLines(node); } auto tuple_getitem = CreatTupleGetItemNode(func_graph, node, output_idx); - std::vector origin_shape = common::AnfAlgo::GetOutputInferShape(node, output_idx); + auto origin_shape = common::AnfAlgo::GetOutputInferShape(node, output_idx); if (NeedInsertTransData(origin_shape, output_format)) { auto trans_op = AddTransOpNodeToGraph(func_graph, tuple_getitem, kernel_select, 0, false); if (kernel_graph != nullptr && kernel_graph->IsInternalOutput(node, output_idx)) { diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/enhancer/concat_outputs_for_all_gather.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/enhancer/concat_outputs_for_all_gather.cc index 40ddd4b9c4e..dbe42bd47ad 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/enhancer/concat_outputs_for_all_gather.cc +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/enhancer/concat_outputs_for_all_gather.cc @@ -25,9 +25,9 @@ namespace { OutputInfo GetNodeOutputInfo(const AnfNodePtr &node) { MS_EXCEPTION_IF_NULL(node); std::vector output_infer_dtype; - std::vector> output_infer_shape; - std::vector> output_max_shape; - std::vector> output_min_shape; + std::vector output_infer_shape; + std::vector output_max_shape; + std::vector output_min_shape; std::vector output_format; std::vector output_device_dtype; auto type_ptr = node->Type(); @@ -109,18 +109,15 @@ AnfNodePtr ConcatOutputsForAllGather::InsertConcatForOutput(const FuncGraphPtr & MS_EXCEPTION_IF_NULL(new_tuple_getitems[i]); const std::vector &dtypes = {std::get<0>(output_info)[i]}; auto shape = std::get<1>(output_info)[i]; - shape[0] *= LongToSize(rank_size); - if (AnfUtils::IsShapeDynamic(shape)) { - ShapeVector tensor_shape; + shape[0] *= rank_size; + if (IsDynamic(shape)) { auto min_shape = std::get(output_info)[i]; auto max_shape = std::get(output_info)[i]; if (!min_shape.empty() && !max_shape.empty()) { max_shape[0] *= rank_size; min_shape[0] *= rank_size; } - - (void)std::transform(shape.begin(), shape.end(), 
std::back_inserter(tensor_shape), SizeToLong); - BaseShapePtr base_shape = std::make_shared(tensor_shape, min_shape, max_shape); + BaseShapePtr base_shape = std::make_shared(shape, min_shape, max_shape); common::AnfAlgo::SetOutputTypeAndDetailShape(dtypes, {base_shape}, concat.get()); } else { common::AnfAlgo::SetOutputInferTypeAndShape(dtypes, {shape}, concat.get()); diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/enhancer/concat_outputs_for_all_gather.h b/mindspore/ccsrc/plugin/device/ascend/optimizer/enhancer/concat_outputs_for_all_gather.h index 382d75d5175..b425a2049a8 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/enhancer/concat_outputs_for_all_gather.h +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/enhancer/concat_outputs_for_all_gather.h @@ -25,7 +25,7 @@ namespace mindspore { namespace opt { -using OutputInfo = std::tuple, std::vector>, std::vector>, +using OutputInfo = std::tuple, std::vector, std::vector>, std::vector>, std::vector, std::vector>; class ConcatOutputsForAllGather : public PatternProcessPass { diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/enhancer/insert_transpose_for_sort.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/enhancer/insert_transpose_for_sort.cc index a62f8f16ad0..8e54191c49b 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/enhancer/insert_transpose_for_sort.cc +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/enhancer/insert_transpose_for_sort.cc @@ -132,7 +132,7 @@ AnfNodePtr InsertForOutput(const FuncGraphPtr &func_graph, const CNodePtr &orig_ return make_tuple; } -AnfNodePtr InsertTranspose(const FuncGraphPtr &func_graph, const CNodePtr &node, const std::vector &in_shape, +AnfNodePtr InsertTranspose(const FuncGraphPtr &func_graph, const CNodePtr &node, const ShapeVector &in_shape, int64_t axis) { MS_EXCEPTION_IF_NULL(func_graph); MS_EXCEPTION_IF_NULL(node); diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/enhancer/split_inputs_for_reduce_scatter.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/enhancer/split_inputs_for_reduce_scatter.cc index a9e763e2f4f..2959f343217 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/enhancer/split_inputs_for_reduce_scatter.cc +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/enhancer/split_inputs_for_reduce_scatter.cc @@ -28,7 +28,7 @@ std::vector SplitInputsForReduceScatter::InsertSplitForInput(const F size_t inputs_size = common::AnfAlgo::GetInputTensorNum(node); std::vector split_outputs; size_t rank_size_t = LongToSize(rank_size); - if (rank_size_t == 0) { + if (rank_size == 0) { MS_LOG(EXCEPTION) << "The rank size can not be zero."; } for (size_t i = 0; i < inputs_size; i++) { @@ -39,23 +39,20 @@ std::vector SplitInputsForReduceScatter::InsertSplitForInput(const F std::vector dtypes(rank_size, common::AnfAlgo::GetPrevNodeOutputInferDataType(node, i)); std::vector size_splits; - std::vector output_node_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(node, i); - output_node_shape[0] /= rank_size_t; - if (AnfUtils::IsShapeDynamic(output_node_shape)) { + auto output_node_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(node, i); + output_node_shape[0] /= rank_size; + if (IsDynamic(output_node_shape)) { auto min_shape = common::AnfAlgo::GetInputMinShape(node, i); auto max_shape = common::AnfAlgo::GetInputMaxShape(node, i); if (!min_shape.empty() && !max_shape.empty()) { - min_shape[0] /= static_cast(rank_size_t); - max_shape[0] /= static_cast(rank_size_t); + min_shape[0] /= rank_size; + max_shape[0] /= rank_size; } - 
-      ShapeVector shape_tmp;
-      (void)std::transform(output_node_shape.begin(), output_node_shape.end(), std::back_inserter(shape_tmp),
-                           SizeToLong);
-      std::vector<BaseShapePtr> shapes(rank_size_t, std::make_shared<abstract::Shape>(shape_tmp, min_shape, max_shape));
+      std::vector<BaseShapePtr> shapes(rank_size_t,
+                                       std::make_shared<abstract::Shape>(output_node_shape, min_shape, max_shape));
       common::AnfAlgo::SetOutputTypeAndDetailShape(dtypes, shapes, split.get());
     } else {
-      std::vector<std::vector<size_t>> shapes(rank_size_t, output_node_shape);
+      std::vector<ShapeVector> shapes(rank_size_t, output_node_shape);
       common::AnfAlgo::SetOutputInferTypeAndShape(dtypes, shapes, split.get());
     }
 
diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/format_type/insert_transpose_for_basiclstm_op.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/format_type/insert_transpose_for_basiclstm_op.cc
index 9cd1b67627b..ef15143c9ea 100644
--- a/mindspore/ccsrc/plugin/device/ascend/optimizer/format_type/insert_transpose_for_basiclstm_op.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/format_type/insert_transpose_for_basiclstm_op.cc
@@ -47,13 +47,13 @@ CNodePtr Insert(const FuncGraphPtr &func_graph, const CNodePtr &cnode, const std
     auto origin_type = common::AnfAlgo::GetPrevNodeOutputInferDataType(cnode, 1);
     auto origin_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(cnode, 1);
     auto dst_shape = {origin_shape[1], origin_shape[0]};
-    auto is_dynamic = AnfUtils::IsShapeDynamic(dst_shape);
+    auto is_dynamic = IsDynamic(dst_shape);
     transpose_inputs.push_back(common::AnfAlgo::GetInputNode(cnode, 1));
     CNodePtr transpose = func_graph->NewCNode(transpose_inputs);
     MS_EXCEPTION_IF_NULL(transpose);
 
     if (is_dynamic) {
-      auto shape = {SizeToLong(origin_shape[1]), SizeToLong(origin_shape[0])};
+      auto shape = {origin_shape[1], origin_shape[0]};
       auto max_shape = common::AnfAlgo::GetInputMaxShape(cnode, 1);
       auto min_shape = common::AnfAlgo::GetInputMinShape(cnode, 1);
       auto min_shape_tmp = min_shape;
@@ -86,8 +86,8 @@ CNodePtr Insert(const FuncGraphPtr &func_graph, const CNodePtr &cnode, const std
       transpose_inputs.push_back(tuple_getitem);
       CNodePtr transpose = func_graph->NewCNode(transpose_inputs);
       MS_EXCEPTION_IF_NULL(transpose);
-      if (AnfUtils::IsShapeDynamic(origin_shape)) {
-        auto dst_shape = {SizeToLong(origin_shape[0]), SizeToLong(origin_shape[1])};
+      if (IsDynamic(origin_shape)) {
+        auto dst_shape = {origin_shape[0], origin_shape[1]};
         auto min_shape = common::AnfAlgo::GetOutputMinShape(cnode, output_idx);
         auto max_shape = common::AnfAlgo::GetOutputMaxShape(cnode, output_idx);
         auto min_shape_tmp = min_shape;
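These dynamic-shape branches all switch from AnfUtils::IsShapeDynamic to IsDynamic and feed the ShapeVector straight into abstract::Shape. A hedged sketch of what that check amounts to, under the stated assumption that a dynamic dimension is encoded as a negative sentinel in the signed shape:

#include <algorithm>
#include <cstdint>
#include <vector>

using ShapeVector = std::vector<int64_t>;

// With signed shape elements, dynamic dims can be encoded as -1, so the old
// size_t-based check reduces to a scan for negative entries.
inline bool IsDynamic(const ShapeVector &shape) {
  return std::any_of(shape.begin(), shape.end(), [](int64_t dim) { return dim < 0; });
}

When the check is true, the passes keep the min/max shape pair and build a detailed abstract::Shape; otherwise the plain ShapeVector is enough for SetOutputInferTypeAndShape.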
diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/broadcastto_fission.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/broadcastto_fission.cc
index 1b226034daf..7356f53fa00 100644
--- a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/broadcastto_fission.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/broadcastto_fission.cc
@@ -60,10 +60,8 @@ CNodePtr AddBroadCastToNode(const FuncGraphPtr &func_graph, const CNodePtr &inpu
   CNodePtr broadcastto_node = NewCNode(broadcastto_inputs, func_graph);
   broadcastto_node->set_scope(input_node->scope());
   broadcastto_node->set_abstract(input_node->abstract());
-  std::vector<int64_t> out_shape;
-  std::transform(broad_shape.begin(), broad_shape.end(), std::back_inserter(out_shape), SizeToLong);
   common::AnfAlgo::SetNodeAttr(kAttrShape, MakeValue<std::vector<int64_t>>(broad_shape), broadcastto_node);
-  common::AnfAlgo::SetOutputInferTypeAndShape({input_type}, {out_shape}, broadcastto_node.get());
+  common::AnfAlgo::SetOutputInferTypeAndShape({input_type}, {broad_shape}, broadcastto_node.get());
   return broadcastto_node;
 }
 
diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/cdist_fission.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/cdist_fission.cc
index 6234c0c92c5..1d04aefacd7 100644
--- a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/cdist_fission.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/cdist_fission.cc
@@ -28,7 +28,7 @@ constexpr size_t kCdistGradInputNum = 4;
 constexpr int64_t kInputXDimP = -1;
 constexpr int64_t kInputYDimR = -2;
 
-std::vector<int64_t> CalCdistBroadCastShape(std::vector<int64_t> x_shape, std::vector<int64_t> y_shape) {
+ShapeVector CalCdistBroadCastShape(ShapeVector x_shape, ShapeVector y_shape) {
   (void)x_shape.insert(x_shape.end() + kInputXDimP, 1);
   (void)y_shape.insert(y_shape.end() + kInputYDimR, 1);
   if (x_shape.size() != y_shape.size()) {
@@ -38,7 +38,7 @@ std::vector<int64_t> CalCdistBroadCastShape(std::vector<int64_t> x_shape, std::vec
     return x_shape;
   }
   auto length = x_shape.size();
-  std::vector<int64_t> broadcast_shape;
+  ShapeVector broadcast_shape;
   (void)std::copy(x_shape.begin(), x_shape.end() - SizeToLong(length), std::back_inserter(broadcast_shape));
   for (size_t i = length; i > 0; --i) {
     if (x_shape[length - i] == 1) {
@@ -56,7 +56,7 @@ std::vector<int64_t> CalCdistBroadCastShape(std::vector<int64_t> x_shape, std::vec
 }
 
 AnfNodePtr AddBroadCastToNode(const FuncGraphPtr &func_graph, const AnfNodePtr &input_node, int64_t dim,
-                              const std::vector<int64_t> &need_shape, const PatternProcessPass &pass) {
+                              const ShapeVector &need_shape, const PatternProcessPass &pass) {
   MS_EXCEPTION_IF_NULL(func_graph);
   MS_EXCEPTION_IF_NULL(input_node);
   // Add ExpandDims Node
@@ -74,9 +74,7 @@ AnfNodePtr AddBroadCastToNode(const FuncGraphPtr &func_graph, const AnfNodePtr &
     NewValueNode(std::make_shared<Primitive>(prim::kPrimBroadcastTo->name())), expand_dims};
   auto broadcast_to = pass.NewCNode(broadcast_to_inputs, func_graph);
   common::AnfAlgo::SetOutputInferTypeAndShape({dtype}, {need_shape}, broadcast_to.get());
-  std::vector<size_t> shape;
-  (void)std::transform(need_shape.begin(), need_shape.end(), std::back_inserter(shape), LongToSize);
-  common::AnfAlgo::SetNodeAttr(kAttrShape, MakeValue(shape), broadcast_to);
+  common::AnfAlgo::SetNodeAttr(kAttrShape, MakeValue(need_shape), broadcast_to);
   common::AnfAlgo::SetNodeAttr("is_backend_insert", MakeValue(true), broadcast_to);
   return broadcast_to;
 }
diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/conv2d_backprop_filter_mul_fission.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/conv2d_backprop_filter_mul_fission.cc
index 2545d928092..28825622863 100644
--- a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/conv2d_backprop_filter_mul_fission.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/conv2d_backprop_filter_mul_fission.cc
@@ -33,32 +33,30 @@ namespace {
 constexpr int64_t kGroupsDefaultValue = 1;
 
 template <typename T>
-void SetAssistTensorData(void *data, const T &value, size_t dims_size) {
+void SetAssistTensorData(void *data, const T &value, int64_t dims_size) {
   MS_EXCEPTION_IF_NULL(data);
   auto tensor_data = static_cast<T *>(data);
-  for (size_t i = 0; i < dims_size; ++i) {
+  for (size_t i = 0; i < static_cast<size_t>(dims_size); ++i) {
     tensor_data[i] = value;
   }
 }
 
-ValueNodePtr CreateAssistNode(const FuncGraphPtr &func_graph, const AnfNodePtr &node, const std::vector<size_t> &shape,
-                              size_t matrix_size) {
+ValueNodePtr CreateAssistNode(const FuncGraphPtr &func_graph, const AnfNodePtr &node, const ShapeVector &shape,
+                              int64_t matrix_size) {
   MS_EXCEPTION_IF_NULL(func_graph);
   MS_EXCEPTION_IF_NULL(node);
   auto type = common::AnfAlgo::GetOutputInferDataType(node, 0);
-  std::vector<int64_t> assist_shape;
-  std::transform(shape.begin(), shape.end(), std::back_inserter(assist_shape), SizeToLong);
-  tensor::TensorPtr tensor = std::make_shared<tensor::Tensor>(type, assist_shape);
+  tensor::TensorPtr tensor = std::make_shared<tensor::Tensor>(type, shape);
   AbstractBasePtr x_abstract;
   if (type == kNumberTypeInt32) {
     SetAssistTensorData<int32_t>(tensor->data_c(), 1, matrix_size);
-    x_abstract = std::make_shared<abstract::AbstractTensor>(kInt32, assist_shape);
+    x_abstract = std::make_shared<abstract::AbstractTensor>(kInt32, shape);
   } else if (type == kNumberTypeFloat16) {
     SetAssistTensorData<float16>(tensor->data_c(), float16(static_cast<float>(1)), matrix_size);
-    x_abstract = std::make_shared<abstract::AbstractTensor>(kFloat16, assist_shape);
+    x_abstract = std::make_shared<abstract::AbstractTensor>(kFloat16, shape);
   } else if (type == kNumberTypeFloat32) {
     SetAssistTensorData<float>(tensor->data_c(), static_cast<float>(1), matrix_size);
-    x_abstract = std::make_shared<abstract::AbstractTensor>(kFloat, assist_shape);
+    x_abstract = std::make_shared<abstract::AbstractTensor>(kFloat, shape);
   } else {
     MS_EXCEPTION(TypeError) << "The type of node [" << node->DebugString()
                             << "] should be int32, float16 or float32, but got" << node->Type()->ToString();
diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/diag_fission.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/diag_fission.cc
index 4f892fc2a88..7bbb8c40e63 100644
--- a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/diag_fission.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/diag_fission.cc
@@ -28,39 +28,37 @@ constexpr size_t kDiagInputNum = 1;
 constexpr size_t kDiagInputMaxDim = 4;
 
 template <typename T>
-void SetAssistTensorData(void *data, const T &value, size_t dims_size) {
+void SetAssistTensorData(void *data, const T &value, int64_t dims_size) {
   MS_EXCEPTION_IF_NULL(data);
   auto tensor_data = reinterpret_cast<T *>(data);
-  for (size_t i = 0; i < dims_size; ++i) {
-    tensor_data[(1 + dims_size) * i] = value;
+  for (size_t i = 0; i < static_cast<size_t>(dims_size); ++i) {
+    tensor_data[(1 + static_cast<size_t>(dims_size)) * i] = value;
   }
 }
 }  // namespace
 
 ValueNodePtr DiagFission::CreateAssistNode(const FuncGraphPtr &func_graph, const AnfNodePtr &node,
-                                           const std::vector<size_t> &ori_shape) const {
+                                           const ShapeVector &ori_shape) const {
   MS_EXCEPTION_IF_NULL(func_graph);
   MS_EXCEPTION_IF_NULL(node);
-  std::vector<size_t> output_shape(ori_shape);
-  size_t dims = 1;
+  ShapeVector output_shape(ori_shape);
+  ShapeValueDType dims = 1;
   for (size_t i = 0; i < ori_shape.size(); i++) {
     dims = dims * ori_shape[i];
   }
   (void)output_shape.insert(output_shape.end(), ori_shape.begin(), ori_shape.end());
   auto type = common::AnfAlgo::GetOutputInferDataType(node, 0);
-  std::vector<int64_t> assist_shape;
-  std::transform(output_shape.begin(), output_shape.end(), std::back_inserter(assist_shape), SizeToLong);
-  tensor::TensorPtr tensor = std::make_shared<tensor::Tensor>(type, assist_shape);
+  tensor::TensorPtr tensor = std::make_shared<tensor::Tensor>(type, output_shape);
   AbstractBasePtr x_abstract;
   if (type == kNumberTypeInt32) {
     SetAssistTensorData<int32_t>(tensor->data_c(), 1, dims);
-    x_abstract = std::make_shared<abstract::AbstractTensor>(kInt32, assist_shape);
+    x_abstract = std::make_shared<abstract::AbstractTensor>(kInt32, output_shape);
   } else if (type == kNumberTypeFloat16) {
     SetAssistTensorData<float16>(tensor->data_c(), float16(static_cast<float>(1)), dims);
-    x_abstract = std::make_shared<abstract::AbstractTensor>(kFloat16, assist_shape);
+    x_abstract = std::make_shared<abstract::AbstractTensor>(kFloat16, output_shape);
   } else if (type == kNumberTypeFloat32) {
     SetAssistTensorData<float>(tensor->data_c(), static_cast<float>(1), dims);
-    x_abstract = std::make_shared<abstract::AbstractTensor>(kFloat, assist_shape);
+    x_abstract = std::make_shared<abstract::AbstractTensor>(kFloat, output_shape);
   } else {
     MS_EXCEPTION(TypeError) << "The type of node [" << node->DebugString()
                             << "] should be int32, float16 or float32, but got" << node->Type()->ToString();
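The Diag assist fill above is worth a sketch: for n = prod(ori_shape) elements, the assist tensor is the flat n x n identity, so writing at stride n + 1 marks the main diagonal. A self-contained, hedged version with the new int64_t dims parameter (FillIdentity is a hypothetical name):

#include <cstddef>
#include <cstdint>

template <typename T>
void FillIdentity(void *data, const T &value, int64_t dims_size) {
  auto tensor_data = static_cast<T *>(data);
  auto n = static_cast<size_t>(dims_size);  // dims_size is non-negative here
  for (size_t i = 0; i < n; ++i) {
    tensor_data[(1 + n) * i] = value;  // stride n + 1 walks the diagonal
  }
}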
diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/diag_fission.h b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/diag_fission.h
index 7aed60c36d3..64c071e3930 100644
--- a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/diag_fission.h
+++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/diag_fission.h
@@ -32,7 +32,7 @@ class DiagFission : public PatternProcessPass {
 
  protected:
   ValueNodePtr CreateAssistNode(const FuncGraphPtr &func_graph, const AnfNodePtr &node,
-                                const std::vector<size_t> &ori_shape) const;
+                                const ShapeVector &ori_shape) const;
 };
 }  // namespace opt
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/dynamic_gru_v2_grad_fission.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/dynamic_gru_v2_grad_fission.cc
index fdaa67f1713..fc965264939 100644
--- a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/dynamic_gru_v2_grad_fission.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/dynamic_gru_v2_grad_fission.cc
@@ -100,10 +100,9 @@ AnfNodePtr DynamicGRUV2GradFission::CreateGRUV2HiddenGradCellNode(const FuncGrap
   (void)gru_v2_hidden_grad_cell_inputs.emplace_back(dynamic_gru_v2_grad_inputs[input_index["hidden_new"]]);
   auto gru_v2_hidden_grad_cell_op = NewCNode(gru_v2_hidden_grad_cell_inputs, func_graph);
 
-  std::vector<size_t> dh_prev_shape =
-    common::AnfAlgo::GetOutputInferShape(dynamic_gru_grad_outputs[output_index["dh_prev"]], 0);
-  std::vector<size_t> dgate_h_shape = {1, batch_size, kGateNum * hidden_size};
-  std::vector<size_t> dnt_x_shape = {1, batch_size, hidden_size};
+  auto dh_prev_shape = common::AnfAlgo::GetOutputInferShape(dynamic_gru_grad_outputs[output_index["dh_prev"]], 0);
+  ShapeVector dgate_h_shape = {1, SizeToLong(batch_size), SizeToLong(kGateNum * hidden_size)};
+  ShapeVector dnt_x_shape = {1, SizeToLong(batch_size), SizeToLong(hidden_size)};
   common::AnfAlgo::SetOutputInferTypeAndShape(
     {dh_dtype, dh_dtype, dh_dtype}, {dh_prev_shape, dgate_h_shape, dnt_x_shape}, gru_v2_hidden_grad_cell_op.get());
   common::AnfAlgo::SetNodeAttr("t_state", MakeValue(SizeToLong(cur_t)), gru_v2_hidden_grad_cell_op);
@@ -142,13 +141,13 @@ void DynamicGRUV2GradFission::AddTLoopNode(const FuncGraphPtr &func_graph, const
                                              weight_hidden};
   auto reshape = NewCNode(reshape_inputs, func_graph);
   auto weight_hidden_dtype = common::AnfAlgo::GetOutputInferDataType(weight_hidden, input_index["weight_hidden"]);
-  auto reshape_out_shape = {IntToSize(1), common::AnfAlgo::GetOutputInferShape(weight_hidden, 0)[0],
-                            common::AnfAlgo::GetOutputInferShape(weight_hidden, 0)[1]};
+  ShapeVector reshape_out_shape = {1, common::AnfAlgo::GetOutputInferShape(weight_hidden, 0)[0],
+                                   common::AnfAlgo::GetOutputInferShape(weight_hidden, 0)[1]};
   common::AnfAlgo::SetOutputInferTypeAndShape({weight_hidden_dtype}, {reshape_out_shape}, reshape.get());
   (void)matmul_inputs.emplace_back(reshape);
   auto matmul_node = NewCNode(matmul_inputs, func_graph);
   MS_EXCEPTION_IF_NULL(matmul_node);
-  std::vector<size_t> out_shape = {1, batch_size, hidden_size};
+  ShapeVector out_shape = {1, SizeToLong(batch_size), SizeToLong(hidden_size)};
   common::AnfAlgo::SetOutputInferTypeAndShape({weight_hidden_dtype}, {out_shape}, matmul_node.get());
   common::AnfAlgo::SetNodeAttr("transpose_x1",
MakeValue(false), matmul_node); common::AnfAlgo::SetNodeAttr("transpose_x2", MakeValue(true), matmul_node); @@ -181,7 +180,7 @@ AnfNodePtr DynamicGRUV2GradFission::AddTConcatNode(const FuncGraphPtr &func_grap } auto concat_t_node = NewCNode(concat_inputs, func_graph); auto out_dims = common::AnfAlgo::GetOutputInferShape(gru_hidden_grad_nodes[kIndex0], concat_output_index); - std::vector concat_output_shape = {t_size, out_dims[kDim1], out_dims[kDim2]}; + ShapeVector concat_output_shape = {SizeToLong(t_size), out_dims[kDim1], out_dims[kDim2]}; auto out_type = common::AnfAlgo::GetOutputInferDataType(gru_hidden_grad_nodes[kIndex0], concat_output_index); common::AnfAlgo::SetOutputInferTypeAndShape({out_type}, {concat_output_shape}, concat_t_node.get()); common::AnfAlgo::SetNodeAttr(kAttrN, MakeValue(SizeToLong(t_size)), concat_t_node); @@ -232,7 +231,7 @@ AnfNodePtr DynamicGRUV2GradFission::AddHSplitNode(const FuncGraphPtr &func_graph std::vector output1_shape = {t_size - 1, batch_size, hidden_size}; std::vector output2_shape = {1, batch_size, hidden_size}; std::vector split_list = {SizeToLong(t_size - 1), 1}; - std::vector> shapes = {output1_shape, output2_shape}; + std::vector shapes = {Convert2Long(output1_shape), Convert2Long(output2_shape)}; common::AnfAlgo::SetOutputInferTypeAndShape(dtypes, shapes, split_v.get()); // Set attr common::AnfAlgo::SetNodeAttr(kAttrSplitDim, MakeValue(SizeToLong(0)), split_v); @@ -246,17 +245,17 @@ AnfNodePtr DynamicGRUV2GradFission::CreateHReshape(const FuncGraphPtr &graph, co MS_EXCEPTION_IF_NULL(graph); MS_EXCEPTION_IF_NULL(node); auto ori_shape = common::AnfAlgo::GetOutputInferShape(node, 0); - std::vector> shape_tmp; + ShapeVector shape_tmp; if (ori_shape.size() == k3Dims) { shape_tmp = {ori_shape}; } else { - shape_tmp = {{IntToSize(1), ori_shape[kDim0], ori_shape[kDim1]}}; + shape_tmp = {{1, ori_shape[kDim0], ori_shape[kDim1]}}; } auto ori_dtype = {common::AnfAlgo::GetOutputInferDataType(node, 0)}; // reshape std::vector reshape_input = {NewValueNode(std::make_shared(prim::kPrimReshape->name())), node}; auto reshape = NewCNode(reshape_input, graph); - common::AnfAlgo::SetOutputInferTypeAndShape(ori_dtype, shape_tmp, reshape.get()); + common::AnfAlgo::SetOutputInferTypeAndShape(ori_dtype, {shape_tmp}, reshape.get()); common::AnfAlgo::SetNodeAttr("is_backend_insert", MakeValue(true), reshape); return reshape; } @@ -280,7 +279,7 @@ AnfNodePtr DynamicGRUV2GradFission::AddHConcatNode(const FuncGraphPtr &func_grap (void)concat_inputs.emplace_back(splitv_outputs[kIndex0]); auto concat = NewCNode(concat_inputs, func_graph); // Set infer data type and shape - std::vector output_shape = {t_size, batch_size, hidden_size}; + ShapeVector output_shape = Convert2Long({t_size, batch_size, hidden_size}); common::AnfAlgo::SetOutputInferTypeAndShape({common::AnfAlgo::GetOutputInferDataType(init_h_reshape, 0)}, {output_shape}, concat.get()); // Set attr @@ -307,7 +306,7 @@ AnfNodePtr DynamicGRUV2GradFission::AddDwhMatmulNode(const FuncGraphPtr &func_gr (void)matmul_inputs.emplace_back(dgate_h); } auto batch_matmul = NewCNode(matmul_inputs, func_graph); - std::vector shape = {t_size, hidden_size, kGateNum * hidden_size}; + ShapeVector shape = Convert2Long({t_size, hidden_size, kGateNum * hidden_size}); common::AnfAlgo::SetOutputInferTypeAndShape({kNumberTypeFloat16}, {shape}, batch_matmul.get()); common::AnfAlgo::SetNodeAttr("transpose_x1", MakeValue(true), batch_matmul); common::AnfAlgo::SetNodeAttr("transpose_x2", MakeValue(false), batch_matmul); @@ -330,9 +329,9 @@ 
AnfNodePtr DynamicGRUV2GradFission::CreateDgateHSplitVDNode(const FuncGraphPtr & auto split_vd = NewCNode(splitvd_input, func_graph); auto dtypes = {common::AnfAlgo::GetOutputInferDataType(dgate_h, 0), common::AnfAlgo::GetOutputInferDataType(dgate_h, 0)}; - std::vector shape = {t_size, batch_size, hidden_size << 1}; - std::vector shape2 = {t_size, batch_size, hidden_size}; - std::vector> shapes = {shape, shape2}; + auto shape = Convert2Long({t_size, batch_size, hidden_size << 1}); + auto shape2 = Convert2Long({t_size, batch_size, hidden_size}); + std::vector shapes = {shape, shape2}; common::AnfAlgo::SetOutputInferTypeAndShape(dtypes, shapes, split_vd.get()); common::AnfAlgo::SetNodeAttr("split_dim", MakeValue(SizeToLong(kDim2)), split_vd); common::AnfAlgo::SetNodeAttr("num_split", MakeValue(SizeToLong(kSplitVOutputNum)), split_vd); @@ -359,7 +358,7 @@ AnfNodePtr DynamicGRUV2GradFission::CreateDgateXConcatDNode(const FuncGraphPtr & (void)concat_inputs.emplace_back(dnt_x); } auto concat_op = NewCNode(concat_inputs, func_graph); - std::vector shape = {t_size, batch_size, kGateNum * hidden_size}; + auto shape = Convert2Long({t_size, batch_size, kGateNum * hidden_size}); auto types = {common::AnfAlgo::GetOutputInferDataType(dnt_x, 0)}; common::AnfAlgo::SetOutputInferTypeAndShape(types, {shape}, concat_op.get()); common::AnfAlgo::SetNodeAttr(kAttrN, MakeValue(SizeToLong(kConcatNum)), concat_op); @@ -379,7 +378,7 @@ AnfNodePtr DynamicGRUV2GradFission::CreateDwxBatchMatMul(const FuncGraphPtr &gra node1, node2}; auto batch_matmul = NewCNode(matmul_inputs, graph); MS_EXCEPTION_IF_NULL(batch_matmul); - std::vector shape = {t_size, input_size, kGateNum * hidden_size}; + auto shape = Convert2Long({t_size, input_size, kGateNum * hidden_size}); auto x_dtype = common::AnfAlgo::GetOutputInferDataType(node1, input_index["x"]); common::AnfAlgo::SetOutputInferTypeAndShape({x_dtype}, {shape}, batch_matmul.get()); common::AnfAlgo::SetNodeAttr("transpose_x1", MakeValue(true), batch_matmul); @@ -412,7 +411,7 @@ AnfNodePtr DynamicGRUV2GradFission::CreateWBroadcastToDNode(const FuncGraphPtr & // BroadcastTo std::vector braodcast_to_input = {NewValueNode(std::make_shared(kBroadcastToOpName)), node}; auto broadcast_to_d = NewCNode(braodcast_to_input, graph); - std::vector shape = {t_size, input_size, kGateNum * hidden_size}; + auto shape = Convert2Long({t_size, input_size, kGateNum * hidden_size}); auto type = {common::AnfAlgo::GetOutputInferDataType(node, 0)}; common::AnfAlgo::SetOutputInferTypeAndShape(type, {shape}, broadcast_to_d.get()); std::vector attr_shape = {SizeToLong(t_size), SizeToLong(input_size), SizeToLong(kGateNum * hidden_size)}; @@ -449,7 +448,7 @@ AnfNodePtr DynamicGRUV2GradFission::CreateDbReduceSumDNode(const FuncGraphPtr &g node}; auto reduce_sumd = NewCNode(reducesum_inputs, graph); MS_EXCEPTION_IF_NULL(reduce_sumd); - std::vector shape = {kGateNum * hidden_size}; + auto shape = Convert2Long({kGateNum * hidden_size}); auto types = {common::AnfAlgo::GetOutputInferDataType(node2, 0)}; common::AnfAlgo::SetOutputInferTypeAndShape(types, {shape}, reduce_sumd.get()); common::AnfAlgo::SetNodeAttr(kAttrAxis, MakeValue(std::vector{0, 1}), reduce_sumd); @@ -485,10 +484,10 @@ const AnfNodePtr DynamicGRUV2GradFission::Process(const FuncGraphPtr &func_graph CreateMultipleOutputsOfAnfNode(func_graph, dynamic_gru_v2_grad_cnode, kDynamicGRUV2GradOutputNum, &gru_grad_outputs); auto input_h = ori_inputs[input_index["h"]]; auto input_x = ori_inputs[input_index["x"]]; - t_size = 
common::AnfAlgo::GetOutputInferShape(input_h, 0)[kDim0];
-  batch_size = common::AnfAlgo::GetOutputInferShape(input_h, 0)[kDim1];
-  hidden_size = common::AnfAlgo::GetOutputInferShape(input_h, 0)[kDim2];
-  input_size = common::AnfAlgo::GetOutputInferShape(input_x, 0)[kDim2];
+  t_size = LongToSize(common::AnfAlgo::GetOutputInferShape(input_h, 0)[kDim0]);
+  batch_size = LongToSize(common::AnfAlgo::GetOutputInferShape(input_h, 0)[kDim1]);
+  hidden_size = LongToSize(common::AnfAlgo::GetOutputInferShape(input_h, 0)[kDim2]);
+  input_size = LongToSize(common::AnfAlgo::GetOutputInferShape(input_x, 0)[kDim2]);
   MS_LOG(INFO) << "For DynamicGRUV2Grad op, t_size: " << t_size << ", batch_size: " << batch_size
                << ", hidden_size: " << hidden_size << ", input_size: " << input_size;
   // add GRUHiddenGrad {dhPrevNode, dgateHConcatTNode, dntXConcatTNode}
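Note the read-side convention in the hunk above: spec fields such as t_size and batch_size stay size_t, so every read from an int64 infer shape narrows through LongToSize. A hedged, self-contained sketch of that narrowing (CheckedLongToSize is a hypothetical stand-in; the real helper's exact error handling is not shown in this patch):

#include <cstdint>
#include <stdexcept>
#include <vector>

using ShapeVector = std::vector<int64_t>;

// A dynamic (-1) dimension must not reach this path, so the conversion checks.
inline size_t CheckedLongToSize(int64_t v) {
  if (v < 0) {
    throw std::out_of_range("dynamic or negative dim cannot convert to size_t");
  }
  return static_cast<size_t>(v);
}

inline size_t TSizeOf(const ShapeVector &h_shape) { return CheckedLongToSize(h_shape.at(0)); }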
diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/dynamic_rnn_grad_fission_v2.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/dynamic_rnn_grad_fission_v2.cc
index d4b0ee7b9ce..616d4a03825 100644
--- a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/dynamic_rnn_grad_fission_v2.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/dynamic_rnn_grad_fission_v2.cc
@@ -17,6 +17,7 @@
 #include
 #include
 #include
+#include
 #include "backend/common/session/kernel_graph.h"
 #include "backend/common/session/anf_runtime_algorithm.h"
 #include "include/common/utils/anfalgo.h"
@@ -87,9 +88,9 @@ void DynamicRnnGradFissionV2::CreateTLoopNode(const FuncGraphPtr &func_graph, co
     NewValueNode(std::make_shared<Primitive>(kBasicLSTMCellCStateGradV2OpName))};
   auto basic_lstm_cell_c_state_grad = NewCNode(basic_lstm_cell_c_state_grad_inputs, func_graph);
 
-  std::vector<size_t> output0_dims{specs.batch_size, kDimMultiNum * specs.hidden_nz_size * kCubeSize};
+  ShapeVector output0_dims{SizeToLong(specs.batch_size), SizeToLong(kDimMultiNum * specs.hidden_nz_size * kCubeSize)};
   // batch_size, hidden_size
-  std::vector<size_t> output1_dims{input_i_shape[kDim1], input_i_shape[kDim2]};
+  ShapeVector output1_dims{input_i_shape[kDim1], input_i_shape[kDim2]};
   common::AnfAlgo::SetOutputInferTypeAndShape({kNumberTypeFloat16, kNumberTypeFloat32}, {output0_dims, output1_dims},
                                               basic_lstm_cell_c_state_grad.get());
   common::AnfAlgo::SetNodeAttr("forget_bias", MakeValue(1.0f), basic_lstm_cell_c_state_grad);
@@ -104,7 +105,8 @@ void DynamicRnnGradFissionV2::CreateTLoopNode(const FuncGraphPtr &func_graph, co
   }
   auto matmul = NewCNode(matmul_inputs, func_graph);
   common::AnfAlgo::SetOutputInferTypeAndShape(
-    {kNumberTypeFloat32}, {{IntToSize(1), specs.batch_size, specs.input_size + specs.hidden_size}}, matmul.get());
+    {kNumberTypeFloat32}, {{1, SizeToLong(specs.batch_size), SizeToLong(specs.input_size + specs.hidden_size)}},
+    matmul.get());
   common::AnfAlgo::SetNodeAttr("transpose_x1", MakeValue(false), matmul);
   common::AnfAlgo::SetNodeAttr("transpose_x2", MakeValue(true), matmul);
   if (specs.shape_need_align) {
@@ -122,8 +124,8 @@ void DynamicRnnGradFissionV2::CreateTLoopNode(const FuncGraphPtr &func_graph, co
   // Create split
   std::vector<AnfNodePtr> splitv_input = {NewValueNode(std::make_shared<Primitive>(prim::kPrimSplitV->name()))};
   auto split_v = NewCNode(splitv_input, func_graph);
-  std::vector<size_t> split_v_output0_shape{IntToSize(1), specs.batch_size, specs.input_size};
-  std::vector<size_t> split_v_output1_shape{IntToSize(1), specs.batch_size, specs.hidden_size};
+  auto split_v_output0_shape = Convert2Long({IntToSize(1), specs.batch_size, specs.input_size});
+  auto split_v_output1_shape = Convert2Long({IntToSize(1), specs.batch_size, specs.hidden_size});
   common::AnfAlgo::SetOutputInferTypeAndShape({kNumberTypeFloat32, kNumberTypeFloat32},
                                               {split_v_output0_shape, split_v_output1_shape}, split_v.get());
   common::AnfAlgo::SetNodeAttr(kAttrSizeSplits,
@@ -152,7 +154,7 @@ void DynamicRnnGradFissionV2::CreateTLoopNode(const FuncGraphPtr &func_graph, co
 }
 
 AnfNodePtr DynamicRnnGradFissionV2::CreateLSTMSPlitV(const FuncGraphPtr &func_graph, const AnfNodePtr &input,
-                                                     const std::vector<std::vector<size_t>> &split_shapes,
+                                                     const std::vector<ShapeVector> &split_shapes,
                                                      const std::vector<TypeId> &split_types,
                                                      const std::vector<int64_t> &size_split, size_t num_split_x) const {
   std::vector<AnfNodePtr> lstm_split_input = {NewValueNode(std::make_shared<Primitive>(prim::kPrimSplitV->name())),
@@ -195,9 +197,9 @@ void DynamicRnnGradFissionV2::CreateTLoopNodeWithEdge(const FuncGraphPtr &func_g
   std::vector<AnfNodePtr> reshape_inputs = {NewValueNode(std::make_shared<Primitive>(kReshapeOpName)),
                                             dynamic_rnn_grad_cnode->input(kIndex6)};
   auto reshape = NewCNode(reshape_inputs, func_graph);
-  auto reshape_out_shape = {IntToSize(1),
-                            common::AnfAlgo::GetOutputInferShape(dynamic_rnn_grad_cnode->input(kIndex6), 0)[0],
-                            common::AnfAlgo::GetOutputInferShape(dynamic_rnn_grad_cnode->input(kIndex6), 0)[1]};
+  ShapeVector reshape_out_shape = {
+    1, common::AnfAlgo::GetOutputInferShape(dynamic_rnn_grad_cnode->input(kIndex6), 0)[0],
+    common::AnfAlgo::GetOutputInferShape(dynamic_rnn_grad_cnode->input(kIndex6), 0)[1]};
   auto reshape_out_dtype = common::AnfAlgo::GetOutputInferDataType(dynamic_rnn_grad_cnode->input(kIndex6), 0);
   common::AnfAlgo::SetOutputInferTypeAndShape({reshape_out_dtype}, {reshape_out_shape}, reshape.get());
   (void)basic_lstm_cell_c_state_grad_inputs.emplace_back(reshape);
@@ -253,7 +255,7 @@ void DynamicRnnGradFissionV2::CreateTLoopNodeWithEdge(const FuncGraphPtr &func_g
   } else {
     auto basic_lstm_cell_output_0_shape =
       common::AnfAlgo::GetOutputInferShape(basic_lstm_cell_c_state_grad_outputs[0], 0);
-    std::vector<size_t> temp_shape;
+    ShapeVector temp_shape;
     if (basic_lstm_cell_output_0_shape.size() == kBasicLstmCStateGradOutput0DimNum) {
       temp_shape = basic_lstm_cell_output_0_shape;
     } else {
@@ -282,11 +284,11 @@ AnfNodePtr DynamicRnnGradFissionV2::AddLSTMInputGradNode(const FuncGraphPtr &fun
   CreateTLoopNode(func_graph, dynamic_rnn_grad_cnode, specs, &result_nodes);
 
   auto origin_input5_shape = common::AnfAlgo::GetOutputInferShape(dynamic_rnn_grad_cnode->input(kIndex6), 0);
-  std::vector<size_t> split_c_dims{IntToSize(1), origin_input5_shape[0], origin_input5_shape[1]};
+  ShapeVector split_c_dims{1, origin_input5_shape[0], origin_input5_shape[1]};
 
   auto origin_input7 = dynamic_rnn_grad_cnode->input(kIndex8);
-  size_t num_split_x = common::AnfAlgo::GetOutputInferShape(origin_input7, 0)[0];
-  std::vector<std::vector<size_t>> split_shapes;
+  size_t num_split_x = LongToSize(common::AnfAlgo::GetOutputInferShape(origin_input7, 0)[0]);
+  std::vector<ShapeVector> split_shapes;
   std::vector<TypeId> split_types;
   std::vector<int64_t> size_split;
   for (size_t i = 0; i < num_split_x; ++i) {
@@ -376,7 +378,8 @@ AnfNodePtr DynamicRnnGradFissionV2::AddLSTMInputGradNode(const FuncGraphPtr &fun
   } else {
     gage_concat_shape = {specs.t_size, specs.batch_size, kDimMultiNum * specs.hidden_nz_size * kCubeSize};
   }
-  common::AnfAlgo::SetOutputInferTypeAndShape({kNumberTypeFloat16}, {gage_concat_shape}, lstm_gage_concat.get());
+  common::AnfAlgo::SetOutputInferTypeAndShape({kNumberTypeFloat16}, {Convert2Long(gage_concat_shape)},
+                                              lstm_gage_concat.get());
   common::AnfAlgo::SetNodeAttr(kAttrN, MakeValue(SizeToLong(num_split_x)), lstm_gage_concat);
common::AnfAlgo::SetNodeAttr(kAttrDynInputSizes, MakeValue(std::vector{SizeToLong(num_split_x)}), lstm_gage_concat); @@ -406,7 +409,7 @@ AnfNodePtr DynamicRnnGradFissionV2::CreateSplitV(const FuncGraphPtr &func_graph, if (specs.batch_size % kCubeSize == 0 && !specs.shape_need_align) { std::vector reshape_input = {NewValueNode(std::make_shared(kReshapeOpName)), origin_input6}; auto reshape = NewCNode(reshape_input, func_graph); - std::vector shape = {origin_input6_shape[kDim0] * origin_input6_shape[kDim1], origin_input6_shape[kDim2]}; + ShapeVector shape = {origin_input6_shape[kDim0] * origin_input6_shape[kDim1], origin_input6_shape[kDim2]}; common::AnfAlgo::SetOutputInferTypeAndShape({origin_input6_dtype}, {shape}, reshape.get()); splitv_input.push_back(reshape); } else { @@ -414,7 +417,7 @@ AnfNodePtr DynamicRnnGradFissionV2::CreateSplitV(const FuncGraphPtr &func_graph, } auto split_v = NewCNode(splitv_input, func_graph); // Set infer data type and shape - std::vector shape1, shape2; + ShapeVector shape1, shape2; if (specs.batch_size % kCubeSize == 0 && !specs.shape_need_align) { shape1 = {(origin_input6_shape[kDim0] - 1) * origin_input6_shape[kDim1], origin_input6_shape[kDim2]}; shape2 = {origin_input6_shape[kDim1], origin_input6_shape[kDim2]}; @@ -423,7 +426,7 @@ AnfNodePtr DynamicRnnGradFissionV2::CreateSplitV(const FuncGraphPtr &func_graph, shape2 = {1, origin_input6_shape[kDim1], origin_input6_shape[kDim2]}; } auto dtypes = {origin_input6_dtype, origin_input6_dtype}; - std::vector> shapes = {shape1, shape2}; + std::vector shapes = {shape1, shape2}; common::AnfAlgo::SetOutputInferTypeAndShape(dtypes, shapes, split_v.get()); // Set attr common::AnfAlgo::SetNodeAttr(kAttrSplitDim, MakeValue(SizeToLong(0)), split_v); @@ -456,7 +459,7 @@ AnfNodePtr DynamicRnnGradFissionV2::CreateHConcat(const FuncGraphPtr &func_graph auto origin_input4 = dynamic_rnn_grad_cnode->input(kIndex5); auto origin_input4_shape = common::AnfAlgo::GetOutputInferShape(origin_input4, 0); // Create reshape to change shape - std::vector shape_tmp; + ShapeVector shape_tmp; if (specs.batch_size % kCubeSize == 0 && !specs.shape_need_align) { shape_tmp = {origin_input4_shape[0], origin_input4_shape[1]}; } else { @@ -477,7 +480,7 @@ AnfNodePtr DynamicRnnGradFissionV2::CreateHConcat(const FuncGraphPtr &func_graph auto concat = NewCNode(concat_inputs, func_graph); // Set infer data type and shape auto splitv_output0_shape = common::AnfAlgo::GetOutputInferShape(splitv, 0); - std::vector shape; + ShapeVector shape; if (specs.batch_size % kCubeSize == 0 && !specs.shape_need_align) { shape = {splitv_output0_shape[0] + origin_input4_shape[0], origin_input4_shape[1]}; } else { @@ -506,7 +509,7 @@ AnfNodePtr DynamicRnnGradFissionV2::CreateConcat(const FuncGraphPtr &func_graph, if (specs.batch_size % kCubeSize == 0 && !specs.shape_need_align) { std::vector reshape_input = {NewValueNode(std::make_shared(kReshapeOpName)), origin_input0}; auto reshape = NewCNode(reshape_input, func_graph); - std::vector shape = {origin_input0_shape[kDim0] * origin_input0_shape[kDim1], origin_input0_shape[kDim2]}; + ShapeVector shape = {origin_input0_shape[kDim0] * origin_input0_shape[kDim1], origin_input0_shape[kDim2]}; common::AnfAlgo::SetOutputInferTypeAndShape({origin_input0_dtype}, {shape}, reshape.get()); // t_size * batch_size, input_size concat_inputs.push_back(reshape); @@ -518,7 +521,7 @@ AnfNodePtr DynamicRnnGradFissionV2::CreateConcat(const FuncGraphPtr &func_graph, auto concat = NewCNode(concat_inputs, func_graph); // Set infer data type and 
shape auto h_concat_output_shape = common::AnfAlgo::GetOutputInferShape(h_concat, 0); - std::vector shape; + ShapeVector shape; if (specs.batch_size % kCubeSize == 0 && !specs.shape_need_align) { shape = {origin_input0_shape[kDim0] * origin_input0_shape[kDim1], origin_input0_shape[kDim2] + h_concat_output_shape[kDim1]}; @@ -562,7 +565,7 @@ AnfNodePtr DynamicRnnGradFissionV2::CreateConcatNodeT1(const FuncGraphPtr &func_ if (specs.batch_size % kCubeSize == 0 && !specs.shape_need_align) { std::vector reshape_inputs = {NewValueNode(std::make_shared(kReshapeOpName)), origin_input0}; auto reshape_in0 = NewCNode(reshape_inputs, func_graph); - std::vector shape = {origin_input0_shape[kDim0] * origin_input0_shape[kDim1], origin_input0_shape[kDim2]}; + ShapeVector shape = {origin_input0_shape[kDim0] * origin_input0_shape[kDim1], origin_input0_shape[kDim2]}; common::AnfAlgo::SetOutputInferTypeAndShape({origin_input0_dtype}, {shape}, reshape_in0.get()); (void)concat_inputs.emplace_back(reshape_in0); } else { @@ -571,7 +574,7 @@ AnfNodePtr DynamicRnnGradFissionV2::CreateConcatNodeT1(const FuncGraphPtr &func_ auto origin_input4 = dynamic_rnn_grad_cnode->input(kIndex5); auto origin_input4_shape = common::AnfAlgo::GetOutputInferShape(origin_input4, 0); - std::vector shape_tmp; + ShapeVector shape_tmp; if (specs.batch_size % kCubeSize == 0 && !specs.shape_need_align) { shape_tmp = {origin_input4_shape[0], origin_input4_shape[1]}; } else { @@ -590,7 +593,7 @@ AnfNodePtr DynamicRnnGradFissionV2::CreateConcatNodeT1(const FuncGraphPtr &func_ concat_inputs.push_back(reshape_in4); auto concat = NewCNode(concat_inputs, func_graph); // Set infer data type and shape - std::vector shape; + ShapeVector shape; if (specs.batch_size % kCubeSize == 0 && !specs.shape_need_align) { shape = {origin_input0_shape[kDim0] * origin_input0_shape[kDim1], origin_input0_shape[kDim2] + shape_tmp[kDim1]}; } else { @@ -631,7 +634,7 @@ AnfNodePtr DynamicRnnGradFissionV2::CreateMatMulNode(const FuncGraphPtr &func_gr // Set infer data type and shape auto concat_shape = common::AnfAlgo::GetOutputInferShape(concat, 0); auto lstm_input_grad_shape = common::AnfAlgo::GetOutputInferShape(lstm_input_grad, 0); - std::vector shape; + ShapeVector shape; if (specs.batch_size % kCubeSize == 0 && !specs.shape_need_align) { // t_size * (input_size + hidden_size), 4 * hidden_size shape = {concat_shape[kDim1], lstm_input_grad_shape[kDim1]}; @@ -674,11 +677,11 @@ AnfNodePtr DynamicRnnGradFissionV2::CreateMatMulNode2(const FuncGraphPtr &func_g auto matmul = NewCNode(matmul_inputs, func_graph); // Set infer data type and shape auto lstm_input_grad_shape = common::AnfAlgo::GetOutputInferShape(lstm_input_grad, 0); - std::vector out_shape; + ShapeVector out_shape; if (specs.batch_size % kCubeSize == 0 && !specs.shape_need_align) { - out_shape = {IntToSize(1), lstm_input_grad_shape[kDim1]}; + out_shape = {1, lstm_input_grad_shape[kDim1]}; } else { - out_shape = {lstm_input_grad_shape[kDim0], IntToSize(1), lstm_input_grad_shape[kDim2]}; + out_shape = {lstm_input_grad_shape[kDim0], 1, lstm_input_grad_shape[kDim2]}; } common::AnfAlgo::SetOutputInferTypeAndShape({kNumberTypeFloat16}, {out_shape}, matmul.get()); // Set attr @@ -694,7 +697,7 @@ CNodePtr DynamicRnnGradFissionV2::CreateTranspose(const FuncGraphPtr &func_graph std::vector transpose_inputs = {NewValueNode(std::make_shared(prim::kPrimTranspose->name())), dw_reduce_sum}; auto transpose = NewCNode(transpose_inputs, func_graph); - std::vector out_shape = {specs.input_size + specs.hidden_size, kDimMultiNum * 
specs.hidden_size}; + auto out_shape = Convert2Long({specs.input_size + specs.hidden_size, kDimMultiNum * specs.hidden_size}); common::AnfAlgo::SetOutputInferTypeAndShape({common::AnfAlgo::GetOutputInferDataType(dw_reduce_sum, 0)}, {out_shape}, transpose.get()); common::AnfAlgo::SetNodeAttr(kAttrPerm, MakeValue(std::vector{1, 0, 2, 3}), transpose); @@ -718,8 +721,8 @@ AnfNodePtr DynamicRnnGradFissionV2::CreateDwReduceSum(const FuncGraphPtr &func_g // if matmul output is too large, need to insert cast to enable BatchMatmul&ReduceSum ub fusion later const size_t max_out_shape_size = 1 << 10; auto matmul_out_shape = common::AnfAlgo::GetOutputInferShape(matmul, 0); - auto matmul_out_shape_size = std::accumulate(matmul_out_shape.begin(), matmul_out_shape.end(), static_cast(1), - std::multiplies()); + auto matmul_out_shape_size = LongToSize(std::accumulate(matmul_out_shape.begin(), matmul_out_shape.end(), + static_cast(1), std::multiplies())); bool size_exceed_limit = matmul_out_shape_size > max_out_shape_size; if (size_exceed_limit) { std::vector cast_inputs = {NewValueNode(std::make_shared(prim::kPrimCast->name())), matmul}; @@ -732,8 +735,8 @@ AnfNodePtr DynamicRnnGradFissionV2::CreateDwReduceSum(const FuncGraphPtr &func_g input_node}; auto reduce_sum = NewCNode(reduce_sum_inputs, func_graph); // Set infer data type and shape - std::vector reduce_sum_shape = {specs.input_size + specs.hidden_size, - kDimMultiNum * specs.hidden_nz_size * kCubeSize}; + auto reduce_sum_shape = + Convert2Long({specs.input_size + specs.hidden_size, kDimMultiNum * specs.hidden_nz_size * kCubeSize}); auto reduce_sum_type = size_exceed_limit ? kNumberTypeFloat32 : common::AnfAlgo::GetOutputInferDataType(dynamic_rnn_grad_cnode, 0); common::AnfAlgo::SetOutputInferTypeAndShape({reduce_sum_type}, {reduce_sum_shape}, reduce_sum.get()); @@ -778,8 +781,8 @@ AnfNodePtr DynamicRnnGradFissionV2::CreateDwReshape(const FuncGraphPtr &func_gra matmul}; auto reshape = NewCNode(reshape_inputs, func_graph); // Set infer data type and shape - std::vector out_shape = {specs.input_size + specs.hidden_size, - kDimMultiNum * specs.hidden_nz_size * kCubeSize}; + auto out_shape = + Convert2Long({specs.input_size + specs.hidden_size, kDimMultiNum * specs.hidden_nz_size * kCubeSize}); common::AnfAlgo::SetOutputInferTypeAndShape({common::AnfAlgo::GetOutputInferDataType(dynamic_rnn_grad_cnode, 0)}, {out_shape}, reshape.get()); common::AnfAlgo::SetNodeAttr("is_backend_insert", MakeValue(true), reshape); @@ -807,7 +810,7 @@ AnfNodePtr DynamicRnnGradFissionV2::CreateValueNode(const FuncGraphPtr &func_gra auto kernel_graph = func_graph->cast(); auto value_node = kernel_graph->NewValueNode(x_abstract, tensor); kernel_graph->AddValueNodeToGraph(value_node); - common::AnfAlgo::SetOutputInferTypeAndShape({kNumberTypeFloat32}, {shape}, value_node.get()); + common::AnfAlgo::SetOutputInferTypeAndShape({kNumberTypeFloat32}, {output_shape}, value_node.get()); return value_node; } @@ -821,7 +824,7 @@ AnfNodePtr DynamicRnnGradFissionV2::CreateDbReduceSum(const FuncGraphPtr &func_g std::vector reshape_inputs = {NewValueNode(std::make_shared(prim::kPrimReshape->name())), matmul}; auto reshape = NewCNode(reshape_inputs, func_graph); - std::vector out_shape = {kDimMultiNum * specs.hidden_size}; + ShapeVector out_shape = {SizeToLong(kDimMultiNum * specs.hidden_size)}; common::AnfAlgo::SetOutputInferTypeAndShape({kNumberTypeFloat16}, {out_shape}, reshape.get()); common::AnfAlgo::SetNodeAttr("is_backend_insert", MakeValue(true), reshape); return reshape; @@ -830,7 
+833,7 @@ AnfNodePtr DynamicRnnGradFissionV2::CreateDbReduceSum(const FuncGraphPtr &func_g NewValueNode(std::make_shared(prim::kPrimReduceSum->name())), matmul}; auto reduce_sum = NewCNode(reduce_sum_inputs, func_graph); // Set infer data type and shape - std::vector out_shape = {kDimMultiNum * specs.hidden_size}; + ShapeVector out_shape = {SizeToLong(kDimMultiNum * specs.hidden_size)}; common::AnfAlgo::SetOutputInferTypeAndShape({kNumberTypeFloat16}, {out_shape}, reduce_sum.get()); // Set attr common::AnfAlgo::SetNodeAttr(kAttrAxis, MakeValue(std::vector{0}), reduce_sum); @@ -870,10 +873,11 @@ const AnfNodePtr DynamicRnnGradFissionV2::Process(const FuncGraphPtr &func_graph } auto input0_shape = common::AnfAlgo::GetOutputInferShape(dynamic_rnn_grad_cnode->input(kIndex1), 0); RNNShapeSpecs specs; - specs.t_size = input0_shape[0]; - specs.batch_size = input0_shape[1]; - specs.input_size = input0_shape[kDim2]; - specs.hidden_size = common::AnfAlgo::GetOutputInferShape(dynamic_rnn_grad_cnode->input(kIndex7), 0)[kDim2]; + specs.t_size = LongToSize(input0_shape[0]); + specs.batch_size = LongToSize(input0_shape[1]); + specs.input_size = LongToSize(input0_shape[kDim2]); + specs.hidden_size = + LongToSize(common::AnfAlgo::GetOutputInferShape(dynamic_rnn_grad_cnode->input(kIndex7), 0)[kDim2]); if (specs.input_size % kCubeSize != 0 || specs.hidden_size % kCubeSize != 0) { specs.shape_need_align = true; SetAttrInputAndHiddenSize(func_graph, dynamic_rnn_grad_cnode, SizeToLong(specs.input_size), diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/dynamic_rnn_grad_fission_v2.h b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/dynamic_rnn_grad_fission_v2.h index d788336b6aa..8153716316e 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/dynamic_rnn_grad_fission_v2.h +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/dynamic_rnn_grad_fission_v2.h @@ -45,9 +45,8 @@ class DynamicRnnGradFissionV2 : public PatternProcessPass { void CreateTLoopNode(const FuncGraphPtr &func_graph, const CNodePtr &dynamic_rnn_grad_cnode, const RNNShapeSpecs &specs, std::vector> *result_nodes) const; AnfNodePtr CreateLSTMSPlitV(const FuncGraphPtr &func_graph, const AnfNodePtr &input, - const std::vector> &split_shapes, - const std::vector &split_types, const std::vector &size_split, - size_t num_split_x) const; + const std::vector &split_shapes, const std::vector &split_types, + const std::vector &size_split, size_t num_split_x) const; void CreateTLoopNodeWithEdge(const FuncGraphPtr &func_graph, const CNodePtr &dynamic_rnn_grad_cnode, const std::vector> &result_nodes, size_t num_split_x, const RNNShapeSpecs &specs, diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/gather_v2_ds_fission.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/gather_v2_ds_fission.cc index 1cae15a52c1..8b8dc33eaf7 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/gather_v2_ds_fission.cc +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/gather_v2_ds_fission.cc @@ -132,9 +132,8 @@ CNodePtr GatherV2DsFission::CreateGatherV2Ds(const FuncGraphPtr &graph, const CN gather_v2->set_scope(origin_node->scope()); auto shape = common::AnfAlgo::GetOutputInferShape(origin_node, 0); - shape[shape.size() - 1] = pad_dim_size; - if (AnfUtils::IsShapeDynamic(shape)) { - ShapeVector shape_tmp; + shape[shape.size() - 1] = SizeToLong(pad_dim_size); + if (IsDynamic(shape)) { auto min_shape = common::AnfAlgo::GetOutputMinShape(origin_node, 0); auto 
max_shape = common::AnfAlgo::GetOutputMaxShape(origin_node, 0); if (!min_shape.empty() && !max_shape.empty()) { @@ -142,8 +141,7 @@ CNodePtr GatherV2DsFission::CreateGatherV2Ds(const FuncGraphPtr &graph, const CN max_shape[max_shape.size() - 1] = SizeToLong(pad_dim_size); } - std::transform(shape.begin(), shape.end(), std::back_inserter(shape_tmp), SizeToLong); - std::vector<BaseShapePtr> shapes = {std::make_shared<abstract::Shape>(shape_tmp, min_shape, max_shape)}; + std::vector<BaseShapePtr> shapes = {std::make_shared<abstract::Shape>(shape, min_shape, max_shape)}; common::AnfAlgo::SetOutputTypeAndDetailShape({common::AnfAlgo::GetOutputInferDataType(origin_node, 0)}, shapes, gather_v2.get()); } else { @@ -172,7 +170,7 @@ CNodePtr GatherV2DsFission::CreateSlice(const FuncGraphPtr &graph, const CNodePt auto gather_v2_shape = common::AnfAlgo::GetOutputInferShape(gather_v2, 0); std::vector<size_t> offsets(gather_v2_shape.size(), 0); common::AnfAlgo::SetNodeAttr(kAttrBegin, MakeValue(Convert2Long(offsets)), slice); - common::AnfAlgo::SetNodeAttr(kAttrSize, MakeValue(Convert2Long(gather_v2_shape)), slice); + common::AnfAlgo::SetNodeAttr(kAttrSize, MakeValue(gather_v2_shape), slice); return slice; } diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/lamb_fission.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/lamb_fission.cc index ca38f65a6f0..b690189462e 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/lamb_fission.cc +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/lamb_fission.cc @@ -163,7 +163,7 @@ AnfNodePtr CreateLayerNormNode(const FuncGraphPtr &graph, const AnfNodePtr &inpu square_sum_node->set_abstract(input_node->abstract()); auto types = {common::AnfAlgo::GetOutputInferDataType(input_node, 0)}; - std::vector<size_t> shape = {1}; + ShapeVector shape = {1}; common::AnfAlgo::SetOutputInferTypeAndShape(types, {shape}, square_sum_node.get()); // Calc sqrt of the sum of square diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/lars_v2_fission.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/lars_v2_fission.cc index 838d5a9c992..158be72f4a4 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/lars_v2_fission.cc +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/lars_v2_fission.cc @@ -43,7 +43,7 @@ void LarsV2Fission::CreateOutputsOfSquareSumAll(const FuncGraphPtr &graph, const square_sum_all->set_scope(lars_v2->scope()); auto types = {kNumberTypeFloat32, kNumberTypeFloat32}; - std::vector<size_t> shape; + ShapeVector shape; auto shapes = {shape, shape}; common::AnfAlgo::SetOutputInferTypeAndShape(types, shapes, square_sum_all.get()); CreateMultipleOutputsOfAnfNode(graph, square_sum_all, kSquareSumOutputNum, square_sum_all_outputs); diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/layer_norm_grad_split.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/layer_norm_grad_split.cc index 4b67dbfbc0f..9f7260f80e7 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/layer_norm_grad_split.cc +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/layer_norm_grad_split.cc @@ -83,10 +83,8 @@ void LayerNormGradSplit::CreateOutputsOfLayerNormBetaGammaBackpropV2( common::AnfAlgo::SetOutputTypeAndDetailShape(types, shapes, layer_norm_beta_gamma_backprop.get()); // get device shape of LayerNormGrad's 5th Input, and convert it to attr - std::vector<size_t> shape_gamma = - common::AnfAlgo::GetPrevNodeOutputInferShape(layer_norm_grad, kLayerNormGradInputGammaIndex); -
common::AnfAlgo::SetNodeAttr(kAttrShapeGamma, MakeValue(opt::Convert2Long(shape_gamma)), - layer_norm_beta_gamma_backprop); + auto shape_gamma = common::AnfAlgo::GetPrevNodeOutputInferShape(layer_norm_grad, kLayerNormGradInputGammaIndex); + common::AnfAlgo::SetNodeAttr(kAttrShapeGamma, MakeValue(shape_gamma), layer_norm_beta_gamma_backprop); CreateMultipleOutputsOfAnfNode(graph, layer_norm_beta_gamma_backprop, kLayerNormBetaGammaBackpropOutputNum, layer_norm_beta_gamma_backprop_outputs); diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/pack_fission.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/pack_fission.cc index 4fbd18bbd72..2653229225c 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/pack_fission.cc +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/pack_fission.cc @@ -52,6 +52,7 @@ AnfNodePtr PackFission::CreateNewPack(const FuncGraphPtr &func_graph, const CNod MS_LOG(EXCEPTION) << "The concat_dim value " << axis << "is out of range" << trace::DumpSourceLines(origin_pack_cnode); } + ShapeVector new_shape = output_shape->shape(); ShapeVector new_shape_min = output_shape->min_shape(); ShapeVector new_shape_max = output_shape->max_shape(); diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/reduce_min_fission.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/reduce_min_fission.cc index 7b7878aeb25..b3e988de072 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/reduce_min_fission.cc +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/reduce_min_fission.cc @@ -22,7 +22,7 @@ namespace mindspore { namespace opt { namespace { -bool NeedOptimize(const TypeId &dtype, const std::vector &shape, const std::vector &axis) { +bool NeedOptimize(const TypeId &dtype, const ShapeVector &shape, const std::vector &axis) { if (dtype != kNumberTypeFloat32) { MS_LOG(INFO) << "ReduceMin's input Dtype is not float32, no need to optimize!"; return false; @@ -44,7 +44,7 @@ bool NeedOptimize(const TypeId &dtype, const std::vector &shape, const s return true; } -std::vector CalFirstAxis(const std::vector &shape, const std::vector &axis) { +std::vector CalFirstAxis(const ShapeVector &shape, const std::vector &axis) { std::vector axis_fisrt; int64_t last_dim = SizeToLong(shape.size() - 1); std::copy_if(axis.begin(), axis.end(), std::back_inserter(axis_fisrt), @@ -68,9 +68,8 @@ std::vector CalFirstAxis(const std::vector &shape, const std::v return axis_fisrt; } -std::vector GetInferShape(const std::vector &shape, const std::vector &axis_first, - bool keep_dims) { - std::vector shape_first; +ShapeVector GetInferShape(const ShapeVector &shape, const std::vector &axis_first, bool keep_dims) { + ShapeVector shape_first; for (size_t item = 0; item < shape.size(); ++item) { if (axis_first.end() != std::find(axis_first.begin(), axis_first.end(), item)) { if (keep_dims) { @@ -138,7 +137,7 @@ const AnfNodePtr ReduceMinFission::Process(const FuncGraphPtr &graph, const AnfN // Create reduce_min1 CNodePtr reduce_min1 = CreateReduceMin(graph, cnode->input(1), cnode); std::vector axis_first = CalFirstAxis(shape, axis); - std::vector shape_first = GetInferShape(shape, axis_first, keep_dims); + auto shape_first = GetInferShape(shape, axis_first, keep_dims); common::AnfAlgo::SetOutputInferTypeAndShape({dtype}, {shape_first}, reduce_min1.get()); common::AnfAlgo::SetNodeAttr(kAttrAxis, MakeValue(axis_first), reduce_min1); diff --git 
a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/renorm_split.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/renorm_split.cc index 16ee0cd7a96..2561912fd1a 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/renorm_split.cc +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/renorm_split.cc @@ -21,7 +21,7 @@ namespace mindspore { namespace opt { namespace { -void FreshRenormInferShape(const CNodePtr &node, std::vector in_shape, const TypeId &type) { +void FreshRenormInferShape(const CNodePtr &node, ShapeVector in_shape, const TypeId &type) { MS_EXCEPTION_IF_NULL(node); auto dim = common::AnfAlgo::GetNodeAttr(node, "dim"); if (dim < 0) { @@ -37,10 +37,8 @@ void FreshRenormInferShape(const CNodePtr &node, std::vector in_shape, c if (common::AnfAlgo::IsDynamicShape(node)) { auto max_shape = common::AnfAlgo::GetOutputMaxShape(node, 0); auto min_shape = common::AnfAlgo::GetOutputMinShape(node, 0); - std::vector shape_tmp; - std::transform(in_shape.begin(), in_shape.end(), std::back_inserter(shape_tmp), SizeToLong); common::AnfAlgo::SetOutputTypeAndDetailShape( - {type}, {std::make_shared(shape_tmp, min_shape, max_shape)}, node.get()); + {type}, {std::make_shared(in_shape, min_shape, max_shape)}, node.get()); return; } common::AnfAlgo::SetOutputInferTypeAndShape({type}, {in_shape}, node.get()); @@ -83,9 +81,7 @@ const AnfNodePtr RenormSplit::Process(const FuncGraphPtr &func_graph, const AnfN node}; auto broadcast_node = NewCNode(broadcast_inputs, func_graph); MS_EXCEPTION_IF_NULL(broadcast_node); - std::vector shape; - (void)std::transform(in_shape.begin(), in_shape.end(), std::back_inserter(shape), SizeToLong); - common::AnfAlgo::SetNodeAttr("shape", MakeValue(shape), broadcast_node); + common::AnfAlgo::SetNodeAttr("shape", MakeValue(in_shape), broadcast_node); common::AnfAlgo::SetOutputInferTypeAndShape({type}, {in_shape}, broadcast_node.get()); std::vector mul_inputs = {NewValueNode(std::make_shared(prim::kPrimMul->name())), diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/space_to_depth_split.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/space_to_depth_split.cc index d012122f23d..05a31b57b46 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/space_to_depth_split.cc +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/space_to_depth_split.cc @@ -35,9 +35,9 @@ tensor::TensorPtr CreateTensor(const AnfNodePtr &node) { MS_EXCEPTION_IF_NULL(cnode); auto input_x = cnode->input(kSpaceToDepthInputNum); int64_t block_size = common::AnfAlgo::GetNodeAttr(cnode, "block_size"); - std::vector x_shape = common::AnfAlgo::GetOutputInferShape(input_x, 0); - int64_t input_channel = SizeToLong(x_shape[kDim1]); - int64_t assist_input_channel = SizeToLong(x_shape[kDim1]) * block_size * block_size; + auto x_shape = common::AnfAlgo::GetOutputInferShape(input_x, 0); + int64_t input_channel = x_shape[kDim1]; + int64_t assist_input_channel = x_shape[kDim1] * block_size * block_size; std::vector assist_input_shape = {assist_input_channel, input_channel, block_size, block_size}; int64_t dest_size = assist_input_channel * input_channel * block_size * block_size; MS_LOG(DEBUG) << "For SpaceToDepth op, assist input shape is: (" << assist_input_channel << ", " << input_channel diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/split_fission.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/split_fission.cc index 1d1ce13daca..3f8a16699a1 100644 --- 
a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/split_fission.cc +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/split_fission.cc @@ -51,7 +51,7 @@ AnfNodePtr CreateTupleGetItem(const FuncGraphPtr &func_graph, const AnfNodePtr & void CreateOutputShapeAndTypeId(const CNodePtr &origin_cnode, int64_t split_dim, const std::vector<int64_t> &size_splits_new, std::vector<TypeId> *new_type_ids, - std::vector<std::vector<size_t>> *new_output_shapes) { + std::vector<ShapeVector> *new_output_shapes) { MS_EXCEPTION_IF_NULL(new_type_ids); MS_EXCEPTION_IF_NULL(new_output_shapes); auto output_shape = common::AnfAlgo::GetOutputInferShape(origin_cnode, 0); @@ -65,7 +65,7 @@ void CreateOutputShapeAndTypeId(const CNodePtr &origin_cnode, int64_t split_dim, TypeId type_id = common::AnfAlgo::GetOutputInferDataType(origin_cnode, 0); for (size_t i = 0; i < size_splits_new.size(); ++i) { (void)new_type_ids->emplace_back(type_id); - output_shape[split_dim_unsigned] = LongToSize(size_splits_new[i]); + output_shape[split_dim_unsigned] = size_splits_new[i]; (void)new_output_shapes->emplace_back(output_shape); } } @@ -78,7 +78,7 @@ void SetAttrAndAbstractForBaseSplitv(const CNodePtr &origin_cnode, const CNodePt auto output_shape = common::AnfAlgo::GetOutputInferShape(origin_cnode, 0); TypeId type_id = common::AnfAlgo::GetOutputInferDataType(origin_cnode, 0); std::vector<TypeId> base_type_ids(num_split, type_id); - std::vector<std::vector<size_t>> base_output_shapes_base; + std::vector<ShapeVector> base_output_shapes_base; if (split_dim < 0) { split_dim += SizeToLong(output_shape.size()); } @@ -88,7 +88,7 @@ void SetAttrAndAbstractForBaseSplitv(const CNodePtr &origin_cnode, const CNodePt auto split_dim_l = LongToSize(split_dim); auto num_split_l = LongToSize(num_split); for (size_t i = 0; i < num_split_l; ++i) { - output_shape[split_dim_l] = LongToSize(size_splits_base[i]); + output_shape[split_dim_l] = size_splits_base[i]; (void)base_output_shapes_base.emplace_back(output_shape); common::AnfAlgo::SetOutputInferTypeAndShape({type_id}, {output_shape}, base_splitv_outputs[i].get()); } @@ -140,7 +140,7 @@ AnfNodePtr SplitFission::DoFission(const FuncGraphPtr &func_graph, const CNodePt SetAttrForSplitVNode(new_splitv, size_splits_new, split_dim, divisor); // Create new output shape and new output type id for each new Splitv node which has full inputs.
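Reviewer's note on the recurring pattern in this hunk: per-output shapes are now collected as ShapeVector instead of std::vector<size_t>. ShapeVector is, to the best of my reading, an alias for std::vector<int64_t> (declared in mindapi/base/shape_vector.h), so a signed SplitV segment size can be written straight into a shape slot with no LongToSize round-trip. Below is a minimal, self-contained sketch of that idiom; MakeSplitOutputShapes is a hypothetical stand-in for CreateOutputShapeAndTypeId, and the alias is assumed rather than pulled from the real headers.

#include <cstdint>
#include <iostream>
#include <vector>

using ShapeVector = std::vector<int64_t>;  // assumed alias, as in mindapi/base/shape_vector.h

// Hypothetical stand-in for CreateOutputShapeAndTypeId: emit one output shape
// per split segment, overwriting the split dimension with that segment's size.
std::vector<ShapeVector> MakeSplitOutputShapes(ShapeVector base_shape, size_t split_dim,
                                               const std::vector<int64_t> &size_splits) {
  std::vector<ShapeVector> out_shapes;
  for (int64_t segment : size_splits) {
    base_shape[split_dim] = segment;  // direct signed assignment, no LongToSize
    out_shapes.push_back(base_shape);
  }
  return out_shapes;
}

int main() {
  // Split a [32, 100, 16] tensor along dim 1 into segments of 30, 30 and 40.
  for (const auto &shape : MakeSplitOutputShapes({32, 100, 16}, 1, {30, 30, 40})) {
    for (int64_t dim : shape) std::cout << dim << ' ';
    std::cout << '\n';
  }
  return 0;
}

Taking base_shape by value keeps each emitted shape independent, which mirrors how the patched helper reuses a single output_shape local across loop iterations. The hunk continues below.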
std::vector<TypeId> new_type_ids; - std::vector<std::vector<size_t>> new_output_shapes; + std::vector<ShapeVector> new_output_shapes; CreateOutputShapeAndTypeId(cnode, split_dim, size_splits_new, &new_type_ids, &new_output_shapes); common::AnfAlgo::SetOutputInferTypeAndShape(new_type_ids, new_output_shapes, new_splitv.get()); AddNewOutputs(func_graph, new_splitv, divisor, &make_tuple_inputs); @@ -160,7 +160,7 @@ AnfNodePtr SplitFission::DoFission(const FuncGraphPtr &func_graph, const CNodePt SetAttrForSplitVNode(new_splitv, size_splits_new_last, split_dim, last_node_num_split); // Create new output shape and new output type id for the last Splitv node std::vector<TypeId> last_new_type_ids; - std::vector<std::vector<size_t>> last_new_output_shapes; + std::vector<ShapeVector> last_new_output_shapes; CreateOutputShapeAndTypeId(cnode, split_dim, size_splits_new_last, &last_new_type_ids, &last_new_output_shapes); common::AnfAlgo::SetOutputInferTypeAndShape(last_new_type_ids, last_new_output_shapes, new_splitv.get()); AddNewOutputs(func_graph, new_splitv, last_node_num_split, &make_tuple_inputs); diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/topk_split.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/topk_split.cc index c8800e184fc..a74680295eb 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/topk_split.cc +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/topk_split.cc @@ -111,7 +111,7 @@ bool CheckOutputShape(const AnfNodePtr &node) { return false; } auto last_dim = shape[shape.size() - 1]; - const size_t kMaxFloat16 = 65500; + const int64_t kMaxFloat16 = 65500; if (last_dim > kMaxFloat16) { MS_LOG(INFO) << "The last dim is more than " << kMaxFloat16 << ", switch to aicpu ops."; return false; diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/unsorted_segment_sum_fission.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/unsorted_segment_sum_fission.cc index 4d49944b021..70b7e634131 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/unsorted_segment_sum_fission.cc +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/unsorted_segment_sum_fission.cc @@ -56,17 +56,15 @@ CNodePtr UnsortSegmentSumFission::CreatePadding(const FuncGraphPtr &graph, const MS_EXCEPTION_IF_NULL(padding); padding->set_scope(origin_node->scope()); auto shape = common::AnfAlgo::GetPrevNodeOutputInferShape(origin_node, 0); - shape[shape.size() -
1] = pad_dim_size; - if (AnfUtils::IsShapeDynamic(shape)) { + shape[shape.size() - 1] = SizeToLong(pad_dim_size); + if (IsDynamic(shape)) { auto min_shape = common::AnfAlgo::GetOutputMinShape(origin_node, 0); auto max_shape = common::AnfAlgo::GetInputMaxShape(origin_node, 0); if (!min_shape.empty() && !max_shape.empty()) { min_shape[shape.size() - 1] = SizeToLong(pad_dim_size); max_shape[shape.size() - 1] = SizeToLong(pad_dim_size); } - ShapeVector shape_tmp; - std::transform(shape.begin(), shape.end(), std::back_inserter(shape_tmp), SizeToLong); - BaseShapePtr base_shape = std::make_shared(shape_tmp, min_shape, max_shape); + + BaseShapePtr base_shape = std::make_shared(shape, min_shape, max_shape); common::AnfAlgo::SetOutputTypeAndDetailShape({common::AnfAlgo::GetOutputInferDataType(origin_node, 0)}, {base_shape}, unsorted_segment_sum.get()); } else { @@ -107,7 +104,7 @@ CNodePtr UnsortSegmentSumFission::CreateUnsortedSegmentSum(const FuncGraphPtr &g unsorted_segment_sum.get()); } - common::AnfAlgo::SetNodeAttr(kAttrNumSegments, MakeValue(SizeToLong(shape[0])), unsorted_segment_sum); + common::AnfAlgo::SetNodeAttr(kAttrNumSegments, MakeValue(shape[0]), unsorted_segment_sum); return unsorted_segment_sum; } @@ -125,7 +122,7 @@ CNodePtr UnsortSegmentSumFission::CreateSlice(const FuncGraphPtr &graph, const C auto unsort_segment_sum_shape = common::AnfAlgo::GetOutputInferShape(unsort_segment_sum, 0); std::vector offsets(unsort_segment_sum_shape.size(), 0); common::AnfAlgo::SetNodeAttr(kAttrBegin, MakeValue(Convert2Long(offsets)), slice); - common::AnfAlgo::SetNodeAttr(kAttrSize, MakeValue(Convert2Long(unsort_segment_sum_shape)), slice); + common::AnfAlgo::SetNodeAttr(kAttrSize, MakeValue(unsort_segment_sum_shape), slice); return slice; } diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/avgpool_3d_fusion.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/avgpool_3d_fusion.cc index 53c59d84cfb..93bb32b3736 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/avgpool_3d_fusion.cc +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/avgpool_3d_fusion.cc @@ -140,8 +140,8 @@ AnfNodePtr ConstructFilter(const FuncGraphPtr &func_graph, const std::vector assist_shape = {c1 * kd * kh * kw, 1, kC0, kC0}; // frac_z_3d - std::vector infer_shape = {IntToSize(1), LongToSize(fc), LongToSize(kd), LongToSize(kh), LongToSize(kw)}; + ShapeVector assist_shape = {c1 * kd * kh * kw, 1, kC0, kC0}; // frac_z_3d + ShapeVector infer_shape = {1, fc, kd, kh, kw}; float val = 1.0 / (kd * kh * kw); if (divisor_override != 0) { val = 1.0 / divisor_override; @@ -160,7 +160,6 @@ AnfNodePtr ConstructMultiplier(const FuncGraphPtr &func_graph, int64_t fn, int64 MS_EXCEPTION_IF_NULL(func_graph); // assist tensor 2 std::vector assist_shape = {fn, fc, dd, dh, dw}; // NCDHW - auto infer_shape = {LongToSize(fn), LongToSize(fc), LongToSize(dd), LongToSize(dh), LongToSize(dw)}; tensor::TensorPtr tensor = std::make_shared(kNumberTypeFloat16, assist_shape); MS_EXCEPTION_IF_NULL(tensor); auto tensor_data = reinterpret_cast(tensor->data_c()); @@ -206,13 +205,13 @@ AnfNodePtr ConstructMultiplier(const FuncGraphPtr &func_graph, int64_t fn, int64 MS_EXCEPTION_IF_NULL(kernel_graph); auto value_node = kernel_graph->NewValueNode(x_abstract, tensor); kernel_graph->AddValueNodeToGraph(value_node); - common::AnfAlgo::SetOutputInferTypeAndShape({kNumberTypeFloat16}, {infer_shape}, value_node.get()); + common::AnfAlgo::SetOutputInferTypeAndShape({kNumberTypeFloat16}, {assist_shape}, 
value_node.get()); return value_node; } } // namespace -AnfNodePtr ConstructFilterValueNode(const FuncGraphPtr &func_graph, float val, const std::vector &assist_shape, - const std::vector &infer_shape, int64_t cnt) { +AnfNodePtr ConstructFilterValueNode(const FuncGraphPtr &func_graph, float val, const ShapeVector &assist_shape, + const ShapeVector &infer_shape, int64_t cnt) { tensor::TensorPtr assist_tensor = std::make_shared(kNumberTypeFloat16, assist_shape); MS_EXCEPTION_IF_NULL(assist_tensor); TensorTypePtr tensor_type = std::make_shared(kFloat16); @@ -265,14 +264,14 @@ const AnfNodePtr AvgPool3DFusion::Process(const FuncGraphPtr &func_graph, const << ", but got in_shape is " << dims_in.size() << "-D, out_shape is " << dims_out.size() << trace::DumpSourceLines(node); } - auto fn = SizeToLong(dims_in[kDim0]); - auto fc = SizeToLong(dims_in[kDim1]); - auto fd = SizeToLong(dims_in[kDim2]); - auto fh = SizeToLong(dims_in[kDim3]); - auto fw = SizeToLong(dims_in[kDim4]); - auto dout = SizeToLong(dims_out[kDim2]); - auto dh = SizeToLong(dims_out[kDim3]); - auto dw = SizeToLong(dims_out[kDim4]); + auto fn = dims_in[kDim0]; + auto fc = dims_in[kDim1]; + auto fd = dims_in[kDim2]; + auto fh = dims_in[kDim3]; + auto fw = dims_in[kDim4]; + auto dout = dims_out[kDim2]; + auto dh = dims_out[kDim3]; + auto dw = dims_out[kDim4]; // kernel size int64_t kd; int64_t kh; diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/avgpool_3d_fusion.h b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/avgpool_3d_fusion.h index 96842d860e2..1c68bdcaf21 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/avgpool_3d_fusion.h +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/avgpool_3d_fusion.h @@ -32,8 +32,8 @@ class AvgPool3DFusion : public PatternProcessPass { const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override; }; -AnfNodePtr ConstructFilterValueNode(const FuncGraphPtr &func_graph, float val, const std::vector &assist_shape, - const std::vector &infer_shape, int64_t cnt); +AnfNodePtr ConstructFilterValueNode(const FuncGraphPtr &func_graph, float val, const ShapeVector &assist_shape, + const ShapeVector &infer_shape, int64_t cnt); } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/avgpool_3d_grad_fusion.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/avgpool_3d_grad_fusion.cc index 4554303ff92..ec29bf78969 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/avgpool_3d_grad_fusion.cc +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/avgpool_3d_grad_fusion.cc @@ -97,8 +97,8 @@ AnfNodePtr ConstructFilter(const FuncGraphPtr &func_graph, const std::vector assist_shape = {c1 * kd * kh * kw, 1, kC0, kC0}; // frac_z_3d - std::vector infer_shape = {IntToSize(1), LongToSize(fc), LongToSize(kd), LongToSize(kh), LongToSize(kw)}; + ShapeVector assist_shape = {c1 * kd * kh * kw, 1, kC0, kC0}; // frac_z_3d + ShapeVector infer_shape = {1, fc, kd, kh, kw}; float val = 1.0; if (divisor_override != 0) { val = 1.0 / divisor_override; @@ -110,15 +110,13 @@ AnfNodePtr ConstructFilter(const FuncGraphPtr &func_graph, const std::vector &ori_shape, +AnfNodePtr ConstructMultiplier(const FuncGraphPtr &func_graph, const ShapeVector &ori_shape, const std::vector &ori_input_shape, const std::vector &kernel_size, const std::vector &strides, const std::vector &pad_list, bool count_include_pad) { MS_EXCEPTION_IF_NULL(func_graph); // assist 
tensor 2 - std::vector grad_shape; - (void)std::transform(ori_shape.begin(), ori_shape.end(), std::back_inserter(grad_shape), SizeToLong); - std::vector assist_shape = grad_shape; // NCDHW + std::vector assist_shape = ori_shape; // NCDHW tensor::TensorPtr tensor = std::make_shared(kNumberTypeFloat16, assist_shape); MS_EXCEPTION_IF_NULL(tensor); auto tensor_data = reinterpret_cast(tensor->data_c()); @@ -128,14 +126,14 @@ AnfNodePtr ConstructMultiplier(const FuncGraphPtr &func_graph, const std::vector auto len_d = ori_input_shape[kDim2] + pad_d; auto len_h = ori_input_shape[kDim3] + pad_h; auto len_w = ori_input_shape[kDim4] + pad_w; - for (int64_t nn = 0; nn < grad_shape[kDim0]; nn++) { - for (int64_t cc = 0; cc < grad_shape[kDim1]; cc++) { + for (int64_t nn = 0; nn < ori_shape[kDim0]; nn++) { + for (int64_t cc = 0; cc < ori_shape[kDim1]; cc++) { int64_t start_d = 0; - for (int64_t di = 0; di < grad_shape[kDim2]; di++) { + for (int64_t di = 0; di < ori_shape[kDim2]; di++) { int64_t start_h = 0; - for (int64_t hi = 0; hi < grad_shape[kDim3]; hi++) { + for (int64_t hi = 0; hi < ori_shape[kDim3]; hi++) { int64_t start_w = 0; - for (int64_t wi = 0; wi < grad_shape[kDim4]; wi++) { + for (int64_t wi = 0; wi < ori_shape[kDim4]; wi++) { int64_t valid_d = 0; int64_t valid_h = 0; int64_t valid_w = 0; diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/bn_reduce_grad_conv2d_backprop_filter_fusion.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/bn_reduce_grad_conv2d_backprop_filter_fusion.cc index 460eb536a6b..36370d07066 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/bn_reduce_grad_conv2d_backprop_filter_fusion.cc +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/bn_reduce_grad_conv2d_backprop_filter_fusion.cc @@ -44,19 +44,19 @@ bool CheckSupported(const CNodePtr &conv_back_filter) { << y_shape.size() << "-D, x_shape is " << x_shape.size() << "-D, out_shape is " << out_shape.size() << trace::DumpSourceLines(conv_back_filter); } - const std::set kSupportedBatchSize = {32, 256}; + const std::set kSupportedBatchSize = {32, 256}; if (kSupportedBatchSize.find(x_shape[0]) == kSupportedBatchSize.end()) { return false; } - std::vector> supported_cases = { + std::vector supported_cases = { // c_in, c_out, x_h, x_w, y_h, y_w, k_h, k_w {64, 256, 56, 56, 56, 56, 1, 1}, {256, 64, 56, 56, 56, 56, 1, 1}, {3, 64, 224, 224, 112, 112, 7, 7}, {512, 128, 28, 28, 28, 28, 1, 1}, {64, 64, 56, 56, 56, 56, 3, 3}, {256, 512, 56, 56, 28, 28, 1, 1}, {128, 512, 28, 28, 28, 28, 1, 1}, {256, 128, 56, 56, 56, 56, 1, 1}, {64, 64, 56, 56, 56, 56, 1, 1}, }; return std::any_of( - supported_cases.begin(), supported_cases.end(), [&x_shape, &y_shape, &out_shape](const std::vector &c) { + supported_cases.begin(), supported_cases.end(), [&x_shape, &y_shape, &out_shape](const ShapeVector &c) { return (c[kIndex0] == x_shape[kIndex1] && c[kIndex1] == y_shape[kIndex1] && c[kIndex2] == x_shape[kIndex2] && c[kIndex3] == x_shape[kIndex3] && c[kIndex4] == y_shape[kIndex2] && c[kIndex5] == y_shape[kIndex3] && c[kIndex6] == out_shape[kIndex2] && c[kIndex7] == out_shape[kIndex3]); diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/confusion_mul_grad_fusion.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/confusion_mul_grad_fusion.cc index b1f9387e755..2234cc1ea12 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/confusion_mul_grad_fusion.cc +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/confusion_mul_grad_fusion.cc @@ 
-61,8 +61,8 @@ bool QuitFusion(const FuncGraphPtr &graph, const AnfNodePtr &mul0_anf, const Anf MS_EXCEPTION_IF_NULL(input2); auto addn = input2->cast(); constexpr size_t kInferShapeIndex = 2; - constexpr size_t kShape2Dim1 = 1024; - constexpr size_t kShape2Dim2 = 768; + constexpr ShapeValueDType kShape2Dim1 = 1024; + constexpr ShapeValueDType kShape2Dim2 = 768; if (addn == nullptr || common::AnfAlgo::GetCNodeName(addn) != prim::kPrimAddN->name()) { MS_LOG(INFO) << "Mul's second input is not Addn, quit fusion"; return true; @@ -70,7 +70,7 @@ bool QuitFusion(const FuncGraphPtr &graph, const AnfNodePtr &mul0_anf, const Anf if (common::AnfAlgo::IsDynamicShape(addn)) { return true; } - std::vector shape = common::AnfAlgo::GetOutputInferShape(addn, 0); + auto shape = common::AnfAlgo::GetOutputInferShape(addn, 0); if (shape.size() != kInferShapeIndex || !(shape[1] == kShape2Dim1 || shape[1] == kShape2Dim2)) { MS_LOG(INFO) << "Addn's infer shape is not equal to [x,1024] or [x,768], quit fusion"; return true; diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/confusion_softmax_grad_rule.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/confusion_softmax_grad_rule.cc index aae08420435..0de02817a3d 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/confusion_softmax_grad_rule.cc +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/confusion_softmax_grad_rule.cc @@ -71,7 +71,7 @@ bool NeedFusion(const AnfNodePtr &sum_anf, const AnfNodePtr &input0, const AnfNo return false; } - const size_t last_dim_limit = 30000; + const ShapeValueDType last_dim_limit = 30000; auto input0_shape = common::AnfAlgo::GetOutputInferShape(input0, 0); if (!input0_shape.empty() && input0_shape[input0_shape.size() - 1] > last_dim_limit) { MS_LOG(INFO) << "Input shape is too large to optimize, quit fusion, shape: " << input0_shape; diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/deformable_offsets_fusion.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/deformable_offsets_fusion.cc index b50085eb9f7..d1dd8244387 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/deformable_offsets_fusion.cc +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/deformable_offsets_fusion.cc @@ -31,7 +31,7 @@ constexpr size_t kChannel = 3; } // namespace ValueNodePtr DeformableOffsetsFusion::CreateHelperNode( - const FuncGraphPtr &func_graph, const AnfNodePtr &node, const std::vector &offset_shape, + const FuncGraphPtr &func_graph, const AnfNodePtr &node, const ShapeVector &offset_shape, const std::vector &kernel_sizes, const std::vector &strides, const std::vector &pads, const std::vector &dilations, const size_t axis_h, const size_t axis_w, const size_t axis_c) const { int64_t H_OUT = offset_shape[axis_h]; @@ -47,10 +47,8 @@ ValueNodePtr DeformableOffsetsFusion::CreateHelperNode( int64_t pad_left = pads[axis_w]; int64_t h_index; int64_t w_index; - std::vector out_shape = {1, offset_shape[1], offset_shape[2], offset_shape[3]}; - std::vector assist_shape; - std::transform(out_shape.begin(), out_shape.end(), std::back_inserter(assist_shape), SizeToLong); - tensor::TensorPtr helper_tensor = std::make_shared(kNumberTypeFloat32, assist_shape); + ShapeVector out_shape = {1, offset_shape[1], offset_shape[2], offset_shape[3]}; + tensor::TensorPtr helper_tensor = std::make_shared(kNumberTypeFloat32, out_shape); TensorTypePtr tensor_type = std::make_shared(kFloat32); tensor::DeviceInfo device_info{kOpFormat_NHWC, tensor_type, 
kOpFormat_NHWC}; helper_tensor->set_device_info(device_info); @@ -73,7 +71,7 @@ ValueNodePtr DeformableOffsetsFusion::CreateHelperNode( } } } - AbstractBasePtr x_abstract = std::make_shared(kFloat, assist_shape); + AbstractBasePtr x_abstract = std::make_shared(kFloat, out_shape); auto kernel_graph = func_graph->cast(); MS_EXCEPTION_IF_NULL(kernel_graph); auto assist_value_node = kernel_graph->NewValueNode(x_abstract, helper_tensor); diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/deformable_offsets_fusion.h b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/deformable_offsets_fusion.h index a2c7d962bd7..b944463637c 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/deformable_offsets_fusion.h +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/deformable_offsets_fusion.h @@ -30,11 +30,10 @@ class DeformableOffsetsFusion : public PatternProcessPass { const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override; private: - ValueNodePtr CreateHelperNode(const FuncGraphPtr &func_graph, const AnfNodePtr &node, - const std::vector &offset_shape, const std::vector &kernel_sizes, - const std::vector &strides, const std::vector &pads, - const std::vector &dilations, const size_t axis_h, const size_t axis_w, - const size_t axis_c) const; + ValueNodePtr CreateHelperNode(const FuncGraphPtr &func_graph, const AnfNodePtr &node, const ShapeVector &offset_shape, + const std::vector &kernel_sizes, const std::vector &strides, + const std::vector &pads, const std::vector &dilations, + const size_t axis_h, const size_t axis_w, const size_t axis_c) const; }; } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/deformable_offsets_grad_fusion.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/deformable_offsets_grad_fusion.cc index 075f4219bce..5f03ede133d 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/deformable_offsets_grad_fusion.cc +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/deformable_offsets_grad_fusion.cc @@ -31,11 +31,11 @@ constexpr size_t kChannel = 3; } // namespace ValueNodePtr DeformableOffsetsGradFusion::CreateHelperNode( - const FuncGraphPtr &func_graph, const AnfNodePtr &node, const std::vector &offset_shape, + const FuncGraphPtr &func_graph, const AnfNodePtr &node, const ShapeVector &offset_shape, const std::vector &kernel_sizes, const std::vector &strides, const std::vector &pads, const std::vector &dilations, const size_t axis_h, const size_t axis_w, const size_t axis_c) const { - size_t h_out = offset_shape[axis_h]; - size_t w_out = offset_shape[axis_w]; + int64_t h_out = offset_shape[axis_h]; + int64_t w_out = offset_shape[axis_w]; int64_t kernel_size_h = kernel_sizes[0]; int64_t kernel_size_w = kernel_sizes[1]; int64_t stride_h = strides[axis_h]; @@ -47,16 +47,14 @@ ValueNodePtr DeformableOffsetsGradFusion::CreateHelperNode( int64_t pad_left = pads[axis_w]; int64_t h_index; int64_t w_index; - std::vector out_shape = {1, offset_shape[1], offset_shape[2], offset_shape[3]}; - std::vector assist_shape; - std::transform(out_shape.begin(), out_shape.end(), std::back_inserter(assist_shape), SizeToLong); - tensor::TensorPtr helper_tensor = std::make_shared(kNumberTypeFloat32, assist_shape); + ShapeVector out_shape = {1, offset_shape[1], offset_shape[2], offset_shape[3]}; + tensor::TensorPtr helper_tensor = std::make_shared(kNumberTypeFloat32, out_shape); TensorTypePtr tensor_type = 
std::make_shared(kFloat32); tensor::DeviceInfo device_info{kOpFormat_NHWC, tensor_type, kOpFormat_NHWC}; helper_tensor->set_device_info(device_info); auto tensor_data = reinterpret_cast(helper_tensor->data_c()); - for (size_t h = 0; h < h_out; ++h) { - for (size_t w = 0; w < w_out; ++w) { + for (int64_t h = 0; h < h_out; ++h) { + for (int64_t w = 0; w < w_out; ++w) { for (size_t g = 0; g < group; ++g) { for (int64_t k_h = 0; k_h < kernel_size_h; ++k_h) { for (int64_t k_w = 0; k_w < kernel_size_w; ++k_w) { @@ -77,7 +75,7 @@ ValueNodePtr DeformableOffsetsGradFusion::CreateHelperNode( } } } - AbstractBasePtr x_abstract = std::make_shared(kFloat, assist_shape); + AbstractBasePtr x_abstract = std::make_shared(kFloat, out_shape); auto kernel_graph = func_graph->cast(); MS_EXCEPTION_IF_NULL(kernel_graph); auto assist_value_node = kernel_graph->NewValueNode(x_abstract, helper_tensor); diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/deformable_offsets_grad_fusion.h b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/deformable_offsets_grad_fusion.h index 1a28d5fe3e6..9cd5e7229b3 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/deformable_offsets_grad_fusion.h +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/deformable_offsets_grad_fusion.h @@ -30,11 +30,10 @@ class DeformableOffsetsGradFusion : public PatternProcessPass { const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override; private: - ValueNodePtr CreateHelperNode(const FuncGraphPtr &func_graph, const AnfNodePtr &node, - const std::vector &offset_shape, const std::vector &kernel_sizes, - const std::vector &strides, const std::vector &pads, - const std::vector &dilations, const size_t axis_h, const size_t axis_w, - const size_t axis_c) const; + ValueNodePtr CreateHelperNode(const FuncGraphPtr &func_graph, const AnfNodePtr &node, const ShapeVector &offset_shape, + const std::vector &kernel_sizes, const std::vector &strides, + const std::vector &pads, const std::vector &dilations, + const size_t axis_h, const size_t axis_w, const size_t axis_c) const; }; } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/derelu_fusion.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/derelu_fusion.cc index 51479fa5fc7..0aa6ce9aaa4 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/derelu_fusion.cc +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/derelu_fusion.cc @@ -54,7 +54,7 @@ CNodePtr DereluFusion::CreateReluV2(const FuncGraphPtr &graph, const CNodePtr &r if (common::AnfAlgo::IsDynamicShape(relu)) { return nullptr; } - std::vector mask_shape = common::AnfAlgo::GetOutputInferShape(relu, 0); + auto mask_shape = common::AnfAlgo::GetOutputInferShape(relu, 0); if (mask_shape.size() != kMaskShapeSize) { MS_LOG(DEBUG) << "relu's infer shape size not equal 4"; return nullptr; diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/momentum_lossscale_fusion.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/momentum_lossscale_fusion.cc index d3153518ba9..fa28bcecd17 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/momentum_lossscale_fusion.cc +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/momentum_lossscale_fusion.cc @@ -33,7 +33,7 @@ bool CheckValueNodeInputOfMul(const AnfNodePtr &node) { if (common::AnfAlgo::IsDynamicShape(node)) { return false; } - std::vector mul_input_shape = 
common::AnfAlgo::GetOutputInferShape(node, 0); + auto mul_input_shape = common::AnfAlgo::GetOutputInferShape(node, 0); return mul_input_shape.empty() || (mul_input_shape.size() == 1 && mul_input_shape[0] == 1); } } // namespace diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/reshape_transpose_fusion.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/reshape_transpose_fusion.cc index cde137fc8b6..09f974bb9ce 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/reshape_transpose_fusion.cc +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/reshape_transpose_fusion.cc @@ -25,7 +25,7 @@ namespace mindspore { namespace opt { namespace { -bool CheckShapeDimInfo(const std::vector &shape) { +bool CheckShapeDimInfo(const ShapeVector &shape) { if (shape.empty()) { return false; } @@ -63,8 +63,8 @@ const AnfNodePtr ReshapeTransposeFusion::Process(const FuncGraphPtr &func_graph, (kernel_graph->IsInternalOutput(reshape_cnode, 0) || kernel_graph->IsInternalOutput(transpose_cnode, 0))) { return nullptr; } - std::vector reshape_input0_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(reshape_cnode, 0); - std::vector transpose_output0_shape = common::AnfAlgo::GetOutputInferShape(transpose_cnode, 0); + auto reshape_input0_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(reshape_cnode, 0); + auto transpose_output0_shape = common::AnfAlgo::GetOutputInferShape(transpose_cnode, 0); if (!CheckShapeDimInfo(reshape_input0_shape) || !CheckShapeDimInfo(transpose_output0_shape)) { return nullptr; } @@ -79,7 +79,7 @@ const AnfNodePtr ReshapeTransposeFusion::Process(const FuncGraphPtr &func_graph, common::AnfAlgo::CopyNodeAttr(kAttrPerm, transpose_cnode, new_node); common::AnfAlgo::SetNodeAttr(kAttrTransposeFirst, MakeValue(false), new_node); auto reshape_output_shape = common::AnfAlgo::GetOutputInferShape(reshape_cnode, 0); - common::AnfAlgo::SetNodeAttr(kAttrShape, MakeValue(Convert2Long(reshape_output_shape)), new_node); + common::AnfAlgo::SetNodeAttr(kAttrShape, MakeValue(reshape_output_shape), new_node); return new_node; } diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/transpose_reshape_fusion.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/transpose_reshape_fusion.cc index 5a202959709..0eb38d55d83 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/transpose_reshape_fusion.cc +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/transpose_reshape_fusion.cc @@ -25,7 +25,7 @@ namespace mindspore { namespace opt { namespace { -bool CheckShapeDimInfo(const std::vector &shape) { +bool CheckShapeDimInfo(const ShapeVector &shape) { constexpr size_t kShape2Dim = 2; if (shape.empty()) { return false; @@ -62,8 +62,8 @@ const AnfNodePtr TransposeReshapeFusion::Process(const FuncGraphPtr &func_graph, (kernel_graph->IsInternalOutput(reshape_cnode, 0) || kernel_graph->IsInternalOutput(transpose_cnode, 0))) { return nullptr; } - std::vector reshape_output0_shape = common::AnfAlgo::GetOutputInferShape(reshape_cnode, 0); - std::vector transpose_input0_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(transpose_cnode, 0); + auto reshape_output0_shape = common::AnfAlgo::GetOutputInferShape(reshape_cnode, 0); + auto transpose_input0_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(transpose_cnode, 0); if (!CheckShapeDimInfo(reshape_output0_shape) || !CheckShapeDimInfo(transpose_input0_shape)) { return nullptr; } @@ -77,7 +77,7 @@ const AnfNodePtr TransposeReshapeFusion::Process(const FuncGraphPtr 
&func_graph, common::AnfAlgo::CopyNodeAttr(kAttrPerm, transpose_cnode, new_node); common::AnfAlgo::SetNodeAttr(kAttrTransposeFirst, MakeValue(true), new_node); auto reshape_output_shape = common::AnfAlgo::GetOutputInferShape(reshape_cnode, 0); - common::AnfAlgo::SetNodeAttr(kAttrShape, MakeValue(Convert2Long(reshape_output_shape)), new_node); + common::AnfAlgo::SetNodeAttr(kAttrShape, MakeValue(reshape_output_shape), new_node); return new_node; } diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/all_to_all_unify_mindir.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/all_to_all_unify_mindir.cc index 2ccfc59cfa8..90e26c205ee 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/all_to_all_unify_mindir.cc +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/all_to_all_unify_mindir.cc @@ -31,7 +31,7 @@ namespace { constexpr size_t kCNodePrimitiveIdx = 0; constexpr size_t kAllToAllInputIdx = 1; -inline int64_t NormalizeDim(const std::vector &shape, int64_t dim) { +inline int64_t NormalizeDim(const ShapeVector &shape, int64_t dim) { return dim < 0 ? SizeToLong(shape.size()) + dim : dim; } @@ -85,9 +85,9 @@ CNodePtr AllToAllUnifyMindIR::CreateSplitNode(const FuncGraphPtr &graph, const C MS_LOG(EXCEPTION) << "Invalid split count " << split_count << " cannot be divisible by shape[" << split_dim << "] = " << shape[LongToSize(split_dim)] << trace::DumpSourceLines(all_to_all); } - shape[LongToSize(split_dim)] /= static_cast(split_count); + shape[LongToSize(split_dim)] /= split_count; std::vector dtypes(split_count, dtype); - if (AnfUtils::IsShapeDynamic(shape)) { + if (IsDynamic(shape)) { auto min_shape = common::AnfAlgo::GetOutputMinShape(all_to_all_input, 0); auto max_shape = common::AnfAlgo::GetOutputMaxShape(all_to_all_input, 0); if (!min_shape.empty() && !max_shape.empty()) { @@ -95,13 +95,10 @@ CNodePtr AllToAllUnifyMindIR::CreateSplitNode(const FuncGraphPtr &graph, const C min_shape[LongToSize(split_dim)] /= split_count; } - ShapeVector new_shape; - std::transform(shape.begin(), shape.end(), std::back_inserter(new_shape), SizeToLong); - - std::vector shapes(split_count, std::make_shared(new_shape, min_shape, max_shape)); + std::vector shapes(split_count, std::make_shared(shape, min_shape, max_shape)); common::AnfAlgo::SetOutputTypeAndDetailShape(dtypes, shapes, split_v.get()); } else { - std::vector> shapes(split_count, shape); + std::vector shapes(split_count, shape); common::AnfAlgo::SetOutputInferTypeAndShape(dtypes, shapes, split_v.get()); } @@ -176,8 +173,8 @@ CNodePtr AllToAllUnifyMindIR::CreateConcatNode(const FuncGraphPtr &graph, const MS_LOG(EXCEPTION) << "Invalid concat dim " << concat_dim << " is greater than shape size " << single_shape.size() << trace::DumpSourceLines(all_to_all); } - single_shape[LongToSize(concat_dim)] *= static_cast(split_count); - if (AnfUtils::IsShapeDynamic(single_shape)) { + single_shape[LongToSize(concat_dim)] *= split_count; + if (IsDynamic(single_shape)) { auto min_shape = common::AnfAlgo::GetOutputMinShape(all_to_all_v_outputs[0], 0); auto max_shape = common::AnfAlgo::GetOutputMaxShape(all_to_all_v_outputs[0], 0); if (!min_shape.empty() && !max_shape.empty()) { @@ -185,11 +182,9 @@ CNodePtr AllToAllUnifyMindIR::CreateConcatNode(const FuncGraphPtr &graph, const min_shape[LongToSize(concat_dim)] *= split_count; } - ShapeVector new_shape; - (void)std::transform(single_shape.begin(), single_shape.end(), std::back_inserter(new_shape), SizeToLong); - 
common::AnfAlgo::SetOutputTypeAndDetailShape({common::AnfAlgo::GetOutputInferDataType(all_to_all_v_outputs[0], 0)}, - {std::make_shared(new_shape, min_shape, max_shape)}, - concat.get()); + common::AnfAlgo::SetOutputTypeAndDetailShape( + {common::AnfAlgo::GetOutputInferDataType(all_to_all_v_outputs[0], 0)}, + {std::make_shared(single_shape, min_shape, max_shape)}, concat.get()); } else { common::AnfAlgo::SetOutputInferTypeAndShape({common::AnfAlgo::GetOutputInferDataType(all_to_all_v_outputs[0], 0)}, {single_shape}, concat.get()); diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/avg_pool_grad_unify_mindir.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/avg_pool_grad_unify_mindir.cc index 702c34dbb56..a469af49ea9 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/avg_pool_grad_unify_mindir.cc +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/avg_pool_grad_unify_mindir.cc @@ -40,10 +40,7 @@ constexpr float kKernelMatrixInitNum = 1.0; constexpr size_t kFloat32Len = 4; // size of float32 std::vector GetInputXShape(const AnfNodePtr &node) { MS_EXCEPTION_IF_NULL(node); - std::vector shapes; - auto shape_size_t = common::AnfAlgo::GetPrevNodeOutputInferShape(node, 0); - std::transform(shape_size_t.begin(), shape_size_t.end(), std::back_inserter(shapes), SizeToLong); - return shapes; + return common::AnfAlgo::GetPrevNodeOutputInferShape(node, 0); } int64_t windowed_output_size(const AnfNodePtr &node, int64_t input_size, int64_t ksize, int64_t stride, diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/conv2d_unify_mindir.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/conv2d_unify_mindir.cc index 06be024eabc..9a3e972c44b 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/conv2d_unify_mindir.cc +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/conv2d_unify_mindir.cc @@ -44,9 +44,9 @@ constexpr auto kAttrChannelMultiplier = "channel_multiplier"; constexpr auto kAttrInputSizes = "input_sizes"; constexpr auto kAttrInputSize = "input_size"; -bool NeedUpdate(const CNodePtr &conv2d, std::vector in_shape, std::vector out_shape) { +bool NeedUpdate(const CNodePtr &conv2d, ShapeVector in_shape, ShapeVector out_shape) { MS_EXCEPTION_IF_NULL(conv2d); - auto group = LongToSize(common::AnfAlgo::GetNodeAttr(conv2d, kAttrGroup)); + auto group = common::AnfAlgo::GetNodeAttr(conv2d, kAttrGroup); if (group == 1) { return false; } @@ -124,8 +124,7 @@ CNodePtr CreateTranspose(const FuncGraphPtr &graph, const CNodePtr &conv2d, cons << out_shape.size() << trace::DumpSourceLines(conv2d); } std::swap(out_shape[kDim0], out_shape[kDim1]); - if (AnfUtils::IsShapeDynamic(out_shape)) { - ShapeVector new_shape; + if (IsDynamic(out_shape)) { auto min_shape = common::AnfAlgo::GetOutputMinShape(input_node, 0); auto max_shape = common::AnfAlgo::GetOutputMaxShape(input_node, 0); if (!min_shape.empty() && !max_shape.empty()) { @@ -133,9 +132,8 @@ CNodePtr CreateTranspose(const FuncGraphPtr &graph, const CNodePtr &conv2d, cons std::swap(max_shape[kDim0], max_shape[kDim1]); } - std::transform(out_shape.begin(), out_shape.end(), std::back_inserter(new_shape), SizeToLong); common::AnfAlgo::SetOutputTypeAndDetailShape( - types, {std::make_shared(new_shape, min_shape, max_shape)}, transpose.get()); + types, {std::make_shared(out_shape, min_shape, max_shape)}, transpose.get()); } else { auto shapes = {out_shape}; common::AnfAlgo::SetOutputInferTypeAndShape(types, shapes, transpose.get()); @@ -326,14 +324,13 @@ CNodePtr 
Conv2DBackpropFilterUnifyMindIR::CreateDepthwiseConv2DBackpropFilter(co depth_conv_backfil->set_scope(conv2d_backfil->scope()); auto types = {common::AnfAlgo::GetOutputInferDataType(conv2d_backfil, 0)}; - std::vector<size_t> out_shape = common::AnfAlgo::GetOutputInferShape(conv2d_backfil, 0); + auto out_shape = common::AnfAlgo::GetOutputInferShape(conv2d_backfil, 0); if (out_shape.size() != kConv2DAxisNum) { MS_LOG(EXCEPTION) << "Conv2DBackpropFilter's output axis number should be " << kConv2DAxisNum << ", but got " << out_shape.size() << trace::DumpSourceLines(conv2d_backfil); } std::swap(out_shape[0], out_shape[1]); - if (AnfUtils::IsShapeDynamic(out_shape)) { - ShapeVector new_shape; + if (IsDynamic(out_shape)) { auto min_shape = common::AnfAlgo::GetOutputMinShape(conv2d_backfil, 0); auto max_shape = common::AnfAlgo::GetOutputMaxShape(conv2d_backfil, 0); if (!min_shape.empty() && !max_shape.empty()) { @@ -341,9 +338,8 @@ CNodePtr Conv2DBackpropFilterUnifyMindIR::CreateDepthwiseConv2DBackpropFilter(co std::swap(min_shape[0], min_shape[1]); std::swap(max_shape[0], max_shape[1]); } - std::transform(out_shape.begin(), out_shape.end(), std::back_inserter(new_shape), SizeToLong); common::AnfAlgo::SetOutputTypeAndDetailShape( - types, {std::make_shared<abstract::Shape>(new_shape, min_shape, max_shape)}, depth_conv_backfil.get()); + types, {std::make_shared<abstract::Shape>(out_shape, min_shape, max_shape)}, depth_conv_backfil.get()); } else { auto shapes = {out_shape}; common::AnfAlgo::SetOutputInferTypeAndShape(types, shapes, depth_conv_backfil.get()); diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/maxpool_with_argmax_unify_mindir.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/maxpool_with_argmax_unify_mindir.cc index 82c1850b4d5..a9c6218f9f4 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/maxpool_with_argmax_unify_mindir.cc +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/maxpool_with_argmax_unify_mindir.cc @@ -75,8 +75,9 @@ const AnfNodePtr MaxPoolWithArgmaxUnifyMindIR::Process(const FuncGraphPtr &graph MS_LOG(EXCEPTION) << "Argmax or kernel_size's shape dim should be equal to 4, but got argmax dim: " << argmax_shape.size() << ", kernel_size dim: " << ksize.size() << trace::DumpSourceLines(node); } - argmax_shape[kDim2] = LongToSize(ksize[kDim1] * ksize[kDim2]); - argmax_shape[kDim3] = (output_shape[kDim2] * output_shape[kDim3] + kAlignBytes - 1) / kAlignBytes + 1; + argmax_shape[kDim2] = ksize[kDim1] * ksize[kDim2]; + argmax_shape[kDim3] = + (output_shape[kDim2] * output_shape[kDim3] + SizeToLong(kAlignBytes) - 1) / SizeToLong(kAlignBytes) + 1; auto types = {common::AnfAlgo::GetOutputInferDataType(maxpool_with_argmax, 0), argmax_dtype}; auto shapes = {output_shape, argmax_shape}; common::AnfAlgo::SetOutputInferTypeAndShape(types, shapes, maxpool_with_argmax.get()); @@ -114,8 +115,9 @@ const AnfNodePtr MaxPoolGradWithArgmaxUnifyMindIR::Process(const FuncGraphPtr &g MS_LOG(EXCEPTION) << "Argmax or kernel_size's shape dim should be equal to 4, but got argmax dim: " << argmax_shape.size() << ", kernel_size dim: " << ksize.size() << trace::DumpSourceLines(node); } - argmax_shape[kDim3] = (argmax_shape[kDim2] * argmax_shape[kDim3] + kAlignBytes - 1) / kAlignBytes + 1; - argmax_shape[kDim2] = LongToSize(ksize[kDim1] * ksize[kDim2]); + argmax_shape[kDim3] = + (argmax_shape[kDim2] * argmax_shape[kDim3] + SizeToLong(kAlignBytes) - 1) / SizeToLong(kAlignBytes) + 1; + argmax_shape[kDim2] = ksize[kDim1] * ksize[kDim2]; common::AnfAlgo::SetOutputInferTypeAndShape({argmax_dtype}, {argmax_shape}, tuple_getitem0_anf.get());
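A note on the AnfUtils::IsShapeDynamic to IsDynamic swap seen in this and earlier hunks: once shape elements are signed, a dynamic dimension can be encoded in-band as a negative sentinel (-1, if I read MindSpore's kShapeDimAny convention correctly), so the predicate reduces to checking for any negative entry. The following is a sketch under that assumption; IsDynamicSketch is an illustrative name, not the real utility.

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

using ShapeVector = std::vector<int64_t>;  // assumed alias

// Sketch of the IsDynamic(shape) predicate used throughout the patch,
// assuming dynamic dimensions are encoded as negative sentinels such as -1.
// An unsigned size_t shape could not represent this, which is one motivation
// for the signed ShapeVector migration.
bool IsDynamicSketch(const ShapeVector &shape) {
  return std::any_of(shape.begin(), shape.end(), [](int64_t dim) { return dim < 0; });
}

int main() {
  std::cout << std::boolalpha;
  std::cout << IsDynamicSketch({32, -1, 16}) << '\n';   // true: one unknown dim
  std::cout << IsDynamicSketch({32, 100, 16}) << '\n';  // false: fully static
  return 0;
}

This also explains the deletions throughout the patch of the parallel "shape_tmp" ShapeVector copies built via std::transform with SizeToLong: with shapes already signed end to end, those conversions become no-ops.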
return maxpool_grad_with_argmax; diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/neighbor_exchange_v2_unify_mindir.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/neighbor_exchange_v2_unify_mindir.cc index 43b57a91582..d76615dcb75 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/neighbor_exchange_v2_unify_mindir.cc +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/neighbor_exchange_v2_unify_mindir.cc @@ -69,10 +69,10 @@ inline void UpdateMaxMinShape(std::vector *min_shape, std::vector &base_shape, const bool is_first, const bool is_last, - const size_t split_dim, const std::vector &send_lens, std::vector *size_splits, - std::vector> *shapes, std::vector *min_shape, - std::vector *max_shape, bool is_dynamic) { +int64_t CalSplitAttrs(const ShapeVector &base_shape, const bool is_first, const bool is_last, const size_t split_dim, + const std::vector &send_lens, std::vector *size_splits, + std::vector *shapes, ShapeVector *min_shape, ShapeVector *max_shape, + bool is_dynamic) { MS_EXCEPTION_IF_NULL(size_splits); MS_EXCEPTION_IF_NULL(shapes); MS_EXCEPTION_IF_NULL(max_shape); @@ -84,8 +84,8 @@ int64_t CalSplitAttrs(const std::vector &base_shape, const bool is_first MS_LOG(EXCEPTION) << "Wrong split_dim: " << split_dim << ", it should less than 4."; } int64_t num_split = 0; - int64_t split_middle_size = SizeToLong(base_shape[split_dim]); - std::vector shape_tmp(base_shape); + int64_t split_middle_size = base_shape[split_dim]; + ShapeVector shape_tmp(base_shape); // [top, bottom, left, right] int64_t first_size = split_dim == kWDim ? send_lens[kDim2] : send_lens[0]; int64_t last_size = split_dim == kWDim ? send_lens[kDim3] : send_lens[1]; @@ -95,7 +95,7 @@ int64_t CalSplitAttrs(const std::vector &base_shape, const bool is_first ++num_split; size_splits->push_back(first_size); split_middle_size -= first_size; - shape_tmp[split_dim] = static_cast(first_size); + shape_tmp[split_dim] = first_size; shapes->push_back(shape_tmp); UpdateMaxMinShape(min_shape, max_shape, is_dynamic, first_size, split_dim); @@ -130,7 +130,7 @@ int64_t CalSplitAttrs(const std::vector &base_shape, const bool is_first } CNodePtr CreateSplitNode(const FuncGraphPtr &graph, const std::vector &split_input, - const std::vector &base_shape, bool is_first, bool is_last, size_t split_dim, + const ShapeVector &base_shape, bool is_first, bool is_last, size_t split_dim, const std::vector &send_lens, TypeId input_dtype, std::pair *shape_pair, int64_t *num_split, const PatternProcessPass &pass) { @@ -142,18 +142,16 @@ CNodePtr CreateSplitNode(const FuncGraphPtr &graph, const std::vector size_splits = {}; - std::vector> shapes = {}; - auto is_dynamic = AnfUtils::IsShapeDynamic(base_shape); + std::vector shapes = {}; + auto is_dynamic = IsDynamic(base_shape); *num_split = CalSplitAttrs(base_shape, is_first, is_last, split_dim, send_lens, &size_splits, &shapes, - &shape_pair->first, &shape_pair->first, is_dynamic); + &shape_pair->first, &shape_pair->second, is_dynamic); std::vector dtypes(*num_split, input_dtype); if (is_dynamic) { std::vector shapes_ptr; for (const auto &shape : shapes) { - ShapeVector shape_tmp; - std::transform(shape.begin(), shape.end(), std::back_inserter(shape_tmp), SizeToLong); - BaseShapePtr shape_ptr = std::make_shared(shape_tmp, shape_pair->first, shape_pair->second); + BaseShapePtr shape_ptr = std::make_shared(shape, shape_pair->first, shape_pair->second); shapes_ptr.push_back(shape_ptr); } common::AnfAlgo::SetOutputTypeAndDetailShape(dtypes, shapes_ptr, 
@@ -167,26 +165,21 @@ CNodePtr CreateSplitNode(const FuncGraphPtr &graph, const std::vector<AnfNodePt
-std::vector<std::vector<size_t>> CalAllToAllvOutputShape(const std::vector<size_t> &base_shape,
-                                                         const std::vector<int64_t> &recv_lens,
-                                                         const std::vector<int64_t> &recv_rank_ids) {
+std::vector<ShapeVector> CalAllToAllvOutputShape(const ShapeVector &base_shape, const std::vector<int64_t> &recv_lens,
+                                                 const std::vector<int64_t> &recv_rank_ids) {
   if (SizeToLong(base_shape.size()) != kShapeSize) {
     MS_LOG(EXCEPTION) << "Wrong base_shape size: " << base_shape.size() << ", it should be equal to 4.";
   }
-  std::vector<std::vector<size_t>> shapes = {};
-  std::vector<std::vector<size_t>> ori_shapes = {
-    {base_shape[0], base_shape[1], static_cast<size_t>(recv_lens[kLenTopIdx]), base_shape[kWDim]},
-    {base_shape[0], base_shape[1], static_cast<size_t>(recv_lens[kLenTopIdx]),
-     static_cast<size_t>(recv_lens[kLenRightIdx])},
-    {base_shape[0], base_shape[1], base_shape[kHDim], static_cast<size_t>(recv_lens[kLenRightIdx])},
-    {base_shape[0], base_shape[1], static_cast<size_t>(recv_lens[kLenBottomIdx]),
-     static_cast<size_t>(recv_lens[kLenRightIdx])},
-    {base_shape[0], base_shape[1], static_cast<size_t>(recv_lens[kLenBottomIdx]), base_shape[kWDim]},
-    {base_shape[0], base_shape[1], static_cast<size_t>(recv_lens[kLenBottomIdx]),
-     static_cast<size_t>(recv_lens[kLenLeftIdx])},
-    {base_shape[0], base_shape[1], base_shape[kHDim], static_cast<size_t>(recv_lens[kLenLeftIdx])},
-    {base_shape[0], base_shape[1], static_cast<size_t>(recv_lens[kLenTopIdx]),
-     static_cast<size_t>(recv_lens[kLenLeftIdx])}};
+  std::vector<ShapeVector> shapes = {};
+  std::vector<ShapeVector> ori_shapes = {
+    {base_shape[0], base_shape[1], recv_lens[kLenTopIdx], base_shape[kWDim]},
+    {base_shape[0], base_shape[1], recv_lens[kLenTopIdx], recv_lens[kLenRightIdx]},
+    {base_shape[0], base_shape[1], base_shape[kHDim], recv_lens[kLenRightIdx]},
+    {base_shape[0], base_shape[1], recv_lens[kLenBottomIdx], recv_lens[kLenRightIdx]},
+    {base_shape[0], base_shape[1], recv_lens[kLenBottomIdx], base_shape[kWDim]},
+    {base_shape[0], base_shape[1], recv_lens[kLenBottomIdx], recv_lens[kLenLeftIdx]},
+    {base_shape[0], base_shape[1], base_shape[kHDim], recv_lens[kLenLeftIdx]},
+    {base_shape[0], base_shape[1], recv_lens[kLenTopIdx], recv_lens[kLenLeftIdx]}};
   for (size_t idx = 0; idx < recv_rank_ids.size(); ++idx) {
     if (recv_rank_ids[idx] != kInvalidId) {
@@ -364,7 +357,7 @@ CNodePtr CreateAllToAllvNode(const FuncGraphPtr &graph, const CNodePtr &neighbor
   if (SizeToLong(base_shape.size()) != kShapeSize) {
     MS_LOG(EXCEPTION) << "Invalid shape size " << base_shape.size() << ", only support NCHW input now!";
   }
-  std::vector<std::vector<size_t>> shapes = CalAllToAllvOutputShape(base_shape, recv_lens, recv_rank_ids);
+  std::vector<ShapeVector> shapes = CalAllToAllvOutputShape(base_shape, recv_lens, recv_rank_ids);
   // erase -1 in send_rank_ids
   std::vector<int64_t> real_send_rank_ids(send_rank_ids.size());
@@ -438,7 +431,7 @@ std::vector<CNodePtr> NeighborExchangeV2UnifyMindIR::CreateSplitNodes(const Func
   auto dtype = common::AnfAlgo::GetOutputInferDataType(neighbor_exchange_v2_input, 0);
   auto shape = common::AnfAlgo::GetOutputInferShape(neighbor_exchange_v2_input, 0);
-  auto is_dynamic = AnfUtils::IsShapeDynamic(shape);
+  auto is_dynamic = IsDynamic(shape);
   auto max_shape = common::AnfAlgo::GetOutputMaxShape(neighbor_exchange_v2_input, 0);
   auto min_shape = common::AnfAlgo::GetOutputMinShape(neighbor_exchange_v2_input, 0);
   auto shape_pair = std::make_pair(min_shape, max_shape);
@@ -499,11 +492,11 @@ std::vector<CNodePtr> NeighborExchangeV2UnifyMindIR::CreateSplitNodes(const Func
     std::vector<AnfNodePtr> split_input = {NewValueNode(std::make_shared<Primitive>(prim::kPrimSplitV->name()))};
     if (corner_splitvs_is_input_top[i]) {
       (void)split_input.insert(split_input.end(), split_outputs_top.begin(), split_outputs_top.begin() + 1);
-      shape_tmp[kHDim] = LongToSize(send_lens[0]);
+      shape_tmp[kHDim] = send_lens[0];
       UpdateMaxMinShape(&min_shape, &max_shape, is_dynamic, send_lens[0], kHDim);
     } else {
       (void)split_input.insert(split_input.end(), split_outputs_bottom.end() - 1, split_outputs_bottom.end());
-      shape_tmp[kHDim] = LongToSize(send_lens[1]);
+      shape_tmp[kHDim] = send_lens[1];
       UpdateMaxMinShape(&min_shape, &max_shape, is_dynamic, send_lens[1], kHDim);
     }
     auto pair_tmp = std::make_pair(min_shape, max_shape);
@@ -550,10 +543,10 @@ CNodePtr NeighborExchangeV2UnifyMindIR::CreateLeftRightConcat(const FuncGraphPtr
   auto min_shape =
     common::AnfAlgo::GetOutputMinShape(all_to_all_v_outputs[LongToSize(AllToAllRealIds(middle_ids, recv_rank_ids))], 0);
-  auto is_dynamic = AnfUtils::IsShapeDynamic(single_shape);
+  auto is_dynamic = IsDynamic(single_shape);
   if (recv_rank_ids[first_ids] != kInvalidId) {
     ++input_num;
-    single_shape[kDim2] += static_cast<size_t>(recv_lens[0]);  // H in NCHW
+    single_shape[kDim2] += recv_lens[0];  // H in NCHW
     if (!min_shape.empty() && !max_shape.empty()) {
       max_shape[kDim2] += (is_dynamic) ? recv_lens[0] : 0;
       min_shape[kDim2] += (is_dynamic) ? recv_lens[0] : 0;
@@ -561,7 +554,7 @@ CNodePtr NeighborExchangeV2UnifyMindIR::CreateLeftRightConcat(const FuncGraphPtr
   }
   if (recv_rank_ids[last_ids] != kInvalidId) {
     ++input_num;
-    single_shape[kDim2] += static_cast<size_t>(recv_lens[1]);  // H in NCHW
+    single_shape[kDim2] += recv_lens[1];  // H in NCHW
     if (!min_shape.empty() && !max_shape.empty()) {
       max_shape[kDim2] += (is_dynamic) ? recv_lens[1] : 0;
       min_shape[kDim2] += (is_dynamic) ? recv_lens[1] : 0;
@@ -579,9 +572,7 @@ CNodePtr NeighborExchangeV2UnifyMindIR::CreateLeftRightConcat(const FuncGraphPtr
     all_to_all_v_outputs[LongToSize(AllToAllRealIds(middle_ids, recv_rank_ids))], 0)};
   auto concat = CreateConcatNode(graph, concat_input, SizeToLong(kHDim), input_num);
   if (is_dynamic) {
-    ShapeVector shape;
-    std::transform(single_shape.begin(), single_shape.end(), std::back_inserter(shape), SizeToLong);
-    BaseShapePtr base_shape = std::make_shared<abstract::Shape>(shape, min_shape, max_shape);
+    BaseShapePtr base_shape = std::make_shared<abstract::Shape>(single_shape, min_shape, max_shape);
     common::AnfAlgo::SetOutputTypeAndDetailShape(concat_output_dtype, {base_shape}, concat.get());
   } else {
     common::AnfAlgo::SetOutputInferTypeAndShape(concat_output_dtype, {single_shape}, concat.get());
@@ -598,7 +589,7 @@ CNodePtr NeighborExchangeV2UnifyMindIR::CreateMiddleConcat(
   auto single_shape = common::AnfAlgo::GetOutputInferShape(neighbor_exchange_v2_input, 0);
   auto max_shape = common::AnfAlgo::GetOutputMaxShape(neighbor_exchange_v2_input, 0);
   auto min_shape = common::AnfAlgo::GetOutputMinShape(neighbor_exchange_v2_input, 0);
-  auto is_dynamic = AnfUtils::IsShapeDynamic(single_shape);
+  auto is_dynamic = IsDynamic(single_shape);
   size_t first_idx = concat_dim == kWDim ? kIndex6 : kIndex0;
   size_t last_idx = concat_dim == kWDim ? kIndex2 : kIndex4;
   int64_t first_len = concat_dim == kWDim ? recv_lens[kDim2] : recv_lens[0];
@@ -614,7 +605,7 @@ CNodePtr NeighborExchangeV2UnifyMindIR::CreateMiddleConcat(
     }
     ++input_num_all;
-    single_shape[concat_dim] += LongToSize(first_len);
+    single_shape[concat_dim] += first_len;
     if (!min_shape.empty() && !max_shape.empty()) {
       max_shape[concat_dim] += (is_dynamic) ? first_len : 0;
@@ -636,7 +627,7 @@ CNodePtr NeighborExchangeV2UnifyMindIR::CreateMiddleConcat(
     }
     ++input_num_all;
-    single_shape[concat_dim] += LongToSize(last_len);
+    single_shape[concat_dim] += last_len;
     if (!min_shape.empty() && !max_shape.empty()) {
       max_shape[concat_dim] += (is_dynamic) ? static_cast<int64_t>(last_len) : 0;
       min_shape[concat_dim] += (is_dynamic) ? static_cast<int64_t>(last_len) : 0;
@@ -646,9 +637,7 @@ CNodePtr NeighborExchangeV2UnifyMindIR::CreateMiddleConcat(
   }
   std::vector<TypeId> concat_output_dtype = {common::AnfAlgo::GetOutputInferDataType(all_to_all_v_outputs[0], 0)};
   auto concat_all = CreateConcatNode(graph, concat_input_all, SizeToLong(concat_dim), input_num_all);
   if (is_dynamic) {
-    ShapeVector shape;
-    std::transform(single_shape.begin(), single_shape.end(), std::back_inserter(shape), SizeToLong);
-    BaseShapePtr base_shape = std::make_shared<abstract::Shape>(shape, min_shape, max_shape);
+    BaseShapePtr base_shape = std::make_shared<abstract::Shape>(single_shape, min_shape, max_shape);
     common::AnfAlgo::SetOutputTypeAndDetailShape(concat_output_dtype, {base_shape}, concat_all.get());
   } else {
     common::AnfAlgo::SetOutputInferTypeAndShape(concat_output_dtype, {single_shape}, concat_all.get());
@@ -716,11 +705,9 @@ CNodePtr NeighborExchangeV2UnifyMindIR::CreateConcatNodes(const FuncGraphPtr &gr
   std::vector<AnfNodePtr> concat_input_all = {NewValueNode(std::make_shared<Primitive>(kConcatOpName))};
   auto neighbor_exchange_v2_input = neighbor_exchange_v2->input(kNeighborExchangeV2InputIdx);
-  std::vector<size_t> shape_all = common::AnfAlgo::GetOutputInferShape(neighbor_exchange_v2_input, 0);
-  shape_all[kDim2] =
-    recv_rank_ids[kRankIdZero] != kInvalidId ? shape_all[kDim2] + static_cast<size_t>(recv_lens[0]) : shape_all[kDim2];
-  shape_all[kDim2] =
-    recv_rank_ids[kRankIdFour] != kInvalidId ? shape_all[kDim2] + static_cast<size_t>(recv_lens[1]) : shape_all[kDim2];
+  auto shape_all = common::AnfAlgo::GetOutputInferShape(neighbor_exchange_v2_input, 0);
+  shape_all[kDim2] = recv_rank_ids[kRankIdZero] != kInvalidId ? shape_all[kDim2] + recv_lens[0] : shape_all[kDim2];
+  shape_all[kDim2] = recv_rank_ids[kRankIdFour] != kInvalidId ? shape_all[kDim2] + recv_lens[1] : shape_all[kDim2];
   int64_t input_nums_all = 0;
   // left concat
   if (is_left) {
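Reviewer note: the dropped `LongToSize`/`static_cast<size_t>` calls in the concat hunks above are not just cosmetic. Under dynamic shapes a dimension can legitimately be -1, and the old unsigned round-trip destroyed that sentinel. A small self-contained illustration (values here are illustrative):

```cpp
#include <cstdint>
#include <iostream>

int main() {
  int64_t dynamic_dim = -1;  // placeholder for an unknown dimension
  auto corrupted = static_cast<size_t>(dynamic_dim);
  std::cout << corrupted << "\n";         // 18446744073709551615 on 64-bit targets
  std::cout << dynamic_dim + 16 << "\n";  // signed arithmetic stays small and debuggable: 15
  return 0;
}
```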
@@ -735,7 +722,7 @@ CNodePtr NeighborExchangeV2UnifyMindIR::CreateConcatNodes(const FuncGraphPtr &gr
     }
     (void)concat_input_all.insert(concat_input_all.end(), concat_left_outputs.begin(), concat_left_outputs.end());
     ++input_nums_all;
-    shape_all[kDim3] += static_cast<size_t>(recv_lens[kDim2]);
+    shape_all[kDim3] += recv_lens[kDim2];
   }
   // middle concat connect to concat_all
@@ -760,7 +747,7 @@ CNodePtr NeighborExchangeV2UnifyMindIR::CreateConcatNodes(const FuncGraphPtr &gr
     }
     (void)concat_input_all.insert(concat_input_all.end(), concat_right_outputs.begin(), concat_right_outputs.end());
     ++input_nums_all;
-    shape_all[kDim3] += LongToSize(recv_lens[kDim3]);
+    shape_all[kDim3] += recv_lens[kDim3];
   }
   std::vector<TypeId> concat_right_output_dtype = {common::AnfAlgo::GetOutputInferDataType(concat_input_all[1], 0)};
@@ -790,7 +777,7 @@ std::vector<CNodePtr> NeighborExchangeV2GradUnifyMindIR::CreateSplitNodesForGrad
   auto neighbor_exchange_v2_grad_input = neighbor_exchange_v2_grad->input(kNeighborExchangeV2InputIdx);
   auto dtype = common::AnfAlgo::GetOutputInferDataType(neighbor_exchange_v2_grad_input, 0);
   auto shape = common::AnfAlgo::GetOutputInferShape(neighbor_exchange_v2_grad_input, 0);
-  auto is_dynamic = AnfUtils::IsShapeDynamic(shape);
+  auto is_dynamic = IsDynamic(shape);
   auto max_shape = common::AnfAlgo::GetOutputMaxShape(neighbor_exchange_v2_grad_input, 0);
   auto min_shape = common::AnfAlgo::GetOutputMinShape(neighbor_exchange_v2_grad_input, 0);
@@ -808,7 +795,7 @@ std::vector<CNodePtr> NeighborExchangeV2GradUnifyMindIR::CreateSplitNodesForGrad
   if (is_top || is_bottom) {
     std::vector<AnfNodePtr> split_input = {NewValueNode(std::make_shared<Primitive>(prim::kPrimSplitV->name())),
                                            neighbor_exchange_v2_grad_input};
-    auto pair_tmp = std::make_pair(max_shape, min_shape);
+    auto pair_tmp = std::make_pair(min_shape, max_shape);
     split_v_top_bottom = CreateSplitNode(graph, split_input, shape, is_top, is_bottom, kHDim, send_lens, dtype,
                                          &pair_tmp, &num_split_h, *this);
   }
@@ -847,8 +834,8 @@ std::vector<CNodePtr> NeighborExchangeV2GradUnifyMindIR::CreateSplitNodesForGrad
                                            split_outputs_top_bottom[i]};
     int64_t num_split_w = 0;
-    std::vector<size_t> base_shape(shape);
-    base_shape[kHDim] = static_cast<size_t>(size_split_h[i]);
+    ShapeVector base_shape(shape);
+    base_shape[kHDim] = size_split_h[i];
     UpdateMaxMinShape(&min_shape, &max_shape, is_dynamic, size_split_h[i], kHDim);
@@ -874,10 +861,11 @@ std::vector<CNodePtr> NeighborExchangeV2GradUnifyMindIR::CreateSplitNodesForGrad
   return split_nodes;
 }
-CNodePtr NeighborExchangeV2GradUnifyMindIR::CreatePadNode(
-  const FuncGraphPtr &graph, const AnfNodePtr &input, const std::vector<int64_t> &begin,
-  const std::vector<int64_t> &size, const std::pair<std::vector<size_t>, BaseShapePtr> &shape_info,
-  TypeId dtype) const {
+CNodePtr NeighborExchangeV2GradUnifyMindIR::CreatePadNode(const FuncGraphPtr &graph, const AnfNodePtr &input,
+                                                          const std::vector<int64_t> &begin,
+                                                          const std::vector<int64_t> &size,
+                                                          const std::pair<ShapeVector, BaseShapePtr> &shape_info,
+                                                          TypeId dtype) const {
   MS_EXCEPTION_IF_NULL(graph);
   MS_EXCEPTION_IF_NULL(input);
   auto shape = shape_info.first;
@@ -887,7 +875,7 @@ CNodePtr NeighborExchangeV2GradUnifyMindIR::CreatePadNode(
   auto pad = NewCNode(pad_inputs, graph);
   std::vector<std::vector<int64_t>> paddings;
   for (size_t i = 0; i < shape.size(); ++i) {
-    (void)paddings.emplace_back(std::vector<int64_t>{begin[i], (static_cast<int64_t>(shape[i]) - begin[i]) - size[i]});
+    (void)paddings.emplace_back(std::vector<int64_t>{begin[i], (shape[i] - begin[i]) - size[i]});
   }
   common::AnfAlgo::SetOutputTypeAndDetailShape({dtype}, {shape_base}, pad.get());
   common::AnfAlgo::SetNodeAttr(kAttrPaddings, MakeValue(paddings), pad);
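Reviewer note: besides the type migration, the `CreateSplitNodesForGrad` hunk above fixes `std::make_pair(max_shape, min_shape)` being built in the wrong order. The Pad attribute math in `CreatePadNode` now also runs directly on int64 dims; as a sketch of that computation (illustrative helper name, not MindSpore API):

```cpp
#include <cstdint>
#include <vector>

using ShapeVector = std::vector<int64_t>;

// For each axis, pad begin[i] elements in front of the slice and whatever of the
// original extent remains behind it: shape[i] == begin[i] + size[i] + back_pad.
std::vector<std::vector<int64_t>> MakePaddings(const ShapeVector &shape, const ShapeVector &begin,
                                               const ShapeVector &size) {
  std::vector<std::vector<int64_t>> paddings;
  for (size_t i = 0; i < shape.size(); ++i) {
    paddings.push_back({begin[i], (shape[i] - begin[i]) - size[i]});
  }
  return paddings;
}
```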
@@ -936,28 +924,24 @@ CNodePtr NeighborExchangeV2GradUnifyMindIR::CreateSplitGradNodes(const FuncGraph
   }
   // create pad nodes
   // slice begin & size
-  std::vector<std::vector<int64_t>> begins = {{0, 0, 0, 0},
-                                              {0, 0, 0, static_cast<int64_t>(centerx_shape[kDim3]) - recv_lens[kDim3]},
-                                              {0, 0, 0, static_cast<int64_t>(centerx_shape[kDim3]) - recv_lens[kDim3]},
-                                              {0, 0, static_cast<int64_t>(centerx_shape[kDim2]) - recv_lens[kDim1],
-                                               static_cast<int64_t>(centerx_shape[kDim3]) - recv_lens[kDim3]},
-                                              {0, 0, static_cast<int64_t>(centerx_shape[kDim2]) - recv_lens[kDim1], 0},
-                                              {0, 0, static_cast<int64_t>(centerx_shape[kDim2]) - recv_lens[kDim1], 0},
-                                              {0, 0, 0, 0},
-                                              {0, 0, 0, 0}};
+  std::vector<std::vector<int64_t>> begins = {
+    {0, 0, 0, 0},
+    {0, 0, 0, centerx_shape[kDim3] - recv_lens[kDim3]},
+    {0, 0, 0, centerx_shape[kDim3] - recv_lens[kDim3]},
+    {0, 0, centerx_shape[kDim2] - recv_lens[kDim1], centerx_shape[kDim3] - recv_lens[kDim3]},
+    {0, 0, centerx_shape[kDim2] - recv_lens[kDim1], 0},
+    {0, 0, centerx_shape[kDim2] - recv_lens[kDim1], 0},
+    {0, 0, 0, 0},
+    {0, 0, 0, 0}};
   std::vector<std::vector<int64_t>> sizes = {
-    {static_cast<int64_t>(centerx_shape[0]), static_cast<int64_t>(centerx_shape[1]), recv_lens[0],
-     static_cast<int64_t>(centerx_shape[kDim3])},
-    {static_cast<int64_t>(centerx_shape[0]), static_cast<int64_t>(centerx_shape[1]), recv_lens[0], recv_lens[kDim3]},
-    {static_cast<int64_t>(centerx_shape[0]), static_cast<int64_t>(centerx_shape[1]),
-     static_cast<int64_t>(centerx_shape[kDim2]), recv_lens[kDim3]},
-    {static_cast<int64_t>(centerx_shape[0]), static_cast<int64_t>(centerx_shape[1]), recv_lens[1], recv_lens[kDim3]},
-    {static_cast<int64_t>(centerx_shape[0]), static_cast<int64_t>(centerx_shape[1]), recv_lens[1],
-     static_cast<int64_t>(centerx_shape[kDim3])},
-    {static_cast<int64_t>(centerx_shape[0]), static_cast<int64_t>(centerx_shape[1]), recv_lens[1], recv_lens[kDim2]},
-    {static_cast<int64_t>(centerx_shape[0]), static_cast<int64_t>(centerx_shape[1]),
-     static_cast<int64_t>(centerx_shape[kDim2]), recv_lens[kDim2]},
-    {static_cast<int64_t>(centerx_shape[0]), static_cast<int64_t>(centerx_shape[1]), recv_lens[0], recv_lens[kDim2]}};
+    {centerx_shape[0], centerx_shape[1], recv_lens[0], centerx_shape[kDim3]},
+    {centerx_shape[0], centerx_shape[1], recv_lens[0], recv_lens[kDim3]},
+    {centerx_shape[0], centerx_shape[1], centerx_shape[kDim2], recv_lens[kDim3]},
+    {centerx_shape[0], centerx_shape[1], recv_lens[1], recv_lens[kDim3]},
+    {centerx_shape[0], centerx_shape[1], recv_lens[1], centerx_shape[kDim3]},
+    {centerx_shape[0], centerx_shape[1], recv_lens[1], recv_lens[kDim2]},
+    {centerx_shape[0], centerx_shape[1], centerx_shape[kDim2], recv_lens[kDim2]},
+    {centerx_shape[0], centerx_shape[1], recv_lens[0], recv_lens[kDim2]}};
   std::vector<CNodePtr> pad_nodes;
   size_t output_index = 0;
   for (size_t i = 0; i < recv_rank_ids.size(); ++i) {
diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/neighbor_exchange_v2_unify_mindir.h b/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/neighbor_exchange_v2_unify_mindir.h
index bc90f411188..a3fab3592c7 100644
--- a/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/neighbor_exchange_v2_unify_mindir.h
+++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/neighbor_exchange_v2_unify_mindir.h
@@ -63,8 +63,8 @@ class NeighborExchangeV2GradUnifyMindIR : public PatternProcessPass {
   std::vector<CNodePtr> CreateSplitNodesForGrad(const FuncGraphPtr &graph, const CNodePtr &neighbor_exchange_v2_grad,
                                                 std::vector<int64_t> *split_num) const;
   CNodePtr CreatePadNode(const FuncGraphPtr &graph, const AnfNodePtr &input, const std::vector<int64_t> &begin,
-                         const std::vector<int64_t> &size,
-                         const std::pair<std::vector<size_t>, BaseShapePtr> &shape_info, TypeId dtype) const;
+                         const std::vector<int64_t> &size, const std::pair<ShapeVector, BaseShapePtr> &shape_info,
+                         TypeId dtype) const;
   CNodePtr CreateSplitGradNodes(const FuncGraphPtr &graph, const CNodePtr &neighbor_exchange_v2_grad,
                                 const CNodePtr &all_to_all_v, const std::vector<CNodePtr> &split_nodes,
                                 const std::vector<int64_t> &split_num) const;
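Reviewer note: the header change above is the whole patch in miniature. Shape metadata that used to mix `std::vector<size_t>` with int64 bounds now travels as a single signed type. Roughly, with stand-in types (`BaseShape` here is only a placeholder for `abstract::BaseShape`, and `ShapeInfo` is a name invented for this note):

```cpp
#include <cstdint>
#include <memory>
#include <utility>
#include <vector>

using ShapeVector = std::vector<int64_t>;

class BaseShape {};  // placeholder for abstract::BaseShape
using BaseShapePtr = std::shared_ptr<BaseShape>;

// Before: std::pair<std::vector<size_t>, BaseShapePtr> forced SizeToLong/LongToSize
// conversions at every call site. After: the infer shape is carried through unchanged.
using ShapeInfo = std::pair<ShapeVector, BaseShapePtr>;
```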
diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/slice_grad_unify_mindir.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/slice_grad_unify_mindir.cc
index 18de3f55be2..617e418c4da 100644
--- a/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/slice_grad_unify_mindir.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/slice_grad_unify_mindir.cc
@@ -37,10 +37,7 @@ constexpr size_t kSliceGradCangjieInputTensorNum = 2;
 std::vector<int64_t> GetInputXShape(const AnfNodePtr &node) {
   MS_EXCEPTION_IF_NULL(node);
-  std::vector<int64_t> shapes;
-  auto shape_size_t = common::AnfAlgo::GetPrevNodeOutputInferShape(node, 1);
-  (void)std::transform(shape_size_t.begin(), shape_size_t.end(), std::back_inserter(shapes), SizeToLong);
-  return shapes;
+  return common::AnfAlgo::GetPrevNodeOutputInferShape(node, 1);
 }
 std::vector<int64_t> GetTupleValue(const AnfNodePtr &node) {
diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/sparse_softmax_cross_entropy_with_logits_unify_mindir.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/sparse_softmax_cross_entropy_with_logits_unify_mindir.cc
index 1ab3e6317d3..37368404457 100644
--- a/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/sparse_softmax_cross_entropy_with_logits_unify_mindir.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/sparse_softmax_cross_entropy_with_logits_unify_mindir.cc
@@ -58,11 +58,11 @@ CNodePtr CreateOneHot(const FuncGraphPtr &graph, const CNodePtr &sparse_softmax_
   MS_EXCEPTION_IF_NULL(graph);
   MS_EXCEPTION_IF_NULL(sparse_softmax_node);
-  std::vector<size_t> logits_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(sparse_softmax_node, 0);
+  auto logits_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(sparse_softmax_node, 0);
   int64_t depth = 0;
   if (!logits_shape.empty()) {
     size_t index = logits_shape.size() - 1;
-    depth = SizeToLong(logits_shape[index]);
+    depth = logits_shape[index];
   } else {
     MS_LOG(EXCEPTION) << "Logits's shape of node [" << sparse_softmax_node->DebugString() << "] is empty"
                       << trace::DumpSourceLines(sparse_softmax_node);
@@ -102,22 +102,20 @@ CNodePtr CreateOneHot(const FuncGraphPtr &graph, const CNodePtr &sparse_softmax_
   auto one_hot_node = pass.NewCNode(one_hot_inputs, graph);
   MS_EXCEPTION_IF_NULL(one_hot_node);
   one_hot_node->set_scope(sparse_softmax_node->scope());
-  std::vector<size_t> labels_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(sparse_softmax_node, 1);
+  auto labels_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(sparse_softmax_node, 1);
   labels_shape.emplace_back(depth);
-  if (AnfUtils::IsShapeDynamic(labels_shape)) {
+  if (IsDynamic(labels_shape)) {
     auto kernel_info = common::AnfAlgo::GetPrevNodeOutput(sparse_softmax_node, 1);
     auto min_shape = common::AnfAlgo::GetOutputMinShape(kernel_info.first, kernel_info.second);
     auto max_shape = common::AnfAlgo::GetOutputMaxShape(kernel_info.first, kernel_info.second);
-    std::vector<int64_t> shape_tmp;
-    std::transform(labels_shape.begin(), labels_shape.end(), std::back_inserter(shape_tmp), SizeToLong);
-
     if (!min_shape.empty() && !max_shape.empty()) {
       min_shape.emplace_back(depth);
       max_shape.emplace_back(depth);
     }
     common::AnfAlgo::SetOutputTypeAndDetailShape(
-      {kNumberTypeFloat32}, {std::make_shared<abstract::Shape>(shape_tmp, min_shape, max_shape)}, one_hot_node.get());
+      {kNumberTypeFloat32}, {std::make_shared<abstract::Shape>(labels_shape, min_shape, max_shape)},
+      one_hot_node.get());
   } else {
     common::AnfAlgo::SetOutputInferTypeAndShape({kNumberTypeFloat32}, {labels_shape}, one_hot_node.get());
   }
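Reviewer note: the `CreateOneHot` hunks above read the one-hot depth straight out of the logits shape and append it to the labels shape, plus its min/max bounds when the shape is dynamic. The intent, as a compact sketch (illustrative struct and function, not MindSpore API):

```cpp
#include <cstdint>
#include <vector>

using ShapeVector = std::vector<int64_t>;

struct OneHotShape {
  ShapeVector shape, min, max;
};

// Depth comes from the trailing logits dimension; callers must reject empty shapes.
OneHotShape MakeOneHotShape(ShapeVector labels, ShapeVector min, ShapeVector max,
                            const ShapeVector &logits) {
  const int64_t depth = logits.back();
  labels.push_back(depth);
  if (!min.empty() && !max.empty()) {
    min.push_back(depth);
    max.push_back(depth);
  }
  return {labels, min, max};
}
```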
@@ -139,8 +137,8 @@ CNodePtr CreateSoftmaxCrossEntropyWithLogits(const FuncGraphPtr &graph, const CN
   MS_EXCEPTION_IF_NULL(softmax_node);
   softmax_node->set_scope(sparse_softmax_node->scope());
-  std::vector<size_t> labels_shape = common::AnfAlgo::GetOutputInferShape(one_hot_node, 0);
-  std::vector<size_t> loss_shape;
+  auto labels_shape = common::AnfAlgo::GetOutputInferShape(one_hot_node, 0);
+  ShapeVector loss_shape;
   if (!labels_shape.empty()) {
     loss_shape.emplace_back(labels_shape[0]);
   } else {
@@ -149,8 +147,8 @@ CNodePtr CreateSoftmaxCrossEntropyWithLogits(const FuncGraphPtr &graph, const CN
   auto data_types = common::AnfAlgo::GetOutputInferDataType(one_hot_node, 0);
   auto types = {data_types, data_types};
-  if (AnfUtils::IsShapeDynamic(labels_shape)) {
-    ShapeVector shape_tmp = {static_cast<int64_t>(labels_shape[0])};
+  if (IsDynamic(labels_shape)) {
+    ShapeVector shape_tmp = {labels_shape[0]};
     auto min_shape = common::AnfAlgo::GetOutputMinShape(one_hot_node, 0);
     auto max_shape = common::AnfAlgo::GetOutputMaxShape(one_hot_node, 0);
     if (!min_shape.empty() && !max_shape.empty()) {
@@ -170,7 +168,7 @@ std::vector<int64_t> GetAxis(const AnfNodePtr &node) {
   MS_EXCEPTION_IF_NULL(node);
-  std::vector<size_t> output_shape = common::AnfAlgo::GetOutputInferShape(node, 0);
+  auto output_shape = common::AnfAlgo::GetOutputInferShape(node, 0);
   if (output_shape.empty()) {
     MS_LOG(EXCEPTION) << node->fullname_with_scope() << "'s output shape is empty" << trace::DumpSourceLines(node);
   }
@@ -251,9 +249,9 @@ CNodePtr CreateExpandDims(const FuncGraphPtr &graph, const CNodePtr &real_div_no
   MS_EXCEPTION_IF_NULL(expand_dims_node);
   expand_dims_node->set_scope(real_div_node->scope());
-  std::vector<size_t> y_shape = common::AnfAlgo::GetOutputInferShape(real_div_node, 0);
+  auto y_shape = common::AnfAlgo::GetOutputInferShape(real_div_node, 0);
   y_shape.emplace_back(1);
-  if (AnfUtils::IsShapeDynamic(y_shape)) {
+  if (IsDynamic(y_shape)) {
     auto min_shape = common::AnfAlgo::GetOutputMinShape(real_div_node, 0);
     auto max_shape = common::AnfAlgo::GetOutputMaxShape(real_div_node, 0);
     if (!min_shape.empty() && !max_shape.empty()) {
@@ -261,10 +259,8 @@ CNodePtr CreateExpandDims(const FuncGraphPtr &graph, const CNodePtr &real_div_no
       max_shape.emplace_back(1);
     }
-    std::vector<int64_t> shape_tmp;
-    std::transform(y_shape.begin(), y_shape.end(), std::back_inserter(shape_tmp), SizeToLong);
     common::AnfAlgo::SetOutputTypeAndDetailShape({common::AnfAlgo::GetOutputInferDataType(real_div_node, 0)},
-                                                 {std::make_shared<abstract::Shape>(shape_tmp, min_shape, max_shape)},
+                                                 {std::make_shared<abstract::Shape>(y_shape, min_shape, max_shape)},
                                                  expand_dims_node.get());
   } else {
     common::AnfAlgo::SetOutputInferTypeAndShape({common::AnfAlgo::GetOutputInferDataType(real_div_node, 0)}, {y_shape},
@@ -291,9 +287,9 @@ CNodePtr CreateExpandDimsPynative(const FuncGraphPtr &graph, const CNodePtr &rea
   MS_EXCEPTION_IF_NULL(expand_dims_node);
   expand_dims_node->set_scope(real_div_node->scope());
-  std::vector<size_t> y_shape = common::AnfAlgo::GetOutputInferShape(real_div_node, 0);
+  auto y_shape = common::AnfAlgo::GetOutputInferShape(real_div_node, 0);
   (void)y_shape.emplace_back(1);
-  if (AnfUtils::IsShapeDynamic(y_shape)) {
+  if (IsDynamic(y_shape)) {
     auto min_shape = common::AnfAlgo::GetOutputMinShape(real_div_node, 0);
     auto max_shape = common::AnfAlgo::GetOutputMaxShape(real_div_node, 0);
     if (!min_shape.empty() && !max_shape.empty()) {
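Reviewer note: both `CreateExpandDims` variants above share one shape rule: the output gains a trailing axis of length 1, and the min/max bounds must gain it too or the ranks drift apart. Sketch (illustrative helper, not the pass's actual code):

```cpp
#include <cstdint>
#include <vector>

using ShapeVector = std::vector<int64_t>;

// Keep the value shape and its dynamic bounds rank-consistent when expanding dims.
void AppendExpandedAxis(ShapeVector *y_shape, ShapeVector *min_shape, ShapeVector *max_shape) {
  y_shape->push_back(1);
  if (!min_shape->empty() && !max_shape->empty()) {
    min_shape->push_back(1);
    max_shape->push_back(1);
  }
}
```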
@@ -301,10 +297,8 @@ CNodePtr CreateExpandDimsPynative(const FuncGraphPtr &graph, const CNodePtr &rea
       max_shape.emplace_back(1);
     }
-    std::vector<int64_t> shape_tmp;
-    std::transform(y_shape.begin(), y_shape.end(), std::back_inserter(shape_tmp), SizeToLong);
     common::AnfAlgo::SetOutputTypeAndDetailShape({common::AnfAlgo::GetOutputInferDataType(real_div_node, 0)},
-                                                 {std::make_shared<abstract::Shape>(shape_tmp, min_shape, max_shape)},
+                                                 {std::make_shared<abstract::Shape>(y_shape, min_shape, max_shape)},
                                                  expand_dims_node.get());
   } else {
     common::AnfAlgo::SetOutputInferTypeAndShape({common::AnfAlgo::GetOutputInferDataType(real_div_node, 0)}, {y_shape},
@@ -322,10 +316,7 @@ CNodePtr CreateTile(const FuncGraphPtr &graph, const CNodePtr &sparse_softmax_no
   CheckCNodeInputSize(sparse_softmax_node, kSparseSoftmaxCrossEntropyWithLogitsInputTensorNum);
   CheckCNodeInputSize(mul_node, kMulInputTensorNum);
-  auto labels_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(sparse_softmax_node, 1);
-  std::vector<int64_t> multiple_value;
-  std::transform(labels_shape.begin(), labels_shape.end(), std::back_inserter(multiple_value),
-                 [](size_t label) { return static_cast<int64_t>(label); });
+  auto multiple_value = common::AnfAlgo::GetPrevNodeOutputInferShape(sparse_softmax_node, 1);
   if (std::all_of(multiple_value.begin(), multiple_value.end(), [](int64_t value) { return value == 1; })) {
     return nullptr;
   }
@@ -370,7 +361,7 @@ CNodePtr CreateRealDiv(const FuncGraphPtr &graph, const CNodePtr &sparse_softmax
   MS_EXCEPTION_IF_NULL(sparse_softmax_node);
   MS_EXCEPTION_IF_NULL(tile_node);
   CheckCNodeInputSize(sparse_softmax_node, kSparseSoftmaxCrossEntropyWithLogitsInputTensorNum);
-  std::vector<size_t> labels_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(sparse_softmax_node, 1);
+  auto labels_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(sparse_softmax_node, 1);
   if (labels_shape.size() != 1) {
     MS_LOG(EXCEPTION) << "Label's shape should be 1-D, but got " << labels_shape.size()
                       << trace::DumpSourceLines(sparse_softmax_node);
diff --git a/mindspore/ccsrc/plugin/device/cpu/hal/device/cpu_kernel_runtime.cc b/mindspore/ccsrc/plugin/device/cpu/hal/device/cpu_kernel_runtime.cc
index 38b343bfe27..77a333ed27c 100644
--- a/mindspore/ccsrc/plugin/device/cpu/hal/device/cpu_kernel_runtime.cc
+++ b/mindspore/ccsrc/plugin/device/cpu/hal/device/cpu_kernel_runtime.cc
@@ -141,11 +141,9 @@ void CPUKernelRuntime::AssignInputNodeAddress(const session::KernelGraph *kernel
     if (output_type_id == kTypeUnknown) {
       output_type_id = common::AnfAlgo::GetOutputInferDataType(item, index);
     }
-    std::vector<size_t> fmt_shape = AnfAlgo::GetOutputDeviceShape(item, index);
+    auto fmt_shape = AnfAlgo::GetOutputDeviceShape(item, index);
     size_t type_size = GetTypeByte(TypeIdToType(output_type_id));
-    size_t tensor_size =
-      fmt_shape.empty() ?
type_size - : std::accumulate(fmt_shape.begin(), fmt_shape.end(), type_size, std::multiplies()); + size_t tensor_size = type_size * SizeOf(fmt_shape); auto format = AnfAlgo::GetOutputFormat(item, index); auto address = CreateDeviceAddress(nullptr, tensor_size, format, output_type_id); address->set_from_persistent_mem(true); @@ -354,9 +352,7 @@ void CPUKernelRuntime::BindInputTensorAddressPtr(const session::KernelGraph &ker auto input_param = item->cast(); if (input_param != nullptr && input_param->IsUsedByRealKernelInGraph(kernel_graph.graph_id())) { auto tensor_shape = tensor->shape(); - std::vector shape_tmp; - (void)std::transform(tensor_shape.begin(), tensor_shape.end(), std::back_inserter(shape_tmp), IntToSize); - common::AnfAlgo::SetOutputInferTypeAndShape({common::AnfAlgo::GetOutputInferDataType(item, 0)}, {shape_tmp}, + common::AnfAlgo::SetOutputInferTypeAndShape({common::AnfAlgo::GetOutputInferDataType(item, 0)}, {tensor_shape}, item.get()); } address->ref_count_ = INIT_NODE_REF; diff --git a/mindspore/ccsrc/plugin/device/cpu/hal/device/cpu_tensor_array.h b/mindspore/ccsrc/plugin/device/cpu/hal/device/cpu_tensor_array.h index dc39a9ce493..84d097cc588 100644 --- a/mindspore/ccsrc/plugin/device/cpu/hal/device/cpu_tensor_array.h +++ b/mindspore/ccsrc/plugin/device/cpu/hal/device/cpu_tensor_array.h @@ -28,7 +28,7 @@ namespace device { namespace cpu { class CPUTensorArray : public TensorArray { public: - CPUTensorArray(const string &name, const TypePtr &dtype, const std::vector &shapes) + CPUTensorArray(const string &name, const TypePtr &dtype, const ShapeVector &shapes) : TensorArray(name, dtype, shapes) {} ~CPUTensorArray() override = default; void FreeMemory(const DeviceMemPtr addr) override; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/adam_delta_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/adam_delta_cpu_kernel.cc index 7a49ff392ca..83aed578f22 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/adam_delta_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/adam_delta_cpu_kernel.cc @@ -70,11 +70,16 @@ void AdamDeltaCpuKernelMod::LaunchAdamDelta(T *delta, T *m, T *v, float lr, floa void AdamDeltaCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); - std::vector delta_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); - std::vector m_shape = AnfAlgo::GetInputDeviceShape(kernel_node, kMIndex); - std::vector v_shape = AnfAlgo::GetInputDeviceShape(kernel_node, kVIndex); - std::vector grad_shape = AnfAlgo::GetInputDeviceShape(kernel_node, kGradIndex); + auto delta_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); + auto m_shape = AnfAlgo::GetInputDeviceShape(kernel_node, kMIndex); + auto v_shape = AnfAlgo::GetInputDeviceShape(kernel_node, kVIndex); + auto grad_shape = AnfAlgo::GetInputDeviceShape(kernel_node, kGradIndex); dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); + + if (AnfAlgo::IsShapesDynamic({delta_shape, m_shape, v_shape})) { + return; + } + if (!IsSameShape(delta_shape, m_shape)) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the shape of 'delta' must be the same as the shape of 'm', but got the shape of 'delta': " @@ -96,7 +101,7 @@ void AdamDeltaCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { } elem_num_ = 1; for (size_t i = 0; i < delta_shape.size(); ++i) { - elem_num_ *= delta_shape[i]; + elem_num_ *= static_cast(delta_shape[i]); } if (elem_num_ < 1) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the 
'delta' must be at least 1-D, but got empty shape!"; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/addcmul_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/addcmul_cpu_kernel.cc index 8de4beeca4e..da562f6194e 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/addcmul_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/addcmul_cpu_kernel.cc @@ -99,7 +99,7 @@ void AddcmulCpuKernelMod::AddcmulMul2(const T *input1, const T *input2, T *outpu }; output_size_ = 1; for (size_t i = 0; i < output_shape_.size(); ++i) { - output_size_ *= output_shape_[i]; + output_size_ *= static_cast(output_shape_[i]); } ParallelLaunchAutoSearch(task, output_size_, this, ¶llel_search_info_); } @@ -121,7 +121,7 @@ void AddcmulCpuKernelMod::AddcmulAdd(const T *input1, const T *input2, T *output }; output_size_ = 1; for (size_t i = 0; i < output_shape_.size(); ++i) { - output_size_ *= output_shape_[i]; + output_size_ *= static_cast(output_shape_[i]); } ParallelLaunchAutoSearch(add_task, output_size_, this, ¶llel_search_info_); } diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/addcmul_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/addcmul_cpu_kernel.h index d5769b33505..0b4d7899ed7 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/addcmul_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/addcmul_cpu_kernel.h @@ -41,11 +41,11 @@ class AddcmulCpuKernelMod : public DeprecatedNativeCpuKernelMod { private: TypeId dtype_{kTypeUnknown}; - std::vector input_shape0_; - std::vector input_shape1_; - std::vector input_shape2_; - std::vector input_shape3_; - std::vector output_shape_; + std::vector input_shape0_; + std::vector input_shape1_; + std::vector input_shape2_; + std::vector input_shape3_; + std::vector output_shape_; size_t output_size_{1}; size_t data_shape_size_{0}; size_t inputx_shape_size_{0}; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/adjust_contrastv2_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/adjust_contrastv2_cpu_kernel.cc index 6f3cfb999ac..8be6dbd713a 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/adjust_contrastv2_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/adjust_contrastv2_cpu_kernel.cc @@ -62,9 +62,9 @@ std::uint32_t AdjustContrastv2CpuKernelMod::LaunchAdjustContrastv2Kernel(const s T *input{static_cast(inputs[0]->addr)}; std::float_t *contrast_factor{static_cast(inputs[1]->addr)}; T *output{static_cast(outputs[0]->addr)}; - std::vector x_dim_sizes = images_shape; + std::vector x_dim_sizes = images_shape; std::size_t n{x_dim_sizes.size()}; - std::size_t per_batch_elements{x_dim_sizes[n - 1] * x_dim_sizes[n - 2] * x_dim_sizes[n - 3]}; + std::size_t per_batch_elements{LongToSize(x_dim_sizes[n - 1] * x_dim_sizes[n - 2] * x_dim_sizes[n - 3])}; std::int64_t input_numelements = static_cast(inputs[0]->size / sizeof(T)); std::int64_t total{SizeToLong(input_numelements / per_batch_elements)}; std::int64_t per_unit_size{total / std::min(kAdjustContrastv2ParallelNum - 2L, total)}; @@ -78,8 +78,11 @@ std::uint32_t AdjustContrastv2CpuKernelMod::LaunchAdjustContrastv2Kernel(const s void AdjustContrastv2CpuKernelMod::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); - std::vector output_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); + std::vector output_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); images_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); + if (AnfAlgo::IsShapesDynamic({output_shape, images_shape})) { + return; + } if (images_shape != output_shape) { 
MS_LOG(EXCEPTION) << "For AdjustContrastv2, the data type of the input " << images_shape << "need be the same as the output " << output_shape << "."; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/adjust_contrastv2_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/adjust_contrastv2_cpu_kernel.h index f5a94129fc6..7aec43d7fd5 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/adjust_contrastv2_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/adjust_contrastv2_cpu_kernel.h @@ -40,7 +40,7 @@ class AdjustContrastv2CpuKernelMod : public DeprecatedNativeCpuKernelMod { template std::uint32_t LaunchAdjustContrastv2Kernel(const std::vector &inputs, const std::vector &outputs); - std::vector images_shape; + std::vector images_shape; TypeId input_type_{kTypeUnknown}; }; } // namespace kernel diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/adjust_hue_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/adjust_hue_cpu_kernel.cc index 3186c01b062..8246be2f8f2 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/adjust_hue_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/adjust_hue_cpu_kernel.cc @@ -269,8 +269,11 @@ bool LaunchAdjustHueKernelHalf(const std::vector &inputs, void AdjustHueCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); - std::vector image_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); - std::vector output_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); + std::vector image_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); + std::vector output_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); + if (AnfAlgo::IsShapesDynamic({image_shape, output_shape})) { + return; + } dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); if (image_shape != output_shape) { MS_LOG(EXCEPTION) << "For AdjustHue, the data type of the input " << image_shape diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/argmax_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/argmax_cpu_kernel.cc index 25e632bd71b..8cf9dbca7c0 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/argmax_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/argmax_cpu_kernel.cc @@ -25,13 +25,7 @@ constexpr size_t kArgMaxInputsNum = 1; constexpr size_t kArgMaxOutputsNum = 1; constexpr char kKernelName[] = "ArgMax"; -int64_t get_element_num(const std::vector &shape) { - int64_t size = 1; - for (int64_t i = 0; i < static_cast(shape.size()); i++) { - size *= shape[i]; - } - return size; -} +int64_t get_element_num(const std::vector &shape) { return SizeToLong(SizeOf(shape)); } template bool check_validation(const std::vector &shape, const int64_t num_before_axis, const int64_t num_after_axis, diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/argmax_with_value_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/argmax_with_value_cpu_kernel.cc index b5b89c0d361..fefc903d40c 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/argmax_with_value_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/argmax_with_value_cpu_kernel.cc @@ -93,7 +93,7 @@ bool ArgMaxWithValueCpuKernelMod::LaunchKernel(const std::vector(kernel_node, AXIS); axis += SizeToLong(shape_len); diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/argmin_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/argmin_cpu_kernel.cc index 62df3c3397f..778672bf8de 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/argmin_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/argmin_cpu_kernel.cc @@ -22,6 +22,7 @@ namespace 
mindspore { namespace kernel { +namespace { constexpr size_t kArgMinInputsNum = 1; constexpr size_t kArgMinOutputsNum = 1; constexpr char kKernelName[] = "Argmin"; @@ -33,6 +34,7 @@ int64_t get_element_num(const std::vector &shape) { } return size; } +} // namespace template bool check_validation(const std::vector &shape, const int64_t num_before_axis, const int64_t num_after_axis, @@ -149,11 +151,7 @@ int ArgminCpuKernelMod::Resize(const BaseOperatorPtr &base_operator, const std:: } } dim_axis_ = shape_[LongToSize(axis_)]; - std::vector shape_t(shape_len); - for (size_t k = 0; k < shape_len; ++k) { - shape_t[k] = LongToSize(shape_[k]); - } - axisIterator_.Init(shape_t, LongToSize(axis_)); + axisIterator_.Init(shape_, LongToSize(axis_)); size_t element_size = axisIterator_.OuterSize() * axisIterator_.InnerSize() * axisIterator_.AxisSize(); (void)workspace_size_list_.emplace_back((sizeof(size_t) * element_size)); return KRET_OK; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/argmin_with_value_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/argmin_with_value_cpu_kernel.cc index ecbcbf15971..67c229ae6c9 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/argmin_with_value_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/argmin_with_value_cpu_kernel.cc @@ -95,7 +95,7 @@ bool ArgMinWithValueCpuKernelMod::LaunchKernel(const std::vector input_shape1_; - std::vector input_shape2_; + ShapeVector input_shape1_; + ShapeVector input_shape2_; std::vector input_element_num1_; std::vector input_element_num2_; - std::vector output_shape_; + ShapeVector output_shape_; std::vector output_element_num_; using TypeComputeFunc = std::function; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/arithmetic_logic_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/arithmetic_logic_cpu_kernel.cc index e0f5f88a1d1..9c79a04e5c8 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/arithmetic_logic_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/arithmetic_logic_cpu_kernel.cc @@ -62,7 +62,7 @@ class ArithLogicCpuTypeFunc : public DeprecatedCpuKernelFunc { output_size_ = 1; for (size_t i = 0; i < output_shape_.size(); ++i) { - output_size_ *= output_shape_[i]; + output_size_ *= LongToSize(output_shape_[i]); } size_t l = input_shape1_.size(); @@ -119,11 +119,11 @@ class ArithLogicCpuTypeFunc : public DeprecatedCpuKernelFunc { size_t output_size_{1}; TypeId dtype_{kTypeUnknown}; - std::vector input_shape1_; - std::vector input_shape2_; + ShapeVector input_shape1_; + ShapeVector input_shape2_; std::vector input_element_num1_; std::vector input_element_num2_; - std::vector output_shape_; + ShapeVector output_shape_; std::vector output_element_num_; }; @@ -195,19 +195,19 @@ class ArithComplexLogicCpuTypeFunc : public DeprecatedCpuKernelFunc { size_t output_size_{1}; TypeId dtype_{kTypeUnknown}; - std::vector input_shape1_; - std::vector input_shape2_; + ShapeVector input_shape1_; + ShapeVector input_shape2_; std::vector input_element_num1_; std::vector input_element_num2_; - std::vector output_shape_; + ShapeVector output_shape_; std::vector output_element_num_; }; template template void ArithLogicCpuTypeFunc::BinaryOp(const T *input1, const T *input2, bool *out, Op op) { - size_t input1_size = 1; - size_t input2_size = 2; + int64_t input1_size = 1; + int64_t input2_size = 2; for (size_t i = 0; i < output_shape_.size(); i++) { input1_size *= input_shape1_[i]; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/assign_cpu_kernel.cc 
b/mindspore/ccsrc/plugin/device/cpu/kernel/assign_cpu_kernel.cc index c93b99648c8..a68d13064af 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/assign_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/assign_cpu_kernel.cc @@ -40,6 +40,9 @@ void AssignCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); auto input_x_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); auto input_y_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + if (AnfAlgo::IsShapesDynamic({input_x_shape, input_y_shape})) { + return; + } if (input_x_shape.size() != input_y_shape.size()) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the 'x' and 'y' must have the same dimension, but got the dimension of 'x': " @@ -51,7 +54,7 @@ void AssignCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { << "', the 'x' and 'y' must have the same shape, but got the shape of 'x': " << Vector2Str(input_x_shape) << " and the shape of 'y': " << Vector2Str(input_y_shape); } - batch_size_ *= input_x_shape[i]; + batch_size_ *= LongToSize(input_x_shape[i]); } input_x_dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); auto type_len = input_x_dtype_size_map.find(input_x_dtype_); diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/bce_with_logits_loss_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/bce_with_logits_loss_cpu_kernel.cc index 9834eb46858..6cc29ba7097 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/bce_with_logits_loss_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/bce_with_logits_loss_cpu_kernel.cc @@ -69,20 +69,12 @@ int BCEWithLogitsLossCpuKernelMod::Resize(const BaseOperatorPtr &base_operator, return ret; } - auto input_logits_shape = inputs.at(kIndex0)->GetShapeVector(); - (void)std::transform(input_logits_shape.begin(), input_logits_shape.end(), std::back_inserter(input_logits_shape_), - LongToSize); - input_size_ = std::accumulate(input_logits_shape_.begin(), input_logits_shape_.end(), 1, std::multiplies()); + input_logits_shape_ = inputs.at(kIndex0)->GetShapeVector(); + input_size_ = SizeOf(input_logits_shape_); - auto input_label_shape = inputs.at(kIndex1)->GetShapeVector(); - (void)std::transform(input_label_shape.begin(), input_label_shape.end(), std::back_inserter(input_label_shape_), - LongToSize); - auto input_weight_shape = inputs.at(kIndex2)->GetShapeVector(); - (void)std::transform(input_weight_shape.begin(), input_weight_shape.end(), std::back_inserter(input_weight_shape_), - LongToSize); - auto input_post_weight_shape = inputs.at(kIndex3)->GetShapeVector(); - (void)std::transform(input_post_weight_shape.begin(), input_post_weight_shape.end(), - std::back_inserter(input_post_weight_shape_), LongToSize); + input_label_shape_ = inputs.at(kIndex1)->GetShapeVector(); + input_weight_shape_ = inputs.at(kIndex2)->GetShapeVector(); + input_post_weight_shape_ = inputs.at(kIndex3)->GetShapeVector(); // output_size_list_ should be clear and reset. 
output_size_list_.clear(); diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/bce_with_logits_loss_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/bce_with_logits_loss_cpu_kernel.h index ce84328babd..bfeed8fb88b 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/bce_with_logits_loss_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/bce_with_logits_loss_cpu_kernel.h @@ -56,10 +56,10 @@ class BCEWithLogitsLossCpuKernelMod : public NativeCpuKernelMod, const std::vector &outputs); size_t input_size_{1}; - std::vector input_logits_shape_; - std::vector input_label_shape_; - std::vector input_weight_shape_; - std::vector input_post_weight_shape_; + ShapeVector input_logits_shape_; + ShapeVector input_label_shape_; + ShapeVector input_weight_shape_; + ShapeVector input_post_weight_shape_; ReductionType reduction_{kNone}; }; } // namespace kernel diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/bias_add_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/bias_add_cpu_kernel.cc index 97278679ca2..dd820b0dbc7 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/bias_add_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/bias_add_cpu_kernel.cc @@ -32,6 +32,9 @@ void BiasAddCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0); bias_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 1); + if (AnfAlgo::IsShapesDynamic({input_shape_, bias_shape_})) { + return; + } data_shape_ = input_shape_.size(); if (input_shape_.size() < kBiasAddMinDim || input_shape_.size() > kBiasAddMaxDim) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ @@ -59,15 +62,15 @@ bool BiasAddCpuKernelMod::Launch(const std::vector &inputs, const st auto *output_addr = reinterpret_cast(outputs[0]->addr); if (input_shape_.size() > 2) { - size_t hw_size = 1; + int64_t hw_size = 1; for (size_t i = 2; i < input_shape_.size(); ++i) { hw_size *= input_shape_[i]; } - size_t c_size = input_shape_[1]; - for (size_t n = 0; n < input_shape_[0]; ++n) { - for (size_t c = 0; c < c_size; ++c) { - size_t offset = n * c_size * hw_size + c * hw_size; + int64_t c_size = input_shape_[1]; + for (int64_t n = 0; n < input_shape_[0]; ++n) { + for (int64_t c = 0; c < c_size; ++c) { + size_t offset = LongToSize(n * c_size * hw_size + c * hw_size); size_t hw = 0; #ifdef ENABLE_AVX constexpr size_t C8NUM = 8; @@ -84,7 +87,7 @@ bool BiasAddCpuKernelMod::Launch(const std::vector &inputs, const st out_ptr += C8NUM; } #endif - for (; hw < hw_size; ++hw) { + for (; hw < LongToSize(hw_size); ++hw) { output_addr[offset + hw] = src_addr[offset + hw] + bias_addr[c]; } } @@ -92,13 +95,13 @@ bool BiasAddCpuKernelMod::Launch(const std::vector &inputs, const st } else { auto task = [&](size_t start, size_t end) { for (size_t n = start; n < end; ++n) { - size_t n_offset = input_shape_[1] * n; + size_t n_offset = LongToSize(input_shape_[1] * n); if (ElementAdd(src_addr + n_offset, bias_addr, output_addr + n_offset, input_shape_[1]) != NNACL_OK) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', ElementAdd failed."; } } }; - ParallelLaunchAutoSearch(task, input_shape_[0], this, ¶llel_search_info_); + ParallelLaunchAutoSearch(task, LongToSize(input_shape_[0]), this, ¶llel_search_info_); } return true; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/bias_add_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/bias_add_cpu_kernel.h index 06255e49cd4..7a8973969b6 100644 --- 
a/mindspore/ccsrc/plugin/device/cpu/kernel/bias_add_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/bias_add_cpu_kernel.h @@ -36,8 +36,8 @@ class BiasAddCpuKernelMod : public DeprecatedNativeCpuKernelMod { private: size_t data_shape_{0}; - std::vector input_shape_; - std::vector bias_shape_; + std::vector input_shape_; + std::vector bias_shape_; }; } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/bias_add_grad_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/bias_add_grad_cpu_kernel.cc index 5dd5853fcd1..5a99670ed5f 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/bias_add_grad_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/bias_add_grad_cpu_kernel.cc @@ -28,7 +28,7 @@ constexpr size_t kBiasAddGradOutputsNum = 1; void BiasAddGradCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); - input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0); + input_shape_ = Convert2SizeTClipNeg(AnfAlgo::GetInputDeviceShape(kernel_node, 0)); if (input_shape_.size() < 2) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', input tensor's dimension must be at least 2, but got " << input_shape_.size(); diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/binary_cross_entropy_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/binary_cross_entropy_cpu_kernel.cc index 01438c8faaf..b3f693add76 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/binary_cross_entropy_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/binary_cross_entropy_cpu_kernel.cc @@ -113,8 +113,11 @@ void BinaryCrossEntropyCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { weight_defined_ = (input_num == kBceInputsNumWithWeight); dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); auto input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + if (AnfAlgo::IsShapesDynamic({input_shape})) { + return; + } for (size_t i = 0; i < input_shape.size(); i++) { - input_size_ *= input_shape[i]; + input_size_ *= LongToSize(input_shape[i]); } const std::string reduction = common::AnfAlgo::GetNodeAttr(kernel_node, REDUCTION); diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/binary_cross_entropy_grad_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/binary_cross_entropy_grad_kernel.cc index 6872f99e390..54e7c9c22fe 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/binary_cross_entropy_grad_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/binary_cross_entropy_grad_kernel.cc @@ -92,8 +92,11 @@ void BinaryCrossEntropyGradCpuKernelMod::InitKernel(const CNodePtr &kernel_node) weight_defined_ = (input_num == kBceGradInputsNumWithWeight); dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); auto input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + if (AnfAlgo::IsShapesDynamic({input_shape})) { + return; + } for (size_t i = 0; i < input_shape.size(); i++) { - input_size_ *= input_shape[i]; + input_size_ *= LongToSize(input_shape[i]); } const std::string reduction = common::AnfAlgo::GetNodeAttr(kernel_node, REDUCTION); diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/bitwise_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/bitwise_cpu_kernel.cc index e1c84ee6570..d8710ef2d44 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/bitwise_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/bitwise_cpu_kernel.cc @@ -136,28 +136,15 @@ int 
BitwiseCpuKernelMod::Resize(const BaseOperatorPtr &base_operator, const std: if (auto ret = KernelMod::Resize(base_operator, inputs, outputs, inputsOnHost); ret != 0) { return ret; } - std::vector input_shape_1 = inputs[0]->GetShapeVector(); - std::vector input_shape_2 = inputs[1]->GetShapeVector(); - std::vector output_shape = outputs[0]->GetShapeVector(); - - if (output_shape.size() > max_dims_) { + input_shape_1_ = inputs[0]->GetShapeVector(); + input_shape_2_ = inputs[1]->GetShapeVector(); + output_shape_ = outputs[0]->GetShapeVector(); + if (output_shape_.size() > max_dims_) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ - << "', the dimension of output should be less than or equal to 7, but got " << output_shape.size() - << "."; + << "', the dimension of output should be less than or equal to 7, but got " + << output_shape_.size() << "."; return KRET_RESIZE_FAILED; } - input_shape_1_.resize(input_shape_1.size(), 1); - input_shape_2_.resize(input_shape_2.size(), 1); - output_shape_.resize(output_shape.size(), 1); - for (size_t i = 0; i < input_shape_1.size(); i++) { - input_shape_1_[i] = static_cast(input_shape_1[i]); - } - for (size_t i = 0; i < input_shape_2.size(); i++) { - input_shape_2_[i] = static_cast(input_shape_2[i]); - } - for (size_t i = 0; i < output_shape.size(); i++) { - output_shape_[i] = static_cast(output_shape[i]); - } if (output_shape_.size() == 0) { (void)output_shape_.insert(output_shape_.begin(), 1); diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/bitwise_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/bitwise_cpu_kernel.h index fd6463cae21..684156d80a6 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/bitwise_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/bitwise_cpu_kernel.h @@ -72,9 +72,9 @@ class BitwiseCpuKernelMod : public NativeCpuKernelMod, public MatchKernelHelper< std::string kernel_name_; TypeId input_type_1_{kTypeUnknown}; TypeId input_type_2_{kTypeUnknown}; - std::vector input_shape_1_; - std::vector input_shape_2_; - std::vector output_shape_; + ShapeVector input_shape_1_; + ShapeVector input_shape_2_; + ShapeVector output_shape_; size_t output_size_ = 1; const size_t max_dims_{7}; }; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/blackman_window_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/blackman_window_cpu_kernel.cc index 83581452c7a..443ad7b446a 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/blackman_window_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/blackman_window_cpu_kernel.cc @@ -63,11 +63,11 @@ bool BlackmanWindowCpuKernelMod::BlackmanWindowKernelFunc(const std::vector= 0, but got " << *input; } - size_t window_length = static_cast(*input); + auto window_length = static_cast(*input); double pre_window_length = static_cast(window_length); const size_t OUTPUTISONE = 1.0; - std::vector out_shape = {window_length}; + ShapeVector out_shape = {window_length}; std::vector dtypes = {AnfAlgo::GetOutputDeviceDataType(node_, 0)}; if (*input == 1) { diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/broadcast_to_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/broadcast_to_cpu_kernel.cc index 0aa7656b45d..31064250e59 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/broadcast_to_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/broadcast_to_cpu_kernel.cc @@ -66,14 +66,17 @@ void BroadcastToCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); input_shape_ = 
common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); output_shape_ = common::AnfAlgo::GetOutputInferShape(kernel_node, 0); + if (AnfAlgo::IsShapesDynamic({input_shape_, output_shape_})) { + return; + } size_t input_shape_size = input_shape_.size(); size_t output_shape_size = output_shape_.size(); for (size_t i = 0; i < input_shape_size; ++i) { - shape_info_.input_shape_[i] = SizeToInt(input_shape_[i]); + shape_info_.input_shape_[i] = LongToInt(input_shape_[i]); } for (size_t i = 0; i < output_shape_size; ++i) { - shape_info_.output_shape_[i] = SizeToInt(output_shape_[i]); + shape_info_.output_shape_[i] = LongToInt(output_shape_[i]); } shape_info_.input_shape_size_ = SizeToInt(input_shape_size); shape_info_.output_shape_size_ = SizeToInt(output_shape_size); diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/broadcast_to_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/broadcast_to_cpu_kernel.h index b707c4cd78d..97ee7f369cd 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/broadcast_to_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/broadcast_to_cpu_kernel.h @@ -61,8 +61,8 @@ class BroadcastToCpuKernelMod : public DeprecatedNativeCpuKernelMod { BroadcastToFunc kernel_func_; void InitTaskFunc(const CNodePtr &kernel_node); - std::vector input_shape_; - std::vector output_shape_; + ShapeVector input_shape_; + ShapeVector output_shape_; BroadcastShapeInfo shape_info_{}; std::string kernel_type_{kUnknown}; }; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/bucketize_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/bucketize_cpu_kernel.h index ec877a45c77..42478370375 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/bucketize_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/bucketize_cpu_kernel.h @@ -39,8 +39,8 @@ class BucketizeCpuKernelMod : public DeprecatedNativeCpuKernelMod { std::vector GetOpSupport() override; private: - std::vector input_shape_; - std::vector output_shape_; + ShapeVector input_shape_; + ShapeVector output_shape_; std::vector boundaries_; TypeId dtype_{kTypeUnknown}; }; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/check_valid_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/check_valid_cpu_kernel.cc index 8f7ade28520..3633b0cfc61 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/check_valid_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/check_valid_cpu_kernel.cc @@ -42,6 +42,7 @@ void CheckValidCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { if (!is_match) { MS_LOG(EXCEPTION) << "CheckValid does not support this kernel data type: " << kernel_attr; } + kernel_func_ = func_list_[index].second; } diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/check_valid_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/check_valid_cpu_kernel.h index 0e9c0218422..853271c6591 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/check_valid_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/check_valid_cpu_kernel.h @@ -52,9 +52,9 @@ class CheckValidCpuKernelMod : public DeprecatedNativeCpuKernelMod { const std::vector &, const std::vector &)>; static std::vector> func_list_; CheckValidFunc kernel_func_; - std::vector anchor_box_shape_; - std::vector img_metas_shape_; - std::vector output_shape_; + std::vector anchor_box_shape_; + std::vector img_metas_shape_; + std::vector output_shape_; }; } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/cholesky_inverse_cpu_kernel.cc 
b/mindspore/ccsrc/plugin/device/cpu/kernel/cholesky_inverse_cpu_kernel.cc index d60b7bfc13e..04805417433 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/cholesky_inverse_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/cholesky_inverse_cpu_kernel.cc @@ -30,7 +30,7 @@ bool CholeskyInverseKernelFunc(const CNodePtr &node_wpt, const std::vector(inputs[0]->addr); auto output_y = reinterpret_cast(outputs[0]->addr); auto inputShape = AnfAlgo::GetInputDeviceShape(node_wpt, 0); - int64_t n = SizeToLong(inputShape[0]); + int64_t n = inputShape[0]; using MatrixXd = Eigen::Matrix; Eigen::Map A(input_x0, n, n); MatrixXd result; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/clip_by_norm_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/clip_by_norm_cpu_kernel.cc index 2c0290ddeae..edab56d2c4c 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/clip_by_norm_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/clip_by_norm_cpu_kernel.cc @@ -120,27 +120,23 @@ void ClipByNormCpuKernelMod::InitIOShape(const std::vector &inp MS_EXCEPTION_IF_CHECK_FAIL(outputs.size() == 1, "The size of output tensors should be 1."); // Init `input_x` shape MS_EXCEPTION_IF_NULL(inputs[0]); - const auto x_origin_shape = inputs[0]->GetShapeVector(); - if (!IsValidShape(x_origin_shape)) { + x_shape_ = inputs[0]->GetShapeVector(); + if (!IsValidShape(x_shape_)) { MS_EXCEPTION(ValueError) << "For " << kernel_name_ << ", x_shape not supports dynamic shape."; } - std::transform(x_origin_shape.begin(), x_origin_shape.end(), std::back_inserter(x_shape_), LongToSize); x_dim_ = x_shape_.size(); // Init 'clip_norm' shape MS_EXCEPTION_IF_NULL(inputs[1]); - const auto clip_norm_origin_shape = inputs[1]->GetShapeVector(); - if (!IsValidShape(clip_norm_origin_shape)) { + clip_norm_shape_ = inputs[1]->GetShapeVector(); + if (!IsValidShape(clip_norm_shape_)) { MS_EXCEPTION(ValueError) << "For " << kernel_name_ << ", clip_norm_shape not support dynamic shape."; } - std::transform(clip_norm_origin_shape.begin(), clip_norm_origin_shape.end(), std::back_inserter(clip_norm_shape_), - LongToSize); // Init output shape MS_EXCEPTION_IF_NULL(outputs[0]); - const auto output_origin_shape = outputs[0]->GetShapeVector(); - if (!IsValidShape(output_origin_shape)) { + output_shape_ = outputs[0]->GetShapeVector(); + if (!IsValidShape(output_shape_)) { MS_EXCEPTION(ValueError) << "For " << kernel_name_ << ", output_shape not supports dynamic shape."; } - std::transform(output_origin_shape.begin(), output_origin_shape.end(), std::back_inserter(output_shape_), LongToSize); MS_EXCEPTION_IF_CHECK_FAIL(output_shape_ == x_shape_, "Output shape should be same with input x shape."); } @@ -179,23 +175,20 @@ void ClipByNormCpuKernelMod::InitSizeLists() { auto x_size = std::accumulate(x_shape_.begin(), x_shape_.end(), x_type_size, std::multiplies()); x_size = std::max(x_size, x_type_size); size_t clip_norm_type_size = GetTypeByte(TypeIdToType(data_type_.second)); - auto clip_norm_size = - std::accumulate(clip_norm_shape_.begin(), clip_norm_shape_.end(), clip_norm_type_size, std::multiplies()); + size_t clip_norm_size = SizeOf(clip_norm_shape_); clip_norm_size = std::max(clip_norm_size, clip_norm_type_size); input_size_list_.emplace_back(x_size); input_size_list_.emplace_back(clip_norm_size); // Init workspace size list size_t float_type_size = sizeof(float); - auto l2_norm_out_size = std::accumulate(l2_norm_output_shape_.begin(), l2_norm_output_shape_.end(), float_type_size, - std::multiplies()); + auto l2_norm_out_size = 
float_type_size * SizeOf(l2_norm_output_shape_); l2_norm_out_size = std::max(l2_norm_out_size, float_type_size); // This workspace size used for l2_norm calculation workspace_size_list_.emplace_back(l2_norm_out_size); // This workspace size used for `x/l2_norm(x)` calculation workspace_size_list_.emplace_back((x_size / x_type_size) * float_type_size); // Init output size list - auto output_size = - std::accumulate(output_shape_.begin(), output_shape_.end(), float_type_size, std::multiplies()); + auto output_size = float_type_size * SizeOf(output_shape_); output_size = std::max(output_size, float_type_size); output_size_list_.emplace_back(output_size); } @@ -233,7 +226,7 @@ void ClipByNormCpuKernelMod::L2NormLaunch(T *x_addr, float *l2_norm_output_addr, axes[k] = i; ++k; } else { - stride *= x_shape_[i]; + stride *= LongToSize(x_shape_[i]); ++j; } } @@ -242,7 +235,7 @@ void ClipByNormCpuKernelMod::L2NormLaunch(T *x_addr, float *l2_norm_output_addr, ++k; } // Calculate transpose shape - std::vector transpose_shape(x_shape_.size()); + ShapeVector transpose_shape(x_shape_.size()); for (size_t i = 0; i < x_dim_; ++i) { transpose_shape[i] = x_shape_[axes[i]]; } diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/clip_by_norm_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/clip_by_norm_cpu_kernel.h index d23de4fe634..74a9c709a19 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/clip_by_norm_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/clip_by_norm_cpu_kernel.h @@ -69,10 +69,10 @@ class ClipByNormCpuKernelMod : public NativeCpuKernelMod { size_t x_dim_{0}; std::pair data_type_{kNumberTypeFloat32, kNumberTypeFloat32}; std::vector axis_; - std::vector x_shape_; - std::vector clip_norm_shape_; - std::vector l2_norm_output_shape_; - std::vector output_shape_; + ShapeVector x_shape_; + ShapeVector clip_norm_shape_; + ShapeVector l2_norm_output_shape_; + ShapeVector output_shape_; ParallelSearchInfo parallel_search_info_; }; } // namespace kernel diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/coalesce_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/coalesce_cpu_kernel.cc index 24f13190688..c7eb8e082fa 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/coalesce_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/coalesce_cpu_kernel.cc @@ -51,11 +51,11 @@ bool CoalesceCpuKernelMod::Launch(const std::vector &inputs, for (size_t i = 0; i < output_nm; i++) { dtypes[i] = AnfAlgo::GetOutputDeviceDataType(node_, i); } - std::vector dims; - (void)dims.emplace_back(shape_size_); - (void)dims.emplace_back(jump + 1); - std::vector dim; - (void)dim.emplace_back(jump + 1); + ShapeVector dims; + (void)dims.emplace_back(SizeToLong(shape_size_)); + (void)dims.emplace_back(SizeToLong(jump) + 1); + ShapeVector dim; + (void)dim.emplace_back(SizeToLong(jump) + 1); common::AnfAlgo::SetOutputInferTypeAndShape(dtypes, {dims, dim, common::AnfAlgo::GetOutputInferShape(node_, 2)}, node_.get()); return true; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/concat_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/concat_cpu_kernel.cc index babcb1afef2..0e175298a7c 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/concat_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/concat_cpu_kernel.cc @@ -54,7 +54,7 @@ bool ConcatCpuKernelMod::LaunchKernel(const std::vector &inp CHECK_KERNEL_INPUTS_NUM(inputs.size(), input_num, kernel_name_); CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kConcatOutputsNum, kernel_name_); - std::vector> input_flat_shape_list; + 
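In the ClipByNorm InitSizeLists hunk above, the std::accumulate chains over shape members are replaced by SizeOf(shape) times the element size. A sketch of the helper's presumed behavior, assuming it returns the element count of a shape as size_t:

#include <cstddef>
#include <cstdint>
#include <vector>

using ShapeVector = std::vector<int64_t>;

size_t SizeOf(const ShapeVector &shape) {
  size_t count = 1;
  for (int64_t dim : shape) {
    count *= static_cast<size_t>(dim);  // dims assumed non-negative here
  }
  return count;
}

A byte size is then spelled type_size * SizeOf(shape), which keeps the element count and the element width visibly separate instead of folding type_size into the accumulate seed.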
std::vector input_flat_shape_list; input_flat_shape_list.reserve(input_num); for (size_t i = 0; i < input_num; i++) { auto input_shape_i = common::AnfAlgo::GetPrevNodeOutputInferShape(node_, i); @@ -64,7 +64,7 @@ bool ConcatCpuKernelMod::LaunchKernel(const std::vector &inp size_t output_dim_1 = 0; for (size_t j = 0; j < input_num; ++j) { - output_dim_1 += input_flat_shape_list[j][1]; + output_dim_1 += LongToSize(input_flat_shape_list[j][1]); } auto *output_addr = reinterpret_cast(outputs[0]->addr); std::vector input_addr_list; @@ -73,7 +73,7 @@ bool ConcatCpuKernelMod::LaunchKernel(const std::vector &inp (void)input_addr_list.emplace_back(tmp_addr); } // each input's row of shape after flat are same - auto before_axis = input_flat_shape_list[0][0]; + auto before_axis = LongToSize(input_flat_shape_list[0][0]); auto task = [&](size_t start, size_t end) { for (size_t i = start; i < end; ++i) { auto output_ptr = output_addr + i * output_dim_1; @@ -81,7 +81,7 @@ bool ConcatCpuKernelMod::LaunchKernel(const std::vector &inp if (input_flat_shape_list[j][1] == 0) { continue; } - auto copy_num = input_flat_shape_list[j][1]; + auto copy_num = LongToSize(input_flat_shape_list[j][1]); auto copy_size = copy_num * sizeof(T); auto offset = copy_num * i; auto ret = memcpy_s(output_ptr, copy_size, input_addr_list[j] + offset, copy_size); diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/concat_offset_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/concat_offset_cpu_kernel.cc index 07aa37e892d..84c65f2bcab 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/concat_offset_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/concat_offset_cpu_kernel.cc @@ -61,8 +61,8 @@ bool ConcatOffsetCpuKernelMod::LaunchKernel(const std::vector(outputs[0]->addr); size_t input_num = common::AnfAlgo::GetInputTensorNum(node_); - std::vector offset{0}; - size_t all_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(node_, 0)[axis_]; + ShapeVector offset{0}; + auto all_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(node_, 0)[axis_]; // cal offset for (size_t i = 1; i < input_num; i++) { @@ -80,13 +80,13 @@ bool ConcatOffsetCpuKernelMod::LaunchKernel(const std::vector shape = AnfAlgo::GetInputDeviceShape(kernel_node, input_index); + auto shape = AnfAlgo::GetInputDeviceShape(kernel_node, input_index); size_t tensor_size = shape.empty() ? type_size : std::accumulate(shape.begin(), shape.end(), type_size, std::multiplies()); tensor_size = std::max(tensor_size, type_size); @@ -124,12 +124,7 @@ void DeprecatedNativeCpuKernelMod::InitInputOutputSize(const CNodePtr &kernel_no } size_t output_num = common::AnfAlgo::GetOutputTensorNum(kernel_node); for (size_t output_index = 0; output_index < output_num; ++output_index) { - TypeId type_id = AnfAlgo::GetOutputDeviceDataType(kernel_node, output_index); - size_t type_size = GetTypeByte(TypeIdToType(type_id)); - std::vector shape = AnfAlgo::GetOutputDeviceShape(kernel_node, output_index); - size_t tensor_size = - shape.empty() ? 
type_size : std::accumulate(shape.begin(), shape.end(), type_size, std::multiplies()); - tensor_size = std::max(tensor_size, type_size); + size_t tensor_size = AnfAlgo::GetOutputTensorMemSize(kernel_node, output_index); (void)output_size_list_.emplace_back(tensor_size); } } @@ -196,7 +191,7 @@ void DeprecatedNativeCpuKernelMod::SetCpuRefMapToKernelInfo(const CNodePtr &appl } } -void CPUKernelUtils::ExpandDimsTo4(std::vector *shape) { +void CPUKernelUtils::ExpandDimsTo4(ShapeVector *shape) { MS_EXCEPTION_IF_NULL(shape); auto len = shape->size(); const size_t expect_dims = 4; @@ -207,29 +202,28 @@ void CPUKernelUtils::ExpandDimsTo4(std::vector *shape) { } } -size_t CPUKernelUtils::CalcOffset(const std::vector &shape, size_t dim0, size_t dim1, size_t dim2, - size_t dim3) { +size_t CPUKernelUtils::CalcOffset(const ShapeVector &shape, size_t dim0, size_t dim1, size_t dim2, size_t dim3) { size_t offset = dim0 * shape[1] * shape[2] * shape[3] + dim1 * shape[2] * shape[3] + dim2 * shape[3] + dim3; return offset; } -size_t CPUKernelUtils::GetElementNumOnAxis(const std::vector &shape, int axis) { +size_t CPUKernelUtils::GetElementNumOnAxis(const ShapeVector &shape, int axis) { if (axis < 0) { axis = axis + SizeToInt(shape.size()); } - size_t result = 1; + int64_t result = 1; for (int j = 3; j > axis; --j) { result *= shape[j]; } - return result; + return LongToSize(result); } -void CPUKernelUtils::GetElementNumEveryDim(const std::vector &shape, std::vector *element_num) { +void CPUKernelUtils::GetElementNumEveryDim(const ShapeVector &shape, std::vector *element_num) { size_t accumulation = 1; MS_EXCEPTION_IF_NULL(element_num); (void)element_num->emplace_back(1); for (size_t i = shape.size() - 1; i > 0; --i) { - accumulation *= shape[i]; + accumulation *= LongToSizeClipNeg(shape[i]); (void)element_num->emplace_back(accumulation); } std::reverse(element_num->begin(), element_num->end()); @@ -393,12 +387,12 @@ void ParallelLaunchAutoSearch(const CTask &task, size_t count, Content content, } } -std::vector CPUKernelUtils::FlatShapeByAxis(const std::vector &shape, int axis) { +ShapeVector CPUKernelUtils::FlatShapeByAxis(const ShapeVector &shape, int axis) { if (axis < 0) { axis = axis + SizeToInt(shape.size()); } - size_t dim_row = 1; - size_t dim_col = 1; + int64_t dim_row = 1; + int64_t dim_col = 1; for (size_t i = 0; i < shape.size(); ++i) { if (SizeToInt(i) < axis) { dim_row *= shape[i]; @@ -408,11 +402,10 @@ std::vector CPUKernelUtils::FlatShapeByAxis(const std::vector &s } // referred to Copy elision https://en.cppreference.com/w/cpp/language/copy_elision // returning a vector won't cause extra vector constructed or moved - return std::vector{dim_row, dim_col}; + return ShapeVector{dim_row, dim_col}; } -BroadcastIterator::BroadcastIterator(std::vector input_shape_a, std::vector input_shape_b, - std::vector output_shape) +BroadcastIterator::BroadcastIterator(ShapeVector input_shape_a, ShapeVector input_shape_b, ShapeVector output_shape) : input_shape_a_(std::move(input_shape_a)), input_shape_b_(std::move(input_shape_b)), output_shape_(std::move(output_shape)) { @@ -455,12 +448,12 @@ void BroadcastIterator::GenNextPos() { void BroadcastIterator::BroadcastShape() { int input_dimension_a = input_shape_a_.size(); if (input_dimension_a < output_dimension_) { - (void)input_shape_a_.insert(input_shape_a_.begin(), IntToSize(output_dimension_ - input_dimension_a), 1); + (void)input_shape_a_.insert(input_shape_a_.begin(), IntToLong(output_dimension_ - input_dimension_a), 1); } int input_dimension_b = 
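CPUKernelUtils::FlatShapeByAxis, rewritten above to return a ShapeVector, collapses every dim before axis into a row count and the rest into a column count. A self-contained worked example whose body mirrors the one in the hunk:

#include <cassert>
#include <cstdint>
#include <vector>

using ShapeVector = std::vector<int64_t>;

ShapeVector FlatShapeByAxis(const ShapeVector &shape, int axis) {
  if (axis < 0) {
    axis += static_cast<int>(shape.size());  // normalize a negative axis
  }
  int64_t dim_row = 1;
  int64_t dim_col = 1;
  for (size_t i = 0; i < shape.size(); ++i) {
    if (static_cast<int>(i) < axis) {
      dim_row *= shape[i];
    } else {
      dim_col *= shape[i];
    }
  }
  return ShapeVector{dim_row, dim_col};
}

int main() {
  assert((FlatShapeByAxis({2, 3, 4, 5}, 2) == ShapeVector{6, 20}));   // 2*3 rows, 4*5 cols
  assert((FlatShapeByAxis({2, 3, 4, 5}, -1) == ShapeVector{24, 5}));  // axis -1 normalizes to 3
  return 0;
}

This is what lets the Concat kernel above treat every input as a 2-D [rows, cols] block and copy one row slice per input per output row.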
input_shape_b_.size(); if (input_dimension_b < output_dimension_) { - (void)input_shape_b_.insert(input_shape_b_.begin(), IntToSize(output_dimension_ - input_dimension_b), 1); + (void)input_shape_b_.insert(input_shape_b_.begin(), IntToLong(output_dimension_ - input_dimension_b), 1); } } @@ -489,8 +482,8 @@ MultipleBroadcastIterator::MultipleBroadcastIterator(std::vector mul BroadcastShape(); input_pos_.resize(multi_inputs_.size()); // Allocate strides memory - multi_inputs_strides_.resize(multi_inputs_.size(), std::vector(output_dimension_, 0)); - multi_inputs_back_strides_.resize(multi_inputs_.size(), std::vector(output_dimension_, 0)); + multi_inputs_strides_.resize(multi_inputs_.size(), std::vector(output_dimension_, 0)); + multi_inputs_back_strides_.resize(multi_inputs_.size(), std::vector(output_dimension_, 0)); coordinates_.resize(output_dimension_); InitStrides(); } @@ -527,7 +520,7 @@ void MultipleBroadcastIterator::BroadcastShape() { for (auto &multi_input : multi_inputs_) { int input_dimension = SizeToInt(multi_input.size()); if (input_dimension < output_dimension_) { - (void)multi_input.insert(multi_input.begin(), IntToSize(output_dimension_ - input_dimension), 1); + (void)multi_input.insert(multi_input.begin(), IntToLong(output_dimension_ - input_dimension), 1); } } } @@ -546,8 +539,7 @@ void MultipleBroadcastIterator::InitStrides() { } } -TransposeIterator::TransposeIterator(std::vector output_shape, std::vector axes, - const std::vector &input_shape) +TransposeIterator::TransposeIterator(ShapeVector output_shape, std::vector axes, const ShapeVector &input_shape) : shape_(std::move(output_shape)), axes_(std::move(axes)) { // Calculate strides dimension_ = shape_.size(); @@ -589,12 +581,12 @@ void TransposeIterator::GenNextPos() { } } -std::vector CPUKernelUtils::GetBroadcastShape(const std::vector &x, const std::vector &y) { +ShapeVector CPUKernelUtils::GetBroadcastShape(const ShapeVector &x, const ShapeVector &y) { size_t x_len = x.size(); size_t y_len = y.size(); size_t length = x_len < y_len ? 
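The broadcast iterators above first left-pad the shorter shapes with 1s to the output rank, then give every broadcast dimension a stride of 0 so that advancing along it re-reads the same input element. A sketch of that stride rule (not the class itself), assuming rank-aligned shapes where in[i] is either out[i] or 1:

#include <cstddef>
#include <cstdint>
#include <vector>

using ShapeVector = std::vector<int64_t>;

std::vector<size_t> BroadcastStrides(const ShapeVector &in, const ShapeVector &out) {
  std::vector<size_t> strides(in.size(), 0);
  size_t running = 1;
  for (size_t i = in.size(); i-- > 0;) {
    // A size-1 dim broadcast against a larger output dim contributes stride 0.
    strides[i] = (in[i] == 1 && out[i] != 1) ? 0 : running;
    running *= static_cast<size_t>(in[i]);
  }
  return strides;  // e.g. in {1, 3} against out {2, 3} -> {0, 1}
}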
x_len : y_len; - std::vector broadcast_shape; - std::vector broadcast_shape_back; + ShapeVector broadcast_shape; + ShapeVector broadcast_shape_back; for (int i = -length; i < 0; ++i) { if (x[x_len + i] == 1) { broadcast_shape_back.push_back(y[y_len + i]); @@ -619,17 +611,17 @@ std::vector CPUKernelUtils::GetBroadcastShape(const std::vector return broadcast_shape; } -void AxisIterator::Init(const std::vector &input_shape, size_t axis) { +void AxisIterator::Init(const ShapeVector &input_shape, size_t axis) { outer_size_ = 1; for (size_t i = 0; i < axis; i++) { - outer_size_ *= input_shape[i]; + outer_size_ *= LongToSize(input_shape[i]); } axis_size_ = input_shape[axis]; inner_size_ = 1; for (size_t i = axis + 1; i < input_shape.size(); ++i) { - inner_size_ *= input_shape[i]; + inner_size_ *= LongToSize(input_shape[i]); } } diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/cpu_kernel.h index d309e203724..9823ad8de1f 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/cpu_kernel.h @@ -241,21 +241,20 @@ class CpuKernelFunc { class CPUKernelUtils { public: - static void ExpandDimsTo4(std::vector *shape); - static size_t CalcOffset(const std::vector &shape, size_t dim0, size_t dim1, size_t dim2, size_t dim3); - static size_t GetElementNumOnAxis(const std::vector &shape, int axis); - static void GetElementNumEveryDim(const std::vector &shape, std::vector *element_num); + static void ExpandDimsTo4(ShapeVector *shape); + static size_t CalcOffset(const ShapeVector &shape, size_t dim0, size_t dim1, size_t dim2, size_t dim3); + static size_t GetElementNumOnAxis(const ShapeVector &shape, int axis); + static void GetElementNumEveryDim(const ShapeVector &shape, std::vector *element_num); static void ParallelFor(const CTask &task, size_t count, float block_size = 128.0); - static std::vector FlatShapeByAxis(const std::vector &shape, int axis); - static std::vector GetBroadcastShape(const std::vector &x, const std::vector &y); + static ShapeVector FlatShapeByAxis(const ShapeVector &shape, int axis); + static ShapeVector GetBroadcastShape(const std::vector &x, const std::vector &y); static void ParallelForAutoSearch(const CTask &task, size_t count, ParallelSearchInfo *parallel_search_info); static size_t CalcElementNum(const std::vector &shape); }; class BroadcastIterator { public: - BroadcastIterator(std::vector input_shape_a, std::vector input_shape_b, - std::vector output_shape); + BroadcastIterator(ShapeVector input_shape_a, ShapeVector input_shape_b, ShapeVector output_shape); virtual ~BroadcastIterator() = default; inline size_t GetInputPosA() const { return input_pos_[0]; } inline size_t GetInputPosB() const { return input_pos_[1]; } @@ -266,14 +265,14 @@ class BroadcastIterator { void BroadcastShape(); void InitStrides(); - std::vector coordinates_; - std::vector input_shape_a_; - std::vector input_shape_b_; - std::vector output_shape_; - std::vector input_strides_a_; - std::vector input_strides_b_; - std::vector input_back_strides_a_; - std::vector input_back_strides_b_; + ShapeVector coordinates_; + ShapeVector input_shape_a_; + ShapeVector input_shape_b_; + ShapeVector output_shape_; + ShapeVector input_strides_a_; + ShapeVector input_strides_b_; + ShapeVector input_back_strides_a_; + ShapeVector input_back_strides_b_; std::array input_pos_{0}; int output_dimension_{0}; }; @@ -281,7 +280,7 @@ class BroadcastIterator { // Broadcast for multi_inputs and single output class 
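AxisIterator::Init above folds a shape into three numbers around one axis. For shape {n0, ..., n(k-1)} and axis a: outer_size = n0 * ... * n(a-1), axis_size = n(a), inner_size = n(a+1) * ... * n(k-1), so the element at (outer, i, inner) sits at flat offset (outer * axis_size + i) * inner_size + inner. A compact sketch of the same decomposition:

#include <cstddef>
#include <cstdint>
#include <vector>

using ShapeVector = std::vector<int64_t>;

struct AxisSplit {
  size_t outer_size = 1;
  size_t axis_size = 1;
  size_t inner_size = 1;
};

AxisSplit SplitByAxis(const ShapeVector &shape, size_t axis) {
  AxisSplit split;
  for (size_t i = 0; i < axis; ++i) {
    split.outer_size *= static_cast<size_t>(shape[i]);  // dims before the axis
  }
  split.axis_size = static_cast<size_t>(shape[axis]);
  for (size_t i = axis + 1; i < shape.size(); ++i) {
    split.inner_size *= static_cast<size_t>(shape[i]);  // dims after the axis
  }
  return split;
}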
MultipleBroadcastIterator { public: - using shape_info = std::vector; + using shape_info = ShapeVector; MultipleBroadcastIterator(std::vector multi_inputs, shape_info output_shape); virtual ~MultipleBroadcastIterator() = default; inline size_t GetInputPos(size_t index) const { return input_pos_[index]; } @@ -303,7 +302,7 @@ class MultipleBroadcastIterator { class TransposeIterator { public: - TransposeIterator(std::vector output_shape, std::vector axes, const std::vector &input_shape); + TransposeIterator(ShapeVector output_shape, std::vector axes, const ShapeVector &input_shape); virtual ~TransposeIterator() = default; inline size_t GetPos() const { return pos_; } void SetPos(size_t pos); @@ -311,10 +310,10 @@ class TransposeIterator { private: int dimension_{0}; - std::vector coordinates_; - std::vector shape_; - std::vector strides_; - std::vector back_strides_; + ShapeVector coordinates_; + ShapeVector shape_; + ShapeVector strides_; + ShapeVector back_strides_; std::vector axes_; size_t pos_{0}; }; @@ -331,7 +330,7 @@ class AxisIterator { public: AxisIterator() = default; virtual ~AxisIterator() = default; - void Init(const std::vector &input_shape, size_t axis); + void Init(const ShapeVector &input_shape, size_t axis); // Iterate index through outer_size_ * inner_size_, combine inner iteration and outer iteration // into one single iteration to fit ParallelLaunchAutoSearch // Possible usage: diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/crop_and_resize_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/crop_and_resize_cpu_kernel.cc index de1c14fbd85..d2140b10edc 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/crop_and_resize_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/crop_and_resize_cpu_kernel.cc @@ -40,8 +40,8 @@ void CropAndResizeCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { << input_image_shape_len << "-D."; } - input_height_ = SizeToInt(input_image_shape[IMAGE_HEIGHT]); - input_width_ = SizeToInt(input_image_shape[IMAGE_WEIGHT]); + input_height_ = LongToInt(input_image_shape[IMAGE_HEIGHT]); + input_width_ = LongToInt(input_image_shape[IMAGE_WEIGHT]); // input boxes auto input_boxes_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, BOXES); @@ -79,13 +79,13 @@ void CropAndResizeCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { auto output_shape_len = output_shape.size(); output_size_ = 1; for (size_t i = 0; i < output_shape_len; i++) { - output_size_ *= SizeToInt(output_shape[i]); + output_size_ *= LongToInt(output_shape[i]); } // set expected output params - final_height_ = SizeToInt(output_shape[HEIGHT]); - final_width_ = SizeToInt(output_shape[WEIGHT]); - channel_ = SizeToInt(output_shape[CHANNEL]); + final_height_ = LongToInt(output_shape[HEIGHT]); + final_width_ = LongToInt(output_shape[WEIGHT]); + channel_ = LongToInt(output_shape[CHANNEL]); // get op parameters string method = common::AnfAlgo::GetNodeAttr(kernel_node, "method"); diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/crop_and_resize_grad_boxes_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/crop_and_resize_grad_boxes_cpu_kernel.h index 2d3fa66b0e5..982903b6f9c 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/crop_and_resize_grad_boxes_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/crop_and_resize_grad_boxes_cpu_kernel.h @@ -76,11 +76,11 @@ class CropAndResizeGradBoxesCpuKernelMod : public DeprecatedNativeCpuKernelMod { const std::vector &)>; static std::vector> func_list_; CropAndResizeGradBoxesFunc kernel_func_; - 
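The CropAndResize hunk above swaps SizeToInt for LongToInt wherever a dim is now int64_t. A hedged sketch of what such a narrowing helper is presumed to do, assuming it range-checks instead of truncating silently (the real helper would report through MindSpore's logging macros rather than a C++ exception):

#include <cstdint>
#include <limits>
#include <stdexcept>

int LongToInt(int64_t value) {
  if (value > static_cast<int64_t>(std::numeric_limits<int>::max()) ||
      value < static_cast<int64_t>(std::numeric_limits<int>::min())) {
    throw std::out_of_range("LongToInt: value does not fit in int");
  }
  return static_cast<int>(value);
}

The checked cast matters for the same reason as the signed shape change itself: blindly casting a dynamic -1 dim would produce a nonsense index bound.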
std::vector grads_shape_; - std::vector image_shape_; - std::vector boxes_shape_; - std::vector box_in_shape_; - std::vector output_shape_; + ShapeVector grads_shape_; + ShapeVector image_shape_; + ShapeVector boxes_shape_; + ShapeVector box_in_shape_; + ShapeVector output_shape_; }; } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/cross_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/cross_cpu_kernel.cc index b10e4d0e8f5..887d4d08abb 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/cross_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/cross_cpu_kernel.cc @@ -39,7 +39,7 @@ void CrossCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { dim_ = common::AnfAlgo::GetNodeAttr(kernel_node, "dim"); int64_t default_dim = -65530; if (dim_ == default_dim) { - size_t dim_size_value = 3; + int64_t dim_size_value = 3; for (size_t i = 0; i < input1_shape_.size(); i++) { if (input1_shape_[i] == dim_size_value) { dim_ = static_cast(i); @@ -83,7 +83,7 @@ template bool CrossCpuKernelMod::LaunchKernel(const std::vector &inputs, const std::vector &outputs) { auto input1_data_addr = reinterpret_cast(inputs[0]->addr); - size_t tmp = 1; + int64_t tmp = 1; for (size_t i = 0; i < input1_shape_.size(); i++) { tmp = tmp * input1_shape_[i]; } @@ -93,7 +93,7 @@ bool CrossCpuKernelMod::LaunchKernel(const std::vector &inpu size_t total = input1_data_num / kNumber3; const size_t n = input1_shape_.size(); std::vector a_stride(n); - size_t stride_tmp = 1; + int64_t stride_tmp = 1; for (int64_t i = static_cast(n - 1); i >= 0; i--) { a_stride[LongToSize(i)] = stride_tmp; stride_tmp *= input1_shape_[LongToSize(i)]; @@ -117,11 +117,11 @@ bool CrossCpuKernelMod::LaunchKernel(const std::vector &inpu auto cross_shard = [this, &a_stride, &b_stride, &r_stride, &output_data_addr, &input1_data_addr, &input2_data_addr, &output_data_stride, &input1_data_stride, &input2_data_stride](size_t start, size_t end) { const size_t input1_data_dim = input1_shape_.size(); - std::vector position_in_dims(input1_data_dim); - size_t index_in_curr_dim = start; - size_t input1_data_start = 0; - size_t input2_data_start = 0; - size_t output_data_start = 0; + std::vector position_in_dims(input1_data_dim); + int64_t index_in_curr_dim = start; + int64_t input1_data_start = 0; + int64_t input2_data_start = 0; + int64_t output_data_start = 0; for (int64_t i = 0; i < static_cast(input1_data_dim); i++) { if (i == static_cast(dim_)) continue; position_in_dims[LongToSize(i)] = index_in_curr_dim % input1_shape_[LongToSize(i)]; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/cross_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/cross_cpu_kernel.h index 8b488744e07..f4a5dd5eb93 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/cross_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/cross_cpu_kernel.h @@ -40,9 +40,9 @@ class CrossCpuKernelMod : public DeprecatedNativeCpuKernelMod { std::vector GetOpSupport() override; private: - std::vector input1_shape_; - std::vector input2_shape_; - std::vector output_shape_; + std::vector input1_shape_; + std::vector input2_shape_; + std::vector output_shape_; int64_t dim_; TypeId input1_dtype_; }; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/ctcloss_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/ctcloss_cpu_kernel.cc index 63fdab0ee31..4096d6a3bad 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/ctcloss_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/ctcloss_cpu_kernel.cc @@ -76,6 
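The Cross kernel above builds per-dimension strides so it can walk a flat buffer as an n-D tensor; the GetStrides helper in dense_to_dense_set_operation further down does the same. The pattern is the usual row-major stride table:

#include <cstdint>
#include <vector>

using ShapeVector = std::vector<int64_t>;

std::vector<int64_t> RowMajorStrides(const ShapeVector &shape) {
  std::vector<int64_t> strides(shape.size());
  int64_t running = 1;
  for (size_t i = shape.size(); i-- > 0;) {
    strides[i] = running;  // elements skipped when index i increases by 1
    running *= shape[i];
  }
  return strides;  // e.g. shape {2, 3, 4} -> strides {12, 4, 1}
}

Keeping the running product in int64_t (instead of the old size_t) is what the hunk's stride_tmp change is about: intermediate products stay in the same signed domain as the dims.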
+76,9 @@ void CTCLossCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { probs_shape_ = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); indices_dims_ = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); labels_dims_ = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); + if (AnfAlgo::IsShapesDynamic({probs_shape_, indices_dims_, labels_dims_})) { + return; + } dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); if (probs_shape_.size() != 3) { @@ -94,9 +97,9 @@ void CTCLossCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { preprocess_collapse_repeated_ = common::AnfAlgo::GetNodeAttr(kernel_node, PCR); ctc_merge_repeated_ = common::AnfAlgo::GetNodeAttr(kernel_node, CTR); ignore_longer_outputs_than_inputs_ = common::AnfAlgo::GetNodeAttr(kernel_node, ILOTI); - max_time_ = probs_shape_[0]; - batch_size_ = probs_shape_[1]; - num_class_ = probs_shape_[2]; + max_time_ = LongToSize(probs_shape_[0]); + batch_size_ = LongToSize(probs_shape_[1]); + num_class_ = LongToSize(probs_shape_[2]); blank_index_ = num_class_ - 1; } @@ -294,7 +297,7 @@ void CTCLossCpuKernelMod::LaunchKernel(const std::vector &inputs, << max_time_ << " and 'sequence_length': " << sequence_length_addr[b]; } } - for (size_t i = 0; i < indices_dims_[0]; ++i) { + for (size_t i = 0; i < LongToSize(indices_dims_[0]); ++i) { const size_t factor = 2; auto index = labels_indices_addr[i * factor]; if (index >= SizeToUlong(each_label_length.size())) { diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/ctcloss_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/ctcloss_cpu_kernel.h index 7989f418c4d..cc7150ebc70 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/ctcloss_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/ctcloss_cpu_kernel.h @@ -59,9 +59,9 @@ class CTCLossCpuKernelMod : public DeprecatedNativeCpuKernelMod { template void LaunchKernel(const std::vector &inputs, const std::vector &outputs) const; - std::vector probs_shape_; - std::vector indices_dims_; - std::vector labels_dims_; + ShapeVector probs_shape_; + ShapeVector indices_dims_; + ShapeVector labels_dims_; size_t num_class_{0}; size_t max_time_{0}; size_t batch_size_{0}; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/cumsum_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/cumsum_cpu_kernel.cc index 8d67037850a..343b55f97ef 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/cumsum_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/cumsum_cpu_kernel.cc @@ -51,7 +51,7 @@ template void CumSumCpuKernelMod::InitWorkspaceSize() { input_size_0_ = sizeof(T); for (size_t i = 0; i < shape_.size(); i++) { - input_size_0_ *= shape_[i]; + input_size_0_ *= LongToSize(shape_[i]); } (void)workspace_size_list_.emplace_back(input_size_0_); } @@ -101,13 +101,13 @@ bool CumSumCpuKernelMod::Launch(const std::vector &inputs, void CumSumCpuKernelMod::Reshape() { dims_[0] = 1; - dims_[1] = shape_[IntToSize(axis_)]; + dims_[1] = LongToSize(shape_[IntToSize(axis_)]); dims_[2] = 1; for (size_t i = 0; i < IntToSize(axis_); i++) { - dims_[0] *= shape_[i]; + dims_[0] *= LongToSize(shape_[i]); } for (size_t i = IntToSize(axis_) + 1; i < shape_.size(); i++) { - dims_[2] *= shape_[i]; + dims_[2] *= LongToSize(shape_[i]); } stride_ = dims_[1] * dims_[2]; stride2_ = dims_[2]; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/cumsum_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/cumsum_cpu_kernel.h index eaf0ae98035..34abb0f3af8 100644 --- 
a/mindspore/ccsrc/plugin/device/cpu/kernel/cumsum_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/cumsum_cpu_kernel.h @@ -73,8 +73,8 @@ class CumSumCpuKernelMod : public DeprecatedNativeCpuKernelMod { template <typename T> void LaunchCumSum(const T *input_addr, T *output_addr, T *ws_addr, size_t start, size_t end) const; - std::vector<size_t> shape_; - std::vector<size_t> dst_shape; + ShapeVector shape_; + ShapeVector dst_shape; size_t input_size_0_{0}; size_t stride_{0}; size_t stride2_{0}; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/custom/custom_aot_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/custom/custom_aot_cpu_kernel.cc index ef13e07118d..01edd4a4f7c 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/custom/custom_aot_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/custom/custom_aot_cpu_kernel.cc @@ -62,12 +62,9 @@ void CustomAOTCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { } for (size_t i = 0; i < num_input_; i++) { - std::vector<size_t> in_shape = AnfAlgo::GetInputDeviceShape(kernel_node, i); - std::vector<int64_t> in_shape_tmp; - std::for_each(in_shape.begin(), in_shape.end(), - [&in_shape_tmp](size_t c) { in_shape_tmp.push_back(SizeToLong(c)); }); - shape_list_.emplace_back(in_shape_tmp); - ndims_.push_back(SizeToInt(in_shape_tmp.size())); + auto in_shape = AnfAlgo::GetInputDeviceShape(kernel_node, i); + shape_list_.emplace_back(in_shape); + ndims_.push_back(SizeToInt(in_shape.size())); type_list_.emplace_back(TypeIdToString(input_type_list[i], true)); } @@ -79,12 +76,9 @@ void CustomAOTCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { } for (size_t i = 0; i < num_output_; i++) { - std::vector<size_t> out_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, i); - std::vector<int64_t> out_shape_tmp; - (void)std::transform(out_shape.begin(), out_shape.end(), std::back_inserter(out_shape_tmp), - [](const size_t &c) { return SizeToLong(c); }); - shape_list_.emplace_back(out_shape_tmp); - ndims_.push_back(SizeToInt(out_shape_tmp.size())); + auto out_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, i); + shape_list_.emplace_back(out_shape); + ndims_.push_back(SizeToInt(out_shape.size())); type_list_.emplace_back(TypeIdToString(output_type_list[i], true)); } diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/custom/custom_julia_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/custom/custom_julia_cpu_kernel.cc index 9ddc20e2b03..112035de36d 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/custom/custom_julia_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/custom/custom_julia_cpu_kernel.cc @@ -51,11 +51,8 @@ void CustomJULIACpuKernelMod::InitKernel(const CNodePtr &kernel_node) { for (size_t i = 0; i < num_input_; i++) { auto in_shape = AnfAlgo::GetInputDeviceShape(kernel_node, i); - std::vector<int64_t> in_shape_tmp; - (void)std::for_each(in_shape.begin(), in_shape.end(), - [&in_shape_tmp](size_t c) { in_shape_tmp.push_back(SizeToLong(c)); }); - ndims_.push_back(in_shape_tmp.size()); - shape_list_.push_back(in_shape_tmp); + ndims_.push_back(in_shape.size()); + shape_list_.push_back(in_shape); type_list_.push_back(TypeIdToString(input_type_list[i], true)); } @@ -67,12 +64,9 @@ void CustomJULIACpuKernelMod::InitKernel(const CNodePtr &kernel_node) { } for (size_t i = 0; i < num_output_; i++) { - std::vector<size_t> out_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, i); - std::vector<int64_t> out_shape_tmp; - (void)std::for_each(out_shape.begin(), out_shape.end(), - [&out_shape_tmp](size_t c) { out_shape_tmp.push_back(SizeToLong(c)); }); - ndims_.push_back(out_shape_tmp.size()); - 
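The custom-op hunks above (and the Julia variant continuing just below) delete a recurring widen-and-copy idiom: device shapes used to come back as std::vector<size_t> and had to be re-copied element by element into the int64_t shape_list_ handed to the custom-op ABI. For contrast, a sketch of the pattern that was removed:

#include <algorithm>
#include <cstdint>
#include <iterator>
#include <vector>

// Before: widen each size_t dim to int64_t by hand.
std::vector<int64_t> WidenShape(const std::vector<size_t> &in_shape) {
  std::vector<int64_t> widened;
  widened.reserve(in_shape.size());
  (void)std::transform(in_shape.begin(), in_shape.end(), std::back_inserter(widened),
                       [](size_t dim) { return static_cast<int64_t>(dim); });
  return widened;
}

// After: GetInputDeviceShape / GetOutputDeviceShape already return int64_t
// dims, so each shape is pushed into shape_list_ as-is.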
shape_list_.push_back(out_shape_tmp); + auto out_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, i); + ndims_.push_back(out_shape.size()); + shape_list_.push_back(out_shape); type_list_.push_back(TypeIdToString(output_type_list[i], true)); } diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/data_format_vec_permute_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/data_format_vec_permute_cpu_kernel.cc index df1354631a5..591ebe7b1c3 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/data_format_vec_permute_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/data_format_vec_permute_cpu_kernel.cc @@ -34,9 +34,12 @@ void DataFormatVecPermuteCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { input_type_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); output_type_ = AnfAlgo::GetOutputDeviceDataType(kernel_node, 0); dim_ = input_shape_.size(); + if (AnfAlgo::IsShapesDynamic({input_shape_, output_shape_})) { + return; + } // check attr - std::vector shape1 = {4}; - std::vector shape2 = {4, 2}; + std::vector shape1 = {4}; + std::vector shape2 = {4, 2}; if (src_format_ != "NHWC" && src_format_ != "NCHW") { MS_LOG(EXCEPTION) << "For " << kernel_name_ << ", src_format must be 'NHWC' or 'NCHW' , but got " << src_format_ << "."; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/data_format_vec_permute_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/data_format_vec_permute_cpu_kernel.h index 2bacf187890..b07c45f3a7a 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/data_format_vec_permute_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/data_format_vec_permute_cpu_kernel.h @@ -52,8 +52,8 @@ class DataFormatVecPermuteCpuKernelMod : public DeprecatedNativeCpuKernelMod { DataFormatVecPermuteFunc kernel_func_; std::string src_format_; std::string dst_format_; - std::vector input_shape_; - std::vector output_shape_; + std::vector input_shape_; + std::vector output_shape_; TypeId input_type_{kTypeUnknown}; TypeId output_type_{kTypeUnknown}; size_t dim_{0}; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/dense_to_csr_sparse_matrix_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/dense_to_csr_sparse_matrix_cpu_kernel.cc index 35237d7c8f3..4a7363a0c02 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/dense_to_csr_sparse_matrix_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/dense_to_csr_sparse_matrix_cpu_kernel.cc @@ -41,8 +41,8 @@ void DenseToCSRSparseMatrixCpuKernelMod::InitKernel(const CNodePtr &kernel_node) kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); indices_type_ = AnfAlgo::GetInputDeviceDataType(kernel_node, kInputIndex1); values_type_ = AnfAlgo::GetInputDeviceDataType(kernel_node, kInputIndex0); - auto dense_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kInputIndex0); - auto indices_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kInputIndex1); + auto dense_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kInputIndex0)); + auto indices_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kInputIndex1)); rank_ = dense_shape.size(); total_nnz_ = indices_shape[kZero]; batch_size_ = (rank_ == kDefaultRank) ? 
kOne : dense_shape[kZero]; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/dense_to_dense_set_operation_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/dense_to_dense_set_operation_cpu_kernel.cc index be3c54efae2..3f8e7e8ecd0 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/dense_to_dense_set_operation_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/dense_to_dense_set_operation_cpu_kernel.cc @@ -48,17 +48,17 @@ constexpr size_t kNum2 = 2; #define Int64_matrix Eigen::TensorMap, Eigen::Aligned> #define Int64_flat Eigen::TensorMap, Eigen::Aligned> -const std::vector GetStrides(const std::vector &shape) { +const std::vector GetStrides(const ShapeVector &shape) { std::vector result(shape.size()); size_t product = 1; for (int64_t i = SizeToLong(shape.size()) - 1; i >= 0; --i) { result[i] = product; - product *= shape[i]; + product *= LongToSize(shape[i]); } return result; } -bool GroupShape(const std::vector &input_shape, std::vector *grouped_shape) { +bool GroupShape(const ShapeVector &input_shape, ShapeVector *grouped_shape) { const size_t min_shape_size = 2; if (input_shape.size() < min_shape_size) { return false; @@ -67,7 +67,7 @@ bool GroupShape(const std::vector &input_shape, std::vector *gro return true; } -bool CheckShapesMatch(const std::vector &shape1, const std::vector &shape2) { +bool CheckShapesMatch(const ShapeVector &shape1, const ShapeVector &shape2) { if (shape1.size() != shape2.size()) { return false; } @@ -79,15 +79,14 @@ bool CheckShapesMatch(const std::vector &shape1, const std::vector &shape1, const std::vector &shape2, - std::vector *group_shape) { - std::vector group_shape_1; +void GetCommonShape(const ShapeVector &shape1, const ShapeVector &shape2, ShapeVector *group_shape) { + ShapeVector group_shape_1; if (!GroupShape(shape1, &group_shape_1)) { MS_LOG(EXCEPTION) << "For DenseToDenseSerOperation, " << "the shape rank of input x1 must be at least 2, " << "but got " << shape1.size() << "."; } - std::vector group_shape_2; + ShapeVector group_shape_2; if (!GroupShape(shape2, &group_shape_2)) { MS_LOG(EXCEPTION) << "For DenseToDenseSerOperation, " << "the shape rank of input x2 must be at least 2, " @@ -101,8 +100,7 @@ void GetCommonShape(const std::vector &shape1, const std::vector group_shape->assign(group_shape_1.begin(), group_shape_1.end()); } -void GetGroupIdx(const int64_t flat_group_index, const std::vector &group_shape, - std::vector *group_indices) { +void GetGroupIdx(const int64_t flat_group_index, const ShapeVector &group_shape, std::vector *group_indices) { group_indices->clear(); int64_t running_flat_group_index = flat_group_index; for (int64_t group_dim_index = SizeToLong(group_shape.size()) - 1; group_dim_index >= 0; --group_dim_index) { @@ -196,14 +194,16 @@ void DenseToDenseSetOperationCpuKernelMod::SetCompute(const std::set &set1, c template bool DenseToDenseSetOperationCpuKernelMod::PopulateOutput(const std::vector &inputs, const std::vector &outputs, - const std::vector &output_shape, - const size_t num_values, + const ShapeVector &output_shape, const size_t num_values, const std::map, std::set> *sets) { auto out_indices_ptr = reinterpret_cast(outputs[kOutput1]->addr); auto out_values_ptr = reinterpret_cast(outputs[kOutput2]->addr); auto out_shape_ptr = reinterpret_cast(outputs[kOutput3]->addr); size_t output_shape_size = output_shape.size(); - std::vector> infer_shape = {{num_values, output_shape_size}, {num_values}, {output_shape_size}}; + auto num_values_signed = SizeToLong(num_values); + auto output_shape_size_signed 
= SizeToLong(output_shape_size); + std::vector infer_shape = { + {num_values_signed, output_shape_size_signed}, {num_values_signed}, {output_shape_size_signed}}; std::vector infer_type(kOutputNum); for (size_t i = 0; i < kOutputNum; i++) { infer_type[i] = AnfAlgo::GetOutputDeviceDataType(node_ptr_, i); @@ -242,7 +242,7 @@ bool DenseToDenseSetOperationCpuKernelMod::PopulateOutput(const std::vector bool DenseToDenseSetOperationCpuKernelMod::LaunchKernel(const std::vector &inputs, const std::vector &outputs) { - std::vector group_shape; + ShapeVector group_shape; const auto x1_shape = AnfAlgo::GetInputDeviceShape(node_ptr_, kInputX1); const auto x2_shape = AnfAlgo::GetInputDeviceShape(node_ptr_, kInputX2); GetCommonShape(x1_shape, x2_shape, &group_shape); diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/dense_to_dense_set_operation_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/dense_to_dense_set_operation_cpu_kernel.h index eda09a3ede7..d7deaba1528 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/dense_to_dense_set_operation_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/dense_to_dense_set_operation_cpu_kernel.h @@ -46,7 +46,7 @@ class DenseToDenseSetOperationCpuKernelMod : public DeprecatedNativeCpuKernelMod bool LaunchKernel(const std::vector &inputs, const std::vector &outputs); template bool PopulateOutput(const std::vector &inputs, const std::vector &outputs, - const std::vector &output_shape, const size_t num_values, + const ShapeVector &output_shape, const size_t num_values, const std::map, std::set> *sets); template void SetCompute(const std::set &set1, const std::set &set2, std::set *result); diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/depthtospace_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/depthtospace_cpu_kernel.cc index 29a5702e887..337ce1baa2e 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/depthtospace_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/depthtospace_cpu_kernel.cc @@ -49,15 +49,15 @@ bool DepthToSpaceCpuKernelMod::LaunchKernel(const std::vector(inputs[0]->addr); auto output_addr = reinterpret_cast(outputs[0]->addr); size_t size = inputs[0]->size / sizeof(T); - std::vector input_shape = input_shape_; - std::vector output_shape = output_shape_; + auto input_shape = input_shape_; + auto output_shape = output_shape_; size_t block_size = block_size_; size_t input_dimension = input_shape.size(); size_t output_strides[3] = {1, 1, 1}; for (size_t i = input_dimension - 1; i >= 1; --i) { for (size_t j = 0; j < i; ++j) { - output_strides[j] *= output_shape[i]; + output_strides[j] *= static_cast(output_shape[i]); } } @@ -72,11 +72,11 @@ bool DepthToSpaceCpuKernelMod::LaunchKernel(const std::vector(input_shape[1])) + + (output_pos_array[1] + (block_size * (output_pos_array[2] % block_size) + output_pos_array[3] % block_size) * + static_cast(output_shape[1])); + input_pos = (input_pos * static_cast(input_shape[2])) + (output_pos_array[2] / block_size); + input_pos = (input_pos * static_cast(input_shape[3])) + (output_pos_array[3] / block_size); output_addr[i] = input_addr[input_pos]; } }; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/depthtospace_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/depthtospace_cpu_kernel.h index 6335ab2848c..4134a51d7c9 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/depthtospace_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/depthtospace_cpu_kernel.h @@ -47,8 +47,8 @@ class DepthToSpaceCpuKernelMod : public DeprecatedNativeCpuKernelMod { 
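The DepthToSpace hunk above decomposes each flat output offset into NCHW coordinates with three precomputed strides, then re-linearizes into an input offset. A sketch of the decomposition step, under the assumption that for an output shape {N, C, H, W} the kernel's strides are strides[0] = C*H*W, strides[1] = H*W, strides[2] = W:

#include <array>
#include <cstddef>

std::array<size_t, 4> DecomposeNCHW(size_t flat, const std::array<size_t, 3> &strides) {
  std::array<size_t, 4> pos{};
  pos[0] = flat / strides[0];                 // n
  pos[1] = (flat % strides[0]) / strides[1];  // c
  pos[2] = (flat % strides[1]) / strides[2];  // h
  pos[3] = flat % strides[2];                 // w
  return pos;
}

The static_cast<size_t> calls sprinkled through the hunk appear for the usual reason in this patch: the stride products are size_t while the shape dims are now int64_t.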
static std::vector> func_list_; DepthToSpaceFunc kernel_func_; - std::vector input_shape_; - std::vector output_shape_; + std::vector input_shape_; + std::vector output_shape_; size_t block_size_{0}; }; } // namespace kernel diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/dropout_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/dropout_cpu_kernel.cc index 01b60b34061..ce76a442346 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/dropout_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/dropout_cpu_kernel.cc @@ -34,13 +34,16 @@ void DropoutCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { input_shape_ = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); output_shape_ = common::AnfAlgo::GetOutputInferShape(kernel_node, 0); mask_shape_ = common::AnfAlgo::GetOutputInferShape(kernel_node, 1); + if (AnfAlgo::IsShapesDynamic({input_shape_, output_shape_, mask_shape_})) { + return; + } keep_prob_ = common::AnfAlgo::GetNodeAttr(kernel_node, "keep_prob"); if (keep_prob_ <= 0.0 || keep_prob_ > 1.0) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << ", the 'keep_prob' must be in (0.0, 1.0], but got " << keep_prob_; } dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); - for (const uint64_t &d : input_shape_) { - tensor_size_ *= d; + for (const auto &d : input_shape_) { + tensor_size_ *= LongToSize(d); } } diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/dropout_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/dropout_cpu_kernel.h index 9a4e7b18f9b..07588153f9c 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/dropout_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/dropout_cpu_kernel.h @@ -39,9 +39,9 @@ class DropoutCpuKernelMod : public DeprecatedNativeCpuKernelMod { template void LaunchKernel(const std::vector &inputs, const std::vector &outputs) const; - std::vector input_shape_; - std::vector output_shape_; - std::vector mask_shape_; + ShapeVector input_shape_; + ShapeVector output_shape_; + ShapeVector mask_shape_; TypeId dtype_{kTypeUnknown}; float keep_prob_{0.0}; uint64_t tensor_size_{1}; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/dropout_grad_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/dropout_grad_kernel.cc index c9cc4fa402f..12142d1a469 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/dropout_grad_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/dropout_grad_kernel.cc @@ -40,8 +40,8 @@ void DropoutGradBwdCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { << input_shape.size() << ", and the dimension of 'input_mask': " << input_mask_shape.size(); } num_count_ = 1; - for (size_t x : input_shape) { - num_count_ *= x; + for (auto x : input_shape) { + num_count_ *= static_cast(x); } dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); keep_prob_ = common::AnfAlgo::GetNodeAttr(kernel_node, "keep_prob"); diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/dynamic_assign_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/dynamic_assign_cpu_kernel.cc index b82a3948961..2120fee910e 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/dynamic_assign_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/dynamic_assign_cpu_kernel.cc @@ -65,9 +65,12 @@ void DynamicAssignCpuKernelMod::LaunchKernel(const std::vector &inpu } auto input_x_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(node, 0); auto input_y_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(node, 1); + if (AnfAlgo::IsShapesDynamic({input_x_shape, input_y_shape})) { + return; + } batch_size_ = 1; for 
(size_t i = 0; i < input_x_shape.size(); ++i) { - batch_size_ *= input_x_shape[i]; + batch_size_ *= LongToSize(input_x_shape[i]); } if (input_x_shape.size() != input_y_shape.size()) { @@ -104,9 +107,7 @@ void DynamicAssignCpuKernelMod::LaunchKernel(const std::vector &inpu auto node_ptr = out_node->cast(); auto value = node_ptr->default_param(); auto tensor = value->cast>(); - ShapeVector shape_tmp; - (void)std::transform(input_x_shape.begin(), input_x_shape.end(), std::back_inserter(shape_tmp), SizeToLong); - tensor->set_shape(shape_tmp); + tensor->set_shape(input_x_shape); } else { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', output must be a Parameter."; } diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/dynamic_shape_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/dynamic_shape_cpu_kernel.cc index 9daa5b49479..1d2759aad16 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/dynamic_shape_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/dynamic_shape_cpu_kernel.cc @@ -41,18 +41,18 @@ bool TensorShapeCpuKernelMod::Launch(const std::vector &inpu MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', cnode_ptr_(kernel_node) is expired. Error no: " << node_; } auto output_addr = reinterpret_cast(outputs[0]->addr); - std::vector input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(node_, 0); + auto input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(node_, 0); auto output_shape = common::AnfAlgo::GetOutputInferShape(node_, 0); if (output_shape.size() != 1) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of output must be 1-D, but got: " << output_shape.size(); } - if (output_shape[0] != input_shape.size()) { + if (output_shape[0] != SizeToLong(input_shape.size())) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', 'output_shape[0]' must be equal to the dimension of input, but got 'output_shape[0]': " << output_shape[0] << " and the dimension of input: " << input_shape.size(); } - for (size_t i = 0; i < output_shape[0]; ++i) { + for (size_t i = 0; i < LongToSize(output_shape[0]); ++i) { output_addr[i] = input_shape[i]; } return true; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/dynamic_stitch_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/dynamic_stitch_cpu_kernel.cc index f83960529ed..ad1398265c5 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/dynamic_stitch_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/dynamic_stitch_cpu_kernel.cc @@ -25,8 +25,8 @@ namespace kernel { namespace { constexpr size_t kDynamicStitchOutputNum = 1; } // namespace -size_t GetShapeSize(const std::vector &shape) { - return std::accumulate(shape.begin(), shape.end(), size_t(1), std::multiplies()); +int64_t GetShapeSize(const ShapeVector &shape) { + return std::accumulate(shape.begin(), shape.end(), 1, std::multiplies()); } template @@ -41,14 +41,14 @@ bool DynamicStitchCpuKernelMod::LaunchKernel(const std::vector(inputs[i]->addr); auto shape_size = GetShapeSize(common::AnfAlgo::GetPrevNodeOutputInferShape(node_, i)); - for (size_t j = 0; j < shape_size; ++j) { + for (auto j = 0; j < shape_size; ++j) { max_index = std::max(indice[j], max_index); } } first_dim_size = max_index + 1; std::vector dtypes{AnfAlgo::GetOutputDeviceDataType(node_, 0)}; - std::vector result_shape{IntToSize(first_dim_size)}; + ShapeVector result_shape{first_dim_size}; auto data0_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(node_, input_tuple_num_); auto indice_dims = common::AnfAlgo::GetPrevNodeOutputInferShape(node_, 0).size(); for 
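TensorShapeCpuKernelMod::Launch above is now nearly a straight copy: the output is a rank-1 int64 tensor whose length equals the input rank, and each element is one input dim. With ShapeVector already holding int64_t, no per-element cast remains. A minimal sketch of the copy it performs:

#include <cstdint>
#include <vector>

using ShapeVector = std::vector<int64_t>;

// output_addr points at output_shape[0] == input_shape.size() elements.
void WriteShapeTensor(const ShapeVector &input_shape, int64_t *output_addr) {
  for (size_t i = 0; i < input_shape.size(); ++i) {
    output_addr[i] = input_shape[i];
  }
}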
(size_t d = indice_dims; d < data0_shape.size(); ++d) { @@ -57,7 +57,7 @@ bool DynamicStitchCpuKernelMod::LaunchKernel(const std::vector out_dims(num_out_dims, 0); + ShapeVector out_dims(num_out_dims, 0); for (size_t out_dim = 0; out_dim <= num_out_dims - 1; ++out_dim) { out_dims[out_dim] = out_dim >= result_shape.size() ? 1 : result_shape[out_dim]; } @@ -66,13 +66,13 @@ bool DynamicStitchCpuKernelMod::LaunchKernel(const std::vector(outputs[0]->addr); - size_t slice_size = out_dims[1]; + size_t slice_size = LongToSize(out_dims[1]); size_t slice_bytes = slice_size * sizeof(T); for (size_t i = 0; i < input_tuple_num_; i++) { auto indice = reinterpret_cast(inputs[i]->addr); auto data = reinterpret_cast(inputs[i + input_tuple_num_]->addr); auto shape_size = GetShapeSize(common::AnfAlgo::GetPrevNodeOutputInferShape(node_, i)); - for (size_t j = 0; j < shape_size; ++j) { + for (auto j = 0; j < shape_size; ++j) { auto ret = memcpy_s(merged + indice[j] * slice_size, slice_bytes, data + j * slice_size, slice_bytes); if (ret != 0) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', memcpy_s error. Error no: " << ret; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/eigen/cholesky_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/eigen/cholesky_cpu_kernel.cc index e490c2e03d8..065c471c2d3 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/eigen/cholesky_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/eigen/cholesky_cpu_kernel.cc @@ -61,9 +61,9 @@ void CholeskyCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { CHECK_KERNEL_INPUTS_NUM(input_num, kInputsNum, kernel_name_); size_t output_num = common::AnfAlgo::GetOutputTensorNum(kernel_node); CHECK_KERNEL_OUTPUTS_NUM(output_num, kOutputsNum, kernel_name_); - auto input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kInputIndex); + auto input_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kInputIndex)); InitMatrixInfo(input_shape, &input_row_, &input_col_); - auto output_shape = common::AnfAlgo::GetOutputInferShape(kernel_node, kOutputIndex); + auto output_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetOutputInferShape(kernel_node, kOutputIndex)); InitMatrixInfo(output_shape, &output_row_, &output_col_); if (common::AnfAlgo::HasNodeAttr("upper", kernel_node)) { flag_ = false; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/eigen/cholesky_solve_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/eigen/cholesky_solve_cpu_kernel.cc index f59fc1c67bf..ae5ca466215 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/eigen/cholesky_solve_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/eigen/cholesky_solve_cpu_kernel.cc @@ -35,7 +35,7 @@ void CholeskySolveCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, kInputIndex0); - std::vector x1_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kInputIndex0); + std::vector x1_shape = Convert2SizeT(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kInputIndex0)); size_t rank = x1_shape.size(); if (rank == kDefalutRank) { dim = x1_shape[rank - kRowIndex]; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/eigen/eig_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/eigen/eig_cpu_kernel.cc index 7d8db40b572..e0b76fcc14e 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/eigen/eig_cpu_kernel.cc +++ 
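One note on the GetShapeSize hunk above: std::accumulate deduces its result type from the initial value, so if the new code really seeds with the bare literal 1 (as rendered here), the running product is computed in int even though the range holds int64_t, and large shapes can overflow. Seeding with an int64_t keeps the whole product 64-bit:

#include <cstdint>
#include <functional>
#include <numeric>
#include <vector>

using ShapeVector = std::vector<int64_t>;

int64_t GetShapeSize(const ShapeVector &shape) {
  // int64_t{1} (not 1) makes std::accumulate carry an int64_t accumulator.
  return std::accumulate(shape.begin(), shape.end(), int64_t{1}, std::multiplies<int64_t>());
}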
b/mindspore/ccsrc/plugin/device/cpu/kernel/eigen/eig_cpu_kernel.cc @@ -61,7 +61,7 @@ void EigCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { size_t output_num = common::AnfAlgo::GetOutputTensorNum(kernel_node); auto expect_output_num = compute_v_ ? kOutputsNumV : kOutputsNumNV; CHECK_KERNEL_OUTPUTS_NUM(output_num, expect_output_num, kernel_name_); - auto input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto input_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0)); InitMatrixInfo(input_shape); auto kernel_attr = GetKernelAttrFromNode(kernel_node); diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/eigen/eigh_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/eigen/eigh_cpu_kernel.cc index 92cd619b94a..6a1c5e660ac 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/eigen/eigh_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/eigen/eigh_cpu_kernel.cc @@ -35,6 +35,9 @@ void EighCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { compute_eigen_vectors_ = common::AnfAlgo::GetNodeAttr(kernel_node, C_EIEH_VECTOR); lower_ = common::AnfAlgo::GetNodeAttr(kernel_node, LOWER); auto A_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + if (AnfAlgo::IsShapesDynamic({A_shape})) { + return; + } if (A_shape.size() != kShape2dDims) { MS_LOG(EXCEPTION) << "Wrong array shape. For '" << kernel_name_ << "', a must be 2D, but got [" << A_shape.size() << "] dimensions."; @@ -44,7 +47,7 @@ void EighCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { << "', a must be a squre matrix like [N X N], but got [" << A_shape[kDim0] << " X " << A_shape[kDim1] << "]."; } - m_ = A_shape[kDim0]; + m_ = LongToSize(A_shape[kDim0]); auto kernel_attr = GetKernelAttrFromNode(kernel_node); auto [is_match, index] = MatchKernelAttr(kernel_attr, GetOpSupport()); diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/eigen/expand_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/eigen/expand_cpu_kernel.cc index be8d2eb852d..f2595e808c5 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/eigen/expand_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/eigen/expand_cpu_kernel.cc @@ -47,10 +47,10 @@ void ExpandCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { if (output_num != kExpandOutputsNum) { MS_LOG(EXCEPTION) << "For " << kernel_name_ << ", the number of output is 1, but got " << output_num << "."; } - input_x_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0); + input_x_shape_ = Convert2SizeTClipNeg(AnfAlgo::GetInputDeviceShape(kernel_node, 0)); input_x_dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); - input_shape_ = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); - output_y_shape_ = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); + input_shape_ = Convert2SizeTClipNeg(AnfAlgo::GetOutputDeviceShape(kernel_node, 0)); + output_y_shape_ = Convert2SizeTClipNeg(AnfAlgo::GetOutputDeviceShape(kernel_node, 0)); } bool ExpandCpuKernelMod::Launch(const std::vector &inputs, const std::vector &, diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/eigen/lu_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/eigen/lu_cpu_kernel.cc index 33ec5a9a9e4..20229742704 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/eigen/lu_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/eigen/lu_cpu_kernel.cc @@ -71,13 +71,13 @@ void LUCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { CHECK_KERNEL_INPUTS_NUM(input_num, kLUInputsNum, kernel_name_); size_t output_num = 
common::AnfAlgo::GetOutputTensorNum(kernel_node); CHECK_KERNEL_OUTPUTS_NUM(output_num, kLUOutputsNum, kernel_name_); - auto a_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kLUaIndex); + auto a_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kLUaIndex)); InitMatrixInfo(a_shape, &a_row_, &a_col_); - auto lu_shape = common::AnfAlgo::GetOutputInferShape(kernel_node, kLuIndex); + auto lu_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetOutputInferShape(kernel_node, kLuIndex)); InitMatrixInfo(lu_shape, &lu_row_, &lu_col_); - auto permutation_shape = common::AnfAlgo::GetOutputInferShape(kernel_node, kPermutationIndex); + auto permutation_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetOutputInferShape(kernel_node, kPermutationIndex)); InitMatrixInfo(permutation_shape, &permutation_row_, &permutation_col_); - auto pivots_shape = common::AnfAlgo::GetOutputInferShape(kernel_node, kPivotsIndex); + auto pivots_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetOutputInferShape(kernel_node, kPivotsIndex)); InitPivotVecInfo(pivots_shape, &pivots_row_, &pivots_col_); auto kernel_attr = GetKernelAttrFromNode(kernel_node); diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/eigen/lu_solve_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/eigen/lu_solve_cpu_kernel.cc index 229b5235a27..788f69df042 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/eigen/lu_solve_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/eigen/lu_solve_cpu_kernel.cc @@ -43,8 +43,8 @@ void LUSolverCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { CHECK_KERNEL_INPUTS_NUM(input_num, kLUInputsNum, kernel_name_); size_t output_num = common::AnfAlgo::GetOutputTensorNum(kernel_node); CHECK_KERNEL_OUTPUTS_NUM(output_num, kLUOutputsNum, kernel_name_); - auto a_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kLUaIndex); - auto b_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kLUbIndex); + auto a_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kLUaIndex)); + auto b_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kLUbIndex)); if (a_shape.empty() || b_shape.empty()) { MS_LOG_EXCEPTION << kernel_name_ << " input a or b matrix shape invalid."; } @@ -60,7 +60,7 @@ void LUSolverCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { b_row_ = b_shape.at(b_shape.size() - kRowIndex); b_col_ = b_shape.at(b_shape.size() - kColIndex); } - auto output_lu_shape = common::AnfAlgo::GetOutputInferShape(kernel_node, kLuIndex); + auto output_lu_shape = Convert2SizeT(common::AnfAlgo::GetOutputInferShape(kernel_node, kLuIndex)); if (output_lu_shape.empty()) { MS_LOG_EXCEPTION << kernel_name_ << " output lu shape invalid."; } diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/eigen/matmul_double_cpu_kernel_func.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/eigen/matmul_double_cpu_kernel_func.cc index fae8473552a..c869373f191 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/eigen/matmul_double_cpu_kernel_func.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/eigen/matmul_double_cpu_kernel_func.cc @@ -50,21 +50,21 @@ inline void matmul_b(const MatrixBase &A, double *b_addr, double *outpu void MatmulDoubleCpuKernelFunc::InitFunc(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); - std::vector a_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); - std::vector b_shape = 
AnfAlgo::GetInputDeviceShape(kernel_node, 1); - std::vector out_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); + std::vector a_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); + std::vector b_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1); + std::vector out_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); if (a_shape.size() != kAMatrixDimNum || b_shape.size() != kAMatrixDimNum || out_shape.size() != kAMatrixDimNum) { MS_LOG(EXCEPTION) << "The tensor rank of MatMul must be equal to 2."; } trans_a_ = common::AnfAlgo::GetNodeAttr(kernel_node, TRANSPOSE_A); trans_b_ = common::AnfAlgo::GetNodeAttr(kernel_node, TRANSPOSE_B); - a_row_ = a_shape[kDim0]; - a_col_ = a_shape[kDim1]; - b_row_ = b_shape[kDim0]; - b_col_ = b_shape[kDim1]; - out_row_ = out_shape[kDim0]; - out_col_ = out_shape[kDim1]; + a_row_ = static_cast(a_shape[kDim0]); + a_col_ = static_cast(a_shape[kDim1]); + b_row_ = static_cast(b_shape[kDim0]); + b_col_ = static_cast(b_shape[kDim1]); + out_row_ = static_cast(out_shape[kDim0]); + out_col_ = static_cast(out_shape[kDim1]); } bool MatmulDoubleCpuKernelFunc::RunFunc(const std::vector &inputs, diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/eigen/matrix_triangular_solve_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/eigen/matrix_triangular_solve_cpu_kernel.cc index 6ce77fa3d41..b5cc6ee6b1b 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/eigen/matrix_triangular_solve_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/eigen/matrix_triangular_solve_cpu_kernel.cc @@ -41,8 +41,8 @@ constexpr auto kAMatrixDimNum = 2; constexpr size_t kRowIndex = 2; constexpr size_t kColIndex = 1; void MatrixTriangularSolveCpuKernelMod::InitShape(const CNodePtr &kernel_node) { - auto a_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); - auto b_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + auto a_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0)); + auto b_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1)); // Since the shape check is done in frontend, we can suppose that the shape of a, b here is valid. 
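[Editor's note] A recurring pattern in the hunks above: inferred shapes are now signed ShapeVector values (vectors of int64_t, with negative entries marking dynamic dimensions), and kernels that do size_t indexing convert at the boundary via Convert2SizeT or Convert2SizeTClipNeg. Those helpers are not defined in this diff; the sketch below is a minimal illustration of the semantics their names and call sites suggest, not the actual MindSpore implementation, and the Sketch-suffixed names are ours.

#include <cstddef>
#include <cstdint>
#include <vector>

using ShapeVector = std::vector<int64_t>;  // negative dims (e.g. -1) denote dynamic axes

// Assumed semantics of Convert2SizeT: plain narrowing; only safe once the
// shape is known to be fully static and non-negative.
std::vector<size_t> Convert2SizeTSketch(const ShapeVector &shape) {
  std::vector<size_t> out;
  out.reserve(shape.size());
  for (int64_t dim : shape) {
    out.push_back(static_cast<size_t>(dim));
  }
  return out;
}

// Assumed semantics of Convert2SizeTClipNeg: dynamic (negative) dims are
// clipped to 0 so they cannot wrap around to a huge unsigned value.
std::vector<size_t> Convert2SizeTClipNegSketch(const ShapeVector &shape) {
  std::vector<size_t> out;
  out.reserve(shape.size());
  for (int64_t dim : shape) {
    out.push_back(dim < 0 ? 0 : static_cast<size_t>(dim));
  }
  return out;
}

Under these assumptions, the early returns added elsewhere in this patch (if (IsDynamic(input_shape)) return; / if (AnfAlgo::IsShapesDynamic({...})) return;) do the same job one level earlier: InitKernel bails out before any size_t arithmetic can see a negative dimension.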
size_t a_dims = a_shape.size(); size_t aRowIndex = a_dims - kRowIndex; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/eigen/qr_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/eigen/qr_cpu_kernel.cc index e079b1d6b73..c76599dcbc5 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/eigen/qr_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/eigen/qr_cpu_kernel.cc @@ -43,17 +43,17 @@ void QRCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { MS_LOG(EXCEPTION) << "mode must be in [full, r, economic], but got [" << mode << "]."; } - auto a_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto a_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0)); CHECK_KERNEL_INPUTS_NUM(a_shape.size(), kAMatrixDimNum, kernel_name_); a_row_ = a_shape[kDim0]; a_col_ = a_shape[kDim1]; - auto q_shape = common::AnfAlgo::GetOutputInferShape(kernel_node, 0); + auto q_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetOutputInferShape(kernel_node, 0)); CHECK_KERNEL_INPUTS_NUM(q_shape.size(), kAMatrixDimNum, kernel_name_); q_row_ = q_shape[kDim0]; q_col_ = q_shape[kDim1]; - auto r_shape = common::AnfAlgo::GetOutputInferShape(kernel_node, 1); + auto r_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetOutputInferShape(kernel_node, 1)); CHECK_KERNEL_INPUTS_NUM(r_shape.size(), kAMatrixDimNum, kernel_name_); r_row_ = r_shape[kDim0]; r_col_ = r_shape[kDim1]; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/embedding_look_up_comm_grad_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/embedding_look_up_comm_grad_cpu_kernel.cc index 8c63911f575..8d54309b181 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/embedding_look_up_comm_grad_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/embedding_look_up_comm_grad_cpu_kernel.cc @@ -33,6 +33,9 @@ void EmbeddingLookUpCommGradCpuKernelMod::InitKernel(const CNodePtr &kernel_node split_num_ = LongToSize(split_num); MS_LOG(INFO) << "split_num: " << split_num; auto input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + if (IsDynamic(input_shape)) { + return; + } if (split_num <= 0 || split_num_ == 0) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the 'split_num' must be greater than 0, but got " << split_num; } diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/embedding_look_up_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/embedding_look_up_cpu_kernel.cc index cae742b19e1..7ca729078ab 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/embedding_look_up_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/embedding_look_up_cpu_kernel.cc @@ -57,7 +57,7 @@ void EmbeddingLookUpCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); node_wpt_ = kernel_node; - auto input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto input_shape = Convert2SizeT(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0)); if (input_shape.empty() || input_shape.size() > kEmbeddingLookupInputParamsMaxDim) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input must be 1-" << kEmbeddingLookupInputParamsMaxDim << "D, but got " << input_shape.size() << "D."; @@ -67,11 +67,10 @@ void EmbeddingLookUpCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { for (size_t i = 1; i < input_shape.size(); ++i) { outer_dim_size_ *= input_shape[i]; } - indices_lens_ = 1; - std::vector indices_shape = 
common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); - for (const auto &shape : indices_shape) { - indices_lens_ *= shape; - } + + auto indices_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + indices_lens_ = SizeOf(indices_shape); + indices_data_type_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 1); if (common::AnfAlgo::HasNodeAttr(kAttrOffset, kernel_node)) { offset_ = common::AnfAlgo::GetNodeAttr(kernel_node, kAttrOffset); @@ -86,7 +85,7 @@ void EmbeddingLookUpCpuKernelMod::LaunchKernel(const std::vector input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(node, 0); + auto input_shape = Convert2SizeT(common::AnfAlgo::GetPrevNodeOutputInferShape(node, 0)); if (input_shape.empty()) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input must be at least 1D, but got empty input."; @@ -98,7 +97,7 @@ void EmbeddingLookUpCpuKernelMod::LaunchKernel(const std::vector indices_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(node, 1); + auto indices_shape = Convert2SizeT(common::AnfAlgo::GetPrevNodeOutputInferShape(node, 1)); for (const auto &shape : indices_shape) { indices_lens_ *= shape; } diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/environ/environ_cpu_get.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/environ/environ_cpu_get.cc index c8adecbda7c..c8f30d7b6cc 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/environ/environ_cpu_get.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/environ/environ_cpu_get.cc @@ -35,12 +35,15 @@ void EnvironGetCpuKernelMod::InitKernel(const CNodePtr &node) { auto value_shapes = AnfAlgo::GetOutputDeviceShape(node, 0); auto default_value_type = AnfAlgo::GetInputDeviceDataType(node, 2); auto default_value_shapes = AnfAlgo::GetInputDeviceShape(node, 2); + if (AnfAlgo::IsShapesDynamic({value_shapes, default_value_shapes})) { + return; + } if ((value_type != default_value_type) || (value_shapes != default_value_shapes)) { MS_LOG(EXCEPTION) << "The env value checks invalid, kernel: " << node->fullname_with_scope(); } value_size_ = GetTypeByte(TypeIdToType(value_type)); for (auto &i : value_shapes) { - value_size_ *= i; + value_size_ *= static_cast(i); } input_size_list_.push_back(handle_size_); diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/environ/environ_cpu_set.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/environ/environ_cpu_set.cc index 85b696ef068..b694a2a5f15 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/environ/environ_cpu_set.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/environ/environ_cpu_set.cc @@ -43,7 +43,7 @@ void EnvironSetCpuKernelMod::InitKernel(const CNodePtr &node) { auto value_shapes = AnfAlgo::GetInputDeviceShape(node, 2); value_size_ = GetTypeByte(TypeIdToType(value_type)); for (auto &i : value_shapes) { - value_size_ *= i; + value_size_ *= static_cast(i); } input_size_list_.push_back(handle_size_); diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/fill_diagonal_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/fill_diagonal_cpu_kernel.h index f6b425ae263..b8926fc8e36 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/fill_diagonal_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/fill_diagonal_cpu_kernel.h @@ -41,7 +41,7 @@ class FillDiagonalCpuKernelMod : public DeprecatedNativeCpuKernelMod { template bool LaunchKernel(const std::vector &inputs, const std::vector &outputs); TypeId input_type_{kTypeUnknown}; - std::vector input_shape_; + std::vector input_shape_; float fill_value_; bool wrap_; }; diff --git 
a/mindspore/ccsrc/plugin/device/cpu/kernel/fill_v2_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/fill_v2_cpu_kernel.cc index 152d8b03b00..384e908ac15 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/fill_v2_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/fill_v2_cpu_kernel.cc @@ -81,7 +81,7 @@ bool FillV2CpuKernelMod::Launch(const std::vector &inputs, c std::vector output_new_shape_; auto num = output_shape_.size(); for (size_t i = 0; i < num; i++) { - auto element = static_cast(output_shape_[i]); + auto element = output_shape_[i]; output_new_shape_.emplace_back(element); } if (output_new_shape_ != dims) { diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/fill_v2_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/fill_v2_cpu_kernel.h index 936b4082317..5858687d026 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/fill_v2_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/fill_v2_cpu_kernel.h @@ -47,7 +47,7 @@ class FillV2CpuKernelMod : public DeprecatedNativeCpuKernelMod { private: TypeId output_dtype_{kTypeUnknown}; TypeId input1_dtype_{kTypeUnknown}; - std::vector output_shape_; + ShapeVector output_shape_; }; } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/fl/fused_pull_weight_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/fl/fused_pull_weight_kernel.h index d6a64fe2e38..7b824580116 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/fl/fused_pull_weight_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/fl/fused_pull_weight_kernel.h @@ -156,7 +156,7 @@ class FusedPullWeightKernelMod : public DeprecatedNativeCpuKernelMod { MS_EXCEPTION_IF_NULL(kernel_node); size_t input_num = common::AnfAlgo::GetInputTensorNum(kernel_node); for (size_t i = 0; i < input_num; i++) { - auto weight_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, i); + auto weight_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, i)); size_t weight_size_ = std::accumulate(weight_shape.begin(), weight_shape.end(), sizeof(T), std::multiplies()); input_size_list_.push_back(weight_size_); } diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/fl/fused_push_weight_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/fl/fused_push_weight_kernel.h index 5226c428092..aea21ea5507 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/fl/fused_push_weight_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/fl/fused_push_weight_kernel.h @@ -153,7 +153,7 @@ class FusedPushWeightKernelMod : public DeprecatedNativeCpuKernelMod { MS_EXCEPTION_IF_NULL(kernel_node); size_t input_num = common::AnfAlgo::GetInputTensorNum(kernel_node); for (size_t i = 0; i < input_num; i++) { - auto weight_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, i); + auto weight_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, i)); size_t weight_size_ = std::accumulate(weight_shape.begin(), weight_shape.end(), sizeof(T), std::multiplies()); input_size_list_.push_back(weight_size_); } diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/fl/get_model_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/fl/get_model_kernel.h index 4d03b26c30d..80e6217c9eb 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/fl/get_model_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/fl/get_model_kernel.h @@ -124,7 +124,7 @@ class GetModelKernelMod : public DeprecatedNativeCpuKernelMod { MS_LOG(INFO) << "Parameter name is " << weight_name; 
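[Editor's note] The FL kernels in this group (FusedPullWeight, FusedPushWeight, GetModel, UpdateModel) all size their input buffers the same way: clip the inferred shape to size_t, then fold it into a byte count with std::accumulate seeded by the element size. A hedged, self-contained restatement of that arithmetic follows; the helper name is illustrative, not from the source.

#include <cstddef>
#include <functional>
#include <numeric>
#include <vector>

// Bytes needed for a weight tensor: product of dims times element size.
// Seeding std::accumulate with elem_bytes folds the sizeof(T) factor into
// the product, exactly as the call sites in this diff do.
size_t WeightBytesSketch(const std::vector<size_t> &weight_shape, size_t elem_bytes) {
  return std::accumulate(weight_shape.begin(), weight_shape.end(), elem_bytes,
                         std::multiplies<size_t>());
}

One consequence worth noting: because Convert2SizeTClipNeg maps a dynamic dimension to 0, a not-yet-resolved shape makes the whole product 0, so no buffer space is reserved until the shape is concrete; an empty shape, by contrast, yields just elem_bytes, the scalar case.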
weight_name_to_input_idx_.insert(std::make_pair(weight_name, i)); - auto weight_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, i); + auto weight_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, i)); size_t weight_size_ = std::accumulate(weight_shape.begin(), weight_shape.end(), sizeof(float), std::multiplies()); input_size_list_.push_back(weight_size_); diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/fl/update_model_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/fl/update_model_kernel.h index 1dcecf16ce8..7c2b2f3aab4 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/fl/update_model_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/fl/update_model_kernel.h @@ -127,7 +127,7 @@ class UpdateModelKernelMod : public DeprecatedNativeCpuKernelMod { MS_LOG(INFO) << "Parameter name is " << weight_name; weight_full_names_.push_back(weight_name); - auto weight_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, i); + auto weight_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, i)); size_t weight_size_ = std::accumulate(weight_shape.begin(), weight_shape.end(), sizeof(float), std::multiplies()); input_size_list_.push_back(weight_size_); diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/fractional_avg_pool_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/fractional_avg_pool_cpu_kernel.cc index 8ff9f6ce55c..5b7c96e7443 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/fractional_avg_pool_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/fractional_avg_pool_cpu_kernel.cc @@ -42,6 +42,9 @@ void FractionalAvgPoolCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0); output_shape_ = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); + if (AnfAlgo::IsShapesDynamic({input_shape_, output_shape_})) { + return; + } if (input_shape_.size() != tensor_in_and_out_dims) { MS_EXCEPTION(ValueError) << "For '" << kernel_name_ << "', the input 'x' must be 4-dimensional."; } @@ -78,7 +81,7 @@ void FractionalAvgPoolCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { kernel_func_ = func_list_[index].second; } -static std::vector GeneratePoolingSequencePseudoRandom(size_t input_length, size_t output_length, +static std::vector GeneratePoolingSequencePseudoRandom(int64_t input_length, int64_t output_length, int64_t seed) { std::vector cum_seq(output_length + 1, 0); std::vector diff(output_length, 0); @@ -100,10 +103,10 @@ static std::vector GeneratePoolingSequencePseudoRandom(size_t input_len const double u = dis2(random) * max_u; cum_seq[0] = 1; cum_seq[output_length] = input_length + 1; - for (size_t i = 1; i < output_length; ++i) { + for (size_t i = 1; i < LongToSize(output_length); ++i) { cum_seq[i] = static_cast(ceil(alpha * (i + u))); } - for (size_t i = 0; i < output_length; ++i) { + for (size_t i = 0; i < LongToSize(output_length); ++i) { diff[i] = cum_seq[i + 1] - cum_seq[i]; } return diff; @@ -111,7 +114,7 @@ static std::vector GeneratePoolingSequencePseudoRandom(size_t input_len } } -static std::vector GeneratePoolingSequenceRandom(size_t input_length, size_t output_length, int64_t seed) { +static std::vector GeneratePoolingSequenceRandom(int64_t input_length, int64_t output_length, int64_t seed) { if (output_length == 0) { MS_EXCEPTION(ValueError) << "For FractionalAvgPool, output_length got 0, please check it."; } else { @@ -126,7 +129,7 @@ 
static std::vector GeneratePoolingSequenceRandom(size_t input_length, s } } -std::vector GeneratePoolingSequence(size_t input_length, size_t output_length, bool pseudo_random, +std::vector GeneratePoolingSequence(int64_t input_length, int64_t output_length, bool pseudo_random, int64_t seed) { std::vector diff; if (output_length == 0) { @@ -142,7 +145,7 @@ std::vector GeneratePoolingSequence(size_t input_length, size_t output_ } } int k = input_length / output_length; - for (size_t i = 0; i < output_length; i++) { + for (size_t i = 0; i < LongToSize(output_length); i++) { if (diff[i] < k || diff[i] > k + 1) { MS_EXCEPTION(ValueError) << "For FractionalAvgPool, GeneratePoolingSequence diff[" << i << "] is error"; } @@ -204,7 +207,7 @@ bool FractionalAvgPoolCpuKernelMod::FractionalAvgPoolLaunch(const std::vector(input_shape_[kInputShapeIndexN]); const int64_t height_max = input_shape_[kInputShapeIndexH] - 1; auto shard_fractional_avg_pool = [this, input_ptr, output_ptr, height_cum_seq, width_cum_seq, height_max]( size_t start, size_t end) { diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/fractional_avg_pool_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/fractional_avg_pool_cpu_kernel.h index 21ad3ac6d2c..919daae648e 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/fractional_avg_pool_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/fractional_avg_pool_cpu_kernel.h @@ -52,8 +52,8 @@ class FractionalAvgPoolCpuKernelMod : public DeprecatedNativeCpuKernelMod { FractionalAvgPoolCpuKernelMod *, const std::vector &, const std::vector &)>; static std::vector> func_list_; FractionalAvgPoolFunc kernel_func_; - std::vector input_shape_; - std::vector output_shape_; + std::vector input_shape_; + std::vector output_shape_; std::vector pooling_ratio_; bool pseudo_random_{false}; bool overlapping_{false}; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/fractional_avg_pool_grad_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/fractional_avg_pool_grad_cpu_kernel.cc index e49eac235ef..21e874cfcc8 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/fractional_avg_pool_grad_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/fractional_avg_pool_grad_cpu_kernel.cc @@ -93,7 +93,7 @@ bool FractionalAvgPoolGradCpuKernelMod::FractionalAvgPoolGradLaunch(const std::v const int64_t in_rows = *(orig_input_tensor_shape + kShapeIndexH); const int64_t in_cols = *(orig_input_tensor_shape + kShapeIndexW); const int64_t in_depth = *(orig_input_tensor_shape + kShapeIndexC); - std::vector out_shape; + ShapeVector out_shape; for (size_t i = 0; i < orig_input_shape_num; i++) { if (orig_input_tensor_shape[i] <= 0) { MS_EXCEPTION(ValueError) << "For '" << kernel_name_ diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/fractional_avg_pool_grad_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/fractional_avg_pool_grad_cpu_kernel.h index 3744c57ea52..1bd2c89f661 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/fractional_avg_pool_grad_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/fractional_avg_pool_grad_cpu_kernel.h @@ -59,8 +59,8 @@ class FractionalAvgPoolGradCpuKernelMod : public DeprecatedNativeCpuKernelMod { FractionalAvgPoolGradFunc kernel_func_; TypeId output_type_; CNodeWeakPtr node_wpt_; - std::vector orig_input_shape_; - std::vector out_backprop_shape_; + std::vector orig_input_shape_; + std::vector out_backprop_shape_; bool overlapping_{false}; }; } // namespace kernel diff --git 
a/mindspore/ccsrc/plugin/device/cpu/kernel/fractional_max_pool3d_grad_with_fixed_ksize_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/fractional_max_pool3d_grad_with_fixed_ksize_cpu_kernel.cc index 316fbe5f414..45a8b420647 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/fractional_max_pool3d_grad_with_fixed_ksize_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/fractional_max_pool3d_grad_with_fixed_ksize_cpu_kernel.cc @@ -59,6 +59,9 @@ void FractionalMaxPool3DGradWithFixedKsizeCPUKernelMod::InitKernel(const CNodePt argmax_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, kInputIndex2); argmax_type_ = AnfAlgo::GetInputDeviceDataType(kernel_node, kInputIndex2); data_format_ = common::AnfAlgo::GetNodeAttr(kernel_node, FORMAT); + if (AnfAlgo::IsShapesDynamic({input_shape_, out_backprop_shape_, argmax_shape_})) { + return; + } size_t input_dims = input_shape_.size(); size_t out_backprop_dims = out_backprop_shape_.size(); size_t argmax_dims = argmax_shape_.size(); diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/fractional_max_pool3d_grad_with_fixed_ksize_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/fractional_max_pool3d_grad_with_fixed_ksize_cpu_kernel.h index 3250b8e34fc..0d0397c578a 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/fractional_max_pool3d_grad_with_fixed_ksize_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/fractional_max_pool3d_grad_with_fixed_ksize_cpu_kernel.h @@ -41,9 +41,9 @@ class FractionalMaxPool3DGradWithFixedKsizeCPUKernelMod : public DeprecatedNativ template bool DoComputeWithArgmaxType(const std::vector &inputs, const std::vector &outputs, TypeId argmax_type); - std::vector input_shape_; - std::vector out_backprop_shape_; - std::vector argmax_shape_; + std::vector input_shape_; + std::vector out_backprop_shape_; + std::vector argmax_shape_; std::string data_format_; TypeId out_backprop_type_; TypeId argmax_type_; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/fractional_max_pool3d_with_fixed_ksize_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/fractional_max_pool3d_with_fixed_ksize_cpu_kernel.cc index a20ea696392..4b9041e4f87 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/fractional_max_pool3d_with_fixed_ksize_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/fractional_max_pool3d_with_fixed_ksize_cpu_kernel.cc @@ -79,6 +79,9 @@ void FractionalMaxPool3DWithFixedKsizeCPUKernelMod::InitKernel(const CNodePtr &k output_shape_ = common::AnfAlgo::GetNodeAttr>(kernel_node, "output_shape"); ksize_ = common::AnfAlgo::GetNodeAttr>(kernel_node, "ksize"); data_format_ = common::AnfAlgo::GetNodeAttr(kernel_node, FORMAT); + if (AnfAlgo::IsShapesDynamic({input_shape_, random_samples_shape_, output_shape_})) { + return; + } size_t input_num_dims = input_shape_.size(); size_t random_samples_dims = random_samples_shape_.size(); size_t output_shape_dims = output_shape_.size(); diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/fractional_max_pool3d_with_fixed_ksize_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/fractional_max_pool3d_with_fixed_ksize_cpu_kernel.h index c540f35b9d4..35c3648e16b 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/fractional_max_pool3d_with_fixed_ksize_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/fractional_max_pool3d_with_fixed_ksize_cpu_kernel.h @@ -50,8 +50,8 @@ class FractionalMaxPool3DWithFixedKsizeCPUKernelMod : public DeprecatedNativeCpu template bool DoComputeWithRandomSamplesType(const std::vector &inputs, const std::vector 
&outputs, TypeId random_samples_type); - std::vector input_shape_; - std::vector random_samples_shape_; + std::vector input_shape_; + std::vector random_samples_shape_; std::vector output_shape_; std::vector ksize_; std::string data_format_; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/fractional_max_pool_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/fractional_max_pool_cpu_kernel.cc index becb03ec6fe..edcf2181874 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/fractional_max_pool_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/fractional_max_pool_cpu_kernel.cc @@ -42,6 +42,9 @@ void FractionalMaxPoolCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0); output_shape_ = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); + if (AnfAlgo::IsShapesDynamic({input_shape_, output_shape_})) { + return; + } if (input_shape_.size() != kInputDims) { MS_EXCEPTION(ValueError) << "For '" << kernel_name_ << "', the input 'x' must be 4-dimensional."; } @@ -125,7 +128,8 @@ static std::vector GeneratePoolingSequenceRandom(size_t input_length, s } } -std::vector GeneratePoolingSequence(size_t input_length, size_t output_length, bool pseudo_random, int seed) { +std::vector GeneratePoolingSequence(int64_t input_length, int64_t output_length, bool pseudo_random, + int seed) { std::vector diff; if (output_length == 0) { MS_EXCEPTION(ValueError) << "For FractionalAvgPool, output_length got 0, please check it."; @@ -140,7 +144,7 @@ std::vector GeneratePoolingSequence(size_t input_length, size_t output_ } } int k = input_length / output_length; - for (size_t i = 0; i < output_length; i++) { + for (size_t i = 0; i < LongToSize(output_length); i++) { if (diff[i] < k || diff[i] > k + 1) { MS_EXCEPTION(ValueError) << "For FractionalAvgPool, GeneratePoolingSequence diff[" << i << "] is error"; } @@ -165,7 +169,7 @@ bool FractionalMaxPoolCpuKernelMod::FractionalMaxPoolLaunch(const std::vector(outputs[2]->addr); MS_EXCEPTION_IF_NULL(col_pooling_sequence_ptr); for (size_t i = 0; i < kInputDims; i++) { - output_shape_[i] = static_cast(std::floor(input_shape_[i] / pooling_ratio_[i])); + output_shape_[i] = static_cast(std::floor(input_shape_[i] / pooling_ratio_[i])); if (output_shape_[i] <= 0) { MS_EXCEPTION(ValueError) << "For '" << kernel_name_ << "', outputsize[" << i << "] cannot be 0."; } @@ -215,7 +219,7 @@ bool FractionalMaxPoolCpuKernelMod::FractionalMaxPoolLaunch(const std::vector(input_shape_[kInputShapeIndexN])); return true; } diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/fractional_max_pool_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/fractional_max_pool_cpu_kernel.h index 0d543b23a05..c4e7a5ec711 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/fractional_max_pool_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/fractional_max_pool_cpu_kernel.h @@ -53,8 +53,8 @@ class FractionalMaxPoolCpuKernelMod : public DeprecatedNativeCpuKernelMod { FractionalMaxPoolCpuKernelMod *, const std::vector &, const std::vector &)>; static std::vector> func_list_; FractionalMaxPoolFunc kernel_func_; - std::vector input_shape_; - std::vector output_shape_; + std::vector input_shape_; + std::vector output_shape_; std::vector pooling_ratio_; bool pseudo_random_{false}; bool overlapping_{false}; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/fractional_max_pool_grad_cpu_kernel.cc 
b/mindspore/ccsrc/plugin/device/cpu/kernel/fractional_max_pool_grad_cpu_kernel.cc index 6ab149a67a0..db634cee923 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/fractional_max_pool_grad_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/fractional_max_pool_grad_cpu_kernel.cc @@ -150,7 +150,7 @@ bool FractionalMaxPoolGradCpuKernelMod::FractionalMaxPoolGradCompute( const int64_t input_index = (b * tensor_in_shape_[kInputShapeIndexH] + h) * tensor_in_shape_[kInputShapeIndexW] + w; // Walk through each channel (depth). - for (size_t d = 0; d < tensor_in_shape_[kInputShapeIndexC]; ++d) { + for (size_t d = 0; d < LongToSize(tensor_in_shape_[kInputShapeIndexC]); ++d) { const T &input_ref = tensor_in_mat.coeffRef(d, input_index); T &output_ref = output_mat.coeffRef(d, output_index); int64_t &output_index_ref = output_index_mat.coeffRef(d, output_index); diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/fractional_max_pool_grad_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/fractional_max_pool_grad_cpu_kernel.h index 6cfb7062ab1..dd75428c80e 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/fractional_max_pool_grad_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/fractional_max_pool_grad_cpu_kernel.h @@ -61,8 +61,8 @@ class FractionalMaxPoolGradCpuKernelMod : public DeprecatedNativeCpuKernelMod { const std::vector &)>; static std::vector> func_list_; FractionalMaxPoolGradFunc kernel_func_; - std::vector tensor_in_shape_; - std::vector tensor_out_shape_; + std::vector tensor_in_shape_; + std::vector tensor_out_shape_; bool overlapping_{false}; }; } // namespace kernel diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/fused_ada_factor_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/fused_ada_factor_cpu_kernel.cc index 18fe4dd3c1c..a3d392bbd90 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/fused_ada_factor_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/fused_ada_factor_cpu_kernel.cc @@ -63,6 +63,9 @@ void FusedAdaFactorCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); param_dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, kParamIndex); auto shape = AnfAlgo::GetInputDeviceShape(kernel_node, kParamIndex); + if (AnfAlgo::IsShapesDynamic({shape})) { + return; + } elem_num_ = std::accumulate(shape.begin(), shape.end(), 1UL, std::multiplies()); if (elem_num_ < 1) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the elem num of 'param' can not be zero."; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/fused_cast_adam_weight_decay_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/fused_cast_adam_weight_decay_cpu_kernel.cc index 2eb2ac58cb5..f5972df42e1 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/fused_cast_adam_weight_decay_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/fused_cast_adam_weight_decay_cpu_kernel.cc @@ -124,7 +124,10 @@ void FusedCastAdamWeightDecayCpuKernelMod::LaunchFusedCastAdamFp16(const std::ve void FusedCastAdamWeightDecayCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); - std::vector var_shape = AnfAlgo::GetInputDeviceShape(kernel_node, kVarIndex); + auto var_shape = AnfAlgo::GetInputDeviceShape(kernel_node, kVarIndex); + if (AnfAlgo::IsShapesDynamic({var_shape})) { + return; + } var_dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, kVarIndex); gradient_dtype_ = 
AnfAlgo::GetInputDeviceDataType(kernel_node, kGradIndex); size_t input_num = common::AnfAlgo::GetInputTensorNum(kernel_node); @@ -138,8 +141,8 @@ void FusedCastAdamWeightDecayCpuKernelMod::InitKernel(const CNodePtr &kernel_nod << kFusedCastAdamWeightDecayOutputNum << ", but got: " << output_num; } elem_num_ = 1; - for (size_t i : var_shape) { - elem_num_ *= i; + for (auto i : var_shape) { + elem_num_ *= static_cast(i); } if (elem_num_ < 1) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of 'var' can not be zero."; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/gather_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/gather_cpu_kernel.cc index 2712ce00718..7874305b272 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/gather_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/gather_cpu_kernel.cc @@ -103,16 +103,16 @@ bool GatherCpuKernelMod::LaunchKernel(const std::vector &inp size_t outer_size = 1, inner_size = 1; auto axis = static_cast(axis_); for (size_t i = 0; i < axis; ++i) { - outer_size *= input_shape_.at(i); + outer_size *= LongToSize(input_shape_.at(i)); } for (size_t i = axis + 1; i < input_shape_.size(); ++i) { - inner_size *= input_shape_.at(i); + inner_size *= LongToSize(input_shape_.at(i)); } size_t indices_element_size = 1; for (size_t i = 0; i < indices_shape_.size(); i++) { - indices_element_size *= indices_shape_.at(i); + indices_element_size *= LongToSize(indices_shape_.at(i)); } - auto limit = input_shape_.at(axis); + auto limit = LongToSize(input_shape_.at(axis)); size_t byte_inner_size = inner_size * sizeof(T); size_t byte_out_stride = indices_element_size * byte_inner_size; auto task = [&](size_t start, size_t end) { diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/gather_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/gather_cpu_kernel.h index 4e9bc72f572..b79a9918ac0 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/gather_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/gather_cpu_kernel.h @@ -51,9 +51,9 @@ class GatherCpuKernelMod : public DeprecatedNativeCpuKernelMod { static std::vector> func_list_; GatherFunc kernel_func_; - std::vector input_shape_; - std::vector indices_shape_; - std::vector output_shape_; + ShapeVector input_shape_; + ShapeVector indices_shape_; + ShapeVector output_shape_; int64_t axis_{0}; bool is_dynamic_shape_{false}; CNodeWeakPtr node_wpt_; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/gather_d_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/gather_d_cpu_kernel.cc index e955a3d9ac4..fa54aac0a81 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/gather_d_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/gather_d_cpu_kernel.cc @@ -24,12 +24,12 @@ namespace { constexpr size_t kGatherDInputsNum = 3; constexpr size_t kGatherDOutputsNum = 1; -size_t get_element_num(const std::vector &shape) { +int64_t get_element_num(const std::vector &shape) { size_t size = 1; for (size_t i = 0; i < shape.size(); i++) { size *= shape[i]; } - return size; + return SizeToLong(size); } template @@ -87,11 +87,8 @@ int GatherDCpuKernelMod::Resize(const BaseOperatorPtr &base_operator, const std: } const size_t kIndexIdx = 2; - auto input_shape = inputs[0]->GetShapeVector(); - auto index_shape = inputs[kIndexIdx]->GetShapeVector(); - (void)std::transform(input_shape.begin(), input_shape.end(), std::back_inserter(input_shape_), LongToSize); - (void)std::transform(index_shape.begin(), index_shape.end(), std::back_inserter(index_shape_), LongToSize); - + 
input_shape_ = Convert2SizeT(inputs[0]->GetShapeVector()); + index_shape_ = Convert2SizeT(inputs[kIndexIdx]->GetShapeVector()); if (input_shape_.size() != index_shape_.size()) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', shape size of 'x' must be equal to 'index', but got shape size of 'x': " @@ -107,10 +104,10 @@ bool GatherDCpuKernelMod::LaunchKernel(const std::vector &in const std::vector &outputs) { CHECK_KERNEL_INPUTS_NUM(inputs.size(), kGatherDInputsNum, kernel_name_); CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kGatherDOutputsNum, kernel_name_); - size_t input_size = get_element_num(input_shape_) * sizeof(T); - size_t index_size = get_element_num(index_shape_) * sizeof(I); + auto input_size = get_element_num(input_shape_) * sizeof(T); + auto index_size = get_element_num(index_shape_) * sizeof(I); size_t dim_size = sizeof(int); - size_t output_size = get_element_num(output_shape_) * sizeof(T); + auto output_size = get_element_num(output_shape_) * sizeof(T); if (inputs[0]->size != input_size) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the address size of 'x' must be " << input_size << ", but got " << inputs[0]->size << "."; @@ -140,7 +137,7 @@ bool GatherDCpuKernelMod::LaunchKernel(const std::vector &in dim[0] = static_cast(dim[0] + input_rank); } // check index - int max_index = SizeToInt(input_shape_[dim[0]]); + int max_index = input_shape_[dim[0]]; index_size = get_element_num(index_shape_); for (size_t i = 0; i < index_size; ++i) { if (index[i] >= max_index || index[i] < -max_index) { diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/gather_d_grad_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/gather_d_grad_cpu_kernel.cc index bec48a2eacb..251da8d02c9 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/gather_d_grad_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/gather_d_grad_cpu_kernel.cc @@ -25,19 +25,13 @@ namespace { constexpr size_t kGatherDGradInputsNum = 2; constexpr size_t kGatherDGradOutputsNum = 1; -size_t get_element_num(const std::vector &shape) { - size_t size = 1; - for (size_t i = 0; i < shape.size(); i++) { - size *= shape[i]; - } - return size; -} +size_t get_element_num(const ShapeVector &shape) { return SizeOf(shape); } template void GatherDGradCopyTask(size_t cur, std::vector *pos, T *input, I *index, const int &dim, T *output, - const std::vector &output_shape, const std::vector &out_cargo_size, + const ShapeVector &output_shape, const std::vector &out_cargo_size, const std::vector &input_cargo_size) { - for (size_t i = 0; i < output_shape[cur]; ++i) { + for (size_t i = 0; i < LongToSize(output_shape[cur]); ++i) { (*pos)[cur] = i; if (cur == output_shape.size() - 1) { size_t input_offset = 0; @@ -123,7 +117,7 @@ bool GatherDGradCpuKernelMod::LaunchKernel(const std::vector // check index index_size = get_element_num(index_shape_); - int max_index = SizeToInt(output_shape_[axis_]); + int max_index = LongToInt(output_shape_[axis_]); for (size_t i = 0; i < index_size; ++i) { if (index[i] >= max_index || index[i] < -max_index) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the value of 'index' must be in [" << -max_index << ", " @@ -142,12 +136,12 @@ bool GatherDGradCpuKernelMod::LaunchKernel(const std::vector // out_cargo_size std::vector out_cargo_size = std::vector(output_shape_.size(), 1); for (int i = out_cargo_size.size() - 2; i >= 0; --i) { - out_cargo_size[i] = output_shape_[i + 1] * out_cargo_size[i + 1]; + out_cargo_size[i] = LongToSize(output_shape_[i + 1]) * out_cargo_size[i + 1]; } // 
input_cargo_size std::vector input_cargo_size = std::vector(input_shape_.size(), 1); for (int i = input_cargo_size.size() - 2; i >= 0; --i) { - input_cargo_size[i] = input_shape_[i + 1] * input_cargo_size[i + 1]; + input_cargo_size[i] = LongToSize(input_shape_[i + 1]) * input_cargo_size[i + 1]; } // copy task diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/gather_d_grad_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/gather_d_grad_cpu_kernel.h index 47924efc92c..d3ae2ac2827 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/gather_d_grad_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/gather_d_grad_cpu_kernel.h @@ -45,9 +45,9 @@ class GatherDGradCpuKernelMod : public DeprecatedNativeCpuKernelMod { static std::vector> func_list_; GatherDGradFunc kernel_func_; - std::vector input_shape_; - std::vector index_shape_; - std::vector output_shape_; + ShapeVector input_shape_; + ShapeVector index_shape_; + ShapeVector output_shape_; int32_t axis_{1}; }; } // namespace kernel diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/gathernd_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/gathernd_cpu_kernel.cc index 62c05d3b0dc..64babafe341 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/gathernd_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/gathernd_cpu_kernel.cc @@ -58,23 +58,26 @@ void GatherNdCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { input_shapes_ = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); indices_shapes_ = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); output_shapes_ = common::AnfAlgo::GetOutputInferShape(kernel_node, 0); + if (AnfAlgo::IsShapesDynamic({input_shapes_, indices_shapes_, output_shapes_})) { + return; + } dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); // ReShape() size_t dim_of_indices = 1; for (size_t i = 0; i < indices_shapes_.size() - IntToSize(1); ++i) { - dim_of_indices *= indices_shapes_[i]; + dim_of_indices *= LongToSize(indices_shapes_[i]); } size_t dim_after_indices = 1; - size_t dim_indices_last = indices_shapes_[indices_shapes_.size() - IntToSize(1)]; + size_t dim_indices_last = LongToSize(indices_shapes_[indices_shapes_.size() - IntToSize(1)]); if (dim_indices_last == 0) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the value of indices_shapes_[" << indices_shapes_.size() << " - 1] can not be 0."; } for (size_t i = dim_indices_last; i < input_shapes_.size(); i++) { - dim_after_indices *= input_shapes_[i]; + dim_after_indices *= LongToSize(input_shapes_[i]); } (void)dims_.emplace_back(dim_of_indices); @@ -91,7 +94,7 @@ void GatherNdCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { for (size_t i = dim_indices_last - 1; i > 0; --i) { batch_strides_[i - 1] = input_shapes_[i - 1]; - batch_indices_[i - 1] = batch_indices_[i] * SizeToInt(input_shapes_[i]); + batch_indices_[i - 1] = batch_indices_[i] * LongToInt(input_shapes_[i]); } std::vector support_list; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/gathernd_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/gathernd_cpu_kernel.h index 03ddaf2b4e7..b8428e9ea67 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/gathernd_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/gathernd_cpu_kernel.h @@ -45,9 +45,9 @@ class GatherNdCpuKernelMod : public DeprecatedNativeCpuKernelMod { static std::vector> func_list_; GatherNdFunc kernel_func_; - std::vector input_shapes_; - std::vector indices_shapes_; - std::vector output_shapes_; + ShapeVector input_shapes_; + ShapeVector 
indices_shapes_; + ShapeVector output_shapes_; std::vector dims_; std::vector batch_indices_; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/gcd_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/gcd_cpu_kernel.cc index dc092d69995..1a65bc4c12f 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/gcd_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/gcd_cpu_kernel.cc @@ -37,21 +37,9 @@ const size_t kGcdOutputsNum = 1; bool GcdCpuKernelMod::Init(const BaseOperatorPtr &base_operator, const std::vector &inputs, const std::vector &outputs) { kernel_name_ = base_operator->name(); - std::vector x1_shape = inputs[0]->GetShapeVector(); - std::vector x2_shape = inputs[1]->GetShapeVector(); - std::vector y_shape = outputs[0]->GetShapeVector(); - x1_shape_.resize(x1_shape.size(), 1); - x2_shape_.resize(x2_shape.size(), 1); - y_shape_.resize(y_shape.size(), 1); - for (size_t i = 0; i < x1_shape.size(); i++) { - x1_shape_[i] = static_cast(x1_shape[i]); - } - for (size_t i = 0; i < x2_shape.size(); i++) { - x2_shape_[i] = static_cast(x2_shape[i]); - } - for (size_t i = 0; i < y_shape.size(); i++) { - y_shape_[i] = static_cast(y_shape[i]); - } + x1_shape_ = inputs[0]->GetShapeVector(); + x2_shape_ = inputs[1]->GetShapeVector(); + y_shape_ = outputs[0]->GetShapeVector(); auto kernel_attr = GetKernelAttrFromTensors(inputs, outputs); auto [is_match, index] = MatchKernelAttr(kernel_attr, GetOpSupport()); if (!is_match) { diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/gcd_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/gcd_cpu_kernel.h index 6f1a3b7afd1..a2367ac364b 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/gcd_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/gcd_cpu_kernel.h @@ -56,9 +56,9 @@ class GcdCpuKernelMod : public NativeCpuKernelMod { static std::vector> func_list_; GcdLaunchFunc kernel_func_; - std::vector x1_shape_; - std::vector x2_shape_; - std::vector y_shape_; + ShapeVector x1_shape_; + ShapeVector x2_shape_; + ShapeVector y_shape_; bool need_bcast_{false}; }; } // namespace kernel diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/ger_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/ger_cpu_kernel.cc index cb4b9e62b98..1323ee5c8f2 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/ger_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/ger_cpu_kernel.cc @@ -109,8 +109,9 @@ bool GerCpuKernelMod::LaunchKernel(const std::vector &inputs } size_t output_size_ = 1; for (size_t i = 0; i < output_shape_.size(); ++i) { - output_size_ *= output_shape_[i]; + output_size_ *= LongToSize(output_shape_[i]); } + size_t input1_size = input_shape_1_[input_shape_1_.size() - 1]; size_t input2_size = input_shape_2_[input_shape_2_.size() - 1]; size_t output_size = input1_size * input2_size; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/grid_sampler_2d_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/grid_sampler_2d_cpu_kernel.cc index b2a6e57689f..9646f9d61ee 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/grid_sampler_2d_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/grid_sampler_2d_cpu_kernel.cc @@ -31,13 +31,16 @@ void GridSampler2DCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { x_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0); grid_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 1); output_shape_ = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); - output_number_ = output_shape_[kZero] * output_shape_[kOne] * output_shape_[kTwo] * output_shape_[kThree]; + if 
(AnfAlgo::IsShapesDynamic({x_shape_, grid_shape_, output_shape_})) { + return; + } + output_number_ = LongToSize(output_shape_[kZero] * output_shape_[kOne] * output_shape_[kTwo] * output_shape_[kThree]); dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); size_t stride_tmp = 1; - auto stride_compute = [this, &stride_tmp](std::vector &stride, std::vector shape) { + auto stride_compute = [this, &stride_tmp](std::vector &stride, ShapeVector shape) { for (int32_t i = 3; i > -1; i--) { stride.insert(stride.begin(), stride_tmp); - stride_tmp *= shape[i]; + stride_tmp *= LongToSize(shape[i]); } stride_tmp = 1; }; @@ -139,10 +142,10 @@ void GridSampler2DCpuKernelMod::ComputeTask(float16 *x_addr, float16 *grid_addr, if (count == 1) { count--; } - out_iter[count] = out_iter[kOne] % output_shape_[count]; - out_iter[1] /= output_shape_[count--]; + out_iter[count] = out_iter[kOne] % LongToSize(output_shape_[count]); + out_iter[1] /= LongToSize(output_shape_[count--]); } - const size_t out_c = output_shape_[kOne]; + const size_t out_c = LongToSize(output_shape_[kOne]); int64_t grid_offset = out_iter[kZero] * grid_stride_[kZero] + out_iter[kTwo] * grid_stride_[kOne] + out_iter[kThree] * grid_stride_[kTwo]; float x = static_cast(grid_addr[grid_offset]); @@ -206,7 +209,7 @@ void GridSampler2DCpuKernelMod::LaunchKernel(const std::vector &inpu auto x_data_addr = reinterpret_cast(inputs[0]->addr); auto grid_data_addr = reinterpret_cast(inputs[1]->addr); auto output_data_addr = reinterpret_cast(outputs[0]->addr); - size_t loop_count = output_shape_[0] * output_shape_[2] * output_shape_[3]; + size_t loop_count = LongToSize(output_shape_[0] * output_shape_[2] * output_shape_[3]); auto task = [this, &x_data_addr, &grid_data_addr, &output_data_addr](size_t start, size_t end) { for (size_t i = start; i < end; i++) { ComputeTask(x_data_addr, grid_data_addr, output_data_addr, i); @@ -224,7 +227,7 @@ void GridSampler2DCpuKernelMod::LaunchKernel(const std::vector &inpu auto x_data_addr = reinterpret_cast(inputs[0]->addr); auto grid_data_addr = reinterpret_cast(inputs[1]->addr); auto output_data_addr = reinterpret_cast(outputs[0]->addr); - size_t loop_count = output_shape_[0] * output_shape_[2] * output_shape_[3]; + size_t loop_count = LongToSize(output_shape_[0] * output_shape_[2] * output_shape_[3]); auto task = [this, &x_data_addr, &grid_data_addr, &output_data_addr](size_t start, size_t end) { for (size_t i = start; i < end; i++) { ComputeTask(x_data_addr, grid_data_addr, output_data_addr, i); diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/grid_sampler_2d_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/grid_sampler_2d_cpu_kernel.h index 510952a6533..0f2da9e9f3b 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/grid_sampler_2d_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/grid_sampler_2d_cpu_kernel.h @@ -46,9 +46,9 @@ class GridSampler2DCpuKernelMod : public DeprecatedNativeCpuKernelMod { } private: - std::vector x_shape_; - std::vector grid_shape_; - std::vector output_shape_; + ShapeVector x_shape_; + ShapeVector grid_shape_; + ShapeVector output_shape_; std::vector x_stride_; std::vector grid_stride_; std::vector output_stride_; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/grid_sampler_2d_grad_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/grid_sampler_2d_grad_cpu_kernel.cc index 9f3245f6b00..cc85d4c4fd2 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/grid_sampler_2d_grad_cpu_kernel.cc +++ 
b/mindspore/ccsrc/plugin/device/cpu/kernel/grid_sampler_2d_grad_cpu_kernel.cc @@ -33,8 +33,11 @@ void GridSampler2DGradCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { grid_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, kTwo); dx_shape_ = AnfAlgo::GetOutputDeviceShape(kernel_node, kZero); dgrid_shape_ = AnfAlgo::GetOutputDeviceShape(kernel_node, kOne); - dx_size_ = dx_shape_[kZero] * dx_shape_[kOne] * dx_shape_[kTwo] * dx_shape_[kThree]; - grid_size_ = grid_shape_[kZero] * grid_shape_[kOne] * grid_shape_[kTwo] * grid_shape_[kThree]; + if (AnfAlgo::IsShapesDynamic({x_shape_, grad_shape_, grid_shape_, dx_shape_, dgrid_shape_})) { + return; + } + dx_size_ = LongToSize(dx_shape_[kZero] * dx_shape_[kOne] * dx_shape_[kTwo] * dx_shape_[kThree]); + grid_size_ = LongToSize(grid_shape_[kZero] * grid_shape_[kOne] * grid_shape_[kTwo] * grid_shape_[kThree]); dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, kZero); interpolation_mode_ = common::AnfAlgo::GetNodeAttr(kernel_node, "interpolation_mode"); padding_mode_ = common::AnfAlgo::GetNodeAttr(kernel_node, "padding_mode"); diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/grid_sampler_2d_grad_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/grid_sampler_2d_grad_cpu_kernel.h index 57814555175..be80c71ace6 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/grid_sampler_2d_grad_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/grid_sampler_2d_grad_cpu_kernel.h @@ -76,11 +76,11 @@ class GridSampler2DGradCpuKernelMod : public DeprecatedNativeCpuKernelMod { } private: - std::vector grad_shape_; - std::vector x_shape_; - std::vector grid_shape_; - std::vector dx_shape_; - std::vector dgrid_shape_; + ShapeVector grad_shape_; + ShapeVector x_shape_; + ShapeVector grid_shape_; + ShapeVector dx_shape_; + ShapeVector dgrid_shape_; std::string interpolation_mode_; std::string padding_mode_; bool align_corners_; @@ -450,7 +450,7 @@ class TensorAcc { }; template -TensorAcc accessor(T *data_ptr, std::vector sizess) { +TensorAcc accessor(T *data_ptr, std::vector sizess) { static_assert(N > 0, "accessor is used for indexing tensor, for scalars use *data_ptr()"); int64_t stride_tmp = 1; int64_t *strid = new int64_t[N]; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/grid_sampler_3d_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/grid_sampler_3d_cpu_kernel.cc index 80a7b391c75..23c57125c70 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/grid_sampler_3d_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/grid_sampler_3d_cpu_kernel.cc @@ -32,14 +32,17 @@ void GridSampler3DCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { x_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, kZero); grid_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, kOne); output_shape_ = AnfAlgo::GetOutputDeviceShape(kernel_node, kZero); - output_number_ = - output_shape_[kZero] * output_shape_[kOne] * output_shape_[kTwo] * output_shape_[kThree] * output_shape_[kFour]; + if (AnfAlgo::IsShapesDynamic({x_shape_, grid_shape_, output_shape_})) { + return; + } + output_number_ = static_cast(output_shape_[kZero] * output_shape_[kOne] * output_shape_[kTwo] * + output_shape_[kThree] * output_shape_[kFour]); dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, kZero); size_t stride_tmp = kOne; - auto stride_compute = [&](std::vector &stride, std::vector shape) { + auto stride_compute = [&](std::vector &stride, std::vector shape) { for (int i = kFour; i > -static_cast(kOne); i--) { (void)stride.insert(stride.begin(), stride_tmp); - 
stride_tmp *= shape[static_cast(i)]; + stride_tmp *= static_cast(shape[static_cast(i)]); } stride_tmp = kOne; }; @@ -220,9 +223,8 @@ T GridSampler3DCpuKernelMod::reflect_coordinates(T coord, int64_t twice_low, int } } -bool GridSampler3DCpuKernelMod::within_bounds_3d(int64_t d, int64_t h, int64_t w, size_t D, size_t H, size_t W) { - return d >= 0 && d < static_cast(D) && h >= 0 && h < static_cast(H) && w >= 0 && - w < static_cast(W); +bool GridSampler3DCpuKernelMod::within_bounds_3d(int64_t d, int64_t h, int64_t w, int64_t D, int64_t H, int64_t W) { + return d >= 0 && d < D && h >= 0 && h < H && w >= 0 && w < W; } MS_KERNEL_FACTORY_REG(NativeCpuKernelMod, GridSampler3D, GridSampler3DCpuKernelMod); diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/grid_sampler_3d_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/grid_sampler_3d_cpu_kernel.h index 98247812510..9983f44359c 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/grid_sampler_3d_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/grid_sampler_3d_cpu_kernel.h @@ -46,9 +46,9 @@ class GridSampler3DCpuKernelMod : public DeprecatedNativeCpuKernelMod { } private: - std::vector x_shape_; - std::vector grid_shape_; - std::vector output_shape_; + std::vector x_shape_; + std::vector grid_shape_; + std::vector output_shape_; std::vector x_stride_; std::vector grid_stride_; std::vector output_stride_; @@ -66,7 +66,7 @@ class GridSampler3DCpuKernelMod : public DeprecatedNativeCpuKernelMod { template T reflect_coordinates(T coord, int64_t twice_low, int64_t twice_high); - bool within_bounds_3d(int64_t d, int64_t h, int64_t w, size_t D, size_t H, size_t W); + bool within_bounds_3d(int64_t d, int64_t h, int64_t w, int64_t D, int64_t H, int64_t W); }; } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/grid_sampler_3d_grad_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/grid_sampler_3d_grad_cpu_kernel.cc index 814775b6584..786e6dfaa9a 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/grid_sampler_3d_grad_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/grid_sampler_3d_grad_cpu_kernel.cc @@ -34,11 +34,14 @@ void GridSampler3DGradCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { grid_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, kTwo); dx_shape_ = AnfAlgo::GetOutputDeviceShape(kernel_node, kZero); dgrid_shape_ = AnfAlgo::GetOutputDeviceShape(kernel_node, kOne); - dx_size_ = dx_shape_[kZero] * dx_shape_[kOne] * dx_shape_[kTwo] * dx_shape_[kThree] * dx_shape_[kFour]; - grid_size_ = grid_shape_[kZero] * grid_shape_[kOne] * grid_shape_[kTwo] * grid_shape_[kThree]; + if (AnfAlgo::IsShapesDynamic({x_shape_, grad_shape_, grid_shape_, dx_shape_, dgrid_shape_})) { + return; + } + dx_size_ = LongToSize(dx_shape_[kZero] * dx_shape_[kOne] * dx_shape_[kTwo] * dx_shape_[kThree] * dx_shape_[kFour]); + grid_size_ = LongToSize(grid_shape_[kZero] * grid_shape_[kOne] * grid_shape_[kTwo] * grid_shape_[kThree]); dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, kZero); size_t stride_tmp = kOne; - auto stride_compute = [&](std::vector &stride, std::vector shape) { + auto stride_compute = [&](std::vector &stride, std::vector shape) { for (int i = kFour; i > -static_cast(kOne); i--) { stride.insert(stride.begin(), stride_tmp); stride_tmp *= shape[i]; @@ -87,7 +90,7 @@ void GridSampler3DGradCpuKernelMod::BilinearKernel(std::vector addr, std::v T bnw = (x_tse - x) * (y_tse - y) * (z - z_tse), bne = (x - x_tsw) * (y_tsw - y) * (z - z_tsw); T bsw = (x_tne - x) * (y - 
y_tne) * (z - z_tne), bse = (x - x_tnw) * (y - y_tnw) * (z - z_tnw); T gx = static_cast(kZero), gy = static_cast(kZero), gz = static_cast(kZero); - for (size_t c = kZero; c < x_shape_[kOne]; + for (size_t c = kZero; c < LongToSize(x_shape_[kOne]); c++, ptr[kZero] += grad_stride_[kOne], ptr[kOne] += x_stride_[kOne], ptr[kTwo] += dx_stride_[kOne]) { T grad_out = addr[kZero][ptr[kZero]]; safe_add_3d(&addr[kTwo][ptr[kTwo]], z_tnw, y_tnw, x_tnw, dx_stride_[kTwo], dx_stride_[kThree], dx_stride_[kFour], @@ -173,9 +176,9 @@ void GridSampler3DGradCpuKernelMod::ComputeTask(T *grad_addr, T *x_addr, T *grid const size_t &n) { size_t grid_ptr_N = n * grid_stride_[kZero]; size_t dgrid_ptr_NDHW = n * dgrid_stride_[kZero]; - for (size_t d = kZero; d < grid_shape_[kOne]; d++) { - for (size_t h = kZero; h < grid_shape_[kTwo]; h++) { - for (size_t w = kZero; w < grid_shape_[kThree]; w++, dgrid_ptr_NDHW += dgrid_stride_[kThree]) { + for (size_t d = kZero; d < LongToSize(grid_shape_[kOne]); d++) { + for (size_t h = kZero; h < LongToSize(grid_shape_[kTwo]); h++) { + for (size_t w = kZero; w < LongToSize(grid_shape_[kThree]); w++, dgrid_ptr_NDHW += dgrid_stride_[kThree]) { size_t grid_ptr_NDHW = grid_ptr_N + d * grid_stride_[kOne] + h * grid_stride_[kTwo] + w * grid_stride_[kThree]; T x = grid_addr[grid_ptr_NDHW]; T y = grid_addr[grid_ptr_NDHW + grid_stride_[kFour]]; @@ -200,7 +203,7 @@ void GridSampler3DGradCpuKernelMod::ComputeTask(T *grad_addr, T *x_addr, T *grid size_t grad_ptr_NCDHW = n * grad_stride_[kZero] + d * grad_stride_[kTwo] + h * grad_stride_[kThree] + w * grad_stride_[kFour]; size_t dx_ptr_NC = n * dx_stride_[kZero]; - for (size_t c = kZero; c < x_shape_[kOne]; + for (size_t c = kZero; c < LongToSize(x_shape_[kOne]); c++, grad_ptr_NCDHW += grad_stride_[kOne], dx_ptr_NC += dx_stride_[kOne]) { safe_add_3d(&dx_addr[dx_ptr_NC], z_nearest, y_nearest, x_nearest, dx_stride_[kTwo], dx_stride_[kThree], dx_stride_[kFour], x_shape_[kTwo], x_shape_[kThree], x_shape_[kFour], @@ -223,7 +226,7 @@ void GridSampler3DGradCpuKernelMod::LaunchKernel(const std::vector & auto grid_data_addr = reinterpret_cast(inputs[kTwo]->addr); auto dx_data_addr = reinterpret_cast(outputs[kZero]->addr); auto dgrid_data_addr = reinterpret_cast(outputs[kOne]->addr); - size_t loop_count = x_shape_[kZero]; + size_t loop_count = LongToSize(x_shape_[kZero]); for (size_t i = kZero; i < dx_size_; i++) { dx_data_addr[i] = static_cast(kZero); } @@ -241,7 +244,7 @@ void GridSampler3DGradCpuKernelMod::LaunchKernel(const std::vector & } template -T GridSampler3DGradCpuKernelMod::grid_sampler_compute_source_index_set_grad(T coord, size_t size, +T GridSampler3DGradCpuKernelMod::grid_sampler_compute_source_index_set_grad(T coord, int64_t size, const std::string &padding_mode, bool align_corners, T *grad_x) { T grad_clip, grad_refl; @@ -253,15 +256,15 @@ T GridSampler3DGradCpuKernelMod::grid_sampler_compute_source_index_set_grad(T co coord = ((coord + kOne) * size - kOne) / kTwo; } if (padding_mode == "border") { - coord = clip_coordinates_set_grad(coord, static_cast(size), &grad_clip); + coord = clip_coordinates_set_grad(coord, size, &grad_clip); *grad_x = (*grad_x) * grad_clip; } else if (padding_mode == "reflection") { if (align_corners) { - coord = reflect_coordinates_set_grad(coord, static_cast(kZero), kTwo * (size - kOne), &grad_refl); + coord = reflect_coordinates_set_grad(coord, kZero, kTwo * (size - kOne), &grad_refl); } else { - coord = reflect_coordinates_set_grad(coord, -static_cast(kOne), kTwo * size - kOne, &grad_refl); + coord = 
reflect_coordinates_set_grad(coord, -kOne, kTwo * size - kOne, &grad_refl); } - coord = clip_coordinates_set_grad(coord, static_cast(size), &grad_clip); + coord = clip_coordinates_set_grad(coord, size, &grad_clip); *grad_x = (*grad_x) * grad_refl * grad_clip; } return coord; @@ -313,16 +316,16 @@ T GridSampler3DGradCpuKernelMod::reflect_coordinates_set_grad(T x, int64_t twice template void GridSampler3DGradCpuKernelMod::safe_add_3d(T *data, int64_t d, int64_t h, int64_t w, size_t sD, size_t sH, - size_t sW, size_t D, size_t H, size_t W, T delta) { + size_t sW, int64_t D, int64_t H, int64_t W, T delta) { if (within_bounds_3d(d, h, w, D, H, W)) { data[d * sD + h * sH + w * sW] += static_cast(delta); } } -bool GridSampler3DGradCpuKernelMod::within_bounds_3d(int64_t d, int64_t h, int64_t w, size_t D, size_t H, size_t W) { - int64_t iD = static_cast(D); - int64_t iH = static_cast(H); - int64_t iW = static_cast(W); +bool GridSampler3DGradCpuKernelMod::within_bounds_3d(int64_t d, int64_t h, int64_t w, int64_t D, int64_t H, int64_t W) { + int64_t iD = D; + int64_t iH = H; + int64_t iW = W; return d >= 0 && d < iD && h >= 0 && h < iH && w >= 0 && w < iW; } diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/grid_sampler_3d_grad_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/grid_sampler_3d_grad_cpu_kernel.h index daa5a647ea0..6a6e923e879 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/grid_sampler_3d_grad_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/grid_sampler_3d_grad_cpu_kernel.h @@ -54,11 +54,11 @@ class GridSampler3DGradCpuKernelMod : public DeprecatedNativeCpuKernelMod { } private: - std::vector grad_shape_; - std::vector x_shape_; - std::vector grid_shape_; - std::vector dx_shape_; - std::vector dgrid_shape_; + std::vector grad_shape_; + std::vector x_shape_; + std::vector grid_shape_; + std::vector dx_shape_; + std::vector dgrid_shape_; std::vector grad_stride_; std::vector x_stride_; std::vector grid_stride_; @@ -77,7 +77,7 @@ class GridSampler3DGradCpuKernelMod : public DeprecatedNativeCpuKernelMod { void ComputeTask(T *grad_addr, T *x_addr, T *grid_addr, T *dx_addr, T *dgrid_addr, const size_t &n); template - T grid_sampler_compute_source_index_set_grad(T coord, size_t size, const std::string &padding_mode, + T grid_sampler_compute_source_index_set_grad(T coord, int64_t size, const std::string &padding_mode, bool align_corners, T *grad_x); template @@ -87,10 +87,10 @@ class GridSampler3DGradCpuKernelMod : public DeprecatedNativeCpuKernelMod { T clip_coordinates_set_grad(T x, int64_t clip_limit, T *grad_x); template - void safe_add_3d(T *data, int64_t d, int64_t h, int64_t w, size_t sD, size_t sH, size_t sW, size_t D, size_t H, - size_t W, T delta); + void safe_add_3d(T *data, int64_t d, int64_t h, int64_t w, size_t sD, size_t sH, size_t sW, int64_t D, int64_t H, + int64_t W, T delta); - bool within_bounds_3d(int64_t d, int64_t h, int64_t w, size_t D, size_t H, size_t W); + bool within_bounds_3d(int64_t d, int64_t h, int64_t w, int64_t D, int64_t H, int64_t W); }; } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/hsigmoid_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/hsigmoid_cpu_kernel.cc index 1ac0826363e..00d33c0d8c0 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/hsigmoid_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/hsigmoid_cpu_kernel.cc @@ -42,9 +42,8 @@ void HSigmoidCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); kernel_name_ 
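// Aside: moving D/H/W to int64_t lets the bounds check compare signed values
// directly, which is what the final pair above reduces to. Sketch:
#include <cstddef>
#include <cstdint>

bool within_bounds_3d(int64_t d, int64_t h, int64_t w, int64_t D, int64_t H, int64_t W) {
  return d >= 0 && d < D && h >= 0 && h < H && w >= 0 && w < W;
}

// Guarded scatter-add: taps that fall outside the volume (e.g. with "zeros"
// padding) contribute nothing instead of writing out of bounds.
template <typename T>
void safe_add_3d(T *data, int64_t d, int64_t h, int64_t w, size_t sD, size_t sH,
                 size_t sW, int64_t D, int64_t H, int64_t W, T delta) {
  if (within_bounds_3d(d, h, w, D, H, W)) {
    data[static_cast<size_t>(d) * sD + static_cast<size_t>(h) * sH +
         static_cast<size_t>(w) * sW] += delta;
  }
}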
= common::AnfAlgo::GetCNodeName(kernel_node); x_shape_ = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); - for (const uint64_t &d : x_shape_) { - tensor_size_ *= d; - } + + tensor_size_ = SizeOf(x_shape_); std::vector support_list; (void)std::transform(func_list_.begin(), func_list_.end(), std::back_inserter(support_list), diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/hsigmoid_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/hsigmoid_cpu_kernel.h index a54bb646e72..723ed403fe3 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/hsigmoid_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/hsigmoid_cpu_kernel.h @@ -45,7 +45,7 @@ class HSigmoidCpuKernelMod : public DeprecatedNativeCpuKernelMod { const std::vector &)>; static std::vector> func_list_; HSigmoidFunc kernel_func_; - std::vector x_shape_; + ShapeVector x_shape_; uint64_t tensor_size_ = 1; }; } // namespace kernel diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/hsigmoid_grad_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/hsigmoid_grad_cpu_kernel.cc index ab2f09ac2c3..a7f16c0008e 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/hsigmoid_grad_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/hsigmoid_grad_cpu_kernel.cc @@ -42,9 +42,8 @@ void HSigmoidGradCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); x_shape_ = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); - for (const uint64_t &d : x_shape_) { - tensor_size_ *= d; - } + + tensor_size_ = SizeOf(x_shape_); std::vector support_list; (void)std::transform(func_list_.begin(), func_list_.end(), std::back_inserter(support_list), diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/hsigmoid_grad_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/hsigmoid_grad_cpu_kernel.h index 905560db970..a76645490fa 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/hsigmoid_grad_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/hsigmoid_grad_cpu_kernel.h @@ -45,7 +45,7 @@ class HSigmoidGradCpuKernelMod : public DeprecatedNativeCpuKernelMod { const std::vector &)>; static std::vector> func_list_; HSigmoidGradFunc kernel_func_; - std::vector x_shape_; + ShapeVector x_shape_; uint64_t tensor_size_ = 1; }; } // namespace kernel diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/hsv_to_rgb_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/hsv_to_rgb_cpu_kernel.cc index 6de7be9dd9a..5ddf2905119 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/hsv_to_rgb_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/hsv_to_rgb_cpu_kernel.cc @@ -31,6 +31,9 @@ void HSVToRGBCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { MS_LOG(EXCEPTION) << "Needs " << kOutputNum << " output, but got " << output_num << "."; } shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); + if (AnfAlgo::IsShapesDynamic({shape})) { + return; + } input_dtype = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); if (shape.cend()[-1] != kNumDims) { MS_LOG(EXCEPTION) << "The last dimension of the input tensor must be size 3."; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/hsv_to_rgb_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/hsv_to_rgb_cpu_kernel.h index d9f1d4b53f0..1e805b694df 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/hsv_to_rgb_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/hsv_to_rgb_cpu_kernel.h @@ -47,7 +47,7 @@ class HSVToRGBCpuKernelMod : public 
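// Aside: tensor_size_ = SizeOf(x_shape_) replaces the hand-rolled product
// loops deleted in the HSigmoid hunks above (and the HSwish ones that follow).
// A plausible equivalent of the helper (a sketch, not the MindSpore definition):
#include <cstdint>
#include <numeric>
#include <vector>

size_t SizeOf(const std::vector<int64_t> &shape) {
  // Assumes dims are non-negative (dynamic shapes were filtered out earlier).
  // An empty shape yields 1, matching the scalar case and the
  // tensor_size_ = 1 default in these kernels.
  return std::accumulate(shape.begin(), shape.end(), size_t{1},
                         [](size_t acc, int64_t d) { return acc * static_cast<size_t>(d); });
}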
DeprecatedNativeCpuKernelMod { template void ComputeFloat(void *input, void *output, int64_t pixel_num); void ComputeHalf(void *input, void *output, int64_t pixel_num); - std::vector shape; + ShapeVector shape; const size_t kInputNum = 1; const size_t kOutputNum = 1; }; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/hswish_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/hswish_cpu_kernel.cc index c8b6de38f3f..3e5badb69d0 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/hswish_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/hswish_cpu_kernel.cc @@ -29,9 +29,8 @@ void HSwishCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); x_shape_ = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); - for (const uint64_t &d : x_shape_) { - tensor_size_ *= d; - } + + tensor_size_ = SizeOf(x_shape_); auto kernel_attr = GetKernelAttrFromNode(kernel_node); std::vector support_list; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/hswish_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/hswish_cpu_kernel.h index 6a3bf116ed8..f4bda47f32e 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/hswish_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/hswish_cpu_kernel.h @@ -45,7 +45,7 @@ class HSwishCpuKernelMod : public DeprecatedNativeCpuKernelMod { const std::vector &)>; static std::vector> func_list_; HSwishFunc kernel_func_; - std::vector x_shape_; + ShapeVector x_shape_; uint64_t tensor_size_ = 1; }; } // namespace kernel diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/hswish_grad_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/hswish_grad_cpu_kernel.cc index 1cd1f0f3f42..60227f59ff0 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/hswish_grad_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/hswish_grad_cpu_kernel.cc @@ -30,9 +30,7 @@ void HSwishGradCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); x_shape_ = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); - for (const uint64_t &d : x_shape_) { - tensor_size_ *= d; - } + tensor_size_ = SizeOf(x_shape_); auto kernel_attr = GetKernelAttrFromNode(kernel_node); std::vector support_list; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/hswish_grad_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/hswish_grad_cpu_kernel.h index 61749f08ef4..9d16d53d507 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/hswish_grad_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/hswish_grad_cpu_kernel.h @@ -45,7 +45,7 @@ class HSwishGradCpuKernelMod : public DeprecatedNativeCpuKernelMod { const std::vector &)>; static std::vector> func_list_; HSwishGradFunc kernel_func_; - std::vector x_shape_; + ShapeVector x_shape_; uint64_t tensor_size_ = 1; }; } // namespace kernel diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/igamma_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/igamma_cpu_kernel.cc index d95e5c87c48..a83a28dcfc8 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/igamma_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/igamma_cpu_kernel.cc @@ -59,13 +59,7 @@ constexpr int64_t kYOneElement = 2; constexpr size_t kInputNum = 2; constexpr size_t kOutputNum = 1; -size_t get_element_num(const std::vector &shape) { - size_t size = 1; - for (size_t i = 0; i < shape.size(); i++) { - size *= shape[i]; - } - return size; -} 
+int64_t get_element_num(const std::vector &shape) { return SizeToLong(SizeOf(shape)); } } // namespace /** Compute the Lgamma function using Lanczos' approximation from "A Precision * Approximation of the Gamma Function". SIAM Journal on Numerical Analysis @@ -329,9 +323,9 @@ void IgammaCpuKernelMod::NoBcastCompute(const std::vector &i auto in0 = reinterpret_cast(inputs[0]->addr); auto in1 = reinterpret_cast(inputs[1]->addr); auto out0 = reinterpret_cast(outputs[0]->addr); - size_t in0_elements_nums = get_element_num(a_shape_); - size_t in1_elements_nums = get_element_num(x_shape_); - size_t data_num = get_element_num(z_shape_); + int64_t in0_elements_nums = get_element_num(a_shape_); + int64_t in1_elements_nums = get_element_num(x_shape_); + int64_t data_num = get_element_num(z_shape_); int64_t type = in0_elements_nums == in1_elements_nums ? kSameShape : (in0_elements_nums == 1 ? kXOneElement : kYOneElement); if (data_num < kParallelDataNums) { diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/igamma_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/igamma_cpu_kernel.h index 50417054153..0ac33635956 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/igamma_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/igamma_cpu_kernel.h @@ -48,9 +48,9 @@ class IgammaCpuKernelMod : public DeprecatedNativeCpuKernelMod { } private: - std::vector a_shape_; - std::vector x_shape_; - std::vector z_shape_; + std::vector a_shape_; + std::vector x_shape_; + std::vector z_shape_; TypeId dtype_{kTypeUnknown}; template void LaunchKernel(const std::vector &inputs, const std::vector &outputs); diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/igammac_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/igammac_cpu_kernel.cc index 4512ed6608a..e7c87e2c384 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/igammac_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/igammac_cpu_kernel.cc @@ -59,13 +59,7 @@ constexpr int64_t kYOneElement = 2; constexpr size_t kInputNum = 2; constexpr size_t kOutputNum = 1; -size_t get_element_num(const std::vector &shape) { - size_t size = 1; - for (size_t i = 0; i < shape.size(); i++) { - size *= shape[i]; - } - return size; -} +int64_t get_element_num(const std::vector &shape) { return SizeToLong(SizeOf(shape)); } } // namespace /** Compute the Lgamma function using Lanczos' approximation from "A Precision * Approximation of the Gamma Function". 
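// Aside: with element counts now int64_t, the Igamma-family kernels pick a
// fast path before resorting to general broadcasting. The ternary above
// reduces to a three-way classification (kYOneElement = 2 comes from the
// hunk; the other two values are assumed for illustration):
#include <cstdint>

constexpr int64_t kSameShape = 0;    // assumed
constexpr int64_t kXOneElement = 1;  // assumed
constexpr int64_t kYOneElement = 2;  // as in the hunk

int64_t ClassifyNoBcast(int64_t in0_elements, int64_t in1_elements) {
  if (in0_elements == in1_elements) return kSameShape;  // same-size elementwise
  return in0_elements == 1 ? kXOneElement               // first input is a scalar
                           : kYOneElement;              // second input is a scalar
}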
SIAM Journal on Numerical Analysis diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/igammac_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/igammac_cpu_kernel.h index c804d10a869..3839f8ca69f 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/igammac_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/igammac_cpu_kernel.h @@ -47,9 +47,9 @@ class IgammacCpuKernelMod : public DeprecatedNativeCpuKernelMod { } private: - std::vector a_shape_; - std::vector x_shape_; - std::vector z_shape_; + std::vector a_shape_; + std::vector x_shape_; + std::vector z_shape_; TypeId dtype_{kTypeUnknown}; template void LaunchKernel(const std::vector &inputs, const std::vector &outputs); diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/igammagrada_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/igammagrada_cpu_kernel.cc index 84ab840fc7a..579f7534918 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/igammagrada_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/igammagrada_cpu_kernel.cc @@ -60,10 +60,10 @@ constexpr size_t kInputNum = 2; constexpr size_t kOutputNum = 1; constexpr int64_t VALUE = 1; constexpr int64_t DERIVATIVE = 2; -size_t get_element_num(const std::vector &shape) { +size_t get_element_num(const std::vector &shape) { size_t size = 1; for (size_t i = 0; i < shape.size(); i++) { - size *= shape[i]; + size *= static_cast(shape[i]); } return size; } @@ -356,9 +356,9 @@ void IgammaGradACpuKernelMod::NoBcastCompute(const std::vector(inputs[0]->addr); auto in1 = reinterpret_cast(inputs[1]->addr); auto out0 = reinterpret_cast(outputs[0]->addr); - size_t in0_elements_nums = get_element_num(a_shape_); - size_t in1_elements_nums = get_element_num(x_shape_); - size_t data_num = get_element_num(z_shape_); + auto in0_elements_nums = get_element_num(a_shape_); + auto in1_elements_nums = get_element_num(x_shape_); + auto data_num = get_element_num(z_shape_); int64_t type = in0_elements_nums == in1_elements_nums ? kSameShape : (in0_elements_nums == 1 ? 
kXOneElement : kYOneElement); if (data_num < kParallelDataNums) { diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/igammagrada_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/igammagrada_cpu_kernel.h index ead85215fb1..1cbab9f5805 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/igammagrada_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/igammagrada_cpu_kernel.h @@ -47,9 +47,9 @@ class IgammaGradACpuKernelMod : public DeprecatedNativeCpuKernelMod { } private: - std::vector a_shape_; - std::vector x_shape_; - std::vector z_shape_; + std::vector a_shape_; + std::vector x_shape_; + std::vector z_shape_; TypeId dtype_{kTypeUnknown}; template void LaunchKernel(const std::vector &inputs, const std::vector &outputs); diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/in_top_k_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/in_top_k_cpu_kernel.cc index 894d5ee2443..1f49c22f434 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/in_top_k_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/in_top_k_cpu_kernel.cc @@ -31,8 +31,8 @@ constexpr size_t kInTopkTargetShapeSize = 1; void InTopKCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); - auto prediction_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); - auto target_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + auto prediction_shape = Convert2SizeT(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0)); + auto target_shape = Convert2SizeT(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1)); if (prediction_shape.size() != kInTopKShapeRank) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the rank of the first input must be equal to " << kInTopKShapeRank << ", but got " << prediction_shape.size(); diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/iou_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/iou_cpu_kernel.cc index 9e8a65f68bf..ce3b0e11c0a 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/iou_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/iou_cpu_kernel.cc @@ -32,6 +32,10 @@ void IOUCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); auto anchor_boxes_shape = AnfAlgo::GetInputDeviceShape(kernel_node, ANCHOR_BOXES); + auto gt_boxes_shape = AnfAlgo::GetInputDeviceShape(kernel_node, GT_BOXES); + if (AnfAlgo::IsShapesDynamic({anchor_boxes_shape, gt_boxes_shape})) { + return; + } constexpr size_t BOX_SHAPE_SIZE = 2; constexpr size_t BOX_SIZE_INDEX = 0; constexpr size_t BOX_COORDINATE_INDEX = 1; @@ -40,13 +44,12 @@ void IOUCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the shape of 'anchor_boxes' must be [N, 4], but got: " << Vector2Str(anchor_boxes_shape); } - anchor_boxes_size_ = anchor_boxes_shape[BOX_SIZE_INDEX]; - auto gt_boxes_shape = AnfAlgo::GetInputDeviceShape(kernel_node, GT_BOXES); + anchor_boxes_size_ = static_cast(anchor_boxes_shape[BOX_SIZE_INDEX]); if (gt_boxes_shape.size() != BOX_SHAPE_SIZE || gt_boxes_shape[BOX_COORDINATE_INDEX] != kBoxCoordinateLen) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the shape of 'gt_boxes' must be [N, 4], but got: " << Vector2Str(gt_boxes_shape); } - gt_boxes_size_ = gt_boxes_shape[BOX_SIZE_INDEX]; + gt_boxes_size_ = static_cast(gt_boxes_shape[BOX_SIZE_INDEX]); iou_size_ = anchor_boxes_size_ * 
gt_boxes_size_; std::string iou_mode = common::AnfAlgo::GetNodeAttr(kernel_node, "mode"); if (iou_mode != "iou" && iou_mode != "iof") { diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/is_close_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/is_close_cpu_kernel.h index 04853a7d32d..2d38615f185 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/is_close_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/is_close_cpu_kernel.h @@ -54,9 +54,10 @@ class IsCloseCpuKernelMod : public NativeCpuKernelMod, public MatchKernelHelper< float rtol_{1e-5}; float atol_{1e-8}; bool equal_nan_{true}; - std::vector input_shape_; - std::vector other_shape_; - std::vector output_shape_; + + ShapeVector input_shape_; + ShapeVector other_shape_; + ShapeVector output_shape_; }; } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/l2_normalize_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/l2_normalize_cpu_kernel.cc index beffe7b6218..d7a22491dc1 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/l2_normalize_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/l2_normalize_cpu_kernel.cc @@ -41,12 +41,12 @@ class L2NormalizeCpuFunc : public DeprecatedCpuKernelFunc { void CalcDenominator(const T *input_addr, const size_t reduce_size, const int dims, std::unique_ptr *denominator_addr); - void CalcOutput(const T *input_addr, const std::vector &reduce_shape, const size_t output_size, - T *output_addr, std::unique_ptr const &denominator_addr); + void CalcOutput(const T *input_addr, const ShapeVector &reduce_shape, const size_t output_size, T *output_addr, + std::unique_ptr const &denominator_addr); private: - std::vector input_shape_; - std::vector output_shape_; + ShapeVector input_shape_; + ShapeVector output_shape_; T epsilon_{0}; int axis_{0}; void CheckParam(const CNodePtr &kernel_node); @@ -88,12 +88,12 @@ void L2NormalizeCpuFunc::CalcDenominator(const T *input_addr, const size_t re axes[k] = i; ++k; } else { - stride *= input_shape_[i]; + stride *= LongToSize(input_shape_[i]); } } axes[k] = axis_size; - std::vector transpose_shape(input_shape_.size()); + ShapeVector transpose_shape(input_shape_.size()); for (size_t i = 0; i < IntToSize(dims); ++i) { transpose_shape[i] = input_shape_[axes[i]]; } @@ -122,9 +122,8 @@ void L2NormalizeCpuFunc::CalcDenominator(const T *input_addr, const size_t re } template -void L2NormalizeCpuFunc::CalcOutput(const T *input_addr, const std::vector &reduce_shape, - const size_t output_size, T *output_addr, - std::unique_ptr const &denominator_addr) { +void L2NormalizeCpuFunc::CalcOutput(const T *input_addr, const ShapeVector &reduce_shape, const size_t output_size, + T *output_addr, std::unique_ptr const &denominator_addr) { BroadcastIterator broad_base_iter(input_shape_, reduce_shape, output_shape_); auto task = [&](size_t start, size_t end) { auto iter = broad_base_iter; @@ -161,11 +160,11 @@ bool L2NormalizeCpuFunc::RunFunc(const std::vector &input auto output_addr = reinterpret_cast(outputs[0]->addr); int dims = SizeToInt(input_shape_.size()); - std::vector reduce_shape = input_shape_; + auto reduce_shape = input_shape_; size_t reduce_size = 1; reduce_shape[axis_] = 1; for (size_t i = 0; i < input_shape_.size(); ++i) { - reduce_size *= reduce_shape[i]; + reduce_size *= LongToSize(reduce_shape[i]); } auto denominator_addr = std::make_unique(reduce_size); diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/l2loss_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/l2loss_cpu_kernel.cc 
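// Aside: the L2Normalize RunFunc above sizes its denominator buffer from a
// copy of the input shape with the normalized axis collapsed to 1. Sketch:
#include <cstdint>
#include <vector>

using ShapeVector = std::vector<int64_t>;

size_t ReduceSize(const ShapeVector &input_shape, size_t axis, ShapeVector *reduce_shape) {
  *reduce_shape = input_shape;
  (*reduce_shape)[axis] = 1;  // the reduced axis contributes a single element
  size_t n = 1;
  for (int64_t d : *reduce_shape) n *= static_cast<size_t>(d);
  return n;  // e.g. {4, 8, 16} with axis = 1 -> reduce_shape {4, 1, 16}, n = 64
}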
index 63a0c893cd4..a0538b0f812 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/l2loss_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/l2loss_cpu_kernel.cc @@ -30,10 +30,8 @@ void L2LossCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); input_shape_ = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); - tensor_size_ = 1; - for (const size_t &d : input_shape_) { - tensor_size_ *= d; - } + + tensor_size_ = SizeOf(input_shape_); auto kernel_attr = GetKernelAttrFromNode(kernel_node); std::vector support_list; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/l2loss_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/l2loss_cpu_kernel.h index 9c4eb93a61c..8eb0a3fec34 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/l2loss_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/l2loss_cpu_kernel.h @@ -47,7 +47,7 @@ class L2LossCpuKernelMod : public DeprecatedNativeCpuKernelMod { L2LossFunc kernel_func_; TypeId dtype_{kTypeUnknown}; size_t tensor_size_ = 1; - std::vector input_shape_; + ShapeVector input_shape_; }; } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/l2normalize_grad_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/l2normalize_grad_cpu_kernel.cc index 0ba0e7db1c0..0852462ca32 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/l2normalize_grad_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/l2normalize_grad_cpu_kernel.cc @@ -36,14 +36,14 @@ class L2NormalizeGradCpuFunc : public DeprecatedCpuKernelFunc { const std::vector &outputs) override; private: - void CheckInputShape(const std::vector &output_shape); + void CheckInputShape(const ShapeVector &output_shape); std::vector OneDimIndexToHighDimIndex(size_t one_dim_index); void HighDimIndexToOneDimIndex(size_t *one_dim_index, const std::vector &high_dim_index); std::vector GetVector(const std::vector &high_dim_index, const T *x); void GetSumOfProduct(const std::vector &x_vector, const std::vector &y_vector, T *ss); void GetOutput(const std::vector &input_x_vector, const std::vector &y_vector, const std::vector &dout_vector, const std::vector &high_dim_index, T *output); - std::vector> input_shape_list_; + std::vector input_shape_list_; std::vector dim_elem_num_list_; int axis_{0}; T epsilon_{0}; @@ -63,7 +63,7 @@ void L2NormalizeGradCpuFunc::InitFunc(const CNodePtr &kernel_node) { int output_dim_length = output_shape.size(); dim_elem_num_list_.resize(output_dim_length, 1); for (int i = output_dim_length - 2; i >= 0; i--) { // from -2 to 0 dim - dim_elem_num_list_[i] = output_shape[i + 1] * dim_elem_num_list_[i + 1]; + dim_elem_num_list_[i] = LongToSize(output_shape[i + 1]) * dim_elem_num_list_[i + 1]; } int axis = LongToInt(common::AnfAlgo::GetNodeAttr(kernel_node, "axis")); @@ -96,7 +96,7 @@ bool L2NormalizeGradCpuFunc::RunFunc(const std::vector &inputs, c } template -void L2NormalizeGradCpuFunc::CheckInputShape(const std::vector &output_shape) { +void L2NormalizeGradCpuFunc::CheckInputShape(const ShapeVector &output_shape) { for (const auto &shape : input_shape_list_) { if (output_shape != shape) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ @@ -106,7 +106,7 @@ void L2NormalizeGradCpuFunc::CheckInputShape(const std::vector &outpu } auto input_x_shape = input_shape_list_[0]; if (input_x_shape.size() != 0) { - if (std::any_of(input_x_shape.begin(), input_x_shape.end(), [](size_t i) { return i == 0; })) { + if 
(std::any_of(input_x_shape.begin(), input_x_shape.end(), [](int64_t i) { return i == 0; })) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the input 'x' can not be null."; } } @@ -140,7 +140,7 @@ std::vector L2NormalizeGradCpuFunc::GetVector(const std::vector &h auto x_shape = input_shape_list_[0]; std::vector x_vector; x_vector.reserve(x_shape[axis_]); - for (size_t i = 0; i < x_shape[axis_]; i++) { + for (size_t i = 0; i < LongToSize(x_shape[axis_]); i++) { size_t oneDimIndex = 0; std::vector tmp_high_dim_index = high_dim_index; tmp_high_dim_index[axis_] = i; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/layer_norm_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/layer_norm_cpu_kernel.cc index 6af82475b13..f439edca2da 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/layer_norm_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/layer_norm_cpu_kernel.cc @@ -30,7 +30,7 @@ void LayerNormCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); - std::vector x_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto x_shape = Convert2SizeT(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0)); auto begin_norm_axis = common::AnfAlgo::GetNodeAttr(kernel_node, "begin_norm_axis"); auto begin_params_axis = common::AnfAlgo::GetNodeAttr(kernel_node, "begin_params_axis"); if (begin_norm_axis < 0) { diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/layer_norm_grad_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/layer_norm_grad_cpu_kernel.cc index 20a73d32e7e..7cbea11b63e 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/layer_norm_grad_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/layer_norm_grad_cpu_kernel.cc @@ -30,7 +30,7 @@ void LayerNormGradCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); - std::vector x_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto x_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0)); auto begin_norm_axis = common::AnfAlgo::GetNodeAttr(kernel_node, "begin_norm_axis"); auto begin_params_axis = common::AnfAlgo::GetNodeAttr(kernel_node, "begin_params_axis"); if (begin_norm_axis < 0) { diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/lcm_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/lcm_cpu_kernel.cc index 24e96bcd2ae..6124992c8ab 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/lcm_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/lcm_cpu_kernel.cc @@ -37,21 +37,10 @@ const size_t kLcmOutputsNum = 1; bool LcmCpuKernelMod::Init(const BaseOperatorPtr &base_operator, const std::vector &inputs, const std::vector &outputs) { kernel_name_ = base_operator->name(); - std::vector x1_shape = inputs[0]->GetShapeVector(); - std::vector x2_shape = inputs[1]->GetShapeVector(); - std::vector y_shape = outputs[0]->GetShapeVector(); - x1_shape_.resize(x1_shape.size(), 1); - x2_shape_.resize(x2_shape.size(), 1); - y_shape_.resize(y_shape.size(), 1); - for (size_t i = 0; i < x1_shape.size(); i++) { - x1_shape_[i] = static_cast(x1_shape[i]); - } - for (size_t i = 0; i < x2_shape.size(); i++) { - x2_shape_[i] = static_cast(x2_shape[i]); - } - for (size_t i = 0; i < y_shape.size(); i++) { - y_shape_[i] 
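// Aside: the two LayerNorm hunks above convert a ShapeVector back to size_t
// indices with different policies. Plausible equivalents (sketches, not the
// MindSpore definitions): plain narrowing vs. clamping negatives to zero so a
// dynamic -1 cannot wrap around to a huge unsigned value.
#include <cstdint>
#include <vector>

std::vector<size_t> Convert2SizeT(const std::vector<int64_t> &v) {
  std::vector<size_t> out;
  out.reserve(v.size());
  for (int64_t d : v) out.push_back(static_cast<size_t>(d));
  return out;
}

std::vector<size_t> Convert2SizeTClipNeg(const std::vector<int64_t> &v) {
  std::vector<size_t> out;
  out.reserve(v.size());
  for (int64_t d : v) out.push_back(d < 0 ? 0 : static_cast<size_t>(d));
  return out;
}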
= static_cast(y_shape[i]); - } + x1_shape_ = inputs[0]->GetShapeVector(); + x2_shape_ = inputs[1]->GetShapeVector(); + y_shape_ = outputs[0]->GetShapeVector(); + auto kernel_attr = GetKernelAttrFromTensors(inputs, outputs); auto [is_match, index] = MatchKernelAttr(kernel_attr, GetOpSupport()); if (!is_match) { @@ -73,10 +62,8 @@ bool LcmCpuKernelMod::LaunchKernel(const std::vector &inputs if (y_shape_.size() == 0) { (void)y_shape_.insert(y_shape_.begin(), 1); } - int64_t output_size_ = 1; - for (size_t i = 0; i < y_shape_.size(); ++i) { - output_size_ *= y_shape_[i]; - } + auto output_size = SizeOf(y_shape_); + BroadcastIterator base_iter(x1_shape_, x2_shape_, y_shape_); auto task = [this, &x1, &x2, &y, &base_iter](size_t start, size_t end) { auto iter = base_iter; @@ -86,7 +73,7 @@ bool LcmCpuKernelMod::LaunchKernel(const std::vector &inputs iter.GenNextPos(); } }; - ParallelLaunchAutoSearch(task, output_size_, this, ¶llel_search_info_); + ParallelLaunchAutoSearch(task, output_size, this, ¶llel_search_info_); return true; } diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/lcm_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/lcm_cpu_kernel.h index 5a802191825..1f4f3be009f 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/lcm_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/lcm_cpu_kernel.h @@ -55,9 +55,9 @@ class LcmCpuKernelMod : public NativeCpuKernelMod { const std::vector &)>; static std::vector> func_list_; LcmLaunchFunc kernel_func_; - std::vector x1_shape_; - std::vector x2_shape_; - std::vector y_shape_; + ShapeVector x1_shape_; + ShapeVector x2_shape_; + ShapeVector y_shape_; bool need_bcast_{false}; }; } // namespace kernel diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/lerp_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/lerp_cpu_kernel.cc index fd3aebdb86d..8647b1a3240 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/lerp_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/lerp_cpu_kernel.cc @@ -42,18 +42,11 @@ int LerpCpuKernelMod::Resize(const BaseOperatorPtr &base_operator, const std::ve if (auto ret = KernelMod::Resize(base_operator, inputs, outputs); ret != KRET_OK) { return ret; } - auto start_shape = inputs.at(kIndex0)->GetShapeVector(); - start_shape_.clear(); - (void)std::transform(start_shape.begin(), start_shape.end(), std::back_inserter(start_shape_), LongToSize); - end_shape_.clear(); - auto end_shape = inputs.at(kIndex1)->GetShapeVector(); - (void)std::transform(end_shape.begin(), end_shape.end(), std::back_inserter(end_shape_), LongToSize); - weight_shape_.clear(); - auto weight_shape = inputs.at(kIndex2)->GetShapeVector(); - output_shape_.clear(); - (void)std::transform(weight_shape.begin(), weight_shape.end(), std::back_inserter(weight_shape_), LongToSize); - auto output_shape = outputs.at(kIndex0)->GetShapeVector(); - (void)std::transform(output_shape.begin(), output_shape.end(), std::back_inserter(output_shape_), LongToSize); + + start_shape_ = inputs.at(kIndex0)->GetShapeVector(); + end_shape_ = inputs.at(kIndex1)->GetShapeVector(); + weight_shape_ = inputs.at(kIndex2)->GetShapeVector(); + output_shape_ = outputs.at(kIndex0)->GetShapeVector(); output_size_ = std::accumulate(output_shape_.begin(), output_shape_.end(), 1, std::multiplies()); return KRET_OK; } diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/lerp_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/lerp_cpu_kernel.h index 95eb0120cd7..1828cc0d92f 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/lerp_cpu_kernel.h +++ 
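// Aside: the Lerp Resize() above computes output_size_ with std::accumulate.
// Worth noting: the initial value fixes the accumulator type, so int64_t{1}
// keeps the product in 64 bits, whereas a bare 1 would accumulate in int.
#include <cstdint>
#include <functional>
#include <numeric>
#include <vector>

int64_t NumElements(const std::vector<int64_t> &shape) {
  return std::accumulate(shape.begin(), shape.end(), int64_t{1},
                         std::multiplies<int64_t>());
}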
b/mindspore/ccsrc/plugin/device/cpu/kernel/lerp_cpu_kernel.h @@ -54,10 +54,10 @@ class LerpCpuKernelMod : public NativeCpuKernelMod, public MatchKernelHelper &inputs, const std::vector &, const std::vector &outputs); size_t output_size_{1}; - std::vector start_shape_; - std::vector end_shape_; - std::vector weight_shape_; - std::vector output_shape_; + ShapeVector start_shape_; + ShapeVector end_shape_; + ShapeVector weight_shape_; + ShapeVector output_shape_; }; } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/list_diff_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/list_diff_cpu_kernel.cc index c069d03e760..91c1438fc7c 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/list_diff_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/list_diff_cpu_kernel.cc @@ -48,8 +48,8 @@ void ListDiffCPUKernelMod::InitKernel(const CNodePtr &kernel_node) { } auto x_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kIndex0); auto y_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kIndex1); - x_size_ = static_cast(x_shape[0]); - y_size_ = static_cast(y_shape[0]); + x_size_ = x_shape[0]; + y_size_ = y_shape[0]; auto out_idx = common::AnfAlgo::GetNodeAttr(kernel_node, kAttrOutIdx); MS_EXCEPTION_IF_NULL(out_idx); idx_type_ = out_idx->type_id(); @@ -96,8 +96,8 @@ bool ListDiffCPUKernelMod::LaunchKernel(const std::vector &inputs, c if (!node_) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', cnode_ptr_(kernel_node) is expired. Error no: " << node_; } - std::vector out_shape = {static_cast(out_size_)}; - std::vector idx_shape = {static_cast(out_size_)}; + ShapeVector out_shape = {out_size_}; + ShapeVector idx_shape = {out_size_}; size_t output_num = common::AnfAlgo::GetOutputTensorNum(node_); std::vector dtypes(output_num); for (size_t i = 0; i < output_num; i++) { diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/log_matrix_determinant_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/log_matrix_determinant_cpu_kernel.cc index 80e172c4b5f..4f7701355fc 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/log_matrix_determinant_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/log_matrix_determinant_cpu_kernel.cc @@ -41,6 +41,9 @@ void LogMatrixDeterminantCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { auto shape_x = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); auto shape_sign = common::AnfAlgo::GetOutputInferShape(kernel_node, 0); auto shape_y = common::AnfAlgo::GetOutputInferShape(kernel_node, 1); + if (AnfAlgo::IsShapesDynamic({shape_x, shape_sign, shape_y})) { + return; + } size_t shape_size_x = shape_x.size(); size_t shape_size_sign = shape_sign.size(); size_t shape_size_y = shape_y.size(); @@ -98,7 +101,7 @@ void LogMatrixDeterminantCpuKernelMod::LaunchLogMatrixDeterminant(const std::vec auto output_sign = reinterpret_cast(outputs[0]->addr); auto output_y = reinterpret_cast(outputs[1]->addr); - auto shape_x = common::AnfAlgo::GetPrevNodeOutputInferShape(node_, 0); + auto shape_x = Convert2SizeT(common::AnfAlgo::GetPrevNodeOutputInferShape(node_, 0)); size_t shape_size = shape_x.size(); size_t m = shape_x[shape_size - 1]; size_t size_mm = m * m; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/lstsq_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/lstsq_cpu_kernel.cc index 5ad8cf306d5..5408a21625f 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/lstsq_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/lstsq_cpu_kernel.cc @@ 
-73,13 +73,13 @@ void LstsqCpuKernelMod::LaunchKernel(const std::vector &inputs, cons auto input_0_addr = reinterpret_cast(inputs[0]->addr); auto input_1_addr = reinterpret_cast(inputs[1]->addr); auto output_addr = reinterpret_cast(outputs[0]->addr); - size_t m = input_0_shape_[0]; - size_t n = input_0_shape_[1]; + size_t m = static_cast(input_0_shape_[0]); + size_t n = static_cast(input_0_shape_[1]); size_t k = 0; if (input_1_shape_.size() == kADimNum_1) { k = 1; } else { - k = input_1_shape_[1]; + k = static_cast(input_1_shape_[1]); } typedef Eigen::Matrix MartixXd; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/lstsq_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/lstsq_cpu_kernel.h index 94fbb78ac43..0a13b38566f 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/lstsq_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/lstsq_cpu_kernel.h @@ -47,8 +47,8 @@ class LstsqCpuKernelMod : public DeprecatedNativeCpuKernelMod { } private: - std::vector input_0_shape_; - std::vector input_1_shape_; + std::vector input_0_shape_; + std::vector input_1_shape_; TypeId dtype_0_{kTypeUnknown}; TypeId dtype_1_{kTypeUnknown}; }; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/lu_solve_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/lu_solve_cpu_kernel.cc index c610f5e963a..a04baf3d125 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/lu_solve_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/lu_solve_cpu_kernel.cc @@ -24,13 +24,7 @@ namespace { constexpr size_t kDimNum = 2; } -size_t get_element_num(const std::vector &shape) { - size_t size = 1; - for (size_t i = 0; i < shape.size(); i++) { - size *= shape[i]; - } - return size; -} +int64_t get_element_num(const std::vector &shape) { return SizeToLong(SizeOf(shape)); } void LuSolveCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { node_wpt_ = kernel_node; @@ -42,6 +36,9 @@ void LuSolveCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { auto x_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); auto lu_data_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1); auto lu_pivots_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 2); + if (AnfAlgo::IsShapesDynamic({x_shape, lu_data_shape, lu_pivots_shape})) { + return; + } if (lu_data_shape.size() < kDimNum) { MS_EXCEPTION(ValueError) << "For LuSolveCPUKercel lu_data's dimensions must be greater than or equal to 2."; } @@ -115,13 +112,13 @@ void LuSolveCpuKernelMod::LuSolve(const std::vector &inputs, auto input_1_Shape = AnfAlgo::GetInputDeviceShape(node_wpt_, 1); auto output_y = reinterpret_cast(outputs[0]->addr); size_t lu_dims = input_1_Shape.size(); - size_t lu_maxtrix_sizes = input_1_Shape[lu_dims - 2]; + size_t lu_maxtrix_sizes = LongToSize(input_1_Shape[lu_dims - 2]); size_t b_dim = input_0_Shape.size(); - size_t b_m = input_0_Shape[b_dim - 1]; + size_t b_m = LongToSize(input_0_Shape[b_dim - 1]); typedef Eigen::Matrix MatrixXd; MatrixXd matrix_b = Eigen::Map(b_working_ptr, lu_maxtrix_sizes, b_m); MatrixXd matrix_A = Eigen::Map(lu_working_ptr, lu_maxtrix_sizes, lu_maxtrix_sizes); - for (size_t i = 0; i < input_0_Shape[b_dim - kDimNum]; i++) { + for (size_t i = 0; i < LongToSize(input_0_Shape[b_dim - kDimNum]); i++) { matrix_b.row(i).swap(matrix_b.row(*(pivots_working_ptr + i) - 1)); } MatrixXd L = matrix_A.template triangularView(); @@ -142,20 +139,20 @@ bool LuSolveCpuKernelMod::LaunchKernel(const std::vector &in auto input_0_Shape = AnfAlgo::GetInputDeviceShape(node_wpt_, 0); auto input_1_Shape = AnfAlgo::GetInputDeviceShape(node_wpt_, 
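// Aside: the LuSolve kernel above views raw kernel buffers as Eigen matrices
// without copying, then solves against the packed LU factors. A sketch of the
// same technique (function and variable names here are illustrative):
#include <Eigen/Dense>

using MatrixXd = Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;

MatrixXd SolveOneBatch(double *lu, double *b, int n, int k) {
  Eigen::Map<MatrixXd> A(lu, n, n);  // n x n matrix holding combined L and U
  Eigen::Map<MatrixXd> B(b, n, k);   // n x k right-hand sides (already pivoted)
  // Forward substitution on the unit-lower part, then back substitution:
  MatrixXd y = A.triangularView<Eigen::UnitLower>().solve(B);
  return A.triangularView<Eigen::Upper>().solve(y);
}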
1); auto output_Shape = AnfAlgo::GetOutputDeviceShape(node_wpt_, 0); - size_t input0_element_num = get_element_num(input_0_Shape); - size_t input1_element_num = get_element_num(input_1_Shape); - size_t output_element_num = get_element_num(output_Shape); + auto input0_element_num = get_element_num(input_0_Shape); + auto input1_element_num = get_element_num(input_1_Shape); + auto output_element_num = get_element_num(output_Shape); std::vector input_0(input_x0, input_x0 + input0_element_num); std::vector input_1(input_x1, input_x1 + input1_element_num); size_t b_dims = input_0_Shape.size(); - std::vector b_dims_vector = input_0_Shape; + std::vector b_dims_vector = input_0_Shape; size_t lu_dims = input_1_Shape.size(); - std::vector lu_dims_vector = input_1_Shape; - size_t b_stride = input_0_Shape[b_dims - 1] * input_0_Shape[b_dims - 2]; - size_t lu_stride = input_1_Shape[lu_dims - 1] * input_1_Shape[lu_dims - 2]; - size_t pivots_stride = input_1_Shape[lu_dims - 1]; + std::vector lu_dims_vector = input_1_Shape; + size_t b_stride = static_cast(input_0_Shape[b_dims - 1] * input_0_Shape[b_dims - 2]); + size_t lu_stride = static_cast(input_1_Shape[lu_dims - 1] * input_1_Shape[lu_dims - 2]); + size_t pivots_stride = static_cast(input_1_Shape[lu_dims - 1]); MS_EXCEPTION_IF_ZERO("b_stride", b_stride); - size_t batch_num = output_element_num / b_stride; + size_t batch_num = static_cast(output_element_num / b_stride); if (b_dims == lu_dims) { for (size_t i = 0; i < batch_num; i++) { T1 *b_working_ptr = input_0.data() + i * b_stride; @@ -164,8 +161,8 @@ bool LuSolveCpuKernelMod::LaunchKernel(const std::vector &in LuSolve(inputs, outputs, b_working_ptr, lu_working_ptr, pivots_working_ptr, b_stride, i); } } else { - std::vector b_shape = b_dims_vector; - std::vector lu_shape = lu_dims_vector; + std::vector b_shape = b_dims_vector; + std::vector lu_shape = lu_dims_vector; for (size_t i = 0; i < kDimNum; i++) { b_shape.pop_back(); lu_shape.pop_back(); diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/map_cache_idx_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/map_cache_idx_cpu_kernel.cc index 3f4e07ebcde..aae19a78bcc 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/map_cache_idx_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/map_cache_idx_cpu_kernel.cc @@ -49,8 +49,8 @@ int Compress(HashmapEntry *entry_p, const size_t &length, T entry) { } void UpdateShape(size_t miss_count, const CNodePtr &node) { - std::vector out_shape; - (void)out_shape.emplace_back(miss_count); + ShapeVector out_shape; + (void)out_shape.emplace_back(SizeToLong(miss_count)); size_t output_num = common::AnfAlgo::GetOutputTensorNum(node); std::vector dtypes(output_num); for (size_t i = 0; i < output_num; i++) { @@ -75,11 +75,14 @@ void MapCacheIdxCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); node_wpt_ = kernel_node; auto hashmap_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + if (IsDynamic(hashmap_shape)) { + return; + } if (hashmap_shape.size() != 2) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of 'HashMap' must be 2-D, but got " << hashmap_shape.size() << "-D."; } - hashmap_length_ = hashmap_shape[0]; + hashmap_length_ = LongToSize(hashmap_shape[0]); if (hashmap_length_ == 0) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the first dimension of 'HashMap' must be greater than 0, but got " << hashmap_length_; @@ -108,10 +111,7 @@ void MapCacheIdxCpuKernelMod::LaunchKernel(const 
std::vector &inputs const std::vector &outputs) { auto node = node_wpt_.lock(); auto emb_idx_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(node, 1); - batch_size_ = 1; - for (size_t i = 0; i < emb_idx_shape.size(); ++i) { - batch_size_ *= emb_idx_shape[i]; - } + batch_size_ = SizeOf(emb_idx_shape); HashmapEntry *hashmap = reinterpret_cast *>(inputs[0]->addr); auto input_indices = reinterpret_cast(inputs[1]->addr); T *step_ = reinterpret_cast(inputs[2]->addr); diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/map_uniform_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/map_uniform_cpu_kernel.cc index 36d82a512e0..027839c1e19 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/map_uniform_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/map_uniform_cpu_kernel.cc @@ -57,10 +57,7 @@ void MapUniformCpuKernelMod::LaunchKernel(const std::vector &inputs, MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', node_wpt_(kernel_node) is expired. Error no: " << node; } auto input_x_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(node, 0); - batch_size_ = 1; - for (size_t i = 0; i < input_x_shape.size(); ++i) { - batch_size_ *= input_x_shape[i]; - } + batch_size_ = SizeOf(input_x_shape); MS_LOG(INFO) << "Input size: " << batch_size_; auto input_x = reinterpret_cast(inputs[0]->addr); auto per_group_size = *reinterpret_cast(inputs[1]->addr); diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/masked_fill_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/masked_fill_cpu_kernel.h index 0637421e47b..6836d78961d 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/masked_fill_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/masked_fill_cpu_kernel.h @@ -52,9 +52,9 @@ class MaskedFillCpuKernelMod : public NativeCpuKernelMod { MaskedFillFunc kernel_func_; size_t output_size_{1}; size_t inner_size_{1}; - std::vector input_shape_; - std::vector mask_shape_; - std::vector output_shape_; + std::vector input_shape_; + std::vector mask_shape_; + std::vector output_shape_; bool need_broadcast_{false}; }; } // namespace kernel diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/masked_select_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/masked_select_cpu_kernel.cc index 50971770440..db0ef8608d5 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/masked_select_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/masked_select_cpu_kernel.cc @@ -33,7 +33,7 @@ void MaskedSelectCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { input_shape_b_ = AnfAlgo::GetInputDeviceShape(kernel_node, 1); output_shape_ = CPUKernelUtils::GetBroadcastShape(input_shape_a_, input_shape_b_); for (const uint64_t &d : output_shape_) { - tensor_size_ *= d; + tensor_size_ *= static_cast(d); } node_wpt_ = kernel_node; @@ -76,8 +76,8 @@ bool MaskedSelectCpuKernelMod::LaunchKernel(const std::vector out_shape; - (void)out_shape.emplace_back(j); + ShapeVector out_shape; + (void)out_shape.emplace_back(UlongToLong(j)); size_t output_num = common::AnfAlgo::GetOutputTensorNum(node_); std::vector dtypes(output_num); for (size_t i = 0; i < output_num; i++) { diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/masked_select_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/masked_select_cpu_kernel.h index 4e5bcff6f44..0374be82c6e 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/masked_select_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/masked_select_cpu_kernel.h @@ -47,9 +47,9 @@ class MaskedSelectCpuKernelMod : public DeprecatedNativeCpuKernelMod { 
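// Aside: the sign-safe conversion helpers used throughout these hunks
// (LongToSize, SizeToLong, UlongToLong, LongToInt, ...) are thin checked
// casts between the signed shape domain and unsigned indexing. Rough shape
// of two of them (sketches, not the MindSpore definitions, which report
// failures through the framework's own logging):
#include <cstdint>
#include <limits>
#include <stdexcept>

size_t LongToSize(int64_t v) {
  if (v < 0) throw std::out_of_range("LongToSize: negative value");
  return static_cast<size_t>(v);
}

int64_t SizeToLong(size_t v) {
  if (v > static_cast<size_t>(std::numeric_limits<int64_t>::max())) {
    throw std::out_of_range("SizeToLong: value too large");
  }
  return static_cast<int64_t>(v);
}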
const std::vector &)>; static std::vector> func_list_; MaskedSelectFunc kernel_func_; - std::vector input_shape_a_; - std::vector input_shape_b_; - std::vector output_shape_; + std::vector input_shape_a_; + std::vector input_shape_b_; + std::vector output_shape_; uint64_t tensor_size_ = 1; CNodeWeakPtr node_wpt_; }; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/masked_select_grad_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/masked_select_grad_cpu_kernel.cc index b335409eacc..5cd663def0c 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/masked_select_grad_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/masked_select_grad_cpu_kernel.cc @@ -37,7 +37,7 @@ void MaskedSelectGradCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { grad_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, kIndexGrad); output_shape_ = CPUKernelUtils::GetBroadcastShape(input_shape_a_, input_shape_b_); for (const uint64_t &d : output_shape_) { - tensor_size_ *= d; + tensor_size_ *= static_cast(d); } auto kernel_attr = GetKernelAttrFromNode(kernel_node); diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/masked_select_grad_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/masked_select_grad_cpu_kernel.h index d8e6a63c596..976d6ad52dd 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/masked_select_grad_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/masked_select_grad_cpu_kernel.h @@ -48,10 +48,10 @@ class MaskedSelectGradCpuKernelMod : public DeprecatedNativeCpuKernelMod { static std::vector> func_list_; MaskedSelectGradFunc kernel_func_; - std::vector input_shape_a_; - std::vector input_shape_b_; - std::vector grad_shape_; - std::vector output_shape_; + std::vector input_shape_a_; + std::vector input_shape_b_; + std::vector grad_shape_; + std::vector output_shape_; uint64_t tensor_size_ = 1; }; } // namespace kernel diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/matrix_band_part_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/matrix_band_part_cpu_kernel.cc index 421ddf7756f..8df115b2f16 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/matrix_band_part_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/matrix_band_part_cpu_kernel.cc @@ -40,10 +40,8 @@ bool MatrixBandPartCpuKernelMod::Init(const BaseOperatorPtr &base_operator, cons return true; } -void MatrixBandPartCpuKernelMod::BroadcastShape(const std::vector &x_shape, - const std::vector &lower_shape, - const std::vector &upper_shape, - const std::vector &output_shape) { +void MatrixBandPartCpuKernelMod::BroadcastShape(const ShapeVector &x_shape, const ShapeVector &lower_shape, + const ShapeVector &upper_shape, const ShapeVector &output_shape) { broadcast_x_shape_.clear(); broadcast_lower_shape_.clear(); broadcast_upper_shape_.clear(); @@ -52,8 +50,8 @@ void MatrixBandPartCpuKernelMod::BroadcastShape(const std::vector &x_sha broadcast_lower_shape_.resize(kMaxDims, 1); broadcast_upper_shape_.resize(kMaxDims, 1); broadcast_output_shape_.resize(kMaxDims, 1); - auto expanded_lower_shape = ops::GetExpandedShape(lower_shape); - auto expanded_upper_shape = ops::GetExpandedShape(upper_shape); + auto expanded_lower_shape = ops::GetExpandedShape(lower_shape); + auto expanded_upper_shape = ops::GetExpandedShape(upper_shape); for (size_t i = 0; i < output_shape.size(); i++) { broadcast_output_shape_[i] = output_shape[i]; @@ -84,20 +82,13 @@ int MatrixBandPartCpuKernelMod::Resize(const BaseOperatorPtr &base_operator, con if (int ret = KernelMod::Resize(base_operator, inputs, outputs); 
ret != KRET_OK) { return ret; } - auto x_shape_temp = inputs.at(kIndex0)->GetShapeVector(); - auto lower_shape_temp = inputs.at(kIndex1)->GetShapeVector(); - auto upper_shape_temp = inputs.at(kIndex2)->GetShapeVector(); - auto output_shape_temp = outputs.at(kIndex0)->GetShapeVector(); - std::vector x_shape{}; - std::vector lower_shape{}; - std::vector upper_shape{}; - std::vector output_shape{}; - (void)std::transform(x_shape_temp.begin(), x_shape_temp.end(), std::back_inserter(x_shape), LongToSize); - (void)std::transform(lower_shape_temp.begin(), lower_shape_temp.end(), std::back_inserter(lower_shape), LongToSize); - (void)std::transform(upper_shape_temp.begin(), upper_shape_temp.end(), std::back_inserter(upper_shape), LongToSize); - (void)std::transform(output_shape_temp.begin(), output_shape_temp.end(), std::back_inserter(output_shape), - LongToSize); - size_t input_element_num = std::accumulate(x_shape.begin(), x_shape.end(), 1, std::multiplies()); + + auto x_shape = inputs.at(kIndex0)->GetShapeVector(); + auto lower_shape = inputs.at(kIndex1)->GetShapeVector(); + auto upper_shape = inputs.at(kIndex2)->GetShapeVector(); + auto output_shape = outputs.at(kIndex0)->GetShapeVector(); + + size_t input_element_num = SizeOf(x_shape); is_null_input_ = (input_element_num == 0); if (is_null_input_) { return KRET_OK; @@ -109,8 +100,8 @@ int MatrixBandPartCpuKernelMod::Resize(const BaseOperatorPtr &base_operator, con << "but got " << x_shape.size() << "D."; return KRET_RESIZE_FAILED; } - m_ = x_shape[dim_size_ - kDim2]; - n_ = x_shape[dim_size_ - kDim1]; + m_ = LongToSize(x_shape[dim_size_ - kDim2]); + n_ = LongToSize(x_shape[dim_size_ - kDim1]); if (m_ == 0 || n_ == 0) { MS_LOG(ERROR) << "For '" << kernel_name_ << "', the size of -2 axis or -1 axis can not be 0, " << "but got m_=" << m_ << ", n_=" << n_; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/matrix_band_part_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/matrix_band_part_cpu_kernel.h index f06e8926382..1a412c3baa7 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/matrix_band_part_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/matrix_band_part_cpu_kernel.h @@ -55,8 +55,8 @@ class MatrixBandPartCpuKernelMod : public NativeCpuKernelMod, public MatchKernel bool LaunchKernelNotBroadcast(const T *x_ptr, const LU *lower_ptr, const LU *upper_ptr, T *output_ptr); template bool LaunchKernelBroadcast(const T *x_ptr, const LU *lower_ptr, const LU *upper_ptr, T *output_ptr); - void BroadcastShape(const std::vector &x_shape, const std::vector &lower_shape, - const std::vector &upper_shape, const std::vector &output_shape); + void BroadcastShape(const ShapeVector &x_shape, const ShapeVector &lower_shape, const ShapeVector &upper_shape, + const ShapeVector &output_shape); bool is_null_input_{false}; size_t dim_size_{1}; size_t output_element_num_{0}; @@ -66,10 +66,10 @@ class MatrixBandPartCpuKernelMod : public NativeCpuKernelMod, public MatchKernel size_t lower_{0}; size_t upper_{0}; bool need_broadcast_; - std::vector broadcast_x_shape_; - std::vector broadcast_lower_shape_; - std::vector broadcast_upper_shape_; - std::vector broadcast_output_shape_; + ShapeVector broadcast_x_shape_; + ShapeVector broadcast_lower_shape_; + ShapeVector broadcast_upper_shape_; + ShapeVector broadcast_output_shape_; }; } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/matrix_determinant_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/matrix_determinant_cpu_kernel.cc index 
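// Aside: BroadcastShape above expands every operand into a fixed kMaxDims
// frame pre-filled with 1s and copies the known dims from the front, so the
// leading batch dimensions of x, lower and upper line up for broadcasting.
// Minimal sketch of the padding step:
#include <cstdint>
#include <vector>

using ShapeVector = std::vector<int64_t>;

ShapeVector ExpandToRank(const ShapeVector &shape, size_t max_dims) {
  ShapeVector expanded(max_dims, 1);  // unset dims broadcast as size 1
  for (size_t i = 0; i < shape.size(); i++) {
    expanded[i] = shape[i];
  }
  return expanded;  // e.g. {4, 5} with max_dims = 4 -> {4, 5, 1, 1}
}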
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/matrix_determinant_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/matrix_determinant_cpu_kernel.cc index d17e9d0af6e..e2e1596e037 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/matrix_determinant_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/matrix_determinant_cpu_kernel.cc @@ -76,8 +76,8 @@ void MatrixDeterminantCpuKernelMod::LaunchMatrixDeterminant(const std::vector<AddressPtr> std::pair<MatrixDiag::Alignment, MatrixDiag::Alignment> alignment_{MatrixDiag::RIGHT, MatrixDiag::LEFT}; - std::vector<size_t> shapes_{}; + ShapeVector shapes_{}; int64_t out_range_size_{1}; size_t dim_size_{1}; int64_t m_{1}; int64_t n_{1}; - std::vector<size_t> out_shapes_{}; + ShapeVector out_shapes_{}; CNodeWeakPtr node_wpt_; }; } // namespace kernel
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/matrix_diag_part_v3_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/matrix_diag_part_v3_cpu_kernel.cc index 603e0a8fa6d..371708a65b1 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/matrix_diag_part_v3_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/matrix_diag_part_v3_cpu_kernel.cc @@ -117,9 +117,9 @@ bool MatrixDiagPartV3CpuKernelMod::LaunchKernel(const std::vector<AddressPtr> static_cast<size_t>(inputs[0]->size / sizeof(T)); num_array_ = (SizeToLong(input_numelements)) / (num_rows_ * num_cols_);
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/matrix_diag_part_v3_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/matrix_diag_part_v3_cpu_kernel.h index 7013e3b7ebc..83ebe847a47 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/matrix_diag_part_v3_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/matrix_diag_part_v3_cpu_kernel.h @@ -52,8 +52,8 @@ class MatrixDiagPartV3CpuKernelMod : public DeprecatedNativeCpuKernelMod { template <typename T> bool DoLaunch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs); - std::vector<size_t> x_shape_; - std::vector<size_t> k_shape_; + std::vector<int64_t> x_shape_; + std::vector<int64_t> k_shape_; TypeId input_dtype_; std::string align_; int64_t num_diags_ = 1;
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/matrix_diag_v3_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/matrix_diag_v3_cpu_kernel.cc index 36e414f4f87..a9f220f59a4 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/matrix_diag_v3_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/matrix_diag_v3_cpu_kernel.cc @@ -99,7 +99,7 @@ bool MatrixDiagV3CpuKernelMod::LaunchKernel(const std::vector<AddressPtr> auto k_data = reinterpret_cast<int32_t *>(inputs[1]->addr); MS_EXCEPTION_IF_NULL(k_data);
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/matrix_diag_v3_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/matrix_diag_v3_cpu_kernel.h index d3014b8ed16..91da7665bca 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/matrix_diag_v3_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/matrix_diag_v3_cpu_kernel.h @@ -52,8 +52,8 @@ class MatrixDiagV3CpuKernelMod : public DeprecatedNativeCpuKernelMod { template <typename T> bool DoLaunch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs); - std::vector<size_t> diagonal_shape_; - std::vector<size_t> k_shape_; + std::vector<int64_t> diagonal_shape_; + std::vector<int64_t> k_shape_; TypeId diagonal_data_type_; std::string align_; bool align_superdiag_ = true;
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/matrix_inverse_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/matrix_inverse_cpu_kernel.cc index f970a54c9b2..ce20e91f11c 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/matrix_inverse_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/matrix_inverse_cpu_kernel.cc @@ -68,7 +68,7 @@ void MatrixInverseCpuKernelMod::LaunchMatrixInverse(const std::vector<AddressPtr> auto output_ptr = reinterpret_cast<T *>(outputs[0]->addr); MS_EXCEPTION_IF_NULL(output_ptr); // Judge whether the input shape matches - auto shape = common::AnfAlgo::GetPrevNodeOutputInferShape(node_, 0); + auto shape = Convert2SizeT(common::AnfAlgo::GetPrevNodeOutputInferShape(node_, 0)); if (shape.size() < kNumber2) { MS_LOG(EXCEPTION) << "Input x must be at least rank 2."; }
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/matrix_set_diag_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/matrix_set_diag_cpu_kernel.cc index 4339e13b7a7..6cda9aab531 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/matrix_set_diag_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/matrix_set_diag_cpu_kernel.cc @@ -46,6 +46,9 @@ void MatrixSetDiagCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { auto diag_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, diag_index); auto diag_k_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, diag_k_index); auto output_shape = common::AnfAlgo::GetOutputInferShape(kernel_node, output_index); + if (AnfAlgo::IsShapesDynamic({input_shape, diag_shape, diag_k_shape, output_shape})) { + return; + } constexpr size_t temporary_2d_dim = 2; constexpr size_t temporary_1d_dim = 1; @@ -60,13 +63,13 @@ void MatrixSetDiagCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { } size_t input_rank = input_shape.size(); for (size_t i = 0; i < input_rank - temporary_2d_dim; ++i) { - outer_batch_ *= SizeToInt(input_shape.at(i)); + outer_batch_ *= LongToInt(input_shape.at(i)); } input_shape_ = input_shape; - inner_rows_ = SizeToInt(input_shape.at(input_rank - temporary_2d_dim)); - inner_cols_ = SizeToInt(input_shape.at(input_rank - temporary_1d_dim)); + inner_rows_ = LongToInt(input_shape.at(input_rank - temporary_2d_dim)); + inner_cols_ = LongToInt(input_shape.at(input_rank - temporary_1d_dim)); - expected_num_diags_ = diag_shape.size() == input_rank ? SizeToInt(diag_shape.at(input_rank - temporary_2d_dim)) : 1; + expected_num_diags_ = diag_shape.size() == input_rank ? LongToInt(diag_shape.at(input_rank - temporary_2d_dim)) : 1; data_type_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); }
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/matrix_set_diag_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/matrix_set_diag_cpu_kernel.h index 8d119b7d304..70c3682b413 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/matrix_set_diag_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/matrix_set_diag_cpu_kernel.h @@ -73,7 +73,7 @@ class MatrixSetDiagCpuKernelMod : public DeprecatedNativeCpuKernelMod { int max_diag_len_{0}; int outer_batch_{1}; bool is_single_diag_{true}; - std::vector<size_t> input_shape_; + ShapeVector input_shape_; // std::pair<MatrixDiag::Alignment, MatrixDiag::Alignment> alignment_{MatrixDiag::RIGHT, MatrixDiag::LEFT}; TypeId data_type_{0};
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/matrix_set_diag_v3_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/matrix_set_diag_v3_cpu_kernel.cc index 2e2ec71d962..9b38cd10c27 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/matrix_set_diag_v3_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/matrix_set_diag_v3_cpu_kernel.cc @@ -92,15 +92,15 @@ bool MatrixSetDiagV3CpuKernelMod::LaunchKernel(const std::vector<AddressPtr> + input_columns_ = static_cast<size_t>(x_shape_[input_dims - 1]); + input_rows_ = static_cast<size_t>(x_shape_[input_dims - toCalRow]); input_numelements_ = static_cast<size_t>(inputs[0]->size / sizeof(T)); size_t diagonal_dims = diagonal_shape_.size(); - diagonal_columns_ = diagonal_shape_[diagonal_dims - 1]; + diagonal_columns_ = static_cast<size_t>(diagonal_shape_[diagonal_dims - 1]); diagonal_rows_ = 1; if (diagonal_dims > 1) { - diagonal_rows_ = diagonal_shape_[diagonal_dims - toCalRow]; + diagonal_rows_ = static_cast<size_t>(diagonal_shape_[diagonal_dims - toCalRow]); } k_len_ = static_cast<size_t>(inputs[kIndexK]->size / sizeof(int32_t));
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/matrix_set_diag_v3_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/matrix_set_diag_v3_cpu_kernel.h index e472c73aa8a..8744af04599 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/matrix_set_diag_v3_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/matrix_set_diag_v3_cpu_kernel.h @@ -54,9 +54,9 @@ class MatrixSetDiagV3CpuKernelMod : public DeprecatedNativeCpuKernelMod { template <typename T> void singleCal(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs); - std::vector<size_t> diagonal_shape_; - std::vector<size_t> k_shape_; - std::vector<size_t> x_shape_; + std::vector<int64_t> diagonal_shape_; + std::vector<int64_t> k_shape_; + std::vector<int64_t> x_shape_; TypeId input_dtype_; std::string align_; size_t input_columns_ = 1;
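The SizeToInt → LongToInt swaps above all follow one rule of the migration: once shapes carry int64_t, any narrowing to int should be range-checked rather than silently truncated. A hedged sketch of what such a helper typically does (MindSpore's real LongToInt lives in its conversion utilities; this re-implementation is for illustration only):

#include <cstdint>
#include <limits>
#include <stdexcept>

// Checked int64_t -> int narrowing; throws instead of wrapping on overflow.
inline int LongToInt(int64_t v) {
  if (v > static_cast<int64_t>(std::numeric_limits<int>::max()) ||
      v < static_cast<int64_t>(std::numeric_limits<int>::min())) {
    throw std::out_of_range("int64_t value out of int range");
  }
  return static_cast<int>(v);
}

Usage matches the diff, e.g. inner_rows_ = LongToInt(input_shape.at(input_rank - temporary_2d_dim));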
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/maximum_grad_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/maximum_grad_cpu_kernel.cc index 795b53740a2..4729344c14c 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/maximum_grad_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/maximum_grad_cpu_kernel.cc @@ -24,7 +24,7 @@ namespace { constexpr size_t kMaximumGradInputsNum = 3; constexpr size_t kMaximumGradOutputsNum = 2; -void CheckShape(std::vector<size_t> *shape) { +void CheckShape(ShapeVector *shape) { MS_EXCEPTION_IF_NULL(shape); if (shape->empty()) { shape->push_back(1); @@ -104,19 +104,19 @@ void GetCargo(std::vector<size_t> *cargo, const std::vector<size_t> &shape, const std::vector<size_t> &dout_shape) { } } -size_t GetTensorLen(const std::vector<size_t> &shape) { - size_t len = 1; +size_t GetTensorLen(const ShapeVector &shape) { + int64_t len = 1; for (size_t i = 0; i < shape.size(); i++) { len *= shape[i]; } - return len; + return LongToSize(len); } -void GetShape(std::vector<size_t> *shape, const std::vector<size_t> &shape_, const std::vector<size_t> &dout_shape) { +void GetShape(std::vector<size_t> *shape, const ShapeVector &shape_, const ShapeVector &dout_shape) { int k = dout_shape.size() - 1; int i = shape_.size() - 1; for (; i >= 0; i--, k--) { - (*shape)[k] = shape_[i]; + (*shape)[k] = LongToSize(shape_[i]); } } @@ -144,16 +144,17 @@ void MaximumGradCpuKernelMod::LaunchKernel(const std::vector<AddressPtr> &inputs std::vector<size_t> x_cargo(dout_shape.size(), 0); std::vector<size_t> y_cargo(dout_shape.size(), 0); std::vector<size_t> dout_cargo(dout_shape.size(), 0); + auto dout_shape_sizet = Convert2SizeT(dout_shape); GetShape(&x_shape, x_shape_, dout_shape); GetShape(&y_shape, y_shape_, dout_shape); - GetCargo(&x_cargo, x_shape, dout_shape); - GetCargo(&y_cargo, y_shape, dout_shape); - GetCargo(&dout_cargo, dout_shape, dout_shape); + GetCargo(&x_cargo, x_shape, dout_shape_sizet); + GetCargo(&y_cargo, y_shape, dout_shape_sizet); + GetCargo(&dout_cargo, dout_shape_sizet, dout_shape_sizet); MaximumGradRecTask<T>(x_addr, y_addr, dout_addr, dx_addr, dy_addr, 0, 0, 0, 0, x_cargo, y_cargo, dout_cargo, x_shape, - y_shape, dout_shape); + y_shape, dout_shape_sizet); } MS_KERNEL_FACTORY_REG(NativeCpuKernelMod, MaximumGrad, MaximumGradCpuKernelMod);
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/maximum_grad_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/maximum_grad_cpu_kernel.h index a5d66252bf9..380545dc46c 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/maximum_grad_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/maximum_grad_cpu_kernel.h @@ -39,11 +39,11 @@ class MaximumGradCpuKernelMod : public DeprecatedNativeCpuKernelMod { template <typename T> void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs); - std::vector<size_t> x_shape_; - std::vector<size_t> y_shape_; - std::vector<size_t> dout_shape; - std::vector<size_t> dx_shape; - std::vector<size_t> dy_shape; + ShapeVector x_shape_; + ShapeVector y_shape_; + ShapeVector dout_shape; + ShapeVector dx_shape; + ShapeVector dy_shape; TypeId dtype_{kTypeUnknown}; }; } // namespace kernel
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/minimum_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/minimum_cpu_kernel.cc index 513ebfd376d..95043b2cb38 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/minimum_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/minimum_cpu_kernel.cc @@ -43,7 +43,7 @@ void MinimumCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { size_t max_input_shape_size = input_x_shape_.size() > input_y_shape_.size() ? input_x_shape_.size() : input_y_shape_.size(); for (size_t i = 0; i < output_shape_.size(); i++) { - output_num_ *= output_shape_[i]; + output_num_ *= static_cast<size_t>(output_shape_[i]); } if ((input_x_shape_.size() == 0 && input_y_shape_.size() != 0) || (input_x_shape_.size() != 0 && input_y_shape_.size() == 0)) { @@ -149,13 +149,13 @@ void MinimumCpuKernelMod::InitTensorBroadcastShape() { size_t input_x_dim_offset = output_shape_.size() - input_x_shape_.size(); for (size_t j = 0; j < input_x_shape_.size(); j++) { broadcast_input_x_shape_[j + input_x_dim_offset] = input_x_shape_[j]; - input_x_num_ *= input_x_shape_[j]; + input_x_num_ *= static_cast<size_t>(input_x_shape_[j]); } size_t input_y_dim_offset = output_shape_.size() - input_y_shape_.size(); for (size_t k = 0; k < input_y_shape_.size(); k++) { if (need_broadcast_) { broadcast_input_y_shape_[k + input_y_dim_offset] = input_y_shape_[k]; - input_y_num_ *= input_y_shape_[k]; + input_y_num_ *= static_cast<size_t>(input_y_shape_[k]); } } } @@ -165,20 +165,20 @@ size_t MinimumCpuKernelMod::Index(const size_t &index, const size_t &dim) const // Broadcast Arithmetic template <typename T> -void MinimumCpuKernelMod::BroadcastArithKernel(const size_t l0, const size_t l1, const size_t l2, const size_t l3, - const size_t l4, const size_t l5, const size_t l6, const size_t r0, - const size_t r1, const size_t r2, const size_t r3, const size_t r4, - const size_t r5, const size_t r6, const size_t d0, const size_t d1, - const size_t d2, const size_t d3, const size_t d4, const size_t d5, - const size_t d6, const T *input_x, const T *input_y, T *output) const { +void MinimumCpuKernelMod::BroadcastArithKernel(const int64_t l0, const int64_t l1, const int64_t l2, const int64_t l3, + const int64_t l4, const int64_t l5, const int64_t l6, const int64_t r0, + const int64_t r1, const int64_t r2, const int64_t r3, const int64_t r4, + const int64_t r5, const int64_t r6, const int64_t d0, const int64_t d1, + const int64_t d2, const int64_t d3, const int64_t d4, const int64_t d5, + const int64_t d6, const T *input_x, const T *input_y, T *output) const { for (size_t pos = 0; pos < output_num_; pos++) { - size_t i = pos / (d1 * d2 * d3 * d4 * d5 * d6) % d0; - size_t j = pos / (d2 * d3 * d4 * d5 * d6) % d1; - size_t k = pos / (d3 * d4 * d5 * d6) % d2; - size_t l = pos / (d4 * d5 * d6) % d3; - size_t m = pos / (d5 * d6) % d4; - size_t n = pos / d6 % d5; - size_t o = pos % d6; + int64_t i = pos / (d1 * d2 * d3 * d4 * d5 * d6) % d0; + int64_t j = pos / (d2 * d3 * d4 * d5 * d6) % d1; + int64_t k = pos / (d3 * d4 * d5 * d6) % d2; + int64_t l = pos / (d4 * d5 * d6) % d3; + int64_t m = pos / (d5 * d6) % d4; + int64_t n = pos / d6 % d5; + int64_t o = pos % d6; size_t l_index = Index(i, l0) * l1 * l2 * l3 * l4 * l5 * l6; l_index += Index(j, l1) * l2 * l3 * l4 * l5 * l6;
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/minimum_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/minimum_cpu_kernel.h index 52775a5a6e6..a6c6934fefd 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/minimum_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/minimum_cpu_kernel.h @@ -45,11 +45,11 @@ class MinimumCpuKernelMod : public DeprecatedNativeCpuKernelMod { // Broadcast Arithmetic template <typename T> - void BroadcastArithKernel(const size_t l0, const size_t l1, const size_t l2, const size_t l3, const size_t l4, - const size_t l5, const size_t l6, const size_t r0, const size_t r1, const size_t r2, - const size_t r3, const size_t r4, const size_t r5, const size_t r6, const size_t d0, - const size_t d1, const size_t d2, const size_t d3, const size_t d4, const size_t d5, - const size_t d6, const T *input_x, const T *input_y, T *output) const; + void BroadcastArithKernel(const int64_t l0, const int64_t l1, const int64_t l2, const int64_t l3, const int64_t l4, + const int64_t l5, const int64_t l6, const int64_t r0, const int64_t r1, const int64_t r2, + const int64_t r3, const int64_t r4, const int64_t r5, const int64_t r6, const int64_t d0, + const int64_t d1, const int64_t d2, const int64_t d3, const int64_t d4, const int64_t d5, + const int64_t d6, const T *input_x, const T *input_y, T *output) const; template <typename T> T MinimumFunc(const T &lhs, const T &rhs) const { return lhs < rhs ? lhs : rhs; @@ -72,12 +72,12 @@ class MinimumCpuKernelMod : public DeprecatedNativeCpuKernelMod { size_t input_x_num_{1}; size_t input_y_num_{1}; size_t output_num_{1}; - std::vector<size_t> input_x_shape_; - std::vector<size_t> input_y_shape_; - std::vector<size_t> output_shape_; - std::vector<size_t> broadcast_input_x_shape_; - std::vector<size_t> broadcast_input_y_shape_; - std::vector<size_t> broadcast_output_shape_; + std::vector<int64_t> input_x_shape_; + std::vector<int64_t> input_y_shape_; + std::vector<int64_t> output_shape_; + std::vector<int64_t> broadcast_input_x_shape_; + std::vector<int64_t> broadcast_input_y_shape_; + std::vector<int64_t> broadcast_output_shape_; const size_t max_dims_{7}; }; } // namespace kernel
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/minimum_grad_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/minimum_grad_cpu_kernel.cc index fa86e0be936..eeacde2250f 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/minimum_grad_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/minimum_grad_cpu_kernel.cc @@ -36,23 +36,23 @@ void GetCargo(std::vector<size_t> *cargo, const std::vector<size_t> &shape, const std::vector<size_t> &dout_shape) { } } -size_t GetTensorLen(const std::vector<size_t> &shape) { - size_t len = 1; +size_t GetTensorLen(const ShapeVector &shape) { + int64_t len = 1; for (size_t i = 0; i < shape.size(); i++) { len *= shape[i]; } - return len; + return LongToSize(len); } -void GetShape(std::vector<size_t> *shape, const std::vector<size_t> &shape_, const std::vector<size_t> &dout_shape) { +void GetShape(std::vector<size_t> *shape, const ShapeVector &shape_, const ShapeVector &dout_shape) { int k = dout_shape.size() - 1; int i = shape_.size() - 1; for (; i >= 0; i--, k--) { - (*shape)[k] = shape_[i]; + (*shape)[k] = LongToSize(shape_[i]); } } -void CheckShape(std::vector<size_t> *shape) { +void CheckShape(ShapeVector *shape) { MS_EXCEPTION_IF_NULL(shape); if (shape->empty()) { shape->push_back(1); @@ -144,16 +144,17 @@ void MinimumGradCpuKernelMod::LaunchKernel(const std::vector<AddressPtr> &inputs std::vector<size_t> x_cargo(dout_shape.size(), 0); std::vector<size_t> y_cargo(dout_shape.size(), 0); std::vector<size_t> dout_cargo(dout_shape.size(), 0); + auto dout_shape_sizet = Convert2SizeT(dout_shape); GetShape(&x_shape, x_shape_, dout_shape); GetShape(&y_shape, y_shape_, dout_shape); - GetCargo(&x_cargo, x_shape, dout_shape); - GetCargo(&y_cargo, y_shape, dout_shape); - GetCargo(&dout_cargo, dout_shape, dout_shape); + GetCargo(&x_cargo, x_shape, dout_shape_sizet); + GetCargo(&y_cargo, y_shape, dout_shape_sizet); + GetCargo(&dout_cargo, dout_shape_sizet, dout_shape_sizet); MinimumGradRecTask<T>(x_addr, y_addr, dout_addr, dx_addr, dy_addr, 0, 0, 0, 0, x_cargo, y_cargo, dout_cargo, x_shape, - y_shape, dout_shape); + y_shape, dout_shape_sizet); } MS_KERNEL_FACTORY_REG(NativeCpuKernelMod, MinimumGrad, MinimumGradCpuKernelMod);
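The x_cargo/y_cargo/dout_cargo vectors fed to Maximum/MinimumGradRecTask above are per-axis strides: cargo[i] is how many elements one step along axis i spans. The kernels' own GetCargo also handles broadcasting against dout_shape; the following is only a simplified row-major stride sketch under the assumption of equal ranks (ComputeStrides is a hypothetical name, not the kernel helper):

#include <cstddef>
#include <vector>

// cargo must already be sized to shape.size(); shape must be non-empty.
// For shape {2, 3, 4} this yields strides {12, 4, 1}.
void ComputeStrides(const std::vector<size_t> &shape, std::vector<size_t> *cargo) {
  int i = static_cast<int>(shape.size()) - 1;
  (*cargo)[static_cast<size_t>(i)] = 1;
  for (--i; i >= 0; --i) {
    (*cargo)[static_cast<size_t>(i)] =
        (*cargo)[static_cast<size_t>(i) + 1] * shape[static_cast<size_t>(i) + 1];
  }
}

Keeping the cargo math in size_t while the stored shapes move to int64_t is why the diff introduces the Convert2SizeT(dout_shape) bridge before calling GetCargo.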
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/minimum_grad_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/minimum_grad_cpu_kernel.h index f34518e13b6..850d32fbc50 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/minimum_grad_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/minimum_grad_cpu_kernel.h @@ -39,11 +39,11 @@ class MinimumGradCpuKernelMod : public DeprecatedNativeCpuKernelMod { template <typename T> void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs); - std::vector<size_t> x_shape_; - std::vector<size_t> y_shape_; - std::vector<size_t> dout_shape; - std::vector<size_t> dx_shape; - std::vector<size_t> dy_shape; + ShapeVector x_shape_; + ShapeVector y_shape_; + ShapeVector dout_shape; + ShapeVector dx_shape; + ShapeVector dy_shape; TypeId dtype_{kTypeUnknown}; }; } // namespace kernel
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/mirror_pad_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/mirror_pad_cpu_kernel.cc index 3f47fe053a1..4cfe8783884 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/mirror_pad_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/mirror_pad_cpu_kernel.cc @@ -52,24 +52,18 @@ void MirrorPadCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the 'mode' must be 'REFLECT' or 'SYMMETRIC', but got " << mode; } - std::vector<size_t> input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); - shape_size_ = input_shape.size(); - (void)input_shape.insert(input_shape.begin(), kPadMaxSupportDim - shape_size_, 1); + input_shape_ = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + shape_size_ = input_shape_.size(); + (void)input_shape_.insert(input_shape_.begin(), SizeToLong(kPadMaxSupportDim - shape_size_), 1); shape_size_ = kPadMaxSupportDim; - for (size_t i = 0; i < shape_size_; ++i) { - tensor_size_ *= input_shape[i]; - input_shape_.push_back(SizeToLong(input_shape[i])); - } + tensor_size_ = SizeOf(input_shape_); - std::vector<size_t> padding_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); - num_paddings_ = SizeToLong(padding_shape[0]); + auto padding_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + num_paddings_ = padding_shape[0]; - auto output_shape = common::AnfAlgo::GetOutputInferShape(kernel_node, 0); - for (auto x : output_shape) { - output_size_ *= x; - output_shape_.push_back(SizeToLong(x)); - } + output_shape_ = common::AnfAlgo::GetOutputInferShape(kernel_node, 0); + output_size_ = SizeOf(output_shape_); int64_t max_width = input_shape_[3]; int64_t max_height = input_shape_[2];
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/mirror_pad_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/mirror_pad_cpu_kernel.h index 70ca1692971..44a3b496000 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/mirror_pad_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/mirror_pad_cpu_kernel.h @@ -61,8 +61,8 @@ class MirrorPadCpuKernelMod : public DeprecatedNativeCpuKernelMod { size_t output_size_{1}; int64_t mode_{0}; int64_t num_paddings_{0}; - std::vector<int64_t> input_shape_; - std::vector<int64_t> output_shape_; + ShapeVector input_shape_; + ShapeVector output_shape_; }; } // namespace kernel } // namespace mindspore
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/mirror_pad_grad_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/mirror_pad_grad_cpu_kernel.cc index 4759c034b8a..5e0a5b51e49 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/mirror_pad_grad_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/mirror_pad_grad_cpu_kernel.cc @@ -68,35 +68,32 @@ void MirrorPadGradCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the 'mode' must be 'REFLECT' or 'SYMMETRIC', but got " << mode; } - std::vector<size_t> input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); - shape_size_ = input_shape.size(); - (void)input_shape.insert(input_shape.begin(), kPadMaxSupportDim - shape_size_, 1); + input_shape_ = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + shape_size_ = input_shape_.size(); + (void)input_shape_.insert(input_shape_.begin(), SizeToLong(kPadMaxSupportDim - shape_size_), 1); shape_size_ = kPadMaxSupportDim; for (size_t i = 0; i < shape_size_; ++i) { - tensor_size_ *= input_shape[i]; - input_shape_.push_back(SizeToLong(input_shape[i])); + tensor_size_ *= LongToSize(input_shape_[i]); } - std::vector<size_t> padding_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); - num_paddings_ = SizeToLong(padding_shape[0]); + auto padding_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + num_paddings_ = padding_shape[0]; - std::vector<size_t> output_shape = common::AnfAlgo::GetOutputInferShape(kernel_node, 0); - - if (output_shape.size() == 4) { - } else if (output_shape.size() == 3) { - (void)output_shape.insert(output_shape.begin(), 1); // batch padding - } else if (output_shape.size() == 2) { - (void)output_shape.insert(output_shape.begin(), 2, 1); // channel padding + output_shape_ = common::AnfAlgo::GetOutputInferShape(kernel_node, 0); + if (output_shape_.size() == 4) { + } else if (output_shape_.size() == 3) { + (void)output_shape_.insert(output_shape_.begin(), 1); // batch padding + } else if (output_shape_.size() == 2) { + (void)output_shape_.insert(output_shape_.begin(), 2, 1); // channel padding } - for (auto x : output_shape) { - output_size_ *= x; - output_shape_.push_back(SizeToLong(x)); + for (auto x : output_shape_) { + output_size_ *= SizeToLong(x); } for (size_t i = 0; i < 2; i++) { - workspace_size_ *= output_shape[i]; - workspace_size_ *= input_shape[i + 2]; + workspace_size_ *= LongToSize(output_shape_[i]); + workspace_size_ *= LongToSize(input_shape_[i + 2]); } int64_t max_width = input_shape_[3];
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/addn_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/addn_cpu_kernel.cc index 95710e0ed0e..3e3e2cfde4f 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/addn_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/addn_cpu_kernel.cc @@ -53,9 +53,12 @@ void AddNCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { } CheckParam(kernel_node); dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); - std::vector<size_t> src0_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); - std::vector<size_t> src1_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1); - std::vector<size_t> dst_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); + std::vector<int64_t> src0_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); + std::vector<int64_t> src1_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1); + std::vector<int64_t> dst_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); + if (AnfAlgo::IsShapesDynamic({src0_shape, src1_shape, dst_shape})) { + return; + } dnnl::memory::desc src0_mem_desc = GetDefaultMemDesc(src0_shape); dnnl::memory::desc src1_mem_desc = GetDefaultMemDesc(src1_shape); dnnl::memory::desc dst_mem_desc = GetDefaultMemDesc(dst_shape); @@ -122,11 +125,17 @@ bool AddNCpuKernelMod::LaunchKernel(const std::vector<AddressPtr> &input void AddNCpuKernelMod::CheckParam(const CNodePtr &kernel_node) { auto src0_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); auto dst_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); + if (AnfAlgo::IsShapesDynamic({src0_shape, dst_shape})) { + return; + } if (src0_shape != dst_shape) { MS_LOG(EXCEPTION) << "AddN output shape must be equal to input shape."; } for (size_t index = 1; index < input_num_; ++index) { auto src_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, index); + if (IsDynamic(src_shape)) { + return; + } if (src0_shape != src_shape) { MS_LOG(EXCEPTION) << "AddN input shapes must be equal."; }
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/batch_norm_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/batch_norm_cpu_kernel.cc index a749ed4f18b..6d4addf9182 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/batch_norm_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/batch_norm_cpu_kernel.cc @@ -30,8 +30,8 @@ constexpr size_t kBatchNormInputShapeSize2 = 2; void BatchNormCpuKernelMod::InitInputOutputSize(const CNodePtr &kernel_node) { DeprecatedNativeCpuKernelMod::InitInputOutputSize(kernel_node); size_t type_size = sizeof(float); - std::vector<size_t> shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); - size_t tensor_size = shape[1] * 2 * type_size; // [2, c] to store scale and bias + auto shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); + size_t tensor_size = static_cast<size_t>(shape[1]) * 2 * type_size; // [2, c] to store scale and bias (void)workspace_size_list_.emplace_back(tensor_size); } @@ -40,7 +40,7 @@ void BatchNormCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); is_train = common::AnfAlgo::GetNodeAttr<bool>(kernel_node, "is_training"); momentum = common::AnfAlgo::GetNodeAttr<float>(kernel_node, "momentum"); - std::vector<size_t> x_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); + auto x_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); if (x_shape.size() == kBatchNormInputShapeSize2) { (void)x_shape.insert(x_shape.end(), kBatchNormInputShapeSize - kBatchNormInputShapeSize2, 1); } else if (x_shape.size() != kBatchNormInputShapeSize) {
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/batch_norm_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/batch_norm_cpu_kernel.h index 9f692db3e16..a4f870b3acf 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/batch_norm_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/batch_norm_cpu_kernel.h @@ -51,10 +51,10 @@ class BatchNormCpuKernelMod : public DeprecatedMKLCpuKernelMod { private: bool is_train{false}; float momentum{0.9}; - size_t batch_size{0}; - size_t channel{0}; - size_t hw_size{0}; - size_t nhw_size{0}; + int64_t batch_size{0}; + int64_t channel{0}; + int64_t hw_size{0}; + int64_t nhw_size{0}; }; } // namespace kernel } // namespace mindspore
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/batch_norm_grad_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/batch_norm_grad_cpu_kernel.cc index 7da85d1ce7d..459e11c7caf 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/batch_norm_grad_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/batch_norm_grad_cpu_kernel.cc @@ -31,8 +31,8 @@ constexpr size_t kScaleShiftNum = 2; void BatchNormGradCpuKernelMod::InitInputOutputSize(const CNodePtr &kernel_node) { DeprecatedNativeCpuKernelMod::InitInputOutputSize(kernel_node); size_t type_size = sizeof(float); - std::vector<size_t> shape = AnfAlgo::GetInputDeviceShape(kernel_node, Y_BACKPROP); - size_t tensor_size = shape[C] * kScaleShiftNum * type_size; + auto shape = AnfAlgo::GetInputDeviceShape(kernel_node, Y_BACKPROP); + size_t tensor_size = static_cast<size_t>(shape[C]) * kScaleShiftNum * type_size; input_size_list_.pop_back(); // [2, c] to store scale and bias (void)workspace_size_list_.emplace_back(tensor_size); @@ -43,7 +43,7 @@ void BatchNormGradCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); - std::vector<size_t> x_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); + auto x_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); if (x_shape.size() == NC_LEN) { (void)x_shape.insert(x_shape.end(), (SHAPE_4D - NC_LEN), 1); } else if (x_shape.size() != SHAPE_4D) {
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/batch_norm_grad_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/batch_norm_grad_cpu_kernel.h index 4a099d39244..95a83224204 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/batch_norm_grad_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/batch_norm_grad_cpu_kernel.h @@ -49,10 +49,10 @@ class BatchNormGradCpuKernelMod : public DeprecatedMKLCpuKernelMod { private: float momentum{0.9}; - size_t batch_size{0}; - size_t channel{0}; - size_t hw_size{0}; - size_t nhw_size{0}; + int64_t batch_size{0}; + int64_t channel{0}; + int64_t hw_size{0}; + int64_t nhw_size{0}; enum format_ { N, C, H, W }; enum input_list_ { Y_BACKPROP, X, SCALE, SAVE_MEAN, SAVE_VARIANCE, RESERVE }; enum workspace_list_ { SCALE_BIAS, DIFF_SCALE_BIAS };
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/conv_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/conv_cpu_kernel.cc index 62f8ac97c46..663738db988 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/conv_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/conv_cpu_kernel.cc @@ -30,9 +30,10 @@ constexpr size_t kConvOutputsNum = 1; void ConvCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); - std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); - std::vector<size_t> weight_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1); - std::vector<size_t> dst_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); + std::vector<int64_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); + std::vector<int64_t> weight_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1); + std::vector<int64_t> dst_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); + size_t src_dim = src_shape.size(); if (src_dim != SHAPE_4D && src_dim != SHAPE_5D) { MS_LOG(EXCEPTION) << "Conv only supports 4D/5D input, but got " << src_dim << "D!"; @@ -44,9 +45,10 @@ void ConvCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { if (src_dim == SHAPE_5D && format != NCDHW) { MS_LOG(EXCEPTION) << kernel_name_ << " only supports 5D input with format NCDHW, but got format " << format; } - dnnl::memory::dims kernel_size; - (void)std::transform(weight_shape.begin() + NC_LEN, weight_shape.end(), std::back_inserter(kernel_size), - [](const size_t &value) { return SizeToLong(value); }); + if (AnfAlgo::IsShapesDynamic({src_shape, weight_shape, dst_shape})) { + return; + } + dnnl::memory::dims kernel_size(weight_shape.begin() + NC_LEN, weight_shape.end()); const size_t group = LongToSize(common::AnfAlgo::GetNodeAttr<int64_t>(kernel_node, GROUP)); if (group > 1) { if (src_shape[1] % group != 0) {
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/conv_grad_filter_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/conv_grad_filter_cpu_kernel.cc index 07703b255c9..8cf39065366 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/conv_grad_filter_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/conv_grad_filter_cpu_kernel.cc @@ -36,9 +36,10 @@ void ConvGradFilterCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { src_index_ = 1; diff_dst_index_ = 0; } - std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, src_index_); - std::vector<size_t> dst_shape = AnfAlgo::GetInputDeviceShape(kernel_node, diff_dst_index_); - std::vector<size_t> weight_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); + std::vector<int64_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, src_index_); + std::vector<int64_t> dst_shape = AnfAlgo::GetInputDeviceShape(kernel_node, diff_dst_index_); + std::vector<int64_t> weight_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); + size_t src_dim = src_shape.size(); if (src_dim != SHAPE_4D && src_dim != SHAPE_5D) { MS_LOG(EXCEPTION) << "Conv Grad only supports 4D/5D input, but got " << src_dim << "D!"; @@ -50,9 +51,11 @@ void ConvGradFilterCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { if (src_dim == SHAPE_5D && format != NCDHW) { MS_LOG(EXCEPTION) << kernel_name_ << " only supports 5D input with NCDHW format, but got fornat " << format; } - dnnl::memory::dims kernel_size; - (void)std::transform(weight_shape.begin() + NC_LEN, weight_shape.end(), std::back_inserter(kernel_size), - [](const size_t &value) { return SizeToLong(value); }); + + if (AnfAlgo::IsShapesDynamic({src_shape, weight_shape, dst_shape})) { + return; + } + dnnl::memory::dims kernel_size(weight_shape.begin(), weight_shape.end()); const size_t group = LongToSize(common::AnfAlgo::GetNodeAttr<int64_t>(kernel_node, GROUP)); if (group > 1) { if (src_shape[1] % group != 0) {
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/conv_grad_input_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/conv_grad_input_cpu_kernel.cc index a058637f495..1374c5e729e 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/conv_grad_input_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/conv_grad_input_cpu_kernel.cc @@ -37,9 +37,9 @@ void ConvGradInputCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { weight_index_ = 1; diff_dst_index_ = 0; } - std::vector<size_t> src_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); - std::vector<size_t> weight_shape = AnfAlgo::GetInputDeviceShape(kernel_node, weight_index_); - std::vector<size_t> dst_shape = AnfAlgo::GetInputDeviceShape(kernel_node, diff_dst_index_); + std::vector<int64_t> src_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); + std::vector<int64_t> weight_shape = AnfAlgo::GetInputDeviceShape(kernel_node, weight_index_); + std::vector<int64_t> dst_shape = AnfAlgo::GetInputDeviceShape(kernel_node, diff_dst_index_); size_t src_dim = src_shape.size(); if (src_dim != SHAPE_4D && src_dim != SHAPE_5D) { MS_LOG(EXCEPTION) << "Conv grad only supports 4D/5D input, but got " << src_dim << "D!"; @@ -51,9 +51,10 @@ void ConvGradInputCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { if (src_dim == SHAPE_5D && format != NCDHW) { MS_LOG(EXCEPTION) << kernel_name_ << " only supports 5D input with NCDHW format, but got format " << format; } - dnnl::memory::dims kernel_size; - (void)std::transform(weight_shape.begin() + NC_LEN, weight_shape.end(), std::back_inserter(kernel_size), - [](const size_t &value) { return SizeToLong(value); }); + if (AnfAlgo::IsShapesDynamic({src_shape, weight_shape, dst_shape})) { + return; + } + dnnl::memory::dims kernel_size(weight_shape.begin(), weight_shape.end()); const size_t group = LongToSize(common::AnfAlgo::GetNodeAttr<int64_t>(kernel_node, GROUP)); if (group > 1) { if (src_shape[1] % group != 0) {
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/log_softmax_grad_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/log_softmax_grad_cpu_kernel.cc index aef3d12efe9..8340c09f884 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/log_softmax_grad_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/log_softmax_grad_cpu_kernel.cc @@ -29,7 +29,10 @@ constexpr size_t kLogSoftmaxGradOutputsNum = 1; void LogSoftmaxGradCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); - std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); + auto src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); + if (IsDynamic(src_shape)) { + return; + } int axis = common::AnfAlgo::GetNodeAttr<int64_t>(kernel_node, AXIS); if (axis >= SizeToInt(src_shape.size())) { axis = SizeToInt(src_shape.size()) - 1;
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/lrn_grad_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/lrn_grad_cpu_kernel.cc index 18adaa48a2e..bcb953c3884 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/lrn_grad_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/lrn_grad_cpu_kernel.cc @@ -67,10 +67,8 @@ int LrnGradCpuKernelMod::Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs, if ((ret = KernelMod::Resize(base_operator, inputs, outputs)) != KRET_OK) { return ret; } - std::vector<size_t> input_shape_; auto input_shape = inputs.at(kIndex0)->GetShapeVector(); - (void)std::transform(input_shape.begin(), input_shape.end(), std::back_inserter(input_shape_), LongToSize); - dnnl::memory::desc src_desc = GetDefaultMemDesc(input_shape_); + dnnl::memory::desc src_desc = GetDefaultMemDesc(input_shape); const auto lrn_multiple = 2; dnnl::memory::dim local_size = lrn_multiple * depth_radius_ + 1; const auto dnnl_alpha = static_cast<float>(local_size) * alpha_;
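The IsDynamic/IsShapesDynamic early returns introduced above are the other half of the migration: signed dimensions can encode "unknown" as a negative value (conventionally -1, with -2 for unknown rank), which the old size_t shapes simply could not represent. A sketch of the guards under that assumption (the real helpers live in MindSpore's shape utilities; these bodies are illustrative):

#include <algorithm>
#include <cstdint>
#include <vector>

using ShapeVector = std::vector<int64_t>;

// A shape is dynamic if any dimension is still unknown (negative).
inline bool IsDynamic(const ShapeVector &shape) {
  return std::any_of(shape.begin(), shape.end(), [](int64_t dim) { return dim < 0; });
}

inline bool IsShapesDynamic(const std::vector<ShapeVector> &shapes) {
  return std::any_of(shapes.begin(), shapes.end(),
                     [](const ShapeVector &s) { return IsDynamic(s); });
}

Returning early from InitKernel defers all size-dependent setup to Resize, which runs once concrete shapes are known.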
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/lstm_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/lstm_cpu_kernel.cc index 2c07eb87c59..d0d91fa7edb 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/lstm_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/lstm_cpu_kernel.cc @@ -40,13 +40,13 @@ void LstmCpuKernelMod::InitInputOutputSize(const CNodePtr &kernel_node) { auto output_num = common::AnfAlgo::GetOutputTensorNum(kernel_node); auto output_type = common::AnfAlgo::GetOutputInferDataType(kernel_node, 0); auto output_types = std::vector<TypeId>(output_num, output_type); - std::vector<std::vector<size_t>> output_shapes; + std::vector<ShapeVector> output_shapes; for (size_t output_index = 0; output_index < output_num; ++output_index) { auto shape = common::AnfAlgo::GetOutputInferShape(kernel_node, output_index); (void)output_shapes.emplace_back(shape); } size_t len = reserve_size_ / IntToSize(kGateNum); - output_shapes[kOutputWorkSpaceIndex] = {len, 1}; + output_shapes[kOutputWorkSpaceIndex] = {SizeToLong(len), 1}; common::AnfAlgo::SetOutputInferTypeAndShape(output_types, output_shapes, kernel_node.get()); } @@ -122,16 +122,19 @@ void LstmCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { void LstmCpuKernelMod::CheckParam(const CNodePtr &kernel_node) { constexpr int kBidirectional = 2; - std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); - std::vector<size_t> src_h_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1); - std::vector<size_t> src_c_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 2); + auto src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); + auto src_h_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1); + auto src_c_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 2); + if (AnfAlgo::IsShapesDynamic({src_shape, src_h_shape, src_c_shape})) { + return; + } bidirectional_ = common::AnfAlgo::GetNodeAttr<bool>(kernel_node, "bidirectional"); input_size_ = LongToInt(common::AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "input_size")); hidden_size_ = LongToInt(common::AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "hidden_size")); num_layers_ = LongToInt(common::AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "num_layers")); has_bias_ = common::AnfAlgo::GetNodeAttr<bool>(kernel_node, "has_bias"); - batch_size_ = SizeToInt(src_shape[1]); - seq_len_ = SizeToInt(src_shape[0]); + batch_size_ = src_shape[1]; + seq_len_ = src_shape[0]; num_directions_ = 1; if (bidirectional_) { num_directions_ = kBidirectional; @@ -149,7 +152,7 @@ void LstmCpuKernelMod::CheckParam(const CNodePtr &kernel_node) { } weight_size_ = weight_size_ * num_directions_; weight_h_size_ = weight_h_size_ * num_directions_; - if (num_directions_ * num_layers_ != SizeToInt(src_h_shape[0])) { + if (num_directions_ * num_layers_ != src_h_shape[0]) { MS_LOG(EXCEPTION) << "Error iteration shape!"; } if (src_shape.size() != 3 || src_h_shape.size() != 3 || src_c_shape.size() != 3) {
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/lstm_grad_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/lstm_grad_cpu_kernel.cc index 167f16db585..e529452e5c4 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/lstm_grad_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/lstm_grad_cpu_kernel.cc @@ -142,9 +142,12 @@ void LSTMGradCpuKernelMod::AddArgumentOp(const dnnl::memory::desc &src_desc, const dnnl::memory::desc &src_h_desc, } void LSTMGradCpuKernelMod::CheckParam(const CNodePtr &kernel_node) { - std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); - std::vector<size_t> src_h_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1); - std::vector<size_t> src_c_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 2); + auto src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); + auto src_h_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1); + auto src_c_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 2); + if (AnfAlgo::IsShapesDynamic({src_shape, src_h_shape, src_c_shape})) { + return; + } if (src_shape.size() != 3 || src_h_shape.size() != 3 || src_c_shape.size() != 3) { MS_LOG(EXCEPTION) << "Lstm only support 3-D input!"; } @@ -153,8 +156,8 @@ void LSTMGradCpuKernelMod::CheckParam(const CNodePtr &kernel_node) { hidden_size_ = common::AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "hidden_size"); num_layers_ = common::AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "num_layers"); has_bias_ = common::AnfAlgo::GetNodeAttr<bool>(kernel_node, "has_bias"); - batch_size_ = SizeToInt(src_shape[1]); - seq_len_ = SizeToInt(src_shape[0]); + batch_size_ = src_shape[1]; + seq_len_ = src_shape[0]; num_directions_ = 1; if (bidirectional_) { num_directions_ = 2; @@ -172,7 +175,7 @@ void LSTMGradCpuKernelMod::CheckParam(const CNodePtr &kernel_node) { } weight_size_ = weight_size_ * num_directions_; weight_h_size_ = weight_h_size_ * num_directions_; - if (num_directions_ * num_layers_ != SizeToLong(src_h_shape[0])) { + if (num_directions_ * num_layers_ != src_h_shape[0]) { MS_LOG(EXCEPTION) << "Error iteration shape!"; } }
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/matmul_cpu_kernel_func.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/matmul_cpu_kernel_func.cc index 7281c0becda..b7ca5bea9ce 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/matmul_cpu_kernel_func.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/matmul_cpu_kernel_func.cc @@ -36,9 +36,12 @@ using dims = dnnl::memory::dims; void MatMulCpuKernelFunc::InitFunc(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); - std::vector<size_t> a_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); - std::vector<size_t> b_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1); - std::vector<size_t> o_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); + std::vector<int64_t> a_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); + std::vector<int64_t> b_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1); + std::vector<int64_t> o_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); + if (AnfAlgo::IsShapesDynamic({a_shape, b_shape, o_shape})) { + return; + } if (a_shape.size() < kRankMin || b_shape.size() < kRankMin || o_shape.size() < kRankMin) { MS_LOG(EXCEPTION) << "The tensor rank of MatMul must be greater than or equal to " << kRankMin; } @@ -47,16 +50,16 @@ void MatMulCpuKernelFunc::InitFunc(const CNodePtr &kernel_node) { auto rank = a_shape.size(); int64_t batch = 1; for (size_t i = 0; i < rank - kIndexOffset; ++i) { - batch *= SizeToLong(a_shape[i]); + batch *= a_shape[i]; } - int64_t dim_m = SizeToLong(o_shape[rank - kIndexOffset]); - int64_t dim_n = SizeToLong(o_shape[rank - 1]); + int64_t dim_m = o_shape[rank - kIndexOffset]; + int64_t dim_n = o_shape[rank - 1]; int64_t dim_k = 1; if (trans_a) { - dim_k = SizeToLong(a_shape[rank - kIndexOffset]); + dim_k = a_shape[rank - kIndexOffset]; } else { - dim_k = SizeToLong(a_shape[rank - 1]); + dim_k = a_shape[rank - 1]; } dims src_dims, weights_dims, dst_dims, a_strides, b_strides, o_strides;
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/mkl_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/mkl_cpu_kernel.cc index 805335d688b..75f13c9d353 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/mkl_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/mkl_cpu_kernel.cc @@ -57,7 +57,7 @@ void GeneratePaddingForPadMode(const PaddingInfo &padding_info, std::vector<int64_t> -void DeprecatedMKLCpuKernelMod::GetPadding(const CNodePtr &kernel_node, const std::vector<size_t> &src_shape, +void DeprecatedMKLCpuKernelMod::GetPadding(const CNodePtr &kernel_node, const std::vector<int64_t> &src_shape, const PaddingInfo &padding_info) const { MS_EXCEPTION_IF_NULL(kernel_node); MS_EXCEPTION_IF_NULL(padding_info.padding_l); @@ -69,7 +69,7 @@ void DeprecatedMKLCpuKernelMod::GetPadding(const CNodePtr &kernel_node, const std::vector<int64_t> &src_shape, const size_t dim_exclude_nc = src_dim - NC_LEN; std::vector<int64_t> shape_exclude_nc; for (size_t i = NC_LEN; i < src_dim; ++i) { - shape_exclude_nc.push_back(SizeToLong(src_shape[i])); + shape_exclude_nc.push_back(src_shape[i]); } if (padding_info.pad_mode == PAD_MODE_LOWER_SAME || padding_info.pad_mode == PAD_MODE_UPPER_SAME) { @@ -151,7 +151,7 @@ dnnl::memory::format_tag DeprecatedMKLCpuKernelMod::GetDefaultFormatTag(const dnnl::memory::dims &dims) const { return tag_vec[rank - 1]; } -dnnl::memory::desc DeprecatedMKLCpuKernelMod::GetDefaultMemDesc(const std::vector<size_t> &shape) const { +dnnl::memory::desc DeprecatedMKLCpuKernelMod::GetDefaultMemDesc(const std::vector<int64_t> &shape) const { dnnl::memory::dims dims; if (shape.empty()) { (void)dims.insert(dims.end(), 1); @@ -264,7 +264,7 @@ void DeprecatedMKLCpuKernelMod::Reorder(dnnl::memory *src_mem, dnnl::memory *dst_mem) { MS_LOG(DEBUG) << "begin to invoke primitive::execute"; } -void MKLCpuKernelMod::GetPadding(const BaseOperatorPtr &base_operator, const std::vector<size_t> &src_shape, +void MKLCpuKernelMod::GetPadding(const BaseOperatorPtr &base_operator, const std::vector<int64_t> &src_shape, const PaddingInfo &padding_info) const { MS_EXCEPTION_IF_NULL(base_operator); MS_EXCEPTION_IF_NULL(padding_info.padding_l);
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/mkl_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/mkl_cpu_kernel.h index 288d8d825b4..8759295b48d 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/mkl_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/mkl_cpu_kernel.h @@ -190,12 +190,12 @@ class DeprecatedMKLCpuKernelMod : public DeprecatedNativeCpuKernelMod { protected: bool BinaryBroadCast(std::vector<size_t> *src0_shape, std::vector<size_t> *src1_shape, std::vector<size_t> *dst_shape) const; - void GetPadding(const CNodePtr &kernel_node, const std::vector<size_t> &src_shape, + void GetPadding(const CNodePtr &kernel_node, const std::vector<int64_t> &src_shape, const PaddingInfo &padding_info) const; void AddArgument(int arg_key, const dnnl::memory::desc &mem_desc, bool alloc = false); void SetArgumentHandle(int arg_key, void *ptr); dnnl::memory::format_tag GetDefaultFormatTag(const dnnl::memory::dims &dims) const; - dnnl::memory::desc GetDefaultMemDesc(const std::vector<size_t> &shape) const; + dnnl::memory::desc GetDefaultMemDesc(const std::vector<int64_t> &shape) const; void ExecutePrimitive(); inline dnnl::memory::desc formatted_md(const dnnl::memory::dims &dimensions, dnnl::memory::format_tag layout) const { MS_LOG(DEBUG) << "begin to invoke constructor of dnnl::memory::desc"; @@ -236,11 +236,12 @@ class MKLCpuKernelMod : public NativeCpuKernelMod { protected: bool BinaryBroadCast(std::vector<int64_t> *src0_shape, std::vector<int64_t> *src1_shape, std::vector<int64_t> *dst_shape) const; - void GetPadding(const BaseOperatorPtr &base_operator, const std::vector<size_t> &src_shape, + void GetPadding(const BaseOperatorPtr &base_operator, const std::vector<int64_t> &src_shape, const PaddingInfo &padding_info) const; void AddArgument(int arg_key, const dnnl::memory::desc &mem_desc, bool alloc = false); void SetArgumentHandle(int arg_key, void *ptr); dnnl::memory::format_tag GetDefaultFormatTag(const dnnl::memory::dims &dims) const; + dnnl::memory::desc GetExactMemDesc(const std::vector<int64_t> &shape, dnnl::memory::data_type type = dnnl::memory::data_type::f32) const; dnnl::memory::desc GetExactMemDesc(const std::vector<size_t> &shape,
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/pooling_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/pooling_cpu_kernel.cc index 1d297b29050..95c82af0bd5 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/pooling_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/pooling_cpu_kernel.cc @@ -52,11 +52,14 @@ void PoolingCpuKernelMod::InitPoolingFields(const CNodePtr &kernel_node) { void PoolingCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); InitPoolingFields(kernel_node); - std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); + auto src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); const size_t src_dim = src_shape.size(); if (src_dim != SHAPE_4D && src_dim != SHAPE_5D) { MS_LOG(EXCEPTION) << "Pooling only supports 4D/5D input, but got " << src_dim << "D!"; } + if (AnfAlgo::IsShapesDynamic({src_shape, dst_shape_})) { + return; + } const dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape); const dnnl::memory::desc dst_desc = GetDefaultMemDesc(dst_shape_); const auto format = common::AnfAlgo::GetNodeAttr<std::string>(kernel_node, FORMAT); @@ -99,9 +102,9 @@ void PoolingCpuKernelMod::EliminateInvalidPadding(float *dst) { padding_invalid_.size() + NC_LEN < SHAPE_5D) { MS_LOG(ERROR) << "The dst_shape must be 5D, the kernel and the padding_invalid must be 3D!"; } - const size_t d_max = dst_shape_[D_INDEX] - 1; - const size_t h_max = dst_shape_[H_INDEX] - 1; - const size_t w_max = dst_shape_[W_INDEX] - 1; + const auto d_max = LongToSize(dst_shape_[D_INDEX] - 1); + const auto h_max = LongToSize(dst_shape_[H_INDEX] - 1); + const auto w_max = LongToSize(dst_shape_[W_INDEX] - 1); const size_t d_index = D_INDEX - NC_LEN; const size_t h_index = H_INDEX - NC_LEN; const size_t w_index = W_INDEX - NC_LEN; @@ -130,8 +133,9 @@ void PoolingCpuKernelMod::EliminateInvalidPadding(float *dst) { const int kernel_index = std::stoi(bin, nullptr, base); const int64_t valid_kernel_size = valid_kernel_array[kernel_index]; if (valid_kernel_size != kernel_size) { - const size_t index = i * dst_shape_[D_INDEX] * dst_shape_[H_INDEX] * dst_shape_[W_INDEX] + - d * dst_shape_[H_INDEX] * dst_shape_[W_INDEX] + h * dst_shape_[W_INDEX] + w; + const size_t index = + static_cast<size_t>(i * dst_shape_[D_INDEX] * dst_shape_[H_INDEX] * dst_shape_[W_INDEX] + + d * dst_shape_[H_INDEX] * dst_shape_[W_INDEX] + h * dst_shape_[W_INDEX] + w); dst[index] = dst[index] * LongToFloat(kernel_size) / LongToFloat(valid_kernel_size); } } @@ -139,7 +143,8 @@ void PoolingCpuKernelMod::EliminateInvalidPadding(float *dst) { } } }; - ParallelLaunchAutoSearch(task, dst_shape_[N_INDEX] * dst_shape_[C_INDEX], this, &parallel_search_info_); + ParallelLaunchAutoSearch(task, static_cast<size_t>(dst_shape_[N_INDEX] * dst_shape_[C_INDEX]), this, + &parallel_search_info_); } void PoolingCpuKernelMod::ReComputeDivisor(float *dst) {
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/pooling_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/pooling_cpu_kernel.h index a9d4f6925a1..510ebeaf9c9 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/pooling_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/pooling_cpu_kernel.h @@ -48,7 +48,7 @@ class PoolingCpuKernelMod : public DeprecatedMKLCpuKernelMod { dnnl::algorithm algorithm_{dnnl::algorithm::pooling_max}; bool ceil_mode_{false}; int64_t divisor_override_{0}; - std::vector<size_t> dst_shape_; + std::vector<int64_t> dst_shape_; std::vector<int64_t> kernel_; std::vector<int64_t> padding_invalid_;
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/pooling_grad_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/pooling_grad_cpu_kernel.cc index b7526caf341..b851eb06b99 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/pooling_grad_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/pooling_grad_cpu_kernel.cc @@ -69,7 +69,10 @@ void PoolingGradCpuKernelMod::InitPoolingGradFields(const CNodePtr &kernel_node) void PoolingGradCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); InitPoolingGradFields(kernel_node); - std::vector<size_t> src_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); + auto src_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); + if (IsDynamic(src_shape)) { + return; + } const size_t src_dim = src_shape.size(); if (src_dim != SHAPE_4D && src_dim != SHAPE_5D) { MS_LOG(EXCEPTION) << "PoolingGrad only supports 4D/5D input, but got " << src_dim << "D";
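One practical payoff in the oneDNN-backed kernels above: dnnl::memory::dim is itself int64_t, so an int64_t shape converts to dnnl::memory::dims without the per-element SizeToLong transforms this patch deletes. A sketch of the construction being simplified (the f32 data type and nchw format tag are illustrative choices; the real GetDefaultMemDesc derives the tag from the rank):

#include <cstdint>
#include <vector>
#include "dnnl.hpp"

using ShapeVector = std::vector<int64_t>;

dnnl::memory::desc MakeF32MemDesc(const ShapeVector &shape) {
  // dnnl dims are int64_t, so this is a plain copy with no narrowing round-trip.
  dnnl::memory::dims dims(shape.begin(), shape.end());
  return dnnl::memory::desc(dims, dnnl::memory::data_type::f32, dnnl::memory::format_tag::nchw);
}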
a/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/reduction_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/reduction_cpu_kernel.cc index 0204f7ae9dc..33f9e87feae 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/reduction_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/reduction_cpu_kernel.cc @@ -78,6 +78,7 @@ int ReductionCpuKernelMod::Resize(const BaseOperatorPtr &base_operator, const st if (int ret = KernelMod::Resize(base_operator, inputs, outputs); ret != KRET_OK) { return ret; } + // For Scalar Tensor, input shape is empty. auto input_shape = inputs.at(kIndex0)->GetShapeVector(); is_scalar_input_ = input_shape.empty(); @@ -85,22 +86,20 @@ int ReductionCpuKernelMod::Resize(const BaseOperatorPtr &base_operator, const st return KRET_OK; } is_scalar_input_ = false; - std::vector input_shape_; - (void)std::transform(input_shape.begin(), input_shape.end(), std::back_inserter(input_shape_), LongToSize); // For Reduction kernel, mkl required keep_dims is True. // So we should recover output_shape from input_shape. // axis_'s validation has been check in core/ops/lp_norm.cc, just using it. std::vector axis; - int64_t input_rank = SizeToLong(input_shape_.size()); + int64_t input_rank = SizeToLong(input_shape.size()); (void)std::transform(axis_.begin(), axis_.end(), std::back_inserter(axis), [&input_rank](const int64_t &dim) { return dim < 0 ? LongToSize(dim + input_rank) : LongToSize(dim); }); - std::vector mkl_output_shape = input_shape_; + auto mkl_output_shape = input_shape; for (const auto &dim : axis) { mkl_output_shape[dim] = 1; } - dnnl::memory::desc src_desc = GetDefaultMemDesc(input_shape_); + dnnl::memory::desc src_desc = GetDefaultMemDesc(input_shape); dnnl::memory::desc dst_desc = GetDefaultMemDesc(mkl_output_shape); auto desc = GetReductionDesc(src_desc, dst_desc); auto prim_desc = CreateDesc(desc, engine_); diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/softmax_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/softmax_cpu_kernel.cc index b556c1ab22c..3b209b47146 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/softmax_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/softmax_cpu_kernel.cc @@ -29,7 +29,7 @@ constexpr size_t kSoftmaxOutputsNum = 1; void SoftmaxCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); - std::vector src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); + auto src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); std::vector axis_list; std::vector axis_list_me = common::AnfAlgo::GetNodeAttr>(kernel_node, AXIS); (void)std::transform(axis_list_me.begin(), axis_list_me.end(), std::back_inserter(axis_list), diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/softmax_cross_entropy_with_logits_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/softmax_cross_entropy_with_logits_cpu_kernel.cc index 76905186356..6033188d4ef 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/softmax_cross_entropy_with_logits_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/softmax_cross_entropy_with_logits_cpu_kernel.cc @@ -33,7 +33,7 @@ void SoftmaxCrossEntropyWithLogitsCpuKernelMod::InitInputOutputSize(const CNodeP DeprecatedNativeCpuKernelMod::InitInputOutputSize(kernel_node); MS_EXCEPTION_IF_NULL(kernel_node); size_t type_size = sizeof(float); - std::vector shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); + auto 
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/softmax_cross_entropy_with_logits_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/softmax_cross_entropy_with_logits_cpu_kernel.cc
index 76905186356..6033188d4ef 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/softmax_cross_entropy_with_logits_cpu_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/softmax_cross_entropy_with_logits_cpu_kernel.cc
@@ -33,7 +33,7 @@ void SoftmaxCrossEntropyWithLogitsCpuKernelMod::InitInputOutputSize(const CNodeP
   DeprecatedNativeCpuKernelMod::InitInputOutputSize(kernel_node);
   MS_EXCEPTION_IF_NULL(kernel_node);
   size_t type_size = sizeof(float);
-  std::vector<size_t> shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
+  auto shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
   size_t tensor_size = std::accumulate(shape.begin(), shape.end(), type_size, std::multiplies<size_t>());
   (void)workspace_size_list_.emplace_back(tensor_size);
 }
@@ -41,14 +41,17 @@ void SoftmaxCrossEntropyWithLogitsCpuKernelMod::InitInputOutputSize(const CNodeP
 void SoftmaxCrossEntropyWithLogitsCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
   MS_EXCEPTION_IF_NULL(kernel_node);
   kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
-  std::vector<size_t> shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
+  auto shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
+  if (AnfAlgo::IsShapesDynamic({shape})) {
+    return;
+  }
   dnnl::memory::dims mem_dims;
   (void)mem_dims.insert(mem_dims.end(), shape.begin(), shape.end());
   if (mem_dims.size() != 2) {
     MS_LOG(EXCEPTION) << "SoftmaxCrossEntropyWithLogits kernel dims invalid " << mem_dims.size();
   }
-  batch_size_ = shape[0];
-  class_num_ = shape[1];
+  batch_size_ = static_cast<size_t>(shape[0]);
+  class_num_ = static_cast<size_t>(shape[1]);
   if (batch_size_ == 0 || class_num_ == 0) {
     MS_LOG(EXCEPTION) << "Invalid batch size or class num input!";
   }
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.cc
index ffc5a05248e..66731ec86a3 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.cc
@@ -34,7 +34,7 @@ void SparseSoftmaxCrossEntropyWithLogitsCpuKernelMod::InitInputOutputSize(const
   DeprecatedNativeCpuKernelMod::InitInputOutputSize(kernel_node);
   MS_EXCEPTION_IF_NULL(kernel_node);
   size_t type_size = sizeof(float);
-  std::vector<size_t> shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
+  auto shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
   size_t tensor_size = std::accumulate(shape.begin(), shape.end(), type_size, std::multiplies<size_t>());
   (void)workspace_size_list_.emplace_back(tensor_size);
 }
@@ -42,8 +42,11 @@ void SparseSoftmaxCrossEntropyWithLogitsCpuKernelMod::InitInputOutputSize(const
 void SparseSoftmaxCrossEntropyWithLogitsCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
   MS_EXCEPTION_IF_NULL(kernel_node);
   kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
-  std::vector<size_t> shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
-  std::vector<size_t> label_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
+  auto shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
+  auto label_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
+  if (AnfAlgo::IsShapesDynamic({shape, label_shape})) {
+    return;
+  }
   if (label_shape.size() > 1) {
     MS_LOG(EXCEPTION) << "Labels shape length must be equal to Logits shape length minus 1";
   }
@@ -52,8 +55,8 @@ void SparseSoftmaxCrossEntropyWithLogitsCpuKernelMod::InitKernel(const CNodePtr
   if (mem_dims.size() != 2) {
     MS_LOG(EXCEPTION) << "SparseSoftmaxCrossEntropyWithLogits kernel dims invalid " << mem_dims.size();
   }
-  batch_size_ = shape[0];
-  class_num_ = shape[1];
+  batch_size_ = static_cast<size_t>(shape[0]);
+  class_num_ = static_cast<size_t>(shape[1]);
   if (batch_size_ == 0 || class_num_ == 0) {
     MS_LOG(EXCEPTION) << "Invalid batch size or class num input!";
   }
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/mul_no_nan_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/mul_no_nan_cpu_kernel.h
index e624831c465..59d605b6aa5 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/mul_no_nan_cpu_kernel.h
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/mul_no_nan_cpu_kernel.h
@@ -61,9 +61,9 @@ class MulNoNanCPUKernelMod : public DeprecatedNativeCpuKernelMod {
   }
 
  private:
-  std::vector<size_t> input0_shape_;
-  std::vector<size_t> input1_shape_;
-  std::vector<size_t> output_shape_;
+  std::vector<int64_t> input0_shape_;
+  std::vector<int64_t> input1_shape_;
+  std::vector<int64_t> output_shape_;
   TypeId input_dtype_{kTypeUnknown};
   TypeId output_dtype_{kTypeUnknown};
 
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/multinomial_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/multinomial_cpu_kernel.cc
index db19aac1a8e..bc7ba9eb086 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/multinomial_cpu_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/multinomial_cpu_kernel.cc
@@ -24,12 +24,15 @@ void MultinomialCpuKernel::InitKernel(const CNodePtr &kernel_node) {
   MS_EXCEPTION_IF_NULL(kernel_node);
   kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
   input_shape_ = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
+  if (AnfAlgo::IsShapesDynamic({input_shape_})) {
+    return;
+  }
   // The dimensions of input tensor must be 1 or 2, with data type of float32.
   if (input_shape_.size() == 1) {
-    workspace_size_list_.push_back(input_shape_[0] * sizeof(float));
+    workspace_size_list_.push_back(LongToSize(input_shape_[0]) * sizeof(float));
   } else if (input_shape_.size() == 2) {
-    workspace_size_list_.push_back(input_shape_[1] * sizeof(float));
+    workspace_size_list_.push_back(LongToSize(input_shape_[1]) * sizeof(float));
   }
   seed_ = static_cast<int>(GetValue<int64_t>(common::AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("seed")));
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/multinomial_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/multinomial_cpu_kernel.h
index 47c7b813351..23f26a805bd 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/multinomial_cpu_kernel.h
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/multinomial_cpu_kernel.h
@@ -44,7 +44,7 @@ class MultinomialCpuKernel : public DeprecatedNativeCpuKernelMod {
   }
 
  private:
-  std::vector<size_t> input_shape_;
+  ShapeVector input_shape_;
   int seed_{0};
   int seed2_{0};
   std::default_random_engine rng_;
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nllloss_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/nllloss_cpu_kernel.cc
index dfe51132944..ab74789da82 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/nllloss_cpu_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nllloss_cpu_kernel.cc
@@ -39,7 +39,7 @@ void NLLLossCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
     MS_LOG(EXCEPTION) << kernel_name_ << " does not support this kernel data type: " << kernel_attr;
   }
 
-  std::vector<size_t> logits_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
+  auto logits_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
   auto reduction = common::AnfAlgo::GetNodeAttr<std::string>(kernel_node, REDUCTION);
   auto pair = kReductionMap.find(reduction);
   if (pair == kReductionMap.end()) {
@@ -47,8 +47,8 @@ void NLLLossCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
                       << ", the attr 'reduction' only support 'mean', 'sum' and 'none', but got " << reduction;
   }
 
-  nllloss_param_.batch_ = SizeToInt(logits_shape[0]);
-  nllloss_param_.class_num_ = SizeToInt(logits_shape[1]);
+  nllloss_param_.batch_ = logits_shape[0];
+  nllloss_param_.class_num_ = logits_shape[1];
   nllloss_param_.reduction_type_ = pair->second;
 }
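Workspace sizing after the int64 migration, as in the Multinomial hunk above, follows one rule: convert the dimension once, then do the byte math in size_t. A standalone sketch, assuming LongToSize is a checked int64-to-size_t cast (the real helper's error handling may differ):

// Sketch: compute a float workspace size from an int64 shape dimension.
#include <cstddef>
#include <cstdint>
#include <stdexcept>
#include <vector>

size_t LongToSizeSketch(int64_t v) {  // stand-in for LongToSize
  if (v < 0) throw std::runtime_error("negative dim cannot become a buffer size");
  return static_cast<size_t>(v);
}

int main() {
  std::vector<int64_t> input_shape = {32, 1000};
  // Rank-2 input: the per-row workspace is class-count floats.
  size_t workspace_bytes = LongToSizeSketch(input_shape[1]) * sizeof(float);
  return workspace_bytes == 4000 ? 0 : 1;
}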
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nllloss_grad_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/nllloss_grad_cpu_kernel.cc
index cdcc87de3f1..f04e372bcf2 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/nllloss_grad_cpu_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nllloss_grad_cpu_kernel.cc
@@ -39,7 +39,7 @@ void NLLLossGradCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
     MS_LOG(EXCEPTION) << kernel_name_ << " does not support this kernel data type: " << kernel_attr;
   }
 
-  std::vector<size_t> logits_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
+  auto logits_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
   auto reduction = common::AnfAlgo::GetNodeAttr<std::string>(kernel_node, REDUCTION);
   auto pair = kReductionMap.find(reduction);
   if (pair == kReductionMap.end()) {
@@ -47,8 +47,8 @@ void NLLLossGradCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
                       << ", the attr 'reduction' only support 'mean', 'sum' and 'none', but got " << reduction;
   }
 
-  nllloss_param_.batch_ = SizeToInt(logits_shape[0]);
-  nllloss_param_.class_num_ = SizeToInt(logits_shape[1]);
+  nllloss_param_.batch_ = logits_shape[0];
+  nllloss_param_.class_num_ = logits_shape[1];
   nllloss_param_.reduction_type_ = pair->second;
 }
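The NMS hunk below narrows an int64 dimension into an int field via LongToInt. The safe form of that narrowing checks the value fits before casting; a sketch under the assumption that LongToInt behaves like this checked cast:

// Sketch: checked narrowing from int64_t shape dims to int parameter fields.
#include <cstdint>
#include <limits>
#include <stdexcept>

int LongToIntSketch(int64_t v) {  // stand-in for LongToInt
  if (v < std::numeric_limits<int>::min() || v > std::numeric_limits<int>::max()) {
    throw std::out_of_range("dim does not fit in int");
  }
  return static_cast<int>(v);
}

int main() {
  int batch = LongToIntSketch(int64_t{128});  // fine
  (void)batch;
  try {
    (void)LongToIntSketch(int64_t{1} << 40);  // throws: too large for int
  } catch (const std::out_of_range &) {
    return 0;
  }
  return 1;
}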
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nms_with_mask_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/nms_with_mask_cpu_kernel.cc
index 187fbea7cfb..bf51a77537f 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/nms_with_mask_cpu_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nms_with_mask_cpu_kernel.cc
@@ -233,7 +233,7 @@ template <typename T>
 void NMSWithMaskCpuKernelMod::InitIOSize(const CNodePtr &kernel_node) {
   DeprecatedNativeCpuKernelMod::InitInputOutputSize(kernel_node);
   auto input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
-  num_input_ = SizeToInt(input_shape[0]);  // Get N values in [N, 5] data.
+  num_input_ = LongToInt(input_shape[0]);  // Get N values in [N, 5] data.
   ceil_power_2 = static_cast<size_t>(NmsRoundUpPower2(num_input_));
 
   workspace_size_list_.push_back(ceil_power_2 * sizeof(T));  // data buff
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/non_deterministic_ints_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/non_deterministic_ints_cpu_kernel.cc
index cec9a28da51..23cbf0a070a 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/non_deterministic_ints_cpu_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/non_deterministic_ints_cpu_kernel.cc
@@ -41,6 +41,9 @@ void NonDeterministicIntsCPUKernelMod::InitKernel(const CNodePtr &kernel_node) {
   input_type_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0);
   output_type_ = AnfAlgo::GetOutputDeviceDataType(kernel_node, 0);
   auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
+  if (AnfAlgo::IsShapesDynamic({input_shape})) {
+    return;
+  }
   if (input_shape[0] < kInpuSizes) {
     MS_EXCEPTION(ValueError) << "The input tensor shape must >= 2.";
   }
@@ -83,7 +86,7 @@ bool NonDeterministicIntsCPUKernelMod::LaunchKernel(const std::vector<AddressPtr
   auto input = reinterpret_cast<T2 *>(inputs[0]->addr);
   size_t input_elem_num = inputs[0]->size / sizeof(T2);
   size_t output_elem_num = outputs[0]->size / sizeof(T1);
-  std::vector<size_t> out_shape;
+  ShapeVector out_shape;
   for (size_t i = 0; i < input_elem_num; i++) {
     if (input[i] <= 0) {
      MS_EXCEPTION(ValueError) << "Each dimension must be greater than 0.";
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/non_zero_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/non_zero_cpu_kernel.cc
index 863f6571f8c..852ec38357e 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/non_zero_cpu_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/non_zero_cpu_kernel.cc
@@ -30,8 +30,13 @@ constexpr size_t kOutputDim = 2;
 void NonZeroCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
   MS_EXCEPTION_IF_NULL(kernel_node);
   kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
-  input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
-  output_shape_ = common::AnfAlgo::GetOutputInferShape(kernel_node, 0);
+  auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
+  auto output_shape = common::AnfAlgo::GetOutputInferShape(kernel_node, 0);
+  if (AnfAlgo::IsShapesDynamic({input_shape, output_shape})) {
+    return;
+  }
+  input_shape_ = Convert2SizeT(input_shape);
+  output_shape_ = Convert2SizeT(output_shape);
   input_rank_ = input_shape_.size();
   node_wpt_ = kernel_node;
   if (input_shape_.size() < kInputMinDim) {
@@ -74,7 +79,7 @@ bool NonZeroCpuKernelMod::LaunchKernel(const std::vector<kernel::AddressPtr> &in
   if (!node_) {
     MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', node_wpt_(kernel_node) is expired. Error no: " << node_ << ".";
Error no: " << node_ << "."; } - std::vector output_shape = {non_zero_num, input_rank_}; + ShapeVector output_shape = {SizeToLong(non_zero_num), SizeToLong(input_rank_)}; std::vector dtype = {AnfAlgo::GetOutputDeviceDataType(node_, 0)}; common::AnfAlgo::SetOutputInferTypeAndShape(dtype, {output_shape}, node_.get()); diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nth_element_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/nth_element_cpu_kernel.cc index 669fa705a84..8db192f9b40 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/nth_element_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nth_element_cpu_kernel.cc @@ -24,13 +24,7 @@ constexpr size_t kNthElementOutputsNum = 1; constexpr size_t kParallelDataNums = 32 * 1024; } // namespace -size_t get_nth_element_num(const std::vector &shape) { - size_t size = 1; - for (size_t i = 0; i < shape.size(); i++) { - size *= shape[i]; - } - return size; -} +size_t get_nth_element_num(const ShapeVector &shape) { return SizeOf(shape); } void NthElementCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nth_element_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/nth_element_cpu_kernel.h index 08e4ec6dbea..6faf725a687 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/nth_element_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nth_element_cpu_kernel.h @@ -40,9 +40,9 @@ class NthElementCpuKernelMod : public DeprecatedNativeCpuKernelMod { private: template void LaunchKernel(const std::vector &inputs, const std::vector &outputs); - std::vector input_n_shape_; - std::vector input_shape_; - std::vector output_shape_; + ShapeVector input_n_shape_; + ShapeVector input_shape_; + ShapeVector output_shape_; int32_t input_n_val_{0}; size_t input_elements_{0}; size_t output_elements_{0}; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/one_hot_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/one_hot_cpu_kernel.cc index 80195f01e28..e89aafd71c0 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/one_hot_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/one_hot_cpu_kernel.cc @@ -78,10 +78,10 @@ void OneHotCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { } else { axis_ = LongToSize(axis); } - depth_ = output_shape[axis_]; + depth_ = LongToSize(output_shape[axis_]); stride_ = 1; for (size_t i = axis_ + 1; i < output_shape.size(); ++i) { - stride_ *= output_shape[i]; + stride_ *= LongToSize(output_shape[i]); } } diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/ones_like_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/ones_like_cpu_kernel.h index a44f9d0e265..a7577d277a4 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/ones_like_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/ones_like_cpu_kernel.h @@ -52,8 +52,8 @@ class OnesLikeCpuKernelMod : public DeprecatedNativeCpuKernelMod { static std::vector> func_list_; OnesLikeFunc kernel_func_; - std::vector input_shape_; - std::vector output_shape_; + std::vector input_shape_; + std::vector output_shape_; }; } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/pack_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/pack_cpu_kernel.cc index de4f856005f..708f252fbde 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/pack_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/pack_cpu_kernel.cc @@ -40,13 +40,13 @@ void PackFwdCpuKernelMod::InitKernel(const 
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/pack_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/pack_cpu_kernel.cc
index de4f856005f..708f252fbde 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/pack_cpu_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/pack_cpu_kernel.cc
@@ -40,13 +40,13 @@ void PackFwdCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
   // calculate elements while dim >= axis
   auto first_input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
   for (size_t i = IntToSize(axis_); i < first_input_shape.size(); i++) {
-    dims_behind_axis_ *= first_input_shape[i];
+    dims_behind_axis_ *= static_cast<size_t>(first_input_shape[i]);
   }
 
   auto output_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
   output_size_ = 1;
   for (size_t i = 0; i < output_shape.size(); i++) {
-    output_size_ *= output_shape[i];
+    output_size_ *= static_cast<size_t>(output_shape[i]);
   }
 
   auto kernel_attr = GetKernelAttrFromNode(kernel_node);
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/pad_and_shift_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/pad_and_shift_cpu_kernel.cc
index 159d68f2260..5084d699947 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/pad_and_shift_cpu_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/pad_and_shift_cpu_kernel.cc
@@ -30,17 +30,14 @@ void PadAndShiftCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
   input_x_dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0);
   type_size_ = GetTypeByte(TypeIdToType(input_x_dtype_));
   auto indices_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
-  batch_size_ = 1;
-  for (size_t i = 0; i < indices_shape.size(); ++i) {
-    batch_size_ *= indices_shape[i];
-  }
+  batch_size_ = SizeOf(indices_shape);
   MS_LOG(INFO) << "PadAndShift batch_size:" << batch_size_;
   auto cum_sum_arr_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
   if (cum_sum_arr_shape.size() != 1) {
     MS_LOG(ERROR) << "For '" << kernel_name_ << "', the dimension of 'cum_sum_arr' must be 1, but got "
                   << cum_sum_arr_shape.size() << ".";
   }
-  cum_sum_size_ = cum_sum_arr_shape[0];
+  cum_sum_size_ = LongToSize(cum_sum_arr_shape[0]);
   is_need_retrieve_output_shape_ = true;
 }
 
@@ -95,7 +92,7 @@ void PadAndShiftCpuKernelMod::LaunchKernel(const std::vector<AddressPtr> &inputs
   for (size_t i = 0; i < output_nums; i++) {
     dtypes[i] = AnfAlgo::GetOutputDeviceDataType(node_, i);
   }
-  common::AnfAlgo::SetOutputInferTypeAndShape(dtypes, {out_shape}, node_.get());
+  common::AnfAlgo::SetOutputInferTypeAndShape(dtypes, {Convert2Long(out_shape)}, node_.get());
 }
 
 MS_KERNEL_FACTORY_REG(NativeCpuKernelMod, PadAndShift, PadAndShiftCpuKernelMod);
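PadAndShift keeps its internal bookkeeping in size_t but must hand the retrieved output shape back to the framework as int64, hence Convert2Long at the boundary. A sketch of that boundary conversion; the helper name mirrors the diff, while the implementation here is an assumption:

// Sketch: convert a size_t shape back to the framework's int64 ShapeVector.
#include <cstdint>
#include <vector>

using ShapeVector = std::vector<int64_t>;

ShapeVector Convert2LongSketch(const std::vector<size_t> &shape) {
  ShapeVector out;
  out.reserve(shape.size());
  for (size_t d : shape) out.push_back(static_cast<int64_t>(d));
  return out;
}

int main() {
  std::vector<size_t> out_shape = {7, 5};  // computed at launch time
  ShapeVector inferred = Convert2LongSketch(out_shape);
  return inferred == ShapeVector{7, 5} ? 0 : 1;
}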
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/pad_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/pad_cpu_kernel.cc
index 43a85811562..45e3c5d76e3 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/pad_cpu_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/pad_cpu_kernel.cc
@@ -30,8 +30,13 @@ void PadCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
   kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
   paddings_ = common::AnfAlgo::GetNodeAttr<std::vector<std::vector<int64_t>>>(kernel_node, "paddings");
   dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0);
-  input_shape_ = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
-  std::vector<size_t> output_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
+  auto input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
+  auto output_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
+  if (AnfAlgo::IsShapesDynamic({input_shape, output_shape})) {
+    return;
+  }
+
+  input_shape_ = Convert2SizeT(input_shape);
   input_rank_ = input_shape_.size();
 
   if (paddings_.size() != input_rank_) {
@@ -70,7 +75,7 @@ void PadCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
   strides_[input_rank_ - 1] = 1;
   for (int32_t i = SizeToInt(input_rank_) - 2; i >= 0; i--) {
     size_t ind = IntToSize(i);
-    strides_[ind] = output_shape[ind + 1] * strides_[ind + 1];
+    strides_[ind] = static_cast<size_t>(output_shape[ind + 1]) * strides_[ind + 1];
   }
 }
 
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/print_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/print_cpu_kernel.cc
index 3e252a47292..2df1f68a949 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/print_cpu_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/print_cpu_kernel.cc
@@ -31,11 +31,11 @@ void PrintCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
   for (size_t i = 0; i < input_tensor_num; ++i) {
     auto input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, i);
     (void)input_shapes_.emplace_back(input_shape);
-    size_t size = input_shape.size() ? 1 : 0;
+    int64_t size = input_shape.size() ? 1 : 0;
     for (size_t j = 0; j < input_shape.size(); ++j) {
       size *= input_shape[j];
     }
-    (void)input_sizes_.emplace_back(size);
+    (void)input_sizes_.emplace_back(LongToSize(size));
   }
 
   auto kernel_attr = GetKernelAttrFromNode(kernel_node);
@@ -61,10 +61,7 @@ bool PrintCpuKernelMod::LaunchKernel(const std::vector<AddressPtr> &inpu
       auto num = reinterpret_cast<T *>(inputs[i]->addr);
       std::cout << *num << std::endl;
     } else {
-      ShapeVector shape;
-      (void)std::transform(input_shapes_[i].begin(), input_shapes_[i].end(), std::back_inserter(shape),
-                           [](const size_t &value) { return SizeToLong(value); });
-      Tensor tensor(data_type, shape, inputs[i]->addr, input_sizes_[i] * sizeof(T));
+      Tensor tensor(data_type, input_shapes_[i], inputs[i]->addr, input_sizes_[i] * sizeof(T));
       std::cout << tensor.ToStringNoLimit() << std::endl;
     }
   }
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/print_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/print_cpu_kernel.h
index dd316df8439..e55c2b9914e 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/print_cpu_kernel.h
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/print_cpu_kernel.h
@@ -50,7 +50,7 @@ class PrintCpuKernelMod : public DeprecatedNativeCpuKernelMod {
   static std::vector<std::pair<KernelAttr, PrintFunc>> func_list_;
   PrintFunc kernel_func_;
 
-  std::vector<std::vector<size_t>> input_shapes_;
+  std::vector<ShapeVector> input_shapes_;
   std::vector<size_t> input_sizes_;
 };
 }  // namespace kernel
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/ps/embedding_look_up_proxy_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/ps/embedding_look_up_proxy_kernel.cc
index 89fe51bdff9..bff537f62e4 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/ps/embedding_look_up_proxy_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/ps/embedding_look_up_proxy_kernel.cc
@@ -29,9 +29,9 @@ constexpr size_t kEmbeddingLookUpProxyOutputsNum = 1;
 void EmbeddingLookUpProxyKernel::InitKernel(const CNodePtr &kernel_node) {
   MS_EXCEPTION_IF_NULL(kernel_node);
   EmbeddingLookUpCpuKernelMod::InitKernel(kernel_node);
-  auto input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
-  auto indices_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
-  auto output_shape = common::AnfAlgo::GetOutputInferShape(kernel_node, 0);
+  auto input_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0));
+  auto indices_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1));
+  auto output_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetOutputInferShape(kernel_node, 0));
   size_t axis = kShape2dDims - input_shape.size();
   if (input_shape.empty() || input_shape.size() > kShape2dDims) {
     MS_LOG(EXCEPTION) << "Input shape can not empty or greater than " << kShape2dDims << "-D, but got "
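Convert2SizeTClipNeg, used in the proxy kernel above, is the lossy edge of the migration: inferred shapes may still carry -1 dims, and clipping them to 0 keeps the size_t math from wrapping around. A sketch of the assumed semantics:

// Sketch: convert int64 dims to size_t, clipping negative (unknown) dims to 0.
#include <cstdint>
#include <vector>

std::vector<size_t> Convert2SizeTClipNegSketch(const std::vector<int64_t> &shape) {
  std::vector<size_t> out;
  out.reserve(shape.size());
  for (int64_t d : shape) {
    out.push_back(d < 0 ? 0 : static_cast<size_t>(d));  // -1 would wrap to 2^64-1 otherwise
  }
  return out;
}

int main() {
  auto s = Convert2SizeTClipNegSketch({-1, 16});
  return s[0] == 0 && s[1] == 16 ? 0 : 1;
}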
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/ps/embedding_look_up_ps_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/ps/embedding_look_up_ps_kernel.cc
index 3052b9df60d..ff2a78b78c6 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/ps/embedding_look_up_ps_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/ps/embedding_look_up_ps_kernel.cc
@@ -28,9 +28,8 @@ using mindspore::ps::Util;
 constexpr int kAxis = 0;
 constexpr size_t kEmbeddingLookUpPSInputSize = 3;
 
-void EmbeddingLookUpPSKernelMod::InitKernel(
-  const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &shapes) {
-  const std::vector<std::shared_ptr<std::vector<size_t>>> &shape_vec = *shapes;
+void EmbeddingLookUpPSKernelMod::InitKernel(const std::shared_ptr<std::vector<std::shared_ptr<ShapeVector>>> &shapes) {
+  const std::vector<std::shared_ptr<ShapeVector>> &shape_vec = *shapes;
   if (shape_vec.size() < kEmbeddingLookUpPSInputSize) {
     MS_LOG(EXCEPTION) << "EmbeddingLookUpPSKernelMod needs " << kEmbeddingLookUpPSInputSize << " input shapes, but got "
                       << shape_vec.size();
@@ -38,42 +37,39 @@ void EmbeddingLookUpPSKernelMod::InitKernel(
   for (auto shape : shape_vec) {
     MS_EXCEPTION_IF_NULL(shape);
   }
-  input_shape_ = *(shape_vec[0]);
-  if (input_shape_.empty()) {
+  auto input_shape = *(shape_vec[0]);
+  if (input_shape.empty()) {
     MS_LOG(EXCEPTION) << "Input shape can not empty";
   }
-  first_dim_size_ = input_shape_[0];
-  for (size_t i = 1; i < input_shape_.size(); ++i) {
-    outer_dim_size_ *= input_shape_[i];
-  }
+
+  first_dim_size_ = LongToSize(input_shape[0]);
+  for (size_t i = 1; i < input_shape.size(); ++i) {
+    outer_dim_size_ *= LongToSize(input_shape[i]);
+  }
 
   auto indices_shape = *(shape_vec[1]);
-  indices_lens_ = 1;
-  for (auto shape : indices_shape) {
-    indices_lens_ = indices_lens_ * shape;
-  }
+  indices_lens_ = SizeOf(indices_shape);
 
   size_t output_index = 2;
   auto output_shape = *(shape_vec[output_index]);
   int64_t offset = 0;
   for (size_t i = 0; i < rank_id_; i++) {
-    offset += Util::LocalShard(SizeToLong(input_shape_[kAxis]), SizeToLong(i), SizeToLong(pserver_num_));
+    offset += Util::LocalShard(input_shape[kAxis], SizeToLong(i), SizeToLong(pserver_num_));
   }
   offset_ = offset;
 
   // input shape must be sharded after computing offset_;
-  Shard(&input_shape_, kAxis);
+  Shard(&input_shape, kAxis);
 
-  size_t output_size =
-    std::accumulate(output_shape.begin(), output_shape.end(), sizeof(float), std::multiplies<size_t>());
+  input_shape_ = Convert2SizeT(input_shape);
+
+  size_t output_size = sizeof(float) * SizeOf(output_shape);
   (void)output_size_list_.emplace_back(output_size);
 }
 
-void EmbeddingLookUpPSKernelMod::ReInit(const std::vector<std::vector<size_t>> &shapes) {
+void EmbeddingLookUpPSKernelMod::ReInit(const std::vector<ShapeVector> &shapes) {
   if (shapes.empty() || shapes[0].empty()) {
     MS_LOG(EXCEPTION) << "Shape can not empty";
   }
   const auto &indices_shape = shapes[0];
-  indices_lens_ = indices_shape[0];
+  indices_lens_ = LongToSize(indices_shape[0]);
   size_t output_size = sizeof(float) * indices_lens_;
   for (size_t i = kAxis + 1; i < input_shape_.size(); i++) {
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/ps/embedding_look_up_ps_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/ps/embedding_look_up_ps_kernel.h
index af05744d0f1..154352c6ee8 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/ps/embedding_look_up_ps_kernel.h
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/ps/embedding_look_up_ps_kernel.h
@@ -31,8 +31,8 @@ class EmbeddingLookUpPSKernelMod : public EmbeddingLookUpCpuKernelMod, public PS
     : PServerKernel(rank_id, pserver_num, worker_num) {}
   ~EmbeddingLookUpPSKernelMod() override = default;
 
-  void InitKernel(const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &) override;
-  void ReInit(const std::vector<std::vector<size_t>> &) override;
+  void InitKernel(const std::shared_ptr<std::vector<std::shared_ptr<ShapeVector>>> &) override;
+  void ReInit(const std::vector<ShapeVector> &) override;
 
   bool Execute(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
                const std::vector<AddressPtr> &outputs) override;
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/ps/pserver_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/ps/pserver_kernel.cc
index 6cf1457c86f..b62d7471419 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/ps/pserver_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/ps/pserver_kernel.cc
@@ -19,13 +19,13 @@
 namespace mindspore {
 namespace kernel {
 namespace ps {
-void PServerKernel::Shard(std::vector<size_t> *shape, int axis) {
+void PServerKernel::Shard(ShapeVector *shape, int axis) {
   MS_EXCEPTION_IF_NULL(shape);
   if ((*shape).size() <= IntToSize(axis)) {
     MS_LOG(EXCEPTION) << "Shape size is invalid.";
   }
   (*shape)[IntToSize(axis)] =
-    LongToSize(Util::LocalShard(SizeToLong((*shape)[IntToSize(axis)]), SizeToLong(rank_id_), SizeToLong(pserver_num_)));
+    LongToSize(Util::LocalShard((*shape)[IntToSize(axis)], SizeToLong(rank_id_), SizeToLong(pserver_num_)));
 }
 }  // namespace ps
 }  // namespace kernel
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/ps/pserver_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/ps/pserver_kernel.h
index eef1e55ed75..65ac5ed7141 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/ps/pserver_kernel.h
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/ps/pserver_kernel.h
@@ -32,10 +32,9 @@ class PServerKernel {
   ~PServerKernel() = default;
   PServerKernel(const PServerKernel &) = delete;
   PServerKernel &operator=(const PServerKernel &) = delete;
-  virtual void InitKernel(const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &) {}
-  virtual void InitKernel(const CNodePtr &cnode,
-                          const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &) {}
-  virtual void ReInit(const std::vector<std::vector<size_t>> &) {}
+  virtual void InitKernel(const std::shared_ptr<std::vector<std::shared_ptr<ShapeVector>>> &) {}
+  virtual void InitKernel(const CNodePtr &cnode, const std::shared_ptr<std::vector<std::shared_ptr<ShapeVector>>> &) {}
+  virtual void ReInit(const std::vector<ShapeVector> &) {}
   virtual bool Execute(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
                        const std::vector<AddressPtr> &outputs) = 0;
   virtual void UpdateEmbeddings(float *embedding_table, const size_t *lookup_ids, const float *update_vals,
@@ -47,7 +46,7 @@ class PServerKernel {
 
  protected:
   virtual void ReInit(const std::vector<AddressPtr> &) {}
-  void Shard(std::vector<size_t> *shape, int axis);
+  void Shard(ShapeVector *shape, int axis);
 
   size_t rank_id_;
   size_t pserver_num_;
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/ps/pull_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/ps/pull_kernel.h
index 132900f6260..875af5a7cab 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/ps/pull_kernel.h
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/ps/pull_kernel.h
@@ -53,13 +53,11 @@ class PullKernelMod : public DeprecatedNativeCpuKernelMod {
     }
 
     auto key_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
-    for (size_t i = 0; i < key_shape.size(); i++) {
-      keys_size_ *= key_shape[i];
-    }
+    keys_size_ *= SizeOf(key_shape);
+
     auto var_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
-    for (size_t i = 0; i < var_shape.size(); i++) {
-      var_size_ *= var_shape[i];
-    }
+    var_size_ *= SizeOf(var_shape);
+
     auto param_node = common::AnfAlgo::GetInputNode(kernel_node, 1);
     MS_EXCEPTION_IF_NULL(param_node);
     param_name_ = param_node->fullname_with_scope();
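Shard() above rewrites one dimension to this server's local slice via Util::LocalShard, now taking the int64 value directly instead of round-tripping through SizeToLong. A standalone sketch of the assumed sharding arithmetic (the real Util::LocalShard may split remainders differently):

// Sketch: split a first dimension of `total` rows across `n` parameter servers.
#include <cstdint>
#include <iostream>

int64_t LocalShardSketch(int64_t total, int64_t rank, int64_t n) {
  int64_t base = total / n;
  int64_t rest = total % n;
  return base + (rank < rest ? 1 : 0);  // earlier ranks absorb the remainder
}

int main() {
  // 10 embedding rows over 3 servers -> 4 3 3
  for (int64_t r = 0; r < 3; ++r) std::cout << LocalShardSketch(10, r, 3) << ' ';
}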
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/ps/sparse_apply_adam_ps_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/ps/sparse_apply_adam_ps_kernel.cc
index cf82fb5564e..7931cf3dfb9 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/ps/sparse_apply_adam_ps_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/ps/sparse_apply_adam_ps_kernel.cc
@@ -25,19 +25,19 @@ namespace kernel {
 namespace ps {
 constexpr size_t kSparseApplyAdamPSInputsShapeSize = 11;
 
-void SparseApplyAdamPSKernelMod::InitKernel(
-  const CNodePtr &cnode, const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &shapes) {
+void SparseApplyAdamPSKernelMod::InitKernel(const CNodePtr &cnode,
+                                            const std::shared_ptr<std::vector<std::shared_ptr<ShapeVector>>> &shapes) {
   MS_EXCEPTION_IF_NULL(cnode);
   MS_EXCEPTION_IF_NULL(shapes);
-  const std::vector<std::shared_ptr<std::vector<size_t>>> &shape_vec = *shapes;
+  const std::vector<std::shared_ptr<ShapeVector>> &shape_vec = *shapes;
   if (shape_vec.size() < kSparseApplyAdamPSInputsShapeSize) {
     MS_LOG(EXCEPTION) << "SparseApplyAdamPSKernelMod needs 10 input shapes, but got " << shape_vec.size();
   }
-  std::vector<size_t> &var_shape = *(shape_vec[var_index_]);
-  std::vector<size_t> &m_shape = *(shape_vec[m_index_]);
-  std::vector<size_t> &v_shape = *(shape_vec[v_index_]);
-  const std::vector<size_t> &grad_shape = *(shape_vec[grad_index_]);
-  const std::vector<size_t> &indices_shape = *(shape_vec[indices_index_]);
+  ShapeVector &var_shape = *(shape_vec[var_index_]);
+  ShapeVector &m_shape = *(shape_vec[m_index_]);
+  ShapeVector &v_shape = *(shape_vec[v_index_]);
+  const ShapeVector &grad_shape = *(shape_vec[grad_index_]);
+  const ShapeVector &indices_shape = *(shape_vec[indices_index_]);
 
   Shard(&var_shape, 0);
   Shard(&m_shape, 0);
@@ -54,18 +54,18 @@ void SparseApplyAdamPSKernelMod::InitKernel(
   if (var_shape.size() != grad_shape.size()) {
     MS_LOG(EXCEPTION) << "var and grad must have the same shape size";
   }
-  var_first_dim_size_ = var_shape[0];
+  var_first_dim_size_ = LongToSize(var_shape[0]);
   for (size_t i = 1; i < var_shape.size(); ++i) {
     if (var_shape[i] != grad_shape[i]) {
       MS_LOG(EXCEPTION) << "The shape of var and grad must be equal in dimension " << i;
     }
-    var_outer_dim_size_ *= var_shape[i];
+    var_outer_dim_size_ *= LongToSize(var_shape[i]);
   }
   if (indices_shape.size() != 1) {
     MS_LOG(EXCEPTION) << "indices must be 1D";
   }
-  indices_size_ = indices_shape[0];
-  if (grad_shape[0] != indices_size_) {
+  indices_size_ = LongToSize(indices_shape[0]);
+  if (grad_shape[0] != SizeToLong(indices_size_)) {
     MS_LOG(ERROR) << "The first dimension of grad shape must be equal to indices";
   }
   if (common::AnfAlgo::HasNodeAttr(USE_NESTEROV, cnode)) {
@@ -78,12 +78,12 @@ void SparseApplyAdamPSKernelMod::InitKernel(
   (void)workspace_size_list_.emplace_back(var_first_dim_size_ * var_outer_dim_size_ * sizeof(float) * worker_num_);
 }
 
-void SparseApplyAdamPSKernelMod::ReInit(const std::vector<std::vector<size_t>> &shapes) {
+void SparseApplyAdamPSKernelMod::ReInit(const std::vector<ShapeVector> &shapes) {
   if (shapes.empty() || shapes[0].empty()) {
     MS_LOG(EXCEPTION) << "Shape is empty";
   }
-  const std::vector<size_t> &indices_shape = shapes[0];
-  indices_size_ = indices_shape[0];
+  const auto &indices_shape = shapes[0];
+  indices_size_ = LongToSize(indices_shape[0]);
   workspace_size_list_[0] = indices_size_ * var_outer_dim_size_ * sizeof(float) * worker_num_;
   workspace_size_list_[1] = indices_size_ * sizeof(int) * worker_num_;
 }
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/ps/sparse_apply_adam_ps_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/ps/sparse_apply_adam_ps_kernel.h
index e6ea96e1f29..9cc36abbd32 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/ps/sparse_apply_adam_ps_kernel.h
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/ps/sparse_apply_adam_ps_kernel.h
@@ -32,9 +32,8 @@ class SparseApplyAdamPSKernelMod : public SparseApplyAdamCpuKernelMod, public PS
     : PServerKernel(rank_id, pserver_num, worker_num) {}
   ~SparseApplyAdamPSKernelMod() override = default;
 
-  void InitKernel(const CNodePtr &cnode,
-                  const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &) override;
-  void ReInit(const std::vector<std::vector<size_t>> &) override;
+  void InitKernel(const CNodePtr &cnode, const std::shared_ptr<std::vector<std::shared_ptr<ShapeVector>>> &) override;
+  void ReInit(const std::vector<ShapeVector> &) override;
 
   bool Execute(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
                const std::vector<AddressPtr> &outputs) override;
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/ps/sparse_apply_ftrl_ps_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/ps/sparse_apply_ftrl_ps_kernel.cc
index b198d8151a5..1db135ecbd0 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/ps/sparse_apply_ftrl_ps_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/ps/sparse_apply_ftrl_ps_kernel.cc
@@ -22,20 +22,20 @@ namespace kernel {
 namespace ps {
 constexpr size_t kSparseApplyFtrlPSInputSize = 5;
 
-void SparseApplyFtrlPSKernelMod::InitKernel(
-  const CNodePtr &cnode, const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &shapes) {
+void SparseApplyFtrlPSKernelMod::InitKernel(const CNodePtr &cnode,
+                                            const std::shared_ptr<std::vector<std::shared_ptr<ShapeVector>>> &shapes) {
   MS_EXCEPTION_IF_NULL(cnode);
   MS_EXCEPTION_IF_NULL(shapes);
-  const std::vector<std::shared_ptr<std::vector<size_t>>> &shape_vec = *shapes;
+  const std::vector<std::shared_ptr<ShapeVector>> &shape_vec = *shapes;
   if (shape_vec.size() < kSparseApplyFtrlPSInputSize) {
     MS_LOG(EXCEPTION) << "SparseApplyAdamPSKernelMod needs " << kSparseApplyFtrlPSInputSize
                       << " input shapes, but got " << shape_vec.size();
   }
-  std::vector<size_t> var_shape = *(shape_vec[var_index_]);
-  std::vector<size_t> accum_shape = *(shape_vec[accum_index_]);
-  std::vector<size_t> linear_shape = *(shape_vec[linear_index_]);
-  std::vector<size_t> grad_shape = *(shape_vec[grad_index_]);
-  std::vector<size_t> indices_shape = *(shape_vec[indices_index_]);
+  auto var_shape = *(shape_vec[var_index_]);
+  auto accum_shape = *(shape_vec[accum_index_]);
+  auto linear_shape = *(shape_vec[linear_index_]);
+  auto grad_shape = *(shape_vec[grad_index_]);
+  auto indices_shape = *(shape_vec[indices_index_]);
 
   Shard(&var_shape, 0);
   Shard(&accum_shape, 0);
@@ -47,20 +47,20 @@ void SparseApplyFtrlPSKernelMod::InitKernel(
   if (var_shape.empty()) {
     MS_LOG(EXCEPTION) << "var must be at least 1D";
   } else {
-    var_first_dim_size_ = var_shape[0];
+    var_first_dim_size_ = LongToSize(var_shape[0]);
   }
   for (size_t i = 1; i < var_shape.size(); ++i) {
     if (var_shape[i] != grad_shape[i]) {
       MS_LOG(EXCEPTION) << "The shape of var and grad must be equal in dimension " << i;
     }
-    var_outer_dim_size_ *= var_shape[i];
+    var_outer_dim_size_ *= LongToSize(var_shape[i]);
   }
   if (indices_shape.size() != 1) {
     MS_LOG(EXCEPTION) << "indices must be a 1D vector";
   }
-  indices_size_ = indices_shape[0];
-  if (grad_shape[0] != indices_size_) {
+  indices_size_ = LongToSize(indices_shape[0]);
+  if (grad_shape[0] != SizeToLong(indices_size_)) {
     MS_LOG(EXCEPTION) << "The first dimension of grad shape must be equal to indices";
   }
   init_accum_ = common::AnfAlgo::GetNodeAttr<float>(cnode, "init_accum");
@@ -89,12 +89,12 @@ void SparseApplyFtrlPSKernelMod::InitKernel(
   (void)workspace_size_list_.emplace_back(indices_size_ * sizeof(int) * worker_num_);
 }
 
-void SparseApplyFtrlPSKernelMod::ReInit(const std::vector<std::vector<size_t>> &shapes) {
+void SparseApplyFtrlPSKernelMod::ReInit(const std::vector<ShapeVector> &shapes) {
   if (shapes.empty() || shapes[0].empty()) {
     MS_LOG(EXCEPTION) << "Shape can not empty";
   }
-  const std::vector<size_t> &indices_shape = shapes[0];
-  indices_size_ = indices_shape[0];
+  const auto &indices_shape = shapes[0];
+  indices_size_ = LongToSize(indices_shape[0]);
   workspace_size_list_[0] = indices_size_ * var_outer_dim_size_ * sizeof(float) * worker_num_;
   workspace_size_list_[1] = indices_size_ * sizeof(int) * worker_num_;
 }
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/ps/sparse_apply_ftrl_ps_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/ps/sparse_apply_ftrl_ps_kernel.h
index a74df2156bc..0669d1eda64 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/ps/sparse_apply_ftrl_ps_kernel.h
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/ps/sparse_apply_ftrl_ps_kernel.h
@@ -32,9 +32,8 @@ class SparseApplyFtrlPSKernelMod : public SparseApplyFtrlCpuKernelMod, public PS
     : PServerKernel(rank_id, pserver_num, worker_num), init_accum_(0.1) {}
   ~SparseApplyFtrlPSKernelMod() override = default;
 
-  void InitKernel(const CNodePtr &cnode,
-                  const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &) override;
-  void ReInit(const std::vector<std::vector<size_t>> &) override;
+  void InitKernel(const CNodePtr &cnode, const std::shared_ptr<std::vector<std::shared_ptr<ShapeVector>>> &) override;
+  void ReInit(const std::vector<ShapeVector> &) override;
 
   bool Execute(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
                const std::vector<AddressPtr> &outputs) override;
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/ps/sparse_apply_lazy_adam_ps_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/ps/sparse_apply_lazy_adam_ps_kernel.cc
index 9175393c1e2..8c75ff808ad 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/ps/sparse_apply_lazy_adam_ps_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/ps/sparse_apply_lazy_adam_ps_kernel.cc
@@ -26,19 +26,19 @@ namespace ps {
 constexpr size_t kSparseApplyLazyAdamPSInputsSize = 11;
 
 void SparseApplyLazyAdamPSKernelMod::InitKernel(
-  const CNodePtr &cnode, const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &shapes) {
+  const CNodePtr &cnode, const std::shared_ptr<std::vector<std::shared_ptr<ShapeVector>>> &shapes) {
   MS_EXCEPTION_IF_NULL(cnode);
   MS_EXCEPTION_IF_NULL(shapes);
-  const std::vector<std::shared_ptr<std::vector<size_t>>> &shape_vec = *shapes;
+  const std::vector<std::shared_ptr<ShapeVector>> &shape_vec = *shapes;
   if (shape_vec.size() < kSparseApplyLazyAdamPSInputsSize) {
     MS_LOG(EXCEPTION) << "SparseApplyLazyAdamPSKernelMod needs " << kSparseApplyLazyAdamPSInputsSize
                       << " input shapes, but got " << shape_vec.size();
   }
-  std::vector<size_t> &var_shape = *(shape_vec[var_index_]);
-  std::vector<size_t> &m_shape = *(shape_vec[m_index_]);
-  std::vector<size_t> &v_shape = *(shape_vec[v_index_]);
-  const std::vector<size_t> &grad_shape = *(shape_vec[grad_index_]);
-  const std::vector<size_t> &indices_shape = *(shape_vec[indices_index_]);
+  ShapeVector &var_shape = *(shape_vec[var_index_]);
+  ShapeVector &m_shape = *(shape_vec[m_index_]);
+  ShapeVector &v_shape = *(shape_vec[v_index_]);
+  const ShapeVector &grad_shape = *(shape_vec[grad_index_]);
+  const ShapeVector &indices_shape = *(shape_vec[indices_index_]);
 
   Shard(&var_shape, 0);
   Shard(&m_shape, 0);
@@ -61,13 +61,13 @@ void SparseApplyLazyAdamPSKernelMod::InitKernel(
     if (var_shape[i] != grad_shape[i]) {
       MS_LOG(EXCEPTION) << "The shape of var and grad must be equal in dimension " << i;
     }
-    var_outer_dim_size_ *= var_shape[i];
+    var_outer_dim_size_ *= LongToSize(var_shape[i]);
   }
   if (indices_shape.size() != 1) {
     MS_LOG(EXCEPTION) << "indices must be 1D";
   }
-  indices_size_ = indices_shape[0];
-  if (grad_shape[0] != indices_size_) {
+  indices_size_ = LongToSize(indices_shape[0]);
+  if (grad_shape[0] != SizeToLong(indices_size_)) {
     MS_LOG(ERROR) << "The first dimension of grad shape must be equal to indices";
   }
   if (common::AnfAlgo::HasNodeAttr(USE_NESTEROV, cnode)) {
@@ -79,12 +79,12 @@ void SparseApplyLazyAdamPSKernelMod::InitKernel(
   (void)workspace_size_list_.emplace_back(indices_size_ * sizeof(int) * worker_num_);
 }
 
-void SparseApplyLazyAdamPSKernelMod::ReInit(const std::vector<std::vector<size_t>> &shapes) {
+void SparseApplyLazyAdamPSKernelMod::ReInit(const std::vector<ShapeVector> &shapes) {
   if (shapes.empty() || shapes[0].empty()) {
     MS_LOG(EXCEPTION) << "Shape can not empty";
   }
-  const std::vector<size_t> &indices_shape = shapes[0];
-  indices_size_ = indices_shape[0];
+  const auto &indices_shape = shapes[0];
+  indices_size_ = LongToSize(indices_shape[0]);
   workspace_size_list_[0] = indices_size_ * var_outer_dim_size_ * sizeof(float) * worker_num_;
   workspace_size_list_[1] = indices_size_ * sizeof(int) * worker_num_;
 }
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/ps/sparse_apply_lazy_adam_ps_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/ps/sparse_apply_lazy_adam_ps_kernel.h
index cdc7b8a3ec4..f75da51211b 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/ps/sparse_apply_lazy_adam_ps_kernel.h
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/ps/sparse_apply_lazy_adam_ps_kernel.h
@@ -31,9 +31,8 @@ class SparseApplyLazyAdamPSKernelMod : public SparseApplyLazyAdamCpuKernelMod, p
     : PServerKernel(rank_id, pserver_num, worker_num) {}
   ~SparseApplyLazyAdamPSKernelMod() override = default;
 
-  void InitKernel(const CNodePtr &cnode,
-                  const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &) override;
-  void ReInit(const std::vector<std::vector<size_t>> &) override;
+  void InitKernel(const CNodePtr &cnode, const std::shared_ptr<std::vector<std::shared_ptr<ShapeVector>>> &) override;
+  void ReInit(const std::vector<ShapeVector> &) override;
 
   bool Execute(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
                const std::vector<AddressPtr> &outputs) override;
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/pyfunc/py_func_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/pyfunc/py_func_cpu_kernel.cc
index bb73197902b..b51c271d5b6 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/pyfunc/py_func_cpu_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/pyfunc/py_func_cpu_kernel.cc
@@ -257,11 +257,8 @@ void PyFuncCpuKernelMod::BuildFuncInfo(const CNodePtr &kernel_node) {
     in_shapes = common::AnfAlgo::GetNodeAttr<std::vector<std::vector<int64_t>>>(kernel_node, "in_shapes");
   } else {
     for (size_t i = 0; i < common::AnfAlgo::GetInputTensorNum(kernel_node); i++) {
-      std::vector<size_t> in_shape = AnfAlgo::GetInputDeviceShape(kernel_node, i);
-      std::vector<int64_t> in_shape_tmp;
-      (void)std::for_each(in_shape.begin(), in_shape.end(),
-                          [&in_shape_tmp](size_t c) { in_shape_tmp.push_back(SizeToLong(c)); });
-      (void)in_shapes.emplace_back(in_shape_tmp);
+      auto in_shape = AnfAlgo::GetInputDeviceShape(kernel_node, i);
+      (void)in_shapes.emplace_back(in_shape);
     }
   }
 
@@ -269,11 +266,8 @@ void PyFuncCpuKernelMod::BuildFuncInfo(const CNodePtr &kernel_node) {
     out_shapes = common::AnfAlgo::GetNodeAttr<std::vector<std::vector<int64_t>>>(kernel_node, "out_shapes");
   } else {
     for (size_t i = 0; i < common::AnfAlgo::GetOutputTensorNum(kernel_node); i++) {
-      std::vector<size_t> out_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, i);
-      std::vector<int64_t> out_shape_tmp;
-      (void)std::for_each(out_shape.begin(), out_shape.end(),
-                          [&out_shape_tmp](size_t c) { out_shape_tmp.push_back(SizeToLong(c)); });
-      (void)out_shapes.emplace_back(out_shape_tmp);
+      auto out_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, i);
+      (void)out_shapes.emplace_back(out_shape);
     }
   }
 
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/reduce_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/reduce_cpu_kernel.cc
index 441c4a554ef..83752cbc9bb 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/reduce_cpu_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/reduce_cpu_kernel.cc
@@ -56,7 +56,7 @@ class ReduceCpuKernelFunc : public DeprecatedCpuKernelFunc {
                                 kReduceMeanType, kReduceProdType };
-  std::vector<size_t> input_shape_;
+  std::vector<int64_t> input_shape_;
   std::vector<int64_t> axis_;
   ReduceFuncType reduce_type_{ReduceFuncType::kReduceAllType};
   std::function reduce_func_;
@@ -191,7 +191,7 @@ bool ReduceCpuKernelFunc::RunFunc(const std::vector<kernel::AddressPtr> &inpu
         axes[k] = i;
         ++k;
       } else {
-        stride *= input_shape_[i];
+        stride *= static_cast<size_t>(input_shape_[i]);
         ++j;
       }
     }
@@ -216,7 +216,7 @@ bool ReduceCpuKernelFunc::RunFunc(const std::vector<kernel::AddressPtr> &inpu
       }
     }
     // Calculate transpose shape
-    std::vector<size_t> transpose_shape(input_shape_.size());
+    std::vector<int64_t> transpose_shape(input_shape_.size());
     for (int i = 0; i < dimension; ++i) {
       transpose_shape[i] = input_shape_[axes[i]];
     }
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/resize_bilinear_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/resize_bilinear_cpu_kernel.cc
index 079e6f216f8..099bbf1fed1 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/resize_bilinear_cpu_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/resize_bilinear_cpu_kernel.cc
@@ -30,7 +30,7 @@ constexpr size_t kResizeBilinearAttrSize = 2;
 void ResizeBilinearCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
   MS_EXCEPTION_IF_NULL(kernel_node);
   kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
-  shape_ = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
+  shape_ = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0));
   size_ = common::AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, SIZE);
   align_corners_ = common::AnfAlgo::GetNodeAttr<bool>(kernel_node, "align_corners");
   dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0);
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/resize_bilinear_grad_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/resize_bilinear_grad_cpu_kernel.cc
index a0ef0a0c4d1..a1341332642 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/resize_bilinear_grad_cpu_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/resize_bilinear_grad_cpu_kernel.cc
@@ -30,8 +30,8 @@ constexpr size_t kResizeBilinearGradInputsXShapeSize = 4;
 void ResizeBilinearGradCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
   MS_EXCEPTION_IF_NULL(kernel_node);
   kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
-  shape_ = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
-  size_ = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
+  shape_ = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0));
+  size_ = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1));
   align_corners_ = common::AnfAlgo::GetNodeAttr<bool>(kernel_node, "align_corners");
   dtype_ = common::AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0);
   if (shape_.size() < kResizeBilinearGradInputsDoutShapeSize) {
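The reduce kernel above partitions dimensions into reduced axes and a stride contributed by the kept dims, before transposing the reduced axes together. A standalone sketch of that index arithmetic, assuming row-major layout (this mirrors the idea, not the exact RunFunc):

// Sketch: partition dims into reduced axes and a kept-dims element count.
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  std::vector<int64_t> input_shape = {2, 3, 4};
  std::vector<bool> reduced = {false, true, false};  // reduce axis 1
  std::vector<size_t> axes;
  size_t stride = 1;  // product of non-reduced dims
  for (size_t i = 0; i < input_shape.size(); ++i) {
    if (reduced[i]) {
      axes.push_back(i);
    } else {
      stride *= static_cast<size_t>(input_shape[i]);
    }
  }
  std::cout << "reduced axes: " << axes.size() << ", kept elements: " << stride << '\n';  // 1, 8
}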
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/resize_nearest_neighbor_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/resize_nearest_neighbor_cpu_kernel.cc
index 58d8b3cfe3b..44c7463d631 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/resize_nearest_neighbor_cpu_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/resize_nearest_neighbor_cpu_kernel.cc
@@ -30,7 +30,7 @@ constexpr size_t kResizeNearestNeighborAttrSize = 2;
 void ResizeNearestNeighborCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
   MS_EXCEPTION_IF_NULL(kernel_node);
   kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
-  std::vector<size_t> input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
+  auto input_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0));
   std::vector<int64_t> output_size = common::AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, SIZE);
   align_corners_ = common::AnfAlgo::GetNodeAttr<bool>(kernel_node, "align_corners");
   dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0);
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/resize_nearest_neighbor_grad_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/resize_nearest_neighbor_grad_cpu_kernel.cc
index 2a8d67da1ca..b351c4144ad 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/resize_nearest_neighbor_grad_cpu_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/resize_nearest_neighbor_grad_cpu_kernel.cc
@@ -30,8 +30,8 @@ constexpr size_t kResizeNearestNeighborGradOutputsShapeSize = 4;
 void ResizeNearestNeighborGradCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
   MS_EXCEPTION_IF_NULL(kernel_node);
   kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
-  std::vector<size_t> input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
-  std::vector<size_t> output_size = common::AnfAlgo::GetOutputInferShape(kernel_node, 0);
+  auto input_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0));
+  auto output_size = common::AnfAlgo::GetOutputInferShape(kernel_node, 0);
   align_corners_ = common::AnfAlgo::GetNodeAttr<bool>(kernel_node, "align_corners");
   dtype_ = common::AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0);
 
@@ -49,8 +49,8 @@ void ResizeNearestNeighborGradCpuKernelMod::InitKernel(const CNodePtr &kernel_no
   channel_ = input_shape[1];
   in_height_ = input_shape[2];
   in_width_ = input_shape[3];
-  out_height_ = output_size[2];
-  out_width_ = output_size[3];
+  out_height_ = LongToSize(output_size[2]);
+  out_width_ = LongToSize(output_size[3]);
   height_scale_ = Scaling(out_height_, in_height_, align_corners_);
   width_scale_ = Scaling(out_width_, in_width_, align_corners_);
 }
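Scaling() above converts one coordinate space into the other. A sketch under the common TF-style convention, which is an assumption here rather than a verified MindSpore source: (from-1)/(to-1) when align_corners is set, from/to otherwise, with the first argument being the extent being mapped from:

// Sketch: resize scale factor; align_corners convention assumed, not verified.
#include <cstddef>
#include <iostream>

float ScalingSketch(size_t from_size, size_t to_size, bool align_corners) {
  if (align_corners && to_size > 1) {
    return static_cast<float>(from_size - 1) / static_cast<float>(to_size - 1);
  }
  return static_cast<float>(from_size) / static_cast<float>(to_size);
}

int main() {
  std::cout << ScalingSketch(4, 8, false) << ' '   // 0.5
            << ScalingSketch(4, 8, true) << '\n';  // ~0.4286
}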
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/right_shift_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/right_shift_cpu_kernel.cc
index 0d3753feec8..607c637bc1e 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/right_shift_cpu_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/right_shift_cpu_kernel.cc
@@ -83,10 +83,11 @@ bool RightShiftCpuKernelMod::IntCompute(const std::vector<AddressPtr> &inputs, c
   if (output_shape_.size() == 0) {
     (void)output_shape_.insert(output_shape_.begin(), 1);
   }
-  size_t output_size_ = 1;
+  int64_t size_tmp = 1;
   for (size_t i = 0; i < output_shape_.size(); ++i) {
-    output_size_ *= output_shape_[i];
+    size_tmp *= output_shape_[i];
   }
+  size_t output_size = LongToSize(size_tmp);
   BroadcastIterator base_iter(input_shape_1_, input_shape_2_, output_shape_);
   auto task = [&input1, &input2, &output, &base_iter](size_t start, size_t end) {
     auto iter = base_iter;
@@ -104,7 +105,7 @@ bool RightShiftCpuKernelMod::IntCompute(const std::vector<AddressPtr> &inputs, c
       iter.GenNextPos();
     }
   };
-  ParallelLaunchAutoSearch(task, output_size_, this, &parallel_search_info_);
+  ParallelLaunchAutoSearch(task, output_size, this, &parallel_search_info_);
   return true;
 }
 
@@ -117,10 +118,11 @@ bool RightShiftCpuKernelMod::UIntCompute(const std::vector<AddressPtr> &inputs,
   if (output_shape_.size() == 0) {
     (void)output_shape_.insert(output_shape_.begin(), 1);
   }
-  size_t output_size_ = 1;
+  int64_t size_tmp = 1;
   for (size_t i = 0; i < output_shape_.size(); ++i) {
-    output_size_ *= output_shape_[i];
+    size_tmp *= output_shape_[i];
   }
+  size_t output_size = LongToSize(size_tmp);
   BroadcastIterator base_iter(input_shape_1_, input_shape_2_, output_shape_);
   auto task = [&input1, &input2, &output, &base_iter](size_t start, size_t end) {
     auto iter = base_iter;
@@ -135,7 +137,7 @@ bool RightShiftCpuKernelMod::UIntCompute(const std::vector<AddressPtr> &inputs,
       iter.GenNextPos();
     }
   };
-  ParallelLaunchAutoSearch(task, output_size_, this, &parallel_search_info_);
+  ParallelLaunchAutoSearch(task, output_size, this, &parallel_search_info_);
   return true;
 }
 
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/right_shift_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/right_shift_cpu_kernel.h
index 5b988199bec..b0f2f954313 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/right_shift_cpu_kernel.h
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/right_shift_cpu_kernel.h
@@ -42,9 +42,9 @@ class RightShiftCpuKernelMod : public DeprecatedNativeCpuKernelMod {
  private:
   TypeId input_type_1_{kTypeUnknown};
   TypeId input_type_2_{kTypeUnknown};
-  std::vector<size_t> input_shape_1_;
-  std::vector<size_t> input_shape_2_;
-  std::vector<size_t> output_shape_;
+  ShapeVector input_shape_1_;
+  ShapeVector input_shape_2_;
+  ShapeVector output_shape_;
   template <typename T>
   bool IntCompute(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);
   template <typename T>
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/rl/priority_replay_buffer_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/rl/priority_replay_buffer_cpu_kernel.h
index 19c0e64c201..fa930ad5e5b 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/rl/priority_replay_buffer_cpu_kernel.h
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/rl/priority_replay_buffer_cpu_kernel.h
@@ -108,8 +108,8 @@ class PriorityReplayBufferUpdateCpuKernel : public DeprecatedNativeCpuKernelMod
  private:
   int64_t handle_{-1};
-  std::vector<size_t> indices_shape_;
-  std::vector<size_t> priorities_shape_;
+  std::vector<int64_t> indices_shape_;
+  std::vector<int64_t> priorities_shape_;
   std::shared_ptr<PriorityReplayBuffer> prioriory_replay_buffer_{nullptr};
 };
 
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/rl/tensor_array_create_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/rl/tensor_array_create_kernel.cc
index 5c2973bd2f7..97f2f6f7c71 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/rl/tensor_array_create_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/rl/tensor_array_create_kernel.cc
@@ -28,10 +28,7 @@ TensorArrayCreateCpuKernelMod::TensorArrayCreateCpuKernelMod() : is_dynamic_(tru
 void TensorArrayCreateCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
   MS_EXCEPTION_IF_NULL(kernel_node);
-  auto shape = common::AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, "element_shape");
-  for (auto i : shape) {
-    shapes_.push_back(LongToSize(i));
-  }
+  shapes_ = common::AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, "element_shape");
   type_ = common::AnfAlgo::GetNodeAttr<TypePtr>(kernel_node, "dtype");
   size_ = common::AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "size");
   is_dynamic_ = common::AnfAlgo::GetNodeAttr<bool>(kernel_node, "dynamic_size");
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/rl/tensor_array_create_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/rl/tensor_array_create_kernel.h
index bc5e11e5cd7..67b8d425deb 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/rl/tensor_array_create_kernel.h
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/rl/tensor_array_create_kernel.h
@@ -41,7 +41,7 @@ class TensorArrayCreateCpuKernelMod : public DeprecatedNativeCpuKernelMod {
  private:
   bool is_dynamic_;
   int64_t size_;
-  std::vector<size_t> shapes_;
+  ShapeVector shapes_;
   TypePtr type_;
   std::string name_;
 };
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/rl/tensor_array_stack_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/rl/tensor_array_stack_kernel.cc
index 68a40eeaa61..8553959ef72 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/rl/tensor_array_stack_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/rl/tensor_array_stack_kernel.cc
@@ -62,7 +62,7 @@ void TensorArrayStackCpuKernelMod::PostExecute() {
   auto shape = shapes_;
   (void)shape.insert(shape.cbegin(), tensor_size);
   MS_LOG(DEBUG) << "After postexecute, the real shape of TensorArrayStack is " << shape;
-  common::AnfAlgo::SetOutputInferTypeAndShape({type_->type_id()}, {shape}, kernel_node_.lock().get());
+  common::AnfAlgo::SetOutputInferTypeAndShape({type_->type_id()}, {Convert2Long(shape)}, kernel_node_.lock().get());
 }
 
 void TensorArrayStackCpuKernelMod::ResetResource() noexcept {
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/rl/tensor_array_write_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/rl/tensor_array_write_kernel.cc
index d0f7dd431f8..3de282d252e 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/rl/tensor_array_write_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/rl/tensor_array_write_kernel.cc
@@ -33,7 +33,7 @@ void TensorArrayWriteCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
   shapes_ = AnfAlgo::GetInputDeviceShape(kernel_node, kSecondInputIndex);
   value_size_ = GetTypeByte(TypeIdToType(type_));
   for (auto i : shapes_) {
-    value_size_ *= i;
+    value_size_ *= static_cast<size_t>(i);
   }
   input_size_list_.push_back(sizeof(int64_t));
   input_size_list_.push_back(sizeof(int64_t));
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/rl/tensor_array_write_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/rl/tensor_array_write_kernel.h
index 3f3a9b9de30..1afb4768d19 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/rl/tensor_array_write_kernel.h
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/rl/tensor_array_write_kernel.h
@@ -38,7 +38,7 @@ class TensorArrayWriteCpuKernelMod : public DeprecatedNativeCpuKernelMod {
 
  private:
   size_t value_size_;
-  std::vector<size_t> shapes_;
+  std::vector<int64_t> shapes_;
   TypeId type_;
   static std::vector<KernelAttr> support_list_;
 };
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/rmsprop_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/rmsprop_cpu_kernel.cc
index 73b599cd154..0f88f77cbae 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/rmsprop_cpu_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/rmsprop_cpu_kernel.cc
@@ -91,7 +91,7 @@ void RMSPropCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
     momentum_ = common::AnfAlgo::GetNodeAttr<float>(kernel_node, "momentum");
     epsilon_ = common::AnfAlgo::GetNodeAttr<float>(kernel_node, "epsilon");
   }
-  auto input_shape = common::AnfAlgo::GetOutputInferShape(kernel_node, 0);
+  auto input_shape = Convert2SizeT(common::AnfAlgo::GetOutputInferShape(kernel_node, 0));
   dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0);
   for (auto &dim : input_shape) {
     size_ *= dim;
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/roi_align_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/roi_align_cpu_kernel.cc
index e6b10ab62c9..47fdbecc0ed 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/roi_align_cpu_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/roi_align_cpu_kernel.cc
@@ -75,12 +75,12 @@ void ROIAlignCpuKernelFunc::InitFunc(const CNodePtr &kernel_node) {
     MS_LOG(ERROR) << "For '" << kernel_name_ << "', the dimension of 'features' must be 4, but got " << x_shape_size;
   }
-  channels_ = SizeToInt(x_shape[CHANNEL]);
-  height_ = SizeToInt(x_shape[HEIGHT]);
-  width_ = SizeToInt(x_shape[WIDTH]);
+  channels_ = LongToInt(x_shape[CHANNEL]);
+  height_ = LongToInt(x_shape[HEIGHT]);
+  width_ = LongToInt(x_shape[WIDTH]);
LongToInt(x_shape[WIDTH]); - roi_rows_ = SizeToInt(rois_shape[0]); - roi_cols_ = SizeToInt(rois_shape[1]); + roi_rows_ = LongToInt(rois_shape[0]); + roi_cols_ = LongToInt(rois_shape[1]); pooled_height_ = static_cast(common::AnfAlgo::GetNodeAttr(kernel_node, "pooled_height")); pooled_width_ = static_cast(common::AnfAlgo::GetNodeAttr(kernel_node, "pooled_width")); diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/roi_align_grad_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/roi_align_grad_cpu_kernel.cc index 1852be2c6e5..c3d4e215377 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/roi_align_grad_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/roi_align_grad_cpu_kernel.cc @@ -133,8 +133,8 @@ void ROIAlignGradCpuKernelFunc::InitFunc(const CNodePtr &kernel_node) { CheckParam(kernel_node); auto rois_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); - roi_rows_ = SizeToInt(rois_shape[0]); - roi_cols_ = SizeToInt(rois_shape[1]); + roi_rows_ = LongToInt(rois_shape[0]); + roi_cols_ = LongToInt(rois_shape[1]); std::vector xdiff_shape_me = common::AnfAlgo::GetNodeAttr>(kernel_node, "xdiff_shape"); (void)std::transform(xdiff_shape_me.begin(), xdiff_shape_me.end(), std::back_inserter(xdiff_shape_), diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/rpc/rpc_recv_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/rpc/rpc_recv_kernel.cc index 2ccb0230d82..bcbdda9ca6f 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/rpc/rpc_recv_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/rpc/rpc_recv_kernel.cc @@ -27,27 +27,23 @@ int RpcRecvKernelMod::Resize(const BaseOperatorPtr &, const std::vectorGetShapeVector(); - std::vector size_t_shape = {}; - if (AnfUtils::IsShapeDynamic(int64_shape)) { + if (IsDynamic(int64_shape)) { MS_LOG(EXCEPTION) << "The recv kernel's input " << i << " shape inferred is still dynamic:" << int64_shape; } - std::transform(int64_shape.begin(), int64_shape.end(), std::back_inserter(size_t_shape), LongToSize); int64_t size = 1; - GetShapeSize(size_t_shape, TypeIdToType(inputs[i]->GetDtype()), &size); + GetShapeSize(int64_shape, TypeIdToType(inputs[i]->GetDtype()), &size); input_size_list_[i] = LongToSize(size); } // Reassign the memory size of recv kernel's outputs. 
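// A minimal standalone sketch of the Resize() pattern in this hunk: the shape
// stays int64_t (ShapeVector) end to end, dynamic dims are rejected up front,
// and the byte count is folded directly from the int64 dims with no size_t
// detour. ShapeVector and the two helpers below are re-declared here purely as
// illustrative assumptions; they stand in for the tree's real utilities
// (IsDynamic, GetShapeSize, LongToSize), which are not re-verified here.
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <stdexcept>
#include <vector>

using ShapeVector = std::vector<int64_t>;

inline bool IsDynamicShape(const ShapeVector &shape) {
  // A negative dim (e.g. -1) marks a not-yet-inferred dimension.
  return std::any_of(shape.begin(), shape.end(), [](int64_t d) { return d < 0; });
}

inline size_t ShapeByteSize(const ShapeVector &shape, size_t type_byte) {
  if (IsDynamicShape(shape)) {
    throw std::runtime_error("shape inferred is still dynamic");
  }
  int64_t size = static_cast<int64_t>(type_byte);
  for (int64_t dim : shape) {
    size *= dim;  // every dim is >= 0 here, so the product stays non-negative
  }
  return static_cast<size_t>(size);
}
// Usage, mirroring the two loops in this hunk:
//   input_size_list_[i] = ShapeByteSize(inputs[i]->GetShapeVector(), type_byte);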
   for (size_t i = 0; i < outputs.size(); i++) {
     auto int64_shape = outputs[i]->GetShapeVector();
-    std::vector<size_t> size_t_shape;
-    if (AnfUtils::IsShapeDynamic(int64_shape)) {
+    if (IsDynamic(int64_shape)) {
       MS_LOG(EXCEPTION) << "The recv kernel's output " << i << " shape inferred is still dynamic:" << int64_shape;
     }
-    std::transform(int64_shape.begin(), int64_shape.end(), std::back_inserter(size_t_shape), LongToSize);
     int64_t size = 1;
-    GetShapeSize(size_t_shape, TypeIdToType(outputs[i]->GetDtype()), &size);
+    GetShapeSize(int64_shape, TypeIdToType(outputs[i]->GetDtype()), &size);
     output_size_list_[i] = LongToSize(size);
   }
   return 0;
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/sample_distorted_bounding_box_v2_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/sample_distorted_bounding_box_v2_cpu_kernel.cc
index 7000f0d0f6c..2c994742d8b 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/sample_distorted_bounding_box_v2_cpu_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/sample_distorted_bounding_box_v2_cpu_kernel.cc
@@ -199,6 +199,9 @@ void SampleDistortedBoundingBoxV2CPUKernelMod::InitKernel(const CNodePtr &kernel_node) {
   auto shape_image_size = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
   auto shape_bounding_boxes = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
+  if (AnfAlgo::IsShapesDynamic({shape_image_size, shape_bounding_boxes})) {
+    return;
+  }
   seed = common::AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "seed");
   seed2 = common::AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "seed2");
   aspect_ratio_range = common::AnfAlgo::GetNodeAttr<std::vector<float>>(kernel_node, "aspect_ratio_range");
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/scatter_arithmetic_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/scatter_arithmetic_cpu_kernel.cc
index 8573e22019a..528c41b2b42 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/scatter_arithmetic_cpu_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/scatter_arithmetic_cpu_kernel.cc
@@ -85,27 +85,33 @@ void ScatterArithmeticCpuKernelFunc::InitFunc(const CNodePtr &kernel_node) {
   MS_EXCEPTION_IF_NULL(kernel_node);
   kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
   auto input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
+  if (IsDynamic(input_shape)) {
+    return;
+  }
   if (input_shape.size() < 1) {
     MS_LOG(EXCEPTION) << "For '" << kernel_name_
                       << "', the dimension of 'input_x' must be greater than or equal to 1, but got "
                       << input_shape.size() << ".";
   }
-  first_dim_size = SizeToInt(input_shape[0]);
+  first_dim_size = LongToInt(input_shape[0]);
   input_size_ = 1;
   inner_size_ = 1;
   if (input_shape.empty()) {
     MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the shape of 'input_x' can not be empty.";
   }
+  int64_t size_tmp = 1;
   for (size_t i = 1; i < input_shape.size(); i++) {
-    inner_size_ *= input_shape[i];
+    size_tmp *= input_shape[i];
   }
-  input_size_ = input_shape[0] * inner_size_;
+  inner_size_ = LongToSize(size_tmp);
+  input_size_ = LongToSize(input_shape[0]) * inner_size_;
   auto indices_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
-  indices_size_ = 1;
+  size_tmp = 1;
   for (size_t i = 0; i < indices_shape.size(); i++) {
-    indices_size_ *= indices_shape[i];
+    size_tmp *= indices_shape[i];
   }
+  indices_size_ = LongToSize(size_tmp);
   InitComputeFunc();
 }
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/scatter_nd_arithmetic_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/scatter_nd_arithmetic_cpu_kernel.cc
index 632f06c1ec1..b82929eb377 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/scatter_nd_arithmetic_cpu_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/scatter_nd_arithmetic_cpu_kernel.cc
@@ -44,6 +44,7 @@ inline T RealDiv(const T &a, const T &b) {
   return static_cast<T>(a / b);
 }
 }  // namespace
+
 bool ScatterNdArithmeticCpuKernelMod::Init(const BaseOperatorPtr &base_operator,
                                            const std::vector<KernelTensorPtr> &inputs,
                                            const std::vector<KernelTensorPtr> &outputs) {
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/scatter_nd_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/scatter_nd_cpu_kernel.cc
index 16cdbbc2dbb..1798cbc462a 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/scatter_nd_cpu_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/scatter_nd_cpu_kernel.cc
@@ -70,9 +70,9 @@ void ScatterNdCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
   MS_EXCEPTION_IF_NULL(kernel_node);
   kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
   Check(kernel_node);
-  auto shape = common::AnfAlgo::GetOutputInferShape(kernel_node, 0);
-  auto indices_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
-  auto updates_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
+  auto shape = Convert2SizeT(common::AnfAlgo::GetOutputInferShape(kernel_node, 0));
+  auto indices_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0));
+  auto updates_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1));
   auto indices_unit_rank = indices_shape.back();
   if (indices_unit_rank > shape.size()) {
     MS_LOG(EXCEPTION) << "For '" << kernel_name_
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/scatter_nd_max_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/scatter_nd_max_cpu_kernel.cc
index 9f1338dc216..139a0fdbd2e 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/scatter_nd_max_cpu_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/scatter_nd_max_cpu_kernel.cc
@@ -68,7 +68,10 @@ void ScatterMaxCPUKernelMod::InitKernel(const CNodePtr &kernel_node) {
   auto shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
   auto indices_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
   auto updates_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
-  auto indices_unit_rank = indices_shape.back();
+  if (AnfAlgo::IsShapesDynamic({shape, indices_shape, updates_shape})) {
+    return;
+  }
+  auto indices_unit_rank = LongToSize(indices_shape.back());
   if (indices_unit_rank > shape.size()) {
     MS_EXCEPTION(ValueError) << "For '" << kernel_name_
@@ -85,12 +88,12 @@ void ScatterMaxCPUKernelMod::InitKernel(const CNodePtr &kernel_node) {
   indices_unit_rank_ = SizeToInt(indices_unit_rank);
   unit_size_ = 1;
   for (size_t i = indices_shape.size() - 1; i < updates_shape.size(); ++i) {
-    unit_size_ *= SizeToInt(updates_shape[i]);
+    unit_size_ *= LongToInt(updates_shape[i]);
   }
   num_units_ = 1;
   num_units_ *= updates_shape[indices_shape.size() - kNumUnits];
   for (int i = SizeToInt(indices_shape.size()) - 3; i >= 0; i--) {
-    num_units_ *= updates_shape[i];
+    num_units_ *= LongToSize(updates_shape[i]);
   }
   int out_stride = 1;
   out_strides_.push_back(out_stride);
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/scatter_nd_min_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/scatter_nd_min_cpu_kernel.cc
index acd3ccf7372..c6327b63f03 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/scatter_nd_min_cpu_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/scatter_nd_min_cpu_kernel.cc
@@ -68,7 +68,10 @@ void
ScatterMinCPUKernelMod::InitKernel(const CNodePtr &kernel_node) { auto shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); auto indices_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); auto updates_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); - auto indices_unit_rank = indices_shape.back(); + if (AnfAlgo::IsShapesDynamic({shape, indices_shape, updates_shape})) { + return; + } + auto indices_unit_rank = LongToSize(indices_shape.back()); if (indices_unit_rank > shape.size()) { MS_EXCEPTION(ValueError) << "For '" << kernel_name_ diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/scatter_nd_update_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/scatter_nd_update_cpu_kernel.cc index bdce332579c..cdb63e8ff07 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/scatter_nd_update_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/scatter_nd_update_cpu_kernel.cc @@ -72,8 +72,11 @@ void ScatterUpdateCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); auto shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); - auto indices_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); - auto updates_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); + auto indices_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1)); + auto updates_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2)); + if (AnfAlgo::IsShapesDynamic({shape})) { + return; + } auto indices_unit_rank = indices_shape.back(); if (indices_unit_rank > shape.size()) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/searchsorted_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/searchsorted_cpu_kernel.cc index efcb15ce3c0..fdb2e99af5d 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/searchsorted_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/searchsorted_cpu_kernel.cc @@ -67,7 +67,7 @@ bool SearchSortedCpuKernelMod::LaunchKernel(const std::vector(outputs[0]->addr); size_t elem_num = inputs[1]->size / sizeof(S); size_t seq_dim = sequence_shape_.size(); - size_t search_repeat = values_shape_.back(); + size_t search_repeat = static_cast(values_shape_.back()); auto task = [this, &sequence, &values, &output, seq_dim, search_repeat](size_t start, size_t end) { for (size_t i = start; i < end; i++) { diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/searchsorted_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/searchsorted_cpu_kernel.h index 380ec28f2ee..b4cd6806aa1 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/searchsorted_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/searchsorted_cpu_kernel.h @@ -53,9 +53,9 @@ class SearchSortedCpuKernelMod : public DeprecatedNativeCpuKernelMod { bool right_{false}; size_t search_len{0}; - std::vector sequence_shape_; - std::vector values_shape_; - std::vector output_shape_; + std::vector sequence_shape_; + std::vector values_shape_; + std::vector output_shape_; }; } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/segment_max_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/segment_max_cpu_kernel.cc index ea9cda359c6..6675a3f9a08 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/segment_max_cpu_kernel.cc +++ 
b/mindspore/ccsrc/plugin/device/cpu/kernel/segment_max_cpu_kernel.cc @@ -29,9 +29,9 @@ void SegmentMaxCPUKernelMod::InitKernel(const CNodePtr &kernel_node) { segment_ids_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 1); output_shape_ = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); output_dtype_ = AnfAlgo::GetOutputDeviceDataType(kernel_node, 0); - input_x_num_ = CPUKernelUtils::CalcElementNum(input_x_shape_); - segment_ids_num_ = CPUKernelUtils::CalcElementNum(segment_ids_shape_); - output_num_ = CPUKernelUtils::CalcElementNum(output_shape_); + input_x_num_ = SizeOf(input_x_shape_); + segment_ids_num_ = SizeOf(segment_ids_shape_); + output_num_ = SizeOf(output_shape_); auto kernel_attr = GetKernelAttrFromNode(kernel_node); auto [is_match, index] = MatchKernelAttr(kernel_attr, GetOpSupport()); if (!is_match) { @@ -126,7 +126,7 @@ bool SegmentMaxCPUKernelMod::LaunchKernel(const std::vector for (size_t i = 0; i < output_num_; ++i) { output_data_addr[i] = static_cast(0); } - const size_t num_compare_per = input_x_num_ / input_x_shape_[0]; + const size_t num_compare_per = input_x_num_ / LongToSize(input_x_shape_[0]); const size_t num_segments = segments.size(); if (num_segments < kSegmentsThreshold) { for (size_t i = 0; i < num_segments; ++i) { diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/segment_max_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/segment_max_cpu_kernel.h index c3299f6041c..e3349de006b 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/segment_max_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/segment_max_cpu_kernel.h @@ -40,9 +40,9 @@ class SegmentMaxCPUKernelMod : public DeprecatedNativeCpuKernelMod { std::vector GetOpSupport() override; private: - std::vector input_x_shape_; - std::vector segment_ids_shape_; - std::vector output_shape_; + ShapeVector input_x_shape_; + ShapeVector segment_ids_shape_; + ShapeVector output_shape_; size_t input_x_num_; size_t segment_ids_num_; size_t output_num_; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/segment_min_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/segment_min_cpu_kernel.cc index d486381262c..b2a45ae2ef8 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/segment_min_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/segment_min_cpu_kernel.cc @@ -29,9 +29,9 @@ void SegmentMinCPUKernelMod::InitKernel(const CNodePtr &kernel_node) { segment_ids_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 1); output_shape_ = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); output_dtype_ = AnfAlgo::GetOutputDeviceDataType(kernel_node, 0); - input_x_num_ = CPUKernelUtils::CalcElementNum(input_x_shape_); - segment_ids_num_ = CPUKernelUtils::CalcElementNum(segment_ids_shape_); - output_num_ = CPUKernelUtils::CalcElementNum(output_shape_); + input_x_num_ = SizeOf(input_x_shape_); + segment_ids_num_ = SizeOf(segment_ids_shape_); + output_num_ = SizeOf(output_shape_); auto kernel_attr = GetKernelAttrFromNode(kernel_node); auto [is_match, index] = MatchKernelAttr(kernel_attr, GetOpSupport()); if (!is_match) { @@ -126,7 +126,7 @@ bool SegmentMinCPUKernelMod::LaunchKernel(const std::vector for (size_t i = 0; i < output_num_; ++i) { output_data_addr[i] = static_cast(0); } - const size_t num_compare_per = input_x_num_ / input_x_shape_[0]; + const size_t num_compare_per = input_x_num_ / LongToSize(input_x_shape_[0]); const size_t num_segments = segments.size(); if (num_segments < kSegmentsThreshold) { for (size_t i = 0; i < num_segments; ++i) { diff --git 
a/mindspore/ccsrc/plugin/device/cpu/kernel/segment_min_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/segment_min_cpu_kernel.h index 08e8603a8d3..d102c4bbc0f 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/segment_min_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/segment_min_cpu_kernel.h @@ -40,9 +40,9 @@ class SegmentMinCPUKernelMod : public DeprecatedNativeCpuKernelMod { std::vector GetOpSupport() override; private: - std::vector input_x_shape_; - std::vector segment_ids_shape_; - std::vector output_shape_; + ShapeVector input_x_shape_; + ShapeVector segment_ids_shape_; + ShapeVector output_shape_; size_t input_x_num_; size_t segment_ids_num_; size_t output_num_; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/segment_sum_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/segment_sum_cpu_kernel.cc index e2bb5f44cd6..1ab29df0db8 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/segment_sum_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/segment_sum_cpu_kernel.cc @@ -36,9 +36,9 @@ void SegmentSumCPUKernelMod::InitKernel(const CNodePtr &kernel_node) { input_x_dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); segment_ids_dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 1); output_dtype_ = AnfAlgo::GetOutputDeviceDataType(kernel_node, 0); - input_x_num_ = CPUKernelUtils::CalcElementNum(input_x_shape_); - segment_ids_num_ = CPUKernelUtils::CalcElementNum(segment_ids_shape_); - output_num_ = CPUKernelUtils::CalcElementNum(output_shape_); + input_x_num_ = SizeOf(input_x_shape_); + segment_ids_num_ = SizeOf(segment_ids_shape_); + output_num_ = SizeOf(output_shape_); } std::vector SegmentSumCPUKernelMod::GetOpSupport() { @@ -158,7 +158,7 @@ bool SegmentSumCPUKernelMod::LaunchKernel(const std::vector for (size_t i = 0; i < output_num_; ++i) { output_data_addr[i] = static_cast(0); } - const size_t num_compare_per = input_x_num_ / input_x_shape_[0]; + const size_t num_compare_per = input_x_num_ / LongToSize(input_x_shape_[0]); const size_t num_segments = segments.size(); if (num_segments < kSegmentsThreshold) { for (size_t i = 0; i < num_segments; ++i) { diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/segment_sum_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/segment_sum_cpu_kernel.h index 674bbca2a2a..36568cee356 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/segment_sum_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/segment_sum_cpu_kernel.h @@ -42,9 +42,9 @@ class SegmentSumCPUKernelMod : public DeprecatedNativeCpuKernelMod { std::vector GetOpSupport() override; private: - std::vector input_x_shape_; - std::vector segment_ids_shape_; - std::vector output_shape_; + ShapeVector input_x_shape_; + ShapeVector segment_ids_shape_; + ShapeVector output_shape_; size_t input_x_num_; size_t segment_ids_num_; size_t output_num_; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/select_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/select_cpu_kernel.cc index df883ba1214..24b8e2f3e2c 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/select_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/select_cpu_kernel.cc @@ -33,9 +33,7 @@ void SelectCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); auto shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); - for (size_t x : shape) { - element_num_ *= x; - } + element_num_ = SizeOf(shape); auto kernel_attr = 
GetKernelAttrFromNode(kernel_node); auto [is_match, index] = MatchKernelAttr(kernel_attr, GetOpSupport()); diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/sigmoid_cross_entropy_with_logits_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/sigmoid_cross_entropy_with_logits_cpu_kernel.cc index e50419e7cd4..e8990cf40d5 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/sigmoid_cross_entropy_with_logits_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/sigmoid_cross_entropy_with_logits_cpu_kernel.cc @@ -28,10 +28,8 @@ void SigmoidCrossEntropyWithLogitsCpuKernelMod::InitKernel(const CNodePtr &kerne MS_EXCEPTION_IF_NULL(kernel_node); kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); - std::vector x_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); - for (const uint64_t &d : x_shape) { - tensor_size_ *= d; - } + auto x_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + tensor_size_ = SizeOf(x_shape); } bool SigmoidCrossEntropyWithLogitsCpuKernelMod::Launch(const std::vector &inputs, diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/sigmoid_cross_entropy_with_logits_grad_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/sigmoid_cross_entropy_with_logits_grad_cpu_kernel.cc index 20a5ad643fd..1ee90938313 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/sigmoid_cross_entropy_with_logits_grad_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/sigmoid_cross_entropy_with_logits_grad_cpu_kernel.cc @@ -28,10 +28,8 @@ void SigmoidCrossEntropyWithLogitsGradCpuKernelMod::InitKernel(const CNodePtr &k MS_EXCEPTION_IF_NULL(kernel_node); kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); - std::vector x_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); - for (const uint64_t &d : x_shape) { - tensor_size_ *= d; - } + auto x_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + tensor_size_ = SizeOf(x_shape); } bool SigmoidCrossEntropyWithLogitsGradCpuKernelMod::Launch(const std::vector &inputs, diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/slice_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/slice_cpu_kernel.cc index 25969429540..4da6c89a4bd 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/slice_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/slice_cpu_kernel.cc @@ -86,12 +86,12 @@ void SliceCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { data_size_ = size_pair->second; } -void SliceCpuKernelMod::InitSliceParam(const std::vector &input_shape, const std::vector &begin, +void SliceCpuKernelMod::InitSliceParam(const ShapeVector &input_shape, const std::vector &begin, const std::vector &size) { origin_dim_size_ = input_shape.size(); for (size_t i = 0; i < DIMENSION_8D; i++) { if (i < input_shape.size()) { - int dim_len = SizeToInt(input_shape[i]); + int dim_len = LongToInt(input_shape[i]); int begin_pos = LongToInt(begin[i]); int slice_size = LongToInt(size[i]); if (slice_size == -1) { @@ -150,7 +150,7 @@ bool SliceCpuKernelMod::Launch(const std::vector &inputs, co << "', the dimensions of 'begin' and 'size' must be 1, but got the dimension of 'begin': " << begin_shape.size() << " and the dimension of 'size': " << size_shape.size(); } - if (begin_shape[0] != input_shape.size() || size_shape[0] != input_shape.size()) { + if (begin_shape[0] != SizeToLong(input_shape.size()) || size_shape[0] != 
SizeToLong(input_shape.size())) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the lengths of 'begin' and 'size' must be equal to " "the dimension of input tensor, but got the length of 'begin' " @@ -162,7 +162,7 @@ bool SliceCpuKernelMod::Launch(const std::vector &inputs, co std::vector begin{begin_ptr, begin_ptr + begin_shape[0]}; std::vector size{size_ptr, size_ptr + size_shape[0]}; for (size_t i = 0; i < begin.size(); ++i) { - if (input_shape[i] < LongToSize(begin[i] + size[i])) { + if (input_shape[i] < begin[i] + size[i]) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', slice shape should be not greater than origin shape. But in dimension i=" << i << ", origin shape 'input_shape[i]' is " << input_shape[i] diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/slice_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/slice_cpu_kernel.h index 188685d8ccf..52c3fa2aae4 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/slice_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/slice_cpu_kernel.h @@ -70,7 +70,7 @@ class SliceCpuKernelMod : public DeprecatedNativeCpuKernelMod { } private: - void InitSliceParam(const std::vector &input_shape, const std::vector &begin, + void InitSliceParam(const ShapeVector &input_shape, const std::vector &begin, const std::vector &size); size_t origin_dim_size_{0}; int data_size_{4}; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/slice_grad_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/slice_grad_cpu_kernel.cc index cd54a7e7eae..3c19514e3aa 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/slice_grad_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/slice_grad_cpu_kernel.cc @@ -111,7 +111,7 @@ void SliceGradCpuKernelMod::ExpandAllMemberDims(size_t expand_dims) { if (ax < 0) { ax = 0; } - input_shape_.push_back(IntToSize(ax)); + input_shape_.push_back(ax); } } } diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/slice_grad_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/slice_grad_cpu_kernel.h index f75f3aed140..b4e584cd140 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/slice_grad_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/slice_grad_cpu_kernel.h @@ -79,9 +79,9 @@ class SliceGradCpuKernelMod : public DeprecatedNativeCpuKernelMod { std::vector end_; std::vector strides_; std::vector size_; - std::vector input_shape_; + ShapeVector input_shape_; std::vector input_element_num_; - std::vector output_shape_; + ShapeVector output_shape_; std::vector output_element_num_; TypeId dtype_{kTypeUnknown}; std::string kernel_type_{kUnknown}; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/smooth_l1_loss_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/smooth_l1_loss_cpu_kernel.cc index 508c573b8e5..ba28064edf5 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/smooth_l1_loss_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/smooth_l1_loss_cpu_kernel.cc @@ -32,10 +32,8 @@ void SmoothL1LossCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { if (beta_ == 0.0) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << ", the 'beta' can not be 0."; } - std::vector x_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); - for (const uint64_t &d : x_shape) { - tensor_size_ *= d; - } + auto x_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + tensor_size_ = SizeOf(x_shape); auto kernel_attr = GetKernelAttrFromNode(kernel_node); auto [is_match, index] = MatchKernelAttr(kernel_attr, GetOpSupport()); diff --git 
a/mindspore/ccsrc/plugin/device/cpu/kernel/smooth_l1_loss_grad_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/smooth_l1_loss_grad_cpu_kernel.cc index 6e500850bb9..9ae75ff3330 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/smooth_l1_loss_grad_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/smooth_l1_loss_grad_cpu_kernel.cc @@ -33,10 +33,8 @@ void SmoothL1LossGradCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { if (beta_ == 0.0) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << ", the 'beta' can not be 0."; } - std::vector x_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); - for (const uint64_t &d : x_shape) { - tensor_size_ *= d; - } + auto x_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + tensor_size_ = SizeOf(x_shape); auto kernel_attr = GetKernelAttrFromNode(kernel_node); auto [is_match, index] = MatchKernelAttr(kernel_attr, GetOpSupport()); diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/spacetodepth_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/spacetodepth_cpu_kernel.cc index 06205ffed7c..7ed3caac48d 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/spacetodepth_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/spacetodepth_cpu_kernel.cc @@ -62,11 +62,11 @@ bool SpaceToDepthCpuKernelMod::LaunchKernel(const std::vector(outputs[0]->addr); size_t size = inputs[0]->size / sizeof(T); - std::vector input_shape = input_shape_; - std::vector output_shape = output_shape_; + auto input_shape = input_shape_; + auto output_shape = output_shape_; size_t block_size = block_size_; size_t input_dimension = input_shape.size(); - size_t input_strides[3] = {1, 1, 1}; + int64_t input_strides[3] = {1, 1, 1}; for (size_t i = input_dimension - 1; i >= 1; --i) { for (size_t j = 0; j < i; ++j) { @@ -83,7 +83,7 @@ bool SpaceToDepthCpuKernelMod::LaunchKernel(const std::vector input_shape_; - std::vector output_shape_; + std::vector input_shape_; + std::vector output_shape_; }; } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/sparse_apply_adam_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/sparse_apply_adam_cpu_kernel.cc index 75abc46257a..c381d851a3f 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/sparse_apply_adam_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/sparse_apply_adam_cpu_kernel.cc @@ -104,11 +104,14 @@ void SparseApplyAdamCpuKernelMod::InitInputOutputSize(const CNodePtr &kernel_nod void SparseApplyAdamCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); - std::vector var_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); - std::vector m_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); - std::vector v_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); - std::vector grad_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 9); - std::vector indices_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 10); + ShapeVector var_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + ShapeVector m_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + ShapeVector v_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); + ShapeVector grad_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 9); + ShapeVector indices_shape = 
common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 10); + if (AnfAlgo::IsShapesDynamic({var_shape, m_shape, v_shape, grad_shape, indices_shape})) { + return; + } if (var_shape.empty()) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of 'var' must be at least 1-D, but got scalar or None."; @@ -143,7 +146,7 @@ void SparseApplyAdamCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { << indices_shape.size() << "-D."; } indices_size_ = indices_shape[0]; - if (grad_shape[0] != indices_size_) { + if (grad_shape[0] != SizeToLong(indices_size_)) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the first dimension value of 'grad' must be equal to " "the first dimension value of 'indices', but got the first dimension value of 'grad': " diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/sparse_apply_ftrl_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/sparse_apply_ftrl_cpu_kernel.cc index b311163eac8..31957d54f2d 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/sparse_apply_ftrl_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/sparse_apply_ftrl_cpu_kernel.cc @@ -90,11 +90,14 @@ void SparseApplyFtrlCpuKernelMod::InitInputOutputSize(const CNodePtr &kernel_nod void SparseApplyFtrlCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); - std::vector var_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); - std::vector accum_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); - std::vector linear_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); - std::vector grad_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); - std::vector indices_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4); + ShapeVector var_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + ShapeVector accum_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + ShapeVector linear_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); + ShapeVector grad_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); + ShapeVector indices_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4); + if (AnfAlgo::IsShapesDynamic({var_shape, accum_shape, linear_shape, grad_shape, indices_shape})) { + return; + } if (var_shape.empty()) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the 'var' must be at least 1-D, but got scalar or None."; } @@ -131,7 +134,7 @@ void SparseApplyFtrlCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { << indices_shape.size() << "-D."; } indices_size_ = indices_shape[0]; - if (grad_shape[0] != indices_size_) { + if (grad_shape[0] != SizeToLong(indices_size_)) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the first dimension value of 'grad' must be equal to " "the first dimension value of 'indices', but got the first dimension value of 'grad': " diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/sparse_apply_lazy_adam_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/sparse_apply_lazy_adam_cpu_kernel.cc index ac182418d08..9486c1e81fa 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/sparse_apply_lazy_adam_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/sparse_apply_lazy_adam_cpu_kernel.cc @@ -84,11 +84,14 @@ void SparseApplyLazyAdamCpuKernelMod::InitInputOutputSize(const CNodePtr &kernel void SparseApplyLazyAdamCpuKernelMod::InitKernel(const CNodePtr 
&kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); - std::vector var_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); - std::vector m_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); - std::vector v_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); - std::vector grad_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 9); - std::vector indices_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 10); + ShapeVector var_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + ShapeVector m_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + ShapeVector v_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); + ShapeVector grad_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 9); + ShapeVector indices_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 10); + if (AnfAlgo::IsShapesDynamic({var_shape, m_shape, v_shape, grad_shape, indices_shape})) { + return; + } if (var_shape.empty()) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of 'var' must be at least 1-D, but got scalar or None."; @@ -124,7 +127,7 @@ void SparseApplyLazyAdamCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { << indices_shape.size() << "-D."; } indices_size_ = indices_shape[0]; - if (grad_shape[0] != indices_size_) { + if (grad_shape[0] != SizeToLong(indices_size_)) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the first dimension value of 'grad' must be equal to " "the first dimension value of 'indices', but got the first dimension value of 'grad': " diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/sparse_apply_proximal_adagrad_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/sparse_apply_proximal_adagrad_cpu_kernel.cc index 163d7c18ffd..638a51bc5a4 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/sparse_apply_proximal_adagrad_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/sparse_apply_proximal_adagrad_cpu_kernel.cc @@ -95,13 +95,16 @@ void SparseApplyProximalAdagradCpuKernelMod::InitInputOutputSize(const CNodePtr void SparseApplyProximalAdagradCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); - std::vector var_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kVarIndex); - std::vector accum_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kAccIndex); - std::vector lr_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kLRIndex); - std::vector l1_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kL1Index); - std::vector l2_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kL2Index); - std::vector grad_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kGradIndex); - std::vector indices_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kIndicesIndex); + ShapeVector var_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kVarIndex); + ShapeVector accum_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kAccIndex); + ShapeVector lr_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kLRIndex); + ShapeVector l1_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kL1Index); + ShapeVector l2_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kL2Index); + 
ShapeVector grad_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kGradIndex); + ShapeVector indices_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kIndicesIndex); + if (AnfAlgo::IsShapesDynamic({var_shape, accum_shape, lr_shape, l1_shape, l2_shape, grad_shape, indices_shape})) { + return; + } if (var_shape.empty()) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of 'var' must be at least 1-D, but got scalar or None."; @@ -118,21 +121,21 @@ void SparseApplyProximalAdagradCpuKernelMod::InitKernel(const CNodePtr &kernel_n "'var', but got the dimension of 'grad': " << grad_shape.size() << " and the dimension of 'var': " << var_shape.size() << "."; } - var_first_dim_size_ = var_shape[0]; + var_first_dim_size_ = LongToSize(var_shape[0]); for (size_t i = 1; i < var_shape.size(); ++i) { if (var_shape[i] != grad_shape[i]) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the shape of 'var' and 'grad' must be equal in dimension i=" << i << ", but got 'var_shape[i]': " << var_shape[i] << " and 'grad_shape[i]': " << grad_shape[i]; } - var_outer_dim_size_ *= var_shape[i]; + var_outer_dim_size_ *= LongToSize(var_shape[i]); } if (indices_shape.size() != 1) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the 'indices' must be a 1-D vector, but got " << indices_shape.size() << "-D."; } - indices_size_ = indices_shape[0]; - if (grad_shape[0] != indices_size_) { + indices_size_ = LongToSize(indices_shape[0]); + if (grad_shape[0] != SizeToLong(indices_size_)) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the first dimension value of 'grad' must be equal to " "the first dimension value of 'indices', but got the first dimension value of 'grad': " diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/sparse_tensor_dense_matmul_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/sparse_tensor_dense_matmul_cpu_kernel.cc index 39553175d82..507479f7fa7 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/sparse_tensor_dense_matmul_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/sparse_tensor_dense_matmul_cpu_kernel.cc @@ -43,15 +43,18 @@ void SparseTensorDenseMatmulCpuKernelMod::InitKernel(const CNodePtr &kernel_node << Vector2Str(indices_shape); } auto values_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, VALUES); + if (AnfAlgo::IsShapesDynamic({values_shape, indices_shape})) { + return; + } if (values_shape.size() != 1 || values_shape[0] != indices_shape[0]) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', it requires 'values' must be a 1-D Tensor and the first dimension length " " must be equal to the first dimension length of 'indices', but got 'values' shape: " << Vector2Str(values_shape) << " and 'indices' shape: " << Vector2Str(indices_shape); } - output_shape_ = common::AnfAlgo::GetOutputInferShape(kernel_node, 0); - values_size_ = values_shape[0]; - b_shape_ = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, DENSE); + output_shape_ = Convert2SizeT(common::AnfAlgo::GetOutputInferShape(kernel_node, 0)); + values_size_ = LongToSize(values_shape[0]); + b_shape_ = Convert2SizeT(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, DENSE)); if (b_shape_.size() != kSparseTensorDenseMatmulDenseShapeSize) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of 'dense' must be " << kSparseTensorDenseMatmulDenseShapeSize << "-D, but got " << b_shape_.size() << "-D"; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/sparse_to_dense_cpu_kernal.cc 
b/mindspore/ccsrc/plugin/device/cpu/kernel/sparse_to_dense_cpu_kernal.cc index 31126de7064..7b3dc45f127 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/sparse_to_dense_cpu_kernal.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/sparse_to_dense_cpu_kernal.cc @@ -36,14 +36,17 @@ void SparseToDenseCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { << "-D Tensor, but got " << indices_shape.size() << "-D"; } auto values_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + if (AnfAlgo::IsShapesDynamic({values_shape, indices_shape})) { + return; + } if (values_shape.size() != 1 || values_shape[0] != indices_shape[0]) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', it requires 'values' must be a 1-D Tensor and the first dimension length " "must be equal to the first dimension length of 'indices', but got 'values' shape: " << Vector2Str(values_shape) << " and 'indices' shape: " << Vector2Str(indices_shape); } - values_size_ = values_shape[0]; - output_shape_ = common::AnfAlgo::GetOutputInferShape(kernel_node, 0); + values_size_ = LongToSize(values_shape[0]); + output_shape_ = Convert2SizeT(common::AnfAlgo::GetOutputInferShape(kernel_node, 0)); auto kernel_attr = GetKernelAttrFromNode(kernel_node); auto [is_match, index] = MatchKernelAttr(kernel_attr, GetOpSupport()); diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/split_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/split_cpu_kernel.cc index 5a7741f602d..129826a5b9f 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/split_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/split_cpu_kernel.cc @@ -36,7 +36,7 @@ void SplitCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { } auto input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); (void)std::transform(input_shape.begin(), input_shape.end(), std::back_inserter(input_shape_), - [](const size_t &value) { return SizeToInt(value); }); + [](const int64_t &value) { return LongToInt(value); }); if (input_shape_.size() < 1 || input_shape_.size() > SPLIT_STRIDES_SIZE) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input tensor must be in range [1, " << SPLIT_STRIDES_SIZE << "], but got " << input_shape_.size(); diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/strided_slice_grad_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/strided_slice_grad_cpu_kernel.cc index ca1f98c52d1..3e1ec240bc9 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/strided_slice_grad_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/strided_slice_grad_cpu_kernel.cc @@ -39,11 +39,9 @@ void StridedSliceGradCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { default: MS_LOG(ERROR) << "For '" << kernel_name_ << "', the dtype of input must be float32, but got " << dtype_; } - std::vector input_shape_me = AnfAlgo::GetInputDeviceShape(kernel_node, 0); - (void)std::transform(input_shape_me.begin(), input_shape_me.end(), std::back_inserter(input_shape_), - [](const int64_t &value) { return static_cast(value); }); - param_->num_axes_ = input_shape_me.size(); - param_->in_shape_length_ = input_shape_me.size(); + input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0); + param_->num_axes_ = input_shape_.size(); + param_->in_shape_length_ = input_shape_.size(); std::vector begin_me = common::AnfAlgo::GetNodeAttr>(kernel_node, BEGIN); (void)std::transform(begin_me.begin(), begin_me.end(), std::back_inserter(begin_), [](const int64_t &value) { return static_cast(value); }); diff --git 
a/mindspore/ccsrc/plugin/device/cpu/kernel/strided_slice_grad_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/strided_slice_grad_cpu_kernel.h index fbf29cf8415..be2328c46a9 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/strided_slice_grad_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/strided_slice_grad_cpu_kernel.h @@ -40,8 +40,8 @@ class StridedSliceGradCpuKernelMod : public DeprecatedNativeCpuKernelMod { std::vector begin_; std::vector end_; std::vector strides_; - std::vector input_shape_; - std::vector output_shape_; + std::vector input_shape_; + std::vector output_shape_; TypeId dtype_{kTypeUnknown}; StridedSliceParameter *param_{nullptr}; }; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/stridedslice_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/stridedslice_cpu_kernel.cc index 8d360c36374..a0bc550be1c 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/stridedslice_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/stridedslice_cpu_kernel.cc @@ -83,16 +83,16 @@ bool StridedSliceCpuKernelMod::MatchParallelPattern() { } void StridedSliceCpuKernelMod::InitParallelParam() { - outer_ = SizeToInt( - std::accumulate(input_shape_.begin(), input_shape_.begin() + split_axis_, size_t(1), std::multiplies())); - inner_ = SizeToInt( - std::accumulate(input_shape_.begin() + split_axis_ + 1, input_shape_.end(), size_t(1), std::multiplies())); + outer_ = LongToInt( + std::accumulate(input_shape_.begin(), input_shape_.begin() + split_axis_, size_t(1), std::multiplies())); + inner_ = LongToInt( + std::accumulate(input_shape_.begin() + split_axis_ + 1, input_shape_.end(), size_t(1), std::multiplies())); int max_thread_num = SizeToInt(common::ThreadPool::GetInstance().GetSyncRunThreadNum()); int thread_num = 1; if (outer_ == 1) { parallel_strategy_ = kOnSplitAxis; - thread_num = std::min(SizeToInt(output_shape_[split_axis_]), max_thread_num); + thread_num = std::min(LongToInt(output_shape_[split_axis_]), max_thread_num); cal_num_per_thread_ = UP_DIV(output_shape_[split_axis_], thread_num); } else { parallel_strategy_ = kOnOuter; @@ -118,7 +118,7 @@ void StridedSliceCpuKernelMod::InitSliceParam(const CNodePtr &kernel_node, std:: } data_size_ = type_pair->second.second; slice_param_.data_type = type_pair->second.first; - std::vector input_shape_pad = input_shape_; + auto input_shape_pad = input_shape_; FillEmptyDims(kernel_node, begin, end, stride, &input_shape_pad); ParseStrideSliceMasks(kernel_node, begin, end, stride, input_shape_pad); @@ -126,7 +126,7 @@ void StridedSliceCpuKernelMod::InitSliceParam(const CNodePtr &kernel_node, std:: std::vector &_end = *end; std::vector &_stride = *stride; for (size_t i = 0; i < DIMENSION_8D; i++) { - slice_param_.in_shape_[i] = SizeToInt(input_shape_pad[i]); + slice_param_.in_shape_[i] = LongToInt(input_shape_pad[i]); slice_param_.begins_[i] = LongToInt(_begin[i]); slice_param_.ends_[i] = LongToInt(_end[i]); slice_param_.strides_[i] = LongToInt(_stride[i]); @@ -139,15 +139,16 @@ common::Status StridedSliceCpuKernelMod::RunTaskOnOuter(const uint8_t *input_add int start_pos) { int begin_index = slice_param_.begins_[split_axis_]; int inner_size = inner_ * data_size_; - const uint8_t *cur_in_ptr = input_addr + (start_pos * input_shape_[split_axis_] + begin_index) * inner_size; - uint8_t *cur_out_ptr = output_addr + start_pos * output_shape_[split_axis_] * inner_size; + const uint8_t *cur_in_ptr = + input_addr + (start_pos * LongToInt(input_shape_[split_axis_]) + begin_index) * inner_size; + uint8_t *cur_out_ptr 
= output_addr + start_pos * LongToInt(output_shape_[split_axis_]) * inner_size; int cur_outer = outer_ - start_pos; if (cur_outer <= 0) { return common::SUCCESS; } cur_outer = cur_outer > cal_num_per_thread_ ? cal_num_per_thread_ : cur_outer; - FastStride(cur_in_ptr, cur_out_ptr, output_shape_[split_axis_], slice_param_.strides_[split_axis_], cur_outer, - inner_size, input_shape_[split_axis_] * inner_size); + FastStride(cur_in_ptr, cur_out_ptr, LongToInt(output_shape_[split_axis_]), slice_param_.strides_[split_axis_], + cur_outer, inner_size, LongToSize(input_shape_[split_axis_]) * inner_size); return common::SUCCESS; } @@ -157,7 +158,7 @@ common::Status StridedSliceCpuKernelMod::RunTaskOnSplitAxis(const uint8_t *input int inner_size = inner_ * data_size_; const uint8_t *cur_in_ptr = input_addr + (start_pos * slice_param_.strides_[split_axis_] + begin_index) * inner_size; uint8_t *cur_out_ptr = output_addr + start_pos * inner_size; - int cal_axis_num = output_shape_[split_axis_] - start_pos; + int cal_axis_num = LongToInt(output_shape_[split_axis_]) - start_pos; if (cal_axis_num <= 0) { return common::SUCCESS; } diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/stridedslice_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/stridedslice_cpu_kernel.h index 7d21d00f8a4..eb6695f9190 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/stridedslice_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/stridedslice_cpu_kernel.h @@ -88,8 +88,8 @@ class StridedSliceCpuKernelMod : public DeprecatedNativeCpuKernelMod { int cal_num_per_thread_{1}; bool parallel_{false}; ParallelStrategy parallel_strategy_{kOnSplitAxis}; - std::vector input_shape_; - std::vector output_shape_; + ShapeVector input_shape_; + ShapeVector output_shape_; StridedSliceParameter slice_param_; }; } // namespace kernel diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/sub_and_filter_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/sub_and_filter_cpu_kernel.cc index 7d33127aab7..a1e5f183e13 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/sub_and_filter_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/sub_and_filter_cpu_kernel.cc @@ -56,10 +56,7 @@ void SubAndFilterCpuKernelMod::LaunchKernel(const std::vector &input MS_EXCEPTION_IF_NULL(node); auto indices_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(node, 0); - batch_size_ = 1; - for (size_t i = 0; i < indices_shape.size(); ++i) { - batch_size_ *= indices_shape[i]; - } + batch_size_ = SizeOf(indices_shape); MS_LOG(INFO) << "SubAndFilter batch_size:" << batch_size_; T *input_x = reinterpret_cast(inputs[0]->addr); @@ -68,7 +65,7 @@ void SubAndFilterCpuKernelMod::LaunchKernel(const std::vector &input T *filter_res = reinterpret_cast(outputs[0]->addr); T *filter_idx = reinterpret_cast(outputs[1]->addr); - size_t count = 0; + int64_t count = 0; for (size_t i = 0; i < batch_size_; ++i) { T temp = input_x[i] - offset; if (temp < 0 || temp >= max_num) continue; @@ -77,7 +74,7 @@ void SubAndFilterCpuKernelMod::LaunchKernel(const std::vector &input count++; } MS_LOG(INFO) << "SubAndFilter output count is " << count; - std::vector out_shape; + ShapeVector out_shape; (void)out_shape.emplace_back(count); size_t output_num = common::AnfAlgo::GetOutputTensorNum(node); std::vector dtypes(output_num); diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/tensor_copy_slices_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/tensor_copy_slices_cpu_kernel.cc index ca1122957cc..63ccf560328 100644 --- 
a/mindspore/ccsrc/plugin/device/cpu/kernel/tensor_copy_slices_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/tensor_copy_slices_cpu_kernel.cc @@ -31,13 +31,9 @@ constexpr size_t kTensorCopySlicesOutputsNum = 1; void TensorCopySlicesCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); - auto input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); - auto update_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); - auto output_shape = common::AnfAlgo::GetOutputInferShape(kernel_node, 0); - - CastShapeSizeToLong(input_shape, &input_shape_); - CastShapeSizeToLong(update_shape, &update_shape_); - CastShapeSizeToLong(output_shape, &output_shape_); + input_shape_ = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + update_shape_ = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + output_shape_ = common::AnfAlgo::GetOutputInferShape(kernel_node, 0); auto begin = common::AnfAlgo::GetNodeAttr>(kernel_node, BEGIN); auto end = common::AnfAlgo::GetNodeAttr>(kernel_node, END); diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/tensoradd_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/tensoradd_cpu_kernel.h index f06be97f6cd..36f10933cc8 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/tensoradd_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/tensoradd_cpu_kernel.h @@ -49,9 +49,9 @@ class TensorAddCpuKernelMod : public DeprecatedNativeCpuKernelMod { static std::vector> func_list_; AddFunc kernel_func_; - std::vector input_shape_a_; - std::vector input_shape_b_; - std::vector output_shape_; + std::vector input_shape_a_; + std::vector input_shape_b_; + std::vector output_shape_; }; } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/tile_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/tile_cpu_kernel.cc index 4f0122b4373..d5d2be59962 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/tile_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/tile_cpu_kernel.cc @@ -139,11 +139,11 @@ void TileCpuKernelMod::LaunchKernel(const std::vector &inputs, const if (input_num == kTileDynamicInputsNum) { auto multiples_addr = reinterpret_cast(inputs[1]->addr); auto multiple_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(cnode, 1); - size_t multiple_nums = 1; + int64_t multiple_nums = 1; for (size_t i = 0; i < multiple_shape.size(); ++i) { multiple_nums *= multiple_shape[i]; } - for (size_t i = 0; i < multiple_nums; ++i) { + for (size_t i = 0; i < LongToSize(multiple_nums); ++i) { (void)multiples_.emplace_back(multiples_addr[i]); } TileMultipleCompute(); diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/tile_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/tile_cpu_kernel.h index 20be5f5cb64..e647e88a374 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/tile_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/tile_cpu_kernel.h @@ -61,8 +61,8 @@ class TileCpuKernelMod : public DeprecatedNativeCpuKernelMod { void TileMultipleCompute(void); - std::vector x_shape_; - std::vector y_shape_; + ShapeVector x_shape_; + ShapeVector y_shape_; std::vector multiples_; TypeId dtype_{kTypeUnknown}; using TypeKernel = std::function &inputs, diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/topk_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/topk_cpu_kernel.cc index 5d4b59852e6..8b82abe8968 100644 --- 
a/mindspore/ccsrc/plugin/device/cpu/kernel/topk_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/topk_cpu_kernel.cc @@ -92,7 +92,7 @@ void TopKCpuKernelMod::LaunchKernel(const std::vector &inputs, const void TopKCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); - auto x_shape_ = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto x_shape_ = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0)); if (x_shape_.empty()) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input must be greater than 0, but got empty input."; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/trace_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/trace_cpu_kernel.cc index 3547ee202b2..6c59985d31d 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/trace_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/trace_cpu_kernel.cc @@ -28,7 +28,7 @@ constexpr size_t kOutputNum = 1; void TraceCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); - input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0); + input_shape_ = Convert2SizeT(AnfAlgo::GetInputDeviceShape(kernel_node, 0)); values_type = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); if (input_shape_.size() != kInputDim) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', input tensor's dimension should be " << kInputDim diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/trace_grad_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/trace_grad_cpu_kernel.h index da79a312ba6..cf694f9ee78 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/trace_grad_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/trace_grad_cpu_kernel.h @@ -56,7 +56,7 @@ class TraceGradCpuKernelMod : public DeprecatedNativeCpuKernelMod { } private: - std::vector input_shape_; + std::vector input_shape_; TypeId values_type_{kTypeUnknown}; }; } // namespace kernel diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/transpose_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/transpose_cpu_kernel.cc index 027ade8fb4b..c0f9d851c2d 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/transpose_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/transpose_cpu_kernel.cc @@ -68,8 +68,8 @@ void TransposeFwdCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { transpose_param_.strides_[num_axes - 1] = 1; transpose_param_.out_strides_[num_axes - 1] = 1; for (size_t i = num_axes - 1; i >= 1; i--) { - transpose_param_.strides_[i - 1] = SizeToInt(input_shape_[i]) * transpose_param_.strides_[i]; - transpose_param_.out_strides_[i - 1] = SizeToInt(output_shape_[i]) * transpose_param_.out_strides_[i]; + transpose_param_.strides_[i - 1] = input_shape_[i] * transpose_param_.strides_[i]; + transpose_param_.out_strides_[i - 1] = output_shape_[i] * transpose_param_.out_strides_[i]; } launch_map_[kNumberTypeBool] = &TransposeFwdCpuKernelMod::LaunchKernel; launch_map_[kNumberTypeInt8] = &TransposeFwdCpuKernelMod::LaunchKernel; @@ -110,7 +110,7 @@ void TransposeFwdCpuKernelMod::LaunchKernel(const std::vector &input transpose_param_.data_num_ = SizeToInt(inputs[0]->size / sizeof(T)); int output_shape[SizeToInt(output_shape_.size())]; for (size_t i = 0; i < output_shape_.size(); ++i) { - output_shape[i] = SizeToInt(output_shape_[i]); + output_shape[i] = output_shape_[i]; } size_t data_count 
= (inputs[0]->size) / sizeof(T); if (axes_.size() > kIndex7 || data_count >= kMaxTransposeSerialSize) { diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/transpose_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/transpose_cpu_kernel.h index 101d8a16737..addd5d6ea0a 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/transpose_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/transpose_cpu_kernel.h @@ -87,8 +87,8 @@ class TransposeFwdCpuKernelMod : public DeprecatedNativeCpuKernelMod { int task_id, int thread_num); TransposeParameter transpose_param_; - std::vector input_shape_; - std::vector output_shape_; + std::vector input_shape_; + std::vector output_shape_; std::vector axes_; TypeId dtype_{kTypeUnknown}; using TypeKernel = diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/tril_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/tril_cpu_kernel.cc index 6a1cde993cb..7bcf55959a8 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/tril_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/tril_cpu_kernel.cc @@ -32,7 +32,7 @@ constexpr size_t kDim = 2; void TrilCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); - input_shape_ = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + input_shape_ = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0)); dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); input_dims_ = input_shape_.size(); diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/triu_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/triu_cpu_kernel.cc index c0f02fa6591..3b242b04b67 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/triu_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/triu_cpu_kernel.cc @@ -30,7 +30,7 @@ constexpr size_t kDim = 2; void TriuCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); - input_shape_ = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + input_shape_ = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0)); input_dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); input_dims_ = input_shape_.size(); diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/truncate_div_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/truncate_div_cpu_kernel.cc index 853fbd0b5b1..baaefdc3120 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/truncate_div_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/truncate_div_cpu_kernel.cc @@ -53,23 +53,9 @@ int TruncateDivCpuKernelMod::Resize(const BaseOperatorPtr &base_operator, const return ret; } - std::vector input_shape_1 = inputs[0]->GetShapeVector(); - std::vector input_shape_2 = inputs[1]->GetShapeVector(); - std::vector output_shape = outputs[0]->GetShapeVector(); - - input_shape_1_.resize(input_shape_1.size(), 1); - input_shape_2_.resize(input_shape_2.size(), 1); - output_shape_.resize(output_shape.size(), 1); - - for (size_t i = 0; i < input_shape_1.size(); i++) { - input_shape_1_[i] = static_cast(input_shape_1[i]); - } - for (size_t i = 0; i < input_shape_2.size(); i++) { - input_shape_2_[i] = static_cast(input_shape_2[i]); - } - for (size_t i = 0; i < output_shape.size(); i++) { - output_shape_[i] = static_cast(output_shape[i]); - } + input_shape_1_ = inputs[0]->GetShapeVector(); + input_shape_2_ = inputs[1]->GetShapeVector(); + output_shape_ = 
outputs[0]->GetShapeVector(); return KRET_OK; } diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/truncate_div_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/truncate_div_cpu_kernel.h index 2445641db01..3ee721de2ac 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/truncate_div_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/truncate_div_cpu_kernel.h @@ -55,9 +55,9 @@ class TruncateDivCpuKernelMod : public NativeCpuKernelMod { static std::vector<std::pair<KernelAttr, TruncateDivFunc>> func_list_; TruncateDivFunc kernel_func_; - std::vector<size_t> input_shape_1_; - std::vector<size_t> input_shape_2_; - std::vector<size_t> output_shape_; + ShapeVector input_shape_1_; + ShapeVector input_shape_2_; + ShapeVector output_shape_; }; } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/truncate_mod_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/truncate_mod_cpu_kernel.cc index f99d1841915..fe4f7f9ef07 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/truncate_mod_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/truncate_mod_cpu_kernel.cc @@ -52,23 +52,9 @@ int TruncateModCpuKernelMod::Resize(const BaseOperatorPtr &base_operator, const return ret; } - std::vector<int64_t> input_shape_1 = inputs[0]->GetShapeVector(); - std::vector<int64_t> input_shape_2 = inputs[1]->GetShapeVector(); - std::vector<int64_t> output_shape = outputs[0]->GetShapeVector(); - - input_shape_1_.resize(input_shape_1.size(), 1); - input_shape_2_.resize(input_shape_2.size(), 1); - output_shape_.resize(output_shape.size(), 1); - - for (size_t i = 0; i < input_shape_1.size(); i++) { - input_shape_1_[i] = static_cast<size_t>(input_shape_1[i]); - } - for (size_t i = 0; i < input_shape_2.size(); i++) { - input_shape_2_[i] = static_cast<size_t>(input_shape_2[i]); - } - for (size_t i = 0; i < output_shape.size(); i++) { - output_shape_[i] = static_cast<size_t>(output_shape[i]); - } + input_shape_1_ = inputs[0]->GetShapeVector(); + input_shape_2_ = inputs[1]->GetShapeVector(); + output_shape_ = outputs[0]->GetShapeVector(); return KRET_OK; } diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/truncate_mod_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/truncate_mod_cpu_kernel.h index 1efc0757c23..28f30fba302 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/truncate_mod_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/truncate_mod_cpu_kernel.h @@ -55,9 +55,9 @@ class TruncateModCpuKernelMod : public NativeCpuKernelMod { static std::vector<std::pair<KernelAttr, TruncateModFunc>> func_list_; TruncateModFunc kernel_func_; - std::vector<size_t> input_shape_1_; - std::vector<size_t> input_shape_2_; - std::vector<size_t> output_shape_; + ShapeVector input_shape_1_; + ShapeVector input_shape_2_; + ShapeVector output_shape_; }; } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/truncated_normal_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/truncated_normal_cpu_kernel.cc index 3798e326a56..28cd6dfb580 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/truncated_normal_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/truncated_normal_cpu_kernel.cc @@ -40,6 +40,9 @@ void TruncatedNormalCPUKernelMod::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); + if (IsDynamic(input_shape)) { + return; + } input_type_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); output_type_ = AnfAlgo::GetOutputDeviceDataType(kernel_node, 0); seed_ =
static_cast<size_t>(common::AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "seed")); diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/unique_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/unique_cpu_kernel.cc index 246f32bdc8b..f15844aeb7c 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/unique_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/unique_cpu_kernel.cc @@ -35,7 +35,7 @@ void UniqueCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input must be 1D, but got " << input_shape.size() << "D"; } - input_size_ = input_shape[0]; + input_size_ = static_cast<size_t>(input_shape[0]); dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); if (common::AnfAlgo::HasNodeAttr(SORTED, kernel_node)) { sorted_ = common::AnfAlgo::GetNodeAttr<bool>(kernel_node, SORTED); @@ -70,8 +70,8 @@ bool UniqueCpuKernelMod::Launch(const std::vector<kernel::AddressPtr> &inputs, if (!node_) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', node_wpt_(kernel_node) is expired. Error no: " << node_; } - std::vector<size_t> out_shape; - (void)out_shape.emplace_back(output_size_); + ShapeVector out_shape; + (void)out_shape.emplace_back(SizeToLong(output_size_)); size_t output_num = common::AnfAlgo::GetOutputTensorNum(node_); std::vector<TypeId> dtypes(output_num); for (size_t i = 0; i < output_num; i++) { diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/unpack_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/unpack_cpu_kernel.cc index 68639e3ea87..e3a66633af3 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/unpack_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/unpack_cpu_kernel.cc @@ -43,11 +43,11 @@ void UnpackCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { for (size_t i = 0; i < input_shape.size(); i++) { if (i < IntToSize(unstack_param_.axis_)) { - unstack_param_.pre_dims_ *= SizeToInt(input_shape[i]); + unstack_param_.pre_dims_ *= LongToInt(input_shape[i]); } else if (i > IntToSize(unstack_param_.axis_)) { - unstack_param_.after_dims_ *= SizeToInt(input_shape[i]); + unstack_param_.after_dims_ *= LongToInt(input_shape[i]); } else { - unstack_param_.axis_dim_ = SizeToInt(input_shape[i]); + unstack_param_.axis_dim_ = LongToInt(input_shape[i]); } } diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/unsorted_segment_sum_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/unsorted_segment_sum_cpu_kernel.cc index 8bc992b697a..6659a450d9e 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/unsorted_segment_sum_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/unsorted_segment_sum_cpu_kernel.cc @@ -30,9 +30,9 @@ void UnsortedSegmentSumCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); segment_ids_dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 1); - auto input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto input_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0)); auto segment_ids_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); - auto output_shape = common::AnfAlgo::GetOutputInferShape(kernel_node, 0); + auto output_shape = Convert2SizeT(common::AnfAlgo::GetOutputInferShape(kernel_node, 0)); if (output_shape.empty()) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of output must be at least 1, but got shape: " << output_shape; diff --git
a/mindspore/ccsrc/plugin/device/cpu/kernel/update_cache_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/update_cache_cpu_kernel.cc index 6ce4335c866..93cf99cbf47 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/update_cache_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/update_cache_cpu_kernel.cc @@ -66,20 +66,17 @@ void UpdateCacheCpuKernelMod::LaunchKernel(const std::vector &inputs MS_EXCEPTION_IF_NULL(node); auto indices_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(node, 1); auto update_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(node, 2); + if (AnfAlgo::IsShapesDynamic({indices_shape, update_shape})) { + return; + } if (update_shape.size() < kMinUpdateShapeSize) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of 'update' must be at least " << kMinUpdateShapeSize << "D, but got: " << update_shape.size() << "D"; } - batch_size_ = 1; - for (size_t i = 0; i < indices_shape.size(); ++i) { - batch_size_ *= indices_shape[i]; - } + batch_size_ = SizeOf(indices_shape); MS_LOG(INFO) << "UpdateCache batch_size:" << batch_size_; - update_size_ = 1; - for (size_t i = 0; i < update_shape.size(); ++i) { - update_size_ *= SizeToLong(update_shape[i]); - } - update_length_ = update_shape[1]; + update_size_ = SizeOf(update_shape); + update_length_ = LongToSize(update_shape[1]); char *input_x = reinterpret_cast(inputs[0]->addr); T *indices = reinterpret_cast(inputs[1]->addr); char *update = reinterpret_cast(inputs[2]->addr); diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/upper_bound_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/upper_bound_cpu_kernel.cc index a016b4bfb33..b1418fd9207 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/upper_bound_cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/upper_bound_cpu_kernel.cc @@ -24,14 +24,17 @@ void UpperBoundCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { sorted_x_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0); values_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 1); output_shape_ = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); + if (AnfAlgo::IsShapesDynamic({sorted_x_shape_, values_shape_, output_shape_})) { + return; + } size_t size_exp = 2; if (sorted_x_shape_.size() != values_shape_.size() || sorted_x_shape_.size() != size_exp || sorted_x_shape_[0] != values_shape_[0]) { MS_LOG(EXCEPTION) << "The shape of input is invalid."; } - sorted_x_num_ = sorted_x_shape_[0] * sorted_x_shape_[1]; - values_num_ = values_shape_[0] * values_shape_[1]; - output_num_ = output_shape_[0] * output_shape_[1]; + sorted_x_num_ = static_cast(sorted_x_shape_[0] * sorted_x_shape_[1]); + values_num_ = static_cast(values_shape_[0] * values_shape_[1]); + output_num_ = static_cast(output_shape_[0] * output_shape_[1]); if (values_num_ != output_num_) { MS_LOG(EXCEPTION) << "Infer the shape of output error."; } @@ -51,8 +54,8 @@ bool UpperBoundCpuKernelMod::LaunchKernel(const std::vector auto sorted_x_data_addr = reinterpret_cast(inputs[0]->addr); auto values_data_addr = reinterpret_cast(inputs[1]->addr); auto output_data_addr = reinterpret_cast(outputs[0]->addr); - size_t sorted_x_data_column = sorted_x_shape_[1]; - size_t values_data_column = values_shape_[1]; + size_t sorted_x_data_column = static_cast(sorted_x_shape_[1]); + size_t values_data_column = static_cast(values_shape_[1]); auto task = [this, &values_data_addr, &sorted_x_data_addr, &output_data_addr, &sorted_x_data_column, &values_data_column](size_t start, size_t end) { const size_t kNumber2 = 2; diff --git 
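The update_cache hunk above folds the hand-rolled products over indices_shape and update_shape into SizeOf, and the upper_bound hunk introduces the IsShapesDynamic early return used throughout this patch. Both behaviors follow from the code they replace; a sketch under the assumption that shapes are int64_t vectors and a negative dimension marks a not-yet-inferred shape:

#include <cstdint>
#include <numeric>
#include <vector>

using ShapeVector = std::vector<int64_t>;

// Equivalent to the deleted loops: product of all dims, 1 for a scalar shape.
inline size_t SizeOf(const ShapeVector &shape) {
  return std::accumulate(shape.begin(), shape.end(), static_cast<size_t>(1),
                         [](size_t acc, int64_t dim) { return acc * static_cast<size_t>(dim); });
}

// Assumed: true if any dimension of any shape is still unknown (< 0),
// in which case InitKernel/LaunchKernel defers all size computation.
inline bool IsShapesDynamic(const std::vector<ShapeVector> &shapes) {
  for (const auto &shape : shapes) {
    for (int64_t dim : shape) {
      if (dim < 0) return true;
    }
  }
  return false;
}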
a/mindspore/ccsrc/plugin/device/cpu/kernel/upper_bound_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/upper_bound_cpu_kernel.h index 4de6f0f507e..5b7bea949e1 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/upper_bound_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/upper_bound_cpu_kernel.h @@ -48,9 +48,9 @@ class UpperBoundCpuKernelMod : public DeprecatedNativeCpuKernelMod { static std::vector<std::pair<KernelAttr, UpperBoundFunc>> func_list_; UpperBoundFunc kernel_func_; - std::vector<size_t> sorted_x_shape_; - std::vector<size_t> values_shape_; - std::vector<size_t> output_shape_; + std::vector<int64_t> sorted_x_shape_; + std::vector<int64_t> values_shape_; + std::vector<int64_t> output_shape_; size_t sorted_x_num_; size_t values_num_; size_t output_num_; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/zeros_like_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/zeros_like_cpu_kernel.h index 1be5839b19b..8f737c70bb6 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/zeros_like_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/zeros_like_cpu_kernel.h @@ -51,8 +51,8 @@ class ZerosLikeCpuKernelMod : public DeprecatedNativeCpuKernelMod { static std::vector<std::pair<KernelAttr, ZerosLikeFunc>> func_list_; ZerosLikeFunc kernel_func_; - std::vector<size_t> input_shape_; - std::vector<size_t> output_shape_; + std::vector<int64_t> input_shape_; + std::vector<int64_t> output_shape_; }; } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/zeta_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/zeta_cpu_kernel.h index 93e2e424c26..6a0c23acd57 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/zeta_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/zeta_cpu_kernel.h @@ -37,9 +37,9 @@ class ZetaCpuKernelMod : public DeprecatedNativeCpuKernelMod { std::vector<KernelAttr> GetOpSupport() override; private: - std::vector<size_t> input0_shape_; - std::vector<size_t> input1_shape_; - std::vector<size_t> output_shape_; + ShapeVector input0_shape_; + ShapeVector input1_shape_; + ShapeVector output_shape_; TypeId dtype_{kTypeUnknown}; template <typename T> bool CheckZeta(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs, diff --git a/mindspore/ccsrc/plugin/device/gpu/hal/device/gpu_common.h b/mindspore/ccsrc/plugin/device/gpu/hal/device/gpu_common.h index 483ea83ed29..3e7e73b8bbe 100644 --- a/mindspore/ccsrc/plugin/device/gpu/hal/device/gpu_common.h +++ b/mindspore/ccsrc/plugin/device/gpu/hal/device/gpu_common.h @@ -228,11 +228,12 @@ namespace gpu { #define VARIABLE_NOT_USED(var) \ { (void)(var); } -inline bool CheckNullInput(const std::vector<size_t> &input_shape) { +template <typename T> +inline bool CheckNullInput(const std::vector<T> &input_shape) { // If input_shape.size() == 0, it means a scalar input; If input_shape.size() != 0 and input_shape contains 0, // it means a null input. Just return a null output.
if (input_shape.size() != 0) { - if (std::any_of(input_shape.begin(), input_shape.end(), [](size_t i) { return i == 0; })) { + if (std::any_of(input_shape.begin(), input_shape.end(), [](T i) { return i == 0; })) { return true; } } @@ -257,7 +258,8 @@ inline std::string ConvertVectorToString(const std::vector<size_t> &value) { #define CONVERT_VECTOR_TO_STRING(value) mindspore::device::gpu::ConvertVectorToString(value) -inline bool CheckShapeNull(const std::vector<size_t> &shape, std::string kernel_name, std::string param_name) { +template <typename T> +inline bool CheckShapeNull(const std::vector<T> &shape, std::string kernel_name, std::string param_name) { if (CHECK_NULL_INPUT(shape)) { MS_LOG(WARNING) << "For '" << kernel_name << "', the shape of " << param_name << " cannot contain zero, but got " << CONVERT_VECTOR_TO_STRING(shape); diff --git a/mindspore/ccsrc/plugin/device/gpu/hal/device/gpu_tensor_array.h b/mindspore/ccsrc/plugin/device/gpu/hal/device/gpu_tensor_array.h index 8cc68641625..091124cd3eb 100644 --- a/mindspore/ccsrc/plugin/device/gpu/hal/device/gpu_tensor_array.h +++ b/mindspore/ccsrc/plugin/device/gpu/hal/device/gpu_tensor_array.h @@ -29,7 +29,7 @@ namespace device { namespace gpu { class GPUTensorArray : public TensorArray { public: - GPUTensorArray(const string &name, const TypePtr &dtype, const std::vector<size_t> &shapes) + GPUTensorArray(const string &name, const TypePtr &dtype, const ShapeVector &shapes) : TensorArray(name, dtype, shapes) {} ~GPUTensorArray() override = default; void FreeMemory(const DeviceMemPtr addr) override; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/argmaxandminwithvalue_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/argmaxandminwithvalue_gpu_kernel.h index 869f9ed8ac8..724bf3cd769 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/argmaxandminwithvalue_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/argmaxandminwithvalue_gpu_kernel.h @@ -48,8 +48,8 @@ class ArgMaxAndMinWithValueGpuKernelMod : public DeprecatedNativeGpuKernelMod { std::string kernel_name = common::AnfAlgo::GetCNodeName(kernel_node); kernel_node_ = kernel_node; small_ = (kernel_name == "ArgMinWithValue") ?
true : false; - std::vector shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); - auto output_shape = common::AnfAlgo::GetOutputInferShape(kernel_node, 1); + auto shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0)); + auto output_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetOutputInferShape(kernel_node, 1)); is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name, "input") || CHECK_SHAPE_NULL(output_shape, kernel_name, "output"); if (is_null_input_) { diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/array_reduce_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/array_reduce_gpu_kernel.h index 619f6d7e41e..e496a0bb7f1 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/array_reduce_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/array_reduce_gpu_kernel.h @@ -244,22 +244,22 @@ class ArrayReduceGpuKernelMod : public DeprecatedNativeGpuKernelMod { "cudnnSetReduceTensorDescriptor failed"); return; } - void InferInAndOutDesc(const std::vector &input_shape, const std::vector &output_shape) { - std::vector inputA; - std::vector outputC_shape = output_shape; + void InferInAndOutDesc(const ShapeVector &input_shape, const ShapeVector &output_shape) { + ShapeVector inputA; + ShapeVector outputC_shape = output_shape; const int split_dim = 4; CheckTensorSize({input_shape, output_shape}); if (input_shape.size() <= split_dim) { ShapeNdTo4d(input_shape, &inputA); CHECK_CUDNN_RET_WITH_EXCEPT( kernel_node_, - cudnnSetTensor4dDescriptor(inputA_descriptor_, CUDNN_TENSOR_NCHW, data_type_, SizeToInt(inputA[0]), - SizeToInt(inputA[1]), SizeToInt(inputA[2]), SizeToInt(inputA[3])), + cudnnSetTensor4dDescriptor(inputA_descriptor_, CUDNN_TENSOR_NCHW, data_type_, LongToInt(inputA[0]), + LongToInt(inputA[1]), LongToInt(inputA[2]), LongToInt(inputA[3])), "cudnnSetTensor4dDescriptor failed"); } else { CudnnSetTensorNdDescriptor(input_shape, inputA_descriptor_, data_type_, kernel_node_); for (auto dim : input_shape) { - inputA.emplace_back(SizeToInt(dim)); + inputA.emplace_back(dim); } } @@ -283,7 +283,7 @@ class ArrayReduceGpuKernelMod : public DeprecatedNativeGpuKernelMod { return; } - std::vector outputC; + ShapeVector outputC; if (!keep_dims_) { for (auto i : axis_) { (void)(outputC_shape.insert(outputC_shape.begin() + i, 1)); @@ -300,7 +300,7 @@ class ArrayReduceGpuKernelMod : public DeprecatedNativeGpuKernelMod { } else { CudnnSetTensorNdDescriptor(outputC_shape, outputC_descriptor_, data_type_, kernel_node_); for (auto dim : outputC_shape) { - outputC.emplace_back(SizeToInt(dim)); + outputC.emplace_back(dim); } } diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/batchtospace_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/batchtospace_gpu_kernel.h index 8a68f4495c6..22b16cf0cb6 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/batchtospace_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/batchtospace_gpu_kernel.h @@ -55,14 +55,14 @@ class BatchToSpaceGpuKernelMod : public DeprecatedNativeGpuKernelMod { (void)CheckParam(kernel_node); input_size_ = sizeof(T); for (size_t idx = 0; idx < input_shape_.size(); ++idx) { - input_size_ *= input_shape_[idx]; + input_size_ *= static_cast(input_shape_[idx]); } constexpr int IDX_2 = 2; constexpr int IDX_3 = 3; - in_ = input_shape_[0]; - ic_ = input_shape_[1]; - ih_ = input_shape_[IDX_2]; - iw_ = input_shape_[IDX_3]; + in_ = static_cast(input_shape_[0]); + ic_ = static_cast(input_shape_[1]); + ih_ = 
static_cast(input_shape_[IDX_2]); + iw_ = static_cast(input_shape_[IDX_3]); on_ = in_ / (block_size_ * block_size_); oc_ = ic_; @@ -164,7 +164,7 @@ class BatchToSpaceGpuKernelMod : public DeprecatedNativeGpuKernelMod { std::vector workspace_size_list_; std::vector> crops_; - std::vector input_shape_; + std::vector input_shape_; size_t block_size_; size_t input_size_; size_t output_size_; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/broadcast_to_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/broadcast_to_gpu_kernel.h index 33df7e3e15b..b15f7edfb19 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/broadcast_to_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/broadcast_to_gpu_kernel.h @@ -72,11 +72,11 @@ class BroadcastToGpuKernelMod : public DeprecatedNativeGpuKernelMod { size_t offset = output_shapes.size() - input_shapes.size(); for (size_t i = 0; i < input_shapes.size(); i++) { - input_shape_[i + offset] = input_shapes[i]; + input_shape_[i + offset] = LongToSizeClipNeg(input_shapes[i]); } for (size_t j = 0; j < output_shapes.size(); j++) { - output_shape_[j] = output_shapes[j]; + output_shape_[j] = LongToSizeClipNeg(output_shapes[j]); } InitSizeLists(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/concatv2_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/concatv2_gpu_kernel.h index 47f16fd9941..9f6e0c65b87 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/concatv2_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/concatv2_gpu_kernel.h @@ -97,14 +97,14 @@ class ConcatV2FwdGpuKernelMod : public DeprecatedNativeGpuKernelMod { size_t input_size = 1; auto input_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, i); for (size_t j = 0; j < input_shape.size(); j++) { - input_size *= input_shape[j]; + input_size *= static_cast(input_shape[j]); } if (input_size == 0) { input_num_--; } else { input_size_list_.push_back(input_size * sizeof(T)); - len_axis_[current_dim] = SizeToInt(input_shape[axis_]); + len_axis_[current_dim] = LongToInt(input_shape[axis_]); current_dim++; } } @@ -153,7 +153,7 @@ class ConcatV2FwdGpuKernelMod : public DeprecatedNativeGpuKernelMod { } int axis_; int input_num_; - size_t output_size_; + int output_size_; int all_size_before_axis_; int all_size_axis_; std::string kernel_name_; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/crop_and_resize_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/crop_and_resize_gpu_kernel.h index 09654dcfa12..edbbd65651d 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/crop_and_resize_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/crop_and_resize_gpu_kernel.h @@ -93,7 +93,7 @@ class CropAndResizeGpuKernelMod : public DeprecatedNativeGpuKernelMod { auto input_boxes_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); auto input_box_index_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); auto input_crop_size_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); - auto output_shape = common::AnfAlgo::GetOutputInferShape(kernel_node, 0); + auto output_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetOutputInferShape(kernel_node, 0)); is_null_input_ = CHECK_SHAPE_NULL(input_image_shape, kernel_name, "x") || CHECK_SHAPE_NULL(input_boxes_shape, kernel_name, "boxes") || CHECK_SHAPE_NULL(input_box_index_shape, kernel_name, "boxes_index") || diff --git 
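CHECK_SHAPE_NULL call sites like the one above keep working on both the legacy size_t shapes and the new int64_t shapes only because CheckNullInput/CheckShapeNull in gpu_common.h became templates (see that hunk earlier). A self-contained usage sketch of the templated check, with the macro layer omitted:

#include <cstdint>
#include <iostream>
#include <vector>

template <typename T>
bool CheckNullInput(const std::vector<T> &shape) {
  // An empty shape is a scalar, not a null input; a zero dim is a null input.
  for (T dim : shape) {
    if (dim == 0) return true;
  }
  return false;
}

int main() {
  std::vector<size_t> legacy{2, 0, 3};   // old-style shape, contains a zero dim
  std::vector<int64_t> current{2, 4};    // new-style shape
  std::cout << CheckNullInput(legacy) << CheckNullInput(current) << '\n';  // prints 10
  return 0;
}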
a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/dynamic_range_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/dynamic_range_gpu_kernel.h index 986b96707f5..0319545ddc4 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/dynamic_range_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/dynamic_range_gpu_kernel.h @@ -133,7 +133,7 @@ class DynamicRangeGpuKernelMod : public DeprecatedNativeGpuKernelMod { "cudaStreamSynchronize failed"); std::vector output_type = {common::AnfAlgo::GetOutputInferDataType(kernel_node_.lock(), 0)}; - std::vector> output_shape = {{static_cast(output_shape_)}}; + std::vector output_shape = {{output_shape_}}; common::AnfAlgo::SetOutputInferTypeAndShape(output_type, output_shape, kernel_node_.lock().get()); } diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/dynamic_shape_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/dynamic_shape_gpu_kernel.h index 8588c8d9996..bf0a06789c0 100755 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/dynamic_shape_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/dynamic_shape_gpu_kernel.h @@ -56,22 +56,16 @@ class TensorShapeGpuKernelMod : public DeprecatedNativeGpuKernelMod { MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs must be 1, but got " << input_count; } - std::vector prev_node_output_shape_tmp = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); - is_null_input_ = CHECK_SHAPE_NULL(prev_node_output_shape_tmp, kernel_name, "input"); + auto shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name, "input"); if (is_null_input_) { InitSizeLists(); return true; } - input_size_ = 1; - for (const size_t &e : prev_node_output_shape_tmp) { - input_size_ *= e; - // shapes are Tensors with elements of type S (int32, or int64) but - // GetPrevNodeOutputInferShape returns vector of size_t, so we use - // an S* for allocated output memory and cast to an integral type here, - // otherwise the memcpy will fail silently. 
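The comment deleted just below records the original subtlety in the TensorShape kernel: the output is a tensor whose elements are int32 or int64 (the template parameter S), while the inferred shape used to arrive as size_t values, so each dim is staged through an S-typed buffer before being copied out. The rewritten loop keeps that staging with the new int64_t dims; a host-side sketch of the pattern (CopyShapeToOutput and the raw buffer are illustrative stand-ins, not MindSpore API):

#include <cstdint>
#include <cstring>
#include <vector>

using ShapeVector = std::vector<int64_t>;

template <typename S>
void CopyShapeToOutput(const ShapeVector &shape, S *output_buffer) {
  std::vector<S> staged;
  staged.reserve(shape.size());
  for (int64_t dim : shape) {
    staged.push_back(static_cast<S>(dim));  // element width now matches the output
  }
  // With mismatched element widths this memcpy is the silent failure the
  // deleted comment warned about; staging through S avoids it.
  std::memcpy(output_buffer, staged.data(), staged.size() * sizeof(S));
}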
- prev_node_output_shape_.push_back(e); + input_size_ = SizeOf(shape); + for (auto x : shape) { + prev_node_output_shape_.push_back(static_cast(x)); } - output_size_ = prev_node_output_shape_.size(); InitSizeLists(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/embedding_lookup_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/embedding_lookup_gpu_kernel.h index 1df63dd9259..512c67f5aff 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/embedding_lookup_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/embedding_lookup_gpu_kernel.h @@ -116,7 +116,7 @@ class EmbeddingLookupKernelMod : public DeprecatedNativeGpuKernelMod { private: void Reshape() { int64_t axis = 0; - size_t dim_before_axis = 1; + int64_t dim_before_axis = 1; for (size_t i = 0; i < LongToSize(axis); i++) { dim_before_axis *= output_shapes_[i]; } @@ -124,7 +124,7 @@ class EmbeddingLookupKernelMod : public DeprecatedNativeGpuKernelMod { for (size_t i = 0; i < indices_shapes_.size(); i++) { dim_of_indices *= indices_shapes_[i]; } - size_t dim_after_indices = 1; + int64_t dim_after_indices = 1; for (size_t i = LongToSize(axis) + indices_shapes_.size(); i < output_shapes_.size(); i++) { dim_after_indices *= output_shapes_[i]; } @@ -133,20 +133,20 @@ class EmbeddingLookupKernelMod : public DeprecatedNativeGpuKernelMod { dims_[2] = dim_after_indices; return; } - size_t GetSize(const std::vector &shape) const { + size_t GetSize(const std::vector &shape) const { if (shape.size() == 0) { return 0; } size_t result = sizeof(T); for (size_t i = 0; i < shape.size(); i++) { - result *= shape[i]; + result *= static_cast(shape[i]); } return result; } - std::vector input_shapes_; - std::vector indices_shapes_; - std::vector output_shapes_; + std::vector input_shapes_; + std::vector indices_shapes_; + std::vector output_shapes_; size_t dims_[3] = {}; int64_t offset_; bool is_dynamic_shape_; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/extract_image_patches_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/extract_image_patches_gpu_kernel.h index ce61ebb3d3d..65709e85ba7 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/extract_image_patches_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/extract_image_patches_gpu_kernel.h @@ -92,8 +92,8 @@ class ExtractImagePatchesKernelMod : public DeprecatedNativeGpuKernelMod { if (output_num != 1) { MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs must be 1, but got " << output_num; } - auto input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); - auto output_shape = common::AnfAlgo::GetOutputInferShape(kernel_node, 0); + auto input_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0)); + auto output_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetOutputInferShape(kernel_node, 0)); is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input") || CHECK_SHAPE_NULL(output_shape, kernel_name, "output"); if (is_null_input_) { diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/gather_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/gather_gpu_kernel.h index a2e5ca46a96..d082208fc8b 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/gather_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/gather_gpu_kernel.h @@ -38,6 +38,7 @@ class GatherFwdGpuKernelMod : public NativeGpuKernelMod { MS_LOG(ERROR) << "GatherFwdGpu's kernel function is not initialized."; return 
false; } + return kernel_func_(this, inputs, workspace, outputs, stream_ptr); } @@ -81,9 +82,9 @@ class GatherFwdGpuKernelMod : public NativeGpuKernelMod { GatherFwdFunc kernel_func_; static std::vector<std::pair<KernelAttr, GatherFwdFunc>> func_list_; - std::vector<size_t> input_shapes_; - std::vector<size_t> index_shapes_; - std::vector<size_t> output_shapes_; + ShapeVector input_shapes_; + ShapeVector index_shapes_; + ShapeVector output_shapes_; size_t dims_[4] = {}; bool is_null_input_{false}; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/gather_grad_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/gather_grad_gpu_kernel.h index c8f8a4b538a..a8209b4cbba 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/gather_grad_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/gather_grad_gpu_kernel.h @@ -98,34 +98,31 @@ class GatherGradGpuKernelMod : public DeprecatedNativeGpuKernelMod { private: void Reshape() { - size_t dim_before_axis = 1; + int64_t dim_before_axis = 1; for (size_t i = 0; i < IntToSize(axis_); i++) { dim_before_axis *= output_shapes_[i]; } - size_t dim_at_axis_index = index_shapes_[IntToSize(axis_)]; - size_t dim_at_axis_output = output_shapes_[IntToSize(axis_)]; - size_t dim_after_axis = 1; + size_t dim_at_axis_index = LongToSizeClipNeg(index_shapes_[IntToSize(axis_)]); + size_t dim_at_axis_output = LongToSizeClipNeg(output_shapes_[IntToSize(axis_)]); + int64_t dim_after_axis = 1; for (size_t i = IntToSize(axis_) + 1; i < output_shapes_.size(); i++) { dim_after_axis *= output_shapes_[i]; } - dims_[0] = dim_before_axis; + dims_[0] = LongToSize(dim_before_axis); dims_[1] = dim_at_axis_index; dims_[2] = dim_at_axis_output; - dims_[3] = dim_after_axis; + dims_[3] = LongToSize(dim_after_axis); return; } - size_t GetSize(const std::vector<size_t> &shape, const bool flag = true) const { + size_t GetSize(const ShapeVector &shape, const bool flag = true) const { size_t result = flag ?
sizeof(T) : sizeof(S); - for (size_t i = 0; i < shape.size(); i++) { - result *= shape[i]; - } - return result; + return result * SizeOf(shape); } - std::vector<size_t> index_shapes_; - std::vector<size_t> grad_shapes_; - std::vector<size_t> output_shapes_; + ShapeVector index_shapes_; + ShapeVector grad_shapes_; + ShapeVector output_shapes_; size_t dims_[4] = {}; int axis_; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/gathernd_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/gathernd_gpu_kernel.h index caa661e1fac..e4a0c051b08 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/gathernd_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/gathernd_gpu_kernel.h @@ -80,6 +80,11 @@ class GatherNdFwdGpuKernelMod : public DeprecatedNativeGpuKernelMod { input_shapes_ = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); indices_shapes_ = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); output_shapes_ = common::AnfAlgo::GetOutputInferShape(kernel_node, 0); + + if (AnfAlgo::IsShapesDynamic({input_shapes_, indices_shapes_, output_shapes_})) { + return true; + } + is_null_input_ = CHECK_SHAPE_NULL(input_shapes_, kernel_name, "input_x") || CHECK_SHAPE_NULL(indices_shapes_, kernel_name, "indices") || CHECK_SHAPE_NULL(output_shapes_, kernel_name, "output"); @@ -152,25 +157,25 @@ private: void Reshape() { - size_t dim_of_indices = 1; + int64_t dim_of_indices = 1; for (size_t i = 0; i < indices_shapes_.size() - IntToSize(1); i++) { dim_of_indices *= indices_shapes_[i]; } - size_t dim_after_indices = 1; + int64_t dim_after_indices = 1; size_t dim_indices_last = indices_shapes_[indices_shapes_.size() - IntToSize(1)]; for (size_t i = dim_indices_last; i < input_shapes_.size(); i++) { dim_after_indices *= input_shapes_[i]; } - dims_.emplace_back(dim_of_indices); - dims_.emplace_back(dim_after_indices); + dims_.emplace_back(LongToSize(dim_of_indices)); + dims_.emplace_back(LongToSize(dim_after_indices)); dims_.emplace_back(dim_indices_last); return; } - std::vector<size_t> input_shapes_; - std::vector<size_t> indices_shapes_; - std::vector<size_t> output_shapes_; + ShapeVector input_shapes_; + ShapeVector indices_shapes_; + ShapeVector output_shapes_; std::vector<size_t> dims_; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/gatherv2_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/gatherv2_gpu_kernel.h index 4e6f5602e62..0e38f441b35 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/gatherv2_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/gatherv2_gpu_kernel.h @@ -56,7 +56,7 @@ class GatherV2FwdGpuKernelMod : public DeprecatedNativeGpuKernelMod { MS_EXCEPTION_IF_NULL(input_addr); MS_EXCEPTION_IF_NULL(indices_addr); - GatherV2(input_addr, indices_addr, output_addr, dims_[0], dims_[1], dims_[2], input_dim1, + GatherV2(input_addr, indices_addr, output_addr, dims_[0], dims_[1], dims_[2], LongToSize(input_dim1), reinterpret_cast<cudaStream_t>(stream_ptr)); return true; } @@ -126,15 +126,15 @@ if (axis_ < 0) { axis_ = axis_ + SizeToInt(input_shapes_.size()); } - size_t dim_before_axis = 1; + int64_t dim_before_axis = 1; for (size_t i = 0; i < std::min(IntToSize(axis_), output_shapes_.size()); i++) { dim_before_axis *= output_shapes_[i]; } - size_t dim_of_indices = 1; + int64_t dim_of_indices = 1; for (size_t i = 0; i < indices_shapes_.size(); i++) { dim_of_indices *= indices_shapes_[i]; } - size_t dim_after_indices
= 1; + int64_t dim_after_indices = 1; for (size_t i = IntToSize(axis_) + indices_shapes_.size(); i < output_shapes_.size(); i++) { dim_after_indices *= output_shapes_[i]; } @@ -144,10 +144,10 @@ class GatherV2FwdGpuKernelMod : public DeprecatedNativeGpuKernelMod { return; } - std::vector input_shapes_; - std::vector indices_shapes_; - std::vector output_shapes_; - size_t dims_[3] = {}; + std::vector input_shapes_; + std::vector indices_shapes_; + std::vector output_shapes_; + int64_t dims_[3] = {}; G axis_; bool is_dynamic_shape_; bool is_null_input_; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/in_top_k_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/in_top_k_gpu_kernel.h index 436c621270f..cfe313704c5 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/in_top_k_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/in_top_k_gpu_kernel.h @@ -116,13 +116,13 @@ class InTopKGpuKernelMod : public DeprecatedNativeGpuKernelMod { input_rank_ = input_shape_.size(); input_size_ = 1; for (size_t i = 0; i < input_rank_; i++) { - input_size_ *= input_shape_[i]; + input_size_ *= static_cast(input_shape_[i]); } k_ = GetAttr(kernel_node, "k"); - inner_size_ = input_shape_[1]; - outer_size_ = input_shape_[0]; + inner_size_ = static_cast(input_shape_[1]); + outer_size_ = static_cast(input_shape_[0]); if (std::is_same::value) { // min value representable by float16, std::numeric_limits doesn't support half @@ -153,18 +153,18 @@ class InTopKGpuKernelMod : public DeprecatedNativeGpuKernelMod { protected: void InitSizeLists() override { input_size_list_.push_back(input_size_ * sizeof(T)); - input_size_list_.push_back(input_shape_[0] * sizeof(int32_t)); - output_size_list_.push_back(input_shape_[0] * sizeof(bool)); + input_size_list_.push_back(static_cast(input_shape_[0]) * sizeof(int32_t)); + output_size_list_.push_back(static_cast(input_shape_[0]) * sizeof(bool)); if (k_ > 0) { - workspace_size_list_.push_back(input_shape_[0] * k_ * sizeof(T)); - workspace_size_list_.push_back(input_shape_[0] * k_ * sizeof(int32_t)); + workspace_size_list_.push_back(static_cast(input_shape_[0]) * k_ * sizeof(T)); + workspace_size_list_.push_back(static_cast(input_shape_[0]) * k_ * sizeof(int32_t)); } // remove later! 
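The Reshape bodies in the gather family above all switch their accumulators from size_t to int64_t and narrow exactly once at the end (LongToSize, or assignment into int64_t dims_). The point of the change: a -1 placeholder keeps the running product negative and detectable, instead of wrapping into an enormous size_t. A distilled sketch of the pattern (DimProductBeforeAxis is an illustrative name):

#include <cstdint>
#include <vector>

using ShapeVector = std::vector<int64_t>;

size_t DimProductBeforeAxis(const ShapeVector &shape, size_t axis) {
  int64_t acc = 1;
  for (size_t i = 0; i < axis && i < shape.size(); ++i) {
    acc *= shape[i];  // a -1 dim keeps acc negative rather than wrapping
  }
  return acc < 0 ? 0 : static_cast<size_t>(acc);  // single checked narrowing
}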
urgent fix for bug: topk has incorrect output for float16 if (std::is_same::value) { workspace_size_list_.push_back(input_size_ * sizeof(float)); if (k_ > 0) { - workspace_size_list_.push_back(input_shape_[0] * k_ * sizeof(float)); + workspace_size_list_.push_back(static_cast(input_shape_[0]) * k_ * sizeof(float)); } } } @@ -173,7 +173,7 @@ class InTopKGpuKernelMod : public DeprecatedNativeGpuKernelMod { size_t input_size_; T top_k_init_; int64_t k_; - std::vector input_shape_; + std::vector input_shape_; size_t input_rank_; // for topk diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/meshgrid_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/meshgrid_gpu_kernel.h index 69daad1abf1..c219c15fbe2 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/meshgrid_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/meshgrid_gpu_kernel.h @@ -95,7 +95,7 @@ class MeshgridGpuKernelMod : public DeprecatedNativeGpuKernelMod { output_count_ = common::AnfAlgo::GetOutputTensorNum(kernel_node); // inferred shape swaps output shape for us if needed - output_shape_ = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); + output_shape_ = Convert2SizeTClipNeg(AnfAlgo::GetOutputDeviceShape(kernel_node, 0)); is_null_input_ = CHECK_SHAPE_NULL(output_shape_, kernel_name, "output"); if (is_null_input_) { InitSizeLists(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/one_hot_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/one_hot_gpu_kernel.h index 7cbe4b459f3..13a16c96483 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/one_hot_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/one_hot_gpu_kernel.h @@ -56,8 +56,8 @@ class OneHotFwdGpuKernelMod : public DeprecatedNativeGpuKernelMod { auto kernel_name = common::AnfAlgo::GetCNodeName(kernel_node); kernel_node_ = kernel_node; int64_t axis = GetAttr(kernel_node, "axis"); - auto input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); - auto output_shape = common::AnfAlgo::GetOutputInferShape(kernel_node, 0); + auto input_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0)); + auto output_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetOutputInferShape(kernel_node, 0)); size_t input_num = common::AnfAlgo::GetInputTensorNum(kernel_node); if (input_num == DynamicInputNum) { is_dynamic_shape_ = true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/oneslike_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/oneslike_gpu_kernel.h index ce9b07a5724..38126c4fecc 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/oneslike_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/oneslike_gpu_kernel.h @@ -58,12 +58,8 @@ class OnesLikeGpuKernelMod : public DeprecatedNativeGpuKernelMod { InitSizeLists(); return true; } - size_t shape_size = input_shape.size(); - input_size_ = sizeof(T); - for (size_t i = 0; i < shape_size; i++) { - input_size_ *= input_shape[i]; - } + input_size_ = sizeof(T) * SizeOf(input_shape); output_size_ = input_size_; InitSizeLists(); return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/pack_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/pack_gpu_kernel.h index 242b32b72de..e5578d7fba1 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/pack_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/pack_gpu_kernel.h @@ -82,9 +82,9 @@ class PackFwdGpuKernelMod : public DeprecatedNativeGpuKernelMod { return 
true; } for (size_t j = 0; j < input_shape.size(); j++) { - input_size *= input_shape[j]; + input_size *= static_cast(input_shape[j]); if (i == 0 && j >= IntToSize(axis_)) { - dims_behind_axis_ *= input_shape[j]; + dims_behind_axis_ *= static_cast(input_shape[j]); } } input_size_list_.push_back(input_size * sizeof(T)); @@ -99,7 +99,7 @@ class PackFwdGpuKernelMod : public DeprecatedNativeGpuKernelMod { } output_size_ = 1; for (size_t i = 0; i < output_shape.size(); i++) { - output_size_ *= output_shape[i]; + output_size_ *= static_cast(output_shape[i]); } output_size_list_.push_back(output_size_ * sizeof(T)); InitSizeLists(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/range_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/range_gpu_kernel.h index 8301cecb063..cf0d1503acd 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/range_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/range_gpu_kernel.h @@ -69,7 +69,7 @@ class RangeGpuKernelMod : public DeprecatedNativeGpuKernelMod { auto shape_size = input_shape.size(); input_size_ = 1; for (size_t i = 0; i < shape_size; i++) { - input_size_ *= input_shape[i]; + input_size_ *= static_cast(input_shape[i]); } input_size_ *= sizeof(T); output_size_ = input_size_; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/resize_nearest_neighbor_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/resize_nearest_neighbor_gpu_kernel.h index ab611a62861..464fa04db34 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/resize_nearest_neighbor_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/resize_nearest_neighbor_gpu_kernel.h @@ -81,18 +81,16 @@ class ResizeNearestNeighborGpuKernelMod : public DeprecatedNativeGpuKernelMod { << "', the dimension of input and output must be the same, but got the dimension of input: " << shape_size_ << ", the dimension of output: " << output_shape.size(); } - input_size_ = 1; + for (size_t i = 0; i < shape_size_; i++) { - input_size_ *= input_shape[i]; - input_shape_.push_back(input_shape[i]); + input_shape_.push_back(LongToInt(input_shape[i])); } - input_size_ *= sizeof(T); - output_size_ = 1; + input_size_ = sizeof(T) * SizeOf(input_shape); + for (size_t i = 0; i < shape_size_; i++) { - output_size_ *= output_shape[i]; - output_shape_.push_back(output_shape[i]); + output_shape_.push_back(LongToInt(output_shape[i])); } - output_size_ *= sizeof(T); + output_size_ = sizeof(T) * SizeOf(output_shape); align_corners_ = GetAttr(kernel_node, "align_corners"); InitSizeLists(); return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/resize_nearest_neighbor_grad_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/resize_nearest_neighbor_grad_gpu_kernel.h index ba0fef06189..7c72f5a60ec 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/resize_nearest_neighbor_grad_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/resize_nearest_neighbor_grad_gpu_kernel.h @@ -81,26 +81,23 @@ class ResizeNearestNeighborGradGpuKernelMod : public DeprecatedNativeGpuKernelMo << "', the dimension of input and output must be the same, but got the dimension of input: " << shape_size_ << ", the dimension of output: " << output_shape.size(); } - input_size_ = 1; + for (size_t i = 0; i < shape_size_; i++) { - input_size_ *= input_shape[i]; if (input_shape[i] == 0) { MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the shape of input at " << i << " index cannot be 0, " << "but got " << input_shape[i]; } - 
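The buffer sizing in the pack, range, and resize kernels above now narrows each int64_t dim explicitly while accumulating a byte count for InitSizeLists. A sketch of the recurring pattern (TensorBytes is an illustrative name, not a MindSpore helper; dims are assumed already validated as non-negative):

#include <cstdint>
#include <vector>

using ShapeVector = std::vector<int64_t>;

// Byte size of a tensor: element width times every dimension.
template <typename T>
size_t TensorBytes(const ShapeVector &shape) {
  size_t bytes = sizeof(T);
  for (int64_t dim : shape) {
    bytes *= static_cast<size_t>(dim);
  }
  return bytes;
}

// e.g. output_size_list_.push_back(TensorBytes<float>(output_shape));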
input_shape_.push_back(input_shape[i]); + input_shape_.push_back(LongToInt(input_shape[i])); } - input_size_ *= sizeof(T); - output_size_ = 1; + input_size_ = sizeof(T) * SizeOf(input_shape); for (size_t i = 0; i < shape_size_; i++) { - output_size_ *= output_shape[i]; - if (input_shape[i] == 0) { + if (output_shape[i] == 0) { MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the shape of output at " << i << " index cannot be 0, " - << "but got " << input_shape[i]; + << "but got " << output_shape[i]; } - output_shape_.push_back(output_shape[i]); + output_shape_.push_back(LongToInt(output_shape[i])); } - output_size_ *= sizeof(T); + output_size_ = sizeof(T) * SizeOf(output_shape); align_corners_ = GetAttr(kernel_node, "align_corners"); InitSizeLists(); return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/reverse_sequence_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/reverse_sequence_gpu_kernel.h index 4d14615d931..9651b3c10ec 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/reverse_sequence_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/reverse_sequence_gpu_kernel.h @@ -87,11 +87,8 @@ class ReverseSequenceFwdGpuKernelMod : public DeprecatedNativeGpuKernelMod { MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input cannot be less than 1, but got " << input_shape_.size(); } - input_size_ = 1; shape_size_ = input_shape_.size(); // required for calls - for (size_t i = 0; i < shape_size_; i++) { - input_size_ *= input_shape_[i]; - } + input_size_ = SizeOf(input_shape_); // get seq len shape seq_len_size_ = seq_len_shape.size(); output_size_ = input_size_; // size does not change @@ -122,7 +119,7 @@ class ReverseSequenceFwdGpuKernelMod : public DeprecatedNativeGpuKernelMod { size_t total_index_dim_; size_t output_size_; size_t workspace_size_; - std::vector input_shape_; + ShapeVector input_shape_; }; } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/reverse_v2_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/reverse_v2_gpu_kernel.h index 611420ec429..0959778126d 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/reverse_v2_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/reverse_v2_gpu_kernel.h @@ -90,7 +90,7 @@ class ReverseV2GpuKernelMod : public DeprecatedNativeGpuKernelMod { } input_size_ = 1; for (size_t i = 0; i < input_rank_; i++) { - input_size_ *= input_shape_[i]; + input_size_ *= static_cast(input_shape_[i]); } strides_.resize(input_rank_); @@ -141,7 +141,7 @@ class ReverseV2GpuKernelMod : public DeprecatedNativeGpuKernelMod { private: size_t input_size_; size_t input_rank_; - std::vector input_shape_; + std::vector input_shape_; std::vector strides_; std::vector axis_; bool is_null_input_; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/scatter_functor_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/scatter_functor_gpu_kernel.h index 33bc24ae3f7..2e47cd97873 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/scatter_functor_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/scatter_functor_gpu_kernel.h @@ -72,7 +72,7 @@ class ScatterFunctorKernelMod : public DeprecatedNativeGpuKernelMod { if (output_num != 1) { MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs must be 1, but got " << output_num; } - auto input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto input_shape = 
Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0)); if (input_shape.empty()) { MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the input can not be empty"; } @@ -84,10 +84,7 @@ class ScatterFunctorKernelMod : public DeprecatedNativeGpuKernelMod { } input_size_ = input_shape[0] * inner_size_; auto indices_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); - indices_size_ = 1; - for (size_t i = 0; i < indices_shape.size(); i++) { - indices_size_ *= indices_shape[i]; - } + indices_size_ = SizeOf(indices_shape); updates_size_ = indices_size_ * inner_size_; InitSizeLists(); return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/select_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/select_gpu_kernel.h index d46150c58f4..00329892701 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/select_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/select_gpu_kernel.h @@ -49,7 +49,7 @@ class SelectGpuKernelMod : public DeprecatedNativeGpuKernelMod { kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); kernel_node_ = kernel_node; (void)CheckParam(kernel_node); - auto shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0)); is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name_, "input"); if (is_null_input_) { InitSizeLists(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/slice_grad_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/slice_grad_gpu_kernel.h index bf87bdd2a03..a2b4e4f235a 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/slice_grad_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/slice_grad_gpu_kernel.h @@ -73,7 +73,7 @@ class SliceGradGpuKernelMod : public DeprecatedNativeGpuKernelMod { is_strided_slice_ = true; std::vector shapex = GetAttr>(kernel_node, "shapex"); for (auto x : shapex) { - input_shape_.push_back(static_cast(x)); + input_shape_.push_back(x); } for (auto i = input_shape_.size(); i < kSliceGradDefaultInputShapeSize; i++) { (void)input_shape_.insert(input_shape_.begin(), 1); @@ -107,17 +107,19 @@ class SliceGradGpuKernelMod : public DeprecatedNativeGpuKernelMod { ShapeNdTo4d(dy_shape, &dy_shape_); begin_ = GetAttr>(kernel_node, "begin"); CalcBeginAndSize(data_format, kSliceGradDefaultInputShapeSize); - input_size_ = input_shape_[0] * input_shape_[1] * input_shape_[2] * input_shape_[3] * sizeof(T); + input_size_ = + static_cast(input_shape_[0] * input_shape_[1] * input_shape_[2] * input_shape_[3] * sizeof(T)); } else { ShapeNdTo7d(dy_shape, &dy_shape_); begin_ = GetAttr>(kernel_node, "begin"); CalcBeginAndSize(data_format, kSliceGradMaxInputShapeSize); - input_size_ = input_shape_[0] * input_shape_[1] * input_shape_[2] * input_shape_[3] * input_shape_[4] * - input_shape_[5] * input_shape_[6] * input_shape_[7] * sizeof(T); + input_size_ = + static_cast(input_shape_[0] * input_shape_[1] * input_shape_[2] * input_shape_[3] * input_shape_[4] * + input_shape_[5] * input_shape_[6] * input_shape_[7] * sizeof(T)); } output_size_ = sizeof(T); for (auto x : dy_shape_) { - output_size_ = output_size_ * x; + output_size_ = output_size_ * static_cast(x); } InitSizeLists(); return true; @@ -170,8 +172,8 @@ class SliceGradGpuKernelMod : public DeprecatedNativeGpuKernelMod { std::vector begin_; std::vector size_; std::vector strides_; - std::vector input_shape_; - std::vector dy_shape_; + ShapeVector 
input_shape_; + ShapeVector dy_shape_; bool is_strided_slice_; bool is_null_input_; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/sort_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/sort_gpu_kernel.h index 68fd4c6d747..a84b1ed3fc4 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/sort_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/sort_gpu_kernel.h @@ -141,7 +141,7 @@ class SortGpuKernelMod : public DeprecatedNativeGpuKernelMod { input_size_ = 1; for (size_t i = 0; i < input_rank_; i++) { - input_size_ *= input_shape_[i]; + input_size_ *= static_cast(input_shape_[i]); } descending_ = GetAttr(kernel_node, "descending"); @@ -163,7 +163,7 @@ class SortGpuKernelMod : public DeprecatedNativeGpuKernelMod { transposed_shape_ = input_shape_; std::swap(transposed_shape_[input_rank_ - 1], transposed_shape_[axis_]); - inner_size_ = input_shape_[axis_]; + inner_size_ = static_cast(input_shape_[axis_]); outer_size_ = input_size_ / inner_size_; if (std::is_same::value) { @@ -218,11 +218,11 @@ class SortGpuKernelMod : public DeprecatedNativeGpuKernelMod { int64_t axis_; bool descending_; bool is_null_input_; - std::vector input_shape_; + std::vector input_shape_; size_t input_rank_; // for transpose - std::vector transposed_shape_; + std::vector transposed_shape_; std::vector perm_; // for topk diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/spacetobatch_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/spacetobatch_gpu_kernel.h index b7509a4bd0d..8bb6d57b0bc 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/spacetobatch_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/spacetobatch_gpu_kernel.h @@ -42,9 +42,9 @@ class SpaceToBatchGpuKernelMod : public DeprecatedNativeGpuKernelMod { size_t size = input_size_ / sizeof(T); - CalSpaceToBatch(size, input, in_, ih_, iw_, ic_, on_, oh_, ow_, oc_, paddings_[0][0], paddings_[0][1], - paddings_[1][0], paddings_[1][1], block_size_, output, - reinterpret_cast(stream_ptr)); + CalSpaceToBatch(size, input, in_, ih_, iw_, ic_, on_, oh_, ow_, oc_, LongToSize(paddings_[0][0]), + LongToSize(paddings_[0][1]), LongToSize(paddings_[1][0]), LongToSize(paddings_[1][1]), + block_size_, output, reinterpret_cast(stream_ptr)); return true; } @@ -52,19 +52,16 @@ class SpaceToBatchGpuKernelMod : public DeprecatedNativeGpuKernelMod { kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); kernel_node_ = kernel_node; (void)CheckParam(kernel_node); - input_size_ = sizeof(T); - for (size_t idx = 0; idx < input_shape_.size(); ++idx) { - input_size_ *= input_shape_[idx]; - } - in_ = input_shape_[0]; - ic_ = input_shape_[1]; - ih_ = input_shape_[2]; - iw_ = input_shape_[3]; + input_size_ = sizeof(T) * SizeOf(input_shape_); + in_ = LongToSizeClipNeg(input_shape_[0]); + ic_ = LongToSizeClipNeg(input_shape_[1]); + ih_ = LongToSizeClipNeg(input_shape_[2]); + iw_ = LongToSizeClipNeg(input_shape_[3]); on_ = in_ * block_size_ * block_size_; oc_ = ic_; - oh_ = (ih_ + paddings_[0][0] + paddings_[0][1]) / block_size_; - ow_ = (iw_ + paddings_[1][0] + paddings_[1][1]) / block_size_; + oh_ = (ih_ + LongToSize(paddings_[0][0] + paddings_[0][1])) / block_size_; + ow_ = (iw_ + LongToSize(paddings_[1][0] + paddings_[1][1])) / block_size_; output_size_ = on_ * oc_ * oh_ * ow_ * sizeof(T); InitSizeLists(); return true; @@ -108,12 +105,11 @@ class SpaceToBatchGpuKernelMod : public DeprecatedNativeGpuKernelMod { } // check input_shape - auto input_shape = 
AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0); - if (input_shape.size() != SHAPE_SIZE) { + input_shape_ = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0); + if (input_shape_.size() != SHAPE_SIZE) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input must be equal to " << SHAPE_SIZE - << ", but got " << input_shape.size(); + << ", but got " << input_shape_.size(); } - input_shape_.assign(input_shape.begin(), input_shape.end()); // check paddings_ paddings_ = GetAttr<std::vector<std::vector<int64_t>>>(kernel_node, "paddings"); if (paddings_.size() != PADDING_SHAPE_0) { @@ -131,7 +127,7 @@ class SpaceToBatchGpuKernelMod : public DeprecatedNativeGpuKernelMod { << "but got paddings[" << idx_i << "][ " << idx_j << "]: " << paddings_[idx_i][idx_j]; } } - auto tmp_shape = input_shape[idx_i + PADDING_SHAPE_1] + paddings_[idx_i][0] + paddings_[idx_i][1]; + auto tmp_shape = input_shape_[idx_i + PADDING_SHAPE_1] + paddings_[idx_i][0] + paddings_[idx_i][1]; if ((tmp_shape % block_size_) != 0) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', padded shape must be divisible by block_size, but got padded shape: " << tmp_shape @@ -149,7 +145,7 @@ class SpaceToBatchGpuKernelMod : public DeprecatedNativeGpuKernelMod { std::string kernel_name_; std::vector<std::vector<int64_t>> paddings_; - std::vector<size_t> input_shape_; + std::vector<int64_t> input_shape_; size_t block_size_; size_t input_size_; size_t output_size_;
diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/spacetodepth_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/spacetodepth_gpu_kernel.h index 0c89ee120cf..a2698018287 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/spacetodepth_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/spacetodepth_gpu_kernel.h @@ -68,7 +68,7 @@ class SpaceToDepthFwdKernelMod : public DeprecatedNativeGpuKernelMod { MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs must be 2, but got " << output_num; } // check input_shape - auto input_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0); + auto input_shape = Convert2SizeTClipNeg(AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0)); is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); if (is_null_input_) { InitSizeLists(); return true; } @@ -82,15 +82,15 @@ class SpaceToDepthFwdKernelMod : public DeprecatedNativeGpuKernelMod { // get input and output information input_size_ = 1; for (size_t i = 0; i < shape_size_; i++) { - input_size_ *= input_shape[i]; + input_size_ *= static_cast<size_t>(input_shape[i]); } input_size_ *= sizeof(T); output_size_ = input_size_; - in_ = input_shape[0]; - ic_ = input_shape[1]; - ih_ = input_shape[2]; - iw_ = input_shape[3]; + in_ = static_cast<size_t>(input_shape[0]); + ic_ = static_cast<size_t>(input_shape[1]); + ih_ = static_cast<size_t>(input_shape[2]); + iw_ = static_cast<size_t>(input_shape[3]); on_ = in_; oc_ = ic_ * block_size_ * block_size_;
diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/split_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/split_gpu_kernel.h index b9ba454cb1d..8072b83dbf3 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/split_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/split_gpu_kernel.h @@ -54,7 +54,7 @@ class SplitFwdGpuKernelMod : public DeprecatedNativeGpuKernelMod { bool Init(const CNodePtr &kernel_node) override { kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); kernel_node_ = kernel_node; - auto input_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0); + auto input_shape = 
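// SpaceToDepth's output geometry above, traced on hypothetical values: block_size_ = 2
// folds each 2x2 spatial tile into the channel dimension, so channels grow by block^2
// while (presumably, in the lines not shown in this hunk) height and width shrink by block:
size_t block_size = 2;
size_t batch = 8, channels = 3;
size_t out_batch = batch;                                // 8
size_t out_channels = channels * block_size * block_size;  // 12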
Convert2SizeTClipNeg(AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0)); is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name_, "input"); if (is_null_input_) { InitSizeLists(); @@ -82,9 +82,9 @@ class SplitFwdGpuKernelMod : public DeprecatedNativeGpuKernelMod { all_size_axis_ = 1; for (int i = 0; i < SizeToInt(input_shape.size()); i++) { - input_size_ *= input_shape[i]; + input_size_ *= static_cast(input_shape[i]); if (i > axis_) { - all_size_before_axis_ *= input_shape[i]; + all_size_before_axis_ *= static_cast(input_shape[i]); all_size_axis_ *= input_shape[i]; } if (i == axis_) { @@ -103,7 +103,7 @@ class SplitFwdGpuKernelMod : public DeprecatedNativeGpuKernelMod { return true; } for (size_t j = 0; j < output_shape.size(); j++) { - output_size *= output_shape[j]; + output_size *= static_cast(output_shape[j]); } output_size_list_.push_back(output_size * sizeof(T)); } @@ -151,9 +151,9 @@ class SplitFwdGpuKernelMod : public DeprecatedNativeGpuKernelMod { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the 'axis' must be in the range [-" << dims << "," << dims << "), but got " << axis_; } - if (output_num_ > SizeToInt(input_shape[axis_])) { + if (input_shape[axis_] > 0 && output_num_ > input_shape[axis_]) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs cannot be greater than " - << SizeToInt(input_shape[axis_]) << ", but got " << output_num_; + << input_shape[axis_] << ", but got " << output_num_; } if (output_num_ != output_num) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs must be " << output_num_ << ", but got " diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/strided_slice_gpu_common.h b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/strided_slice_gpu_common.h index be589433d1f..eac7f0dfb43 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/strided_slice_gpu_common.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/strided_slice_gpu_common.h @@ -39,8 +39,10 @@ class StridedSliceGpuCommon { end_ = common::AnfAlgo::GetNodeAttr>(kernel_node, kAttrEnd); strides_ = common::AnfAlgo::GetNodeAttr>(kernel_node, kAttrStrides); } - FillEmptyDims(kernel_node, &begin_, &end_, &strides_, &input_shape_); - ParseStrideSliceMasks(kernel_node, &begin_, &end_, &strides_, input_shape_); + auto shape_tmp = Convert2Long(input_shape_); + FillEmptyDims(kernel_node, &begin_, &end_, &strides_, &shape_tmp); + input_shape_ = Convert2SizeT(shape_tmp); + ParseStrideSliceMasks(kernel_node, &begin_, &end_, &strides_, shape_tmp); FillOutputDim(); null_output_ = IsNullOutput(); } diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/strided_slice_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/strided_slice_gpu_kernel.h index 2ef61d9dad3..2549985c304 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/strided_slice_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/strided_slice_gpu_kernel.h @@ -53,7 +53,7 @@ class StridedSliceGpuKernelMod : public DeprecatedNativeGpuKernelMod, public Str if (input_num == DynamicInputNum) { is_dynamic_attr_ = true; } - input_shape_ = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + input_shape_ = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0)); kernel_node_ = kernel_node; null_output_ = CHECK_SHAPE_NULL(input_shape_, kernel_name, "input"); if (null_output_) { diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/tensor_copy_slices_gpu_kernel.h 
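// The StridedSlice common code above round-trips through int64_t so FillEmptyDims and
// ParseStrideSliceMasks can operate on signed shapes. A sketch of the element-wise
// casts it relies on, assuming they are the usual transform-based conversions from the
// shared convert utilities (illustrative, not the actual header):
inline ShapeVector Convert2Long(const std::vector<size_t> &v) {
  ShapeVector result;
  (void)std::transform(v.begin(), v.end(), std::back_inserter(result),
                       [](size_t s) { return static_cast<int64_t>(s); });
  return result;
}

inline std::vector<size_t> Convert2SizeT(const ShapeVector &v) {
  std::vector<size_t> result;
  (void)std::transform(v.begin(), v.end(), std::back_inserter(result),
                       [](int64_t s) { return static_cast<size_t>(s); });
  return result;
}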
b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/tensor_copy_slices_gpu_kernel.h index e6edd9bc43c..3ecd33fad23 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/tensor_copy_slices_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/tensor_copy_slices_gpu_kernel.h @@ -68,7 +68,7 @@ class TensorCopySlicesGpuKernelMod : public DeprecatedNativeGpuKernelMod { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs must be 1, but got " << output_num; } - input_shape_ = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + input_shape_ = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0)); auto update_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); is_null_input_ = CHECK_SHAPE_NULL(input_shape_, kernel_name_, "input") || CHECK_SHAPE_NULL(update_shape, kernel_name_, "update"); @@ -104,14 +104,14 @@ class TensorCopySlicesGpuKernelMod : public DeprecatedNativeGpuKernelMod { protected: void CheckAtrrAndShapeValid(const CNodePtr &kernel_node) { auto update_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); - size_t total_update_num = std::accumulate(update_shape.begin(), update_shape.end(), 1, std::multiplies()); + int64_t total_update_num = std::accumulate(update_shape.begin(), update_shape.end(), 1, std::multiplies()); if (begin_.size() != end_.size() || end_.size() != strides_.size()) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the size of 'begin', 'strides' and 'end' must be the same " << "but got the size of 'begin': " << begin_.size() << ", the size of 'strides':" << strides_.size() << ", the size of 'end':" << end_.size(); } auto len = begin_.size(); - size_t total_input_num = 1; + int64_t total_input_num = 1; for (size_t i = 0; i < len; ++i) { MS_EXCEPTION_IF_ZERO("strides_[i]", strides_[i]); total_input_num *= ((end_[i] - begin_[i]) / strides_[i]); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/tile_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/tile_gpu_kernel.h index 08b41c8e035..ca1961d306c 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/tile_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/tile_gpu_kernel.h @@ -77,24 +77,18 @@ class TileGpuKernelMod : public DeprecatedNativeGpuKernelMod { MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of output cannot be less than 1, but got " << output_shape_.size(); } - input_size_ = 1; - for (size_t i = 0; i < input_shape_.size(); i++) { - input_size_ *= input_shape_[i]; - } - - output_size_ = 1; + input_size_ = SizeOf(input_shape_); if (output_shape_.size() > TILE_MAX_DIMENSION) { MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of output cannot be greater than " << TILE_MAX_DIMENSION << ", but got " << output_shape_.size(); } shape_size_ = output_shape_.size(); - for (size_t i = 0; i < output_shape_.size(); i++) { - output_size_ *= output_shape_[i]; - } + output_size_ = SizeOf(output_shape_); + std::vector multiples = GetAttr>(kernel_node, "multiples"); int64_t filling_value = static_cast(multiples.size()) - static_cast(input_shape_.size()); // input_shape_.size() == output_shape_.size() == shape_size_ - (void)input_shape_.insert(input_shape_.begin(), LongToSize(filling_value), 1); + (void)input_shape_.insert(input_shape_.begin(), filling_value, 1); InitSizeLists(); return true; } @@ -124,8 +118,8 @@ class TileGpuKernelMod : public DeprecatedNativeGpuKernelMod { size_t output_size_; size_t shape_size_; bool 
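// TensorCopySlices above now accumulates element counts in int64_t, presumably so a
// dynamic (-1) dimension yields a visibly negative product instead of silently
// wrapping a size_t. Illustration with a hypothetical shape:
std::vector<int64_t> update_shape = {-1, 4};
int64_t total_update_num = std::accumulate(update_shape.begin(), update_shape.end(),
                                           int64_t{1}, std::multiplies<int64_t>());
// total_update_num == -4 and is easy to reject; the same product over size_t would
// wrap to 0xFFFFFFFFFFFFFFFC.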
is_null_input_; - std::vector input_shape_; - std::vector output_shape_; + ShapeVector input_shape_; + ShapeVector output_shape_; }; } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/topk_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/topk_gpu_kernel.h index 885f7b61adc..c27abc4d674 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/topk_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/topk_gpu_kernel.h @@ -91,10 +91,11 @@ class TopKGpuKernelMod : public DeprecatedNativeGpuKernelMod { } input_shape_size_ = input_shapes.size(); for (size_t i = 0; i < input_shapes.size() - 1; i++) { - outer_size_ *= input_shapes[i]; + outer_size_ *= LongToSize(input_shapes[i]); } - inner_size_ = input_shapes[input_shapes.size() - 1]; - k_ = output_shapes[output_shapes.size() - 1]; + + inner_size_ = LongToSizeClipNeg(input_shapes[input_shapes.size() - 1]); + k_ = LongToSizeClipNeg(output_shapes[output_shapes.size() - 1]); sorted_ = GetAttr(kernel_node, "sorted"); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/transpose_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/transpose_gpu_kernel.h index d98c816f602..54c1fd4e815 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/transpose_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/transpose_gpu_kernel.h @@ -60,7 +60,7 @@ class TransposeFwdGpuKernelMod : public DeprecatedNativeGpuKernelMod { "cudaMemcpyAsync input_axis failed"); size_t size = input_size_ / sizeof(T); - size_t *h_input_shape = &input_shape_[0]; + size_t *h_input_shape = reinterpret_cast(&input_shape_[0]); size_t *h_input_axis = &input_axis_[0]; if (shape_size_ == kDimSize4 && h_input_axis[kAxisIndexZero] == kAxisZero && h_input_axis[kAxisIndex1st] == kAxis3rd && h_input_axis[kAxisIndex2nd] == kAxis1st && @@ -92,24 +92,19 @@ class TransposeFwdGpuKernelMod : public DeprecatedNativeGpuKernelMod { if (output_num != 1) { MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs must be 1, but got " << output_num; } - auto input_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0); - is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); + input_shape_ = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0); + is_null_input_ = CHECK_SHAPE_NULL(input_shape_, kernel_name, "input"); if (is_null_input_) { InitSizeLists(); return true; } - shape_size_ = input_shape.size(); + shape_size_ = input_shape_.size(); if (shape_size_ > TRANSPOSE_MAX_DIMENSION) { MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of output cannot be greater than " << TRANSPOSE_MAX_DIMENSION << ", but got " << shape_size_; } - input_size_ = 1; - for (size_t i = 0; i < shape_size_; i++) { - input_size_ *= input_shape[i]; - input_shape_.push_back(input_shape[i]); - } - input_size_ *= sizeof(T); + input_size_ = sizeof(T) * SizeOf(input_shape_); output_size_ = input_size_; std::vector perm = GetAttr>(kernel_node, "perm"); for (size_t j = 0; j < perm.size(); j++) { @@ -148,7 +143,7 @@ class TransposeFwdGpuKernelMod : public DeprecatedNativeGpuKernelMod { } private: - std::vector input_shape_; + std::vector input_shape_; std::vector input_axis_; size_t shape_size_; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/unpack_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/unpack_gpu_kernel.h index d7663cf8033..fff396d51e3 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/unpack_gpu_kernel.h +++ 
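// Note on the Transpose launch above: reinterpret_cast-ing the int64_t shape buffer to
// size_t * is only well-defined when the two types share size and alignment, as on
// LP64 targets. A guard one could add (an assumption, not part of this patch):
static_assert(sizeof(size_t) == sizeof(int64_t) && alignof(size_t) == alignof(int64_t),
              "Transpose kernel reinterprets int64_t shape data as size_t");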
b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/unpack_gpu_kernel.h @@ -76,7 +76,7 @@ class UnpackFwdGpuKernelMod : public DeprecatedNativeGpuKernelMod { return true; } for (size_t j = 0; j < _shape.size(); j++) { - _size *= _shape[j]; + _size *= static_cast(_shape[j]); } output_size_list_.push_back(_size * sizeof(T)); } @@ -89,9 +89,9 @@ class UnpackFwdGpuKernelMod : public DeprecatedNativeGpuKernelMod { return true; } for (size_t i = 0; i < input_shape.size(); i++) { - input_size_ *= input_shape[i]; + input_size_ *= static_cast(input_shape[i]); if (i > IntToSize(axis_)) { - dims_after_axis_ *= input_shape[i]; + dims_after_axis_ *= static_cast(input_shape[i]); } } input_size_list_.push_back(input_size_ * sizeof(T)); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/unsorted_segment_max_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/unsorted_segment_max_gpu_kernel.h index c4df612c264..171137323cc 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/unsorted_segment_max_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/unsorted_segment_max_gpu_kernel.h @@ -52,9 +52,9 @@ class UnsortedSegmentMaxGpuKernelMod : public DeprecatedNativeGpuKernelMod { bool Init(const CNodePtr &kernel_node) override { auto kernel_name = common::AnfAlgo::GetCNodeName(kernel_node); kernel_node_ = kernel_node; - auto input_shapes = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0); - auto segment_ids_shapes = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 1); - auto output_shapes = AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, 0); + auto input_shapes = Convert2SizeTClipNeg(AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0)); + auto segment_ids_shapes = Convert2SizeTClipNeg(AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 1)); + auto output_shapes = Convert2SizeTClipNeg(AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, 0)); is_null_input_ = CHECK_SHAPE_NULL(input_shapes, kernel_name, "input") || CHECK_SHAPE_NULL(segment_ids_shapes, kernel_name, "segment_ids") || CHECK_SHAPE_NULL(output_shapes, kernel_name, "output"); @@ -76,23 +76,23 @@ class UnsortedSegmentMaxGpuKernelMod : public DeprecatedNativeGpuKernelMod { num_segments_ = output_shapes[0]; input_size_ = 1; for (size_t i = 0; i < input_shapes.size(); i++) { - input_size_ *= input_shapes[i]; + input_size_ *= static_cast(input_shapes[i]); } segment_ids_size_ = 1; for (size_t i = 0; i < segment_ids_shapes.size(); i++) { - segment_ids_size_ *= segment_ids_shapes[i]; + segment_ids_size_ *= static_cast(segment_ids_shapes[i]); } output_size_ = 1; for (size_t i = 0; i < output_shapes.size(); i++) { - output_size_ *= output_shapes[i]; + output_size_ *= static_cast(output_shapes[i]); } - outer_size_ = input_shapes[0]; + outer_size_ = static_cast(input_shapes[0]); inner_size_ = 1; for (size_t i = 1; i < input_shapes.size(); i++) { - inner_size_ *= input_shapes[i]; + inner_size_ *= static_cast(input_shapes[i]); } InitSizeLists(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/unsorted_segment_min_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/unsorted_segment_min_gpu_kernel.h index 2d3a7456d88..56c86f894c8 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/unsorted_segment_min_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/unsorted_segment_min_gpu_kernel.h @@ -46,9 +46,9 @@ class UnsortedSegmentMinGpuKernelMod : public DeprecatedNativeGpuKernelMod { bool Init(const CNodePtr &kernel_node) override { auto kernel_name = 
common::AnfAlgo::GetCNodeName(kernel_node); - auto input_shapes = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0); - auto segment_ids_shapes = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 1); - auto output_shapes = AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, 0); + auto input_shapes = Convert2SizeTClipNeg(AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0)); + auto segment_ids_shapes = Convert2SizeTClipNeg(AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 1)); + auto output_shapes = Convert2SizeTClipNeg(AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, 0)); kernel_node_ = kernel_node; is_null_input_ = CHECK_SHAPE_NULL(input_shapes, kernel_name, "input") || CHECK_SHAPE_NULL(segment_ids_shapes, kernel_name, "segment_ids") || @@ -71,23 +71,23 @@ class UnsortedSegmentMinGpuKernelMod : public DeprecatedNativeGpuKernelMod { num_segments_ = output_shapes[0]; input_size_ = 1; for (size_t i = 0; i < input_shapes.size(); i++) { - input_size_ *= input_shapes[i]; + input_size_ *= static_cast(input_shapes[i]); } segment_ids_size_ = 1; for (size_t i = 0; i < segment_ids_shapes.size(); i++) { - segment_ids_size_ *= segment_ids_shapes[i]; + segment_ids_size_ *= static_cast(segment_ids_shapes[i]); } output_size_ = 1; for (size_t i = 0; i < output_shapes.size(); i++) { - output_size_ *= output_shapes[i]; + output_size_ *= static_cast(output_shapes[i]); } - outer_size_ = input_shapes[0]; + outer_size_ = static_cast(input_shapes[0]); inner_size_ = 1; for (size_t i = 1; i < input_shapes.size(); i++) { - inner_size_ *= input_shapes[i]; + inner_size_ *= static_cast(input_shapes[i]); } InitSizeLists(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/unsorted_segment_sum_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/unsorted_segment_sum_gpu_kernel.h index f63fb5734d4..56b2e3e9fad 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/unsorted_segment_sum_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/unsorted_segment_sum_gpu_kernel.h @@ -50,9 +50,9 @@ class UnsortedSegmentSumGpuKernelMod : public DeprecatedNativeGpuKernelMod { bool Init(const CNodePtr &kernel_node) override { auto kernel_name = common::AnfAlgo::GetCNodeName(kernel_node); kernel_node_ = kernel_node; - auto input_shapes = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0); - auto ids_shapes = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 1); - auto output_shapes = AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, 0); + auto input_shapes = Convert2SizeTClipNeg(AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0)); + auto ids_shapes = Convert2SizeTClipNeg(AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 1)); + auto output_shapes = Convert2SizeTClipNeg(AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, 0)); is_null_input_ = CHECK_SHAPE_NULL(input_shapes, kernel_name, "input") || CHECK_SHAPE_NULL(ids_shapes, kernel_name, "segment_ids") || CHECK_SHAPE_NULL(output_shapes, kernel_name, "output"); @@ -71,15 +71,15 @@ class UnsortedSegmentSumGpuKernelMod : public DeprecatedNativeGpuKernelMod { auto axis = ids_shapes.size(); for (size_t i = 0; i < input_shapes.size(); i++) { if (i < axis) { - input_dim0_ *= input_shapes[i]; + input_dim0_ *= static_cast(input_shapes[i]); } else { - input_dim1_ *= input_shapes[i]; + input_dim1_ *= static_cast(input_shapes[i]); } } - output_dim0_ = output_shapes[0]; + output_dim0_ = static_cast(output_shapes[0]); for (size_t j = 1; j < output_shapes.size(); j++) { - output_dim1_ *= output_shapes[j]; + output_dim1_ *= 
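// The UnsortedSegmentSum bookkeeping above flattens every dimension before the
// segment-id rank into input_dim0_ and everything after it into input_dim1_. Worked
// example with hypothetical shapes:
std::vector<size_t> input_shapes = {8, 4, 5}, ids_shapes = {8, 4}, output_shapes = {10, 5};
size_t axis = ids_shapes.size();  // 2
size_t input_dim0 = 1, input_dim1 = 1;
for (size_t i = 0; i < input_shapes.size(); ++i) {
  (i < axis ? input_dim0 : input_dim1) *= input_shapes[i];
}
// input_dim0 == 32, input_dim1 == 5; output_dim0 == 10 segments, output_dim1 == 5.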
static_cast(output_shapes[j]); } InitSizeLists(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/zeroslike_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/zeroslike_gpu_kernel.h index 0b5e1e588e9..2406bc046cc 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/zeroslike_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/zeroslike_gpu_kernel.h @@ -51,14 +51,14 @@ class ZerosLikeGpuKernelMod : public DeprecatedNativeGpuKernelMod { auto kernel_name = common::AnfAlgo::GetCNodeName(kernel_node); kernel_node_ = kernel_node; - std::vector input_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0); + auto input_shape = Convert2SizeTClipNeg(AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0)); is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); if (is_null_input_) { InitSizeLists(); return true; } for (size_t i = 0; i < input_shape.size(); i++) { - input_size_ *= input_shape[i]; + input_size_ *= static_cast(input_shape[i]); } InitSizeLists(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/custom/custom_aot_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/custom/custom_aot_gpu_kernel.h index d9e02107cea..59e36534d94 100755 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/custom/custom_aot_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/custom/custom_aot_gpu_kernel.h @@ -121,12 +121,9 @@ class CustomAOTGpuKernelMod : public DeprecatedNativeGpuKernelMod { for (size_t i = 0; i < num_input_; i++) { auto in_shape = AnfAlgo::GetInputDeviceShape(kernel_node, i); - std::vector in_shape_tmp; - std::for_each(in_shape.begin(), in_shape.end(), - [&in_shape_tmp](size_t c) { in_shape_tmp.push_back(SizeToLong(c)); }); type_list_.emplace_back(TypeIdToString(input_type_list[i], true)); - ndims_.push_back(SizeToInt(in_shape_tmp.size())); - shape_list_.emplace_back(in_shape_tmp); + ndims_.push_back(SizeToInt(in_shape.size())); + shape_list_.emplace_back(in_shape); } num_output_ = common::AnfAlgo::GetOutputTensorNum(kernel_node); @@ -138,12 +135,9 @@ class CustomAOTGpuKernelMod : public DeprecatedNativeGpuKernelMod { } for (size_t i = 0; i < num_output_; i++) { - std::vector out_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, i); - std::vector out_shape_tmp; - std::for_each(out_shape.begin(), out_shape.end(), - [&out_shape_tmp](size_t c) { out_shape_tmp.push_back(SizeToLong(c)); }); - shape_list_.emplace_back(out_shape_tmp); - ndims_.push_back(SizeToInt(out_shape_tmp.size())); + auto out_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, i); + shape_list_.emplace_back(out_shape); + ndims_.push_back(SizeToInt(out_shape.size())); type_list_.emplace_back(TypeIdToString(output_type_list[i], true)); } @@ -160,13 +154,13 @@ class CustomAOTGpuKernelMod : public DeprecatedNativeGpuKernelMod { void InitSizeLists() override { for (size_t i = 0; i < num_input_; i++) { size_t this_size = - LongToSize(std::accumulate(shape_list_[i].begin(), shape_list_[i].end(), 1, std::multiplies())); + LongToSizeClipNeg(std::accumulate(shape_list_[i].begin(), shape_list_[i].end(), 1, std::multiplies())); this_size *= GetDtypeNbyte(type_list_[i]); input_size_list_.push_back(this_size); } for (size_t i = num_input_; i < (num_input_ + num_output_); i++) { size_t this_size = - LongToSize(std::accumulate(shape_list_[i].begin(), shape_list_[i].end(), 1, std::multiplies())); + LongToSizeClipNeg(std::accumulate(shape_list_[i].begin(), shape_list_[i].end(), 1, std::multiplies())); this_size *= GetDtypeNbyte(type_list_[i]); 
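// A condensed form of the byte-size computation above. TensorBytes is a hypothetical
// helper named here for illustration; GetDtypeNbyte is assumed to return the element
// width in bytes, and LongToSizeClipNeg to clamp negative products to zero:
inline size_t TensorBytes(const std::vector<int64_t> &shape, size_t dtype_nbyte) {
  int64_t elems =
      std::accumulate(shape.begin(), shape.end(), int64_t{1}, std::multiplies<int64_t>());
  return LongToSizeClipNeg(elems) * dtype_nbyte;  // a dynamic (-1) dim clips to 0 bytes
}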
output_size_list_.push_back(this_size); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/data/dataset_iterator_kernel.cc b/mindspore/ccsrc/plugin/device/gpu/kernel/data/dataset_iterator_kernel.cc index 8377e353a56..d27632f0e47 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/data/dataset_iterator_kernel.cc +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/data/dataset_iterator_kernel.cc @@ -184,9 +184,9 @@ void DatasetIteratorKernelMod::SyncData() { if (dynamic_shape_) { return; } - std::vector> shapes; + std::vector shapes; for (const auto &item : output_data_) { - std::vector shape; + ShapeVector shape; std::transform(item.shapes_.begin(), item.shapes_.end(), std::back_inserter(shape), LongToSize); shapes.push_back(shape); } diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/debug/print_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/debug/print_gpu_kernel.h index 7d4608bcd95..631c6d32071 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/debug/print_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/debug/print_gpu_kernel.h @@ -238,7 +238,7 @@ class PrintGpuKernelMod : public DeprecatedNativeGpuKernelMod { std::unordered_map value_type_; // size_in_byte, typeid std::vector> input_info_; - std::vector> input_shape_; + std::vector> input_shape_; bool is_null_input_; }; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/environ/environ_gpu_get.cc b/mindspore/ccsrc/plugin/device/gpu/kernel/environ/environ_gpu_get.cc index 910a1bc57e6..c5b4a80c90e 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/environ/environ_gpu_get.cc +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/environ/environ_gpu_get.cc @@ -43,7 +43,7 @@ bool EnvironGetGpuKernelMod::Init(const CNodePtr &kernel_node) { } value_size_ = GetTypeByte(TypeIdToType(value_type)); for (auto &i : value_shapes) { - value_size_ *= i; + value_size_ *= static_cast(i); } InitSizeLists(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/environ/environ_gpu_set.cc b/mindspore/ccsrc/plugin/device/gpu/kernel/environ/environ_gpu_set.cc index 0fde180b958..cfe3a6a8985 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/environ/environ_gpu_set.cc +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/environ/environ_gpu_set.cc @@ -46,7 +46,7 @@ bool EnvironSetGpuKernelMod::Init(const CNodePtr &kernel_node) { auto value_shapes = AnfAlgo::GetInputDeviceShape(kernel_node, 2); value_size_ = GetTypeByte(TypeIdToType(value_type)); for (auto &i : value_shapes) { - value_size_ *= i; + value_size_ *= static_cast(i); } InitSizeLists(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/gpu_kernel.cc b/mindspore/ccsrc/plugin/device/gpu/kernel/gpu_kernel.cc index f58af1930c9..4b635a822e7 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/gpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/gpu_kernel.cc @@ -172,7 +172,7 @@ std::vector ConvertPtrs(const std::vector &input_ptrs) { return out_ptrs; } -bool ShapeNdTo4d(const std::vector &src, std::vector *dst) { +bool ShapeNdTo4d(const ShapeVector &src, ShapeVector *dst) { const size_t nd_maximum_size = 4; if (src.size() > nd_maximum_size) { MS_LOG(ERROR) << src.size() << "-D data is not supported!"; @@ -198,82 +198,82 @@ int AxisTransform(const std::string &origin_data_format, const std::string &cal_ } } -void ShapeNCHW2NHWC(std::vector *shape) { +void ShapeNCHW2NHWC(ShapeVector *shape) { std::swap((*shape)[kShapeIndex1st], (*shape)[kShapeIndex3rd]); std::swap((*shape)[kShapeIndex2nd], (*shape)[kShapeIndex1st]); } -void ShapeNCDHW2NDHWC(std::vector *shape) { +void 
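// The two-swap trick in ShapeNCHW2NHWC above, traced on a concrete shape:
// (N, C, H, W) = (1, 2, 3, 4) should become (N, H, W, C) = (1, 3, 4, 2).
ShapeVector s = {1, 2, 3, 4};
std::swap(s[1], s[3]);  // {1, 4, 3, 2}
std::swap(s[2], s[1]);  // {1, 3, 4, 2} -- NHWC, as expected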
ShapeNCDHW2NDHWC(ShapeVector *shape) { std::swap((*shape)[kShapeIndex1st], (*shape)[kShapeIndex2nd]); std::swap((*shape)[kShapeIndex2nd], (*shape)[kShapeIndex3rd]); std::swap((*shape)[kShapeIndex3rd], (*shape)[kShapeIndex4th]); } -void SetDimA(const std::vector &shape, int *dimA, size_t len, const std::string &format) { +void SetDimA(const ShapeVector &shape, int *dimA, size_t len, const std::string &format) { if (shape.size() != len) { MS_EXCEPTION(ValueError) << "Invalid size of input shape " << shape.size() << "-D with dimA " << len << "-D."; } if (Anyone(format, "NCHW", "DefaultFormat", "NCDHW")) { for (size_t i = 0; i < len; ++i) { - dimA[i] = SizeToInt(shape[i]); + dimA[i] = LongToInt(shape[i]); } } else if (format == "NHWC") { - dimA[0] = SizeToInt(shape[0]); - dimA[kShapeIndex1st] = SizeToInt(shape[kShapeIndex3rd]); - dimA[kShapeIndex2nd] = SizeToInt(shape[kShapeIndex1st]); - dimA[kShapeIndex3rd] = SizeToInt(shape[kShapeIndex2nd]); + dimA[0] = LongToInt(shape[0]); + dimA[kShapeIndex1st] = LongToInt(shape[kShapeIndex3rd]); + dimA[kShapeIndex2nd] = LongToInt(shape[kShapeIndex1st]); + dimA[kShapeIndex3rd] = LongToInt(shape[kShapeIndex2nd]); } else { MS_LOG(ERROR) << "Unsupported data format " << format; } } -void SetStrideA(const std::vector &shape, int *strideA, size_t len, const std::string &format) { +void SetStrideA(const ShapeVector &shape, int *strideA, size_t len, const std::string &format) { if (shape.size() != len) { MS_EXCEPTION(ValueError) << "Invalid size of input shape " << shape.size() << "-D with strideA " << len << "-D."; } if (Anyone(format, "NCHW", "DefaultFormat", "NCDHW")) { for (size_t i = 0; i < len; ++i) { - strideA[i] = SizeToInt(accumulate(shape.begin() + i + 1, shape.end(), 1, std::multiplies())); + strideA[i] = LongToInt(accumulate(shape.begin() + i + 1, shape.end(), 1, std::multiplies())); } } else if (format == "NHWC") { - strideA[0] = SizeToInt(shape[kShapeIndex1st] * shape[kShapeIndex2nd] * shape[kShapeIndex3rd]); + strideA[0] = LongToInt(shape[kShapeIndex1st] * shape[kShapeIndex2nd] * shape[kShapeIndex3rd]); strideA[1] = 1; - strideA[kShapeIndex2nd] = SizeToInt(shape[kShapeIndex2nd] * shape[kShapeIndex3rd]); - strideA[kShapeIndex3rd] = SizeToInt(shape[kShapeIndex3rd]); + strideA[kShapeIndex2nd] = LongToInt(shape[kShapeIndex2nd] * shape[kShapeIndex3rd]); + strideA[kShapeIndex3rd] = LongToInt(shape[kShapeIndex3rd]); } else { MS_LOG(ERROR) << "Unsupported data format " << format; } } -void SetNCHW(const std::vector &shape, int *n, int *c, int *h, int *w, const std::string &format) { +void SetNCHW(const ShapeVector &shape, int *n, int *c, int *h, int *w, const std::string &format) { if (Anyone(format, "NCHW", "DefaultFormat")) { - *n = SizeToInt(shape[0]); - *c = SizeToInt(shape[kShapeIndex1st]); - *h = SizeToInt(shape[kShapeIndex2nd]); - *w = SizeToInt(shape[kShapeIndex3rd]); + *n = LongToInt(shape[0]); + *c = LongToInt(shape[kShapeIndex1st]); + *h = LongToInt(shape[kShapeIndex2nd]); + *w = LongToInt(shape[kShapeIndex3rd]); } else if (format == "NHWC") { - *n = SizeToInt(shape[0]); - *c = SizeToInt(shape[kShapeIndex3rd]); - *h = SizeToInt(shape[kShapeIndex1st]); - *w = SizeToInt(shape[kShapeIndex2nd]); + *n = LongToInt(shape[0]); + *c = LongToInt(shape[kShapeIndex3rd]); + *h = LongToInt(shape[kShapeIndex1st]); + *w = LongToInt(shape[kShapeIndex2nd]); } else { MS_LOG(ERROR) << "Unsupported data format " << format; } } -void SetNCDHW(const std::vector &shape, int *n, int *c, int *d, int *h, int *w, const std::string &format) { +void SetNCDHW(const 
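// SetDimA/SetStrideA above translate an int64_t shape into the int arrays cuDNN
// expects; for a contiguous NCHW tensor each stride is the product of the trailing
// dimensions. Worked example with a hypothetical shape:
ShapeVector shape = {2, 3, 4, 5};
int dimA[4], strideA[4];
for (size_t i = 0; i < 4; ++i) {
  dimA[i] = static_cast<int>(shape[i]);
  strideA[i] = static_cast<int>(std::accumulate(shape.begin() + i + 1, shape.end(),
                                                int64_t{1}, std::multiplies<int64_t>()));
}
// dimA == {2, 3, 4, 5}, strideA == {60, 20, 5, 1}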
ShapeVector &shape, int *n, int *c, int *d, int *h, int *w, const std::string &format) { if (Anyone(format, "NCDHW", "DefaultFormat")) { - *n = SizeToInt(shape[0]); - *c = SizeToInt(shape[kShapeIndex1st]); - *d = SizeToInt(shape[kShapeIndex2nd]); - *h = SizeToInt(shape[kShapeIndex3rd]); - *w = SizeToInt(shape[kShapeIndex4th]); + *n = LongToInt(shape[0]); + *c = LongToInt(shape[kShapeIndex1st]); + *d = LongToInt(shape[kShapeIndex2nd]); + *h = LongToInt(shape[kShapeIndex3rd]); + *w = LongToInt(shape[kShapeIndex4th]); } else if (format == "NDHWC") { - *n = SizeToInt(shape[0]); - *c = SizeToInt(shape[kShapeIndex4th]); - *d = SizeToInt(shape[kShapeIndex1st]); - *h = SizeToInt(shape[kShapeIndex2nd]); - *w = SizeToInt(shape[kShapeIndex3rd]); + *n = LongToInt(shape[0]); + *c = LongToInt(shape[kShapeIndex4th]); + *d = LongToInt(shape[kShapeIndex1st]); + *h = LongToInt(shape[kShapeIndex2nd]); + *w = LongToInt(shape[kShapeIndex3rd]); } else { MS_LOG(ERROR) << "Unsupported data format " << format; } @@ -290,9 +290,9 @@ bool CheckBroadcast4TensorOp(const std::vector &A, const std::vector & return true; } -bool CheckTensorSize(const std::initializer_list> &shapes) { +bool CheckTensorSize(const std::initializer_list &shapes) { for (auto shape : shapes) { - size_t total_size = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies()); + int64_t total_size = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies()); if (total_size >= SHAPE_SIZE_LIMIT) { MS_LOG(ERROR) << "The total size of the tensor exceeds the max_limit of 2 Giga-elements, which is " << total_size << " elements (" << shape << ")."; @@ -302,8 +302,8 @@ bool CheckTensorSize(const std::initializer_list> &shapes) { return true; } -bool CudnnSetTensorNdDescriptor(const std::vector &shape, cudnnTensorDescriptor_t descriptor, - cudnnDataType_t data_type, const std::string &node_name) { +bool CudnnSetTensorNdDescriptor(const ShapeVector &shape, cudnnTensorDescriptor_t descriptor, cudnnDataType_t data_type, + const std::string &node_name) { if (shape.size() < 3) { MS_LOG(ERROR) << "cudnnSetTensorNdDescriptor don't support" << shape.size() << "D."; return false; @@ -313,12 +313,12 @@ bool CudnnSetTensorNdDescriptor(const std::vector &shape, cudnnTensorDes std::unique_ptr stride = std::make_unique(nbDims); for (int i = 0; i < nbDims; i++) { - dim[i] = SizeToInt(shape[i]); + dim[i] = LongToInt(shape[i]); stride[i] = 1; } for (int i = nbDims - 2; i >= 0; i--) { - stride[i] = stride[i + 1] * SizeToInt(shape[i + 1]); + stride[i] = stride[i + 1] * LongToInt(shape[i + 1]); } cudnnStatus_t status = cudnnSetTensorNdDescriptor(descriptor, data_type, nbDims, dim.get(), stride.get()); @@ -383,10 +383,8 @@ bool GetTensorIntValue(const tensor::TensorPtr input_tensor, const size_t input_ return true; } -bool ShapeEqual(const std::vector &s1, const std::vector &s2) { - std::vector s2_trans; - std::transform(s2.begin(), s2.end(), std::back_inserter(s2_trans), [](const int64_t &e) { return LongToSize(e); }); - return std::equal(s1.begin(), s1.end(), s2_trans.begin(), s2_trans.end()); +bool ShapeEqual(const ShapeVector &s1, const ShapeVector &s2) { + return std::equal(s1.begin(), s1.end(), s2.begin(), s2.end()); } std::optional> GetDynamicAttrIntValue( diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/gpu_kernel.h index fc0d85724f1..e5acb946b34 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/gpu_kernel.h @@ -267,7 +267,7 @@ 
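// ShapeNdTo4d below normalizes any rank <= 4 shape to NCHW by left-padding with 1s,
// e.g. a rank-2 input (a sketch of the expected behavior, not the member itself):
ShapeVector src = {3, 4};
ShapeVector dst;
for (size_t i = src.size(); i < 4; ++i) dst.push_back(1);  // prepend missing dims
dst.insert(dst.end(), src.begin(), src.end());             // {1, 1, 3, 4}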
class DeprecatedNativeGpuKernelMod : public NativeGpuKernelMod { } // expand Nd Shape to 4d (N in [0,4]) - void ShapeNdTo4d(const std::vector &src, std::vector *dst) { + void ShapeNdTo4d(const ShapeVector &src, ShapeVector *dst) { const size_t nd_maximum_size = 4; if (src.size() > nd_maximum_size) { MS_EXCEPTION(ValueError) << src.size() << "-D data is not supported!"; @@ -280,7 +280,7 @@ class DeprecatedNativeGpuKernelMod : public NativeGpuKernelMod { } // expand Nd Shape to 7d (N in [0,7]) - void ShapeNdTo7d(const std::vector &src, std::vector *dst) { + void ShapeNdTo7d(const ShapeVector &src, ShapeVector *dst) { const size_t nd_maximum_size = 7; if (src.size() > nd_maximum_size) { MS_EXCEPTION(ValueError) << src.size() << "-D data is not supported!"; @@ -308,82 +308,82 @@ class DeprecatedNativeGpuKernelMod : public NativeGpuKernelMod { } // transpose shape: NCHW To NHWC - void ShapeNCHW2NHWC(std::vector *shape) { + void ShapeNCHW2NHWC(ShapeVector *shape) { std::swap((*shape)[kShapeIndex1st], (*shape)[kShapeIndex3rd]); std::swap((*shape)[kShapeIndex2nd], (*shape)[kShapeIndex1st]); } // transpose shape: NCDHW To NDHWC - void ShapeNCDHW2NDHWC(std::vector *shape) { + void ShapeNCDHW2NDHWC(ShapeVector *shape) { std::swap((*shape)[kShapeIndex1st], (*shape)[kShapeIndex2nd]); std::swap((*shape)[kShapeIndex2nd], (*shape)[kShapeIndex3rd]); std::swap((*shape)[kShapeIndex3rd], (*shape)[kShapeIndex4th]); } - void SetDimA(const std::vector &shape, int *dimA, size_t len, const std::string &format) { + void SetDimA(const ShapeVector &shape, int *dimA, size_t len, const std::string &format) { if (shape.size() != len) { MS_EXCEPTION(ValueError) << "Invalid size of input shape " << shape.size() << "-D with dimA " << len << "-D."; } if (Anyone(format, "NCHW", "DefaultFormat", "NCDHW")) { for (size_t i = 0; i < len; ++i) { - dimA[i] = SizeToInt(shape[i]); + dimA[i] = LongToInt(shape[i]); } } else if (format == "NHWC") { - dimA[0] = SizeToInt(shape[0]); - dimA[kShapeIndex1st] = SizeToInt(shape[kShapeIndex3rd]); - dimA[kShapeIndex2nd] = SizeToInt(shape[kShapeIndex1st]); - dimA[kShapeIndex3rd] = SizeToInt(shape[kShapeIndex2nd]); + dimA[0] = LongToInt(shape[0]); + dimA[kShapeIndex1st] = LongToInt(shape[kShapeIndex3rd]); + dimA[kShapeIndex2nd] = LongToInt(shape[kShapeIndex1st]); + dimA[kShapeIndex3rd] = LongToInt(shape[kShapeIndex2nd]); } else { MS_LOG(ERROR) << "Unsupported data format " << format; } } - void SetStrideA(const std::vector &shape, int *strideA, size_t len, const std::string &format) { + void SetStrideA(const ShapeVector &shape, int *strideA, size_t len, const std::string &format) { if (shape.size() != len) { MS_EXCEPTION(ValueError) << "Invalid size of input shape " << shape.size() << "-D with strideA " << len << "-D."; } if (Anyone(format, "NCHW", "DefaultFormat", "NCDHW")) { for (size_t i = 0; i < len; ++i) { - strideA[i] = SizeToInt(accumulate(shape.begin() + i + 1, shape.end(), 1, std::multiplies())); + strideA[i] = LongToInt(accumulate(shape.begin() + i + 1, shape.end(), 1, std::multiplies())); } } else if (format == "NHWC") { - strideA[0] = SizeToInt(shape[kShapeIndex1st] * shape[kShapeIndex2nd] * shape[kShapeIndex3rd]); + strideA[0] = LongToInt(shape[kShapeIndex1st] * shape[kShapeIndex2nd] * shape[kShapeIndex3rd]); strideA[1] = 1; - strideA[kShapeIndex2nd] = SizeToInt(shape[kShapeIndex2nd] * shape[kShapeIndex3rd]); - strideA[kShapeIndex3rd] = SizeToInt(shape[kShapeIndex3rd]); + strideA[kShapeIndex2nd] = LongToInt(shape[kShapeIndex2nd] * shape[kShapeIndex3rd]); + strideA[kShapeIndex3rd] = 
LongToInt(shape[kShapeIndex3rd]); } else { MS_LOG(ERROR) << "Unsupported data format " << format; } } - void SetNCHW(const std::vector &shape, int *n, int *c, int *h, int *w, const std::string &format) { + void SetNCHW(const ShapeVector &shape, int *n, int *c, int *h, int *w, const std::string &format) { if (Anyone(format, "NCHW", "DefaultFormat")) { - *n = SizeToInt(shape[0]); - *c = SizeToInt(shape[kShapeIndex1st]); - *h = SizeToInt(shape[kShapeIndex2nd]); - *w = SizeToInt(shape[kShapeIndex3rd]); + *n = LongToInt(shape[0]); + *c = LongToInt(shape[kShapeIndex1st]); + *h = LongToInt(shape[kShapeIndex2nd]); + *w = LongToInt(shape[kShapeIndex3rd]); } else if (format == "NHWC") { - *n = SizeToInt(shape[0]); - *c = SizeToInt(shape[kShapeIndex3rd]); - *h = SizeToInt(shape[kShapeIndex1st]); - *w = SizeToInt(shape[kShapeIndex2nd]); + *n = LongToInt(shape[0]); + *c = LongToInt(shape[kShapeIndex3rd]); + *h = LongToInt(shape[kShapeIndex1st]); + *w = LongToInt(shape[kShapeIndex2nd]); } else { MS_LOG(ERROR) << "Unsupported data format " << format; } } - void SetNCDHW(const std::vector &shape, int *n, int *c, int *d, int *h, int *w, const std::string &format) { + void SetNCDHW(const ShapeVector &shape, int *n, int *c, int *d, int *h, int *w, const std::string &format) { if (Anyone(format, "NCDHW", "DefaultFormat")) { - *n = SizeToInt(shape[0]); - *c = SizeToInt(shape[kShapeIndex1st]); - *d = SizeToInt(shape[kShapeIndex2nd]); - *h = SizeToInt(shape[kShapeIndex3rd]); - *w = SizeToInt(shape[kShapeIndex4th]); + *n = LongToInt(shape[0]); + *c = LongToInt(shape[kShapeIndex1st]); + *d = LongToInt(shape[kShapeIndex2nd]); + *h = LongToInt(shape[kShapeIndex3rd]); + *w = LongToInt(shape[kShapeIndex4th]); } else if (format == "NDHWC") { - *n = SizeToInt(shape[0]); - *c = SizeToInt(shape[kShapeIndex4th]); - *d = SizeToInt(shape[kShapeIndex1st]); - *h = SizeToInt(shape[kShapeIndex2nd]); - *w = SizeToInt(shape[kShapeIndex3rd]); + *n = LongToInt(shape[0]); + *c = LongToInt(shape[kShapeIndex4th]); + *d = LongToInt(shape[kShapeIndex1st]); + *h = LongToInt(shape[kShapeIndex2nd]); + *w = LongToInt(shape[kShapeIndex3rd]); } else { MS_LOG(ERROR) << "Unsupported data format " << format; } @@ -401,9 +401,9 @@ class DeprecatedNativeGpuKernelMod : public NativeGpuKernelMod { } // The tensor size is limited to 2G by cudnn. 
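// The member CheckTensorSize below now totals elements in int64_t, so an oversized or
// dynamic shape cannot wrap a size_t product before the limit comparison. Standalone
// sketch, with the 2 Giga-element SHAPE_SIZE_LIMIT assumed to be 2^31 for illustration:
constexpr int64_t kShapeSizeLimit = int64_t{1} << 31;  // assumed value
inline bool WithinCudnnLimit(const ShapeVector &shape) {
  int64_t total = 1;
  for (int64_t d : shape) total *= d;
  return total < kShapeSizeLimit;
}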
- inline void CheckTensorSize(const std::initializer_list> &shapes) { + inline void CheckTensorSize(const std::initializer_list &shapes) { for (auto shape : shapes) { - size_t total_size = 1; + int64_t total_size = 1; for (auto i : shape) { total_size *= i; } @@ -415,7 +415,7 @@ class DeprecatedNativeGpuKernelMod : public NativeGpuKernelMod { } // set the tensor descriptor for cudnn/cublas - void CudnnSetTensorNdDescriptor(const std::vector &shape, cudnnTensorDescriptor_t descriptor, + void CudnnSetTensorNdDescriptor(const ShapeVector &shape, cudnnTensorDescriptor_t descriptor, cudnnDataType_t data_type, const std::weak_ptr &node) { if (shape.size() < 3) { MS_EXCEPTION(ValueError) << "cudnnSetTensorNdDescriptor don't support" << shape.size() << "D."; @@ -425,12 +425,12 @@ class DeprecatedNativeGpuKernelMod : public NativeGpuKernelMod { std::unique_ptr stride = std::make_unique(nbDims); for (int i = 0; i < nbDims; i++) { - dim[i] = SizeToInt(shape[i]); + dim[i] = LongToInt(shape[i]); stride[i] = 1; } for (int i = nbDims - 2; i >= 0; i--) { - stride[i] = stride[i + 1] * SizeToInt(shape[i + 1]); + stride[i] = stride[i + 1] * LongToInt(shape[i + 1]); } CHECK_CUDNN_RET_WITH_EXCEPT(node, @@ -454,10 +454,8 @@ class DeprecatedNativeGpuKernelMod : public NativeGpuKernelMod { return type->second; } - inline bool ShapeEqual(const std::vector &s1, const std::vector &s2) { - std::vector s2_trans; - std::transform(s2.begin(), s2.end(), std::back_inserter(s2_trans), [](const int64_t &e) { return LongToSize(e); }); - return std::equal(s1.begin(), s1.end(), s2_trans.begin(), s2_trans.end()); + inline bool ShapeEqual(const ShapeVector &s1, const ShapeVector &s2) { + return std::equal(s1.begin(), s1.end(), s2.begin(), s2.end()); } inline bool GetDynamicAttrIntValue(const CNodePtr &kernel_node, const size_t input_index, @@ -492,7 +490,7 @@ class DeprecatedNativeGpuKernelMod : public NativeGpuKernelMod { std::vector ConvertPtrs(const std::vector &input_ptrs); // expand Nd Shape to 4d (N in [0,4]) -bool ShapeNdTo4d(const std::vector &src, std::vector *dst); +bool ShapeNdTo4d(const ShapeVector &src, ShapeVector *dst); template inline T *GetPossiblyNullDeviceAddress(const std::vector &addr_list, size_t index) { @@ -514,27 +512,27 @@ inline T *GetPossiblyNullDeviceAddress(const std::vector &addr_list, int AxisTransform(const std::string &origin_data_format, const std::string &cal_format, int axis); // transpose shape: NCHW To NHWC -void ShapeNCHW2NHWC(std::vector *shape); +void ShapeNCHW2NHWC(ShapeVector *shape); // transpose shape: NCDHW To NDHWC -void ShapeNCDHW2NDHWC(std::vector *shape); +void ShapeNCDHW2NDHWC(ShapeVector *shape); -void SetDimA(const std::vector &shape, int *dimA, size_t len, const std::string &format); +void SetDimA(const ShapeVector &shape, int *dimA, size_t len, const std::string &format); -void SetStrideA(const std::vector &shape, int *strideA, size_t len, const std::string &format); +void SetStrideA(const ShapeVector &shape, int *strideA, size_t len, const std::string &format); -void SetNCHW(const std::vector &shape, int *n, int *c, int *h, int *w, const std::string &format); +void SetNCHW(const ShapeVector &shape, int *n, int *c, int *h, int *w, const std::string &format); -void SetNCDHW(const std::vector &shape, int *n, int *c, int *d, int *h, int *w, const std::string &format); +void SetNCDHW(const ShapeVector &shape, int *n, int *c, int *d, int *h, int *w, const std::string &format); bool CheckBroadcast4TensorOp(const std::vector &A, const std::vector &B, const std::vector &Out); // 
The tensor size is limited to 2G by cudnn. -bool CheckTensorSize(const std::initializer_list> &shapes); +bool CheckTensorSize(const std::initializer_list &shapes); // set the tensor descriptor for cudnn/cublas -bool CudnnSetTensorNdDescriptor(const std::vector &shape, cudnnTensorDescriptor_t descriptor, - cudnnDataType_t data_type, const std::string &node_name); +bool CudnnSetTensorNdDescriptor(const ShapeVector &shape, cudnnTensorDescriptor_t descriptor, cudnnDataType_t data_type, + const std::string &node_name); // choose the suitable datatype for cudnn/cublas bool GetCudnnDataType(const std::string &Type, cudnnDataType_t *out_type); @@ -544,7 +542,7 @@ bool GetCudaDataType(const std::string &Type, cudaDataType_t *out_type); bool GetTensorIntValue(const tensor::TensorPtr input_tensor, const size_t input_index, const std::string &kernel_name, const std::string &tensor_name, std::vector *tensor_value); -bool ShapeEqual(const std::vector &s1, const std::vector &s2); +bool ShapeEqual(const ShapeVector &s1, const ShapeVector &s2); // This is necessary for gpu kernels to support uint8 data type. In cuda, an unsigned, // 8 bit integral type is represented by an unsigned char, but the MS_REG_GPU_KERNEL diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/math/addn_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/math/addn_gpu_kernel.h index de7d905ca8e..a1cf71ace3c 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/math/addn_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/math/addn_gpu_kernel.h @@ -82,7 +82,7 @@ class AddNFwdGpuKernelMod : public DeprecatedNativeGpuKernelMod { } input_size_ = sizeof(T); for (size_t i = 0; i < input_shape.size(); i++) { - input_size_ *= input_shape[i]; + input_size_ *= static_cast(input_shape[i]); } InitSizeLists(); return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/math/assign_add_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/math/assign_add_gpu_kernel.h index 3dc7f6c805b..6a3e82f14a3 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/math/assign_add_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/math/assign_add_gpu_kernel.h @@ -61,10 +61,7 @@ class AssignAddFwdGpuKernelMod : public DeprecatedNativeGpuKernelMod { InitSizeLists(); return true; } - input_size_ = sizeof(T); - for (size_t i : input_shape) { - input_size_ = i * input_size_; - } + input_size_ = sizeof(T) * SizeOf(input_shape); InitSizeLists(); return true; } diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/math/broadcast_complex_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/math/broadcast_complex_gpu_kernel.h index 2215dc194ad..4084189b6e6 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/math/broadcast_complex_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/math/broadcast_complex_gpu_kernel.h @@ -58,9 +58,9 @@ class BroadcastComplexOpGpuKernelMod : public DeprecatedNativeGpuKernelMod { kernel_node_ = kernel_node; GetOpType(kernel_node); - auto shape1 = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0); - auto shape2 = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 1); - auto shape3 = AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, 0); + auto shape1 = Convert2SizeTClipNeg(AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0)); + auto shape2 = Convert2SizeTClipNeg(AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 1)); + auto shape3 = Convert2SizeTClipNeg(AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, 0)); need_broadcast_ = common::AnfAlgo::IsTensorBroadcast(shape1, shape2); if 
(need_broadcast_ && shape1.size() > MAX_DIMS) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input cannot be greater than " << MAX_DIMS diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/math/broadcast_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/math/broadcast_gpu_kernel.h index 3a956f25108..38583a1491f 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/math/broadcast_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/math/broadcast_gpu_kernel.h @@ -70,9 +70,9 @@ class BroadcastOpGpuKernelMod : public DeprecatedNativeGpuKernelMod { kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); kernel_node_ = kernel_node; GetOpType(kernel_node); - auto shape1 = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0); - auto shape2 = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 1); - auto shape3 = AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, 0); + auto shape1 = Convert2SizeTClipNeg(AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0)); + auto shape2 = Convert2SizeTClipNeg(AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 1)); + auto shape3 = Convert2SizeTClipNeg(AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, 0)); is_null_input_ = CHECK_SHAPE_NULL(shape1, kernel_name_, "input") || CHECK_SHAPE_NULL(shape2, kernel_name_, "input") || CHECK_SHAPE_NULL(shape3, kernel_name_, "output"); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/math/cast_all_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/math/cast_all_gpu_kernel.h index 5b4d8224aeb..841d9124339 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/math/cast_all_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/math/cast_all_gpu_kernel.h @@ -74,10 +74,7 @@ class CastAllFwdGpuKernelMod : public DeprecatedNativeGpuKernelMod { InitSizeLists(); return true; } - size_t s = 1; - for (auto x : shape) { - s = s * x; - } + size_t s = SizeOf(shape); if (max_ < s) { max_ = s; } diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/math/cholesky_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/math/cholesky_gpu_kernel.h index 55d62f09f49..6e2d542af63 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/math/cholesky_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/math/cholesky_gpu_kernel.h @@ -68,7 +68,7 @@ class CholeskyGpuKernelMod : public DeprecatedNativeGpuKernelMod { // Get CuSolver Dense matrix handler handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCusolverDnHandle(); - auto in_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kInputIndex); + auto in_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kInputIndex)); is_null_input_ = CHECK_SHAPE_NULL(in_shape, kernel_name_, "input"); if (is_null_input_) { InitSizeLists(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/math/cholesky_solve_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/math/cholesky_solve_gpu_kernel.h index f3e19b46df1..91210384c0f 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/math/cholesky_solve_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/math/cholesky_solve_gpu_kernel.h @@ -59,8 +59,8 @@ class CholeskySolveGpuKernelMod : public DeprecatedNativeGpuKernelMod { } handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCusolverDnHandle(); - auto in_a_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kDim0); - auto in_b_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kDim1); + auto in_a_shape = 
Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kDim0)); + auto in_b_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kDim1)); is_null_input_ = CHECK_SHAPE_NULL(in_a_shape, kernel_name_, "input_a") || CHECK_SHAPE_NULL(in_b_shape, kernel_name_, "input_b"); if (is_null_input_) { diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/math/cholesky_trsm_solve_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/math/cholesky_trsm_solve_gpu_kernel.h index 3f1ad8ddf39..405c33ca491 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/math/cholesky_trsm_solve_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/math/cholesky_trsm_solve_gpu_kernel.h @@ -69,7 +69,7 @@ class CholeskyTrsmGpuKernelMod : public DeprecatedNativeGpuKernelMod { kernel_node_ = kernel_node; handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCusolverDnHandle(); blas_handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCublasHandle(); - auto in_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto in_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0)); is_null_input_ = CHECK_SHAPE_NULL(in_shape, kernel_name_, "input"); if (is_null_input_) { InitSizeLists(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/math/cumprod_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/math/cumprod_gpu_kernel.h index 81c7bb4bc33..12dabafb7fa 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/math/cumprod_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/math/cumprod_gpu_kernel.h @@ -58,7 +58,7 @@ class CumProdGpuKernelMod : public DeprecatedNativeGpuKernelMod { MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 1, but got " << input_num; } input_size_0_ = sizeof(T); - shape_ = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + shape_ = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0)); is_null_input_ = CHECK_SHAPE_NULL(shape_, kernel_name, "input"); if (is_null_input_) { InitSizeLists(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/math/cumsum_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/math/cumsum_gpu_kernel.h index e5b731f76dd..7d9a5a11af5 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/math/cumsum_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/math/cumsum_gpu_kernel.h @@ -75,9 +75,7 @@ class CumSumGpuKernelMod : public DeprecatedNativeGpuKernelMod { while (axis_ < 0) { axis_ += input_dim_length; } - for (size_t i = 0; i < shape_.size(); i++) { - input_size_0_ *= shape_[i]; - } + input_size_0_ *= SizeOf(shape_); Reshape(); InitSizeLists(); return true; @@ -113,7 +111,7 @@ class CumSumGpuKernelMod : public DeprecatedNativeGpuKernelMod { size_t stride_; size_t stride2_; size_t dims_[kMaxDimsSize] = {}; - std::vector shape_; + ShapeVector shape_; }; } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/math/determinant_triangle_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/math/determinant_triangle_gpu_kernel.h index 56a1defab6d..10c1ee35c1e 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/math/determinant_triangle_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/math/determinant_triangle_gpu_kernel.h @@ -71,20 +71,15 @@ class DetTriangleGpuKernelMod : public DeprecatedNativeGpuKernelMod { InitSizeLists(); return true; } - for (size_t i = 0; i < input_shape.size(); i++) { - input_size_ 
*= input_shape[i]; - } + input_size_ *= SizeOf(input_shape); if (input_shape.size() < 2) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input cannot be less than 2, but got " << input_shape.size(); } - matrix_n_ = input_shape[input_shape.size() - 1]; - - for (size_t i = 0; i < output_shape.size(); i++) { - output_size_ *= output_shape[i]; - } + matrix_n_ = LongToSizeClipNeg(input_shape[input_shape.size() - 1]); + output_size_ *= SizeOf(output_shape); if (matrix_n_ == 0 || output_size_ != input_size_ / matrix_n_ / matrix_n_) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the size of output should be " << (input_size_ / matrix_n_ / matrix_n_) << ", but got " << output_size_;
diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/math/eigh_c_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/math/eigh_c_gpu_kernel.h index e83db845428..4e1a43c5d55 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/math/eigh_c_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/math/eigh_c_gpu_kernel.h @@ -84,7 +84,7 @@ class EighcGpuKernelMod : public DeprecatedNativeGpuKernelMod { << "', a should be a square matrix like [N X N], but got shape [" << A_shape[kDim0] << " X " << A_shape[kDim1] << "]."; } - m_ = A_shape[0]; + m_ = LongToSizeClipNeg(A_shape[0]); InitSizeLists(); return true; }
diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/math/eigh_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/math/eigh_gpu_kernel.h index 756e1fb5c8f..b4b98a2013e 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/math/eigh_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/math/eigh_gpu_kernel.h @@ -69,7 +69,7 @@ class EighGpuKernelMod : public DeprecatedNativeGpuKernelMod { << "', a should be a square matrix like [N X N], but got [" << A_shape[kDim0] << " X " << A_shape[kDim1] << "]."; } - m_ = A_shape[0]; + m_ = LongToSizeClipNeg(A_shape[0]); InitSizeLists(); return true; }
diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/math/einsum_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/math/einsum_gpu_kernel.h index ac8abbe8001..32042629ad2 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/math/einsum_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/math/einsum_gpu_kernel.h @@ -43,7 +43,7 @@ class EinsumGpuKernelMod : public DeprecatedNativeGpuKernelMod { // workspace[0]: res; workspace[1]: src; workspace[2]: dst void RunSingleOpProcess(const OpStruct &op_info, T *src_ptr, T *dst_ptr, void *stream_ptr) { auto name = std::get(op_info); - auto inp_shape = std::get(op_info); + auto inp_shape = Convert2SizeTClipNeg(std::get(op_info)); auto op_param = std::get(op_info); func_helper_.SingleElementProcess(name, src_ptr, dst_ptr, inp_shape, op_param, stream_ptr); } @@ -85,9 +85,9 @@ class EinsumGpuKernelMod : public DeprecatedNativeGpuKernelMod { input_ptr = GetDeviceAddress<T>(inputs, idx); RunSingleOpVecProcess(input_ptr, single_op_[idx], stream_ptr, &src_ptr, &dst_ptr); auto name = std::get(res_op_[count]); - auto shape_a = std::get(res_op_[count]); + auto shape_a = Convert2SizeTClipNeg(std::get(res_op_[count])); auto shape_b = std::get(res_op_[count]); - auto shape_c = std::get(res_op_[count]); + auto shape_c = Convert2SizeTClipNeg(std::get(res_op_[count])); func_helper_.TwoElementProcess(name, res_ptr, src_ptr, dst_ptr, shape_a, shape_b, shape_c, stream_ptr); T *temp = res_ptr; @@ -99,7 +99,7 @@ class EinsumGpuKernelMod : public DeprecatedNativeGpuKernelMod { } name = std::get(res_op_[count]); while (single_op_func_.count(name) != 0) { - 
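// Back-checking the DetTriangle consistency test earlier in this hunk with concrete
// numbers -- a hypothetical batch of two 3x3 triangular matrices:
size_t input_size = 2 * 3 * 3;                       // 18 elements
size_t matrix_n = 3;
size_t expected = input_size / matrix_n / matrix_n;  // 2: one determinant per matrix
// output_size_ must equal 2 here, or the kernel raises the exception above.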
shape_a = std::get(res_op_[count]); + shape_a = Convert2SizeTClipNeg(std::get(res_op_[count])); shape_b = std::get(res_op_[count]); func_helper_.SingleElementProcess(name, res_ptr, dst_ptr, shape_a, shape_b, stream_ptr); temp = res_ptr; @@ -137,7 +137,7 @@ class EinsumGpuKernelMod : public DeprecatedNativeGpuKernelMod { MS_LOG(ERROR) << "For " << node_name << ", input types should be the same, but it does not."; return false; } - std::vector in_shape = AnfAlgo::GetInputDeviceShape(kernel_node, idx); + auto in_shape = AnfAlgo::GetInputDeviceShape(kernel_node, idx); input_shapes_.push_back(in_shape); } std::string equation = GetAttr(kernel_node, "equation"); @@ -210,8 +210,8 @@ class EinsumGpuKernelMod : public DeprecatedNativeGpuKernelMod { EinsumHelper func_helper_; std::string node_name_; TypeId type_id_; - std::vector> input_shapes_; - std::vector out_shape_; + std::vector> input_shapes_; + std::vector out_shape_; std::vector> single_op_; std::vector res_op_; std::set single_op_func_ = {"ReduceSum", "Diagonal", "Transpose"}; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/math/einsum_grad_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/math/einsum_grad_gpu_kernel.h index 6c1a27868bb..1578af52b33 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/math/einsum_grad_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/math/einsum_grad_gpu_kernel.h @@ -84,7 +84,7 @@ class EinsumGradGpuKernelMod : public DeprecatedNativeGpuKernelMod { MS_LOG(ERROR) << "For " << node_name_ << ", input types should be the same, but it does not."; return false; } - std::vector in_shape = AnfAlgo::GetInputDeviceShape(kernel_node, idx); + auto in_shape = AnfAlgo::GetInputDeviceShape(kernel_node, idx); input_shapes_.push_back(in_shape); } std::string equation = GetAttr(kernel_node, "equation"); @@ -200,9 +200,9 @@ class EinsumGradGpuKernelMod : public DeprecatedNativeGpuKernelMod { int two_op_cnt = max_x_idx - 1; while (idx_op >= 0) { auto name = std::get(res_op_[idx_op]); - auto shape_a = std::get(res_op_[idx_op]); + auto shape_a = Convert2SizeTClipNeg(std::get(res_op_[idx_op])); auto shape_b = std::get(res_op_[idx_op]); - auto shape_c = std::get(res_op_[idx_op]); + auto shape_c = Convert2SizeTClipNeg(std::get(res_op_[idx_op])); if (single_op_func_.count(name) != 0) { func_helper_.SingleElementProcessGrad(name, src_ptr, dst_ptr, shape_a, shape_b, stream_ptr); if (dst_ptr == work0) { @@ -216,9 +216,9 @@ class EinsumGradGpuKernelMod : public DeprecatedNativeGpuKernelMod { if (idx_op >= 0) { name = std::get(res_op_[idx_op]); while (idx_op >= 0 && single_op_func_.count(name) != 0) { - shape_a = std::get(res_op_[idx_op]); + shape_a = Convert2SizeTClipNeg(std::get(res_op_[idx_op])); shape_b = std::get(res_op_[idx_op]); - shape_c = std::get(res_op_[idx_op]); + shape_c = Convert2SizeTClipNeg(std::get(res_op_[idx_op])); func_helper_.SingleElementProcessGrad(name, src_ptr, dst_ptr, shape_a, shape_b, stream_ptr); --idx_op; T *temp_ptr = dst_ptr; @@ -231,9 +231,9 @@ class EinsumGradGpuKernelMod : public DeprecatedNativeGpuKernelMod { } } name = std::get(res_op_[idx_op]); - shape_a = std::get(res_op_[idx_op]); + shape_a = Convert2SizeTClipNeg(std::get(res_op_[idx_op])); shape_b = std::get(res_op_[idx_op]); - shape_c = std::get(res_op_[idx_op]); + shape_c = Convert2SizeTClipNeg(std::get(res_op_[idx_op])); T *mid_res; if (two_op_cnt == 0) { mid_res = GetDeviceAddress(outputs, 0); @@ -273,9 +273,9 @@ class EinsumGradGpuKernelMod : public DeprecatedNativeGpuKernelMod { T *dst_ptr = work0; for (int idx = 
single_op_[idx_op].size() - 1; idx >= 0; --idx) { auto name = std::get(single_op_[idx_op][idx]); - auto inp_shape = std::get(single_op_[idx_op][idx]); + auto inp_shape = Convert2SizeTClipNeg(std::get(single_op_[idx_op][idx])); auto op_param = std::get(single_op_[idx_op][idx]); - auto dout_shape = std::get(single_op_[idx_op][idx]); + auto dout_shape = Convert2SizeTClipNeg(std::get(single_op_[idx_op][idx])); func_helper_.SingleElementProcessGrad(name, src_ptr, dst_ptr, inp_shape, op_param, stream_ptr); T *temp = src_ptr; src_ptr = dst_ptr; @@ -301,14 +301,14 @@ class EinsumGradGpuKernelMod : public DeprecatedNativeGpuKernelMod { T *out_ptr = GetDeviceAddress(outputs, idx_op); T *dst_ptr = out_ptr; auto name = std::get(single_op_[idx_op][0]); - auto inp_shape = std::get(single_op_[idx_op][0]); + auto inp_shape = Convert2SizeTClipNeg(std::get(single_op_[idx_op][0])); auto op_param = std::get(single_op_[idx_op][0]); func_helper_.SingleElementProcess(name, inp_ptr, dst_ptr, inp_shape, op_param, stream_ptr); src_ptr = dst_ptr; dst_ptr = work0; for (size_t idx = 1; idx < single_op_[idx_op].size(); ++idx) { name = std::get(single_op_[idx_op][idx]); - inp_shape = std::get(single_op_[idx_op][idx]); + inp_shape = Convert2SizeTClipNeg(std::get(single_op_[idx_op][idx])); op_param = std::get(single_op_[idx_op][idx]); func_helper_.SingleElementProcess(name, src_ptr, dst_ptr, inp_shape, op_param, stream_ptr); T *temp = src_ptr; @@ -336,9 +336,9 @@ class EinsumGradGpuKernelMod : public DeprecatedNativeGpuKernelMod { T *work0 = GetDeviceAddress(workspace, work_idx); while (idx_op < res_op_.size()) { auto name = std::get(res_op_[idx_op]); - auto shape_a = std::get(res_op_[idx_op]); + auto shape_a = Convert2SizeTClipNeg(std::get(res_op_[idx_op])); auto shape_b = std::get(res_op_[idx_op]); - auto shape_c = std::get(res_op_[idx_op]); + auto shape_c = Convert2SizeTClipNeg(std::get(res_op_[idx_op])); T *src_ptr_2 = GetDeviceAddress(outputs, two_op_cnt + 1); func_helper_.TwoElementProcess(name, src_ptr, src_ptr_2, dst_ptr, shape_a, shape_b, shape_c, stream_ptr); ++idx_op; @@ -348,7 +348,7 @@ class EinsumGradGpuKernelMod : public DeprecatedNativeGpuKernelMod { if (idx_op < res_op_.size()) { name = std::get(res_op_[idx_op]); while (idx_op < res_op_.size() && single_op_func_.count(name) != 0) { - auto shape_a = std::get(res_op_[idx_op]); + auto shape_a = Convert2SizeTClipNeg(std::get(res_op_[idx_op])); auto shape_b = std::get(res_op_[idx_op]); func_helper_.SingleElementProcess(name, src_ptr, dst_ptr, shape_a, shape_b, stream_ptr); T *temp = src_ptr; @@ -361,7 +361,7 @@ class EinsumGradGpuKernelMod : public DeprecatedNativeGpuKernelMod { } } if (src_ptr != middle_res_ptr) { - shape_c = std::get(res_op_[idx_op - 1]); + auto shape_c = std::get(res_op_[idx_op - 1]); size_t size = func_helper_.GetShapeSize(shape_c); CHECK_CUDA_RET_WITH_ERROR_NOTRACE(cudaMemcpyAsync(middle_res_ptr, src_ptr, size, cudaMemcpyDeviceToDevice, reinterpret_cast(stream_ptr)), @@ -379,8 +379,8 @@ class EinsumGradGpuKernelMod : public DeprecatedNativeGpuKernelMod { size_t work_size_; size_t shape_size_; size_t reduce_sum_wrok_size_; - std::vector> input_shapes_; - std::vector out_shape_; + std::vector> input_shapes_; + std::vector out_shape_; std::vector> single_op_; std::vector res_op_; std::set single_op_func_ = {"ReduceSum", "Diagonal", "Transpose"}; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/math/einsum_helper.h b/mindspore/ccsrc/plugin/device/gpu/kernel/math/einsum_helper.h index d62c649bc84..d8127120884 100644 --- 
a/mindspore/ccsrc/plugin/device/gpu/kernel/math/einsum_helper.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/math/einsum_helper.h @@ -47,7 +47,7 @@ constexpr int DIM_FOUR = 4; constexpr int HALF_TYPE_WORK_SIZE_MUL = 2; constexpr int ELL_LEN = 3; // tuple<>:0:操作信息{diagonal, transpose, reduce_sum}, 1:input_shape, 2:operate_param, 3: out_shape -using OpStruct = std::tuple, std::vector, std::vector>; +using OpStruct = std::tuple, std::vector, std::vector>; template class EinsumHelper { public: @@ -70,10 +70,10 @@ class EinsumHelper { } return static_cast(cur_char - 'A' + BIG_C_BEGIN); } - size_t GetShapeSize(const std::vector &shape) { + size_t GetShapeSize(const std::vector &shape) { size_t size = sizeof(T); for (auto &dim : shape) { - size *= dim; + size *= static_cast(dim); } return size; } @@ -85,7 +85,7 @@ class EinsumHelper { data_type_ = cur_type; } bool Preprocess(const std::string &orig_equatioin, const std::string &node_name, - const std::vector> &input_shapes, std::vector *out_shape, + const std::vector> &input_shapes, std::vector *out_shape, std::vector> *single_op, std::vector *res_op) { std::string equation = orig_equatioin; node_name_ = node_name; @@ -544,7 +544,7 @@ class EinsumHelper { CalTranspose(size, input_ptr, d_shape_ptr, d_info_ptr, inp_shape.size(), output_ptr, reinterpret_cast(stream_ptr)); } - bool SegLeftEquation(const std::string &left_equation, const std::vector> &input_shapes) { + bool SegLeftEquation(const std::string &left_equation, const std::vector> &input_shapes) { size_t cur_element = 0; auto found_ell = false; for (size_t idx = 0; idx < left_equation.length(); ++idx) { @@ -616,7 +616,7 @@ class EinsumHelper { } bool SegRightEquationWithArrow(const std::string &left_equation, const std::string &right_equation, - std::vector *out_shape) { + ShapeVector *out_shape) { auto found_ell = false; if (right_equation.length() == 0) { out_size_ = 0; @@ -668,7 +668,7 @@ class EinsumHelper { } return true; } - bool SegRightEquationWithoutArrow(const std::string &left_equation, std::vector *out_shape) { + bool SegRightEquationWithoutArrow(const std::string &left_equation, std::vector *out_shape) { if (left_equation.find('.') != std::string::npos) { perm_idx_ = element_shape_map_[ELL_VAL].size(); out_shape->insert(out_shape->begin(), element_shape_map_[ELL_VAL].begin(), element_shape_map_[ELL_VAL].end()); @@ -684,7 +684,7 @@ class EinsumHelper { return true; } - bool ElementMapShape(const std::vector> &input_shapes) { + bool ElementMapShape(const std::vector> &input_shapes) { for (size_t idx_input = 0; idx_input < input_shapes.size(); ++idx_input) { auto cur_shape = input_shapes[idx_input]; size_t idx_left = 0; @@ -721,8 +721,8 @@ class EinsumHelper { --idx_shape_right; --idx_element_right; } - std::vector temp_vec(input_shapes[idx_input].begin() + idx_left, - input_shapes[idx_input].begin() + idx_shape_right + 1); + ShapeVector temp_vec(input_shapes[idx_input].begin() + idx_left, + input_shapes[idx_input].begin() + idx_shape_right + 1); if (element_shape_map_.find(ELL_VAL) != element_shape_map_.end()) { if (element_shape_map_[ELL_VAL] != temp_vec) { @@ -742,36 +742,38 @@ class EinsumHelper { } return true; } - void CalAxisShape(const std::vector &axis_val, const std::vector &shape_val, size_t *idx, - std::vector *re_shape, std::vector *res_trans_axis) { + void CalAxisShape(const std::vector &axis_val, const ShapeVector &shape_val, size_t *idx, + ShapeVector *re_shape, std::vector *res_trans_axis) { for (auto val : axis_val) { (*re_shape)[*idx] = shape_val[val]; 
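//
// A note on the two retyped helpers in this hunk: CalAxisShape copies the
// selected dims of an int64_t ShapeVector (shape_val) into re_shape while
// recording the transpose permutation, and MulOrDot (next) handles the two
// degenerate einsum contractions. With no summed dims the operand pair reduces
// to an elementwise, broadcastable "Mul"; with every dim summed it reduces to a
// "Dot" whose output shape is {1}; anything in between falls through to the
// "Bmm" path in SumPair. Illustrative values (assumed, not from this patch):
//   res_inp_shape = {1, 3}, sig_src_shape = {2, 3}, sum_dims empty
//     -> emits ("Mul", {1, 3}, {2, 3}, {2, 3}); res_inp_shape becomes {2, 3}
//   res_inp_shape = {2, 3}, sig_src_shape = {2, 3}, both dims summed
//     -> emits ("Dot", {2, 3}, {2, 3}, {1});    res_inp_shape becomes {1}
//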
(*res_trans_axis)[val] = (*idx)++; } } - bool MulOrDot(size_t sum_dims_size, const std::vector &sig_src_shape, std::vector *res_inp_shape, + bool MulOrDot(size_t sum_dims_size, const ShapeVector &sig_src_shape, ShapeVector *res_inp_shape, std::vector *res_op) { - std::vector res_out_shape; + ShapeVector res_out_shape; if (sum_dims_size == 0) { - res_out_shape = std::vector(res_inp_shape->size()); + res_out_shape = ShapeVector(res_inp_shape->size()); for (size_t idx = 0; idx < res_inp_shape->size(); ++idx) { res_out_shape[idx] = (*res_inp_shape)[idx] == 1 ? sig_src_shape[idx] : (*res_inp_shape)[idx]; } - res_op->emplace_back(std::make_tuple("Mul", (*res_inp_shape), sig_src_shape, res_out_shape)); + res_op->emplace_back( + std::make_tuple("Mul", (*res_inp_shape), Convert2SizeTClipNeg(sig_src_shape), res_out_shape)); (*res_inp_shape) = res_out_shape; return true; } if (sum_dims_size == res_inp_shape->size()) { res_out_shape = {1}; - res_op->emplace_back(std::make_tuple("Dot", (*res_inp_shape), sig_src_shape, res_out_shape)); + res_op->emplace_back( + std::make_tuple("Dot", (*res_inp_shape), Convert2SizeTClipNeg(sig_src_shape), res_out_shape)); (*res_inp_shape) = res_out_shape; return true; } return false; } void SumPair(std::vector *res_op, std::vector *single_op, std::vector *sum_dims, - std::vector *res_inp_shape, const std::vector &sig_src_shape) { + ShapeVector *res_inp_shape, const ShapeVector &sig_src_shape) { if (MulOrDot(sum_dims->size(), sig_src_shape, res_inp_shape, res_op)) { return; } @@ -784,15 +786,15 @@ class EinsumHelper { std::vector lo; std::vector ro; std::vector lro; - size_t lo_size = 1; - size_t ro_size = 1; - size_t lro_size = 1; - size_t sum_size = 1; + int64_t lo_size = 1; + int64_t ro_size = 1; + int64_t lro_size = 1; + int64_t sum_size = 1; - std::vector sig_out_shape; - std::vector sig_inp_shape = sig_src_shape; + ShapeVector sig_out_shape; + auto sig_inp_shape = sig_src_shape; std::vector op_info; - std::vector res_out_shape; + ShapeVector res_out_shape; for (size_t idx = 0; idx < res_inp_shape->size(); ++idx) { bool sl = (*res_inp_shape)[idx] > 1; bool sr = sig_inp_shape[idx] > 1; @@ -849,16 +851,17 @@ class EinsumHelper { } single_op->emplace_back(std::make_tuple("Transpose", sig_inp_shape, sig_trans_axis, sig_out_shape)); - std::vector res_inp_reshape = {lro_size, lo_size, sum_size}; - std::vector sig_inp_reshape = {lro_size, sum_size, ro_size}; - std::vector res_out_reshape = {lro_size, lo_size, ro_size}; - res_op->emplace_back(std::make_tuple("Bmm", res_inp_reshape, sig_inp_reshape, res_out_reshape)); + ShapeVector res_inp_reshape = {lro_size, lo_size, sum_size}; + ShapeVector sig_inp_reshape = {lro_size, sum_size, ro_size}; + ShapeVector res_out_reshape = {lro_size, lo_size, ro_size}; + res_op->emplace_back( + std::make_tuple("Bmm", res_inp_reshape, Convert2SizeTClipNeg(sig_inp_reshape), res_out_reshape)); - std::vector res_re_shape(lro.size() + lo.size() + sum_dims->size() + ro.size()); + ShapeVector res_re_shape(LongToSize(lro.size() + lo.size() + sum_dims->size() + ro.size())); size_t idx = 0; CalAxisShape(lro, (*res_inp_shape), &idx, &res_re_shape, &res_trans_axis); CalAxisShape(lo, (*res_inp_shape), &idx, &res_re_shape, &res_trans_axis); - std::vector shape_val(sum_dims_bool.size(), 1); + ShapeVector shape_val(sum_dims_bool.size(), 1); CalAxisShape((*sum_dims), shape_val, &idx, &res_re_shape, &res_trans_axis); CalAxisShape(ro, sig_inp_shape, &idx, &res_re_shape, &res_trans_axis); (*res_inp_shape) = res_re_shape; @@ -878,8 +881,8 @@ class EinsumHelper { 
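//
// The hunk below retypes CalOutShape/StatSingleOp/StatCalProcess and the
// element_shape_map_ member from size_t shapes to int64_t ShapeVector. Per the
// comment near the top of this file (translated from Chinese): OpStruct slot 0
// is the op info {diagonal, transpose, reduce_sum}, slot 1 the input shape,
// slot 2 the operate_param, slot 3 the out shape. Kernels therefore wrap the
// shape slots in Convert2SizeTClipNeg before handing them to the CUDA-side
// helpers, so the int64_t -> size_t crossing happens exactly once. A minimal
// sketch of the conversion utilities this patch leans on (assumed semantics;
// the real MindSpore definitions may differ):
#include <cstddef>
#include <cstdint>
#include <vector>

using ShapeVector = std::vector<int64_t>;  // dims may be negative while dynamic

// Clip a negative (dynamic) dim to 0 so size arithmetic stays well defined.
inline size_t LongToSizeClipNeg(int64_t v) { return v < 0 ? 0 : static_cast<size_t>(v); }

// Whole-shape conversion used at the int64_t -> size_t boundary.
inline std::vector<size_t> Convert2SizeTClipNeg(const ShapeVector &shape) {
  std::vector<size_t> result;
  result.reserve(shape.size());
  for (int64_t dim : shape) {
    result.push_back(LongToSizeClipNeg(dim));
  }
  return result;
}

// Element count of a shape: a clipped dynamic dim zeroes the product, which
// the CHECK_SHAPE_NULL / is_null_input_ guards then treat as an empty input.
inline size_t SizeOf(const ShapeVector &shape) {
  size_t size = 1;
  for (int64_t dim : shape) {
    size *= LongToSizeClipNeg(dim);
  }
  return size;
}
//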
return; } - bool CalOutShape(const std::string &equation, const std::vector> &input_shapes, - std::vector *out_shape) { + bool CalOutShape(const std::string &equation, const std::vector> &input_shapes, + std::vector *out_shape) { std::string seg_arrow = "->"; auto seg_pos = equation.find(seg_arrow); std::string left_equation = equation.substr(0, seg_pos); @@ -908,7 +911,7 @@ class EinsumHelper { } return ret_flag; } - void StatSingleOp(const std::vector> &input_shapes, + void StatSingleOp(const std::vector> &input_shapes, std::vector> *single_op) { std::vector op_info; for (size_t idx = 0; idx < input_shapes.size(); ++idx) { @@ -950,7 +953,7 @@ class EinsumHelper { (*single_op)[idx].emplace_back(std::make_tuple("Transpose", sig_inp_shape, op_info, sig_out_shape)); } } - void StatCalProcess(const std::vector> &input_shapes, + void StatCalProcess(const std::vector> &input_shapes, std::vector> *single_op, std::vector *res_op) { StatSingleOp(input_shapes, single_op); // dim_last_op @@ -1028,7 +1031,7 @@ class EinsumHelper { cublasOperation_t transpose_x1_; cublasOperation_t transpose_x2_; std::vector element_count_; - std::unordered_map> element_shape_map_; + std::unordered_map> element_shape_map_; std::vector> left_elements_; std::vector label_perm_idx_; std::vector workspace_ptr_; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/math/equalcount_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/math/equalcount_gpu_kernel.h index c230fa68c28..65a9775eb53 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/math/equalcount_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/math/equalcount_gpu_kernel.h @@ -64,9 +64,7 @@ class EqualCountGpuKernelMod : public DeprecatedNativeGpuKernelMod { InitSizeLists(); return true; } - for (size_t i = 0; i < input_shape.size(); i++) { - input_size_ *= input_shape[i]; - } + input_size_ *= SizeOf(input_shape); InitSizeLists(); return true; } diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/math/float_status_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/math/float_status_gpu_kernel.h index 8a3d7cf9346..edad6da55c0 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/math/float_status_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/math/float_status_gpu_kernel.h @@ -83,10 +83,7 @@ class FloatStatusGpuKernelMod : public DeprecatedNativeGpuKernelMod { InitSizeLists(); return true; } - input_size_ = sizeof(T); - for (size_t x : shape) { - input_size_ = input_size_ * x; - } + input_size_ = sizeof(T) * SizeOf(shape); auto iter = kOpTypeMap.find(kernel_name); if (iter == kOpTypeMap.end()) { MS_LOG(EXCEPTION) << "For '" << kernel_name << ", only support these types: FloatStatus, IsInf, IsNan, IsFinite " diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/math/identity_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/math/identity_gpu_kernel.h index 682f4373f55..9b8f3708b04 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/math/identity_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/math/identity_gpu_kernel.h @@ -58,7 +58,7 @@ class IdentityGpuKernelMod : public DeprecatedNativeGpuKernelMod { if (output_num != 1) { MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num; } - auto input_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0); + auto input_shape = Convert2SizeTClipNeg(AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0)); is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); if (is_null_input_) { 
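//
// The recurring simplification in the hunks above (determinant_triangle,
// equalcount, float_status, identity, ...): a hand-rolled product over a
// size_t shape becomes one SizeOf call on the int64_t ShapeVector. Sketch of
// the equivalence, assuming SizeOf as outlined earlier (names illustrative):
//
//   // before, with std::vector<size_t> shape:
//   size_t input_size = sizeof(T);
//   for (size_t i = 0; i < shape.size(); i++) input_size *= shape[i];
//
//   // after, with ShapeVector shape; a dynamic (negative) dim clips to 0,
//   // so the product is 0 and the kernel's null-input early return fires:
//   size_t input_size = sizeof(T) * SizeOf(shape);
//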
InitSizeLists(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/math/linspace.h b/mindspore/ccsrc/plugin/device/gpu/kernel/math/linspace.h index 7c82ef39183..e1bc2cf6092 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/math/linspace.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/math/linspace.h @@ -77,7 +77,7 @@ class LinSpaceGpuKernelMod : public DeprecatedNativeGpuKernelMod { MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of output should be 1, but got " << value_count.size(); } - value_count_ = value_count[0]; + value_count_ = LongToSizeClipNeg(value_count[0]); InitSizeLists(); return true; } diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/math/lu_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/math/lu_gpu_kernel.h index fe4201cba02..348190545d5 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/math/lu_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/math/lu_gpu_kernel.h @@ -157,7 +157,7 @@ class LUGpuKernelMod : public DeprecatedNativeGpuKernelMod { kernel_node_ = kernel_node; // 1. get CuSolver Dense matrix handler handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCusolverDnHandle(); - auto in_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto in_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0)); // 2. check input shape not null is_null_input_ = CHECK_SHAPE_NULL(in_shape, kernel_name_, "input"); if (is_null_input_) { diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/math/lu_solve_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/math/lu_solve_gpu_kernel.h index 3e8779ed454..bf33f797406 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/math/lu_solve_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/math/lu_solve_gpu_kernel.h @@ -133,8 +133,8 @@ class LuSolveGpuKernelMod : public DeprecatedNativeGpuKernelMod { private: bool InitInputSize(const CNodePtr &kernel_node) { - auto input_a_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); - auto input_b_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + auto input_a_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0)); + auto input_b_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1)); constexpr size_t input_min_dim = 1; if (input_a_shape.size() <= input_min_dim || input_b_shape.size() <= input_min_dim) { MS_LOG_EXCEPTION << kernel_name_ << " LuSolveGpuKernelMod input shape size is " << input_a_shape.size() diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/math/matmul_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/math/matmul_gpu_kernel.h index b8f0ccd0a2d..de6df2a509f 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/math/matmul_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/math/matmul_gpu_kernel.h @@ -109,8 +109,8 @@ class MatMulGpuKernelMod : public DeprecatedNativeGpuKernelMod { MS_LOG(INFO) << "input and output type is float16, allow to use Tensor Core operations if possible"; algo_ = CUBLAS_GEMM_DEFAULT_TENSOR_OP; } - auto output_shape = common::AnfAlgo::GetOutputInferShape(kernel_node, 0); - auto input1_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto output_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetOutputInferShape(kernel_node, 0)); + auto input1_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0)); is_null_input_ = CHECK_SHAPE_NULL(input1_shape, kernel_name_, 
"input") || CHECK_SHAPE_NULL(output_shape, kernel_name_, "output"); if (is_null_input_) { diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/math/matrix_inverse_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/math/matrix_inverse_gpu_kernel.h index 938f0990f53..3ab9214b99a 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/math/matrix_inverse_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/math/matrix_inverse_gpu_kernel.h @@ -98,7 +98,7 @@ class MatrixInverseGpuKernelMod : public DeprecatedNativeGpuKernelMod { kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); kernel_node_ = kernel_node; handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCublasHandle(); - auto input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto input_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0)); is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name_, "input"); if (is_null_input_) { InitSizeLists(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/math/matrix_triangular_solve_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/math/matrix_triangular_solve_gpu_kernel.h index 7afba157be7..309f16c96db 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/math/matrix_triangular_solve_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/math/matrix_triangular_solve_gpu_kernel.h @@ -203,8 +203,8 @@ class MatrixTriangularSolveGpuKernelMod : public DeprecatedNativeGpuKernelMod { } void InitShape(const CNodePtr &kernel_node) { - auto a_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); - auto b_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + auto a_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0)); + auto b_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1)); is_null_input_ = CHECK_SHAPE_NULL(a_shape, kernel_name_, "input_a") || CHECK_SHAPE_NULL(b_shape, kernel_name_, "input_b"); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/math/multinomial_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/math/multinomial_gpu_kernel.h index 2df78231947..9412de68d3c 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/math/multinomial_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/math/multinomial_gpu_kernel.h @@ -89,8 +89,8 @@ class MultinomialGpuKernelMod : public DeprecatedNativeGpuKernelMod { if (output_num != 1) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be 1, but got " << output_num; } - auto input_shape_0 = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); - auto output_shape = common::AnfAlgo::GetOutputInferShape(kernel_node, 0); + auto input_shape_0 = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0)); + auto output_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetOutputInferShape(kernel_node, 0)); is_null_input_ = CHECK_SHAPE_NULL(input_shape_0, kernel_name_, "input") || CHECK_SHAPE_NULL(output_shape, kernel_name_, "output"); if (is_null_input_) { diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/math/nms_with_mask_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/math/nms_with_mask_gpu_kernel.h index 1b234d4055c..593dde8ba73 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/math/nms_with_mask_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/math/nms_with_mask_gpu_kernel.h @@ -82,7 +82,7 @@ class NMSWithMaskFwdGpuKernelMod : public 
DeprecatedNativeGpuKernelMod { return true; } - num_input_ = input_shape[0]; // Get N value in [N,5] data + num_input_ = LongToSizeClipNeg(input_shape[0]); // Get N value in [N,5] data ceil_power_2 = NmsRoundUpPower2(num_input_); input_size_ = num_input_ * sizeof(T) * box_size_; // 5 values per bbox diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/math/random_op_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/math/random_op_gpu_kernel.h index 0b0b763cc03..ea7a30709db 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/math/random_op_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/math/random_op_gpu_kernel.h @@ -172,17 +172,14 @@ class RandomOpGpuKernelMod : public DeprecatedNativeGpuKernelMod { MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num; } auto input_shape_0 = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); - auto output_shape = common::AnfAlgo::GetOutputInferShape(kernel_node, 0); + auto output_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetOutputInferShape(kernel_node, 0)); is_null_input_ = CHECK_SHAPE_NULL(input_shape_0, kernel_name, "input") || CHECK_SHAPE_NULL(output_shape, kernel_name, "output"); if (is_null_input_) { InitSizeLists(); return true; } - for (size_t i = 0; i < input_shape_0.size(); i++) { - input_size_0_ *= input_shape_0[i]; - } - input_size_0_ *= sizeof(int); + input_size_0_ *= (sizeof(int) * SizeOf(input_shape_0)); if (random_op_type_ == RANDOM_OP_UNIFORM_INT) { input_size_1_ *= 1; input_size_2_ *= 1; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/math/square_sum_all_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/math/square_sum_all_gpu_kernel.h index 67dacf821ec..07cf8cd0b59 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/math/square_sum_all_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/math/square_sum_all_gpu_kernel.h @@ -57,7 +57,7 @@ class SquareSumAllFwdGpuKernelMod : public DeprecatedNativeGpuKernelMod { return true; } for (size_t i = 0; i < input_shape.size(); i++) { - input_size_ *= input_shape[i]; + input_size_ *= static_cast(input_shape[i]); } InitSizeLists(); return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/math/squared_difference_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/math/squared_difference_kernel.h index dee7305af8c..1421ced365b 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/math/squared_difference_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/math/squared_difference_kernel.h @@ -53,9 +53,9 @@ class SquaredDifferenceOpGpuKernelMod : public DeprecatedNativeGpuKernelMod { bool Init(const CNodePtr &kernel_node) override { auto kernel_name = common::AnfAlgo::GetCNodeName(kernel_node); - auto input_shape1 = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0); - auto input_shape2 = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 1); - auto output_shape = AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, 0); + auto input_shape1 = Convert2SizeTClipNeg(AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0)); + auto input_shape2 = Convert2SizeTClipNeg(AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 1)); + auto output_shape = Convert2SizeTClipNeg(AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, 0)); kernel_node_ = kernel_node; is_null_input_ = CHECK_SHAPE_NULL(input_shape1, kernel_name, "input") || CHECK_SHAPE_NULL(input_shape2, kernel_name, "input") || @@ -77,7 +77,7 @@ class SquaredDifferenceOpGpuKernelMod : public DeprecatedNativeGpuKernelMod { if 
(need_broadcast_) { output_shape_[i] = output_shape[i]; } - output_num_ *= output_shape[i]; + output_num_ *= static_cast(output_shape[i]); } int lhs_offset = output_shape.size() - input_shape1.size(); for (size_t j = 0; j < input_shape1.size(); j++) { diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/math/unary_op_complex_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/math/unary_op_complex_gpu_kernel.h index 73b48e34f78..83878270160 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/math/unary_op_complex_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/math/unary_op_complex_gpu_kernel.h @@ -87,8 +87,8 @@ class UnaryOpComplexGpuKernelMod : public DeprecatedNativeGpuKernelMod { return true; } for (size_t i = 0; i < input_shape.size(); i++) { - input_size_ *= input_shape[i]; - output_size_ *= input_shape[i]; + input_size_ *= static_cast(input_shape[i]); + output_size_ *= static_cast(input_shape[i]); } InitSizeLists(); return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/math/unary_op_grad_gpu_kernel.cc b/mindspore/ccsrc/plugin/device/gpu/kernel/math/unary_op_grad_gpu_kernel.cc index 5a249aecc80..54c985dca93 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/math/unary_op_grad_gpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/math/unary_op_grad_gpu_kernel.cc @@ -110,6 +110,7 @@ bool UnaryGradOpGpuKernelMod::Init(const BaseOperatorPtr &base_operator, const s MS_LOG(ERROR) << "For '" << kernel_name_ << "', the number of outputs should be 1, but got " << output_num; return false; } + auto kernel_attr = GetKernelAttrFromTensors(inputs, outputs); auto [is_match, index] = MatchKernelAttr(kernel_attr, GetOpSupport()); if (!is_match) { diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/math/update_thor_gradient.h b/mindspore/ccsrc/plugin/device/gpu/kernel/math/update_thor_gradient.h index 4c1efbd1c8a..bc347983841 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/math/update_thor_gradient.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/math/update_thor_gradient.h @@ -188,7 +188,7 @@ class UpdateThorGradientGpuKernelMod : public DeprecatedNativeGpuKernelMod { private: void SetProperty(const CNodePtr &kernel_node) { auto matrix_a_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); - auto gradient_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + auto gradient_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1)); auto matrix_g_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); is_null_input_ = CHECK_SHAPE_NULL(matrix_a_shape, kernel_name_, "matrix_a") || CHECK_SHAPE_NULL(gradient_shape, kernel_name_, "gradient") || diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nccl/nccl_collective_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nccl/nccl_collective_gpu_kernel.h index c67741a66a7..76b3e24bf5b 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nccl/nccl_collective_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nccl/nccl_collective_gpu_kernel.h @@ -92,7 +92,7 @@ class NcclCollectiveGpuKernel : public NcclGpuKernelMod { } size_t size = sizeof(T); for (size_t j = 0; j < shape.size(); j++) { - size *= IntToSize(shape[j]); + size *= LongToSizeClipNeg(shape[j]); } size_t aligned_size = (nccl_kernel_type_ != NCCL_ALL_REDUCE) ? 
size : AlignMemorySize(size); input_size_list_.push_back(aligned_size); @@ -107,7 +107,7 @@ class NcclCollectiveGpuKernel : public NcclGpuKernelMod { } size_t size = sizeof(T); for (size_t j = 0; j < shape.size(); j++) { - size *= IntToSize(shape[j]); + size *= LongToSizeClipNeg(shape[j]); } size_t aligned_size = (nccl_kernel_type_ != NCCL_ALL_REDUCE) ? size : AlignMemorySize(size); output_size_list_.push_back(aligned_size); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nccl/nccl_p2p_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nccl/nccl_p2p_gpu_kernel.h index 15b58bb960e..6cc95908f44 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nccl/nccl_p2p_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nccl/nccl_p2p_gpu_kernel.h @@ -78,7 +78,7 @@ class NcclP2PGpuKernel : public NcclGpuKernelMod { } size_t size = sizeof(T); for (size_t j = 0; j < shape.size(); j++) { - size *= IntToSize(shape[j]); + size *= LongToSizeClipNeg(shape[j]); } input_size_list_.push_back(size); input_size_ += size; @@ -92,7 +92,7 @@ class NcclP2PGpuKernel : public NcclGpuKernelMod { } size_t size = sizeof(I); for (size_t j = 0; j < shape.size(); j++) { - size *= IntToSize(shape[j]); + size *= LongToSizeClipNeg(shape[j]); } output_size_list_.push_back(size); output_size_ += size; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nccl/nccl_recv_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nccl/nccl_recv_gpu_kernel.h index dc12f073f31..98011f75531 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nccl/nccl_recv_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nccl/nccl_recv_gpu_kernel.h @@ -53,7 +53,7 @@ class NcclRecvGpuKernel : public NcclGpuKernelMod { group_name_ = GetAttr(kernel_node, kAttrGroup); nccl_data_type_ = nccl_dtype(AnfAlgo::GetOutputDeviceDataType(kernel_node, 0)); - auto output_shape = common::AnfAlgo::GetOutputInferShape(kernel_node, 0); + auto output_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetOutputInferShape(kernel_node, 0)); is_null_input_ = CHECK_SHAPE_NULL(output_shape, kernel_name, "output"); if (is_null_input_) { InitSizeLists(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nccl/nccl_send_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nccl/nccl_send_gpu_kernel.h index 7eaa3105278..d24a22a1a60 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nccl/nccl_send_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nccl/nccl_send_gpu_kernel.h @@ -55,7 +55,7 @@ class NcclSendGpuKernel : public NcclGpuKernelMod { nccl_data_type_ = nccl_dtype(AnfAlgo::GetInputDeviceDataType(kernel_node, 0)); MS_LOG(INFO) << "NcclSend dest rank is " << dest_rank_ << ", group name is " << group_name_; - auto input_shape = common::AnfAlgo::GetOutputInferShape(kernel_node, 0); + auto input_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetOutputInferShape(kernel_node, 0)); is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); if (is_null_input_) { InitSizeLists(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/activation_gpu_kernel.cc b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/activation_gpu_kernel.cc index d6a2764ed09..083d921ec6e 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/activation_gpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/activation_gpu_kernel.cc @@ -85,10 +85,8 @@ int ActivationFwdGpuKernelMod::Resize(const BaseOperatorPtr &base_operator, cons MS_LOG(ERROR) << "For '" << kernel_name_ << "', the number of inputs must be 1, but got " << input_num; return 
KRET_RESIZE_FAILED; } - auto input_shape = inputs.at(kIndex0)->GetShapeVector(); - input_shape_.clear(); - (void)std::transform(input_shape.begin(), input_shape.end(), std::back_inserter(input_shape_), LongToSize); - size_t input_element_num = std::accumulate(input_shape_.begin(), input_shape_.end(), 1, std::multiplies()); + input_shape_ = inputs.at(kIndex0)->GetShapeVector(); + size_t input_element_num = SizeOf(input_shape_); is_null_input_ = (input_element_num == 0); if (is_null_input_) { return KRET_OK; @@ -109,7 +107,7 @@ int ActivationFwdGpuKernelMod::Resize(const BaseOperatorPtr &base_operator, cons "For 'Activation', cudnnCreateActivationDescriptor failed."); cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(inputs.at(kIndex0)->GetDtype())); CheckTensorSize({input_shape_}); - std::vector shape; + ShapeVector shape; double coef = (mode_ == CUDNN_ACTIVATION_CLIPPED_RELU) ? 6.0 : 0.0; if (mode_ == CUDNN_ACTIVATION_ELU) { auto elu_ptr = std::dynamic_pointer_cast(base_operator); @@ -121,17 +119,17 @@ int ActivationFwdGpuKernelMod::Resize(const BaseOperatorPtr &base_operator, cons cudnnSetActivationDescriptor(activation_desc_, mode_, CUDNN_NOT_PROPAGATE_NAN, coef), "For 'Activation', cudnnSetActivationDescriptor failed."); const int split_dim = 4; - if (input_shape.size() <= split_dim) { + if (input_shape_.size() <= split_dim) { ShapeNdTo4d(input_shape_, &shape); if (inputs.at(kIndex0)->GetFormat() == mindspore::Format::NHWC) { CHECK_CUDNN_RET_WITH_EXCEPT_NOTRACE( - cudnnSetTensor4dDescriptor(data_descriptor_, CUDNN_TENSOR_NHWC, cudnn_data_type_, SizeToInt(shape[0]), - SizeToInt(shape[3]), SizeToInt(shape[1]), SizeToInt(shape[2])), + cudnnSetTensor4dDescriptor(data_descriptor_, CUDNN_TENSOR_NHWC, cudnn_data_type_, LongToInt(shape[0]), + LongToInt(shape[3]), LongToInt(shape[1]), LongToInt(shape[2])), "For 'Activation', cudnnSetTensor4dDescriptor failed."); } else { CHECK_CUDNN_RET_WITH_EXCEPT_NOTRACE( - cudnnSetTensor4dDescriptor(data_descriptor_, CUDNN_TENSOR_NCHW, cudnn_data_type_, SizeToInt(shape[0]), - SizeToInt(shape[1]), SizeToInt(shape[2]), SizeToInt(shape[3])), + cudnnSetTensor4dDescriptor(data_descriptor_, CUDNN_TENSOR_NCHW, cudnn_data_type_, LongToInt(shape[0]), + LongToInt(shape[1]), LongToInt(shape[2]), LongToInt(shape[3])), "For 'Activation', cudnnSetTensor4dDescriptor failed."); } } else { diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/activation_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/activation_gpu_kernel.h index c5a1e7d373b..57784a219e2 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/activation_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/activation_gpu_kernel.h @@ -50,6 +50,7 @@ class ActivationFwdGpuKernelMod : public NativeGpuKernelMod { if (is_null_input_) { return true; } + return kernel_func_(this, inputs, outputs); } @@ -72,7 +73,7 @@ class ActivationFwdGpuKernelMod : public NativeGpuKernelMod { kernel_attr_map_; std::string kernel_name_{kUnKnown}; ActivationFunc kernel_func_; - std::vector input_shape_{}; + ShapeVector input_shape_{}; bool is_null_input_{true}; cudnnHandle_t cudnn_handle_{nullptr}; cudnnActivationDescriptor_t activation_desc_{nullptr}; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/activation_grad_kernel.cc b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/activation_grad_kernel.cc index 28d65dfb87d..7f9c6c35749 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/activation_grad_kernel.cc +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/activation_grad_kernel.cc @@ -84,10 +84,8 @@ 
int ActivationGradGpuKernelMod::Resize(const BaseOperatorPtr &base_operator, con MS_LOG(ERROR) << "For '" << kernel_name_ << "', the number of inputs must be 2, but got " << input_num; return KRET_RESIZE_FAILED; } - auto input_shape = inputs.at(kIndex0)->GetShapeVector(); - input_shape_.clear(); - (void)std::transform(input_shape.begin(), input_shape.end(), std::back_inserter(input_shape_), LongToSize); - size_t input_element_num = std::accumulate(input_shape_.begin(), input_shape_.end(), 1, std::multiplies()); + input_shape_ = inputs.at(kIndex0)->GetShapeVector(); + size_t input_element_num = SizeOf(input_shape_); is_null_input_ = (input_element_num == 0); if (is_null_input_) { return KRET_OK; @@ -108,24 +106,24 @@ int ActivationGradGpuKernelMod::Resize(const BaseOperatorPtr &base_operator, con "For 'ActivationGrad', cudnnCreateActivationDescriptor failed."); cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(inputs.at(kIndex0)->GetDtype())); CheckTensorSize({input_shape_}); - std::vector shape; + ShapeVector shape; double coef = (mode_ == CUDNN_ACTIVATION_CLIPPED_RELU) ? ReLU6_UP_TURNING_POINT : 0.0; if (mode_ == CUDNN_ACTIVATION_ELU) coef = 1.0; CHECK_CUDNN_RET_WITH_EXCEPT_NOTRACE(cudnnSetActivationDescriptor(activation_desc_, mode_, CUDNN_PROPAGATE_NAN, coef), "For 'ActivationGrad', cudnnSetActivationDescriptor failed."); const int split_dim = 4; - if (input_shape.size() <= split_dim) { + if (input_shape_.size() <= split_dim) { ShapeNdTo4d(input_shape_, &shape); if (inputs.at(kIndex0)->GetFormat() == mindspore::Format::NHWC) { CHECK_CUDNN_RET_WITH_EXCEPT_NOTRACE( - cudnnSetTensor4dDescriptor(data_descriptor_, CUDNN_TENSOR_NHWC, cudnn_data_type_, SizeToInt(shape[0]), - SizeToInt(shape[3]), SizeToInt(shape[1]), SizeToInt(shape[2])), + cudnnSetTensor4dDescriptor(data_descriptor_, CUDNN_TENSOR_NHWC, cudnn_data_type_, LongToInt(shape[0]), + LongToInt(shape[3]), LongToInt(shape[1]), LongToInt(shape[2])), "For 'ActivationGrad', cudnnSetTensor4dDescriptor failed."); } else { CHECK_CUDNN_RET_WITH_EXCEPT_NOTRACE( - cudnnSetTensor4dDescriptor(data_descriptor_, CUDNN_TENSOR_NCHW, cudnn_data_type_, SizeToInt(shape[0]), - SizeToInt(shape[1]), SizeToInt(shape[2]), SizeToInt(shape[3])), + cudnnSetTensor4dDescriptor(data_descriptor_, CUDNN_TENSOR_NCHW, cudnn_data_type_, LongToInt(shape[0]), + LongToInt(shape[1]), LongToInt(shape[2]), LongToInt(shape[3])), "For 'ActivationGrad', cudnnSetTensor4dDescriptor failed."); } } else { diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/activation_grad_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/activation_grad_kernel.h index ff356884134..9ebd7768c2c 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/activation_grad_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/activation_grad_kernel.h @@ -50,6 +50,7 @@ class ActivationGradGpuKernelMod : public NativeGpuKernelMod { if (is_null_input_) { return true; } + return kernel_func_(this, inputs, outputs); } @@ -72,7 +73,7 @@ class ActivationGradGpuKernelMod : public NativeGpuKernelMod { kernel_attr_map_; std::string kernel_name_{kUnKnown}; ActivationGradFunc kernel_func_; - std::vector input_shape_{}; + ShapeVector input_shape_{}; bool is_null_input_{true}; cudnnHandle_t cudnn_handle_{nullptr}; cudnnActivationDescriptor_t activation_desc_{nullptr}; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/adagrad_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/adagrad_gpu_kernel.h index fbe0c43b4bc..4b1e6febe25 100644 --- 
a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/adagrad_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/adagrad_gpu_kernel.h @@ -84,21 +84,15 @@ class AdagradGpuKernelMod : public DeprecatedNativeGpuKernelMod { is_null_input_ = CHECK_SHAPE_NULL(variable_shape, kernel_name_, "var") || CHECK_SHAPE_NULL(accumulation_shape, kernel_name_, "accum") || CHECK_SHAPE_NULL(gradient_shape, kernel_name_, "grad"); - if (is_null_input_) { + if (is_null_input_ || AnfAlgo::IsShapesDynamic({variable_shape, accumulation_shape, gradient_shape})) { InitSizeLists(); return true; } - for (size_t i = 0; i < variable_shape.size(); i++) { - variable_size_ *= variable_shape[i]; - } - for (size_t i = 0; i < accumulation_shape.size(); i++) { - accumulation_size_ *= accumulation_shape[i]; - } + variable_size_ *= SizeOf(variable_shape); + accumulation_size_ *= SizeOf(accumulation_shape); + gradient_size_ *= SizeOf(gradient_shape); - for (size_t i = 0; i < gradient_shape.size(); i++) { - gradient_size_ *= gradient_shape[i]; - } InitSizeLists(); return true; } diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/adam_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/adam_gpu_kernel.h index 01d548a7b37..032f8993ce7 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/adam_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/adam_gpu_kernel.h @@ -91,25 +91,15 @@ class AdamGpuKernelMod : public DeprecatedNativeGpuKernelMod { is_null_input_ = CHECK_SHAPE_NULL(variable_shape, kernel_name_, "var") || CHECK_SHAPE_NULL(m_shape, kernel_name_, "m") || CHECK_SHAPE_NULL(v_shape, kernel_name_, "v") || CHECK_SHAPE_NULL(gradient_shape, kernel_name_, "gradient"); - if (is_null_input_) { + if (is_null_input_ || AnfAlgo::IsShapesDynamic({variable_shape, m_shape, v_shape, gradient_shape})) { InitSizeLists(); return true; } - for (size_t i = 0; i < variable_shape.size(); i++) { - variable_size_ *= variable_shape[i]; - } - for (size_t i = 0; i < m_shape.size(); i++) { - m_size_ *= m_shape[i]; - } - - for (size_t i = 0; i < v_shape.size(); i++) { - v_size_ *= v_shape[i]; - } - - for (size_t i = 0; i < gradient_shape.size(); i++) { - gradient_size_ *= gradient_shape[i]; - } + variable_size_ *= SizeOf(variable_shape); + m_size_ *= SizeOf(m_shape); + v_size_ *= SizeOf(v_shape); + gradient_size_ *= SizeOf(gradient_shape); InitSizeLists(); return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/adam_weight_decay_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/adam_weight_decay_gpu_kernel.h index 9dcfd6e1127..a31ad3d9bf0 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/adam_weight_decay_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/adam_weight_decay_gpu_kernel.h @@ -88,25 +88,14 @@ class AdamWeightDecayGpuKernelMod : public DeprecatedNativeGpuKernelMod { is_null_input_ = CHECK_SHAPE_NULL(variable_shape, kernel_name_, "var") || CHECK_SHAPE_NULL(m_shape, kernel_name_, "m") || CHECK_SHAPE_NULL(v_shape, kernel_name_, "v") || CHECK_SHAPE_NULL(gradient_shape, kernel_name_, "gradient"); - if (is_null_input_) { + if (is_null_input_ || AnfAlgo::IsShapesDynamic({variable_shape, m_shape, v_shape, gradient_shape})) { InitSizeLists(); return true; } - for (size_t i = 0; i < variable_shape.size(); i++) { - variable_size_ *= variable_shape[i]; - } - - for (size_t i = 0; i < m_shape.size(); i++) { - m_size_ *= m_shape[i]; - } - - for (size_t i = 0; i < v_shape.size(); i++) { - v_size_ *= v_shape[i]; - } - - for (size_t i = 0; i < gradient_shape.size(); i++) { - gradient_size_ 
*= gradient_shape[i]; - } + variable_size_ *= SizeOf(variable_shape); + m_size_ *= SizeOf(m_shape); + v_size_ *= SizeOf(v_shape); + gradient_size_ *= SizeOf(gradient_shape); InitSizeLists(); return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/adaptive_avg_pool2d_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/adaptive_avg_pool2d_gpu_kernel.h index 6def031213e..c0e6b81e4c4 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/adaptive_avg_pool2d_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/adaptive_avg_pool2d_gpu_kernel.h @@ -83,7 +83,7 @@ class AdaptiveAvgPool2DKernelMod : public DeprecatedNativeGpuKernelMod { auto output_shape = common::AnfAlgo::GetOutputInferShape(kernel_node, 0); is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name_, "input") || CHECK_SHAPE_NULL(output_shape, kernel_name_, "output"); - if (is_null_input_) { + if (is_null_input_ || AnfAlgo::IsShapesDynamic({input_shape, output_shape})) { InitSizeLists(); return true; } @@ -97,13 +97,8 @@ class AdaptiveAvgPool2DKernelMod : public DeprecatedNativeGpuKernelMod { input_height = static_cast(input_shape[len - 2]); input_width = static_cast(input_shape[len - 1]); size = static_cast(len == 3 ? input_shape[0] : input_shape[0] * input_shape[1]); - for (uint i = 0; i < len; i++) { - input_size_ *= input_shape[i]; - } - - for (size_t i = 0; i < output_shape.size(); i++) { - output_size_ *= output_shape[i]; - } + input_size_ *= SizeOf(input_shape); + output_size_ *= SizeOf(output_shape); InitSizeLists(); return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/adaptive_avg_pool2d_grad_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/adaptive_avg_pool2d_grad_gpu_kernel.h index dabda550905..454a61204d8 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/adaptive_avg_pool2d_grad_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/adaptive_avg_pool2d_grad_gpu_kernel.h @@ -74,7 +74,7 @@ class AdaptiveAvgPool2DGradKernelMod : public DeprecatedNativeGpuKernelMod { auto output_shape = common::AnfAlgo::GetOutputInferShape(kernel_node, 0); // dx is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name_, "input") || CHECK_SHAPE_NULL(output_shape, kernel_name_, "output"); - if (is_null_input_) { + if (is_null_input_ || AnfAlgo::IsShapesDynamic({input_shape, output_shape})) { InitSizeLists(); return true; } @@ -93,15 +93,11 @@ class AdaptiveAvgPool2DGradKernelMod : public DeprecatedNativeGpuKernelMod { input_width_ = static_cast(input_shape[input_rank - 1]); size_ = static_cast(input_rank == (kAdaptiveAvgPool2dGradMinRank + 1) ? 
input_shape[0] : input_shape[0] * input_shape[1]); - for (uint i = 0; i < input_rank; i++) { - input_size_ *= input_shape[i]; - } + input_size_ *= SizeOf(input_shape); output_height_ = static_cast(output_shape[output_rank - 2]); output_width_ = static_cast(output_shape[output_rank - 1]); - for (size_t i = 0; i < output_shape.size(); i++) { - output_size_ *= output_shape[i]; - } + output_size_ *= SizeOf(output_shape); InitSizeLists(); return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/apply_gradient_descent_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/apply_gradient_descent_gpu_kernel.h index c033a336f53..325ceaa5218 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/apply_gradient_descent_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/apply_gradient_descent_gpu_kernel.h @@ -58,14 +58,11 @@ class ApplyGradientDescentKernelMod : public DeprecatedNativeGpuKernelMod { } auto input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name_, "var"); - if (is_null_input_) { + if (is_null_input_ || IsDynamic(input_shape)) { InitSizeLists(); return true; } - input_size_ = 1; - for (size_t i = 0; i < input_shape.size(); i++) { - input_size_ *= input_shape[i]; - } + input_size_ = SizeOf(input_shape); InitSizeLists(); return true; } diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/batch_norm_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/batch_norm_gpu_kernel.h index a93c867e20f..ce729ef0be9 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/batch_norm_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/batch_norm_gpu_kernel.h @@ -117,6 +117,10 @@ class BatchNormGpuKernelMod : public DeprecatedNativeGpuKernelMod { << shape.size(); } is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name, "input"); + if (IsDynamic(shape)) { + return true; + } + if (is_null_input_) { InitSizeLists(); return true; @@ -242,26 +246,26 @@ class BatchNormGpuKernelMod : public DeprecatedNativeGpuKernelMod { } private: - void SetTensorDescriptor(const std::string &format, const std::vector &shape) { + void SetTensorDescriptor(const std::string &format, const ShapeVector &shape) { cudnnTensorFormat_t cudnn_format; int batch, channel, height, width; if (shape.size() == 2) { - batch = SizeToInt(shape[0]); - channel = SizeToInt(shape[1]); + batch = LongToInt(shape[0]); + channel = LongToInt(shape[1]); height = 1; width = 1; cudnn_format = CUDNN_TENSOR_NCHW; } else if (format == kOpFormat_NHWC) { - batch = SizeToInt(shape[0]); - height = SizeToInt(shape[1]); - width = SizeToInt(shape[2]); - channel = SizeToInt(shape[3]); + batch = LongToInt(shape[0]); + height = LongToInt(shape[1]); + width = LongToInt(shape[2]); + channel = LongToInt(shape[3]); cudnn_format = CUDNN_TENSOR_NHWC; } else { - batch = SizeToInt(shape[0]); - channel = SizeToInt(shape[1]); - height = SizeToInt(shape[2]); - width = SizeToInt(shape[3]); + batch = LongToInt(shape[0]); + channel = LongToInt(shape[1]); + height = LongToInt(shape[2]); + width = LongToInt(shape[3]); cudnn_format = CUDNN_TENSOR_NCHW; } CHECK_CUDNN_RET_WITH_EXCEPT( diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/batch_norm_grad_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/batch_norm_grad_gpu_kernel.h index 2c7fb95c1b0..31767493799 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/batch_norm_grad_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/batch_norm_grad_gpu_kernel.h @@ -124,6 +124,11 @@ 
class BatchNormGradGpuKernelMod : public DeprecatedNativeGpuKernelMod { MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input must be 2 or 4, but got " << shape.size(); } + + if (IsDynamic(shape)) { + return true; + } + is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name, "input"); if (is_null_input_) { InitSizeLists(); @@ -245,25 +250,25 @@ class BatchNormGradGpuKernelMod : public DeprecatedNativeGpuKernelMod { } private: - void SetTensorDescriptor(const std::string &format, const std::vector &shape) { + void SetTensorDescriptor(const std::string &format, const ShapeVector &shape) { cudnnTensorFormat_t cudnn_format; if (shape.size() == 2) { - batch_ = SizeToInt(shape[0]); - channel_ = SizeToInt(shape[1]); + batch_ = LongToInt(shape[0]); + channel_ = LongToInt(shape[1]); height_ = 1; width_ = 1; cudnn_format = CUDNN_TENSOR_NCHW; } else if (format == kOpFormat_NHWC) { - batch_ = SizeToInt(shape[0]); - height_ = SizeToInt(shape[1]); - width_ = SizeToInt(shape[2]); - channel_ = SizeToInt(shape[3]); + batch_ = LongToInt(shape[0]); + height_ = LongToInt(shape[1]); + width_ = LongToInt(shape[2]); + channel_ = LongToInt(shape[3]); cudnn_format = CUDNN_TENSOR_NHWC; } else { - batch_ = SizeToInt(shape[0]); - channel_ = SizeToInt(shape[1]); - height_ = SizeToInt(shape[2]); - width_ = SizeToInt(shape[3]); + batch_ = LongToInt(shape[0]); + channel_ = LongToInt(shape[1]); + height_ = LongToInt(shape[2]); + width_ = LongToInt(shape[3]); cudnn_format = CUDNN_TENSOR_NCHW; } diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/bce_with_logits_loss_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/bce_with_logits_loss_kernel.h index dea55c30235..38f4635db16 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/bce_with_logits_loss_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/bce_with_logits_loss_kernel.h @@ -81,7 +81,7 @@ class BCEWithLogitsLossKernelMod : public DeprecatedNativeGpuKernelMod { is_null_input_ = CHECK_SHAPE_NULL(input_shape_, kernel_name_, "logits") || CHECK_SHAPE_NULL(weight_shape_, kernel_name_, "weight") || CHECK_SHAPE_NULL(pos_weight_shape_, kernel_name_, "pos_weight"); - if (is_null_input_) { + if (is_null_input_ || AnfAlgo::IsShapesDynamic({input_shape_, weight_shape_, pos_weight_shape_})) { InitSizeLists(); return true; } @@ -97,25 +97,16 @@ class BCEWithLogitsLossKernelMod : public DeprecatedNativeGpuKernelMod { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of pos_weight cannot be less than 1, but got " << pos_weight_shape_.size(); } - input_size_ = 1; if (input_shape_.size() > MAX_LOGITS_DIMENSION) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of logits cannot be greater than " << MAX_LOGITS_DIMENSION << ", but got " << input_shape_.size(); } - for (size_t i = 0; i < input_shape_.size(); i++) { - input_size_ *= input_shape_[i]; - } + input_size_ = SizeOf(input_shape_); // weight shape - weight_size_ = 1; - for (size_t i = 0; i < weight_shape_.size(); i++) { - weight_size_ *= weight_shape_[i]; - } + weight_size_ = SizeOf(weight_shape_); weight_need_broadcast_ = NeedBroadcast(&weight_shape_, input_shape_); // pos_weight shape - pos_weight_size_ = 1; - for (size_t i = 0; i < pos_weight_shape_.size(); i++) { - pos_weight_size_ *= pos_weight_shape_[i]; - } + pos_weight_size_ = SizeOf(pos_weight_shape_); pos_weight_need_broadcast_ = NeedBroadcast(&pos_weight_shape_, input_shape_); InitSizeLists(); return true; @@ -153,7 +144,7 @@ class BCEWithLogitsLossKernelMod : public DeprecatedNativeGpuKernelMod { 
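//
// The early returns added in the hunks above (adagrad, adam, adam_weight_decay,
// adaptive_avg_pool2d, batch_norm, bce_with_logits_loss, ...) skip size
// computation while any shape is still dynamic rather than folding negative
// dims into byte counts. A hedged sketch of the predicates involved (assumed
// encoding: dynamic dims are negative; MindSpore's own definitions may differ):
#include <algorithm>
#include <cstdint>
#include <vector>

inline bool IsDynamic(const std::vector<int64_t> &shape) {
  return std::any_of(shape.begin(), shape.end(), [](int64_t d) { return d < 0; });
}

// AnfAlgo::IsShapesDynamic({a, b, c}) then reduces to an any-of over shapes.
inline bool IsShapesDynamic(const std::vector<std::vector<int64_t>> &shapes) {
  return std::any_of(shapes.begin(), shapes.end(),
                     [](const std::vector<int64_t> &s) { return IsDynamic(s); });
}
//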
} private: - bool NeedBroadcast(std::vector *shape, const std::vector &result_shape) { + bool NeedBroadcast(ShapeVector *shape, const ShapeVector &result_shape) { // result_shape is larger that shape // and shape is able to broadcasted to result_shape if (shape->size() < result_shape.size()) { @@ -176,9 +167,9 @@ class BCEWithLogitsLossKernelMod : public DeprecatedNativeGpuKernelMod { bool pos_weight_need_broadcast_; bool is_null_input_; std::string kernel_name_; - std::vector input_shape_; - std::vector weight_shape_; - std::vector pos_weight_shape_; + ShapeVector input_shape_; + ShapeVector weight_shape_; + ShapeVector pos_weight_shape_; }; } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/bias_add_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/bias_add_gpu_kernel.h index 4e696f21530..0a8a309c65e 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/bias_add_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/bias_add_gpu_kernel.h @@ -63,6 +63,9 @@ class BiasAddGpuKernelMod : public DeprecatedNativeGpuKernelMod { InitResource(); cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))); auto x_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + if (IsDynamic(x_shape)) { + return true; + } auto num_dims = x_shape.size(); is_null_input_ = CHECK_SHAPE_NULL(x_shape, kernel_name_, "input_x"); if (is_null_input_) { @@ -86,8 +89,8 @@ class BiasAddGpuKernelMod : public DeprecatedNativeGpuKernelMod { std::unique_ptr x_dims = std::make_unique(cudnn_dims); std::unique_ptr b_dims = std::make_unique(cudnn_dims); for (size_t i = 0; i < cudnn_dims; i++) { - x_dims[i] = (i < num_dims) ? SizeToInt(x_shape[i]) : 1; - b_dims[i] = (i == pos) ? SizeToInt(x_shape[i]) : 1; + x_dims[i] = (i < num_dims) ? LongToInt(x_shape[i]) : 1; + b_dims[i] = (i == pos) ? LongToInt(x_shape[i]) : 1; } auto input_device_format = AnfAlgo::GetInputFormat(kernel_node, 0); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/bias_add_grad_gpu_kenel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/bias_add_grad_gpu_kenel.h index f193505db5e..4938577efcd 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/bias_add_grad_gpu_kenel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/bias_add_grad_gpu_kenel.h @@ -86,7 +86,7 @@ class BiasAddGradGpuKernelMod : public DeprecatedNativeGpuKernelMod { kernel_node_ = kernel_node; auto dy_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); is_null_input_ = CHECK_SHAPE_NULL(dy_shape, kernel_name_, "input"); - if (is_null_input_) { + if (is_null_input_ || IsDynamic(dy_shape)) { InitSizeLists(); return true; } @@ -103,7 +103,7 @@ class BiasAddGradGpuKernelMod : public DeprecatedNativeGpuKernelMod { if (pos == std::string::npos || pos >= num_dims_) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', 'C' character must be in 'format', but got " << format; } - bias_size_ = dy_shape[pos]; + bias_size_ = LongToSizeClipNeg(dy_shape[pos]); auto num_dims_fix = std::max(num_dims_, 4UL); for (size_t i = 0; i < num_dims_fix; i++) { dy_shape_.push_back((i < num_dims_) ? 
dy_shape[i] : 1); @@ -112,12 +112,8 @@ class BiasAddGradGpuKernelMod : public DeprecatedNativeGpuKernelMod { same_dims_ = false; } } - for (size_t i = 0; i < dy_shape_.size(); i++) { - dy_num_ *= dy_shape_[i]; - } - for (size_t i = 0; i < db_shape_.size(); i++) { - db_num_ *= db_shape_[i]; - } + dy_num_ *= SizeOf(dy_shape_); + db_num_ *= SizeOf(db_shape_); data_format_ = input_device_format; // for opt implementation if (format == kOpFormat_NHWC) { data_format_ = kOpFormat_NHWC; @@ -193,8 +189,8 @@ class BiasAddGradGpuKernelMod : public DeprecatedNativeGpuKernelMod { std::unique_ptr dy_dims = std::make_unique(cudnn_dims); std::unique_ptr db_dims = std::make_unique(cudnn_dims); for (size_t i = 0; i < cudnn_dims; i++) { - dy_dims[i] = SizeToInt(dy_shape_[i]); - db_dims[i] = SizeToInt(db_shape_[i]); + dy_dims[i] = LongToInt(dy_shape_[i]); + db_dims[i] = LongToInt(db_shape_[i]); } CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnSetTensorNdDescriptorEx(dy_desc_, cudnn_compute_format_, cudnn_data_type_, @@ -243,9 +239,9 @@ class BiasAddGradGpuKernelMod : public DeprecatedNativeGpuKernelMod { size_t dy_num_; // for own implementation size_t db_num_; size_t num_dims_; - size_t bias_size_; // for own implementation - std::vector dy_shape_; // for own implementation - std::vector db_shape_; // for own implementation + size_t bias_size_; // for own implementation + ShapeVector dy_shape_; // for own implementation + ShapeVector db_shape_; // for own implementation std::string data_format_ = kOpFormat_NCHW; // for cudnn implementation cudnnHandle_t cudnn_handle_; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/binary_cross_entropy_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/binary_cross_entropy_gpu_kernel.h index dc4e8d789b3..d0962b7645b 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/binary_cross_entropy_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/binary_cross_entropy_gpu_kernel.h @@ -62,15 +62,13 @@ class BinaryCrossEntropyGpuKernelMod : public DeprecatedNativeGpuKernelMod { auto input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); kernel_node_ = kernel_node; is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name_, "logits"); - if (is_null_input_) { + if (is_null_input_ || IsDynamic(input_shape)) { InitSizeLists(); return true; } size_t input_num = common::AnfAlgo::GetInputTensorNum(kernel_node); weight_defined_ = (input_num == 3); - for (size_t i = 0; i < input_shape.size(); i++) { - input_size_ *= input_shape[i]; - } + input_size_ *= SizeOf(input_shape); string reduction = GetAttr(kernel_node, "reduction"); reduction_ = kReductionModeMap[reduction]; workspace_size_ = sizeof(T); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/binary_cross_entropy_grad_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/binary_cross_entropy_grad_kernel.h index 7e766f70e60..cd34650dbb5 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/binary_cross_entropy_grad_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/binary_cross_entropy_grad_kernel.h @@ -62,15 +62,13 @@ class BinaryCrossEntropyGradGpuKernelMod : public DeprecatedNativeGpuKernelMod { auto input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); kernel_node_ = kernel_node; is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name_, "input"); - if (is_null_input_) { + if (is_null_input_ || IsDynamic(input_shape)) { InitSizeLists(); return true; } size_t input_num = common::AnfAlgo::GetInputTensorNum(kernel_node); weight_defined_ = 
(input_num == 4); - for (size_t i = 0; i < input_shape.size(); i++) { - input_size_ *= input_shape[i]; - } + input_size_ *= SizeOf(input_shape); string reduction = GetAttr(kernel_node, "reduction"); reduction_ = kReductionModeMap[reduction]; InitSizeLists(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/clip_by_norm_gpu_kernel.cc b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/clip_by_norm_gpu_kernel.cc index a008d79de0f..65d3b9c8a5d 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/clip_by_norm_gpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/clip_by_norm_gpu_kernel.cc @@ -142,18 +142,21 @@ bool ClipByNormGpuKernelMod::DoLaunch(const std::vector &input l2norm_output_addr), kernel_name_ + " running cudnnReduceTensor::cudnnReduceTensorNorm2 failed."); } + auto l2_norm_lhs_shape_size = Convert2SizeTClipNeg(l2_norm_lhs_shape_); + auto l2_norm_rhs_shape_size = Convert2SizeTClipNeg(l2_norm_rhs_shape_); + auto l2_norm_ouths_shape_size = Convert2SizeTClipNeg(l2_norm_ouths_shape_); // Calculation std::max(l2_norm, epsilon) to keep numerical stability. GetMaxWithEpsAndValue(l2_norm_output_size_ / sizeof(float), epsilon_, l2norm_output_addr, reinterpret_cast(stream_ptr)); // Running `x/l2_norm(x)` and broadcast output shape to `input_x` shape - BroadcastArith(l2_norm_lhs_shape_, l2_norm_rhs_shape_, l2_norm_ouths_shape_, BROADCAST_TYPE_REALDIV, x_float_addr, - l2norm_output_addr, div_output_addr, reinterpret_cast(stream_ptr)); + BroadcastArith(l2_norm_lhs_shape_size, l2_norm_rhs_shape_size, l2_norm_ouths_shape_size, BROADCAST_TYPE_REALDIV, + x_float_addr, l2norm_output_addr, div_output_addr, reinterpret_cast(stream_ptr)); // Running `cast(clip_norm)` to the data type of `input_x` Cast(clip_norm_size_ / sizeof(S), clip_norm_addr, clip_norm_float_addr, reinterpret_cast(stream_ptr)); // Running '(x/l2_norm(x)) * clip_norm' and broadcast output shape to `input_x` shape if (clip_norm_need_broadcast_) { - BroadcastArith(l2_norm_ouths_shape_, clip_norm_rhs_shape_, l2_norm_ouths_shape_, BROADCAST_TYPE_MUL, - div_output_addr, clip_norm_float_addr, clip_norm_mul_output_addr, + BroadcastArith(l2_norm_ouths_shape_size, Convert2SizeTClipNeg(clip_norm_rhs_shape_), l2_norm_ouths_shape_size, + BROADCAST_TYPE_MUL, div_output_addr, clip_norm_float_addr, clip_norm_mul_output_addr, reinterpret_cast(stream_ptr)); } else { ElewiseArith(output_size_ / sizeof(float), BROADCAST_TYPE_MUL, div_output_addr, clip_norm_float_addr, @@ -199,27 +202,23 @@ void ClipByNormGpuKernelMod::InitIOShape(const std::vector &inputs, const std::vector &outputs) { // Get input `x` shape MS_EXCEPTION_IF_NULL(inputs[0]); - const auto x_origin_shape = inputs[0]->GetShapeVector(); - if (!IsValidShape(x_origin_shape)) { + x_shape_ = inputs[0]->GetShapeVector(); + if (!IsValidShape(x_shape_)) { MS_EXCEPTION(ValueError) << "For " << kernel_name_ << ", input `x` is not supported dynamic shape."; } - std::transform(x_origin_shape.begin(), x_origin_shape.end(), std::back_inserter(x_shape_), LongToSize); x_dim_ = x_shape_.size(); // Get input `clip_norm` shape MS_EXCEPTION_IF_NULL(inputs[1]); - const auto clip_norm_origin_shape = inputs[1]->GetShapeVector(); - if (!IsValidShape(clip_norm_origin_shape)) { + clip_norm_shape_ = inputs[1]->GetShapeVector(); + if (!IsValidShape(clip_norm_shape_)) { MS_EXCEPTION(ValueError) << "For " << kernel_name_ << ", input `clip_norm` is not supported dynamic shape."; } - std::transform(clip_norm_origin_shape.begin(), clip_norm_origin_shape.end(), std::back_inserter(clip_norm_shape_), - LongToSize); // Get output shape MS_EXCEPTION_IF_NULL(outputs[0]); - const auto output_origin_shape = outputs[0]->GetShapeVector(); - if
(!IsValidShape(output_origin_shape)) { + output_shape_ = outputs[0]->GetShapeVector(); + if (!IsValidShape(output_shape_)) { MS_EXCEPTION(ValueError) << "For " << kernel_name_ << ", output shape is not supported dynamic shape."; } - std::transform(output_origin_shape.begin(), output_origin_shape.end(), std::back_inserter(output_shape_), LongToSize); MS_EXCEPTION_IF_CHECK_FAIL(output_shape_ == x_shape_, "Output shape should be same with input x shape."); } @@ -329,9 +328,7 @@ void ClipByNormGpuKernelMod::InitSizeLists() { // size for running '(x/l2_norm(x)) * clip_norm' workspace_size_list_.emplace_back(x_float_size); // Init output size - output_size_ = - std::accumulate(output_shape_.begin(), output_shape_.end(), float_type_size, std::multiplies()); - output_size_ = std::max(float_type_size, output_size_); + output_size_ = float_type_size * SizeOf(output_shape_); output_size_list_.emplace_back(output_size_); } @@ -342,7 +339,7 @@ void ClipByNormGpuKernelMod::DetermineDeviceDataInfoForCudnn(const KernelT // Determine device data info for `inputA_descriptor` constexpr size_t split_dim = 4; if (x_dim_ <= split_dim) { - std::vector x_4d_shape; + ShapeVector x_4d_shape; ShapeNdTo4d(x_shape_, &x_4d_shape); CHECK_CUDNN_RET_WITH_EXCEPT_NOTRACE( cudnnSetTensor4dDescriptor(inputA_descriptor_, CUDNN_TENSOR_NCHW, data_type_, x_4d_shape[kIndex0], @@ -353,7 +350,7 @@ void ClipByNormGpuKernelMod::DetermineDeviceDataInfoForCudnn(const KernelT } // Determine device data info for `outputC_descriptor` if (l2_norm_output_shape_.size() <= split_dim) { - std::vector l2_norm_4d_shape; + ShapeVector l2_norm_4d_shape; ShapeNdTo4d(l2_norm_output_shape_, &l2_norm_4d_shape); CHECK_CUDNN_RET_WITH_EXCEPT_NOTRACE( cudnnSetTensor4dDescriptor(outputC_descriptor_, CUDNN_TENSOR_NCHW, data_type_, l2_norm_4d_shape[kIndex0], diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/clip_by_norm_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/clip_by_norm_gpu_kernel.h index ba240953126..be2b2dffd71 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/clip_by_norm_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/clip_by_norm_gpu_kernel.h @@ -87,17 +87,17 @@ class ClipByNormGpuKernelMod : public NativeGpuKernelMod { size_t output_size_{0}; // variables are used for `l2_norm' calculation std::vector axis_; - std::vector x_shape_; - std::vector l2_norm_output_shape_; + ShapeVector x_shape_; + ShapeVector l2_norm_output_shape_; // variables are used for 'clip_norm' coefficient calculation. 
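The ClipByNorm hunks above lean on three small conversion helpers that recur throughout this patch: SizeOf for element counts, and the clip-negative casts used when handing signed shapes to size_t-based CUDA helpers such as BroadcastArith. Sketches of all three follow, under the same negative-sentinel assumption; the upstream versions sit in the common convert/shape utilities:

#include <cstddef>
#include <cstdint>
#include <functional>
#include <numeric>
#include <vector>

using ShapeVector = std::vector<int64_t>;

// Element count of a shape; replaces the hand-written loops that
// multiplied dimensions one by one. The empty shape yields 1, matching
// the loops' multiplicative identity seed. Callers gate on IsDynamic()
// first, so no negative dimension should reach the product.
inline size_t SizeOf(const ShapeVector &shape) {
  return static_cast<size_t>(
      std::accumulate(shape.begin(), shape.end(), int64_t{1}, std::multiplies<int64_t>()));
}

// A negative (still-dynamic) dimension must not wrap around to ~2^64
// when stored in an unsigned size, so it is clamped to zero instead.
inline size_t LongToSizeClipNeg(int64_t v) { return v < 0 ? 0 : static_cast<size_t>(v); }

// Elementwise form used right before the BroadcastArith calls, whose
// CUDA-facing signature still takes std::vector<size_t>.
inline std::vector<size_t> Convert2SizeTClipNeg(const ShapeVector &shape) {
  std::vector<size_t> result;
  result.reserve(shape.size());
  for (int64_t dim : shape) result.push_back(LongToSizeClipNeg(dim));
  return result;
}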
- std::vector clip_norm_shape_; + ShapeVector clip_norm_shape_; // variables are used for broadcast calculation - std::vector l2_norm_lhs_shape_; // broadcast - std::vector l2_norm_rhs_shape_; // broadcast - std::vector l2_norm_ouths_shape_; // broadcast - std::vector clip_norm_rhs_shape_; // broadcast + ShapeVector l2_norm_lhs_shape_; // broadcast + ShapeVector l2_norm_rhs_shape_; // broadcast + ShapeVector l2_norm_ouths_shape_; // broadcast + ShapeVector clip_norm_rhs_shape_; // broadcast // final output shape of `ClipByNorm` - std::vector output_shape_; + ShapeVector output_shape_; }; } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/combine_momentum_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/combine_momentum_gpu_kernel.h index feb83910312..86ed87d3b10 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/combine_momentum_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/combine_momentum_gpu_kernel.h @@ -71,17 +71,14 @@ class CombineMomentumGpuKernelMod : public DeprecatedNativeGpuKernelMod { input_num_ = 7; } for (size_t i = 0; i < num_; i++) { - element_num_ = 1; auto variable_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, i * input_num_ + input_num_ - 5); is_null_input_ = CHECK_SHAPE_NULL(variable_shape, kernel_name_, "input[" + std::to_string(i * input_num_ + input_num_ - 5) + "]"); - if (is_null_input_) { + if (is_null_input_ || IsDynamic(variable_shape)) { InitSizeLists(); return true; } - for (size_t j = 0; j < variable_shape.size(); j++) { - element_num_ *= variable_shape[j]; - } + element_num_ = SizeOf(variable_shape); elements_.push_back(element_num_); InitSizeLists(); } diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/conv2d_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/conv2d_gpu_kernel.h index 1d2b465d8bb..7be51b20bb2 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/conv2d_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/conv2d_gpu_kernel.h @@ -87,6 +87,11 @@ class Conv2dFwdGpuKernelMod : public DeprecatedNativeGpuKernelMod { auto in_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); auto filter_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1); auto output_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); + + if (AnfAlgo::IsShapesDynamic({in_shape, filter_shape, output_shape})) { + return true; + } + is_null_input_ = CHECK_SHAPE_NULL(in_shape, kernel_name_, "x") || CHECK_SHAPE_NULL(filter_shape, kernel_name_, "weight") || CHECK_SHAPE_NULL(output_shape, kernel_name_, "output"); @@ -129,13 +134,11 @@ class Conv2dFwdGpuKernelMod : public DeprecatedNativeGpuKernelMod { int dimA[NBDIMS]; int strideApadded[NBDIMS]; if (data_format_ == kOpFormat_NCHW || data_format_ == kOpFormat_DEFAULT) { - auto padded_shape = {IntToSize(n_), IntToSize(c_), IntToSize(old_height_ + pad_height_), - IntToSize(old_width_ + pad_width_)}; + ShapeVector padded_shape = {n_, c_, old_height_ + pad_height_, old_width_ + pad_width_}; SetDimA(padded_shape, dimA, nbDims, data_format_); SetStrideA(padded_shape, strideApadded, nbDims, data_format_); } else if (data_format_ == kOpFormat_NHWC) { - auto padded_shape = {IntToSize(n_), IntToSize(old_height_ + pad_height_), IntToSize(old_width_ + pad_width_), - IntToSize(c_)}; + ShapeVector padded_shape = {n_, old_height_ + pad_height_, old_width_ + pad_width_, c_}; SetDimA(padded_shape, dimA, nbDims, data_format_); SetStrideA(padded_shape, strideApadded, nbDims, data_format_); } @@ -284,8 +287,7 @@ class 
Conv2dFwdGpuKernelMod : public DeprecatedNativeGpuKernelMod { } } - void Set4DDesc(const std::vector &in_shape, const std::vector &filter_shape, - const std::vector &output_shape) { + void Set4DDesc(const ShapeVector &in_shape, const ShapeVector &filter_shape, const ShapeVector &output_shape) { const int nbDims = 4; int dimA[NBDIMS]; int strideAin[NBDIMS]; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/conv2d_grad_filter_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/conv2d_grad_filter_gpu_kernel.h index a19315aab6c..96163898904 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/conv2d_grad_filter_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/conv2d_grad_filter_gpu_kernel.h @@ -130,7 +130,7 @@ class ConvGradFilterBkwGpuKernelMod : public DeprecatedNativeGpuKernelMod { auto dy_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); auto in_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1); is_null_input_ = CHECK_SHAPE_NULL(dy_shape, kernel_name_, "dy") || CHECK_SHAPE_NULL(in_shape, kernel_name_, "x"); - if (is_null_input_) { + if (is_null_input_ || AnfAlgo::IsShapesDynamic({in_shape, dy_shape})) { InitSizeLists(); return true; } @@ -139,7 +139,7 @@ class ConvGradFilterBkwGpuKernelMod : public DeprecatedNativeGpuKernelMod { if (format_attr_ == kOpFormat_NHWC) { data_format_ = kOpFormat_NHWC; } - std::vector filter_shape; + ShapeVector filter_shape; GetFilterShape(kernel_node, &filter_shape); CheckTensorSize({in_shape, dy_shape, filter_shape}); if (data_format_ == kOpFormat_NHWC) { @@ -178,13 +178,11 @@ class ConvGradFilterBkwGpuKernelMod : public DeprecatedNativeGpuKernelMod { int dimA[NBDIMS]; int strideApadded[NBDIMS]; if (data_format_ == kOpFormat_NCHW || data_format_ == kOpFormat_DEFAULT) { - auto padded_shape = {IntToSize(n_), IntToSize(c_), IntToSize(old_height_ + pad_height_), - IntToSize(old_width_ + pad_width_)}; + ShapeVector padded_shape = {n_, c_, old_height_ + pad_height_, old_width_ + pad_width_}; SetDimA(padded_shape, dimA, NBDIMS, data_format_); SetStrideA(padded_shape, strideApadded, NBDIMS, data_format_); } else if (data_format_ == kOpFormat_NHWC) { - auto padded_shape = {IntToSize(n_), IntToSize(old_height_ + pad_height_), IntToSize(old_width_ + pad_width_), - IntToSize(c_)}; + ShapeVector padded_shape = {n_, old_height_ + pad_height_, old_width_ + pad_width_, c_}; SetDimA(padded_shape, dimA, NBDIMS, data_format_); SetStrideA(padded_shape, strideApadded, NBDIMS, data_format_); } @@ -367,22 +365,21 @@ class ConvGradFilterBkwGpuKernelMod : public DeprecatedNativeGpuKernelMod { algo_ = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1; } } - void GetFilterShape(const CNodePtr &kernel_node, std::vector *filter_shape) { + void GetFilterShape(const CNodePtr &kernel_node, ShapeVector *filter_shape) { if (is_dynamic_attr_ && get_dynamic_attr_value_) { (void)std::transform(std::begin(filter_shape_), std::end(filter_shape_), std::back_inserter(*filter_shape), - [](const int64_t &e) -> size_t { return (LongToSize(e)); }); + [](const int64_t &e) -> int64_t { return e; }); } else { auto shp_tuple_x = GetAttrAndConvertValueTuple(kernel_node, "filter_sizes"); (void)std::transform(std::begin(shp_tuple_x), std::end(shp_tuple_x), std::back_inserter(*filter_shape), - [](const ValuePtr &e) -> size_t { + [](const ValuePtr &e) -> int64_t { auto cast_value = e->cast(); MS_EXCEPTION_IF_NULL(cast_value); - return static_cast(cast_value->value()); + return static_cast(cast_value->value()); }); } } - void Set4DDesc(const std::vector &dy_shape, const std::vector
&filter_shape, - const std::vector &in_shape) { + void Set4DDesc(const ShapeVector &dy_shape, const ShapeVector &filter_shape, const ShapeVector &in_shape) { const int nbDims = 4; int dimA[NBDIMS]; int strideAin[NBDIMS]; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/conv2d_grad_input_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/conv2d_grad_input_gpu_kernel.h index bae293290ab..d638dbe2596 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/conv2d_grad_input_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/conv2d_grad_input_gpu_kernel.h @@ -119,7 +119,7 @@ class ConvGradInputBkwGpuKernelMod : public DeprecatedNativeGpuKernelMod { return true; } - bool CheckNull(const std::vector dy_shape, const std::vector filter_shape) { + bool CheckNull(const ShapeVector dy_shape, const ShapeVector filter_shape) { is_null_input_ = CHECK_SHAPE_NULL(dy_shape, kernel_name_, "dy") || CHECK_SHAPE_NULL(filter_shape, kernel_name_, "weight"); if (is_null_input_) { @@ -133,20 +133,14 @@ class ConvGradInputBkwGpuKernelMod : public DeprecatedNativeGpuKernelMod { kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); InitResource(); (void)CheckParam(kernel_node); - if (is_dynamic_attr_ && !get_dynamic_attr_value_) { - return true; - } - auto dy_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); auto filter_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1); - if (CheckNull(dy_shape, filter_shape)) { + if (IsDynamic(dy_shape, filter_shape)) { return true; } auto format_attr = GetAttr(kernel_node, "format"); - if (format_attr == kOpFormat_NHWC) { - data_format_ = kOpFormat_NHWC; - } - std::vector input_shape; + data_format_ = (format_attr == kOpFormat_NHWC) ? kOpFormat_NHWC : data_format_; + ShapeVector input_shape; GetInputShape(kernel_node, &input_shape); if (data_format_ == kOpFormat_NHWC) { compute_format_ = CUDNN_TENSOR_NHWC; @@ -188,13 +182,11 @@ class ConvGradInputBkwGpuKernelMod : public DeprecatedNativeGpuKernelMod { int dimA[k2DPadSize]; int strideApadded[k2DPadSize]; if (data_format_ == kOpFormat_NCHW || data_format_ == kOpFormat_DEFAULT) { - auto padded_shape = {IntToSize(n_), IntToSize(c_), IntToSize(old_height_ + pad_height_), - IntToSize(old_width_ + pad_width_)}; + ShapeVector padded_shape = {n_, c_, old_height_ + pad_height_, old_width_ + pad_width_}; SetDimA(padded_shape, dimA, k2DPadSize, data_format_); SetStrideA(padded_shape, strideApadded, k2DPadSize, data_format_); } else if (data_format_ == kOpFormat_NHWC) { - auto padded_shape = {IntToSize(n_), IntToSize(old_height_ + pad_height_), IntToSize(old_width_ + pad_width_), - IntToSize(c_)}; + ShapeVector padded_shape = {n_, old_height_ + pad_height_, old_width_ + pad_width_, c_}; SetDimA(padded_shape, dimA, k2DPadSize, data_format_); SetStrideA(padded_shape, strideApadded, k2DPadSize, data_format_); } @@ -377,22 +369,21 @@ class ConvGradInputBkwGpuKernelMod : public DeprecatedNativeGpuKernelMod { algo_ = CUDNN_CONVOLUTION_BWD_DATA_ALGO_1; } } - void GetInputShape(const CNodePtr &kernel_node, std::vector *input_shape) { + void GetInputShape(const CNodePtr &kernel_node, ShapeVector *input_shape) { if (is_dynamic_attr_ && get_dynamic_attr_value_) { (void)std::transform(std::begin(input_shape_), std::end(input_shape_), std::back_inserter(*input_shape), - [](const int64_t &e) -> size_t { return (LongToSize(e)); }); + [](const int64_t &e) -> int64_t { return e; }); } else { auto shp_tuple_x = GetAttrAndConvertValueTuple(kernel_node, "input_sizes"); (void)std::transform(std::begin(shp_tuple_x), 
std::end(shp_tuple_x), std::back_inserter(*input_shape), - [](const ValuePtr &e) -> size_t { + [](const ValuePtr &e) -> int64_t { auto cast_value = e->cast(); MS_EXCEPTION_IF_NULL(cast_value); - return static_cast(cast_value->value()); + return static_cast(cast_value->value()); }); } } - void Set4DDesc(const std::vector &dy_shape, const std::vector &input_shape, - const std::vector &filter_shape) { + void Set4DDesc(const ShapeVector &dy_shape, const ShapeVector &input_shape, const ShapeVector &filter_shape) { const int kNbDims = 4; int dimA[kNbDims]; int strideAin[kNbDims]; @@ -444,6 +435,10 @@ class ConvGradInputBkwGpuKernelMod : public DeprecatedNativeGpuKernelMod { << "dilation[0]: " << dilation_[0] << ", dilation[1]: " << dilation_[1]; } } + bool IsDynamic(const ShapeVector &dy_shape, const ShapeVector &filter_shape) { + return AnfAlgo::IsShapesDynamic({filter_shape, dy_shape}) || CheckNull(dy_shape, filter_shape) || + (is_dynamic_attr_ && !get_dynamic_attr_value_); + } cudnnHandle_t cudnn_handle_; cudnnFilterDescriptor_t w_desc_; cudnnConvolutionDescriptor_t conv_desc_; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/conv3d_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/conv3d_gpu_kernel.h index 48f39e38c15..7d29a827c7e 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/conv3d_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/conv3d_gpu_kernel.h @@ -116,17 +116,17 @@ class Conv3dGpuKernelMod : public DeprecatedNativeGpuKernelMod { is_null_input_ = CHECK_SHAPE_NULL(in_shape, kernel_name_, "x") || CHECK_SHAPE_NULL(filter_shape, kernel_name_, "weight") || CHECK_SHAPE_NULL(output_shape, kernel_name_, "output"); - if (is_null_input_) { + if (is_null_input_ || AnfAlgo::IsShapesDynamic({in_shape, filter_shape, output_shape})) { InitSizeLists(); return true; } CheckTensorSize({in_shape}); (void)CheckSize(in_shape.size(), kInputDimSize, "x"); - n_ = SizeToInt(in_shape[kInDimIdxForN]); - c_ = SizeToInt(in_shape[kInDimIdxForC]); - old_depth_ = SizeToInt(in_shape[kInDimIdxForD]); - old_height_ = SizeToInt(in_shape[kInDimIdxForH]); - old_width_ = SizeToInt(in_shape[kInDimIdxForW]); + n_ = LongToInt(in_shape[kInDimIdxForN]); + c_ = LongToInt(in_shape[kInDimIdxForC]); + old_depth_ = LongToInt(in_shape[kInDimIdxForD]); + old_height_ = LongToInt(in_shape[kInDimIdxForH]); + old_width_ = LongToInt(in_shape[kInDimIdxForW]); compute_format_ = CUDNN_TENSOR_NCHW; SetNDDesc(in_shape, filter_shape, output_shape); group_ = static_cast(GetAttr(kernel_node, "group")); @@ -268,8 +268,7 @@ class Conv3dGpuKernelMod : public DeprecatedNativeGpuKernelMod { } } - void SetNDDesc(const std::vector &in_shape, const std::vector &filter_shape, - const std::vector &output_shape) { + void SetNDDesc(const ShapeVector &in_shape, const ShapeVector &filter_shape, const ShapeVector &output_shape) { const int kDims = 5; int dimA[kDims]; int strideAin[kDims]; @@ -359,8 +358,7 @@ class Conv3dGpuKernelMod : public DeprecatedNativeGpuKernelMod { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the value of 'data_format' only support 'NCDHW' right now " << ", but got " << data_format_; } - auto padded_shape = {IntToSize(n_), IntToSize(c_), IntToSize(old_depth_ + pad_depth_), - IntToSize(old_height_ + pad_height_), IntToSize(old_width_ + pad_width_)}; + ShapeVector padded_shape = {n_, c_, old_depth_ + pad_depth_, old_height_ + pad_height_, old_width_ + pad_width_}; SetDimA(padded_shape, dimA, kNumDims, data_format_); SetStrideA(padded_shape, strideApadded, kNumDims, data_format_); 
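Several Init bodies above fold the dynamic test for multiple tensors into one call, AnfAlgo::IsShapesDynamic({...}), and ConvGradInputBkwGpuKernelMod additionally wraps that test together with the null check and the pending-attribute case in its own IsDynamic helper. A sketch of the batched check under the same negative-sentinel assumption (the exact upstream signature may differ):

#include <algorithm>
#include <cstdint>
#include <vector>

using ShapeVector = std::vector<int64_t>;

inline bool IsDynamic(const ShapeVector &shape) {
  return std::any_of(shape.begin(), shape.end(), [](int64_t dim) { return dim < 0; });
}

// Batched form: true if any listed shape still carries an unknown
// dimension. Callable as IsShapesDynamic({in_shape, dy_shape}).
inline bool IsShapesDynamic(const std::vector<ShapeVector> &shapes) {
  return std::any_of(shapes.begin(), shapes.end(),
                     [](const ShapeVector &shape) { return IsDynamic(shape); });
}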
CHECK_CUDNN_RET_WITH_EXCEPT( diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/conv3d_grad_filter_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/conv3d_grad_filter_gpu_kernel.h index 7b16f71b52f..afd7b8396ed 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/conv3d_grad_filter_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/conv3d_grad_filter_gpu_kernel.h @@ -124,6 +124,9 @@ class Conv3dGradFilterGpuKernelMod : public DeprecatedNativeGpuKernelMod { cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))); auto in_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); auto dy_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1); + if (AnfAlgo::IsShapesDynamic({in_shape, dy_shape})) { + return true; + } is_null_input_ = CHECK_SHAPE_NULL(in_shape, kernel_name_, "x") || CHECK_SHAPE_NULL(dy_shape, kernel_name_, "dy"); if (is_null_input_) { InitSizeLists(); @@ -132,7 +135,7 @@ class Conv3dGradFilterGpuKernelMod : public DeprecatedNativeGpuKernelMod { CheckTensorSize({in_shape}); data_format_ = kOpFormat_NCDHW; - std::vector filter_shape; + ShapeVector filter_shape; GetFilterShape(kernel_node, &filter_shape); num_output_elements_ = 1; for (auto x : filter_shape) { @@ -141,11 +144,11 @@ class Conv3dGradFilterGpuKernelMod : public DeprecatedNativeGpuKernelMod { compute_format_ = CUDNN_TENSOR_NCHW; (void)CheckSize(in_shape.size(), kInputDimSize, "input shape"); - n_ = SizeToInt(in_shape[kInDimIdxForN]); - c_ = SizeToInt(in_shape[kInDimIdxForC]); - old_depth_ = SizeToInt(in_shape[kInDimIdxForD]); - old_height_ = SizeToInt(in_shape[kInDimIdxForH]); - old_width_ = SizeToInt(in_shape[kInDimIdxForW]); + n_ = LongToInt(in_shape[kInDimIdxForN]); + c_ = LongToInt(in_shape[kInDimIdxForC]); + old_depth_ = LongToInt(in_shape[kInDimIdxForD]); + old_height_ = LongToInt(in_shape[kInDimIdxForH]); + old_width_ = LongToInt(in_shape[kInDimIdxForW]); SetNDDesc(dy_shape, filter_shape, in_shape); group_ = static_cast(GetAttr(kernel_node, "group")); CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnSetConvolutionGroupCount(conv_desc_, group_), @@ -297,18 +300,17 @@ class Conv3dGradFilterGpuKernelMod : public DeprecatedNativeGpuKernelMod { } } - void GetFilterShape(const CNodePtr &kernel_node, std::vector *filter_shape) { + void GetFilterShape(const CNodePtr &kernel_node, ShapeVector *filter_shape) { auto shp_tuple_x = GetAttrAndConvertValueTuple(kernel_node, "filter_size"); (void)std::transform(std::begin(shp_tuple_x), std::end(shp_tuple_x), std::back_inserter(*filter_shape), - [](const ValuePtr &e) -> size_t { + [](const ValuePtr &e) -> int64_t { auto cast_value = e->cast(); MS_EXCEPTION_IF_NULL(cast_value); - return static_cast(cast_value->value()); + return static_cast(cast_value->value()); }); } - void SetNDDesc(const std::vector &dy_shape, const std::vector &filter_shape, - const std::vector &in_shape) { + void SetNDDesc(const ShapeVector &dy_shape, const ShapeVector &filter_shape, const ShapeVector &in_shape) { const int kDims = 5; int dimA[kDims]; int strideAin[kDims]; @@ -386,8 +388,7 @@ class Conv3dGradFilterGpuKernelMod : public DeprecatedNativeGpuKernelMod { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the value of 'data_format' only support 'NCDHW' right now " << ", but got " << data_format_; } - auto padded_shape = {IntToSize(n_), IntToSize(c_), IntToSize(old_depth_ + pad_depth_), - IntToSize(old_height_ + pad_height_), IntToSize(old_width_ + pad_width_)}; + ShapeVector padded_shape = {n_, c_, old_depth_ + pad_depth_, 
old_height_ + pad_height_, old_width_ + pad_width_}; SetDimA(padded_shape, dimA, kNumDims, data_format_); SetStrideA(padded_shape, strideApadded, kNumDims, data_format_); CHECK_CUDNN_RET_WITH_EXCEPT( diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/conv3d_grad_input_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/conv3d_grad_input_gpu_kernel.h index d01f676f701..d8ef63a1dcf 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/conv3d_grad_input_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/conv3d_grad_input_gpu_kernel.h @@ -109,20 +109,20 @@ class Conv3dGradInputGpuKernelMod : public DeprecatedNativeGpuKernelMod { auto dy_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1); is_null_input_ = CHECK_SHAPE_NULL(filter_shape, kernel_name_, "weight") || CHECK_SHAPE_NULL(dy_shape, kernel_name_, "dy"); - if (is_null_input_) { + if (is_null_input_ || AnfAlgo::IsShapesDynamic({filter_shape, dy_shape})) { InitSizeLists(); return true; } - std::vector input_shape; + ShapeVector input_shape; GetInputShape(kernel_node, &input_shape); compute_format_ = CUDNN_TENSOR_NCHW; CheckTensorSize({input_shape}); (void)CheckSize(input_shape.size(), 5, "input"); - n_ = SizeToInt(input_shape[0]); - c_ = SizeToInt(input_shape[1]); - old_depth_ = SizeToInt(input_shape[2]); - old_height_ = SizeToInt(input_shape[3]); - old_width_ = SizeToInt(input_shape[4]); + n_ = LongToInt(input_shape[0]); + c_ = LongToInt(input_shape[1]); + old_depth_ = LongToInt(input_shape[2]); + old_height_ = LongToInt(input_shape[3]); + old_width_ = LongToInt(input_shape[4]); SetNDDesc(dy_shape, input_shape, filter_shape); group_ = static_cast(GetAttr(kernel_node, "group")); CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnSetConvolutionGroupCount(conv_desc_, group_), @@ -264,18 +264,17 @@ class Conv3dGradInputGpuKernelMod : public DeprecatedNativeGpuKernelMod { algo_ = perf_results.algo; } - void GetInputShape(const CNodePtr &kernel_node, std::vector *input_shape) { + void GetInputShape(const CNodePtr &kernel_node, ShapeVector *input_shape) { auto shp_tuple_x = GetAttrAndConvertValueTuple(kernel_node, "input_size"); (void)std::transform(std::begin(shp_tuple_x), std::end(shp_tuple_x), std::back_inserter(*input_shape), - [](const ValuePtr &e) -> size_t { + [](const ValuePtr &e) -> int64_t { auto cast_value = e->cast(); MS_EXCEPTION_IF_NULL(cast_value); - return static_cast(cast_value->value()); + return static_cast(cast_value->value()); }); } - void SetNDDesc(const std::vector &dy_shape, const std::vector &input_shape, - const std::vector &filter_shape) { + void SetNDDesc(const ShapeVector &dy_shape, const ShapeVector &input_shape, const ShapeVector &filter_shape) { const int kDims = 5; int dimA[kDims]; int strideAin[kDims]; @@ -354,8 +353,7 @@ class Conv3dGradInputGpuKernelMod : public DeprecatedNativeGpuKernelMod { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the value of 'data_format' only support 'NCDHW' right now " << ", but got " << data_format_; } - auto padded_shape = {IntToSize(n_), IntToSize(c_), IntToSize(old_depth_ + pad_depth_), - IntToSize(old_height_ + pad_height_), IntToSize(old_width_ + pad_width_)}; + ShapeVector padded_shape = {n_, c_, old_depth_ + pad_depth_, old_height_ + pad_height_, old_width_ + pad_width_}; SetDimA(padded_shape, dimA, kNumDims, data_format_); SetStrideA(padded_shape, strideApadded, kNumDims, data_format_); CHECK_CUDNN_RET_WITH_EXCEPT( diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/conv3d_transpose_gpu_kernel.h 
b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/conv3d_transpose_gpu_kernel.h index 886e2b3530c..0868f529b57 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/conv3d_transpose_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/conv3d_transpose_gpu_kernel.h @@ -117,7 +117,7 @@ class Conv3dTransposeFwdGpuKernelMod : public DeprecatedNativeGpuKernelMod { return true; } - bool CheckNull(const std::vector filter_shape, const std::vector input_shape) { + bool CheckNull(const ShapeVector filter_shape, const ShapeVector input_shape) { is_null_input_ = CHECK_SHAPE_NULL(filter_shape, kernel_name_, "weight") || CHECK_SHAPE_NULL(input_shape, kernel_name_, "dout"); if (is_null_input_) { @@ -147,10 +147,14 @@ class Conv3dTransposeFwdGpuKernelMod : public DeprecatedNativeGpuKernelMod { } auto filter_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1); auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); + if (AnfAlgo::IsShapesDynamic({input_shape, filter_shape})) { + return true; + } + if (CheckNull(filter_shape, input_shape)) { return true; } - std::vector output_shape; + ShapeVector output_shape; GetInputShape(kernel_node, &output_shape); if (data_format_ == kOpFormat_NDHWC) { compute_format_ = CUDNN_TENSOR_NHWC; @@ -354,18 +358,17 @@ class Conv3dTransposeFwdGpuKernelMod : public DeprecatedNativeGpuKernelMod { } } - void GetInputShape(const CNodePtr &kernel_node, std::vector *input_shape) { + void GetInputShape(const CNodePtr &kernel_node, ShapeVector *input_shape) { auto shp_tuple_x = GetAttrAndConvertValueTuple(kernel_node, "input_size"); (void)std::transform(std::begin(shp_tuple_x), std::end(shp_tuple_x), std::back_inserter(*input_shape), - [](const ValuePtr &e) -> size_t { + [](const ValuePtr &e) -> int64_t { auto cast_value = e->cast(); MS_EXCEPTION_IF_NULL(cast_value); - return static_cast(cast_value->value()); + return static_cast(cast_value->value()); }); } - void Set5DDesc(const std::vector &input_shape, const std::vector &output_shape, - const std::vector &filter_shape) { + void Set5DDesc(const ShapeVector &input_shape, const ShapeVector &output_shape, const ShapeVector &filter_shape) { const int kNbDims = 5; int dim_a[kNbDims]; int stride_a_in[kNbDims]; @@ -412,11 +415,11 @@ class Conv3dTransposeFwdGpuKernelMod : public DeprecatedNativeGpuKernelMod { << "dilation[0]: " << dilation_[0] << ", dilation[1]: " << dilation_[1]; } } - void UpdatePaddingAndDilation(const std::vector &input_shape, const std::vector &filter_shape, - int *pad_list, int *stride_pad_list) { // pad_mode_ = same + void UpdatePaddingAndDilation(const ShapeVector &input_shape, const ShapeVector &filter_shape, int *pad_list, + int *stride_pad_list) { // pad_mode_ = same const size_t kIdxOffset = 2; for (size_t i = 0; i < kConv3dDimSize; i++) { - int pad_sum = SizeToInt(filter_shape[i + kIdxOffset]) * dilation_[i + kIdxOffset] - stride_[i + kIdxOffset] - + int pad_sum = LongToInt(filter_shape[i + kIdxOffset]) * dilation_[i + kIdxOffset] - stride_[i + kIdxOffset] - dilation_[i + kIdxOffset] + 1; if (pad_sum >= 0) { int pad_0 = pad_sum / kSymmetricCoef; @@ -458,21 +461,18 @@ class Conv3dTransposeFwdGpuKernelMod : public DeprecatedNativeGpuKernelMod { int input_dim_a[kDataSize]; int input_strideApadded[kDataSize]; if (data_format_ == kOpFormat_NCDHW || data_format_ == kOpFormat_DEFAULT) { - auto padded_shape = {IntToSize(n_), IntToSize(c_), - IntToSize(old_depth_ + (1 + stride_[kDepth3DStrideIdx]) * padding_diff[kHead3DPadIdx]), - IntToSize(old_height_ + (1 + stride_[kHeight3DStrideIdx]) * 
padding_diff[kTail3DPadIdx]), - IntToSize(old_width_ + (1 + stride_[kWidth3DStrideIdx]) * padding_diff[kTop3DPadIdx])}; + ShapeVector padded_shape = {n_, c_, old_depth_ + (1 + stride_[kDepth3DStrideIdx]) * padding_diff[kHead3DPadIdx], + old_height_ + (1 + stride_[kHeight3DStrideIdx]) * padding_diff[kTail3DPadIdx], + old_width_ + (1 + stride_[kWidth3DStrideIdx]) * padding_diff[kTop3DPadIdx]}; SetDimA(padded_shape, dim_a, kDataSize, data_format_); SetStrideA(padded_shape, strideApadded, kDataSize, data_format_); - std::vector input_padded_shape = {IntToSize(input_n_), IntToSize(input_c_), - IntToSize(input_old_depth_ + padding_diff[0]), - IntToSize(input_old_height_ + padding_diff[kTail3DPadIdx]), - IntToSize(input_old_width_ + padding_diff[kTop3DPadIdx])}; + ShapeVector input_padded_shape = {input_n_, input_c_, input_old_depth_ + padding_diff[0], + input_old_height_ + padding_diff[kTail3DPadIdx], + input_old_width_ + padding_diff[kTop3DPadIdx]}; SetDimA(input_padded_shape, input_dim_a, kDataSize, data_format_); SetStrideA(input_padded_shape, input_strideApadded, kDataSize, data_format_); } else if (data_format_ == kOpFormat_NDHWC) { - auto padded_shape = {IntToSize(n_), IntToSize(old_depth_ + pad_depth_), IntToSize(old_height_ + pad_height_), - IntToSize(old_width_ + pad_width_), IntToSize(c_)}; + ShapeVector padded_shape = {n_, old_depth_ + pad_depth_, old_height_ + pad_height_, old_width_ + pad_width_, c_}; SetDimA(padded_shape, dim_a, kDataSize, data_format_); SetStrideA(padded_shape, strideApadded, kDataSize, data_format_); } @@ -490,8 +490,8 @@ class Conv3dTransposeFwdGpuKernelMod : public DeprecatedNativeGpuKernelMod { "cudnnSetConvolutionNdDescriptor failed"); } - void SetPad(const CNodePtr &kernel_node, const std::vector &input_shape, - const std::vector &filter_shape, std::vector *pad_list, std::vector *stride_pad_list) { + void SetPad(const CNodePtr &kernel_node, const ShapeVector &input_shape, const ShapeVector &filter_shape, + std::vector *pad_list, std::vector *stride_pad_list) { pad_mode_ = GetAttr(kernel_node, "pad_mode"); const size_t kFilterSize = 5; (void)CheckSize(filter_shape.size(), kFilterSize, "weight shape"); @@ -542,8 +542,7 @@ class Conv3dTransposeFwdGpuKernelMod : public DeprecatedNativeGpuKernelMod { int dim_a[kDataLen]; int strideApadded[kDataLen]; if (data_format_ == kOpFormat_NCDHW || data_format_ == kOpFormat_DEFAULT) { - auto padded_shape = {IntToSize(n_), IntToSize(c_), IntToSize(stride_pad_depth_), - IntToSize(stride_pad_height_), IntToSize(stride_pad_width_)}; + ShapeVector padded_shape = {n_, c_, stride_pad_depth_, stride_pad_height_, stride_pad_width_}; SetDimA(padded_shape, dim_a, kDataLen, data_format_); SetStrideA(padded_shape, strideApadded, kDataLen, data_format_); } diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/ctcloss_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/ctcloss_gpu_kernel.h index 8cfca8b7c20..ac13f5e6c44 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/ctcloss_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/ctcloss_gpu_kernel.h @@ -100,7 +100,7 @@ class CtcLossGpuKernelMod : public DeprecatedNativeGpuKernelMod { kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); kernel_node_ = kernel_node; InitResource(); - auto probs_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kPrevOutput0th); + auto probs_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kPrevOutput0th)); auto indice_dims = 
common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kPrevOutput1st); auto labels_dims = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kPrevOutput2nd); auto sequence_length_dims = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kPrevOutput3rd); @@ -128,15 +128,11 @@ class CtcLossGpuKernelMod : public DeprecatedNativeGpuKernelMod { << indice_dims.size(); } label_size_ = sizeof(int); - for (auto i : labels_dims) { - label_size_ *= i; - } + label_size_ *= SizeOf(labels_dims); label_indice_size_ = sizeof(int64_t); - for (auto i : indice_dims) { - label_indice_size_ *= i; - } + label_indice_size_ *= SizeOf(indice_dims); - sequence_lengths_size_ = sequence_length_dims[0] * sizeof(int); + sequence_lengths_size_ = LongToSizeClipNeg(sequence_length_dims[0]) * sizeof(int); preprocess_collapse_repeated_ = GetAttr(kernel_node, "preprocess_collapse_repeated"); ctc_merge_repeated_ = GetAttr(kernel_node, "ctc_merge_repeated"); ignore_longer_outputs_than_inputs_ = GetAttr(kernel_node, "ignore_longer_outputs_than_inputs"); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/dropout_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/dropout_gpu_kernel.h index 07030964626..2e029296f22 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/dropout_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/dropout_gpu_kernel.h @@ -65,15 +65,12 @@ class DropoutFwdGpuKernelMod : public DeprecatedNativeGpuKernelMod { auto input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name_, "input"); - if (is_null_input_) { + if (is_null_input_ || IsDynamic(input_shape)) { InitSizeLists(); return true; } - num_count_ = 1; - for (size_t x : input_shape) { - num_count_ *= x; - } + num_count_ = SizeOf(input_shape); keep_prob_ = GetAttr(kernel_node, "keep_prob"); if (!states_init_) { int64_t seed = GetAttr(kernel_node, "Seed0"); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/dropout_grad_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/dropout_grad_kernel.h index 5523d4ff4fd..0ee756e5a48 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/dropout_grad_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/dropout_grad_kernel.h @@ -56,9 +56,10 @@ class DropoutGradBwdGpuKernelMod : public DeprecatedNativeGpuKernelMod { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs must be 2, but got " << input_num; } - auto input_shape = common::AnfAlgo::GetOutputInferShape(kernel_node, 0); + auto shape_signed = common::AnfAlgo::GetOutputInferShape(kernel_node, 0); + auto input_shape = Convert2SizeTClipNeg(shape_signed); is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name_, "input"); - if (is_null_input_) { + if (is_null_input_ || IsDynamic(shape_signed)) { InitSizeLists(); return true; } diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/flatten_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/flatten_gpu_kernel.h index f10cd5d2dc8..a2274670e07 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/flatten_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/flatten_gpu_kernel.h @@ -49,7 +49,7 @@ class FlattenFwdGpuKernelMod : public DeprecatedNativeGpuKernelMod { bool Init(const CNodePtr &kernel_node) override { kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); kernel_node_ = kernel_node; - auto shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0); + auto shape = 
Convert2SizeTClipNeg(AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0)); kernel_node_ = kernel_node; is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name_, "input"); if (is_null_input_) { @@ -86,8 +86,8 @@ class FlattenFwdGpuKernelMod : public DeprecatedNativeGpuKernelMod { } for (size_t index = 1; index < input_num; ++index) { size_t input_size = sizeof(S); - for (size_t x : AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, index)) { - input_size *= x; + for (auto x : AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, index)) { + input_size *= LongToSizeClipNeg(x); } input_size_list_.push_back(input_size); } diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/flatten_grad_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/flatten_grad_gpu_kernel.h index aa277410af6..f138337cc32 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/flatten_grad_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/flatten_grad_gpu_kernel.h @@ -55,7 +55,7 @@ class FlattenGardBkwGpuKernelMod : public DeprecatedNativeGpuKernelMod { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs must be 1, but got " << input_num; } - auto shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0); + auto shape = Convert2SizeTClipNeg(AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0)); is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name_, "input"); if (is_null_input_) { InitSizeLists(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/ftrl_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/ftrl_gpu_kernel.h index f534daa0cdf..8e5c7d2f0f4 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/ftrl_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/ftrl_gpu_kernel.h @@ -90,21 +90,10 @@ class FtrlGpuKernelMod : public DeprecatedNativeGpuKernelMod { InitSizeLists(); return true; } - for (size_t i = 0; i < variable_shape.size(); i++) { - variable_size_ *= variable_shape[i]; - } - - for (size_t i = 0; i < accumulation_shape.size(); i++) { - accumulation_size_ *= accumulation_shape[i]; - } - - for (size_t i = 0; i < linear_shape.size(); i++) { - linear_size_ *= linear_shape[i]; - } - - for (size_t i = 0; i < gradient_shape.size(); i++) { - gradient_size_ *= gradient_shape[i]; - } + variable_size_ *= SizeOf(variable_shape); + accumulation_size_ *= SizeOf(accumulation_shape); + linear_size_ *= SizeOf(linear_shape); + gradient_size_ *= SizeOf(gradient_shape); InitSizeLists(); return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/fused_adam_weight_decay.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/fused_adam_weight_decay.h index 39b6fff6a53..5e036d2e7a2 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/fused_adam_weight_decay.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/fused_adam_weight_decay.h @@ -44,10 +44,7 @@ class FusedAdamWeightDecayGpuKernelMod : public DeprecatedNativeGpuKernelMod { InitSizeLists(); return true; } - element_nums_ = 1; - for (auto i : shape) { - element_nums_ *= i; - } + element_nums_ = SizeOf(shape); InitSizeLists(); return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/fused_scale_momentum_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/fused_scale_momentum_gpu_kernel.h index 72e7d4c69b9..a697552202a 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/fused_scale_momentum_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/fused_scale_momentum_gpu_kernel.h @@ -61,9 +61,7 @@ class FusedScaleMomentumGpuKernelMod : public 
DeprecatedNativeGpuKernelMod { InitSizeLists(); return true; } - for (size_t i = 0; i < variable_shape.size(); i++) { - element_num_ *= variable_shape[i]; - } + element_num_ *= SizeOf(variable_shape); InitSizeLists(); return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/fused_weightdecay_momentum_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/fused_weightdecay_momentum_gpu_kernel.h index 7852194f3a6..0cff91cc86b 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/fused_weightdecay_momentum_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/fused_weightdecay_momentum_gpu_kernel.h @@ -61,10 +61,7 @@ class FusedWeightDecayMomentumGpuKernelMod : public DeprecatedNativeGpuKernelMod InitSizeLists(); return true; } - for (size_t i = 0; i < variable_shape.size(); i++) { - element_num_ *= variable_shape[i]; - } - + element_num_ *= SizeOf(variable_shape); InitSizeLists(); return true; } diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/fused_weightdecay_scale_momentum_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/fused_weightdecay_scale_momentum_gpu_kernel.h index d5382faafa5..42e8062d041 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/fused_weightdecay_scale_momentum_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/fused_weightdecay_scale_momentum_gpu_kernel.h @@ -62,9 +62,7 @@ class FusedWeightDecayScaleMomentumGpuKernelMod : public DeprecatedNativeGpuKern InitSizeLists(); return true; } - for (size_t i = 0; i < variable_shape.size(); i++) { - element_num_ *= variable_shape[i]; - } + element_num_ *= SizeOf(variable_shape); InitSizeLists(); return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/gelu_grad_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/gelu_grad_kernel.h index f17f308569f..b73f011570d 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/gelu_grad_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/gelu_grad_kernel.h @@ -55,9 +55,7 @@ class GeLUGradGpuKernelMod : public DeprecatedNativeGpuKernelMod { InitSizeLists(); return true; } - for (auto dim : input_shape) { - input_size_ *= dim; - } + input_size_ *= SizeOf(input_shape); InitSizeLists(); return true; } diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/gelu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/gelu_kernel.h index 064c7ffa097..534bb558506 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/gelu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/gelu_kernel.h @@ -54,9 +54,7 @@ class GeluGpuKernelMod : public DeprecatedNativeGpuKernelMod { InitSizeLists(); return true; } - for (auto dim : input_shape) { - input_size_ *= dim; - } + input_size_ *= SizeOf(input_shape); InitSizeLists(); return true; } diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/hsigmoid_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/hsigmoid_gpu_kernel.h index 7cf6c405673..0b5f82081b6 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/hsigmoid_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/hsigmoid_gpu_kernel.h @@ -59,10 +59,7 @@ class HSigmoidKernelMod : public DeprecatedNativeGpuKernelMod { InitSizeLists(); return true; } - input_size_ = 1; - for (size_t i = 0; i < input_shape.size(); i++) { - input_size_ *= input_shape[i]; - } + input_size_ = SizeOf(input_shape); InitSizeLists(); return true; } diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/hsigmoid_grad_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/hsigmoid_grad_gpu_kernel.h index 
a3de8b81ae1..0ede0057159 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/hsigmoid_grad_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/hsigmoid_grad_gpu_kernel.h @@ -60,10 +60,7 @@ class HSigmoidGradKernelMod : public DeprecatedNativeGpuKernelMod { InitSizeLists(); return true; } - input_size_ = 1; - for (size_t i = 0; i < input_shape.size(); i++) { - input_size_ *= input_shape[i]; - } + input_size_ = SizeOf(input_shape); InitSizeLists(); return true; } diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/hswish_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/hswish_gpu_kernel.h index d40bb041903..980b97121c4 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/hswish_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/hswish_gpu_kernel.h @@ -59,10 +59,7 @@ class HSwishKernelMod : public DeprecatedNativeGpuKernelMod { InitSizeLists(); return true; } - input_size_ = 1; - for (size_t i = 0; i < input_shape.size(); i++) { - input_size_ *= input_shape[i]; - } + input_size_ = SizeOf(input_shape); InitSizeLists(); return true; } diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/hswish_grad_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/hswish_grad_gpu_kernel.h index 1c1e69eb791..561d59745a2 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/hswish_grad_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/hswish_grad_gpu_kernel.h @@ -60,10 +60,7 @@ class HSwishGradKernelMod : public DeprecatedNativeGpuKernelMod { InitSizeLists(); return true; } - input_size_ = 1; - for (size_t i = 0; i < input_shape.size(); i++) { - input_size_ *= input_shape[i]; - } + input_size_ = SizeOf(input_shape); InitSizeLists(); return true; } diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/im2col_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/im2col_gpu_kernel.h index 3c5caf553e8..91ea820b1c3 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/im2col_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/im2col_gpu_kernel.h @@ -99,6 +99,9 @@ class Im2ColFwdGpuKernelMod : public DeprecatedNativeGpuKernelMod { (void)CheckParam(kernel_node); auto in_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); auto output_shape = common::AnfAlgo::GetOutputInferShape(kernel_node, 0); + if (AnfAlgo::IsShapesDynamic({in_shape, output_shape})) { + return true; + } is_null_input_ = CHECK_SHAPE_NULL(in_shape, kernel_name_, "input") || CHECK_SHAPE_NULL(output_shape, kernel_name_, "output"); if (is_null_input_) { @@ -110,10 +113,7 @@ class Im2ColFwdGpuKernelMod : public DeprecatedNativeGpuKernelMod { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input must be 4, but got " << in_shape.size(); } - std::vector filter_shape; - std::vector filter_shape_me = GetAttr>(kernel_node, "kernel_size"); - (void)std::transform(filter_shape_me.begin(), filter_shape_me.end(), std::back_inserter(filter_shape), - [](const int64_t &value) { return static_cast(value); }); + auto filter_shape = GetAttr>(kernel_node, "kernel_size"); const size_t kFilterDimSize = 2; if (filter_shape.size() < kFilterDimSize) { MS_LOG(EXCEPTION) << "For 'Im2ColGpuKernel', the dimension of filter must be greater than or equal to 2, " @@ -215,7 +215,7 @@ class Im2ColFwdGpuKernelMod : public DeprecatedNativeGpuKernelMod { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs must be 1, but got " << output_num; } } - void SetPad(const std::vector &in_shape, const CNodePtr &kernel_node) { + void SetPad(const 
ShapeVector &in_shape, const CNodePtr &kernel_node) { std::vector pad_list; std::vector pad_list_me = GetAttr>(kernel_node, "pad_list"); (void)std::transform(pad_list_me.begin(), pad_list_me.end(), std::back_inserter(pad_list), @@ -224,10 +224,10 @@ class Im2ColFwdGpuKernelMod : public DeprecatedNativeGpuKernelMod { const size_t kInIdxForC = 1; const size_t kInIdxForH = 2; const size_t kInIdxForW = 3; - n_ = SizeToInt(in_shape[kInIdxForN]); - c_ = SizeToInt(in_shape[kInIdxForC]); - old_height_ = SizeToInt(in_shape[kInIdxForH]); - old_width_ = SizeToInt(in_shape[kInIdxForW]); + n_ = LongToInt(in_shape[kInIdxForN]); + c_ = LongToInt(in_shape[kInIdxForC]); + old_height_ = LongToInt(in_shape[kInIdxForH]); + old_width_ = LongToInt(in_shape[kInIdxForW]); if (pad_list.size() != kPadSize) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'pad' must be 4, but got " << pad_list.size(); @@ -256,8 +256,7 @@ class Im2ColFwdGpuKernelMod : public DeprecatedNativeGpuKernelMod { "cudnnSetConvolution2dDescriptor failed"); } - void Set4DDesc(const std::vector &in_shape, const std::vector &filter_shape, - const std::vector &output_shape) { + void Set4DDesc(const ShapeVector &in_shape, const ShapeVector &filter_shape, const ShapeVector &output_shape) { const size_t kIdx0 = 0; const size_t kIdx1 = 1; const size_t kIdx2 = 2; @@ -266,20 +265,20 @@ class Im2ColFwdGpuKernelMod : public DeprecatedNativeGpuKernelMod { const size_t kIdx5 = 5; CHECK_CUDNN_RET_WITH_EXCEPT( kernel_node_, - cudnnSetTensor4dDescriptor(input_desc_, CUDNN_TENSOR_NCHW, cudnn_data_type_, SizeToInt(in_shape[kIdx0]), - SizeToInt(in_shape[kIdx1]), SizeToInt(in_shape[kIdx2]), SizeToInt(in_shape[kIdx3])), + cudnnSetTensor4dDescriptor(input_desc_, CUDNN_TENSOR_NCHW, cudnn_data_type_, LongToInt(in_shape[kIdx0]), + LongToInt(in_shape[kIdx1]), LongToInt(in_shape[kIdx2]), LongToInt(in_shape[kIdx3])), "cudnnSetTensor4dDescriptor failed"); CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnSetFilter4dDescriptor(filter_desc_, cudnn_data_type_, CUDNN_TENSOR_NCHW, 1, - SizeToInt(in_shape[1]), filter_shape[0], filter_shape[1]), + LongToInt(in_shape[1]), filter_shape[0], filter_shape[1]), "cudnnSetFilter4dDescriptor failed"); auto out_H = output_shape[kIdx0] * output_shape[kIdx1] * output_shape[kIdx2]; auto out_W = output_shape[kIdx3] * output_shape[kIdx4] * output_shape[kIdx5]; CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnSetTensor4dDescriptor(output_desc_, CUDNN_TENSOR_NCHW, cudnn_data_type_, - SizeToInt(out_H), SizeToInt(out_W), 1, 1), + LongToInt(out_H), LongToInt(out_W), 1, 1), "cudnnSetTensor4dDescriptor failed"); } diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/instance_norm_gpu_kernel.cc b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/instance_norm_gpu_kernel.cc index 0a11b04f60d..aea85e7c7b7 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/instance_norm_gpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/instance_norm_gpu_kernel.cc @@ -57,14 +57,14 @@ int InstanceNormGpuKernelMod::Resize(const BaseOperatorPtr &base_operator, const if (int ret = KernelMod::Resize(base_operator, inputs, outputs); ret != KRET_OK) { return ret; } - auto input_shape = LongVecToSizeVec(inputs.at(kIndex0)->GetShapeVector()); + auto input_shape = inputs.at(kIndex0)->GetShapeVector(); is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name_, "input_x"); if (is_null_input_) { return KRET_OK; } - batch_ = input_shape[kIndex0]; - channel_ = input_shape[kIndex1]; + batch_ = LongToSize(input_shape[kIndex0]); + channel_ = 
LongToSize(input_shape[kIndex1]); CheckTensorSize({input_shape}); const int batch = 1; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/instance_norm_grad_gpu_kernel.cc b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/instance_norm_grad_gpu_kernel.cc index c7e318aab85..715ba870b71 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/instance_norm_grad_gpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/instance_norm_grad_gpu_kernel.cc @@ -60,14 +60,14 @@ int InstanceNormGradGpuKernelMod::Resize(const BaseOperatorPtr &base_operator, if (int ret = KernelMod::Resize(base_operator, inputs, outputs); ret != KRET_OK) { return ret; } - auto input_shape = LongVecToSizeVec(inputs.at(kIndex0)->GetShapeVector()); + auto input_shape = inputs.at(kIndex0)->GetShapeVector(); is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name_, "input_x"); if (is_null_input_) { return KRET_OK; } - batch_ = input_shape[kIndex0]; - channel_ = input_shape[kIndex1]; + batch_ = LongToSize(input_shape[kIndex0]); + channel_ = LongToSize(input_shape[kIndex1]); CheckTensorSize({input_shape}); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/kl_div_loss_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/kl_div_loss_gpu_kernel.h index a857acfee0a..26b31d5bf8a 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/kl_div_loss_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/kl_div_loss_gpu_kernel.h @@ -51,13 +51,11 @@ class KLDivLossGpuKernelMod : public DeprecatedNativeGpuKernelMod { auto input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); kernel_node_ = kernel_node; is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "logits"); - if (is_null_input_) { + if (is_null_input_ || IsDynamic(input_shape)) { InitSizeLists(); return true; } - for (size_t i = 0; i < input_shape.size(); i++) { - input_size_ *= input_shape[i]; - } + input_size_ *= SizeOf(input_shape); string reduction = GetAttr(kernel_node, "reduction"); reduction_ = kReductionModeMap[reduction]; workspace_size_ = sizeof(T); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/kl_div_loss_grad_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/kl_div_loss_grad_kernel.h index 57482bd4b27..61ead8c5a40 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/kl_div_loss_grad_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/kl_div_loss_grad_kernel.h @@ -50,13 +50,11 @@ class KLDivLossGradGpuKernelMod : public DeprecatedNativeGpuKernelMod { auto input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kIndex1); kernel_node_ = kernel_node; is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); - if (is_null_input_) { + if (is_null_input_ || IsDynamic(input_shape)) { InitSizeLists(); return true; } - for (size_t i = 0; i < input_shape.size(); i++) { - input_size_ *= input_shape[i]; - } + input_size_ *= SizeOf(input_shape); string reduction = GetAttr(kernel_node, "reduction"); reduction_ = kReductionModeMap[reduction]; InitSizeLists(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/l2_loss_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/l2_loss_gpu_kernel.h index 7183194bf35..98e0258c5ef 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/l2_loss_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/l2_loss_gpu_kernel.h @@ -50,9 +50,7 @@ class L2LossGpuKernelMod : public DeprecatedNativeGpuKernelMod { InitSizeLists(); return true; } - for (size_t i = 0; i < input_shape.size(); i++) { - input_size_ *= 
input_shape[i]; - } + input_size_ *= SizeOf(input_shape); InitSizeLists(); return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/l2normalize_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/l2normalize_gpu_kernel.h index 9153c21aaf5..fc2e9f11e51 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/l2normalize_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/l2normalize_gpu_kernel.h @@ -76,8 +76,11 @@ class L2NormalizeGpuKernelMod : public DeprecatedNativeGpuKernelMod { } GetMaxWithEpsAndValue(workspace_size_list_[0] / sizeof(T), epsilon_, reduce_workspace_addr, reinterpret_cast(stream_ptr)); - BroadcastArith(lhs_shape_, rhs_shape_, output_shape_, BROADCAST_TYPE_REALDIV, input_addr, reduce_workspace_addr, - output_addr, reinterpret_cast(stream_ptr)); + auto lhs_shape_size = Convert2SizeTClipNeg(lhs_shape_); + auto rhs_shape_size = Convert2SizeTClipNeg(rhs_shape_); + auto output_shape_size = Convert2SizeTClipNeg(output_shape_); + BroadcastArith(lhs_shape_size, rhs_shape_size, output_shape_size, BROADCAST_TYPE_REALDIV, input_addr, + reduce_workspace_addr, output_addr, reinterpret_cast(stream_ptr)); return true; } @@ -98,21 +101,19 @@ class L2NormalizeGpuKernelMod : public DeprecatedNativeGpuKernelMod { auto output_shape = common::AnfAlgo::GetOutputInferShape(kernel_node, 0); is_null_input_ = CHECK_SHAPE_NULL(inputA_shape, kernel_name_, "input") || CHECK_SHAPE_NULL(output_shape, kernel_name_, "output"); - if (is_null_input_) { + if (is_null_input_ || AnfAlgo::IsShapesDynamic({inputA_shape, output_shape})) { InitSizeLists(); return true; } - output_size_ = sizeof(T); - for (auto dim : output_shape) { - output_size_ *= dim; - } + output_size_ = sizeof(T) * SizeOf(output_shape); + CheckTensorSize({inputA_shape, output_shape}); if (inputA_shape.size() > MAX_DIMS) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input cannot be greater than " << MAX_DIMS << ", but got " << inputA_shape.size(); } - std::vector outputC_shape = output_shape; + ShapeVector outputC_shape = output_shape; if ((size_t)axis_ >= output_shape.size()) { MS_LOG(EXCEPTION) << "For 'L2NormalizeGpuKernelMod', axis_ must be less than the rank of output " << "but got axis_: " << axis_ << ", rank of output: " << output_shape.size(); @@ -200,9 +201,9 @@ class L2NormalizeGpuKernelMod : public DeprecatedNativeGpuKernelMod { reduce_indices_, CUDNN_32BIT_INDICES), "cudnnSetReduceTensorDescriptor failed"); } - void InferInAndOutDesc(const std::vector &input_shape, const std::vector &output_shape) { - std::vector inputA; - std::vector outputC_shape = output_shape; + void InferInAndOutDesc(const ShapeVector &input_shape, const ShapeVector &output_shape) { + ShapeVector inputA; + ShapeVector outputC_shape = output_shape; const int split_dim = 4; if (input_shape.size() <= split_dim) { @@ -218,7 +219,7 @@ class L2NormalizeGpuKernelMod : public DeprecatedNativeGpuKernelMod { } } - std::vector outputC; + ShapeVector outputC; if (outputC_shape.size() <= split_dim) { ShapeNdTo4d(outputC_shape, &outputC); @@ -251,9 +252,9 @@ class L2NormalizeGpuKernelMod : public DeprecatedNativeGpuKernelMod { size_t workspace_size_; float epsilon_; int axis_; - std::vector lhs_shape_; - std::vector rhs_shape_; - std::vector output_shape_; + ShapeVector lhs_shape_; + ShapeVector rhs_shape_; + ShapeVector output_shape_; }; } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/l2normalize_grad_gpu_kernel.h 
b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/l2normalize_grad_gpu_kernel.h index 6e0ee918896..de595d0e4e3 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/l2normalize_grad_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/l2normalize_grad_gpu_kernel.h @@ -106,7 +106,7 @@ class L2NormalizeGradGpuKernelMod : public DeprecatedNativeGpuKernelMod { return true; } - bool CheckInputShape(const std::vector<size_t> &output_shape) { + bool CheckInputShape(const ShapeVector &output_shape) { for (auto &shape : input_shape_list_) { if (output_shape != shape) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the shape of input and output must be the same, but " @@ -114,6 +114,10 @@ class L2NormalizeGradGpuKernelMod : public DeprecatedNativeGpuKernelMod { << ", the shape of output: " << CONVERT_VECTOR_TO_STRING(output_shape); } } + if (IsDynamic(input_shape_list_[0])) { + return true; + } + is_null_input_ = CHECK_SHAPE_NULL(input_shape_list_[0], kernel_name_, "input"); if (is_null_input_) { InitSizeLists(); @@ -143,7 +147,7 @@ class L2NormalizeGradGpuKernelMod : public DeprecatedNativeGpuKernelMod { } auto output_shape = common::AnfAlgo::GetOutputInferShape(kernel_node, 0); is_null_input_ = CHECK_SHAPE_NULL(output_shape, kernel_name_, "output"); - if (is_null_input_) { + if (is_null_input_ || IsDynamic(output_shape)) { InitSizeLists(); return true; } @@ -151,12 +155,8 @@ class L2NormalizeGradGpuKernelMod : public DeprecatedNativeGpuKernelMod { return true; } - output_size_ = sizeof(T); - for (auto dim : output_shape) { - output_size_ *= dim; - } - - std::vector<size_t> output_reduce_shape = output_shape; + output_size_ = sizeof(T) * SizeOf(output_shape); + ShapeVector output_reduce_shape = output_shape; if ((size_t)axis_ >= output_shape.size()) { MS_LOG(EXCEPTION) << "For 'L2NormalizeGradGpuKernelMod', axis_ must be less than the rank of output " << "but got axis_: " << axis_ << ", rank of output: " << output_shape.size(); @@ -168,9 +168,9 @@ class L2NormalizeGradGpuKernelMod : public DeprecatedNativeGpuKernelMod { output_shape_.resize(MAX_DIMS, 1); all_match_ = true; for (size_t i = 0; i < output_shape.size(); i++) { - output_shape_[i] = output_shape[i]; - lhs_shape_[i] = output_shape[i]; - rhs_shape_[i] = output_reduce_shape[i]; + output_shape_[i] = LongToSizeClipNeg(output_shape[i]); + lhs_shape_[i] = LongToSizeClipNeg(output_shape[i]); + rhs_shape_[i] = LongToSizeClipNeg(output_reduce_shape[i]); if (lhs_shape_[i] != rhs_shape_[i]) { all_match_ = false; } @@ -263,9 +263,9 @@ class L2NormalizeGradGpuKernelMod : public DeprecatedNativeGpuKernelMod { "cudnnSetReduceTensorDescriptor failed"); return; } - void InferInAndOutDesc(const std::vector<size_t> &input_shape, const std::vector<size_t> &output_shape) { - std::vector<size_t> inputA; - std::vector<size_t> outputC_shape = output_shape; + void InferInAndOutDesc(const ShapeVector &input_shape, const ShapeVector &output_shape) { + ShapeVector inputA; + ShapeVector outputC_shape = output_shape; constexpr int split_dim = 4; CheckTensorSize({input_shape, output_shape}); if (input_shape.size() <= split_dim) { @@ -281,7 +281,7 @@ class L2NormalizeGradGpuKernelMod : public DeprecatedNativeGpuKernelMod { } } - std::vector<size_t> outputC; + ShapeVector outputC; if (outputC_shape.size() <= split_dim) { ShapeNdTo4d(outputC_shape, &outputC); @@ -312,7 +312,7 @@ class L2NormalizeGradGpuKernelMod : public DeprecatedNativeGpuKernelMod { bool is_null_input_; std::string kernel_name_; - std::vector<std::vector<size_t>> input_shape_list_; + std::vector<ShapeVector> input_shape_list_; size_t output_size_; size_t workspace_size_;
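The member change above (std::vector<std::vector<size_t>> input_shape_list_ becoming std::vector<ShapeVector>) follows the convention used throughout this change: shape containers move from unsigned size_t vectors to ShapeVector, whose signed int64_t entries can hold -1 for a dimension that is unknown until runtime. That sentinel is what the IsDynamic guards added in these kernels test for. A minimal sketch of the assumed convention (illustrative only; the real ShapeVector alias and IsDynamic helper are defined in MindSpore's core utilities):

#include <algorithm>
#include <cstdint>
#include <vector>

// Signed element type so that -1 can mark a not-yet-known dimension.
using ShapeVector = std::vector<int64_t>;

// Assumed semantics of the IsDynamic guard: any negative dimension means
// buffer sizes cannot be computed yet, so Init/Resize must return early.
bool IsDynamic(const ShapeVector &shape) {
  return std::any_of(shape.begin(), shape.end(), [](int64_t dim) { return dim < 0; });
}

Under this convention the early returns are safe: the kernel is sized again once the real dimensions are known.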
float epsilon_; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/lamb_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/lamb_gpu_kernel.h index 51d473e91ab..5bdcbb3b8e4 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/lamb_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/lamb_gpu_kernel.h @@ -117,41 +117,32 @@ class LambGpuKernelMod : public NativeGpuKernelMod { InitResource(); InitParamSizeByType(); - auto covert_int64_shape_to_sizet_shape = [=](std::vector int64_shape) -> std::vector { - std::vector size_t_shape; - (void)std::transform(int64_shape.begin(), int64_shape.end(), std::back_inserter(size_t_shape), LongToSize); - return size_t_shape; - }; - auto variable_int64_shape = inputs[kVarIndex]->GetShapeVector(); auto m_int64_shape = inputs[kMIndex]->GetShapeVector(); auto v_int64_shape = inputs[kVIndex]->GetShapeVector(); auto gradient_int64_shape = inputs[kGradIndex]->GetShapeVector(); + if (AnfAlgo::IsShapesDynamic({variable_int64_shape, m_int64_shape, v_int64_shape, gradient_int64_shape})) { + return true; + } - std::vector variable_shape = covert_int64_shape_to_sizet_shape(variable_int64_shape); - std::vector m_shape = covert_int64_shape_to_sizet_shape(m_int64_shape); - std::vector v_shape = covert_int64_shape_to_sizet_shape(v_int64_shape); - std::vector gradient_shape = covert_int64_shape_to_sizet_shape(gradient_int64_shape); - - is_null_input_ = CHECK_SHAPE_NULL(variable_shape, kernel_name_, "var") || - CHECK_SHAPE_NULL(m_shape, kernel_name_, "m") || CHECK_SHAPE_NULL(v_shape, kernel_name_, "v") || - CHECK_SHAPE_NULL(gradient_shape, kernel_name_, "gradient"); + is_null_input_ = CHECK_SHAPE_NULL(variable_int64_shape, kernel_name_, "var") || + CHECK_SHAPE_NULL(m_int64_shape, kernel_name_, "m") || + CHECK_SHAPE_NULL(v_int64_shape, kernel_name_, "v") || + CHECK_SHAPE_NULL(gradient_int64_shape, kernel_name_, "gradient"); if (is_null_input_) { InitSizeLists(); return 0; } - InitParamSizeByShape(variable_shape, m_shape, v_shape, gradient_shape); + InitParamSizeByShape(variable_int64_shape, m_int64_shape, v_int64_shape, gradient_int64_shape); auto output_int64_shape = outputs[0]->GetShapeVector(); - std::vector output_shape = covert_int64_shape_to_sizet_shape(output_int64_shape); - - size_t input_dim = variable_shape.size(); - if (!CheckValidShape(variable_shape, output_shape, input_dim)) { + size_t input_dim = variable_int64_shape.size(); + if (!CheckValidShape(variable_int64_shape, output_int64_shape, input_dim)) { return 0; } - InitShapeInfo(variable_shape, output_shape); + InitShapeInfo(variable_int64_shape, output_int64_shape); // Determine the reduce operation. 
CHECK_CUDNN_RET_WITH_EXCEPT_NOTRACE( cudnnSetReduceTensorDescriptor(reduce_tensor_descriptor_, CUDNN_REDUCE_TENSOR_NORM2, CUDNN_DATA_FLOAT, nan_prop_, @@ -220,8 +211,7 @@ class LambGpuKernelMod : public NativeGpuKernelMod { "For " + kernel_name_ + " cudnnDestroyTensorDescriptor failed."); } - bool CheckValidShape(const std::vector &input_shape, const std::vector &output_shape, - size_t input_dim) { + bool CheckValidShape(const ShapeVector &input_shape, const ShapeVector &output_shape, size_t input_dim) { is_null_input_ = CHECK_NULL_INPUT(input_shape) || CHECK_NULL_INPUT(output_shape); if (is_null_input_) { MS_LOG(WARNING) << "For 'LambGpuKernelMod', input or output is null."; @@ -257,8 +247,8 @@ class LambGpuKernelMod : public NativeGpuKernelMod { trust_ratio_size_ = sizeof(float); } - void InitParamSizeByShape(const std::vector &variable_shape, const std::vector &m_shape, - const std::vector &v_shape, const std::vector &gradient_shape) { + void InitParamSizeByShape(const ShapeVector &variable_shape, const ShapeVector &m_shape, const ShapeVector &v_shape, + const ShapeVector &gradient_shape) { for (size_t i = 0; i < variable_shape.size(); i++) { variable_size_ *= variable_shape[i]; // save intermediate value @@ -335,21 +325,21 @@ class LambGpuKernelMod : public NativeGpuKernelMod { } } - void InitShapeInfo(const std::vector &input_shape, const std::vector &output_shape) { + void InitShapeInfo(const ShapeVector &input_shape, const ShapeVector &output_shape) { // Determine which dimension will be reduced. - std::vector reduce_output_shape = output_shape; + ShapeVector reduce_output_shape = output_shape; std::fill(reduce_output_shape.begin(), reduce_output_shape.end(), 1); // Infer input and output descriptor. InferInAndOutDesc(input_shape, reduce_output_shape); } - void InferInAndOutDesc(const std::vector &input_shape, const std::vector &reduce_output_shape) { + void InferInAndOutDesc(const ShapeVector &input_shape, const ShapeVector &reduce_output_shape) { constexpr size_t split_dim = 4; constexpr size_t dim_idx_two = 2; constexpr size_t dim_idx_three = 3; if (input_shape.size() <= split_dim) { - std::vector new_input_shape; + ShapeVector new_input_shape; ShapeNdTo4d(input_shape, &new_input_shape); CHECK_CUDNN_RET_WITH_EXCEPT_NOTRACE( cudnnSetTensor4dDescriptor(input_descriptor_, CUDNN_TENSOR_NCHW, data_type_, new_input_shape[0], @@ -359,7 +349,7 @@ class LambGpuKernelMod : public NativeGpuKernelMod { CudnnSetTensorNdDescriptor(input_shape, input_descriptor_, data_type_, kernel_name_); } if (reduce_output_shape.size() <= split_dim) { - std::vector new_reduce_output_shape; + ShapeVector new_reduce_output_shape; ShapeNdTo4d(reduce_output_shape, &new_reduce_output_shape); CHECK_CUDNN_RET_WITH_EXCEPT_NOTRACE( diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/layer_norm_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/layer_norm_gpu_kernel.h index 5f08cb0ed03..b6ab3ab9682 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/layer_norm_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/layer_norm_gpu_kernel.h @@ -54,7 +54,7 @@ class LayerNormGpuKernelMod : public DeprecatedNativeGpuKernelMod { epsilon_ = static_cast(GetAttr(kernel_node, "epsilon")); auto input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input_x"); - if (is_null_input_) { + if (is_null_input_ || IsDynamic(input_shape)) { InitSizeLists(); return true; } diff --git 
a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/local_response_norm_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/local_response_norm_gpu_kernel.h index 2be6871d738..3273795fbc5 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/local_response_norm_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/local_response_norm_gpu_kernel.h @@ -109,7 +109,11 @@ class LocalResponseNormGpuKernelMod : public DeprecatedNativeGpuKernelMod { } InitResource(); - auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); + auto shape_signed = AnfAlgo::GetInputDeviceShape(kernel_node, 0); + if (IsDynamic(shape_signed)) { + return true; + } + auto input_shape = Convert2SizeTClipNeg(shape_signed); is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name_, "input"); if (is_null_input_) { InitSizeLists(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/local_response_norm_grad_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/local_response_norm_grad_gpu_kernel.h index 1b42d2a3028..a6021e109ab 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/local_response_norm_grad_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/local_response_norm_grad_gpu_kernel.h @@ -130,7 +130,11 @@ class LocalResponseNormGradGpuKernelMod : public DeprecatedNativeGpuKernelMod { } InitResource(); - auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); + auto shape_signed = AnfAlgo::GetInputDeviceShape(kernel_node, 0); + auto input_shape = Convert2SizeTClipNeg(shape_signed); + if (IsDynamic(shape_signed)) { + return true; + } is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name_, "input"); if (is_null_input_) { InitSizeLists(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/lstm_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/lstm_gpu_kernel.h index cbe54da937d..f85102cde87 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/lstm_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/lstm_gpu_kernel.h @@ -95,7 +95,7 @@ class LstmGpuKernelMod : public DeprecatedNativeGpuKernelMod { cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))); auto input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); - if (is_null_input_) { + if (is_null_input_ || IsDynamic(input_shape)) { InitSizeLists(); return true; } @@ -103,9 +103,9 @@ class LstmGpuKernelMod : public DeprecatedNativeGpuKernelMod { MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input cannot be less than 3, but got " << input_shape.size(); } - seq_len_ = SizeToInt(input_shape[0]); - batch_size_ = SizeToInt(input_shape[1]); - input_size_ = SizeToInt(input_shape[2]); + seq_len_ = LongToInt(input_shape[0]); + batch_size_ = LongToInt(input_shape[1]); + input_size_ = LongToInt(input_shape[2]); input_size_ = static_cast(GetAttr(kernel_node, "input_size")); hidden_size_ = static_cast(GetAttr(kernel_node, "hidden_size")); @@ -151,7 +151,10 @@ class LstmGpuKernelMod : public DeprecatedNativeGpuKernelMod { "set rnn_desc failed"); #endif auto weight_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); - is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "weight"); + if (IsDynamic(weight_shape)) { + return true; + } + is_null_input_ = CHECK_SHAPE_NULL(weight_shape, kernel_name, "weight"); if (is_null_input_) { InitSizeLists(); return true; @@ -160,7 +163,7 @@ class LstmGpuKernelMod : public 
DeprecatedNativeGpuKernelMod { MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of weight cannot be less than 3, but got " << weight_shape.size(); } - size_t weight_size = weight_shape[0] * weight_shape[1] * weight_shape[2] * sizeof(T); + size_t weight_size = LongToSizeClipNeg(weight_shape[0] * weight_shape[1] * weight_shape[2]) * sizeof(T); CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnGetRNNParamsSize(handle_, rnn_desc_, x_desc_[0], &weight_size_, cudnn_data_type_), "get weight_size_ failed"); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/lstm_grad_data_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/lstm_grad_data_gpu_kernel.h index f5899992967..9abd050689f 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/lstm_grad_data_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/lstm_grad_data_gpu_kernel.h @@ -114,6 +114,9 @@ class LstmGradDataGpuKernelMod : public DeprecatedNativeGpuKernelMod { InitResource(); cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))); auto input_shape = common::AnfAlgo::GetOutputInferShape(kernel_node, 0); + if (IsDynamic(input_shape)) { + return true; + } is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); if (is_null_input_) { InitSizeLists(); @@ -123,8 +126,8 @@ class LstmGradDataGpuKernelMod : public DeprecatedNativeGpuKernelMod { MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input cannot be less than 2, but got " << input_shape.size(); } - seq_len_ = SizeToInt(input_shape[0]); - batch_size_ = SizeToInt(input_shape[1]); + seq_len_ = LongToInt(input_shape[0]); + batch_size_ = LongToInt(input_shape[1]); GetAttrs(kernel_node); cudnnRNNInputMode_t input_mode = CUDNN_LINEAR_INPUT; cudnnDirectionMode_t direction = bidirectional_ ? 
CUDNN_BIDIRECTIONAL : CUDNN_UNIDIRECTIONAL; @@ -170,8 +173,8 @@ class LstmGradDataGpuKernelMod : public DeprecatedNativeGpuKernelMod { #endif const size_t kPrevOutput4th = 4; auto weight_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kPrevOutput4th); - is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "weight"); - if (is_null_input_) { + is_null_input_ = CHECK_SHAPE_NULL(weight_shape, kernel_name, "weight"); + if (is_null_input_ || IsDynamic(weight_shape)) { InitSizeLists(); return true; } @@ -179,7 +182,7 @@ class LstmGradDataGpuKernelMod : public DeprecatedNativeGpuKernelMod { MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of weight cannot be less than 3, but got " << weight_shape.size(); } - size_t weight_size = weight_shape[0] * weight_shape[1] * weight_shape[2] * sizeof(T); + size_t weight_size = LongToSizeClipNeg(weight_shape[0] * weight_shape[1] * weight_shape[2]) * sizeof(T); CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnGetRNNParamsSize(handle_, rnn_desc_, dx_desc_[0], &weight_size_, cudnn_data_type_), "get weight_size_ failed"); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/lstm_grad_weight_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/lstm_grad_weight_gpu_kernel.h index 6b23788a277..ae9be8d9628 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/lstm_grad_weight_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/lstm_grad_weight_gpu_kernel.h @@ -91,6 +91,9 @@ class LstmGradWeightGpuKernelMod : public DeprecatedNativeGpuKernelMod { InitResource(); cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))); auto input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + if (IsDynamic(input_shape)) { + return true; + } is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); if (is_null_input_) { InitSizeLists(); @@ -100,8 +103,8 @@ class LstmGradWeightGpuKernelMod : public DeprecatedNativeGpuKernelMod { MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input cannot be less than 2, but got " << input_shape.size(); } - seq_len_ = SizeToInt(input_shape[0]); - batch_size_ = SizeToInt(input_shape[1]); + seq_len_ = LongToInt(input_shape[0]); + batch_size_ = LongToInt(input_shape[1]); input_size_ = static_cast(GetAttr(kernel_node, "input_size")); hidden_size_ = static_cast(GetAttr(kernel_node, "hidden_size")); @@ -139,7 +142,11 @@ class LstmGradWeightGpuKernelMod : public DeprecatedNativeGpuKernelMod { hidden_size_, hidden_size_, num_layers_, dropout_desc_, 0), "set rnn_desc failed"); #endif - auto weight_shape = common::AnfAlgo::GetOutputInferShape(kernel_node, 0); + auto shape_signed = common::AnfAlgo::GetOutputInferShape(kernel_node, 0); + if (IsDynamic(shape_signed)) { + return true; + } + auto weight_shape = Convert2SizeTClipNeg(shape_signed); is_null_input_ = CHECK_SHAPE_NULL(weight_shape, kernel_name, "weight"); if (is_null_input_) { InitSizeLists(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/maxpool_with_argmax_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/maxpool_with_argmax_gpu_kernel.h index 43027ef3a16..f29f1d78d56 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/maxpool_with_argmax_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/maxpool_with_argmax_gpu_kernel.h @@ -88,29 +88,23 @@ class MaxPoolWithArgmaxFwdGpuKernelMod : public DeprecatedNativeGpuKernelMod { auto output_shape = common::AnfAlgo::GetOutputInferShape(kernel_node, 0); is_null_input_ = 
CHECK_SHAPE_NULL(input_shape, kernel_name, "input") || CHECK_SHAPE_NULL(output_shape, kernel_name, "output"); - if (is_null_input_) { + if (is_null_input_ || AnfAlgo::IsShapesDynamic({input_shape, output_shape})) { InitSizeLists(); return true; } - input_size_ = sizeof(T); - for (auto x : input_shape) { - input_size_ *= x; - } - output_size_ = sizeof(T); - for (auto x : output_shape) { - output_size_ *= x; - } + input_size_ = sizeof(T) * SizeOf(input_shape); + output_size_ = sizeof(T) * SizeOf(output_shape); if (input_shape.size() < kInputDimLowerLimit || output_shape.size() < kOutputDimLowerLimit) { MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input and output cannot be less than 4, but " << "got the dimension of input: " << input_shape.size() << ", the dimension of output: " << output_shape.size(); } - n_ = SizeToInt(input_shape[kInputIndexForN]); - c_ = SizeToInt(input_shape[kInputIndexForC]); - input_height_ = SizeToInt(input_shape[kInputIndexForH]); - input_width_ = SizeToInt(input_shape[kInputIndexForW]); - output_height_ = SizeToInt(output_shape[kOutputIndexForH]); - output_width_ = SizeToInt(output_shape[kOutputIndexForW]); + n_ = LongToInt(input_shape[kInputIndexForN]); + c_ = LongToInt(input_shape[kInputIndexForC]); + input_height_ = LongToInt(input_shape[kInputIndexForH]); + input_width_ = LongToInt(input_shape[kInputIndexForW]); + output_height_ = LongToInt(output_shape[kOutputIndexForH]); + output_width_ = LongToInt(output_shape[kOutputIndexForW]); std::vector window; auto prim = common::AnfAlgo::GetCNodePrimitive(kernel_node); MS_EXCEPTION_IF_NULL(prim); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/maxpool_with_argmax_grad_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/maxpool_with_argmax_grad_gpu_kernel.h index 841117e5a3f..0b0a0193070 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/maxpool_with_argmax_grad_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/maxpool_with_argmax_grad_gpu_kernel.h @@ -84,36 +84,24 @@ class MaxPoolWithArgmaxGradGpuKernelMod : public DeprecatedNativeGpuKernelMod { is_null_input_ = CHECK_SHAPE_NULL(x_shape, kernel_name, "x") || CHECK_SHAPE_NULL(dy_shape, kernel_name, "dy") || CHECK_SHAPE_NULL(index_shape, kernel_name, "index") || CHECK_SHAPE_NULL(dx_shape, kernel_name, "dx"); - if (is_null_input_) { + if (is_null_input_ || AnfAlgo::IsShapesDynamic({x_shape, dy_shape, index_shape, dx_shape})) { InitSizeLists(); return true; } - x_size_ = sizeof(T); - for (auto x : x_shape) { - x_size_ *= x; - } - dy_size_ = sizeof(T); - for (auto x : dy_shape) { - dy_size_ *= x; - } - index_size_ = sizeof(S); - for (auto x : index_shape) { - index_size_ *= x; - } - dx_size_ = sizeof(T); - for (auto x : dx_shape) { - dx_size_ *= x; - } + x_size_ = sizeof(T) * SizeOf(x_shape); + dy_size_ = sizeof(T) * SizeOf(dy_shape); + index_size_ = sizeof(S) * SizeOf(index_shape); + dx_size_ = sizeof(T) * SizeOf(dx_shape); if (x_shape.size() < kXDimLowerLimit || dy_shape.size() < kDyDimLowerLimit) { MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of x and dy cannot be less than 4, but got " << "the dimension of x: " << x_shape.size() << ", the dimension of dy: " << dy_shape.size(); } - n_ = SizeToInt(x_shape[kXIndexForN]); - c_ = SizeToInt(x_shape[kXIndexForC]); - x_height_ = SizeToInt(x_shape[kXIndexForH]); - x_width_ = SizeToInt(x_shape[kXIndexForW]); - dy_height_ = SizeToInt(dy_shape[kDyIndexForH]); - dy_width_ = SizeToInt(dy_shape[kDyIndexForW]); + n_ = LongToSizeClipNeg(x_shape[kXIndexForN]); 
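The ClipNeg conversions introduced here (and continued just below for the remaining NCHW fields) are not plain casts: converting a negative, still-dynamic int64_t dimension straight to size_t would wrap around to a huge value. A minimal sketch of the assumed behavior, hypothetical rather than the actual MindSpore helpers:

#include <cstddef>
#include <cstdint>
#include <vector>

// Clip an unknown (-1) dimension to 0 instead of letting the
// int64_t -> size_t conversion wrap to roughly 1.8e19.
size_t LongToSizeClipNeg(int64_t v) { return v < 0 ? 0 : static_cast<size_t>(v); }

// Element-wise variant used where whole shapes feed size_t-based APIs.
std::vector<size_t> Convert2SizeTClipNeg(const std::vector<int64_t> &shape) {
  std::vector<size_t> result;
  result.reserve(shape.size());
  for (int64_t dim : shape) {
    result.push_back(LongToSizeClipNeg(dim));
  }
  return result;
}

Clipping keeps the size arithmetic harmless if a dynamic shape slips past a guard, at the cost of silently producing zero-sized buffers rather than failing loudly.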
+ c_ = LongToSizeClipNeg(x_shape[kXIndexForC]); + x_height_ = LongToSizeClipNeg(x_shape[kXIndexForH]); + x_width_ = LongToSizeClipNeg(x_shape[kXIndexForW]); + dy_height_ = LongToSizeClipNeg(dy_shape[kDyIndexForH]); + dy_width_ = LongToSizeClipNeg(dy_shape[kDyIndexForW]); InitSizeLists(); return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/mirror_pad_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/mirror_pad_gpu_kernel.h index ac826b04086..9e0f1aa32f3 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/mirror_pad_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/mirror_pad_gpu_kernel.h @@ -98,7 +98,7 @@ class MirrorPadFwdGpuKernelMod : public DeprecatedNativeGpuKernelMod { is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input_x") || CHECK_SHAPE_NULL(padding_shape, kernel_name, "paddings") || CHECK_SHAPE_NULL(output_shape, kernel_name, "output"); - if (is_null_input_) { + if (is_null_input_ || AnfAlgo::IsShapesDynamic({input_shape, padding_shape, output_shape})) { InitSizeLists(); return true; } @@ -117,13 +117,13 @@ class MirrorPadFwdGpuKernelMod : public DeprecatedNativeGpuKernelMod { } for (auto in_shape : input_shape) { - input_size_ *= in_shape; - input_shape_.push_back(in_shape); + input_size_ *= LongToSizeClipNeg(in_shape); + input_shape_.push_back(LongToInt(in_shape)); } num_input_ = input_size_; input_size_ *= sizeof(T); - num_paddings_ = padding_shape[0]; + num_paddings_ = LongToSizeClipNeg(padding_shape[0]); input_size_ += IntToSize(kSymmetricCoef) * num_paddings_ * sizeof(int64_t); output_size_ = sizeof(T); @@ -133,8 +133,8 @@ class MirrorPadFwdGpuKernelMod : public DeprecatedNativeGpuKernelMod { << "got the " << output_shape.size(); } for (auto x : output_shape) { - output_size_ *= x; - output_shape_.push_back(x); + output_size_ *= LongToSizeClipNeg(x); + output_shape_.push_back(LongToInt(x)); } int max_width = input_shape_[kIndexForMaxWidth]; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/mirror_pad_grad_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/mirror_pad_grad_gpu_kernel.h index 36dd3dd4f8f..00c4e0d6248 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/mirror_pad_grad_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/mirror_pad_grad_gpu_kernel.h @@ -87,11 +87,11 @@ class MirrorPadBackGpuKernelMod : public DeprecatedNativeGpuKernelMod { auto input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); auto padding_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); - auto output_shape = common::AnfAlgo::GetOutputInferShape(kernel_node, 0); + auto output_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetOutputInferShape(kernel_node, 0)); is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name_, "input_x") || CHECK_SHAPE_NULL(padding_shape, kernel_name_, "paddings") || CHECK_SHAPE_NULL(output_shape, kernel_name_, "output"); - if (is_null_input_) { + if (is_null_input_ || AnfAlgo::IsShapesDynamic({input_shape, padding_shape})) { InitSizeLists(); return true; } @@ -109,14 +109,14 @@ class MirrorPadBackGpuKernelMod : public DeprecatedNativeGpuKernelMod { } input_size_ = sizeof(T); for (auto in_shape : input_shape) { - input_size_ *= in_shape; - input_shape_.push_back(in_shape); + input_size_ *= LongToSizeClipNeg(in_shape); + input_shape_.push_back(LongToInt(in_shape)); } num_input_ = input_size_; // account for paddings in input size -> passed as int64_ts - num_paddings_ = padding_shape[0]; + num_paddings_ = 
LongToSizeClipNeg(padding_shape[0]); input_size_ += (IntToSize(kSymmetricCoef) * num_paddings_ * sizeof(int64_t)); if (output_shape.size() == kDimNeedPadBatch) { @@ -133,7 +133,7 @@ class MirrorPadBackGpuKernelMod : public DeprecatedNativeGpuKernelMod { output_size_ = sizeof(T); for (auto x : output_shape) { output_size_ *= x; - output_shape_.push_back(x); + output_shape_.push_back(SizeToInt(x)); } // calc workspace size diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/momentum_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/momentum_gpu_kernel.h index 7d3d8d5e306..e7144bb381d 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/momentum_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/momentum_gpu_kernel.h @@ -77,17 +77,10 @@ class MomentumGpuKernelMod : public DeprecatedNativeGpuKernelMod { InitSizeLists(); return true; } - for (size_t i = 0; i < variable_shape.size(); i++) { - variable_size_ *= variable_shape[i]; - } + variable_size_ *= SizeOf(variable_shape); + accumulation_size_ *= SizeOf(accumulation_shape); + gradient_size_ *= SizeOf(gradient_shape); - for (size_t i = 0; i < accumulation_shape.size(); i++) { - accumulation_size_ *= accumulation_shape[i]; - } - - for (size_t i = 0; i < gradient_shape.size(); i++) { - gradient_size_ *= gradient_shape[i]; - } InitSizeLists(); return true; } diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/nll_loss_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/nll_loss_gpu_kernel.h index d2fa0316aa5..ce69ce83e67 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/nll_loss_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/nll_loss_gpu_kernel.h @@ -56,7 +56,7 @@ class NLLLossGpuKernelMod : public DeprecatedNativeGpuKernelMod { bool Init(const CNodePtr &kernel_node) override { auto kernel_name = common::AnfAlgo::GetCNodeName(kernel_node); - std::vector input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); kernel_node_ = kernel_node; is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "logits"); if (is_null_input_) { @@ -67,11 +67,9 @@ class NLLLossGpuKernelMod : public DeprecatedNativeGpuKernelMod { MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of logits cannot be less than 2, but " << "got the " << input_shape.size(); } - n_ = static_cast(input_shape[0]); - c_ = static_cast(input_shape[1]); - for (size_t i = 0; i < input_shape.size(); i++) { - input_size_ *= input_shape[i]; - } + n_ = LongToInt(input_shape[0]); + c_ = LongToInt(input_shape[1]); + input_size_ *= SizeOf(input_shape); string reduction = GetAttr(kernel_node, "reduction"); reduction_ = kReductionModeMap[reduction]; if ((reduction_ == ReductionMode::kSum) || (reduction_ == ReductionMode::kMean)) { diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/nll_loss_grad_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/nll_loss_grad_gpu_kernel.h index cc58048df49..29f37ac34c4 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/nll_loss_grad_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/nll_loss_grad_gpu_kernel.h @@ -53,7 +53,7 @@ class NLLLossGradGpuKernelMod : public DeprecatedNativeGpuKernelMod { bool Init(const CNodePtr &kernel_node) override { auto kernel_name = common::AnfAlgo::GetCNodeName(kernel_node); - std::vector input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto input_shape = 
common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); kernel_node_ = kernel_node; is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "logits"); if (is_null_input_) { @@ -64,11 +64,9 @@ class NLLLossGradGpuKernelMod : public DeprecatedNativeGpuKernelMod { MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of logits cannot be less than 2, but " << "got the " << input_shape.size(); } - n_ = static_cast(input_shape[0]); - c_ = static_cast(input_shape[1]); - for (size_t i = 0; i < input_shape.size(); i++) { - input_size_ *= input_shape[i]; - } + n_ = LongToInt(input_shape[0]); + c_ = LongToInt(input_shape[1]); + input_size_ *= SizeOf(input_shape); string reduction = GetAttr(kernel_node, "reduction"); reduction_ = kReductionModeMap[reduction]; if (reduction_ == ReductionMode::kNone) { diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/pad_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/pad_gpu_kernel.h index e50c037ce8c..167df630850 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/pad_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/pad_gpu_kernel.h @@ -76,8 +76,8 @@ class PadFwdGpuKernelMod : public DeprecatedNativeGpuKernelMod { kernel_node_ = kernel_node; (void)CheckIONumber(kernel_node); - input_shape_ = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); - std::vector output_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); + input_shape_ = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0)); + auto output_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); is_null_input_ = CHECK_SHAPE_NULL(input_shape_, kernel_name_, "input") || CHECK_SHAPE_NULL(output_shape, kernel_name_, "output"); if (is_null_input_) { @@ -123,7 +123,7 @@ class PadFwdGpuKernelMod : public DeprecatedNativeGpuKernelMod { strides_.resize(input_rank_); strides_[input_rank_ - 1] = 1; for (int32_t i = input_rank_ - 2; i >= 0; i--) { - strides_[i] = output_shape[i + 1] * strides_[i + 1]; + strides_[i] = static_cast(output_shape[i + 1]) * strides_[i + 1]; } InitSizeLists(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/pooling_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/pooling_gpu_kernel.h index dfdea546685..888d8e55081 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/pooling_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/pooling_gpu_kernel.h @@ -86,6 +86,9 @@ class PoolingFwdGpuKernelMod : public DeprecatedNativeGpuKernelMod { } auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); auto output_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); + if (AnfAlgo::IsShapesDynamic({input_shape, output_shape})) { + return true; + } is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name_, "input") || CHECK_SHAPE_NULL(output_shape, kernel_name_, "output"); if (is_null_input_) { diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/pooling_grad_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/pooling_grad_gpu_kernel.h index a685dbb3588..0d99a7de310 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/pooling_grad_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/pooling_grad_gpu_kernel.h @@ -96,7 +96,7 @@ class PoolingGradGpuKernelMod : public DeprecatedNativeGpuKernelMod { bool InitShape(const CNodePtr &kernel_node, int *dimA, int *strideAin, int *dimAy, int *strideAiny, int *dimAdy, int *strideAdy, int *dimAout, int *strideAout, int nbDims) { - std::vector dout_shape, input_mask, output_shape, input_shape; + ShapeVector 
dout_shape, input_mask, output_shape, input_shape; if (kernel_name_ == kAvgPool3DGradOpName) { dout_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); output_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); @@ -119,7 +119,7 @@ class PoolingGradGpuKernelMod : public DeprecatedNativeGpuKernelMod { is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name_, "input") || CHECK_SHAPE_NULL(input_mask, kernel_name_, "mask") || CHECK_SHAPE_NULL(dout_shape, kernel_name_, "dout") || CHECK_SHAPE_NULL(output_shape, kernel_name_, "output"); - if (is_null_input_) { + if (is_null_input_ || AnfAlgo::IsShapesDynamic({input_shape, output_shape, input_mask, dout_shape})) { InitSizeLists(); return true; } diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/prelu_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/prelu_gpu_kernel.h index ee600f06cec..90259b2e5b1 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/prelu_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/prelu_gpu_kernel.h @@ -70,7 +70,7 @@ class PReLUGpuKernelMod : public DeprecatedNativeGpuKernelMod { } input_length_ = std::accumulate(input_shape.begin(), input_shape.end(), size_t(1), std::multiplies<>()); size_t input_rank = input_shape.size(); - size_t channel_num; + int64_t channel_num; if (input_rank == 0) { channel_num = 1; per_channel_length_ = 1; @@ -88,7 +88,7 @@ class PReLUGpuKernelMod : public DeprecatedNativeGpuKernelMod { << "weight: " << weight_shape.size() << ", weight.shape[0]: " << weight_shape[0] << ", the channel num: " << channel_num; } - weight_length_ = weight_shape[0]; + weight_length_ = LongToSizeClipNeg(weight_shape[0]); InitSizeLists(); return true; } diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/prelu_grad_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/prelu_grad_gpu_kernel.h index cebe4e931e9..102c5b02783 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/prelu_grad_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/prelu_grad_gpu_kernel.h @@ -79,19 +79,19 @@ class PReLUGradGpuKernelMod : public DeprecatedNativeGpuKernelMod { per_channel_length_ = 1; } else if (x_rank == 1) { channel_num = 1; - per_channel_length_ = x_shape[0]; + per_channel_length_ = LongToSizeClipNeg(x_shape[0]); } else { - channel_num = x_shape[1]; + channel_num = LongToSizeClipNeg(x_shape[1]); per_channel_length_ = std::accumulate(x_shape.begin() + 2, x_shape.end(), size_t(1), std::multiplies<>()); } - if (weight_shape.size() != 1 || (weight_shape[0] != 1 && weight_shape[0] != channel_num)) { + if (weight_shape.size() != 1 || (weight_shape[0] != 1 && LongToSizeClipNeg(weight_shape[0]) != channel_num)) { MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of weight must be equal to 1 and " << "weight.shape[0] must be equal to 1 or the channel number, but got the dimension of " << "weight: " << weight_shape.size() << ", weight.shape[0]: " << weight_shape[0] << ", the channel num: " << channel_num; } - weight_length_ = weight_shape[0]; + weight_length_ = LongToSizeClipNeg(weight_shape[0]); workspace_size_ = weight_length_ * IntToSize(GET_BLOCKS(input_length_) * GET_THREADS) * sizeof(float); InitSizeLists(); return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/ps_roi_pooling_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/ps_roi_pooling_gpu_kernel.h index d9ea2bd5428..50ec8a7f1f6 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/ps_roi_pooling_gpu_kernel.h +++ 
b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/ps_roi_pooling_gpu_kernel.h @@ -120,8 +120,8 @@ class PsROIPoolingFwdGpuKernelMod : public DeprecatedNativeGpuKernelMod { << "(number_rois, (bs, xmin, ymin, xmax, ymax)), " << "but got the rank of rois_shape: " << rois_shape.size(); } - rois_size_ = rois_shape[ROI_SHAPE_INDEX0] * rois_shape[ROI_SHAPE_INDEX1] * sizeof(T); - rois_shape_ = {static_cast(rois_shape[ROI_SHAPE_INDEX0]), static_cast(rois_shape[ROI_SHAPE_INDEX1])}; + rois_size_ = LongToSizeClipNeg(rois_shape[ROI_SHAPE_INDEX0] * rois_shape[ROI_SHAPE_INDEX1]) * sizeof(T); + rois_shape_ = {LongToInt(rois_shape[ROI_SHAPE_INDEX0]), LongToInt(rois_shape[ROI_SHAPE_INDEX1])}; // Get primitive args pooled_height_ = static_cast(GetAttr(kernel_node, "pooled_height")); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/ps_roi_pooling_grad_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/ps_roi_pooling_grad_gpu_kernel.h index e5bb0ccb0ec..756860b2bcd 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/ps_roi_pooling_grad_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/ps_roi_pooling_grad_gpu_kernel.h @@ -105,19 +105,15 @@ class PsROIPoolingBackGpuKernelMod : public DeprecatedNativeGpuKernelMod { return true; } - auto dx_shape_size = dx_shape.size(); - dx_size_ = sizeof(T); - for (size_t i = 0; i < dx_shape_size; i++) { - dx_size_ *= dx_shape[i]; - } + dx_size_ = sizeof(T) * SizeOf(dx_shape); if (rois_shape.size() != ROI_SHAPE_SIZE) { MS_LOG(EXCEPTION) << "For 'PsROIPoolingFwdGpuKernelMod', the rank of rois_shape must be 2 " << "(number_rois, (bs, xmin, ymin, xmax, ymax)), " << "but got the rank of rois_shape: " << rois_shape.size(); } - rois_shape_ = {static_cast(rois_shape[ROI_SHAPE_INDEX0]), static_cast(rois_shape[ROI_SHAPE_INDEX1])}; - rois_size_ = rois_shape[ROI_SHAPE_INDEX0] * rois_shape[ROI_SHAPE_INDEX1] * sizeof(T); + rois_shape_ = {LongToInt(rois_shape[ROI_SHAPE_INDEX0]), LongToInt(rois_shape[ROI_SHAPE_INDEX1])}; + rois_size_ = LongToSizeClipNeg(rois_shape[ROI_SHAPE_INDEX0] * rois_shape[ROI_SHAPE_INDEX1]) * sizeof(T); if (mapping_channel_shape.size() != MAPPING_CHANNEL_SHAPE) { MS_LOG(EXCEPTION) << "For 'PsROIPoolingFwdGpuKernelMod', the rank of mapping_channel_shape must be" diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/relu_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/relu_gpu_kernel.h index 9d6eda061ba..fded3a61742 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/relu_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/relu_gpu_kernel.h @@ -51,7 +51,7 @@ class ReLUFwdGpuKernelMod : public DeprecatedNativeGpuKernelMod { if (input_num != 1) { MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs must be 1, but got " << input_num; } - auto input_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0); + auto input_shape = Convert2SizeTClipNeg(AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0)); is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); if (is_null_input_) { InitSizeLists(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/relu_grad_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/relu_grad_gpu_kernel.h index 8d73d201c1f..92a283af039 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/relu_grad_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/relu_grad_gpu_kernel.h @@ -55,7 +55,7 @@ class ReluGradFwdGpuKernelMod : public DeprecatedNativeGpuKernelMod { if (input_num != 2) { MS_LOG(EXCEPTION) << "For '" << kernel_name << 
"', the number of inputs must be 2, but got " << input_num; } - auto input_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0); + auto input_shape = Convert2SizeTClipNeg(AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0)); is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); if (is_null_input_) { InitSizeLists(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/resize_bilinear_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/resize_bilinear_gpu_kernel.h index 7cf7a4ac9c5..5424d838ed5 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/resize_bilinear_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/resize_bilinear_gpu_kernel.h @@ -57,8 +57,8 @@ class ResizeBilinearGpuKernelMod : public DeprecatedNativeGpuKernelMod { if (output_num != 1) { MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs must be 1, but got " << output_num; } - std::vector input_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0); - std::vector output_shape = AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, 0); + auto input_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0); + auto output_shape = AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, 0); is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input") || CHECK_SHAPE_NULL(output_shape, kernel_name, "output"); if (is_null_input_) { @@ -70,20 +70,14 @@ class ResizeBilinearGpuKernelMod : public DeprecatedNativeGpuKernelMod { << "got the dimension of input: " << input_shape.size() << ", the dimension of output: " << output_shape.size(); } - n_ = SizeToInt(input_shape[0]); - c_ = SizeToInt(input_shape[1]); - input_h_ = SizeToInt(input_shape[2]); - input_w_ = SizeToInt(input_shape[3]); - output_h_ = SizeToInt(output_shape[2]); - output_w_ = SizeToInt(output_shape[3]); - input_size_ = sizeof(T); - for (auto x : input_shape) { - input_size_ *= x; - } - output_size_ = sizeof(T); - for (auto x : output_shape) { - output_size_ *= x; - } + n_ = LongToInt(input_shape[0]); + c_ = LongToInt(input_shape[1]); + input_h_ = LongToInt(input_shape[2]); + input_w_ = LongToInt(input_shape[3]); + output_h_ = LongToInt(output_shape[2]); + output_w_ = LongToInt(output_shape[3]); + input_size_ = sizeof(T) * SizeOf(input_shape); + output_size_ = sizeof(T) * SizeOf(output_shape); align_corners_ = GetAttr(kernel_node, "align_corners"); half_pixel_centers_ = GetAttr(kernel_node, "half_pixel_centers"); InitSizeLists(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/resize_bilinear_grad_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/resize_bilinear_grad_gpu_kernel.h index 9e9fb58ed7a..67426243e86 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/resize_bilinear_grad_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/resize_bilinear_grad_gpu_kernel.h @@ -73,9 +73,9 @@ class ResizeBilinearGradGpuKernelMod : public DeprecatedNativeGpuKernelMod { if (output_num != 1) { MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs must be 1, but got " << output_num; } - std::vector dy_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0); - std::vector x_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 1); - std::vector dx_shape = AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, 0); + auto dy_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0); + auto x_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 1); + auto dx_shape = AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, 0); 
is_null_input_ = CHECK_SHAPE_NULL(dy_shape, kernel_name, "dy") || CHECK_SHAPE_NULL(x_shape, kernel_name, "x") || CHECK_SHAPE_NULL(dx_shape, kernel_name, "dx"); if (is_null_input_) { @@ -94,20 +94,14 @@ class ResizeBilinearGradGpuKernelMod : public DeprecatedNativeGpuKernelMod { MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of dx must be equal to 4, but got " << dx_shape.size(); } - n_ = SizeToInt(dy_shape[kDyIndexForN]); - c_ = SizeToInt(dy_shape[kDyIndexForC]); - dy_h_ = SizeToInt(dy_shape[kDyIndexForH]); - dy_w_ = SizeToInt(dy_shape[kDyIndexForW]); - dx_h_ = SizeToInt(dx_shape[kDxIndexForH]); - dx_w_ = SizeToInt(dx_shape[kDxIndexForW]); - dy_size_ = sizeof(T); - for (auto x : dy_shape) { - dy_size_ *= x; - } - dx_size_ = sizeof(T); - for (auto x : dx_shape) { - dx_size_ *= x; - } + n_ = LongToInt(dy_shape[kDyIndexForN]); + c_ = LongToInt(dy_shape[kDyIndexForC]); + dy_h_ = LongToInt(dy_shape[kDyIndexForH]); + dy_w_ = LongToInt(dy_shape[kDyIndexForW]); + dx_h_ = LongToInt(dx_shape[kDxIndexForH]); + dx_w_ = LongToInt(dx_shape[kDxIndexForW]); + dy_size_ = sizeof(T) * SizeOf(dy_shape); + dx_size_ = sizeof(T) * SizeOf(dx_shape); workspace_size_ = (dx_size_ / sizeof(T)) * sizeof(float); align_corners_ = GetAttr(kernel_node, "align_corners"); half_pixel_centers_ = GetAttr(kernel_node, "half_pixel_centers"); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/rmsprop_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/rmsprop_gpu_kernel.h index 40e4969912e..7d80a4fa6c9 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/rmsprop_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/rmsprop_gpu_kernel.h @@ -73,7 +73,7 @@ class RMSPropGpuKernelMod : public DeprecatedNativeGpuKernelMod { momentum_ = GetAttr(kernel_node, "momentum"); epsilon_ = GetAttr(kernel_node, "epsilon"); } - auto input_shape = common::AnfAlgo::GetOutputInferShape(kernel_node, 0); + auto input_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetOutputInferShape(kernel_node, 0)); is_null_input_ = CHECK_SHAPE_NULL(input_shape, node_name, "var"); if (is_null_input_) { InitSizeLists(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/sgd_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/sgd_gpu_kernel.h index 304ed9b8205..a653dce783d 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/sgd_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/sgd_gpu_kernel.h @@ -58,7 +58,7 @@ class SGDGpuKernelMod : public DeprecatedNativeGpuKernelMod { weight_decay_ = GetAttr(kernel_node, "weight_decay"); nesterov_ = GetAttr(kernel_node, "nesterov"); - auto input_shape = common::AnfAlgo::GetOutputInferShape(kernel_node, 0); + auto input_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetOutputInferShape(kernel_node, 0)); is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "parameters"); if (is_null_input_) { InitSizeLists(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/sigmoid_cross_entropy_with_logits_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/sigmoid_cross_entropy_with_logits_gpu_kernel.h index cd164614ddd..5f75a58ba38 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/sigmoid_cross_entropy_with_logits_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/sigmoid_cross_entropy_with_logits_gpu_kernel.h @@ -69,17 +69,9 @@ class SigmoidCrossEntropyWithLogitsGpuKernelMod : public DeprecatedNativeGpuKern InitSizeLists(); return true; } - for (size_t i = 0; i < logits_shape.size(); i++) { - logits_size_ *= logits_shape[i]; - } - - 
for (size_t i = 0; i < labels_shape.size(); i++) { - labels_size_ *= labels_shape[i]; - } - - for (size_t i = 0; i < output_shape.size(); i++) { - outputs_size_ *= output_shape[i]; - } + logits_size_ *= SizeOf(logits_shape); + labels_size_ *= SizeOf(labels_shape); + outputs_size_ *= SizeOf(output_shape); InitSizeLists(); return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/sigmoid_cross_entropy_with_logits_grad_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/sigmoid_cross_entropy_with_logits_grad_gpu_kernel.h index 18b6464e94b..b37db25e2c8 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/sigmoid_cross_entropy_with_logits_grad_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/sigmoid_cross_entropy_with_logits_grad_gpu_kernel.h @@ -67,17 +67,9 @@ class SigmoidCrossEntropyWithLogitsGradGpuKernelMod : public DeprecatedNativeGpu InitSizeLists(); return true; } - for (size_t i = 0; i < logits_shape.size(); i++) { - logits_size_ *= logits_shape[i]; - } - - for (size_t i = 0; i < labels_shape.size(); i++) { - labels_size_ *= labels_shape[i]; - } - - for (size_t i = 0; i < output_shape.size(); i++) { - outputs_size_ *= output_shape[i]; - } + logits_size_ *= SizeOf(logits_shape); + labels_size_ *= SizeOf(labels_shape); + outputs_size_ *= SizeOf(output_shape); InitSizeLists(); return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/smooth_l1_loss_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/smooth_l1_loss_gpu_kernel.h index 06b97f99db8..7e770b0e2be 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/smooth_l1_loss_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/smooth_l1_loss_gpu_kernel.h @@ -51,9 +51,7 @@ class SmoothL1LossGpuKernelMod : public DeprecatedNativeGpuKernelMod { InitSizeLists(); return true; } - for (size_t i = 0; i < input_shape.size(); i++) { - input_size_ *= input_shape[i]; - } + input_size_ *= SizeOf(input_shape); beta_ = GetAttr(kernel_node, "beta"); InitSizeLists(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/smooth_l1_loss_grad_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/smooth_l1_loss_grad_gpu_kernel.h index d3e48308658..36776f4233b 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/smooth_l1_loss_grad_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/smooth_l1_loss_grad_gpu_kernel.h @@ -52,9 +52,7 @@ class SmoothL1LossGradGpuKernelMod : public DeprecatedNativeGpuKernelMod { InitSizeLists(); return true; } - for (size_t i = 0; i < input_shape.size(); i++) { - input_size_ *= input_shape[i]; - } + input_size_ *= SizeOf(input_shape); beta_ = GetAttr(kernel_node, "beta"); InitSizeLists(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/softmax_cross_entropy_with_logits_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/softmax_cross_entropy_with_logits_gpu_kernel.h index ea002d487a0..42fa56563e0 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/softmax_cross_entropy_with_logits_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/softmax_cross_entropy_with_logits_gpu_kernel.h @@ -130,7 +130,7 @@ class SoftmaxCrossEntropyWithLogitsGpuKernelMod : public DeprecatedNativeGpuKern auto labels_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); is_null_input_ = CHECK_SHAPE_NULL(logits_shape, kernel_name_, "logits") || CHECK_SHAPE_NULL(labels_shape, kernel_name_, "labels"); - if (is_null_input_) { + if (is_null_input_ || AnfAlgo::IsShapesDynamic({logits_shape, labels_shape})) { InitSizeLists(); 
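The guard added here (its body continues below) batches the dynamic check over every shape the kernel reads, so one test covers logits and labels together. A minimal sketch of the semantics assumed for AnfAlgo::IsShapesDynamic, illustrative rather than the real implementation:

#include <algorithm>
#include <cstdint>
#include <vector>

using ShapeVector = std::vector<int64_t>;

// Assumed behavior: true if any dimension of any shape is still unknown.
bool IsShapesDynamic(const std::vector<ShapeVector> &shapes) {
  return std::any_of(shapes.begin(), shapes.end(), [](const ShapeVector &s) {
    return std::any_of(s.begin(), s.end(), [](int64_t dim) { return dim < 0; });
  });
}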
return; } @@ -139,25 +139,19 @@ class SoftmaxCrossEntropyWithLogitsGpuKernelMod : public DeprecatedNativeGpuKern size_t logits_dims = logits_shape.size(); batch_size_ = 1; for (size_t i = 0; i < logits_dims - 1; i++) { - batch_size_ *= logits_shape[i]; + batch_size_ *= LongToSizeClipNeg(logits_shape[i]); } - channel_size_ = logits_shape[logits_dims - 1]; + channel_size_ = LongToSizeClipNeg(logits_shape[logits_dims - 1]); height_ = 1; width_ = 1; logits_size_ = sizeof(T) * batch_size_ * channel_size_ * height_ * width_; + labels_size_ = sizeof(S) * SizeOf(labels_shape); - labels_size_ = 1; - size_t labels_dims = labels_shape.size(); - for (size_t i = 0; i < labels_dims; i++) { - labels_size_ *= labels_shape[i]; - } - labels_size_ *= sizeof(S); - - output1_size_ = logits_size_ / logits_shape[logits_dims - 1]; + output1_size_ = logits_size_ / LongToSizeClipNeg(logits_shape[logits_dims - 1]); output2_size_ = logits_size_; softmax_output_logits_size_ = logits_size_; } - void CheckShapeValidation(const std::vector &logits_shape, const std::vector &labels_shape) { + void CheckShapeValidation(const ShapeVector &logits_shape, const ShapeVector &labels_shape) { size_t logits_dim_length = logits_shape.size(); size_t labels_dim_length = labels_shape.size(); if (logits_dim_length == 0) { diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/softmax_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/softmax_gpu_kernel.h index 050b08a41b6..4590918c04c 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/softmax_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/softmax_gpu_kernel.h @@ -110,7 +110,11 @@ class SoftmaxGpuKernelMod : public DeprecatedNativeGpuKernelMod { if (output_num != 1) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs must be 1, but got " << output_num; } - auto input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto shape_signed = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto input_shape = Convert2SizeTClipNeg(shape_signed); + if (IsDynamic(shape_signed)) { + return true; + } is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name_, "input"); if (is_null_input_) { InitSizeLists(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/softmax_grad_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/softmax_grad_gpu_kernel.h index 998bb84eadf..ff247864c0c 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/softmax_grad_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/softmax_grad_gpu_kernel.h @@ -113,9 +113,10 @@ class SoftmaxGradGpuKernelMod : public DeprecatedNativeGpuKernelMod { if (output_num != 1) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs must be 1, but got " << output_num; } - auto input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto shape_signed = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto input_shape = Convert2SizeTClipNeg(shape_signed); is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name_, "input"); - if (is_null_input_) { + if (is_null_input_ || IsDynamic(shape_signed)) { InitSizeLists(); return true; } diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/sparse_apply_proximal_adagrad_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/sparse_apply_proximal_adagrad_kernel.h index 0494820a9e2..4954314c201 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/sparse_apply_proximal_adagrad_kernel.h +++ 
b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/sparse_apply_proximal_adagrad_kernel.h @@ -88,25 +88,11 @@ class SparseApplyProximalAdagradKernelMod : public DeprecatedNativeGpuKernelMod InitSizeLists(); return true; } - for (size_t i = 0; i < variable_shape.size(); i++) { - variable_size_ *= variable_shape[i]; - } - - for (size_t i = 0; i < accumulation_shape.size(); i++) { - accumulation_size_ *= accumulation_shape[i]; - } - - for (size_t i = 0; i < learning_rate_shape.size(); i++) { - learning_rate_size_ *= learning_rate_shape[i]; - } - - for (size_t i = 0; i < gradient_shape.size(); i++) { - gradient_size_ *= gradient_shape[i]; - } - - for (size_t i = 0; i < indices_shape.size(); i++) { - indices_size_ *= indices_shape[i]; - } + variable_size_ *= SizeOf(variable_shape); + accumulation_size_ *= SizeOf(accumulation_shape); + learning_rate_size_ *= SizeOf(learning_rate_shape); + gradient_size_ *= SizeOf(gradient_shape); + indices_size_ *= SizeOf(indices_shape); InitSizeLists(); return true; } diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/sparse_ftrl_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/sparse_ftrl_gpu_kernel.h index 80ef05a190e..ceb13f8ddf0 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/sparse_ftrl_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/sparse_ftrl_gpu_kernel.h @@ -75,7 +75,7 @@ class SparseFtrlGpuKernelMod : public DeprecatedNativeGpuKernelMod { gradient_size_ = sizeof(T); indices_size_ = sizeof(S); - auto variable_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto variable_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0)); auto accumulation_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); auto linear_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); auto gradient_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); @@ -96,28 +96,17 @@ class SparseFtrlGpuKernelMod : public DeprecatedNativeGpuKernelMod { } } - for (size_t i = 0; i < accumulation_shape.size(); i++) { - accumulation_size_ *= accumulation_shape[i]; - } - - for (size_t i = 0; i < linear_shape.size(); i++) { - linear_size_ *= linear_shape[i]; - } - - for (size_t i = 0; i < gradient_shape.size(); i++) { - gradient_size_ *= gradient_shape[i]; - } - - for (size_t i = 0; i < indices_shape.size(); i++) { - indices_size_ *= indices_shape[i]; - } + accumulation_size_ *= SizeOf(accumulation_shape); + linear_size_ *= SizeOf(linear_shape); + gradient_size_ *= SizeOf(gradient_shape); + indices_size_ *= SizeOf(indices_shape); lr_ = GetAttr(kernel_node, "lr"); l1_ = GetAttr(kernel_node, "l1"); l2_ = GetAttr(kernel_node, "l2"); lr_power_ = GetAttr(kernel_node, "lr_power"); use_locking_ = GetAttr(kernel_node, "use_locking"); - num_index_ = indices_shape[0]; + num_index_ = LongToSizeClipNeg(indices_shape[0]); InitSizeLists(); return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/sparse_softmax_cross_entropy_with_logits_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/sparse_softmax_cross_entropy_with_logits_gpu_kernel.h index 28fe4c56257..d63929e1474 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/sparse_softmax_cross_entropy_with_logits_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/sparse_softmax_cross_entropy_with_logits_gpu_kernel.h @@ -134,7 +134,7 @@ class SparseSoftmaxCrossEntropyWithLogitsGpuKernelMod : public DeprecatedNativeG auto labels_shape = 
common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); is_null_input_ = CHECK_SHAPE_NULL(logits_shape, kernel_name_, "logits") || CHECK_SHAPE_NULL(labels_shape, kernel_name_, "labels"); - if (is_null_input_) { + if (is_null_input_ || AnfAlgo::IsShapesDynamic({logits_shape, labels_shape})) { InitSizeLists(); return; } @@ -143,24 +143,19 @@ class SparseSoftmaxCrossEntropyWithLogitsGpuKernelMod : public DeprecatedNativeG size_t logits_dims = logits_shape.size(); batch_size_ = 1; for (size_t i = 0; i < logits_dims - 1; i++) { - batch_size_ *= logits_shape[i]; + batch_size_ *= LongToSizeClipNeg(logits_shape[i]); } - channel_size_ = logits_shape[logits_dims - 1]; + channel_size_ = LongToSizeClipNeg(logits_shape[logits_dims - 1]); height_ = 1; width_ = 1; logits_size_ = sizeof(T) * batch_size_ * channel_size_ * height_ * width_; - labels_size_ = 1; - size_t labels_dims = labels_shape.size(); - for (size_t i = 0; i < labels_dims; i++) { - labels_size_ *= labels_shape[i]; - } - labels_size_ *= sizeof(S); + labels_size_ = sizeof(S) * SizeOf(labels_shape); output_size_ = is_grad_ ? logits_size_ : sizeof(T); softmax_output_logits_size_ = logits_size_; } - void CheckShapeValidation(const std::vector &logits_shape, const std::vector &labels_shape) { + void CheckShapeValidation(const ShapeVector &logits_shape, const ShapeVector &labels_shape) { size_t logits_dim_length = logits_shape.size(); size_t labels_dim_length = labels_shape.size(); if (labels_dim_length != logits_dim_length - 1) { diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/other/assign_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/other/assign_gpu_kernel.h index 4988fb23e03..3e6b6cf5d6a 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/other/assign_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/other/assign_gpu_kernel.h @@ -60,10 +60,7 @@ class AssignGpuKernelMod : public DeprecatedNativeGpuKernelMod { InitSizeLists(); return true; } - input_size_ = sizeof(T); - for (size_t x : shape) { - input_size_ = input_size_ * x; - } + input_size_ = sizeof(T) * SizeOf(shape); InitSizeLists(); return true; } diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/other/boundingbox_decode_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/other/boundingbox_decode_gpu_kernel.h index c7fbd10837e..21566fe8fd5 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/other/boundingbox_decode_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/other/boundingbox_decode_gpu_kernel.h @@ -87,14 +87,6 @@ class BoundingBoxDecodeGpuKernelMod : public DeprecatedNativeGpuKernelMod { - for (size_t i = 0; i < logits_shape.size(); i++) { - rois_size_ *= logits_shape[i]; - } - - for (size_t i = 0; i < labels_shape.size(); i++) { - deltas_size_ *= labels_shape[i]; - } - - for (size_t i = 0; i < output_shape.size(); i++) { - bboxes_size_ *= output_shape[i]; - } + rois_size_ *= SizeOf(logits_shape); + deltas_size_ *= SizeOf(labels_shape); + bboxes_size_ *= SizeOf(output_shape); InitSizeLists(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/other/boundingbox_encode_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/other/boundingbox_encode_gpu_kernel.h index cc839e55f0c..ae9212ff02b 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/other/boundingbox_encode_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/other/boundingbox_encode_gpu_kernel.h @@ -82,17 +82,10 @@ class BoundingBoxEncodeGpuKernelMod : public DeprecatedNativeGpuKernelMod { InitSizeLists(); return true; } - for (size_t i = 0; i <
logits_shape.size(); i++) { - anchor_size_ *= logits_shape[i]; - } - for (size_t i = 0; i < labels_shape.size(); i++) { - groundtruth_size_ *= labels_shape[i]; - } - - for (size_t i = 0; i < output_shape.size(); i++) { - deltas_size_ *= output_shape[i]; - } + anchor_size_ *= SizeOf(logits_shape); + groundtruth_size_ *= SizeOf(labels_shape); + deltas_size_ *= SizeOf(output_shape); InitSizeLists(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/other/check_valid_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/other/check_valid_gpu_kernel.h index 08d092038fd..f9f2aea4186 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/other/check_valid_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/other/check_valid_gpu_kernel.h @@ -75,17 +75,10 @@ class CheckValidGpuKernelMod : public DeprecatedNativeGpuKernelMod { InitSizeLists(); return true; } - for (size_t i = 0; i < anchor_boxes_shape.size(); i++) { - anchor_boxes_size_ *= anchor_boxes_shape[i]; - } - for (size_t i = 0; i < img_metas_shape.size(); i++) { - img_metas_size_ *= img_metas_shape[i]; - } - - for (size_t i = 0; i < valid_shape.size(); i++) { - valid_size_ *= valid_shape[i]; - } + anchor_boxes_size_ *= SizeOf(anchor_boxes_shape); + img_metas_size_ *= SizeOf(img_metas_shape); + valid_size_ *= SizeOf(valid_shape); InitSizeLists(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/other/concat_offset_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/other/concat_offset_gpu_kernel.h index 5aaa98863fa..ade90106983 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/other/concat_offset_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/other/concat_offset_gpu_kernel.h @@ -63,15 +63,15 @@ class ConcatOffsetGpuKernelMod : public DeprecatedNativeGpuKernelMod { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be greater than 0"; } for (size_t i = 0; i < input_num; i++) { - size_t input_size = 1; + int64_t input_size = 1; auto input_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, i); for (size_t j = 0; j < input_shape.size(); j++) { input_size *= input_shape[j]; } - input_size_list_.push_back(input_size * sizeof(T)); + input_size_list_.push_back(LongToSizeClipNeg(input_size) * sizeof(T)); } // cal offset - size_t shape_offset = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0)[axis]; + int64_t shape_offset = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0)[axis]; std::vector offset(input_num, 0); for (size_t i = 1; i < input_num; i++) { input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, i); @@ -80,11 +80,11 @@ class ConcatOffsetGpuKernelMod : public DeprecatedNativeGpuKernelMod { << " the dimension of the " << i << "'th input: " << input_shape.size() << " and the dimension of the first input: " << rank; } - offset[i] = shape_offset; + offset[i] = LongToSizeClipNeg(shape_offset); shape_offset += input_shape[axis]; } constexpr size_t kConcatOffsetOutputShapeSize = 2; - auto output_shape = common::AnfAlgo::GetOutputInferShape(kernel_node, 0); + auto output_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetOutputInferShape(kernel_node, 0)); if (output_shape.size() != kConcatOffsetOutputShapeSize) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of output should be " << kConcatOffsetOutputShapeSize << ", but got:" << output_shape.size(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/other/dynamic_broadcast_grad_args_gpu_kernel.h 
b/mindspore/ccsrc/plugin/device/gpu/kernel/other/dynamic_broadcast_grad_args_gpu_kernel.h index 7db3bccf636..b70feb03daa 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/other/dynamic_broadcast_grad_args_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/other/dynamic_broadcast_grad_args_gpu_kernel.h @@ -63,10 +63,10 @@ class DynamicBroadcastGradientArgsGpuKernelMod : public DeprecatedNativeGpuKerne if (input_num != kInputNum) { MS_LOG(EXCEPTION) << "DynamicBroadcastGradiendArgs needs " << kInputNum << " inputs, but get " << input_num; } - auto s0_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0); - auto s1_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 1); - auto r0_shape = AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, 0); - auto r1_shape = AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, 1); + auto s0_shape = Convert2SizeTClipNeg(AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0)); + auto s1_shape = Convert2SizeTClipNeg(AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 1)); + auto r0_shape = Convert2SizeTClipNeg(AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, 0)); + auto r1_shape = Convert2SizeTClipNeg(AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, 1)); if (s0_shape.size() != 1 || s1_shape.size() != 1) { MS_LOG(EXCEPTION) << "Inputs must be [1-D], but get " << s0_shape.size() << "-D and " << s1_shape.size() << "-D."; } @@ -76,8 +76,8 @@ class DynamicBroadcastGradientArgsGpuKernelMod : public DeprecatedNativeGpuKerne input_size_list_.push_back(s0_size); input_size_list_.push_back(s1_size); - output_size_list_.push_back(r0_shape[0] * sizeof(S)); - output_size_list_.push_back(r1_shape[0] * sizeof(S)); + output_size_list_.push_back(static_cast(r0_shape[0]) * sizeof(S)); + output_size_list_.push_back(static_cast(r1_shape[0]) * sizeof(S)); is_need_retrieve_output_shape_ = true; return true; } @@ -89,8 +89,8 @@ class DynamicBroadcastGradientArgsGpuKernelMod : public DeprecatedNativeGpuKerne protected: void SyncData() override { - std::vector r0_shape{r0_size_}; - std::vector r1_shape{r1_size_}; + ShapeVector r0_shape{SizeToLong(r0_size_)}; + ShapeVector r1_shape{SizeToLong(r1_size_)}; common::AnfAlgo::SetOutputInferTypeAndShape({TypeId::kNumberTypeInt64, TypeId::kNumberTypeInt64}, {r0_shape, r1_shape}, kernel_node_.lock().get()); MS_LOG(DEBUG) << "Run PostExecute for DynamicBroadcastGradientArgs, real r0 shape is " << r0_shape diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/other/dynamic_reshape_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/other/dynamic_reshape_gpu_kernel.h index 38ce029dd67..0bf0076afa8 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/other/dynamic_reshape_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/other/dynamic_reshape_gpu_kernel.h @@ -53,9 +53,9 @@ class DynamicReshapeKernelMod : public DeprecatedNativeGpuKernelMod { } bool Init(const CNodePtr &kernel_node) override { kernel_node_ = kernel_node; - auto output_shape = AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, 0); - auto input_x_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0); - auto input_shape_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 1); + auto output_shape = Convert2SizeTClipNeg(AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, 0)); + auto input_x_shape = Convert2SizeTClipNeg(AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0)); + auto input_shape_shape = Convert2SizeTClipNeg(AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 1)); auto data_type = 
AnfAlgo::GetInputDeviceDataType(kernel_node, 0); data_type_size_ = mindspore::kernel::GetDtypeNbyte(TypeIdToString(data_type, true)); shape_size_ = input_shape_shape.size(); @@ -81,9 +81,9 @@ class DynamicReshapeKernelMod : public DeprecatedNativeGpuKernelMod { protected: void SyncData() override { auto data_type = AnfAlgo::GetInputDeviceDataType(kernel_node_.lock(), 0); - std::vector output_shape; + ShapeVector output_shape; std::transform(real_output_shape_.begin(), real_output_shape_.end(), std::back_inserter(output_shape), - [](const S &value) { return static_cast(value); }); + [](const S &value) { return static_cast(value); }); common::AnfAlgo::SetOutputInferTypeAndShape({data_type}, {output_shape}, kernel_node_.lock().get()); MS_LOG(DEBUG) << "Run PostExecute for DynamicReshape, real output shape is " << output_shape; } diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/other/dynamic_stitch_gpu_kernel.cc b/mindspore/ccsrc/plugin/device/gpu/kernel/other/dynamic_stitch_gpu_kernel.cc index c315d904ac7..f7fa899097d 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/other/dynamic_stitch_gpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/other/dynamic_stitch_gpu_kernel.cc @@ -39,11 +39,11 @@ bool DynamicStitchKernelMod::Init(const CNodePtr &kernel_node) { // Index type is restricted to int32 by kernel prim. size_t index_type_size = sizeof(int); data_type_size_ = GetDtypeNbyte(TypeIdToString(data_type, true)); - auto first_data_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, n_); + auto first_data_shape = Convert2SizeTClipNeg(AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, n_)); auto first_index_dims = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0).size(); one_data_ele_num_ = 1; for (size_t d = first_index_dims; d < first_data_shape.size(); ++d) { - one_data_ele_num_ *= first_data_shape[d]; + one_data_ele_num_ *= LongToSizeClipNeg(first_data_shape[d]); } for (size_t i = 0; i < n_; i++) { auto data_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, n_ + i); @@ -68,7 +68,7 @@ void DynamicStitchKernelMod::SyncData() { CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_, cudaStreamSynchronize(reinterpret_cast(stream_ptr_)), "DynamicStitch cudaStreamSynchronized failed"); auto output_shape = AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node_.lock(), 0); - output_shape[0] = IntToSize(max_index_) + 1; + output_shape[0] = max_index_ + 1; auto data_type = AnfAlgo::GetInputDeviceDataType(kernel_node_.lock(), n_); common::AnfAlgo::SetOutputInferTypeAndShape({data_type}, {output_shape}, kernel_node_.lock().get()); MS_LOG(DEBUG) << "Run PostExecute for dynamicstitch, real output shape is " << output_shape; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/other/gpu_convert_to_dynamic_shape_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/other/gpu_convert_to_dynamic_shape_gpu_kernel.h index 4cfcd056905..60a44e4d6a6 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/other/gpu_convert_to_dynamic_shape_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/other/gpu_convert_to_dynamic_shape_gpu_kernel.h @@ -63,7 +63,8 @@ class GpuConvertToDynamicShapeGpuKernelMod : public DeprecatedNativeGpuKernelMod InitSizeLists(); return true; } - for (const size_t &e : input_shape_) { + input_size_ = 1; + for (const auto &e : input_shape_) { input_size_ *= e; } @@ -77,6 +78,8 @@ class GpuConvertToDynamicShapeGpuKernelMod : public DeprecatedNativeGpuKernelMod input_shape_.clear(); input_size_ = 1; is_null_input_ = false; + input_size_list_.clear(); + 
output_size_list_.clear(); } protected: @@ -85,7 +88,7 @@ class GpuConvertToDynamicShapeGpuKernelMod : public DeprecatedNativeGpuKernelMod "cudaStreamSynchronized failed"); std::vector output_types = {common::AnfAlgo::GetOutputInferDataType(kernel_node_.lock(), 0)}; - std::vector> output_shapes = {input_shape_}; + std::vector output_shapes = {input_shape_}; common::AnfAlgo::SetOutputInferTypeAndShape(output_types, output_shapes, kernel_node_.lock().get()); } void InitSizeLists() override { @@ -95,8 +98,8 @@ class GpuConvertToDynamicShapeGpuKernelMod : public DeprecatedNativeGpuKernelMod private: void *cuda_stream_ptr_; - std::vector input_shape_; - size_t input_size_; + ShapeVector input_shape_; + int64_t input_size_; bool is_null_input_; }; } // namespace kernel diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/other/iou_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/other/iou_gpu_kernel.h index 749a5dba8b4..408bad0c2b4 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/other/iou_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/other/iou_gpu_kernel.h @@ -80,14 +80,6 @@ class IOUGpuKernelMod : public DeprecatedNativeGpuKernelMod { - for (size_t i = 0; i < gt_boxes_shape.size(); i++) { - gt_boxes_size_ *= gt_boxes_shape[i]; - } - - for (size_t i = 0; i < anchor_boxes_shape.size(); i++) { - anchor_boxes_size_ *= anchor_boxes_shape[i]; - } - - for (size_t i = 0; i < iou_shape.size(); i++) { - iou_size_ *= iou_shape[i]; - } + gt_boxes_size_ *= SizeOf(gt_boxes_shape); + anchor_boxes_size_ *= SizeOf(anchor_boxes_shape); + iou_size_ *= SizeOf(iou_shape); InitSizeLists(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/quant/batchnorm_fold2_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/quant/batchnorm_fold2_gpu_kernel.h index 6ca49e42cac..7e7223b77c0 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/quant/batchnorm_fold2_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/quant/batchnorm_fold2_gpu_kernel.h @@ -72,7 +72,7 @@ class BatchNormFold2GpuKernelMod : public DeprecatedNativeGpuKernelMod { << input_num; } - auto input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto input_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0)); is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); if (is_null_input_) { InitSizeLists(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/quant/batchnorm_fold2_grad_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/quant/batchnorm_fold2_grad_gpu_kernel.h index b063ead396d..467771e73a0 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/quant/batchnorm_fold2_grad_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/quant/batchnorm_fold2_grad_gpu_kernel.h @@ -99,7 +99,7 @@ class BatchNormFold2GradGpuKernelMod : public DeprecatedNativeGpuKernelMod { << input_num; } - auto input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto input_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0)); is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); if (is_null_input_) { InitSizeLists(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/quant/batchnorm_fold_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/quant/batchnorm_fold_gpu_kernel.h index 4096c5a3e9c..79b2381420e 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/quant/batchnorm_fold_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/quant/batchnorm_fold_gpu_kernel.h @@
-127,10 +127,10 @@ class BatchNormFoldGpuKernelMod : public DeprecatedNativeGpuKernelMod { << input_shape.size(); } CheckTensorSize({input_shape}); - batch_ = input_shape[0]; - channel_ = input_shape[1]; - height_ = input_shape[2]; - width_ = input_shape[3]; + batch_ = LongToInt(input_shape[0]); + channel_ = LongToInt(input_shape[1]); + height_ = LongToInt(input_shape[2]); + width_ = LongToInt(input_shape[3]); input_size_ = sizeof(T) * batch_ * channel_ * height_ * width_; output_size_ = sizeof(T) * channel_; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/quant/correction_mul_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/quant/correction_mul_gpu_kernel.h index 8fd28fb0980..0c912d22094 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/quant/correction_mul_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/quant/correction_mul_gpu_kernel.h @@ -54,7 +54,7 @@ class CorrectionMulGpuKernelMod : public DeprecatedNativeGpuKernelMod { MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 3, but got " << input_num; } - auto input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto input_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0)); is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); if (is_null_input_) { InitSizeLists(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/quant/correction_mul_grad_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/quant/correction_mul_grad_gpu_kernel.h index c4a7323c3bd..3593af7a3a2 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/quant/correction_mul_grad_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/quant/correction_mul_grad_gpu_kernel.h @@ -60,7 +60,7 @@ class CorrectionMulGradGpuKernelMod : public DeprecatedNativeGpuKernelMod { MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 4, but got " << input_num; } - auto input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto input_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0)); is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); if (is_null_input_) { InitSizeLists(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/quant/fake_learned_scale_quant_perchannel_gpu_kernel.cc b/mindspore/ccsrc/plugin/device/gpu/kernel/quant/fake_learned_scale_quant_perchannel_gpu_kernel.cc index 3877906c3b4..800cd4878bc 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/quant/fake_learned_scale_quant_perchannel_gpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/quant/fake_learned_scale_quant_perchannel_gpu_kernel.cc @@ -52,9 +52,9 @@ bool FakeLearnedScaleQuantPerChannelGpuKernelMod::Init(const CNodePtr &kernel_no // init size auto input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); - num_channels_ = SizeToInt(input_shape[0]); + num_channels_ = LongToInt(input_shape[0]); for (size_t i = 0; i < input_shape.size(); ++i) { - quant_num_ *= SizeToInt(input_shape[i]); + quant_num_ *= LongToInt(input_shape[i]); } input_size_ = sizeof(float) * quant_num_; InitSizeLists(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/quant/fake_learned_scale_quant_perchannel_grad_gpu_kernel.cc b/mindspore/ccsrc/plugin/device/gpu/kernel/quant/fake_learned_scale_quant_perchannel_grad_gpu_kernel.cc index 728a8f4525c..f3dd7a3d0d3 100644 --- 
a/mindspore/ccsrc/plugin/device/gpu/kernel/quant/fake_learned_scale_quant_perchannel_grad_gpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/quant/fake_learned_scale_quant_perchannel_grad_gpu_kernel.cc @@ -51,9 +51,9 @@ bool FakeLearnedScaleQuantPerChannelGradGpuKernelMod::Init(const CNodePtr &kerne // init size auto input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); - num_channels_ = SizeToInt(input_shape[0]); + num_channels_ = LongToInt(input_shape[0]); for (size_t i = 0; i < input_shape.size(); ++i) { - quant_num_ *= SizeToInt(input_shape[i]); + quant_num_ *= LongToInt(input_shape[i]); } input_size_ = sizeof(float) * quant_num_; InitSizeLists(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/quant/fake_learned_scale_quant_perlayer_gpu_kernel.cc b/mindspore/ccsrc/plugin/device/gpu/kernel/quant/fake_learned_scale_quant_perlayer_gpu_kernel.cc index cd52b0ae46d..6d40ef33c23 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/quant/fake_learned_scale_quant_perlayer_gpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/quant/fake_learned_scale_quant_perlayer_gpu_kernel.cc @@ -47,7 +47,7 @@ bool FakeLearnedScaleQuantPerLayerGpuKernelMod::Init(const CNodePtr &kernel_node // init size auto input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); for (size_t i = 0; i < input_shape.size(); ++i) { - quant_num_ *= SizeToInt(input_shape[i]); + quant_num_ *= LongToInt(input_shape[i]); } input_size_ = sizeof(float) * quant_num_; InitSizeLists(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/quant/fake_learned_scale_quant_perlayer_grad_gpu_kernel.cc b/mindspore/ccsrc/plugin/device/gpu/kernel/quant/fake_learned_scale_quant_perlayer_grad_gpu_kernel.cc index 587e9f24e96..79310c88626 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/quant/fake_learned_scale_quant_perlayer_grad_gpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/quant/fake_learned_scale_quant_perlayer_grad_gpu_kernel.cc @@ -44,7 +44,7 @@ bool FakeLearnedScaleQuantPerLayerGradGpuKernelMod::Init(const CNodePtr &kernel_ neg_trunc_ = GetValue(common::AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("neg_trunc")); // init size - auto input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto input_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0)); for (size_t i = 0; i < input_shape.size(); ++i) { quant_num_ *= SizeToInt(input_shape[i]); } diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/quant/fake_quant_perchannel_gpu_kernel.cc b/mindspore/ccsrc/plugin/device/gpu/kernel/quant/fake_quant_perchannel_gpu_kernel.cc index ff3afb041db..5d046ffb3b4 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/quant/fake_quant_perchannel_gpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/quant/fake_quant_perchannel_gpu_kernel.cc @@ -76,7 +76,7 @@ bool FakeQuantPerChannelGpuKernelMod::Init(const CNodePtr &kernel_node) { } // shape info for gpu - auto input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto input_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0)); is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); if (is_null_input_) { InitSizeLists(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/quant/fake_quant_perchannel_grad_gpu_kernel.cc b/mindspore/ccsrc/plugin/device/gpu/kernel/quant/fake_quant_perchannel_grad_gpu_kernel.cc index 0876013615e..c37b55221e9 100644 --- 
a/mindspore/ccsrc/plugin/device/gpu/kernel/quant/fake_quant_perchannel_grad_gpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/quant/fake_quant_perchannel_grad_gpu_kernel.cc @@ -68,7 +68,7 @@ bool FakeQuantPerChannelGradGpuKernelMod::Init(const CNodePtr &kernel_node) { quant_min_++; } - auto input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto input_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0)); is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); if (is_null_input_) { InitSizeLists(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/quant/fake_quant_perlayer_gpu_kernel.cc b/mindspore/ccsrc/plugin/device/gpu/kernel/quant/fake_quant_perlayer_gpu_kernel.cc index 56d40d8a2d2..af1c8ebf4cf 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/quant/fake_quant_perlayer_gpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/quant/fake_quant_perlayer_gpu_kernel.cc @@ -74,7 +74,7 @@ bool FakeQuantPerLayerGpuKernelMod::Init(const CNodePtr &kernel_node) { } // init size - auto input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto input_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0)); is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); if (is_null_input_) { InitSizeLists(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/quant/fake_quant_perlayer_grad_gpu_kernel.cc b/mindspore/ccsrc/plugin/device/gpu/kernel/quant/fake_quant_perlayer_grad_gpu_kernel.cc index 267190ae469..c221e906ce0 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/quant/fake_quant_perlayer_grad_gpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/quant/fake_quant_perlayer_grad_gpu_kernel.cc @@ -70,7 +70,7 @@ bool FakeQuantPerLayerGradGpuKernelMod::Init(const CNodePtr &kernel_node) { } // init size - auto input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto input_shape = Convert2SizeT(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0)); is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); if (is_null_input_) { InitSizeLists(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/quant/minmax_update_perchannel_gpu_kernel.cc b/mindspore/ccsrc/plugin/device/gpu/kernel/quant/minmax_update_perchannel_gpu_kernel.cc index 07f5be3e069..6650ac00763 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/quant/minmax_update_perchannel_gpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/quant/minmax_update_perchannel_gpu_kernel.cc @@ -45,7 +45,7 @@ bool MinMaxUpdatePerChannelGpuKernelMod::Init(const CNodePtr &kernel_node) { ema_decay_ = GetValue(prim->GetAttr("ema_decay")); // init size - auto input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto input_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0)); is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); if (is_null_input_) { InitSizeLists(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/quant/minmax_update_perlayer_gpu_kernel.cc b/mindspore/ccsrc/plugin/device/gpu/kernel/quant/minmax_update_perlayer_gpu_kernel.cc index dca879cbb52..54cafa4f648 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/quant/minmax_update_perlayer_gpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/quant/minmax_update_perlayer_gpu_kernel.cc @@ -45,7 +45,7 @@ bool MinMaxUpdatePerLayerGpuKernelMod::Init(const CNodePtr &kernel_node) { 
ema_decay_ = GetValue(prim->GetAttr("ema_decay")); // init size - auto input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto input_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0)); is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); if (is_null_input_) { InitSizeLists(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/random/random_categorical_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/random/random_categorical_gpu_kernel.h index b7ee222ec85..6cd5d649438 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/random/random_categorical_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/random/random_categorical_gpu_kernel.h @@ -99,7 +99,7 @@ class RandomCategoricalGpuKernelMod : public DeprecatedNativeGpuKernelMod { if (output_num != 1) { MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num; } - auto logits_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto logits_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0)); is_null_input_ = CHECK_SHAPE_NULL(logits_shape, kernel_name, "logits"); if (is_null_input_) { InitSizeLists(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/random/random_choice_with_mask_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/random/random_choice_with_mask_gpu_kernel.h index 3aa94d84edf..05597ebf115 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/random/random_choice_with_mask_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/random/random_choice_with_mask_gpu_kernel.h @@ -105,9 +105,7 @@ class RandomChoiceWithMaskGpuKernelMod : public DeprecatedNativeGpuKernelMod { seed2_ = static_cast(GetAttr(kernel_node, "seed2")); generator_.seed(time_interval); // init memory - for (size_t i = 0; i < input_shape.size(); i++) { - input_size_ *= input_shape[i]; - } + input_size_ *= SizeOf(input_shape); count_ = static_cast(GetAttr(kernel_node, "count")); // upper ceiling for input for ceil_power2 if (count_ > kSmallK || input_shape_size_ > 1) { diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/rl/discounted_return_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/rl/discounted_return_gpu_kernel.h index 0ef6fb760eb..8115a5cdb5d 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/rl/discounted_return_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/rl/discounted_return_gpu_kernel.h @@ -46,22 +46,22 @@ class DiscountedReturnGpuKernelMod : public DeprecatedNativeGpuKernelMod { << input_num; } - const std::vector &reward_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); - const std::vector &done_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1); + const std::vector &reward_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); + const std::vector &done_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1); if (reward_shape.size() == 0) { MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of reward cannot be 0, but got " << reward_shape.size(); } // Reshape reward to [timestep, env, else], done to [timestep, env], last_value to [env, else]. 
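The conversions in these hunks follow one rule: inferred shape values are now int64_t, so every use site narrows explicitly instead of relying on implicit size_t arithmetic. A rough sketch of the assumed behavior of the two helpers that appear most often here, under hypothetical names (the real helpers live in the shared convert utilities; the failure mode is an assumption):

#include <cstddef>
#include <cstdint>
#include <limits>
#include <stdexcept>
// Sketch: checked narrowing to int, as assumed for LongToInt.
inline int LongToIntSketch(int64_t v) {
  if (v > std::numeric_limits<int>::max() || v < std::numeric_limits<int>::min()) {
    throw std::out_of_range("int64_t value out of int range");  // assumed failure mode
  }
  return static_cast<int>(v);
}
// Sketch: clipping narrowing to size_t, as assumed for LongToSizeClipNeg.
inline size_t LongToSizeClipNegSketch(int64_t v) { return v < 0 ? 0 : static_cast<size_t>(v); }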
- timestep_ = reward_shape[0]; - for (size_t i = 1; i < done_shape.size(); i++) { - env_num_ *= i; - } + timestep_ = LongToInt(reward_shape[0]); + for (size_t i = 1; i < done_shape.size(); i++) { + env_num_ *= LongToInt(done_shape[i]); + } int total_elements = 1; for (size_t j = 0; j < reward_shape.size(); j++) { - total_elements *= reward_shape[j]; + total_elements *= LongToInt(reward_shape[j]); } element_per_env_ = total_elements / timestep_ / env_num_; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/rl/gru_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/rl/gru_gpu_kernel.h index 22f3205885e..696a0fcce2b 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/rl/gru_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/rl/gru_gpu_kernel.h @@ -109,9 +109,9 @@ class GruGpuKernelMod : public DeprecatedNativeGpuKernelMod { MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input cannot be less than 3, but got " << input_shape.size(); } - seq_len_ = SizeToInt(input_shape[0]); - batch_size_ = SizeToInt(input_shape[1]); - input_size_ = SizeToInt(input_shape[kIndexTwo]); + seq_len_ = LongToInt(input_shape[0]); + batch_size_ = LongToInt(input_shape[1]); + input_size_ = LongToInt(input_shape[kIndexTwo]); input_size_ = static_cast(GetAttr(kernel_node, "input_size")); hidden_size_ = static_cast(GetAttr(kernel_node, "hidden_size")); @@ -166,7 +166,7 @@ class GruGpuKernelMod : public DeprecatedNativeGpuKernelMod { MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of weight cannot be less than 3, but got " << weight_shape.size(); } - size_t weight_size = weight_shape[0] * weight_shape[1] * weight_shape[kIndexTwo] * sizeof(T); + size_t weight_size = LongToSizeClipNeg(weight_shape[0] * weight_shape[1] * weight_shape[kIndexTwo]) * sizeof(T); CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnGetRNNParamsSize(handle_, rnn_desc_, x_desc_[0], &weight_size_, cudnn_data_type_), "get weight_size_ failed"); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/rl/gru_grad_data_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/rl/gru_grad_data_gpu_kernel.h index 7eaa54562ba..659dee5327a 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/rl/gru_grad_data_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/rl/gru_grad_data_gpu_kernel.h @@ -114,7 +114,7 @@ class GruGradDataGpuKernelMod : public DeprecatedNativeGpuKernelMod { kernel_node_ = kernel_node; InitResource(); cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))); - auto input_shape = common::AnfAlgo::GetOutputInferShape(kernel_node, 0); + auto input_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetOutputInferShape(kernel_node, 0)); is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); if (is_null_input_) { InitSizeLists(); @@ -179,7 +179,7 @@ class GruGradDataGpuKernelMod : public DeprecatedNativeGpuKernelMod { MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of weight cannot be less than 3, but got " << weight_shape.size(); } - size_t weight_size = weight_shape[0] * weight_shape[1] * weight_shape[2] * sizeof(T); + size_t weight_size = LongToSizeClipNeg(weight_shape[0] * weight_shape[1] * weight_shape[2]) * sizeof(T); CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnGetRNNParamsSize(handle_, rnn_desc_, dx_desc_[0], &weight_size_, cudnn_data_type_), "get weight_size_ failed"); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/rl/gru_grad_weight_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/rl/gru_grad_weight_gpu_kernel.h index a4fe85ec8fc..c82c47aa5fe 100644 ---
a/mindspore/ccsrc/plugin/device/gpu/kernel/rl/gru_grad_weight_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/rl/gru_grad_weight_gpu_kernel.h @@ -104,8 +104,8 @@ class GruGradWeightGpuKernelMod : public DeprecatedNativeGpuKernelMod { MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input cannot be less than 2, but got " << input_shape.size(); } - seq_len_ = SizeToInt(input_shape[0]); - batch_size_ = SizeToInt(input_shape[1]); + seq_len_ = LongToInt(input_shape[0]); + batch_size_ = LongToInt(input_shape[1]); input_size_ = static_cast(GetAttr(kernel_node, "input_size")); hidden_size_ = static_cast(GetAttr(kernel_node, "hidden_size")); @@ -142,7 +142,7 @@ class GruGradWeightGpuKernelMod : public DeprecatedNativeGpuKernelMod { hidden_size_, hidden_size_, num_layers_, dropout_desc_, 0), "set rnn_desc failed"); #endif - auto weight_shape = common::AnfAlgo::GetOutputInferShape(kernel_node, 0); + auto weight_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetOutputInferShape(kernel_node, 0)); is_null_input_ = CHECK_SHAPE_NULL(weight_shape, kernel_name, "weight"); if (is_null_input_) { InitSizeLists(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/rl/tensor_array_create_kernel.cc b/mindspore/ccsrc/plugin/device/gpu/kernel/rl/tensor_array_create_kernel.cc index 71f5e4e3123..a70d6bda1bc 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/rl/tensor_array_create_kernel.cc +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/rl/tensor_array_create_kernel.cc @@ -29,10 +29,8 @@ TensorArrayCreateKernelMod::TensorArrayCreateKernelMod() : is_dynamic_(true), si bool TensorArrayCreateKernelMod::Init(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); kernel_node_ = kernel_node; - auto shape = GetAttr>(kernel_node, "element_shape"); - for (auto i : shape) { - shapes_.push_back(LongToSize(i)); - } + shapes_ = GetAttr>(kernel_node, "element_shape"); + type_ = GetAttr(kernel_node, "dtype"); size_ = GetAttr(kernel_node, "size"); is_dynamic_ = GetAttr(kernel_node, "dynamic_size"); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/rl/tensor_array_create_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/rl/tensor_array_create_kernel.h index 992f1a99fbc..c2e70a71785 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/rl/tensor_array_create_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/rl/tensor_array_create_kernel.h @@ -38,7 +38,7 @@ class TensorArrayCreateKernelMod : public DeprecatedNativeGpuKernelMod { private: bool is_dynamic_; int64_t size_; - std::vector shapes_; + ShapeVector shapes_; TypePtr type_; std::string name_; }; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/rl/tensor_array_stack_kernel.cc b/mindspore/ccsrc/plugin/device/gpu/kernel/rl/tensor_array_stack_kernel.cc index 78a0605d523..006d9d07ae1 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/rl/tensor_array_stack_kernel.cc +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/rl/tensor_array_stack_kernel.cc @@ -39,7 +39,7 @@ bool TensorArrayStackKernelMod::Init(const CNodePtr &kernel_node) { is_dynamic_ = GetAttr(kernel_node, "is_dynamic_shape"); auto size = GetAttr(kernel_node, "size"); for (auto i : shape) { - shapes_.push_back(LongToSize(i)); + shapes_.push_back(LongToSizeClipNeg(i)); } type_ = GetAttr(kernel_node, "dtype"); ele_size_ = GetTypeByte(type_); @@ -68,7 +68,7 @@ void TensorArrayStackKernelMod::SyncData() { auto shape = shapes_; shape.insert(shape.begin(), tensor_size); MS_LOG(DEBUG) << "After postexecute, the real shape of TensorArrayStack is " << shape; - 
common::AnfAlgo::SetOutputInferTypeAndShape({type_->type_id()}, {shape}, kernel_node_.lock().get()); + common::AnfAlgo::SetOutputInferTypeAndShape({type_->type_id()}, {Convert2Long(shape)}, kernel_node_.lock().get()); } void TensorArrayStackKernelMod::ResetResource() noexcept { diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/rl/tensor_array_write_kernel.cc b/mindspore/ccsrc/plugin/device/gpu/kernel/rl/tensor_array_write_kernel.cc index 2a91829a664..9cce77de7b1 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/rl/tensor_array_write_kernel.cc +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/rl/tensor_array_write_kernel.cc @@ -32,10 +32,8 @@ bool TensorArrayWriteKernelMod::Init(const CNodePtr &kernel_node) { kernel_node_ = kernel_node; type_ = AnfAlgo::GetInputDeviceDataType(kernel_node, kSecondInputIndex); shapes_ = AnfAlgo::GetInputDeviceShape(kernel_node, kSecondInputIndex); - value_size_ = GetTypeByte(TypeIdToType(type_)); - for (auto i : shapes_) { - value_size_ *= i; - } + value_size_ = GetTypeByte(TypeIdToType(type_)) * SizeOf(shapes_); + InitSizeLists(); return true; } diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/rl/tensor_array_write_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/rl/tensor_array_write_kernel.h index 8d3f294686b..b4e88ab836b 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/rl/tensor_array_write_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/rl/tensor_array_write_kernel.h @@ -38,7 +38,7 @@ class TensorArrayWriteKernelMod : public DeprecatedNativeGpuKernelMod { private: size_t value_size_; - std::vector shapes_; + ShapeVector shapes_; TypeId type_; }; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/rl/tensors_queue_gpu_kernel.cc b/mindspore/ccsrc/plugin/device/gpu/kernel/rl/tensors_queue_gpu_kernel.cc index 9b607233a90..d458ad33c4c 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/rl/tensors_queue_gpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/rl/tensors_queue_gpu_kernel.cc @@ -138,10 +138,7 @@ bool TensorsQueueGetKernelMod::Init(const CNodePtr &kernel_node) { input_size_list_.push_back(sizeof(int64_t)); for (int64_t i = 0; i < elements_num_; i++) { - size_t value_size = GetTypeByte(type); - for (auto x : shapes[i]) { - value_size *= LongToSize(x); - } + size_t value_size = GetTypeByte(type) * SizeOf(shapes[i]); output_size_list_.push_back(value_size); } return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/sparse/dense_to_csr_sparse_matrix_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/sparse/dense_to_csr_sparse_matrix_gpu_kernel.h index b2b7fd9beec..35fb4b32f93 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/sparse/dense_to_csr_sparse_matrix_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/sparse/dense_to_csr_sparse_matrix_gpu_kernel.h @@ -128,13 +128,13 @@ class DenseToCSRSparseMatrixKernelMod : public DeprecatedNativeGpuKernelMod { InitResource(); memcpy_flag_ = false; - input_shapes_ = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, kIndex0); - indices_shapes_ = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, kIndex1); - dense_shape_shapes_ = AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, kIndex0); - batch_pointers_shapes_ = AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, kIndex1); - row_pointers_shapes_ = AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, kIndex2); - col_indices_shapes_ = AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, kIndex3); - value_shapes_ = AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, kIndex4); + input_shapes_ 
= Convert2SizeTClipNeg(AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, kIndex0)); + indices_shapes_ = Convert2SizeTClipNeg(AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, kIndex1)); + dense_shape_shapes_ = Convert2SizeTClipNeg(AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, kIndex0)); + batch_pointers_shapes_ = Convert2SizeTClipNeg(AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, kIndex1)); + row_pointers_shapes_ = Convert2SizeTClipNeg(AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, kIndex2)); + col_indices_shapes_ = Convert2SizeTClipNeg(AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, kIndex3)); + value_shapes_ = Convert2SizeTClipNeg(AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, kIndex4)); nnz_ = value_shapes_[kIndex0]; m_ = input_shapes_[kIndex0]; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/sparse/sparse_matrix_add_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/sparse/sparse_matrix_add_gpu_kernel.h index 877255c82ee..ab790325dd1 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/sparse/sparse_matrix_add_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/sparse/sparse_matrix_add_gpu_kernel.h @@ -93,11 +93,11 @@ class SparseMatrixAddGpuKernel : public DeprecatedNativeGpuKernelMod { const auto &x1_col_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, InputList::X1_COLUMN); RETURN_IF_FALSE_WITH_LOG(x1_col_shape.size() == 1, "The rank of column should be 1."); - x1_nnz_ = SizeToInt(x1_col_shape[0]); + x1_nnz_ = LongToInt(x1_col_shape[0]); const auto &x2_col_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, InputList::X2_COLUMN); RETURN_IF_FALSE_WITH_LOG(x2_col_shape.size() == 1, "The rank of row should be 1."); - x2_nnz_ = SizeToInt(x2_col_shape[0]); + x2_nnz_ = LongToInt(x2_col_shape[0]); type_id_ = common::AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, InputList::X1_VALUE); @@ -169,15 +169,15 @@ class SparseMatrixAddGpuKernel : public DeprecatedNativeGpuKernelMod { types.push_back(kNumberTypeInt32); types.push_back(type_id_); - std::vector> shapes; + std::vector shapes; shapes.push_back({ - IntToSize(row_ + 1), + row_ + 1, }); shapes.push_back({ - IntToSize(y_nnz_), + y_nnz_, }); shapes.push_back({ - IntToSize(y_nnz_), + y_nnz_, }); common::AnfAlgo::SetOutputInferTypeAndShape(types, shapes, kernel_node_.lock().get()); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/angle/angle_atom_energy_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/angle/angle_atom_energy_kernel.h index 3fb027aec5f..999c528da38 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/angle/angle_atom_energy_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/angle/angle_atom_energy_kernel.h @@ -43,13 +43,14 @@ class AngleAtomEnergyGpuKernelMod : public DeprecatedNativeGpuKernelMod { auto shape_angle_k = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5); auto shape_angle_theta0 = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6); - for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; - for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i]; - for (size_t i = 0; i < shape_atom_a.size(); i++) ele_atom_a *= shape_atom_a[i]; - for (size_t i = 0; i < shape_atom_b.size(); i++) ele_atom_b *= shape_atom_b[i]; - for (size_t i = 0; i < shape_atom_c.size(); i++) ele_atom_c *= shape_atom_c[i]; - for (size_t i = 0; i < shape_angle_k.size(); i++) ele_angle_k *= shape_angle_k[i]; - for (size_t i = 0; i < 
shape_angle_theta0.size(); i++) ele_angle_theta0 *= shape_angle_theta0[i]; + ele_uint_crd *= SizeOf(shape_uint_crd); + ele_scaler *= SizeOf(shape_scaler); + ele_atom_a *= SizeOf(shape_atom_a); + ele_atom_b *= SizeOf(shape_atom_b); + ele_atom_c *= SizeOf(shape_atom_c); + ele_angle_k *= SizeOf(shape_angle_k); + ele_angle_theta0 *= SizeOf(shape_angle_theta0); + InitSizeLists(); return true; } diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/angle/angle_energy_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/angle/angle_energy_kernel.h index ee1706187b7..fe0fcc43bdd 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/angle/angle_energy_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/angle/angle_energy_kernel.h @@ -43,13 +43,14 @@ class AngleEnergyGpuKernelMod : public DeprecatedNativeGpuKernelMod { auto shape_angle_k = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5); auto shape_angle_theta0 = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6); - for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; - for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i]; - for (size_t i = 0; i < shape_atom_a.size(); i++) ele_atom_a *= shape_atom_a[i]; - for (size_t i = 0; i < shape_atom_b.size(); i++) ele_atom_b *= shape_atom_b[i]; - for (size_t i = 0; i < shape_atom_c.size(); i++) ele_atom_c *= shape_atom_c[i]; - for (size_t i = 0; i < shape_angle_k.size(); i++) ele_angle_k *= shape_angle_k[i]; - for (size_t i = 0; i < shape_angle_theta0.size(); i++) ele_angle_theta0 *= shape_angle_theta0[i]; + ele_uint_crd *= SizeOf(shape_uint_crd); + ele_scaler *= SizeOf(shape_scaler); + ele_atom_a *= SizeOf(shape_atom_a); + ele_atom_b *= SizeOf(shape_atom_b); + ele_atom_c *= SizeOf(shape_atom_c); + ele_angle_k *= SizeOf(shape_angle_k); + ele_angle_theta0 *= SizeOf(shape_angle_theta0); + InitSizeLists(); return true; } diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/angle/angle_force_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/angle/angle_force_kernel.h index 61cfb3cc422..553f9bee9f8 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/angle/angle_force_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/angle/angle_force_kernel.h @@ -43,13 +43,14 @@ class AngleForceGpuKernelMod : public DeprecatedNativeGpuKernelMod { auto shape_angle_k = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5); auto shape_angle_theta0 = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6); - for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; - for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i]; - for (size_t i = 0; i < shape_atom_a.size(); i++) ele_atom_a *= shape_atom_a[i]; - for (size_t i = 0; i < shape_atom_b.size(); i++) ele_atom_b *= shape_atom_b[i]; - for (size_t i = 0; i < shape_atom_c.size(); i++) ele_atom_c *= shape_atom_c[i]; - for (size_t i = 0; i < shape_angle_k.size(); i++) ele_angle_k *= shape_angle_k[i]; - for (size_t i = 0; i < shape_angle_theta0.size(); i++) ele_angle_theta0 *= shape_angle_theta0[i]; + ele_uint_crd *= SizeOf(shape_uint_crd); + ele_scaler *= SizeOf(shape_scaler); + ele_atom_a *= SizeOf(shape_atom_a); + ele_atom_b *= SizeOf(shape_atom_b); + ele_atom_c *= SizeOf(shape_atom_c); + ele_angle_k *= SizeOf(shape_angle_k); + ele_angle_theta0 *= SizeOf(shape_angle_theta0); + InitSizeLists(); return true; } diff --git 
a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/angle/angle_force_with_atom_energy_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/angle/angle_force_with_atom_energy_kernel.h index ac479dbdc1b..b7bdc046b25 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/angle/angle_force_with_atom_energy_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/angle/angle_force_with_atom_energy_kernel.h @@ -43,13 +43,13 @@ class AngleForceWithAtomEnergyGpuKernelMod : public DeprecatedNativeGpuKernelMod auto shape_angle_k = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5); auto shape_angle_theta0 = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6); - for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; - for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i]; - for (size_t i = 0; i < shape_atom_a.size(); i++) ele_atom_a *= shape_atom_a[i]; - for (size_t i = 0; i < shape_atom_b.size(); i++) ele_atom_b *= shape_atom_b[i]; - for (size_t i = 0; i < shape_atom_c.size(); i++) ele_atom_c *= shape_atom_c[i]; - for (size_t i = 0; i < shape_angle_k.size(); i++) ele_angle_k *= shape_angle_k[i]; - for (size_t i = 0; i < shape_angle_theta0.size(); i++) ele_angle_theta0 *= shape_angle_theta0[i]; + ele_uint_crd *= SizeOf(shape_uint_crd); + ele_scaler *= SizeOf(shape_scaler); + ele_atom_a *= SizeOf(shape_atom_a); + ele_atom_b *= SizeOf(shape_atom_b); + ele_atom_c *= SizeOf(shape_atom_c); + ele_angle_k *= SizeOf(shape_angle_k); + ele_angle_theta0 *= SizeOf(shape_angle_theta0); InitSizeLists(); return true; } diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/bond/bond_atom_energy_cuda_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/bond/bond_atom_energy_cuda_gpu_kernel.h index eeafc2fe959..9b51a2beec4 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/bond/bond_atom_energy_cuda_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/bond/bond_atom_energy_cuda_gpu_kernel.h @@ -47,12 +47,12 @@ class BondAtomEnergyCudaGpuKernelMod : public DeprecatedNativeGpuKernelMod { auto shape_bond_k = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4); auto shape_bond_r0 = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5); - for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; - for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i]; - for (size_t i = 0; i < shape_atom_a.size(); i++) ele_atom_a *= shape_atom_a[i]; - for (size_t i = 0; i < shape_atom_b.size(); i++) ele_atom_b *= shape_atom_b[i]; - for (size_t i = 0; i < shape_bond_k.size(); i++) ele_bond_k *= shape_bond_k[i]; - for (size_t i = 0; i < shape_bond_r0.size(); i++) ele_bond_r0 *= shape_bond_r0[i]; + ele_uint_crd *= SizeOf(shape_uint_crd); + ele_scaler *= SizeOf(shape_scaler); + ele_atom_a *= SizeOf(shape_atom_a); + ele_atom_b *= SizeOf(shape_atom_b); + ele_bond_k *= SizeOf(shape_bond_k); + ele_bond_r0 *= SizeOf(shape_bond_r0); InitSizeLists(); return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/bond/bond_energy_cuda_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/bond/bond_energy_cuda_gpu_kernel.h index ab0f5f82314..035befcb539 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/bond/bond_energy_cuda_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/bond/bond_energy_cuda_gpu_kernel.h @@ -48,12 +48,12 @@ class BondEnergyCudaGpuKernelMod : public DeprecatedNativeGpuKernelMod { auto 
shape_bond_k = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4); auto shape_bond_r0 = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5); - for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; - for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i]; - for (size_t i = 0; i < shape_atom_a.size(); i++) ele_atom_a *= shape_atom_a[i]; - for (size_t i = 0; i < shape_atom_b.size(); i++) ele_atom_b *= shape_atom_b[i]; - for (size_t i = 0; i < shape_bond_k.size(); i++) ele_bond_k *= shape_bond_k[i]; - for (size_t i = 0; i < shape_bond_r0.size(); i++) ele_bond_r0 *= shape_bond_r0[i]; + ele_uint_crd *= SizeOf(shape_uint_crd); + ele_scaler *= SizeOf(shape_scaler); + ele_atom_a *= SizeOf(shape_atom_a); + ele_atom_b *= SizeOf(shape_atom_b); + ele_bond_k *= SizeOf(shape_bond_k); + ele_bond_r0 *= SizeOf(shape_bond_r0); InitSizeLists(); return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/bond/bond_force_cuda_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/bond/bond_force_cuda_gpu_kernel.h index 81d26b31b0d..e6466e5ae55 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/bond/bond_force_cuda_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/bond/bond_force_cuda_gpu_kernel.h @@ -48,12 +48,12 @@ class BondForceCudaGpuKernelMod : public DeprecatedNativeGpuKernelMod { auto shape_bond_k = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4); auto shape_bond_r0 = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5); - for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; - for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i]; - for (size_t i = 0; i < shape_atom_a.size(); i++) ele_atom_a *= shape_atom_a[i]; - for (size_t i = 0; i < shape_atom_b.size(); i++) ele_atom_b *= shape_atom_b[i]; - for (size_t i = 0; i < shape_bond_k.size(); i++) ele_bond_k *= shape_bond_k[i]; - for (size_t i = 0; i < shape_bond_r0.size(); i++) ele_bond_r0 *= shape_bond_r0[i]; + ele_uint_crd *= SizeOf(shape_uint_crd); + ele_scaler *= SizeOf(shape_scaler); + ele_atom_a *= SizeOf(shape_atom_a); + ele_atom_b *= SizeOf(shape_atom_b); + ele_bond_k *= SizeOf(shape_bond_k); + ele_bond_r0 *= SizeOf(shape_bond_r0); InitSizeLists(); return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/bond/bond_force_with_atom_energy_and_virial_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/bond/bond_force_with_atom_energy_and_virial_kernel.h index f9d540f3ff9..fb077750270 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/bond/bond_force_with_atom_energy_and_virial_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/bond/bond_force_with_atom_energy_and_virial_kernel.h @@ -48,12 +48,12 @@ class BondForceWithAtomEnergyAndVirialGpuKernelMod : public DeprecatedNativeGpuK auto shape_bond_k = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4); auto shape_bond_r0 = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5); - for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; - for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i]; - for (size_t i = 0; i < shape_atom_a.size(); i++) ele_atom_a *= shape_atom_a[i]; - for (size_t i = 0; i < shape_atom_b.size(); i++) ele_atom_b *= shape_atom_b[i]; - for (size_t i = 0; i < shape_bond_k.size(); i++) ele_bond_k *= shape_bond_k[i]; - for (size_t i = 0; i < shape_bond_r0.size(); i++) ele_bond_r0 
*= shape_bond_r0[i]; + ele_uint_crd *= SizeOf(shape_uint_crd); + ele_scaler *= SizeOf(shape_scaler); + ele_atom_a *= SizeOf(shape_atom_a); + ele_atom_b *= SizeOf(shape_atom_b); + ele_bond_k *= SizeOf(shape_bond_k); + ele_bond_r0 *= SizeOf(shape_bond_r0); InitSizeLists(); return true; } diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/bond/bond_force_with_atom_energy_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/bond/bond_force_with_atom_energy_kernel.h index 617590c275e..2ffa77abcbe 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/bond/bond_force_with_atom_energy_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/bond/bond_force_with_atom_energy_kernel.h @@ -48,12 +48,12 @@ class BondForceWithAtomEnergyGpuKernelMod : public DeprecatedNativeGpuKernelMod auto shape_bond_k = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4); auto shape_bond_r0 = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5); - for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; - for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i]; - for (size_t i = 0; i < shape_atom_a.size(); i++) ele_atom_a *= shape_atom_a[i]; - for (size_t i = 0; i < shape_atom_b.size(); i++) ele_atom_b *= shape_atom_b[i]; - for (size_t i = 0; i < shape_bond_k.size(); i++) ele_bond_k *= shape_bond_k[i]; - for (size_t i = 0; i < shape_bond_r0.size(); i++) ele_bond_r0 *= shape_bond_r0[i]; + ele_uint_crd *= SizeOf(shape_uint_crd); + ele_scaler *= SizeOf(shape_scaler); + ele_atom_a *= SizeOf(shape_atom_a); + ele_atom_b *= SizeOf(shape_atom_b); + ele_bond_k *= SizeOf(shape_bond_k); + ele_bond_r0 *= SizeOf(shape_bond_r0); InitSizeLists(); return true; } diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/bond/bond_force_with_atom_virial_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/bond/bond_force_with_atom_virial_kernel.h index c10509d5527..876a975b9a3 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/bond/bond_force_with_atom_virial_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/bond/bond_force_with_atom_virial_kernel.h @@ -48,12 +48,12 @@ class BondForceWithAtomVirialGpuKernelMod : public DeprecatedNativeGpuKernelMod auto shape_bond_k = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4); auto shape_bond_r0 = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5); - for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; - for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i]; - for (size_t i = 0; i < shape_atom_a.size(); i++) ele_atom_a *= shape_atom_a[i]; - for (size_t i = 0; i < shape_atom_b.size(); i++) ele_atom_b *= shape_atom_b[i]; - for (size_t i = 0; i < shape_bond_k.size(); i++) ele_bond_k *= shape_bond_k[i]; - for (size_t i = 0; i < shape_bond_r0.size(); i++) ele_bond_r0 *= shape_bond_r0[i]; + ele_uint_crd *= SizeOf(shape_uint_crd); + ele_scaler *= SizeOf(shape_scaler); + ele_atom_a *= SizeOf(shape_atom_a); + ele_atom_b *= SizeOf(shape_atom_b); + ele_bond_k *= SizeOf(shape_bond_k); + ele_bond_r0 *= SizeOf(shape_bond_r0); InitSizeLists(); return true; } diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/common/atomcrdtocv_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/common/atomcrdtocv_kernel.h index a08b040dd8f..6dd8a53587e 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/common/atomcrdtocv_kernel.h +++ 
b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/common/atomcrdtocv_kernel.h @@ -46,9 +46,9 @@ class AtomCrdToCVGpuKernelMod : public DeprecatedNativeGpuKernelMod { auto shape_old_crd = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); auto shape_box = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); - for (size_t i = 0; i < shape_crd.size(); i++) ele_crd *= shape_crd[i]; - for (size_t i = 0; i < shape_old_crd.size(); i++) ele_old_crd *= shape_old_crd[i]; - for (size_t i = 0; i < shape_box.size(); i++) ele_box *= shape_box[i]; + ele_crd *= SizeOf(shape_crd); + ele_old_crd *= SizeOf(shape_old_crd); + ele_box *= SizeOf(shape_box); InitSizeLists(); return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/common/crd_to_uint_crd_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/common/crd_to_uint_crd_kernel.h index 03d9bdffd88..6f6a33655e3 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/common/crd_to_uint_crd_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/common/crd_to_uint_crd_kernel.h @@ -41,9 +41,8 @@ class CrdToUintCrdGpuKernelMod : public DeprecatedNativeGpuKernelMod { auto shape_crd_to_uint_crd_cof = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); auto shape_crd = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); - for (size_t i = 0; i < shape_crd_to_uint_crd_cof.size(); i++) - ele_crd_to_uint_crd_cof *= shape_crd_to_uint_crd_cof[i]; - for (size_t i = 0; i < shape_crd.size(); i++) ele_crd *= shape_crd[i]; + ele_crd_to_uint_crd_cof *= SizeOf(shape_crd_to_uint_crd_cof); + ele_crd *= SizeOf(shape_crd); InitSizeLists(); return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/common/crd_to_uint_crd_quarter_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/common/crd_to_uint_crd_quarter_kernel.h index 89c326041d1..bc5c3058f33 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/common/crd_to_uint_crd_quarter_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/common/crd_to_uint_crd_quarter_kernel.h @@ -41,9 +41,8 @@ class CrdToUintCrdQuarterGpuKernelMod : public DeprecatedNativeGpuKernelMod { auto shape_crd_to_uint_crd_cof = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); auto shape_crd = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); - for (size_t i = 0; i < shape_crd_to_uint_crd_cof.size(); i++) - ele_crd_to_uint_crd_cof *= shape_crd_to_uint_crd_cof[i]; - for (size_t i = 0; i < shape_crd.size(); i++) ele_crd *= shape_crd[i]; + ele_crd_to_uint_crd_cof *= SizeOf(shape_crd_to_uint_crd_cof); + ele_crd *= SizeOf(shape_crd); InitSizeLists(); return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/common/get_center_of_mass_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/common/get_center_of_mass_kernel.h index 00e57d5f959..a80a7e4d765 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/common/get_center_of_mass_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/common/get_center_of_mass_kernel.h @@ -44,12 +44,11 @@ class GetCenterOfMassGpuKernelMod : public DeprecatedNativeGpuKernelMod { auto shape_atom_mass = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); auto shape_residue_mass_inverse = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4); - for (size_t i = 0; i < shape_start.size(); i++) ele_start *= shape_start[i]; - for (size_t i = 0; i < shape_end.size(); i++) ele_end *= shape_end[i]; - for (size_t i = 0; i < shape_crd.size(); i++) ele_crd *= 
shape_crd[i]; - for (size_t i = 0; i < shape_atom_mass.size(); i++) ele_atom_mass *= shape_atom_mass[i]; - for (size_t i = 0; i < shape_residue_mass_inverse.size(); i++) - ele_residue_mass_inverse *= shape_residue_mass_inverse[i]; + ele_start *= SizeOf(shape_start); + ele_end *= SizeOf(shape_end); + ele_crd *= SizeOf(shape_crd); + ele_atom_mass *= SizeOf(shape_atom_mass); + ele_residue_mass_inverse *= SizeOf(shape_residue_mass_inverse); InitSizeLists(); return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/common/getcenter_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/common/getcenter_kernel.h index 536349b0c6f..c4116facfe7 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/common/getcenter_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/common/getcenter_kernel.h @@ -42,8 +42,8 @@ class GetCenterOfGeometryGpuKernelMod : public DeprecatedNativeGpuKernelMod { auto shape_center_atoms = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); auto shape_crd = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); - for (size_t i = 0; i < shape_center_atoms.size(); i++) ele_center_atoms *= shape_center_atoms[i]; - for (size_t i = 0; i < shape_crd.size(); i++) ele_crd *= shape_crd[i]; + ele_center_atoms *= SizeOf(shape_center_atoms); + ele_crd *= SizeOf(shape_crd); InitSizeLists(); return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/common/map_center_of_mass_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/common/map_center_of_mass_kernel.h index bede3752923..a711e610331 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/common/map_center_of_mass_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/common/map_center_of_mass_kernel.h @@ -45,12 +45,12 @@ class MapCenterOfMassGpuKernelMod : public DeprecatedNativeGpuKernelMod { auto shape_no_wrap_crd = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4); auto shape_crd = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5); - for (size_t i = 0; i < shape_start.size(); i++) ele_start *= shape_start[i]; - for (size_t i = 0; i < shape_end.size(); i++) ele_end *= shape_end[i]; - for (size_t i = 0; i < shape_center_of_mass.size(); i++) ele_center_of_mass *= shape_center_of_mass[i]; - for (size_t i = 0; i < shape_box_length.size(); i++) ele_box_length *= shape_box_length[i]; - for (size_t i = 0; i < shape_no_wrap_crd.size(); i++) ele_no_wrap_crd *= shape_no_wrap_crd[i]; - for (size_t i = 0; i < shape_crd.size(); i++) ele_crd *= shape_crd[i]; + ele_start *= SizeOf(shape_start); + ele_end *= SizeOf(shape_end); + ele_center_of_mass *= SizeOf(shape_center_of_mass); + ele_box_length *= SizeOf(shape_box_length); + ele_no_wrap_crd *= SizeOf(shape_no_wrap_crd); + ele_crd *= SizeOf(shape_crd); InitSizeLists(); return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/common/mdtemperature_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/common/mdtemperature_kernel.h index 1281643d81c..7e4558144f0 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/common/mdtemperature_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/common/mdtemperature_kernel.h @@ -43,10 +43,10 @@ class MDTemperatureGpuKernelMod : public DeprecatedNativeGpuKernelMod { auto shape_atom_vel = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); auto shape_atom_mass = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); - for (size_t i = 0; i < shape_start.size(); i++) ele_start *= shape_start[i]; 
- for (size_t i = 0; i < shape_end.size(); i++) ele_end *= shape_end[i]; - for (size_t i = 0; i < shape_atom_vel.size(); i++) ele_atom_vel *= shape_atom_vel[i]; - for (size_t i = 0; i < shape_atom_mass.size(); i++) ele_atom_mass *= shape_atom_mass[i]; + ele_start *= SizeOf(shape_start); + ele_end *= SizeOf(shape_end); + ele_atom_vel *= SizeOf(shape_atom_vel); + ele_atom_mass *= SizeOf(shape_atom_mass); InitSizeLists(); return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/common/total_c6_get_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/common/total_c6_get_kernel.h index 6157c6ba4e8..3dc019a64b2 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/common/total_c6_get_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/common/total_c6_get_kernel.h @@ -41,8 +41,8 @@ class TotalC6GetGpuKernelMod : public DeprecatedNativeGpuKernelMod { auto shape_atom_lj_type = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); auto shape_lj_b = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); - for (size_t i = 0; i < shape_atom_lj_type.size(); i++) ele_atom_lj_type *= shape_atom_lj_type[i]; - for (size_t i = 0; i < shape_lj_b.size(); i++) ele_lj_b *= shape_lj_b[i]; + ele_atom_lj_type *= SizeOf(shape_atom_lj_type); + ele_lj_b *= SizeOf(shape_lj_b); InitSizeLists(); return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/crdmcmap/cal_no_wrap_crd_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/crdmcmap/cal_no_wrap_crd_kernel.h index 13cb2f24bf0..959c3d0674d 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/crdmcmap/cal_no_wrap_crd_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/crdmcmap/cal_no_wrap_crd_kernel.h @@ -44,9 +44,9 @@ class CalculateNoWrapCrdGpuKernelMod : public DeprecatedNativeGpuKernelMod { auto shape_box = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); auto shape_box_map_times = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); - for (size_t i = 0; i < shape_crd.size(); i++) ele_crd *= shape_crd[i]; - for (size_t i = 0; i < shape_box.size(); i++) ele_box *= shape_box[i]; - for (size_t i = 0; i < shape_box_map_times.size(); i++) ele_box_map_times *= shape_box_map_times[i]; + ele_crd *= SizeOf(shape_crd); + ele_box *= SizeOf(shape_box); + ele_box_map_times *= SizeOf(shape_box_map_times); InitSizeLists(); return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/crdmcmap/refresh_boxmaptimes_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/crdmcmap/refresh_boxmaptimes_kernel.h index cd08de84730..8e8e9b156ca 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/crdmcmap/refresh_boxmaptimes_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/crdmcmap/refresh_boxmaptimes_kernel.h @@ -45,10 +45,10 @@ class RefreshBoxmaptimesGpuKernelMod : public DeprecatedNativeGpuKernelMod { auto shape_box_length_inverse = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); auto shape_box_map_times = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); - for (size_t i = 0; i < shape_crd.size(); i++) ele_crd *= shape_crd[i]; - for (size_t i = 0; i < shape_old_crd.size(); i++) ele_old_crd *= shape_old_crd[i]; - for (size_t i = 0; i < shape_box_length_inverse.size(); i++) ele_box_length_inverse *= shape_box_length_inverse[i]; - for (size_t i = 0; i < shape_box_map_times.size(); i++) ele_box_map_times *= shape_box_map_times[i]; + ele_crd *= SizeOf(shape_crd); + ele_old_crd *= SizeOf(shape_old_crd); + 
ele_box_length_inverse *= SizeOf(shape_box_length_inverse); + ele_box_map_times *= SizeOf(shape_box_map_times); InitSizeLists(); return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/dihedral/dihedral_atom_energy_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/dihedral/dihedral_atom_energy_kernel.h index 8dda84edc22..a51c6f230cc 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/dihedral/dihedral_atom_energy_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/dihedral/dihedral_atom_energy_kernel.h @@ -47,17 +47,17 @@ class DihedralAtomEnergyGpuKernelMod : public DeprecatedNativeGpuKernelMod { auto shape_gams = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 9); auto shape_pn = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 10); - for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; - for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i]; - for (size_t i = 0; i < shape_atom_a.size(); i++) ele_atom_a *= shape_atom_a[i]; - for (size_t i = 0; i < shape_atom_b.size(); i++) ele_atom_b *= shape_atom_b[i]; - for (size_t i = 0; i < shape_atom_c.size(); i++) ele_atom_c *= shape_atom_c[i]; - for (size_t i = 0; i < shape_atom_d.size(); i++) ele_atom_d *= shape_atom_d[i]; - for (size_t i = 0; i < shape_ipn.size(); i++) ele_ipn *= shape_ipn[i]; - for (size_t i = 0; i < shape_pk.size(); i++) ele_pk *= shape_pk[i]; - for (size_t i = 0; i < shape_gamc.size(); i++) ele_gamc *= shape_gamc[i]; - for (size_t i = 0; i < shape_gams.size(); i++) ele_gams *= shape_gams[i]; - for (size_t i = 0; i < shape_pn.size(); i++) ele_pn *= shape_pn[i]; + ele_uint_crd *= SizeOf(shape_uint_crd); + ele_scaler *= SizeOf(shape_scaler); + ele_atom_a *= SizeOf(shape_atom_a); + ele_atom_b *= SizeOf(shape_atom_b); + ele_atom_c *= SizeOf(shape_atom_c); + ele_atom_d *= SizeOf(shape_atom_d); + ele_ipn *= SizeOf(shape_ipn); + ele_pk *= SizeOf(shape_pk); + ele_gamc *= SizeOf(shape_gamc); + ele_gams *= SizeOf(shape_gams); + ele_pn *= SizeOf(shape_pn); InitSizeLists(); return true; } diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/dihedral/dihedral_energy_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/dihedral/dihedral_energy_kernel.h index 4f18269be82..e5a42d440f3 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/dihedral/dihedral_energy_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/dihedral/dihedral_energy_kernel.h @@ -47,17 +47,17 @@ class DihedralEnergyGpuKernelMod : public DeprecatedNativeGpuKernelMod { auto shape_gams = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 9); auto shape_pn = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 10); - for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; - for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i]; - for (size_t i = 0; i < shape_atom_a.size(); i++) ele_atom_a *= shape_atom_a[i]; - for (size_t i = 0; i < shape_atom_b.size(); i++) ele_atom_b *= shape_atom_b[i]; - for (size_t i = 0; i < shape_atom_c.size(); i++) ele_atom_c *= shape_atom_c[i]; - for (size_t i = 0; i < shape_atom_d.size(); i++) ele_atom_d *= shape_atom_d[i]; - for (size_t i = 0; i < shape_ipn.size(); i++) ele_ipn *= shape_ipn[i]; - for (size_t i = 0; i < shape_pk.size(); i++) ele_pk *= shape_pk[i]; - for (size_t i = 0; i < shape_gamc.size(); i++) ele_gamc *= shape_gamc[i]; - for (size_t i = 0; i < shape_gams.size(); i++) ele_gams *= shape_gams[i]; - for (size_t i = 
0; i < shape_pn.size(); i++) ele_pn *= shape_pn[i]; + ele_uint_crd *= SizeOf(shape_uint_crd); + ele_scaler *= SizeOf(shape_scaler); + ele_atom_a *= SizeOf(shape_atom_a); + ele_atom_b *= SizeOf(shape_atom_b); + ele_atom_c *= SizeOf(shape_atom_c); + ele_atom_d *= SizeOf(shape_atom_d); + ele_ipn *= SizeOf(shape_ipn); + ele_pk *= SizeOf(shape_pk); + ele_gamc *= SizeOf(shape_gamc); + ele_gams *= SizeOf(shape_gams); + ele_pn *= SizeOf(shape_pn); InitSizeLists(); return true; } diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/dihedral/dihedral_force_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/dihedral/dihedral_force_kernel.h index 575edb46a3e..945c8a5588a 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/dihedral/dihedral_force_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/dihedral/dihedral_force_kernel.h @@ -47,17 +47,17 @@ class DihedralForceGpuKernelMod : public DeprecatedNativeGpuKernelMod { auto shape_gams = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 9); auto shape_pn = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 10); - for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; - for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i]; - for (size_t i = 0; i < shape_atom_a.size(); i++) ele_atom_a *= shape_atom_a[i]; - for (size_t i = 0; i < shape_atom_b.size(); i++) ele_atom_b *= shape_atom_b[i]; - for (size_t i = 0; i < shape_atom_c.size(); i++) ele_atom_c *= shape_atom_c[i]; - for (size_t i = 0; i < shape_atom_d.size(); i++) ele_atom_d *= shape_atom_d[i]; - for (size_t i = 0; i < shape_ipn.size(); i++) ele_ipn *= shape_ipn[i]; - for (size_t i = 0; i < shape_pk.size(); i++) ele_pk *= shape_pk[i]; - for (size_t i = 0; i < shape_gamc.size(); i++) ele_gamc *= shape_gamc[i]; - for (size_t i = 0; i < shape_gams.size(); i++) ele_gams *= shape_gams[i]; - for (size_t i = 0; i < shape_pn.size(); i++) ele_pn *= shape_pn[i]; + ele_uint_crd *= SizeOf(shape_uint_crd); + ele_scaler *= SizeOf(shape_scaler); + ele_atom_a *= SizeOf(shape_atom_a); + ele_atom_b *= SizeOf(shape_atom_b); + ele_atom_c *= SizeOf(shape_atom_c); + ele_atom_d *= SizeOf(shape_atom_d); + ele_ipn *= SizeOf(shape_ipn); + ele_pk *= SizeOf(shape_pk); + ele_gamc *= SizeOf(shape_gamc); + ele_gams *= SizeOf(shape_gams); + ele_pn *= SizeOf(shape_pn); InitSizeLists(); return true; } diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/dihedral/dihedral_force_with_atom_energy_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/dihedral/dihedral_force_with_atom_energy_kernel.h index 6e0af9bd0aa..df2291272f0 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/dihedral/dihedral_force_with_atom_energy_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/dihedral/dihedral_force_with_atom_energy_kernel.h @@ -47,17 +47,17 @@ class DihedralForceWithAtomEnergyGpuKernelMod : public DeprecatedNativeGpuKernel auto shape_gams = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 9); auto shape_pn = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 10); - for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; - for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i]; - for (size_t i = 0; i < shape_atom_a.size(); i++) ele_atom_a *= shape_atom_a[i]; - for (size_t i = 0; i < shape_atom_b.size(); i++) ele_atom_b *= shape_atom_b[i]; - for (size_t i = 0; i < shape_atom_c.size(); i++) ele_atom_c *= shape_atom_c[i]; - for (size_t i = 
0; i < shape_atom_d.size(); i++) ele_atom_d *= shape_atom_d[i]; - for (size_t i = 0; i < shape_ipn.size(); i++) ele_ipn *= shape_ipn[i]; - for (size_t i = 0; i < shape_pk.size(); i++) ele_pk *= shape_pk[i]; - for (size_t i = 0; i < shape_gamc.size(); i++) ele_gamc *= shape_gamc[i]; - for (size_t i = 0; i < shape_gams.size(); i++) ele_gams *= shape_gams[i]; - for (size_t i = 0; i < shape_pn.size(); i++) ele_pn *= shape_pn[i]; + ele_uint_crd *= SizeOf(shape_uint_crd); + ele_scaler *= SizeOf(shape_scaler); + ele_atom_a *= SizeOf(shape_atom_a); + ele_atom_b *= SizeOf(shape_atom_b); + ele_atom_c *= SizeOf(shape_atom_c); + ele_atom_d *= SizeOf(shape_atom_d); + ele_ipn *= SizeOf(shape_ipn); + ele_pk *= SizeOf(shape_pk); + ele_gamc *= SizeOf(shape_gamc); + ele_gams *= SizeOf(shape_gams); + ele_pn *= SizeOf(shape_pn); InitSizeLists(); return true; } diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/lj/lj_direct_cf_force_with_lj_virial_direct_cf_energy_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/lj/lj_direct_cf_force_with_lj_virial_direct_cf_energy_kernel.h index ccc5d86b9c1..8c562943f2e 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/lj/lj_direct_cf_force_with_lj_virial_direct_cf_energy_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/lj/lj_direct_cf_force_with_lj_virial_direct_cf_energy_kernel.h @@ -51,12 +51,12 @@ class LJForceWithVirialEnergyGpuKernelMod : public DeprecatedNativeGpuKernelMod auto shape_d_LJ_a = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6); auto shape_d_LJ_b = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7); - for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; - for (size_t i = 0; i < shape_LJtype.size(); i++) ele_LJtype *= shape_LJtype[i]; - for (size_t i = 0; i < shape_charge.size(); i++) ele_charge *= shape_charge[i]; - for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i]; - for (size_t i = 0; i < shape_d_LJ_a.size(); i++) ele_d_LJ_a *= shape_d_LJ_a[i]; - for (size_t i = 0; i < shape_d_LJ_b.size(); i++) ele_d_LJ_b *= shape_d_LJ_b[i]; + ele_uint_crd *= SizeOf(shape_uint_crd); + ele_LJtype *= SizeOf(shape_LJtype); + ele_charge *= SizeOf(shape_charge); + ele_scaler *= SizeOf(shape_scaler); + ele_d_LJ_a *= SizeOf(shape_d_LJ_a); + ele_d_LJ_b *= SizeOf(shape_d_LJ_b); InitSizeLists(); return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/lj/lj_energy_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/lj/lj_energy_kernel.h index 688c2817659..6a5c8c217fd 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/lj/lj_energy_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/lj/lj_energy_kernel.h @@ -45,12 +45,12 @@ class LJEnergyGpuKernelMod : public DeprecatedNativeGpuKernelMod { auto shape_d_LJ_a = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6); auto shape_d_LJ_b = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7); - for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; - for (size_t i = 0; i < shape_LJtype.size(); i++) ele_LJtype *= shape_LJtype[i]; - for (size_t i = 0; i < shape_charge.size(); i++) ele_charge *= shape_charge[i]; - for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i]; - for (size_t i = 0; i < shape_d_LJ_a.size(); i++) ele_d_LJ_a *= shape_d_LJ_a[i]; - for (size_t i = 0; i < shape_d_LJ_b.size(); i++) ele_d_LJ_b *= shape_d_LJ_b[i]; + ele_uint_crd *= SizeOf(shape_uint_crd); + ele_LJtype *= 
SizeOf(shape_LJtype); + ele_charge *= SizeOf(shape_charge); + ele_scaler *= SizeOf(shape_scaler); + ele_d_LJ_a *= SizeOf(shape_d_LJ_a); + ele_d_LJ_b *= SizeOf(shape_d_LJ_b); InitSizeLists(); return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/lj/lj_force_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/lj/lj_force_kernel.h index f4bd8cc8111..9c1f12a82dd 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/lj/lj_force_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/lj/lj_force_kernel.h @@ -45,12 +45,12 @@ class LJForceGpuKernelMod : public DeprecatedNativeGpuKernelMod { auto shape_d_LJ_a = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6); auto shape_d_LJ_b = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7); - for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; - for (size_t i = 0; i < shape_LJtype.size(); i++) ele_LJtype *= shape_LJtype[i]; - for (size_t i = 0; i < shape_charge.size(); i++) ele_charge *= shape_charge[i]; - for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i]; - for (size_t i = 0; i < shape_d_LJ_a.size(); i++) ele_d_LJ_a *= shape_d_LJ_a[i]; - for (size_t i = 0; i < shape_d_LJ_b.size(); i++) ele_d_LJ_b *= shape_d_LJ_b[i]; + ele_uint_crd *= SizeOf(shape_uint_crd); + ele_LJtype *= SizeOf(shape_LJtype); + ele_charge *= SizeOf(shape_charge); + ele_scaler *= SizeOf(shape_scaler); + ele_d_LJ_a *= SizeOf(shape_d_LJ_a); + ele_d_LJ_b *= SizeOf(shape_d_LJ_b); InitSizeLists(); return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/lj/lj_force_with_pme_direct_force_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/lj/lj_force_with_pme_direct_force_kernel.h index acc652510fb..b47a1de9c6e 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/lj/lj_force_with_pme_direct_force_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/lj/lj_force_with_pme_direct_force_kernel.h @@ -46,12 +46,12 @@ class LJForceWithPMEDirectForceGpuKernelMod : public DeprecatedNativeGpuKernelMo auto shape_d_LJ_a = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6); auto shape_d_LJ_b = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7); - for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; - for (size_t i = 0; i < shape_LJtype.size(); i++) ele_LJtype *= shape_LJtype[i]; - for (size_t i = 0; i < shape_charge.size(); i++) ele_charge *= shape_charge[i]; - for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i]; - for (size_t i = 0; i < shape_d_LJ_a.size(); i++) ele_d_LJ_a *= shape_d_LJ_a[i]; - for (size_t i = 0; i < shape_d_LJ_b.size(); i++) ele_d_LJ_b *= shape_d_LJ_b[i]; + ele_uint_crd *= SizeOf(shape_uint_crd); + ele_LJtype *= SizeOf(shape_LJtype); + ele_charge *= SizeOf(shape_charge); + ele_scaler *= SizeOf(shape_scaler); + ele_d_LJ_a *= SizeOf(shape_d_LJ_a); + ele_d_LJ_b *= SizeOf(shape_d_LJ_b); InitSizeLists(); return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/lj/lj_force_with_pme_direct_force_update_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/lj/lj_force_with_pme_direct_force_update_kernel.h index 8f84b0a48c6..39827ddbeb5 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/lj/lj_force_with_pme_direct_force_update_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/lj/lj_force_with_pme_direct_force_update_kernel.h @@ -51,12 +51,12 @@ class LJForceWithPMEDirectForceUpdateGpuKernelMod : public 
DeprecatedNativeGpuKe auto shape_d_LJ_a = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6); auto shape_d_LJ_b = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7); - for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; - for (size_t i = 0; i < shape_LJtype.size(); i++) ele_LJtype *= shape_LJtype[i]; - for (size_t i = 0; i < shape_charge.size(); i++) ele_charge *= shape_charge[i]; - for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i]; - for (size_t i = 0; i < shape_d_LJ_a.size(); i++) ele_d_LJ_a *= shape_d_LJ_a[i]; - for (size_t i = 0; i < shape_d_LJ_b.size(); i++) ele_d_LJ_b *= shape_d_LJ_b[i]; + ele_uint_crd *= SizeOf(shape_uint_crd); + ele_LJtype *= SizeOf(shape_LJtype); + ele_charge *= SizeOf(shape_charge); + ele_scaler *= SizeOf(shape_scaler); + ele_d_LJ_a *= SizeOf(shape_d_LJ_a); + ele_d_LJ_b *= SizeOf(shape_d_LJ_b); InitSizeLists(); return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/lj/lj_force_with_virial_energy_update_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/lj/lj_force_with_virial_energy_update_kernel.h index 6c205c9024c..5cdb6bc3320 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/lj/lj_force_with_virial_energy_update_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/lj/lj_force_with_virial_energy_update_kernel.h @@ -52,12 +52,12 @@ class LJForceWithVirialEnergyUpdateGpuKernelMod : public DeprecatedNativeGpuKern auto shape_d_LJ_a = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6); auto shape_d_LJ_b = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7); - for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; - for (size_t i = 0; i < shape_LJtype.size(); i++) ele_LJtype *= shape_LJtype[i]; - for (size_t i = 0; i < shape_charge.size(); i++) ele_charge *= shape_charge[i]; - for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i]; - for (size_t i = 0; i < shape_d_LJ_a.size(); i++) ele_d_LJ_a *= shape_d_LJ_a[i]; - for (size_t i = 0; i < shape_d_LJ_b.size(); i++) ele_d_LJ_b *= shape_d_LJ_b[i]; + ele_uint_crd *= SizeOf(shape_uint_crd); + ele_LJtype *= SizeOf(shape_LJtype); + ele_charge *= SizeOf(shape_charge); + ele_scaler *= SizeOf(shape_scaler); + ele_d_LJ_a *= SizeOf(shape_d_LJ_a); + ele_d_LJ_b *= SizeOf(shape_d_LJ_b); InitSizeLists(); return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/nb14/dihedral_14_cf_atom_energy_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/nb14/dihedral_14_cf_atom_energy_kernel.h index dc8e0b481cb..64dd2f34bfb 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/nb14/dihedral_14_cf_atom_energy_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/nb14/dihedral_14_cf_atom_energy_kernel.h @@ -47,13 +47,13 @@ class Dihedral14CFAtomEnergyGpuKernelMod : public DeprecatedNativeGpuKernelMod { auto shape_b_14 = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5); auto shape_cf_scale_factor = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6); - for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; - for (size_t i = 0; i < shape_LJtype.size(); i++) ele_LJtype *= shape_LJtype[i]; - for (size_t i = 0; i < shape_charge.size(); i++) ele_charge *= shape_charge[i]; - for (size_t i = 0; i < shape_boxlength_f.size(); i++) ele_boxlength_f *= shape_boxlength_f[i]; - for (size_t i = 0; i < shape_a_14.size(); i++) ele_a_14 *= shape_a_14[i]; - for (size_t i = 0; 
i < shape_b_14.size(); i++) ele_b_14 *= shape_b_14[i]; - for (size_t i = 0; i < shape_cf_scale_factor.size(); i++) ele_cf_scale_factor *= shape_cf_scale_factor[i]; + ele_uint_crd *= SizeOf(shape_uint_crd); + ele_LJtype *= SizeOf(shape_LJtype); + ele_charge *= SizeOf(shape_charge); + ele_boxlength_f *= SizeOf(shape_boxlength_f); + ele_a_14 *= SizeOf(shape_a_14); + ele_b_14 *= SizeOf(shape_b_14); + ele_cf_scale_factor *= SizeOf(shape_cf_scale_factor); InitSizeLists(); return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/nb14/dihedral_14_cf_energy_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/nb14/dihedral_14_cf_energy_kernel.h index d4ff1d456c4..16f3123a95c 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/nb14/dihedral_14_cf_energy_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/nb14/dihedral_14_cf_energy_kernel.h @@ -47,13 +47,13 @@ class Dihedral14CFEnergyGpuKernelMod : public DeprecatedNativeGpuKernelMod { auto shape_b_14 = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5); auto shape_cf_scale_factor = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6); - for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; - for (size_t i = 0; i < shape_LJtype.size(); i++) ele_LJtype *= shape_LJtype[i]; - for (size_t i = 0; i < shape_charge.size(); i++) ele_charge *= shape_charge[i]; - for (size_t i = 0; i < shape_boxlength_f.size(); i++) ele_boxlength_f *= shape_boxlength_f[i]; - for (size_t i = 0; i < shape_a_14.size(); i++) ele_a_14 *= shape_a_14[i]; - for (size_t i = 0; i < shape_b_14.size(); i++) ele_b_14 *= shape_b_14[i]; - for (size_t i = 0; i < shape_cf_scale_factor.size(); i++) ele_cf_scale_factor *= shape_cf_scale_factor[i]; + ele_uint_crd *= SizeOf(shape_uint_crd); + ele_LJtype *= SizeOf(shape_LJtype); + ele_charge *= SizeOf(shape_charge); + ele_boxlength_f *= SizeOf(shape_boxlength_f); + ele_a_14 *= SizeOf(shape_a_14); + ele_b_14 *= SizeOf(shape_b_14); + ele_cf_scale_factor *= SizeOf(shape_cf_scale_factor); InitSizeLists(); return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/nb14/dihedral_14_lj_atom_energy_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/nb14/dihedral_14_lj_atom_energy_kernel.h index deb7669e6c2..8bbb560f739 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/nb14/dihedral_14_lj_atom_energy_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/nb14/dihedral_14_lj_atom_energy_kernel.h @@ -49,15 +49,15 @@ class Dihedral14LJAtomEnergyGpuKernelMod : public DeprecatedNativeGpuKernelMod { auto shape_LJ_type_A = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7); auto shape_LJ_type_B = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 8); - for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; - for (size_t i = 0; i < shape_LJtype.size(); i++) ele_LJtype *= shape_LJtype[i]; - for (size_t i = 0; i < shape_charge.size(); i++) ele_charge *= shape_charge[i]; - for (size_t i = 0; i < shape_boxlength_f.size(); i++) ele_boxlength_f *= shape_boxlength_f[i]; - for (size_t i = 0; i < shape_a_14.size(); i++) ele_a_14 *= shape_a_14[i]; - for (size_t i = 0; i < shape_b_14.size(); i++) ele_b_14 *= shape_b_14[i]; - for (size_t i = 0; i < shape_lj_scale_factor.size(); i++) ele_lj_scale_factor *= shape_lj_scale_factor[i]; - for (size_t i = 0; i < shape_LJ_type_A.size(); i++) ele_LJ_type_A *= shape_LJ_type_A[i]; - for (size_t i = 0; i < shape_LJ_type_B.size(); i++) ele_LJ_type_B *= 
shape_LJ_type_B[i]; + ele_uint_crd *= SizeOf(shape_uint_crd); + ele_LJtype *= SizeOf(shape_LJtype); + ele_charge *= SizeOf(shape_charge); + ele_boxlength_f *= SizeOf(shape_boxlength_f); + ele_a_14 *= SizeOf(shape_a_14); + ele_b_14 *= SizeOf(shape_b_14); + ele_lj_scale_factor *= SizeOf(shape_lj_scale_factor); + ele_LJ_type_A *= SizeOf(shape_LJ_type_A); + ele_LJ_type_B *= SizeOf(shape_LJ_type_B); InitSizeLists(); return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_and_virial_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_and_virial_kernel.h index be079355a44..7f534620ed3 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_and_virial_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_and_virial_kernel.h @@ -51,16 +51,16 @@ class Dihedral14LJCFForceWithAtomEnergyAndVirialGpuKernelMod : public Deprecated auto shape_LJ_type_A = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 8); auto shape_LJ_type_B = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 9); - for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; - for (size_t i = 0; i < shape_LJtype.size(); i++) ele_LJtype *= shape_LJtype[i]; - for (size_t i = 0; i < shape_charge.size(); i++) ele_charge *= shape_charge[i]; - for (size_t i = 0; i < shape_boxlength_f.size(); i++) ele_boxlength_f *= shape_boxlength_f[i]; - for (size_t i = 0; i < shape_a_14.size(); i++) ele_a_14 *= shape_a_14[i]; - for (size_t i = 0; i < shape_b_14.size(); i++) ele_b_14 *= shape_b_14[i]; - for (size_t i = 0; i < shape_lj_scale_factor.size(); i++) ele_lj_scale_factor *= shape_lj_scale_factor[i]; - for (size_t i = 0; i < shape_cf_scale_factor.size(); i++) ele_cf_scale_factor *= shape_cf_scale_factor[i]; - for (size_t i = 0; i < shape_LJ_type_A.size(); i++) ele_LJ_type_A *= shape_LJ_type_A[i]; - for (size_t i = 0; i < shape_LJ_type_B.size(); i++) ele_LJ_type_B *= shape_LJ_type_B[i]; + ele_uint_crd *= SizeOf(shape_uint_crd); + ele_LJtype *= SizeOf(shape_LJtype); + ele_charge *= SizeOf(shape_charge); + ele_boxlength_f *= SizeOf(shape_boxlength_f); + ele_a_14 *= SizeOf(shape_a_14); + ele_b_14 *= SizeOf(shape_b_14); + ele_lj_scale_factor *= SizeOf(shape_lj_scale_factor); + ele_cf_scale_factor *= SizeOf(shape_cf_scale_factor); + ele_LJ_type_A *= SizeOf(shape_LJ_type_A); + ele_LJ_type_B *= SizeOf(shape_LJ_type_B); InitSizeLists(); return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_kernel.h index a83fc362b69..289d4f4f4c4 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_kernel.h @@ -50,16 +50,16 @@ class Dihedral14LJCFForceWithAtomEnergyGpuKernelMod : public DeprecatedNativeGpu auto shape_LJ_type_A = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 8); auto shape_LJ_type_B = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 9); - for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; - for (size_t i = 0; i < shape_LJtype.size(); i++) ele_LJtype *= shape_LJtype[i]; - for (size_t i = 0; i < shape_charge.size(); i++) 
ele_charge *= shape_charge[i]; - for (size_t i = 0; i < shape_boxlength_f.size(); i++) ele_boxlength_f *= shape_boxlength_f[i]; - for (size_t i = 0; i < shape_a_14.size(); i++) ele_a_14 *= shape_a_14[i]; - for (size_t i = 0; i < shape_b_14.size(); i++) ele_b_14 *= shape_b_14[i]; - for (size_t i = 0; i < shape_lj_scale_factor.size(); i++) ele_lj_scale_factor *= shape_lj_scale_factor[i]; - for (size_t i = 0; i < shape_cf_scale_factor.size(); i++) ele_cf_scale_factor *= shape_cf_scale_factor[i]; - for (size_t i = 0; i < shape_LJ_type_A.size(); i++) ele_LJ_type_A *= shape_LJ_type_A[i]; - for (size_t i = 0; i < shape_LJ_type_B.size(); i++) ele_LJ_type_B *= shape_LJ_type_B[i]; + ele_uint_crd *= SizeOf(shape_uint_crd); + ele_LJtype *= SizeOf(shape_LJtype); + ele_charge *= SizeOf(shape_charge); + ele_boxlength_f *= SizeOf(shape_boxlength_f); + ele_a_14 *= SizeOf(shape_a_14); + ele_b_14 *= SizeOf(shape_b_14); + ele_lj_scale_factor *= SizeOf(shape_lj_scale_factor); + ele_cf_scale_factor *= SizeOf(shape_cf_scale_factor); + ele_LJ_type_A *= SizeOf(shape_LJ_type_A); + ele_LJ_type_B *= SizeOf(shape_LJ_type_B); InitSizeLists(); return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/nb14/dihedral_14_lj_energy_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/nb14/dihedral_14_lj_energy_kernel.h index f27769655f3..ab65bfabf39 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/nb14/dihedral_14_lj_energy_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/nb14/dihedral_14_lj_energy_kernel.h @@ -49,15 +49,15 @@ class Dihedral14LJEnergyGpuKernelMod : public DeprecatedNativeGpuKernelMod { auto shape_LJ_type_A = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7); auto shape_LJ_type_B = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 8); - for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; - for (size_t i = 0; i < shape_LJtype.size(); i++) ele_LJtype *= shape_LJtype[i]; - for (size_t i = 0; i < shape_charge.size(); i++) ele_charge *= shape_charge[i]; - for (size_t i = 0; i < shape_boxlength_f.size(); i++) ele_boxlength_f *= shape_boxlength_f[i]; - for (size_t i = 0; i < shape_a_14.size(); i++) ele_a_14 *= shape_a_14[i]; - for (size_t i = 0; i < shape_b_14.size(); i++) ele_b_14 *= shape_b_14[i]; - for (size_t i = 0; i < shape_lj_scale_factor.size(); i++) ele_lj_scale_factor *= shape_lj_scale_factor[i]; - for (size_t i = 0; i < shape_LJ_type_A.size(); i++) ele_LJ_type_A *= shape_LJ_type_A[i]; - for (size_t i = 0; i < shape_LJ_type_B.size(); i++) ele_LJ_type_B *= shape_LJ_type_B[i]; + ele_uint_crd *= SizeOf(shape_uint_crd); + ele_LJtype *= SizeOf(shape_LJtype); + ele_charge *= SizeOf(shape_charge); + ele_boxlength_f *= SizeOf(shape_boxlength_f); + ele_a_14 *= SizeOf(shape_a_14); + ele_b_14 *= SizeOf(shape_b_14); + ele_lj_scale_factor *= SizeOf(shape_lj_scale_factor); + ele_LJ_type_A *= SizeOf(shape_LJ_type_A); + ele_LJ_type_B *= SizeOf(shape_LJ_type_B); InitSizeLists(); return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/nb14/dihedral_14_lj_force_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/nb14/dihedral_14_lj_force_gpu_kernel.h index eb45f542641..082987b7d06 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/nb14/dihedral_14_lj_force_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/nb14/dihedral_14_lj_force_gpu_kernel.h @@ -49,15 +49,15 @@ class Dihedral14LJForceGpuKernelMod : public DeprecatedNativeGpuKernelMod { auto shape_LJ_type_A = 
common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7); auto shape_LJ_type_B = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 8); - for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; - for (size_t i = 0; i < shape_LJtype.size(); i++) ele_LJtype *= shape_LJtype[i]; - for (size_t i = 0; i < shape_charge.size(); i++) ele_charge *= shape_charge[i]; - for (size_t i = 0; i < shape_boxlength_f.size(); i++) ele_boxlength_f *= shape_boxlength_f[i]; - for (size_t i = 0; i < shape_a_14.size(); i++) ele_a_14 *= shape_a_14[i]; - for (size_t i = 0; i < shape_b_14.size(); i++) ele_b_14 *= shape_b_14[i]; - for (size_t i = 0; i < shape_lj_scale_factor.size(); i++) ele_lj_scale_factor *= shape_lj_scale_factor[i]; - for (size_t i = 0; i < shape_LJ_type_A.size(); i++) ele_LJ_type_A *= shape_LJ_type_A[i]; - for (size_t i = 0; i < shape_LJ_type_B.size(); i++) ele_LJ_type_B *= shape_LJ_type_B[i]; + ele_uint_crd *= SizeOf(shape_uint_crd); + ele_LJtype *= SizeOf(shape_LJtype); + ele_charge *= SizeOf(shape_charge); + ele_boxlength_f *= SizeOf(shape_boxlength_f); + ele_a_14 *= SizeOf(shape_a_14); + ele_b_14 *= SizeOf(shape_b_14); + ele_lj_scale_factor *= SizeOf(shape_lj_scale_factor); + ele_LJ_type_A *= SizeOf(shape_LJ_type_A); + ele_LJ_type_B *= SizeOf(shape_LJ_type_B); InitSizeLists(); return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/nb14/dihedral_14_lj_force_with_direct_cf_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/nb14/dihedral_14_lj_force_with_direct_cf_kernel.h index a9bb8ea0f3c..27b080c00df 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/nb14/dihedral_14_lj_force_with_direct_cf_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/nb14/dihedral_14_lj_force_with_direct_cf_kernel.h @@ -50,16 +50,16 @@ class Dihedral14LJForceWithDirectCFGpuKernelMod : public DeprecatedNativeGpuKern auto shape_LJ_type_A = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 8); auto shape_LJ_type_B = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 9); - for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; - for (size_t i = 0; i < shape_LJtype.size(); i++) ele_LJtype *= shape_LJtype[i]; - for (size_t i = 0; i < shape_charge.size(); i++) ele_charge *= shape_charge[i]; - for (size_t i = 0; i < shape_boxlength_f.size(); i++) ele_boxlength_f *= shape_boxlength_f[i]; - for (size_t i = 0; i < shape_a_14.size(); i++) ele_a_14 *= shape_a_14[i]; - for (size_t i = 0; i < shape_b_14.size(); i++) ele_b_14 *= shape_b_14[i]; - for (size_t i = 0; i < shape_lj_scale_factor.size(); i++) ele_lj_scale_factor *= shape_lj_scale_factor[i]; - for (size_t i = 0; i < shape_cf_scale_factor.size(); i++) ele_cf_scale_factor *= shape_cf_scale_factor[i]; - for (size_t i = 0; i < shape_LJ_type_A.size(); i++) ele_LJ_type_A *= shape_LJ_type_A[i]; - for (size_t i = 0; i < shape_LJ_type_B.size(); i++) ele_LJ_type_B *= shape_LJ_type_B[i]; + ele_uint_crd *= SizeOf(shape_uint_crd); + ele_LJtype *= SizeOf(shape_LJtype); + ele_charge *= SizeOf(shape_charge); + ele_boxlength_f *= SizeOf(shape_boxlength_f); + ele_a_14 *= SizeOf(shape_a_14); + ele_b_14 *= SizeOf(shape_b_14); + ele_lj_scale_factor *= SizeOf(shape_lj_scale_factor); + ele_cf_scale_factor *= SizeOf(shape_cf_scale_factor); + ele_LJ_type_A *= SizeOf(shape_LJ_type_A); + ele_LJ_type_B *= SizeOf(shape_LJ_type_B); InitSizeLists(); return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/restrain/restrain_energy_kernel.h 
b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/restrain/restrain_energy_kernel.h index b0136b5eb54..e81b99be9be 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/restrain/restrain_energy_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/restrain/restrain_energy_kernel.h @@ -47,10 +47,10 @@ class RestrainEnergyGpuKernelMod : public DeprecatedNativeGpuKernelMod { auto shape_crd_ref = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); auto shape_scaler = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); - for (size_t i = 0; i < shape_crd.size(); i++) ele_crd *= shape_crd[i]; - for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i]; - for (size_t i = 0; i < shape_restrain_list.size(); i++) ele_restrain_list *= shape_restrain_list[i]; - for (size_t i = 0; i < shape_crd_ref.size(); i++) ele_crd_ref *= shape_crd_ref[i]; + ele_crd *= SizeOf(shape_crd); + ele_scaler *= SizeOf(shape_scaler); + ele_restrain_list *= SizeOf(shape_restrain_list); + ele_crd_ref *= SizeOf(shape_crd_ref); InitSizeLists(); return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/restrain/restrain_force_atom_energy_virial_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/restrain/restrain_force_atom_energy_virial_kernel.h index e3579ab45ab..39b679f3e57 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/restrain/restrain_force_atom_energy_virial_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/restrain/restrain_force_atom_energy_virial_kernel.h @@ -47,10 +47,10 @@ class RestrainForceWithAtomenergyAndVirialGpuKernelMod : public DeprecatedNative auto shape_crd_ref = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); auto shape_scaler = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); - for (size_t i = 0; i < shape_crd.size(); i++) ele_crd *= shape_crd[i]; - for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i]; - for (size_t i = 0; i < shape_restrain_list.size(); i++) ele_restrain_list *= shape_restrain_list[i]; - for (size_t i = 0; i < shape_crd_ref.size(); i++) ele_crd_ref *= shape_crd_ref[i]; + ele_crd *= SizeOf(shape_crd); + ele_scaler *= SizeOf(shape_scaler); + ele_restrain_list *= SizeOf(shape_restrain_list); + ele_crd_ref *= SizeOf(shape_crd_ref); InitSizeLists(); return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/restrain/restrain_force_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/restrain/restrain_force_kernel.h index 93ecacb1fc5..471acf5b720 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/restrain/restrain_force_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/restrain/restrain_force_kernel.h @@ -47,10 +47,10 @@ class RestrainForceGpuKernelMod : public DeprecatedNativeGpuKernelMod { auto shape_uint_crd_ref = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); auto shape_scaler = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); - for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; - for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i]; - for (size_t i = 0; i < shape_restrain_list.size(); i++) ele_restrain_list *= shape_restrain_list[i]; - for (size_t i = 0; i < shape_uint_crd_ref.size(); i++) ele_uint_crd_ref *= shape_uint_crd_ref[i]; + ele_uint_crd *= SizeOf(shape_uint_crd); + ele_scaler *= SizeOf(shape_scaler); + ele_restrain_list *= SizeOf(shape_restrain_list); + ele_uint_crd_ref *= 
SizeOf(shape_uint_crd_ref); InitSizeLists(); return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/simple_constrain/constrain_force_cycle_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/simple_constrain/constrain_force_cycle_kernel.h index 7ad770db7b8..d2d80bb59df 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/simple_constrain/constrain_force_cycle_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/simple_constrain/constrain_force_cycle_kernel.h @@ -50,13 +50,13 @@ class ConstrainForceCycleGpuKernelMod : public DeprecatedNativeGpuKernelMod { auto shape_constant_rs = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5); auto shape_constrain_ks = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6); - for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; - for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i]; - for (size_t i = 0; i < shape_pair_dr.size(); i++) ele_pair_dr *= shape_pair_dr[i]; - for (size_t i = 0; i < shape_atom_i_serials.size(); i++) ele_atom_i_serials *= shape_atom_i_serials[i]; - for (size_t i = 0; i < shape_atom_j_serials.size(); i++) ele_atom_j_serials *= shape_atom_j_serials[i]; - for (size_t i = 0; i < shape_constant_rs.size(); i++) ele_constant_rs *= shape_constant_rs[i]; - for (size_t i = 0; i < shape_constrain_ks.size(); i++) ele_constrain_ks *= shape_constrain_ks[i]; + ele_uint_crd *= SizeOf(shape_uint_crd); + ele_scaler *= SizeOf(shape_scaler); + ele_pair_dr *= SizeOf(shape_pair_dr); + ele_atom_i_serials *= SizeOf(shape_atom_i_serials); + ele_atom_j_serials *= SizeOf(shape_atom_j_serials); + ele_constant_rs *= SizeOf(shape_constant_rs); + ele_constrain_ks *= SizeOf(shape_constrain_ks); InitSizeLists(); return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/simple_constrain/constrain_force_cycle_with_virial_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/simple_constrain/constrain_force_cycle_with_virial_kernel.h index 9b88d4a2b35..34d6567cbc0 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/simple_constrain/constrain_force_cycle_with_virial_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/simple_constrain/constrain_force_cycle_with_virial_kernel.h @@ -50,13 +50,13 @@ class ConstrainForceCycleWithVirialGpuKernelMod : public DeprecatedNativeGpuKern auto shape_constant_rs = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5); auto shape_constrain_ks = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6); - for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; - for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i]; - for (size_t i = 0; i < shape_pair_dr.size(); i++) ele_pair_dr *= shape_pair_dr[i]; - for (size_t i = 0; i < shape_atom_i_serials.size(); i++) ele_atom_i_serials *= shape_atom_i_serials[i]; - for (size_t i = 0; i < shape_atom_j_serials.size(); i++) ele_atom_j_serials *= shape_atom_j_serials[i]; - for (size_t i = 0; i < shape_constant_rs.size(); i++) ele_constant_rs *= shape_constant_rs[i]; - for (size_t i = 0; i < shape_constrain_ks.size(); i++) ele_constrain_ks *= shape_constrain_ks[i]; + ele_uint_crd *= SizeOf(shape_uint_crd); + ele_scaler *= SizeOf(shape_scaler); + ele_pair_dr *= SizeOf(shape_pair_dr); + ele_atom_i_serials *= SizeOf(shape_atom_i_serials); + ele_atom_j_serials *= SizeOf(shape_atom_j_serials); + ele_constant_rs *= SizeOf(shape_constant_rs); + ele_constrain_ks *= 
SizeOf(shape_constrain_ks); InitSizeLists(); return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/simple_constrain/constrain_force_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/simple_constrain/constrain_force_kernel.h index c4debfb9047..f8c0c2a5b6b 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/simple_constrain/constrain_force_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/simple_constrain/constrain_force_kernel.h @@ -58,16 +58,15 @@ class ConstrainForceGpuKernelMod : public DeprecatedNativeGpuKernelMod { auto shape_constant_rs = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7); auto shape_constrain_ks = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 8); - for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i]; - for (size_t i = 0; i < shape_pair_dr.size(); i++) ele_pair_dr *= shape_pair_dr[i]; - for (size_t i = 0; i < shape_atom_i_serials.size(); i++) ele_atom_i_serials *= shape_atom_i_serials[i]; - for (size_t i = 0; i < shape_atom_j_serials.size(); i++) ele_atom_j_serials *= shape_atom_j_serials[i]; - for (size_t i = 0; i < shape_constant_rs.size(); i++) ele_constant_rs *= shape_constant_rs[i]; - for (size_t i = 0; i < shape_constrain_ks.size(); i++) ele_constrain_ks *= shape_constrain_ks[i]; - - for (size_t i = 0; i < shape_crd.size(); i++) ele_crd *= shape_crd[i]; - for (size_t i = 0; i < shape_quarter_cof.size(); i++) ele_quarter_cof *= shape_quarter_cof[i]; - for (size_t i = 0; i < shape_mass_inverse.size(); i++) ele_mass_inverse *= shape_mass_inverse[i]; + ele_scaler *= SizeOf(shape_scaler); + ele_pair_dr *= SizeOf(shape_pair_dr); + ele_atom_i_serials *= SizeOf(shape_atom_i_serials); + ele_atom_j_serials *= SizeOf(shape_atom_j_serials); + ele_constant_rs *= SizeOf(shape_constant_rs); + ele_constrain_ks *= SizeOf(shape_constrain_ks); + ele_crd *= SizeOf(shape_crd); + ele_quarter_cof *= SizeOf(shape_quarter_cof); + ele_mass_inverse *= SizeOf(shape_mass_inverse); InitSizeLists(); return true; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/simple_constrain/constrain_force_virial_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/simple_constrain/constrain_force_virial_kernel.h index 536a0c33086..08621b810d4 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/simple_constrain/constrain_force_virial_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/simple_constrain/constrain_force_virial_kernel.h @@ -57,16 +57,15 @@ class ConstrainForceVirialGpuKernelMod : public DeprecatedNativeGpuKernelMod { auto shape_constant_rs = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7); auto shape_constrain_ks = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 8); - for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i]; - for (size_t i = 0; i < shape_pair_dr.size(); i++) ele_pair_dr *= shape_pair_dr[i]; - for (size_t i = 0; i < shape_atom_i_serials.size(); i++) ele_atom_i_serials *= shape_atom_i_serials[i]; - for (size_t i = 0; i < shape_atom_j_serials.size(); i++) ele_atom_j_serials *= shape_atom_j_serials[i]; - for (size_t i = 0; i < shape_constant_rs.size(); i++) ele_constant_rs *= shape_constant_rs[i]; - for (size_t i = 0; i < shape_constrain_ks.size(); i++) ele_constrain_ks *= shape_constrain_ks[i]; - - for (size_t i = 0; i < shape_crd.size(); i++) ele_crd *= shape_crd[i]; - for (size_t i = 0; i < shape_quarter_cof.size(); i++) ele_quarter_cof *= shape_quarter_cof[i]; - for (size_t i = 0; i < 
shape_mass_inverse.size(); i++) ele_mass_inverse *= shape_mass_inverse[i];
+    ele_scaler *= SizeOf(shape_scaler);
+    ele_pair_dr *= SizeOf(shape_pair_dr);
+    ele_atom_i_serials *= SizeOf(shape_atom_i_serials);
+    ele_atom_j_serials *= SizeOf(shape_atom_j_serials);
+    ele_constant_rs *= SizeOf(shape_constant_rs);
+    ele_constrain_ks *= SizeOf(shape_constrain_ks);
+    ele_crd *= SizeOf(shape_crd);
+    ele_quarter_cof *= SizeOf(shape_quarter_cof);
+    ele_mass_inverse *= SizeOf(shape_mass_inverse);
 
     InitSizeLists();
     return true;
diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/simple_constrain/constrain_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/simple_constrain/constrain_kernel.h
index c514ad62056..0adc081764a 100644
--- a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/simple_constrain/constrain_kernel.h
+++ b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/simple_constrain/constrain_kernel.h
@@ -58,16 +58,15 @@ class ConstrainGpuKernelMod : public DeprecatedNativeGpuKernelMod {
     auto shape_constant_rs = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7);
     auto shape_constrain_ks = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 8);
 
-    for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i];
-    for (size_t i = 0; i < shape_pair_dr.size(); i++) ele_pair_dr *= shape_pair_dr[i];
-    for (size_t i = 0; i < shape_atom_i_serials.size(); i++) ele_atom_i_serials *= shape_atom_i_serials[i];
-    for (size_t i = 0; i < shape_atom_j_serials.size(); i++) ele_atom_j_serials *= shape_atom_j_serials[i];
-    for (size_t i = 0; i < shape_constant_rs.size(); i++) ele_constant_rs *= shape_constant_rs[i];
-    for (size_t i = 0; i < shape_constrain_ks.size(); i++) ele_constrain_ks *= shape_constrain_ks[i];
-
-    for (size_t i = 0; i < shape_crd.size(); i++) ele_crd *= shape_crd[i];
-    for (size_t i = 0; i < shape_quarter_cof.size(); i++) ele_quarter_cof *= shape_quarter_cof[i];
-    for (size_t i = 0; i < shape_mass_inverse.size(); i++) ele_mass_inverse *= shape_mass_inverse[i];
+    ele_scaler *= SizeOf(shape_scaler);
+    ele_pair_dr *= SizeOf(shape_pair_dr);
+    ele_atom_i_serials *= SizeOf(shape_atom_i_serials);
+    ele_atom_j_serials *= SizeOf(shape_atom_j_serials);
+    ele_constant_rs *= SizeOf(shape_constant_rs);
+    ele_constrain_ks *= SizeOf(shape_constrain_ks);
+    ele_crd *= SizeOf(shape_crd);
+    ele_quarter_cof *= SizeOf(shape_quarter_cof);
+    ele_mass_inverse *= SizeOf(shape_mass_inverse);
 
     InitSizeLists();
     return true;
diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/simple_constrain/last_crd_to_dr_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/simple_constrain/last_crd_to_dr_kernel.h
index e55e00aaeae..9d2d787017d 100644
--- a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/simple_constrain/last_crd_to_dr_kernel.h
+++ b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/simple_constrain/last_crd_to_dr_kernel.h
@@ -49,13 +49,13 @@ class LastCrdToDrGpuKernelMod : public DeprecatedNativeGpuKernelMod {
     auto shape_constant_rs = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5);
     auto shape_constrain_ks = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6);
 
-    for (size_t i = 0; i < shape_atom_crd.size(); i++) ele_atom_crd *= shape_atom_crd[i];
-    for (size_t i = 0; i < shape_quater_cof.size(); i++) ele_quater_cof *= shape_quater_cof[i];
-    for (size_t i = 0; i < shape_uint_dr_to_dr.size(); i++) ele_uint_dr_to_dr *= shape_uint_dr_to_dr[i];
-    for (size_t i = 0; i < shape_atom_i_serials.size(); i++) ele_atom_i_serials *= shape_atom_i_serials[i];
-    for (size_t i = 0; i < shape_atom_j_serials.size(); i++) ele_atom_j_serials *= shape_atom_j_serials[i];
-    for (size_t i = 0; i < shape_constant_rs.size(); i++) ele_constant_rs *= shape_constant_rs[i];
-    for (size_t i = 0; i < shape_constrain_ks.size(); i++) ele_constrain_ks *= shape_constrain_ks[i];
+    ele_atom_crd *= SizeOf(shape_atom_crd);
+    ele_quater_cof *= SizeOf(shape_quater_cof);
+    ele_uint_dr_to_dr *= SizeOf(shape_uint_dr_to_dr);
+    ele_atom_i_serials *= SizeOf(shape_atom_i_serials);
+    ele_atom_j_serials *= SizeOf(shape_atom_j_serials);
+    ele_constant_rs *= SizeOf(shape_constant_rs);
+    ele_constrain_ks *= SizeOf(shape_constrain_ks);
 
     InitSizeLists();
     return true;
diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/simple_constrain/refresh_crd_vel_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/simple_constrain/refresh_crd_vel_kernel.h
index 2e949863284..170cd7e9543 100644
--- a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/simple_constrain/refresh_crd_vel_kernel.h
+++ b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/simple_constrain/refresh_crd_vel_kernel.h
@@ -49,10 +49,10 @@ class RefreshCrdVelGpuKernelMod : public DeprecatedNativeGpuKernelMod {
     auto shape_test_frc = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
     auto shape_mass_inverse = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
 
-    for (size_t i = 0; i < shape_crd.size(); i++) ele_crd *= shape_crd[i];
-    for (size_t i = 0; i < shape_vel.size(); i++) ele_vel *= shape_vel[i];
-    for (size_t i = 0; i < shape_test_frc.size(); i++) ele_test_frc *= shape_test_frc[i];
-    for (size_t i = 0; i < shape_mass_inverse.size(); i++) ele_mass_inverse *= shape_mass_inverse[i];
+    ele_crd *= SizeOf(shape_crd);
+    ele_vel *= SizeOf(shape_vel);
+    ele_test_frc *= SizeOf(shape_test_frc);
+    ele_mass_inverse *= SizeOf(shape_mass_inverse);
 
     InitSizeLists();
     return true;
diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/simple_constrain/refresh_uint_crd_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/simple_constrain/refresh_uint_crd_kernel.h
index f45e1a55a76..2b8bba9cb53 100644
--- a/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/simple_constrain/refresh_uint_crd_kernel.h
+++ b/mindspore/ccsrc/plugin/device/gpu/kernel/sponge/simple_constrain/refresh_uint_crd_kernel.h
@@ -46,10 +46,10 @@ class RefreshUintCrdGpuKernelMod : public DeprecatedNativeGpuKernelMod {
     auto shape_test_frc = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
     auto shape_mass_inverse = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
 
-    for (size_t i = 0; i < shape_crd.size(); i++) ele_crd *= shape_crd[i];
-    for (size_t i = 0; i < shape_quarter_cof.size(); i++) ele_quarter_cof *= shape_quarter_cof[i];
-    for (size_t i = 0; i < shape_test_frc.size(); i++) ele_test_frc *= shape_test_frc[i];
-    for (size_t i = 0; i < shape_mass_inverse.size(); i++) ele_mass_inverse *= shape_mass_inverse[i];
+    ele_crd *= SizeOf(shape_crd);
+    ele_quarter_cof *= SizeOf(shape_quarter_cof);
+    ele_test_frc *= SizeOf(shape_test_frc);
+    ele_mass_inverse *= SizeOf(shape_mass_inverse);
 
     InitSizeLists();
     return true;
diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/trt/trt_kernel.cc b/mindspore/ccsrc/plugin/device/gpu/kernel/trt/trt_kernel.cc
index 9281cb7a63a..ae3a4492260 100644
--- a/mindspore/ccsrc/plugin/device/gpu/kernel/trt/trt_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/gpu/kernel/trt/trt_kernel.cc
@@ -32,7 +32,7 @@ bool TrtKernelMod::Init(const CNodePtr &kernel_node) {
     auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, i);
    auto type_id = AnfAlgo::GetInputDeviceDataType(kernel_node, i);
     size_t unit_size = UnitSizeInBytes(type_id);
-    auto size_in_byte = std::accumulate(input_shape.begin(), input_shape.end(), unit_size, std::multiplies<size_t>());
+    auto size_in_byte = unit_size * SizeOf(input_shape);
     input_size_list_.push_back(size_in_byte);
   }
 
@@ -41,7 +41,7 @@ bool TrtKernelMod::Init(const CNodePtr &kernel_node) {
     auto output_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, j);
     auto type_id = AnfAlgo::GetOutputDeviceDataType(kernel_node, j);
     size_t unit_size = UnitSizeInBytes(type_id);
-    auto size_in_byte = std::accumulate(output_shape.begin(), output_shape.end(), unit_size, std::multiplies<size_t>());
+    auto size_in_byte = unit_size * SizeOf(output_shape);
     output_size_list_.push_back(size_in_byte);
   }
diff --git a/mindspore/ccsrc/plugin/device/gpu/optimizer/add_relu_grad_v2_fusion.cc b/mindspore/ccsrc/plugin/device/gpu/optimizer/add_relu_grad_v2_fusion.cc
index e68cd08c24e..73fc685601c 100644
--- a/mindspore/ccsrc/plugin/device/gpu/optimizer/add_relu_grad_v2_fusion.cc
+++ b/mindspore/ccsrc/plugin/device/gpu/optimizer/add_relu_grad_v2_fusion.cc
@@ -77,8 +77,8 @@ const AnfNodePtr AddReluGradV2Fusion::Process(const FuncGraphPtr &graph, const A
     return nullptr;
   }
 
-  std::vector<size_t> shape1 = common::AnfAlgo::GetPrevNodeOutputInferShape(tensor_add, 0);
-  std::vector<size_t> shape2 = common::AnfAlgo::GetPrevNodeOutputInferShape(tensor_add, 1);
+  auto shape1 = common::AnfAlgo::GetPrevNodeOutputInferShape(tensor_add, 0);
+  auto shape2 = common::AnfAlgo::GetPrevNodeOutputInferShape(tensor_add, 1);
   if (shape1 != shape2) {
     return nullptr;
   }
diff --git a/mindspore/ccsrc/plugin/device/gpu/optimizer/add_relu_v2_fusion.cc b/mindspore/ccsrc/plugin/device/gpu/optimizer/add_relu_v2_fusion.cc
index f586edaf41d..2d8a1b2543e 100644
--- a/mindspore/ccsrc/plugin/device/gpu/optimizer/add_relu_v2_fusion.cc
+++ b/mindspore/ccsrc/plugin/device/gpu/optimizer/add_relu_v2_fusion.cc
@@ -75,8 +75,8 @@ const AnfNodePtr AddReluV2Fusion::Process(const FuncGraphPtr &graph, const AnfNo
     return nullptr;
   }
 
-  std::vector<size_t> shape1 = common::AnfAlgo::GetPrevNodeOutputInferShape(tensor_add, 0);
-  std::vector<size_t> shape2 = common::AnfAlgo::GetPrevNodeOutputInferShape(tensor_add, 1);
+  auto shape1 = common::AnfAlgo::GetPrevNodeOutputInferShape(tensor_add, 0);
+  auto shape2 = common::AnfAlgo::GetPrevNodeOutputInferShape(tensor_add, 1);
   if (shape1 != shape2) {
     return nullptr;
   }
diff --git a/mindspore/ccsrc/plugin/device/gpu/optimizer/alltoall_fusion.cc b/mindspore/ccsrc/plugin/device/gpu/optimizer/alltoall_fusion.cc
index 3f088893e85..b90724e83b9 100644
--- a/mindspore/ccsrc/plugin/device/gpu/optimizer/alltoall_fusion.cc
+++ b/mindspore/ccsrc/plugin/device/gpu/optimizer/alltoall_fusion.cc
@@ -35,7 +35,7 @@ constexpr size_t kAllToAllInputIdx = 1;
 
 typedef std::vector<uint32_t> (*GetGroupRanks)(const std::string &);
 
-inline int64_t NormalizeDim(const std::vector<size_t> &shape, int64_t dim) {
+inline int64_t NormalizeDim(const ShapeVector &shape, int64_t dim) {
   return dim < 0 ? SizeToLong(shape.size()) + dim : dim;
 }
 
@@ -70,20 +70,17 @@ CNodePtr CreateSplitNode(const FuncGraphPtr &graph, const CNodePtr &all_to_all)
   // Set Split CNode outputs type and shape, and CNode attributes.
   std::vector<TypeId> dtypes(split_count, dtype);
-  if (AnfUtils::IsShapeDynamic(shape)) {
-    ShapeVector shape_tmp;
+  if (IsDynamic(shape)) {
     auto min_shape = common::AnfAlgo::GetOutputMinShape(all_to_all_input, 0);
     auto max_shape = common::AnfAlgo::GetOutputMaxShape(all_to_all_input, 0);
     if (!min_shape.empty() && !max_shape.empty()) {
       min_shape[LongToSize(split_dim)] /= split_count;
       max_shape[LongToSize(split_dim)] /= split_count;
     }
-
-    std::transform(shape.begin(), shape.end(), std::back_inserter(shape_tmp), SizeToLong);
-    std::vector<BaseShapePtr> shapes(split_count, std::make_shared<abstract::Shape>(shape_tmp, min_shape, max_shape));
+    std::vector<BaseShapePtr> shapes(split_count, std::make_shared<abstract::Shape>(shape, min_shape, max_shape));
     common::AnfAlgo::SetOutputTypeAndDetailShape(dtypes, shapes, split.get());
   } else {
-    std::vector<std::vector<size_t>> shapes(split_count, shape);
+    std::vector<ShapeVector> shapes(split_count, shape);
     common::AnfAlgo::SetOutputInferTypeAndShape(dtypes, shapes, split.get());
   }
 
@@ -157,15 +154,16 @@ CNodePtr CreateConcatNode(const FuncGraphPtr &graph, const CNodePtr &all_to_all,
   // Set Concat CNode outputs and attributes.
   single_shape[LongToSize(concat_dim)] *= split_count;
-  if (AnfUtils::IsShapeDynamic(single_shape)) {
-    ShapeVector shape_tmp;
+  if (IsDynamic(single_shape)) {
     auto min_shape = common::AnfAlgo::GetOutputMinShape(all_to_all_v_outputs[0], 0);
     auto max_shape = common::AnfAlgo::GetOutputMaxShape(all_to_all_v_outputs[0], 0);
-    min_shape[LongToSize(concat_dim)] *= split_count;
-    max_shape[LongToSize(concat_dim)] *= split_count;
-    std::transform(single_shape.begin(), single_shape.end(), std::back_inserter(shape_tmp), SizeToLong);
-    common::AnfAlgo::SetOutputTypeAndDetailShape({common::AnfAlgo::GetOutputInferDataType(all_to_all_v_outputs[0], 0)},
-                                                 {std::make_shared<abstract::Shape>(shape_tmp)}, concat.get());
+    if (!min_shape.empty() && !max_shape.empty()) {
+      min_shape[LongToSize(concat_dim)] *= split_count;
+      max_shape[LongToSize(concat_dim)] *= split_count;
+    }
+    common::AnfAlgo::SetOutputTypeAndDetailShape(
+      {common::AnfAlgo::GetOutputInferDataType(all_to_all_v_outputs[0], 0)},
+      {std::make_shared<abstract::Shape>(single_shape, min_shape, max_shape)}, concat.get());
   } else {
     common::AnfAlgo::SetOutputInferTypeAndShape({common::AnfAlgo::GetOutputInferDataType(all_to_all_v_outputs[0], 0)},
                                                 {single_shape}, concat.get());
diff --git a/mindspore/ccsrc/plugin/device/gpu/optimizer/concat_outputs_for_all_gather.cc b/mindspore/ccsrc/plugin/device/gpu/optimizer/concat_outputs_for_all_gather.cc
index 81c143317ac..935c23acdc8 100644
--- a/mindspore/ccsrc/plugin/device/gpu/optimizer/concat_outputs_for_all_gather.cc
+++ b/mindspore/ccsrc/plugin/device/gpu/optimizer/concat_outputs_for_all_gather.cc
@@ -24,12 +24,12 @@ namespace mindspore::opt {
 namespace {
-using OutputInfo = std::tuple<std::vector<TypeId>, std::vector<std::vector<size_t>>, std::vector<ShapeVector>,
+using OutputInfo = std::tuple<std::vector<TypeId>, std::vector<ShapeVector>, std::vector<ShapeVector>,
                               std::vector<ShapeVector>, std::vector<std::string>, std::vector<TypeId>>;
 OutputInfo GetNodeOutputInfo(const AnfNodePtr &node) {
   MS_EXCEPTION_IF_NULL(node);
   std::vector<TypeId> output_infer_dtype;
-  std::vector<std::vector<size_t>> output_infer_shape;
+  std::vector<ShapeVector> output_infer_shape;
   std::vector<ShapeVector> output_max_shape;
   std::vector<ShapeVector> output_min_shape;
   std::vector<std::string> output_format;
@@ -109,8 +109,7 @@ AnfNodePtr InsertConcatForOutput(const FuncGraphPtr &func_graph, const AnfNodePt
     const std::vector<TypeId> &dtypes = {std::get<0>(output_info)[i]};
     auto shape = std::get<1>(output_info)[i];
     shape[0] *= LongToSize(rank_size);
-    if (AnfUtils::IsShapeDynamic(shape)) {
-      ShapeVector tensor_shape;
+    if (IsDynamic(shape)) {
       auto min_shape = std::get<kIndex3>(output_info)[i];
       auto max_shape = std::get<kIndex2>(output_info)[i];
       if (!min_shape.empty() && !max_shape.empty()) {
@@ -118,8 +117,7 @@ AnfNodePtr InsertConcatForOutput(const FuncGraphPtr &func_graph, const AnfNodePt
         min_shape[0] *= rank_size;
       }
-      std::transform(shape.begin(), shape.end(), std::back_inserter(tensor_shape), SizeToLong);
-      BaseShapePtr base_shape = std::make_shared<abstract::Shape>(tensor_shape, min_shape, max_shape);
+      BaseShapePtr base_shape = std::make_shared<abstract::Shape>(shape, min_shape, max_shape);
       common::AnfAlgo::SetOutputTypeAndDetailShape(dtypes, {base_shape}, concat.get());
     } else {
       common::AnfAlgo::SetOutputInferTypeAndShape(dtypes, {shape}, concat.get());
diff --git a/mindspore/ccsrc/plugin/device/gpu/optimizer/insert_format_transform_op.cc b/mindspore/ccsrc/plugin/device/gpu/optimizer/insert_format_transform_op.cc
index 8424a79595e..22038b31bd3 100644
--- a/mindspore/ccsrc/plugin/device/gpu/optimizer/insert_format_transform_op.cc
+++ b/mindspore/ccsrc/plugin/device/gpu/optimizer/insert_format_transform_op.cc
@@ -45,7 +45,7 @@ std::vector<int64_t> TransposeAxis(const std::string &src_format, const std::str
 // 1. out_shape [x, 1, 1, y]
 // 2. out_shape [x, y, 1, 1]
 // 3. out_shape [x, 1, y, 1]
-bool IsFakeTranspose(const std::vector<size_t> &out_shape, const std::vector<size_t> &transpose_perm) {
+bool IsFakeTranspose(const std::vector<int64_t> &out_shape, const std::vector<int64_t> &transpose_perm) {
   if (out_shape.size() != device::gpu::kFormatTransformDimension) {
     MS_LOG(EXCEPTION) << "Invalid data shape, 4-D data was needed, but get " << out_shape.size() << "-D.";
   }
@@ -105,9 +105,7 @@ CNodePtr InsertTransposeOp(const FuncGraphPtr &graph, const AnfNodePtr &node, co
   auto base_shape = common::AnfAlgo::GetPrevNodeOutputDetailShape(used_node, used_node_index);
   common::AnfAlgo::SetOutputTypeAndDetailShape(transpose_type, {base_shape}, transpose_op.get());
   if (is_fake) {
-    std::vector<int64_t> shape;
-    std::transform(transpose_shape.begin(), transpose_shape.end(), std::back_inserter(shape), SizeToLong);
-    common::AnfAlgo::SetNodeAttr("shape", MakeValue(shape), transpose_op);
+    common::AnfAlgo::SetNodeAttr("shape", MakeValue(transpose_shape), transpose_op);
   } else {
     common::AnfAlgo::SetNodeAttr(kAttrPerm, MakeValue(transpose_perm), transpose_op);
   }
diff --git a/mindspore/ccsrc/plugin/device/gpu/optimizer/neighbor_exchange_v2_fusion.cc b/mindspore/ccsrc/plugin/device/gpu/optimizer/neighbor_exchange_v2_fusion.cc
index fbe865e7528..7fa5de93da0 100644
--- a/mindspore/ccsrc/plugin/device/gpu/optimizer/neighbor_exchange_v2_fusion.cc
+++ b/mindspore/ccsrc/plugin/device/gpu/optimizer/neighbor_exchange_v2_fusion.cc
@@ -84,7 +84,7 @@ void CalSplitAttrs(SplitvNodeInfo *splitv_node_info) {
   }
   int64_t num_split = 0;
   int64_t split_middle_size = splitv_node_info->base_shape[splitv_node_info->split_dim];
-  std::vector<size_t> shape_tmp(splitv_node_info->base_shape);
+  auto shape_tmp(splitv_node_info->base_shape);
   // [top, bottom, left, right]
   int64_t first_size =
     splitv_node_info->split_dim == kWDim ? splitv_node_info->send_lens[kDim2] : splitv_node_info->send_lens[0];
@@ -96,7 +96,7 @@ void CalSplitAttrs(SplitvNodeInfo *splitv_node_info) {
     ++num_split;
     splitv_node_info->size_splits.push_back(first_size);
     split_middle_size -= first_size;
-    shape_tmp[splitv_node_info->split_dim] = static_cast<size_t>(first_size);
+    shape_tmp[splitv_node_info->split_dim] = first_size;
     splitv_node_info->shapes.push_back(shape_tmp);
   }
   if (splitv_node_info->is_last) {
@@ -105,18 +105,18 @@ void CalSplitAttrs(SplitvNodeInfo *splitv_node_info) {
     if (split_middle_size > 0) {
       ++num_split;
       splitv_node_info->size_splits.push_back(split_middle_size);
-      shape_tmp[splitv_node_info->split_dim] = static_cast<size_t>(split_middle_size);
+      shape_tmp[splitv_node_info->split_dim] = split_middle_size;
       splitv_node_info->shapes.push_back(shape_tmp);
     }
     // last
     ++num_split;
     splitv_node_info->size_splits.push_back(last_size);
-    shape_tmp[splitv_node_info->split_dim] = static_cast<size_t>(last_size);
+    shape_tmp[splitv_node_info->split_dim] = last_size;
     splitv_node_info->shapes.push_back(shape_tmp);
   } else if (split_middle_size > 0) {
     ++num_split;
     splitv_node_info->size_splits.push_back(split_middle_size);
-    shape_tmp[splitv_node_info->split_dim] = static_cast<size_t>(split_middle_size);
+    shape_tmp[splitv_node_info->split_dim] = split_middle_size;
     splitv_node_info->shapes.push_back(shape_tmp);
   }
   splitv_node_info->num_split = num_split;
@@ -132,17 +132,17 @@ CNodePtr CreateSliceNode(const FuncGraphPtr &graph, const std::vector<AnfNodePtr
-  std::vector<size_t> slice_shape(slice_node_info.base_shape);
-  std::vector<size_t> begins(slice_shape.size(), 0);
+  ShapeVector slice_shape(slice_node_info.base_shape);
+  std::vector<int64_t> begins(slice_shape.size(), 0);
   slice_shape[slice_node_info.slice_dim] = slice_node_info.slice_size;
   begins[slice_node_info.slice_dim] = slice_node_info.slice_begin;
-  std::vector<std::vector<size_t>> shapes = {slice_shape};
+  std::vector<ShapeVector> shapes = {slice_shape};
   std::vector<TypeId> dtypes(1, slice_node_info.input_dtype);
   common::AnfAlgo::SetOutputInferTypeAndShape(dtypes, shapes, slice.get());
-  common::AnfAlgo::SetNodeAttr(kAttrBegin, MakeValue(Convert2Long(begins)), slice);
-  common::AnfAlgo::SetNodeAttr(kAttrSize, MakeValue(Convert2Long(slice_shape)), slice);
+  common::AnfAlgo::SetNodeAttr(kAttrBegin, MakeValue(begins), slice);
+  common::AnfAlgo::SetNodeAttr(kAttrSize, MakeValue(slice_shape), slice);
 
   return slice;
 }
@@ -183,26 +183,21 @@ CNodePtr CreateSplitNode(const FuncGraphPtr &graph, const AnfNodePtr &split_inpu
   return make_tuple;
 }
 
-std::vector<std::vector<size_t>> CalAllToAllvOutputShape(const std::vector<size_t> &base_shape,
-                                                         const std::vector<int64_t> &recv_lens,
-                                                         const std::vector<int64_t> &recv_rank_ids) {
+std::vector<ShapeVector> CalAllToAllvOutputShape(const ShapeVector &base_shape, const std::vector<int64_t> &recv_lens,
+                                                 const std::vector<int64_t> &recv_rank_ids) {
   if (SizeToLong(base_shape.size()) != kShapeSize) {
     MS_LOG(EXCEPTION) << "Wrong base_shape size: " << base_shape.size() << ", it should be equal to 4.";
   }
-  std::vector<std::vector<size_t>> shapes = {};
-  std::vector<std::vector<size_t>> ori_shapes = {
-    {base_shape[0], base_shape[1], static_cast<size_t>(recv_lens[kLenTopIdx]), base_shape[kWDim]},
-    {base_shape[0], base_shape[1], static_cast<size_t>(recv_lens[kLenTopIdx]),
-     static_cast<size_t>(recv_lens[kLenRightIdx])},
-    {base_shape[0], base_shape[1], base_shape[kHDim], static_cast<size_t>(recv_lens[kLenRightIdx])},
-    {base_shape[0], base_shape[1], static_cast<size_t>(recv_lens[kLenBottomIdx]),
-     static_cast<size_t>(recv_lens[kLenRightIdx])},
-    {base_shape[0], base_shape[1], static_cast<size_t>(recv_lens[kLenBottomIdx]), base_shape[kWDim]},
-    {base_shape[0], base_shape[1], static_cast<size_t>(recv_lens[kLenBottomIdx]),
-     static_cast<size_t>(recv_lens[kLenLeftIdx])},
-    {base_shape[0], base_shape[1], base_shape[kHDim], static_cast<size_t>(recv_lens[kLenLeftIdx])},
-    {base_shape[0], base_shape[1], static_cast<size_t>(recv_lens[kLenTopIdx]),
-     static_cast<size_t>(recv_lens[kLenLeftIdx])}};
+  std::vector<ShapeVector> shapes = {};
+  std::vector<ShapeVector> ori_shapes = {
+    {base_shape[0], base_shape[1], recv_lens[kLenTopIdx], base_shape[kWDim]},
+    {base_shape[0], base_shape[1], recv_lens[kLenTopIdx], recv_lens[kLenRightIdx]},
+    {base_shape[0], base_shape[1], base_shape[kHDim], recv_lens[kLenRightIdx]},
+    {base_shape[0], base_shape[1], recv_lens[kLenBottomIdx], recv_lens[kLenRightIdx]},
+    {base_shape[0], base_shape[1], recv_lens[kLenBottomIdx], base_shape[kWDim]},
+    {base_shape[0], base_shape[1], recv_lens[kLenBottomIdx], recv_lens[kLenLeftIdx]},
+    {base_shape[0], base_shape[1], base_shape[kHDim], recv_lens[kLenLeftIdx]},
+    {base_shape[0], base_shape[1], recv_lens[kLenTopIdx], recv_lens[kLenLeftIdx]}};
 
   for (size_t idx = 0; idx < recv_rank_ids.size(); ++idx) {
     if (recv_rank_ids[idx] != kInvalidId) {
@@ -367,7 +362,7 @@ CNodePtr CreateAllToAllvNode(const FuncGraphPtr &graph, const CNodePtr &neighbor
   if (SizeToLong(base_shape.size()) != kShapeSize) {
     MS_LOG(EXCEPTION) << "Invalid shape size " << base_shape.size() << ", only support NCHW input now!";
   }
-  std::vector<std::vector<size_t>> shapes = CalAllToAllvOutputShape(base_shape, recv_lens, recv_rank_ids);
+  auto shapes = CalAllToAllvOutputShape(base_shape, recv_lens, recv_rank_ids);
 
   // erase -1 in send_rank_ids
   std::vector<int64_t> real_send_rank_ids(send_rank_ids.size());
@@ -511,7 +506,7 @@ std::vector<CNodePtr> NeighborExchangeV2Fusion::CreateSplitNodes(const FuncGraph
 
 CNodePtr NeighborExchangeV2Fusion::CreateConcatNode(const FuncGraphPtr &graph, const std::vector<AnfNodePtr> &concat_input,
-                                                    const std::vector<std::vector<size_t>> &output_shape,
+                                                    const std::vector<ShapeVector> &output_shape,
                                                     const std::vector<TypeId> &output_dtype, int64_t axis,
                                                     int64_t input_nums) const {
   MS_EXCEPTION_IF_NULL(graph);
@@ -542,11 +537,11 @@ CNodePtr NeighborExchangeV2Fusion::CreateLeftRightConcat(const FuncGraphPtr &gra
 
   if (recv_rank_ids[first_ids] != kInvalidId) {
     ++input_num;
-    single_shape[kDim2] += static_cast<size_t>(recv_lens[0]);  // H in NCHW
+    single_shape[kDim2] += recv_lens[0];  // H in NCHW
   }
   if (recv_rank_ids[last_ids] != kInvalidId) {
     ++input_num;
-    single_shape[kDim2] += static_cast<size_t>(recv_lens[1]);  // H in NCHW
+    single_shape[kDim2] += recv_lens[1];  // H in NCHW
   }
   if (is_left) {
     concat_input.insert(concat_input.end(), all_to_all_v_outputs.rbegin(), all_to_all_v_outputs.rbegin() + input_num);
@@ -572,8 +567,8 @@ CNodePtr NeighborExchangeV2Fusion::CreateMiddleConcat(const FuncGraphPtr &graph,
   auto single_shape = common::AnfAlgo::GetOutputInferShape(neighbor_exchange_v2_input, 0);
   size_t first_idx = concat_dim == kWDim ? kIndex6 : kIndex0;
   size_t last_idx = concat_dim == kWDim ? kIndex2 : kIndex4;
-  size_t first_len = concat_dim == kWDim ? static_cast<size_t>(recv_lens[kDim2]) : static_cast<size_t>(recv_lens[0]);
-  size_t last_len = concat_dim == kWDim ? static_cast<size_t>(recv_lens[kDim3]) : static_cast<size_t>(recv_lens[1]);
+  auto first_len = concat_dim == kWDim ? recv_lens[kDim2] : recv_lens[0];
+  auto last_len = concat_dim == kWDim ? recv_lens[kDim3] : recv_lens[1];
 
   // left
   if (recv_rank_ids[first_idx] != kInvalidId) {
@@ -667,11 +662,9 @@ CNodePtr NeighborExchangeV2Fusion::CreateConcatNodes(const FuncGraphPtr &graph,
   std::vector<AnfNodePtr> concat_input_all = {NewValueNode(std::make_shared<Primitive>(kConcatOpName))};
   auto neighbor_exchange_v2_input = neighbor_exchange_v2->input(kNeighborExchangeV2InputIdx);
-  std::vector<size_t> shape_all = common::AnfAlgo::GetOutputInferShape(neighbor_exchange_v2_input, 0);
-  shape_all[kDim2] =
-    recv_rank_ids[kRankIdZero] != kInvalidId ? shape_all[kDim2] + static_cast<size_t>(recv_lens[0]) : shape_all[kDim2];
-  shape_all[kDim2] =
-    recv_rank_ids[kRankIdFour] != kInvalidId ? shape_all[kDim2] + static_cast<size_t>(recv_lens[1]) : shape_all[kDim2];
+  auto shape_all = common::AnfAlgo::GetOutputInferShape(neighbor_exchange_v2_input, 0);
+  shape_all[kDim2] = recv_rank_ids[kRankIdZero] != kInvalidId ? shape_all[kDim2] + recv_lens[0] : shape_all[kDim2];
+  shape_all[kDim2] = recv_rank_ids[kRankIdFour] != kInvalidId ? shape_all[kDim2] + recv_lens[1] : shape_all[kDim2];
   int64_t input_nums_all = 0;
   // left concat
   if (is_left) {
@@ -802,7 +795,7 @@ std::vector<CNodePtr> NeighborExchangeV2GradFusion::CreateSplitNodesForGrad(cons
       split_num->push_back(0);
     }
     for (size_t i = 0; i < split_outputs_top_bottom.size(); ++i) {
-      std::vector<size_t> base_shape(shape);
+      ShapeVector base_shape(shape);
       base_shape[kHDim] = size_split_h[i];
       splitv_w_node_info.base_shape = base_shape;
       auto split_v_left_right = CreateSplitNode(graph, split_outputs_top_bottom[i], &splitv_w_node_info, *this);
@@ -828,7 +821,7 @@ std::vector<CNodePtr> NeighborExchangeV2GradFusion::CreateSplitNodesForGrad(cons
 
 CNodePtr NeighborExchangeV2GradFusion::CreatePadNode(const FuncGraphPtr &graph, const AnfNodePtr &input,
                                                      const std::vector<int64_t> &begin,
-                                                     const std::vector<int64_t> &size, const std::vector<size_t> &shape,
+                                                     const std::vector<int64_t> &size, const ShapeVector &shape,
                                                      TypeId dtype) const {
   MS_EXCEPTION_IF_NULL(graph);
   MS_EXCEPTION_IF_NULL(input);
@@ -836,7 +829,7 @@ CNodePtr NeighborExchangeV2GradFusion::CreatePadNode(const FuncGraphPtr &graph,
   auto pad = NewCNode(pad_inputs, graph);
   std::vector<std::vector<int64_t>> paddings;
   for (size_t i = 0; i < shape.size(); ++i) {
-    paddings.emplace_back(std::vector<int64_t>{begin[i], static_cast<int64_t>(shape[i]) - begin[i] - size[i]});
+    paddings.emplace_back(std::vector<int64_t>{begin[i], shape[i] - begin[i] - size[i]});
   }
   common::AnfAlgo::SetOutputInferTypeAndShape({dtype}, {shape}, pad.get());
   common::AnfAlgo::SetNodeAttr(kAttrPaddings, MakeValue(paddings), pad);
@@ -884,28 +877,24 @@ CNodePtr NeighborExchangeV2GradFusion::CreateSplitGradNodes(const FuncGraphPtr &
   }
   // create pad nodes
   // slice begin & size
-  std::vector<std::vector<int64_t>> begins = {{0, 0, 0, 0},
-                                              {0, 0, 0, static_cast<int64_t>(centerx_shape[kDim3]) - recv_lens[kDim3]},
-                                              {0, 0, 0, static_cast<int64_t>(centerx_shape[kDim3]) - recv_lens[kDim3]},
-                                              {0, 0, static_cast<int64_t>(centerx_shape[kDim2]) - recv_lens[kDim1],
-                                               static_cast<int64_t>(centerx_shape[kDim3]) - recv_lens[kDim3]},
-                                              {0, 0, static_cast<int64_t>(centerx_shape[kDim2]) - recv_lens[kDim1], 0},
-                                              {0, 0, static_cast<int64_t>(centerx_shape[kDim2]) - recv_lens[kDim1], 0},
-                                              {0, 0, 0, 0},
-                                              {0, 0, 0, 0}};
+  std::vector<std::vector<int64_t>> begins = {
+    {0, 0, 0, 0},
+    {0, 0, 0, centerx_shape[kDim3] - recv_lens[kDim3]},
+    {0, 0, 0, centerx_shape[kDim3] - recv_lens[kDim3]},
+    {0, 0, centerx_shape[kDim2] - recv_lens[kDim1], centerx_shape[kDim3] - recv_lens[kDim3]},
+    {0, 0, centerx_shape[kDim2] - recv_lens[kDim1], 0},
+    {0, 0, centerx_shape[kDim2] - recv_lens[kDim1], 0},
+    {0, 0, 0, 0},
+    {0, 0, 0, 0}};
   std::vector<std::vector<int64_t>> sizes = {
-    {static_cast<int64_t>(centerx_shape[0]), static_cast<int64_t>(centerx_shape[1]), recv_lens[0],
-     static_cast<int64_t>(centerx_shape[kDim3])},
-    {static_cast<int64_t>(centerx_shape[0]), static_cast<int64_t>(centerx_shape[1]), recv_lens[0], recv_lens[kDim3]},
-    {static_cast<int64_t>(centerx_shape[0]), static_cast<int64_t>(centerx_shape[1]),
-     static_cast<int64_t>(centerx_shape[kDim2]), recv_lens[kDim3]},
-    {static_cast<int64_t>(centerx_shape[0]), static_cast<int64_t>(centerx_shape[1]), recv_lens[1], recv_lens[kDim3]},
-    {static_cast<int64_t>(centerx_shape[0]), static_cast<int64_t>(centerx_shape[1]), recv_lens[1],
-     static_cast<int64_t>(centerx_shape[kDim3])},
-    {static_cast<int64_t>(centerx_shape[0]), static_cast<int64_t>(centerx_shape[1]), recv_lens[1], recv_lens[kDim2]},
-    {static_cast<int64_t>(centerx_shape[0]), static_cast<int64_t>(centerx_shape[1]),
-     static_cast<int64_t>(centerx_shape[kDim2]), recv_lens[kDim2]},
-    {static_cast<int64_t>(centerx_shape[0]), static_cast<int64_t>(centerx_shape[1]), recv_lens[0], recv_lens[kDim2]}};
+    {centerx_shape[0], centerx_shape[1], recv_lens[0], centerx_shape[kDim3]},
+    {centerx_shape[0], centerx_shape[1], recv_lens[0], recv_lens[kDim3]},
+    {centerx_shape[0], centerx_shape[1], centerx_shape[kDim2], recv_lens[kDim3]},
+    {centerx_shape[0], centerx_shape[1], recv_lens[1], recv_lens[kDim3]},
+    {centerx_shape[0], centerx_shape[1], recv_lens[1], centerx_shape[kDim3]},
+    {centerx_shape[0], centerx_shape[1], recv_lens[1], recv_lens[kDim2]},
+    {centerx_shape[0], centerx_shape[1], centerx_shape[kDim2], recv_lens[kDim2]},
+    {centerx_shape[0], centerx_shape[1], recv_lens[0], recv_lens[kDim2]}};
   std::vector<CNodePtr> pad_nodes;
   size_t output_index = 0;
   for (size_t i = 0; i < recv_rank_ids.size(); ++i) {
diff --git a/mindspore/ccsrc/plugin/device/gpu/optimizer/neighbor_exchange_v2_fusion.h b/mindspore/ccsrc/plugin/device/gpu/optimizer/neighbor_exchange_v2_fusion.h
index 2232d8d185a..31c56c7c390 100644
--- a/mindspore/ccsrc/plugin/device/gpu/optimizer/neighbor_exchange_v2_fusion.h
+++ b/mindspore/ccsrc/plugin/device/gpu/optimizer/neighbor_exchange_v2_fusion.h
@@ -30,14 +30,14 @@ struct SplitvNodeInfo {
   int64_t split_dim = 0;
   int64_t num_split = 0;
   TypeId dtype = kTypeUnknown;
-  std::vector<size_t> base_shape;
+  ShapeVector base_shape;
   std::vector<int64_t> send_lens = {};
-  std::vector<std::vector<size_t>> shapes = {};
+  std::vector<ShapeVector> shapes = {};
   std::vector<int64_t> size_splits = {};
 };
 
 struct SliceNodeInfo {
-  std::vector<size_t> base_shape;
+  ShapeVector base_shape;
   int64_t slice_dim = 0;
   TypeId input_dtype = kTypeUnknown;
   int64_t slice_begin = 0;
@@ -56,8 +56,8 @@ class NeighborExchangeV2Fusion : public PatternProcessPass {
   std::vector<CNodePtr> CreateSplitNodes(const FuncGraphPtr &graph, const CNodePtr &neighbor_exchange_v2,
                                          std::vector<int64_t> *split_num) const;
   CNodePtr CreateConcatNode(const FuncGraphPtr &graph, const std::vector<AnfNodePtr> &concat_input,
-                            const std::vector<std::vector<size_t>> &output_shape,
-                            const std::vector<TypeId> &output_dtype, int64_t axis, int64_t input_nums) const;
+                            const std::vector<ShapeVector> &output_shape, const std::vector<TypeId> &output_dtype,
+                            int64_t axis, int64_t input_nums) const;
   CNodePtr CreateLeftRightConcat(const FuncGraphPtr &graph, const std::vector<AnfNodePtr> &all_to_all_v_outputs,
                                  const std::vector<int64_t> &recv_rank_ids, const std::vector<int64_t> &recv_lens,
                                  bool is_left) const;
@@ -83,7 +83,7 @@ class NeighborExchangeV2GradFusion : public PatternProcessPass {
   std::vector<CNodePtr> CreateSplitNodesForGrad(const FuncGraphPtr &graph, const CNodePtr &neighbor_exchange_v2_grad,
                                                 std::vector<int64_t> *split_num) const;
   CNodePtr CreatePadNode(const FuncGraphPtr &graph, const AnfNodePtr &input, const std::vector<int64_t> &begin,
-                         const std::vector<int64_t> &size, const std::vector<size_t> &shape, TypeId dtype) const;
+                         const std::vector<int64_t> &size, const ShapeVector &shape, TypeId dtype) const;
   CNodePtr CreateSplitGradNodes(const FuncGraphPtr &graph, const CNodePtr &neighbor_exchange_v2_grad,
                                 const CNodePtr &all_to_all_v, const std::vector<CNodePtr> &split_nodes,
                                 const std::vector<int64_t> &split_num) const;
diff --git a/mindspore/ccsrc/plugin/device/gpu/optimizer/relu_v2_pass.cc b/mindspore/ccsrc/plugin/device/gpu/optimizer/relu_v2_pass.cc
index c8554e28ed2..f7c5f4d5bab 100644
--- a/mindspore/ccsrc/plugin/device/gpu/optimizer/relu_v2_pass.cc
+++ b/mindspore/ccsrc/plugin/device/gpu/optimizer/relu_v2_pass.cc
@@ -30,7 +30,7 @@ namespace mindspore {
 namespace opt {
 namespace {
 const size_t kReluV2OutputNum = 2;
-const size_t kBitPerUInt = 32;
+const int64_t kBitPerUInt = 32;
 
 CNodePtr GetRelu(const CNodePtr &relu_grad) {
   MS_EXCEPTION_IF_NULL(relu_grad);
@@ -79,11 +79,10 @@ CNodePtr CreateReluV2(const FuncGraphPtr &graph, const CNodePtr &relu) {
   if (common::AnfAlgo::IsDynamicShape(relu)) {
     return nullptr;
   }
-  std::vector<size_t> output_shape = common::AnfAlgo::GetOutputInferShape(relu, 0);
-  auto element_num =
-    std::accumulate(output_shape.begin(), output_shape.end(), static_cast<size_t>(1), std::multiplies<size_t>());
+  auto output_shape = common::AnfAlgo::GetOutputInferShape(relu, 0);
+  auto element_num = std::accumulate(output_shape.begin(), output_shape.end(), 1, std::multiplies<int64_t>());
 
-  std::vector<int64_t> mask_shape = {SizeToLong((element_num + kBitPerUInt - 1) / kBitPerUInt)};
+  std::vector<int64_t> mask_shape = {(element_num + kBitPerUInt - 1) / kBitPerUInt};
   std::vector<BaseShapePtr> shapes = {common::AnfAlgo::GetOutputDetailShape(relu, 0),
                                       std::make_shared<abstract::Shape>(mask_shape)};
   auto types = {common::AnfAlgo::GetOutputInferDataType(relu, 0), kNumberTypeUInt32};
diff --git a/mindspore/ccsrc/plugin/device/gpu/optimizer/trt_pass/graph_converter.cc b/mindspore/ccsrc/plugin/device/gpu/optimizer/trt_pass/graph_converter.cc
index 411c09afd0e..83a475140aa 100644
--- a/mindspore/ccsrc/plugin/device/gpu/optimizer/trt_pass/graph_converter.cc
+++ b/mindspore/ccsrc/plugin/device/gpu/optimizer/trt_pass/graph_converter.cc
@@ -66,7 +66,7 @@ CNodePtr BuildMakeTupleNode(const FuncGraphPtr root, const std::map<size_t, size
   std::vector<AnfNodePtr> make_tuple_inputs = {NewValueNode(prim::kPrimMakeTuple)};
   std::vector<TypeId> make_tuple_types;
-  std::vector<std::vector<size_t>> make_tuple_shapes;
+  std::vector<ShapeVector> make_tuple_shapes;
 
   for (size_t out_idx = 0; out_idx < anf_trt_index_map.size(); out_idx++) {
     // Get TrtNode output index
@@ -85,7 +85,7 @@ CNodePtr BuildMakeTupleNode(const FuncGraphPtr root, const std::map<size_t, size
     std::vector<TypeId> types = {common::AnfAlgo::GetOutputInferDataType(trt_node, trt_index)};
-    std::vector<std::vector<size_t>> shapes = {common::AnfAlgo::GetOutputInferShape(trt_node, trt_index)};
+    std::vector<ShapeVector> shapes = {common::AnfAlgo::GetOutputInferShape(trt_node, trt_index)};
     common::AnfAlgo::SetOutputInferTypeAndShape(types, shapes, tuple_getitem_cnode.get());
 
     // Build make tuple inputs.
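For readers following the refactor, the recurring names in these hunks are small shape utilities. The sketch below is an editor's illustration of their approximate contracts as implied by how the patch uses them, not the patch's own code; the authoritative definitions live in headers such as utils/shape_utils.h and the convert-utils headers.

// Minimal sketch, assuming the semantics implied by the hunks in this patch.
// ShapeVector is MindSpore's alias for std::vector<int64_t>; a negative
// dimension marks a dynamic (unknown) extent, which unsigned size_t shapes
// could not represent.
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <vector>

using ShapeVector = std::vector<int64_t>;

// Element count of a static shape; a dynamic shape yields 0 so byte-size
// arithmetic like `unit_size * SizeOf(shape)` stays well defined.
inline size_t SizeOf(const ShapeVector &shape) {
  size_t elements = 1;
  for (int64_t dim : shape) {
    if (dim < 0) {
      return 0;  // dynamic dimension: element count is not statically known
    }
    elements *= static_cast<size_t>(dim);
  }
  return elements;
}

// True when any dimension is still unknown; replaces AnfUtils::IsShapeDynamic
// on the old size_t-based shapes.
inline bool IsDynamic(const ShapeVector &shape) {
  return std::any_of(shape.begin(), shape.end(), [](int64_t dim) { return dim < 0; });
}

// Bridge to APIs that still want unsigned dims (e.g. the TensorRT ToTensor
// calls below); negative dims are clipped to 0 instead of wrapping around.
inline std::vector<size_t> Convert2SizeTClipNeg(const ShapeVector &shape) {
  std::vector<size_t> result;
  result.reserve(shape.size());
  (void)std::transform(shape.begin(), shape.end(), std::back_inserter(result),
                       [](int64_t dim) { return dim < 0 ? 0 : static_cast<size_t>(dim); });
  return result;
}

With helpers of this shape, `unit_size * SizeOf(shape)` replaces the std::accumulate idiom, and one signed ShapeVector flows from infer shape to detail shape without the SizeToLong/LongToSize round-trips the deleted lines performed.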
diff --git a/mindspore/ccsrc/plugin/device/gpu/optimizer/trt_pass/trt_converter_context.cc b/mindspore/ccsrc/plugin/device/gpu/optimizer/trt_pass/trt_converter_context.cc
index 7700d6ea746..f785051d44b 100644
--- a/mindspore/ccsrc/plugin/device/gpu/optimizer/trt_pass/trt_converter_context.cc
+++ b/mindspore/ccsrc/plugin/device/gpu/optimizer/trt_pass/trt_converter_context.cc
@@ -273,11 +273,8 @@ std::tuple<std::map<size_t, size_t>, std::vector<session::KernelWithIndex>> TrtC
   return std::make_tuple(anf_trt_index_map, trt_output_list);
 }
 
-std::shared_ptr<tensor::Tensor> TrtConverterContext::CreateTempWeight(const TypeId &type,
-                                                                      const std::vector<size_t> &shape) {
-  ShapeVector shape_int;
-  std::transform(shape.begin(), shape.end(), std::back_inserter(shape_int), SizeToLong);
-  auto tensor = std::make_shared<tensor::Tensor>(type, shape_int);
+std::shared_ptr<tensor::Tensor> TrtConverterContext::CreateTempWeight(const TypeId &type, const ShapeVector &shape) {
+  auto tensor = std::make_shared<tensor::Tensor>(type, shape);
   temp_weights_.push_back(tensor);
   return tensor;
 }
diff --git a/mindspore/ccsrc/plugin/device/gpu/optimizer/trt_pass/trt_converter_context.h b/mindspore/ccsrc/plugin/device/gpu/optimizer/trt_pass/trt_converter_context.h
index f15f14ee4e2..77e0dc1d6c4 100644
--- a/mindspore/ccsrc/plugin/device/gpu/optimizer/trt_pass/trt_converter_context.h
+++ b/mindspore/ccsrc/plugin/device/gpu/optimizer/trt_pass/trt_converter_context.h
@@ -73,7 +73,7 @@ class TrtConverterContext : public std::enable_shared_from_this<TrtConverterCont
 
-  std::shared_ptr<tensor::Tensor> CreateTempWeight(const TypeId &type, const std::vector<size_t> &shape);
+  std::shared_ptr<tensor::Tensor> CreateTempWeight(const TypeId &type, const ShapeVector &shape);
 
   std::shared_ptr<nvinfer1::INetworkDefinition> network() const { return network_; }
diff --git a/mindspore/ccsrc/plugin/device/gpu/optimizer/trt_pass/trt_op_converter.cc b/mindspore/ccsrc/plugin/device/gpu/optimizer/trt_pass/trt_op_converter.cc
index 3a3ab04e5b1..7e8de8eddd0 100644
--- a/mindspore/ccsrc/plugin/device/gpu/optimizer/trt_pass/trt_op_converter.cc
+++ b/mindspore/ccsrc/plugin/device/gpu/optimizer/trt_pass/trt_op_converter.cc
@@ -98,9 +98,9 @@ ConvertResult AddElementLayer(AnfNodePtr node, std::shared_ptr<TrtConverterConte
-  const std::vector<size_t> &x1_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
-  const std::vector<size_t> &x2_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(node, 1);
-  const std::vector<size_t> &y_shape = common::AnfAlgo::GetOutputInferShape(node, 0);
+  const auto &x1_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(node, 0));
+  const auto &x2_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(node, 1));
+  const ShapeVector &y_shape = common::AnfAlgo::GetOutputInferShape(node, 0);
 
   auto Broadcast = [&context, &y_shape](nvinfer1::ITensor *tensor, const std::vector<size_t> &x_shape) {
     if (x_shape.size() == y_shape.size()) {
@@ -208,7 +208,7 @@ ConvertResult AddReduceLayer(AnfNodePtr node, std::shared_ptr<TrtConverterContex
-  const std::vector<size_t> &input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
+  const auto &input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
   const ValuePtr &value = common::AnfAlgo::GetCNodePrimitive(node)->GetAttr("axis");
   uint32_t reduce_axes = 0;
   if (value->isa<ValueTuple>() || value->isa<ValueList>()) {
@@ -366,7 +366,7 @@ MS_TRT_CONVERTER_FUNC_REG(ReLU6) {
     return {false, {}};
   }
 
-  const std::vector<size_t> &x_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
+  const auto &x_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
   nvinfer1::Dims dim;
   dim.nbDims = SizeToInt(x_shape.size());
   std::fill(dim.d, dim.d + dim.nbDims, 1);
@@ -401,7 +401,7 @@ MS_TRT_CONVERTER_FUNC_REG(GeLU) {
     return {false, {}};
   }
 
-  const std::vector<size_t> &x_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
+  const auto &x_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
   nvinfer1::Dims dim;
   dim.nbDims = SizeToInt(x_shape.size());
   std::fill(dim.d, dim.d + dim.nbDims, 1);
@@ -452,7 +452,7 @@ MS_TRT_CONVERTER_FUNC_REG(HSigmoid) {
     return {false, {}};
   }
 
-  const std::vector<size_t> &x_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
+  const auto &x_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
   nvinfer1::Dims dim;
   dim.nbDims = SizeToInt(x_shape.size());
   std::fill(dim.d, dim.d + dim.nbDims, 1);
@@ -493,7 +493,7 @@ MS_TRT_CONVERTER_FUNC_REG(HSwish) {
     return {false, {}};
   }
 
-  const std::vector<size_t> &x_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
+  const auto &x_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
   nvinfer1::Dims dim;
   dim.nbDims = SizeToInt(x_shape.size());
   std::fill(dim.d, dim.d + dim.nbDims, 1);
@@ -550,8 +550,8 @@ MS_TRT_CONVERTER_FUNC_REG(MatMul) {
     // Apply addFullyConnected: y = x * w^T + b
     nvinfer1::Weights bias{nvinfer1::DataType::kFLOAT, nullptr, 0};
     const auto &w_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(node, 1);
-    auto *layer =
-      context->network()->addFullyConnected(*x_reshape->getOutput(0), w_shape[0], *inputs[1].weight(), bias);
+    auto *layer = context->network()->addFullyConnected(*x_reshape->getOutput(0), LongToSize(w_shape[0]),
+                                                        *inputs[1].weight(), bias);
     MS_EXCEPTION_IF_NULL(layer);
 
     // Reshape x from (M, N, 1, 1) to (M, N)
@@ -564,10 +564,10 @@ MS_TRT_CONVERTER_FUNC_REG(MatMul) {
   } else {
     auto op1 = transpose_a ? nvinfer1::MatrixOperation::kTRANSPOSE : nvinfer1::MatrixOperation::kNONE;
     auto op2 = transpose_b ? nvinfer1::MatrixOperation::kTRANSPOSE : nvinfer1::MatrixOperation::kNONE;
-    const std::vector<size_t> &x1_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
-    const std::vector<size_t> &x2_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(node, 1);
-    nvinfer1::ITensor *x1 = ToTensor(&inputs[0], x1_shape, context);
-    nvinfer1::ITensor *x2 = ToTensor(&inputs[1], x2_shape, context);
+    const auto &x1_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
+    const auto &x2_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(node, 1);
+    nvinfer1::ITensor *x1 = ToTensor(&inputs[0], Convert2SizeTClipNeg(x1_shape), context);
+    nvinfer1::ITensor *x2 = ToTensor(&inputs[1], Convert2SizeTClipNeg(x2_shape), context);
     auto *layer = context->network()->addMatrixMultiply(*x1, op1, *x2, op2);
     MS_EXCEPTION_IF_NULL(layer);
     return {true, {layer->getOutput(0)}};
@@ -587,8 +587,8 @@ MS_TRT_CONVERTER_FUNC_REG(BatchMatMul) {
   const auto &trt_transpose1 = transpose_a ? nvinfer1::MatrixOperation::kTRANSPOSE : nvinfer1::MatrixOperation::kNONE;
   const auto &trt_transpose2 = transpose_b ? nvinfer1::MatrixOperation::kTRANSPOSE : nvinfer1::MatrixOperation::kNONE;
-  std::vector<size_t> shape1 = common::AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
-  std::vector<size_t> shape2 = common::AnfAlgo::GetPrevNodeOutputInferShape(node, 1);
+  auto shape1 = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(node, 0));
+  auto shape2 = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(node, 1));
   nvinfer1::ITensor *tensor1 = ToTensor(&inputs[0], shape1, context);
   nvinfer1::ITensor *tensor2 = ToTensor(&inputs[1], shape2, context);
   auto *layer = context->network()->addMatrixMultiply(*tensor1, trt_transpose1, *tensor2, trt_transpose2);
@@ -616,7 +616,7 @@ MS_TRT_CONVERTER_FUNC_REG(BiasAdd) {
 
   // Convert bias to ITensor same dims as x.
   std::vector<size_t> unsqueeze_bias_dims(x_shape.size(), 1);
-  unsqueeze_bias_dims[pos] = SizeToInt(bias_shape[0]);
+  unsqueeze_bias_dims[pos] = LongToInt(bias_shape[0]);
   nvinfer1::ITensor *bias = ToTensor(&inputs[1], unsqueeze_bias_dims, context);
 
   // Create Broadcast Add layer.
@@ -666,8 +666,8 @@ MS_TRT_CONVERTER_FUNC_REG(BatchNorm) {
   auto epsilon = common::AnfAlgo::GetNodeAttr<float>(node, "epsilon");
 
   const TypeId &type = common::AnfAlgo::GetPrevNodeOutputInferDataType(node, 1);
-  const std::vector<size_t> &shape = common::AnfAlgo::GetPrevNodeOutputInferShape(node, 1);
-  int64_t channel_num = SizeToLong(shape[0]);
+  const auto &shape = common::AnfAlgo::GetPrevNodeOutputInferShape(node, 1);
+  int64_t channel_num = shape[0];
   auto coeff = context->CreateTempWeight(type, shape);
   auto bias = context->CreateTempWeight(type, shape);
   auto coeff_value = static_cast<float *>(coeff->data_c());
@@ -735,7 +735,7 @@ MS_TRT_CONVERTER_FUNC_REG(Conv2DBackpropInput) {
   const auto &output_shape = common::AnfAlgo::GetOutputInferShape(node, 0);
   const nvinfer1::Weights &bias{nvinfer1::DataType::kFLOAT, nullptr, 0};
   auto *layer = context->network()->addDeconvolutionNd(
-    *(inputs[0].tensor()), SizeToInt(output_shape[1]),
+    *(inputs[0].tensor()), LongToInt(output_shape[1]),
     nvinfer1::DimsHW{LongToInt(kernel_size[0]), LongToInt(kernel_size[1])}, *(inputs[1].weight()), bias);
   MS_EXCEPTION_IF_NULL(layer);
@@ -811,7 +811,7 @@ MS_TRT_CONVERTER_FUNC_REG(Softmax) {
     return {false, {}};
   }
 
-  const std::vector<size_t> &input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
+  const auto &input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
   const ValuePtr &value = common::AnfAlgo::GetCNodePrimitive(node)->GetAttr("axis");
   uint32_t reduce_axes = 0;
   if (value->isa<ValueTuple>() || value->isa<ValueList>()) {
@@ -842,7 +842,7 @@ MS_TRT_CONVERTER_FUNC_REG(LogSoftmax) {
     return {false, {}};
   }
 
-  const std::vector<size_t> &input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
+  const auto &input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
   const auto &axis = common::AnfAlgo::GetNodeAttr<int64_t>(node, "axis");
   int offset = axis >= 0 ? LongToInt(axis) : LongToInt(axis + input_shape.size());
   uint32_t reduce_axes = 1UL << offset;
@@ -865,12 +865,12 @@ MS_TRT_CONVERTER_FUNC_REG(Gather) {
     return {false, {}};
   }
 
-  const std::vector<size_t> &input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
+  const auto &input_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(node, 0));
   auto axis = common::AnfAlgo::GetNodeAttr<int64_t>(node, "axis");
   axis = axis >= 0 ? axis : axis + input_shape.size();
   nvinfer1::ITensor *input = ToTensor(&inputs[0], input_shape, context);
 
-  const std::vector<size_t> &indices_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(node, 1);
+  const auto &indices_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(node, 1));
   nvinfer1::ITensor *indices = ToTensor(&inputs[1], indices_shape, context);
   auto *layer = context->network()->addGather(*input, *indices, LongToInt(axis));
@@ -887,7 +887,7 @@ MS_TRT_CONVERTER_FUNC_REG(Cast) {
     return {false, {}};
   }
 
-  const std::vector<size_t> &input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
+  const auto &input_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(node, 0));
   nvinfer1::ITensor *input = ToTensor(&inputs[0], input_shape, context);
 
   const TypeId &dst_type = common::AnfAlgo::GetOutputInferDataType(node, 0);
@@ -917,7 +917,7 @@ MS_TRT_CONVERTER_FUNC_REG(LayerNorm) {
   }
 
   // Calculate reduce axes
-  const std::vector<size_t> &input_shape = common::AnfAlgo::GetOutputInferShape(node, 0);
+  const auto &input_shape = common::AnfAlgo::GetOutputInferShape(node, 0);
   auto begin_norm_axis = common::AnfAlgo::GetNodeAttr<int64_t>(node, "begin_norm_axis");
   begin_norm_axis = begin_norm_axis >= 0 ? begin_norm_axis : begin_norm_axis + input_shape.size();
   uint32_t reduce_axes = 0;
@@ -928,7 +928,7 @@ MS_TRT_CONVERTER_FUNC_REG(LayerNorm) {
   // Reshape gamma and beta for broadcast
   auto begin_params_axis = common::AnfAlgo::GetNodeAttr<int64_t>(node, "begin_params_axis");
   begin_params_axis = begin_params_axis >= 0 ? begin_params_axis : begin_params_axis + input_shape.size();
-  std::vector<size_t> param_shape = input_shape;
+  auto param_shape = Convert2SizeTClipNeg(input_shape);
   for (size_t j = 0; j < LongToSize(begin_params_axis); j++) {
     param_shape[j] = 1;
   }
diff --git a/mindspore/ccsrc/ps/constants.h b/mindspore/ccsrc/ps/constants.h
index b57eb318bef..b24bf5ec4fd 100644
--- a/mindspore/ccsrc/ps/constants.h
+++ b/mindspore/ccsrc/ps/constants.h
@@ -28,6 +28,7 @@
 #include "ps/core/communicator/request_process_result_code.h"
 #include "distributed/persistent/data.h"
+#include "utils/shape_utils.h"
 
 namespace mindspore {
 namespace ps {
@@ -199,8 +200,8 @@ using Lengths = std::vector<int>;
 using WeightPtr = std::shared_ptr<Weight>;
 using PersistentWeightPtr = std::shared_ptr<PersistentWeight>;
 using GradPtr = std::shared_ptr<Gradient>;
-using InputsShape = std::vector<std::shared_ptr<std::vector<size_t>>>;
-using InputsShapePtr = std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>>;
+using InputsShape = std::vector<std::shared_ptr<ShapeVector>>;
+using InputsShapePtr = std::shared_ptr<std::vector<std::shared_ptr<ShapeVector>>>;
 
 constexpr size_t INDEX_NOT_SEND = UINT_MAX;
 using OptimOriginIdx = std::map<std::string, size_t>;
diff --git a/mindspore/ccsrc/ps/optimizer_info.cc b/mindspore/ccsrc/ps/optimizer_info.cc
index efaba610877..3177ca2e0b5 100644
--- a/mindspore/ccsrc/ps/optimizer_info.cc
+++ b/mindspore/ccsrc/ps/optimizer_info.cc
@@ -98,7 +98,7 @@ void DenseOptimInfo::Accumulate(const Values &values, const Lengths &lengths) {
   }
 }
 
-void DenseOptimInfo::ComputeMean(const std::vector<std::vector<size_t>> &, size_t n, size_t, size_t) {
+void DenseOptimInfo::ComputeMean(const std::vector<ShapeVector> &, size_t n, size_t, size_t) {
   if (n > 1) {
     MS_EXCEPTION_IF_NULL(gradient()->addr);
     float *accum_grad_data = reinterpret_cast<float *>(gradient()->addr);
@@ -179,8 +179,7 @@ void SparseOptimInfo::Accumulate(const Values &values, const Lengths &lengths) {
   indices()->size += incr_indice_data_size;
 }
 
-void SparseOptimInfo::ComputeMean(const std::vector<std::vector<size_t>> &shapes, size_t n, size_t server_num,
-                                  size_t rank_id) {
+void SparseOptimInfo::ComputeMean(const std::vector<ShapeVector> &shapes, size_t n, size_t server_num, size_t rank_id) {
   if (n == 0 || indices()->size == 0) {
     MS_LOG(EXCEPTION) << "The size of shapes or indices are 0.";
   }
@@ -211,11 +210,11 @@ void SparseOptimInfo::ComputeMean(const std::vector<std::vector<size_t>> &shapes
   int *indices_data = reinterpret_cast<int *>(indices()->addr);
 
   if (sharded_) {
-    size_t original_row_count = input_shapes.front();
+    auto original_row_count = input_shapes.front();
     if (original_row_count > 0) {
       size_t offset = 0;
       std::map<int64_t, int64_t> rank_dims =
-        Util::AllRankLocalShard(SizeToLong(original_row_count), SizeToLong(rank_id), SizeToLong(server_num));
+        Util::AllRankLocalShard(original_row_count, SizeToLong(rank_id), SizeToLong(server_num));
       for (size_t i = 0; i < rank_id; i++) {
         if (rank_dims.count(i) == 0) {
           MS_LOG(EXCEPTION) << "No local shard number for rank " << i;
diff --git a/mindspore/ccsrc/ps/optimizer_info.h b/mindspore/ccsrc/ps/optimizer_info.h
index 645e7dd573b..a2c5a4e6107 100644
--- a/mindspore/ccsrc/ps/optimizer_info.h
+++ b/mindspore/ccsrc/ps/optimizer_info.h
@@ -32,8 +32,7 @@ class OptimizerInfo {
   virtual void Update(const Values &values, const Lengths &lengths) {}
   virtual void Accumulate(const Values &values, const Lengths &lengths) = 0;
-  virtual void ComputeMean(const std::vector<std::vector<size_t>> &shapes, size_t n, size_t server_num,
-                           size_t rank_id) {}
+  virtual void ComputeMean(const std::vector<ShapeVector> &shapes, size_t n, size_t server_num, size_t rank_id) {}
   virtual void Reset() {}
 
   void AddWorkspace(const AddressPtr &workspace);
@@ -63,8 +62,7 @@ class DenseOptimInfo : public OptimizerInfo {
   ~DenseOptimInfo() override = default;
 
   void Accumulate(const Values &values, const Lengths &lens) override;
-  void ComputeMean(const std::vector<std::vector<size_t>> &shapes, size_t n, size_t server_num,
-                   size_t rank_id) override;
+  void ComputeMean(const std::vector<ShapeVector> &shapes, size_t n, size_t server_num, size_t rank_id) override;
   void Reset() override;
 };
@@ -74,8 +72,7 @@ class SparseOptimInfo : public OptimizerInfo {
   ~SparseOptimInfo() override = default;
 
   void Accumulate(const Values &values, const Lengths &lens) override;
-  void ComputeMean(const std::vector<std::vector<size_t>> &shapes, size_t n, size_t server_num,
-                   size_t rank_id) override;
+  void ComputeMean(const std::vector<ShapeVector> &shapes, size_t n, size_t server_num, size_t rank_id) override;
   void Reset() override;
 
   const size_t indice_size() const override;
diff --git a/mindspore/ccsrc/ps/optimizer_info_builder.cc b/mindspore/ccsrc/ps/optimizer_info_builder.cc
index 2f24e9c6c0c..95570aa825e 100644
--- a/mindspore/ccsrc/ps/optimizer_info_builder.cc
+++ b/mindspore/ccsrc/ps/optimizer_info_builder.cc
@@ -82,7 +82,7 @@ AddressPtr OptimizerInfoBuilder::GenInputAddrPtr(const std::string &optim_type,
     EXC_IF_VEC_IDX_OOB((*inputs_shape), origin_index);
     MS_EXCEPTION_IF_NULL((*inputs_shape)[origin_index]);
     auto shape = *((*inputs_shape)[origin_index]);
-    addr_data_size = std::accumulate(shape.begin(), shape.end(), worker_num_, std::multiplies<size_t>());
+    addr_data_size = SizeOf(shape) * worker_num_;
   } else {
     EXC_IF_VEC_IDX_OOB(ps_lens, ps_index);
     addr_data_size = IntToSize(ps_lens[ps_index]);
diff --git a/mindspore/ccsrc/ps/parameter_server.cc b/mindspore/ccsrc/ps/parameter_server.cc
index a6119479f4c..f83812272b5 100644
--- a/mindspore/ccsrc/ps/parameter_server.cc
+++ b/mindspore/ccsrc/ps/parameter_server.cc
@@ -121,9 +121,9 @@ void ParameterServer::InitOptimInputsShape(const Keys &keys, const Values &value
     optim_inputs_shape_[key] = inputs_shape;
   }
   for (size_t i = 0; i < keys.size(); i++) {
-    auto shape = std::make_shared<std::vector<size_t>>();
+    auto shape = std::make_shared<ShapeVector>();
     MS_EXCEPTION_IF_NULL(shape);
-    auto original_shape = std::make_shared<std::vector<size_t>>();
+    auto original_shape = std::make_shared<ShapeVector>();
     MS_EXCEPTION_IF_NULL(original_shape);
     inputs_shape->push_back(shape);
     original_inputs_shape->push_back(original_shape);
@@ -136,28 +136,29 @@ void ParameterServer::InitOptimInputsShape(const Keys &keys, const Values &value
     if (weight_key_to_optims_.count(key) > 0) {
       const std::string &optim_name = weight_key_to_optims_[key];
       const std::string &optim_op_name = weight_key_to_optim_op_[key];
+      auto shape_tmp = optim_inputs_shape_[key];
       if (optimizers_.count(key) == 0 && optim_inputs_shape_.count(key) > 0) {
         const CNodePtr cnode = GetCNode(optim_op_name);
         MS_EXCEPTION_IF_NULL(cnode);
         if (optim_name == kSparseAdam) {
           std::shared_ptr<PServerKernel> optimizer =
             std::make_shared<kernel::ps::SparseApplyAdamPSKernel>(server_node_->rank_id(), pserver_num_, worker_num_);
-          optimizer->InitKernel(cnode, optim_inputs_shape_[key]);
+          optimizer->InitKernel(cnode, shape_tmp);
           optimizers_[key] = optimizer;
         } else if (optim_name == kSparseLazyAdam) {
           std::shared_ptr<PServerKernel> optimizer = std::make_shared<kernel::ps::SparseApplyLazyAdamPSKernel>(
             server_node_->rank_id(), pserver_num_, worker_num_);
-          optimizer->InitKernel(cnode, optim_inputs_shape_[key]);
+          optimizer->InitKernel(cnode, shape_tmp);
           optimizers_[key] = optimizer;
         } else if (optim_name == kApplyMomentum) {
           std::shared_ptr<PServerKernel> optimizer =
             std::make_shared<kernel::ps::ApplyMomentumPSKernel>(server_node_->rank_id(), pserver_num_, worker_num_);
-          optimizer->InitKernel(cnode, optim_inputs_shape_[key]);
+          optimizer->InitKernel(cnode, shape_tmp);
           optimizers_[key] = optimizer;
         } else if (optim_name == kSparseFtrl) {
           std::shared_ptr<PServerKernel> optimizer =
             std::make_shared<kernel::ps::SparseApplyFtrlPSKernel>(server_node_->rank_id(), pserver_num_, worker_num_);
-          optimizer->InitKernel(cnode, optim_inputs_shape_[key]);
+          optimizer->InitKernel(cnode, shape_tmp);
           optimizers_[key] = optimizer;
         }
       }
@@ -215,8 +216,8 @@ void InitAccumParallel(float init_value, size_t total_len, float *embedding_data
 void CopyTensorData(void *dest_ptr, size_t tensor_size, const void *src_ptr) {
   MS_EXCEPTION_IF_NULL(dest_ptr);
   MS_EXCEPTION_IF_NULL(src_ptr);
-  char *dest = reinterpret_cast<char *>(dest_ptr);
-  const char *src = reinterpret_cast<const char *>(src_ptr);
+  char *dest = static_cast<char *>(dest_ptr);
+  const char *src = static_cast<const char *>(src_ptr);
 
   // The security memcpy function 'memcpy_s' limits the value of the second parameter 'destMax' not to be greater than
   // SECUREC_MEM_MAX_LEN. If tensor size(buffer length) is greater than SECUREC_MEM_MAX_LEN, the tensor should be cut
@@ -233,7 +234,7 @@ void CopyTensorData(void *dest_ptr, size_t tensor_size, const void *src_ptr) {
 }  // namespace
 
 void ParameterServer::PersistKernels(const Key &key,
-                                     const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &shapes,
+                                     const std::shared_ptr<std::vector<std::shared_ptr<ShapeVector>>> &shapes,
                                      const ParamInitInfo &param_init_info) const {
   if (!EnableRecovery()) {
     return;
@@ -265,16 +266,16 @@ void ParameterServer::PersistKernels(const Key &key,
   }
 
   // Persist kernel input shape
-  std::vector<std::vector<std::vector<size_t>>> shapes_list;
+  std::vector<std::vector<ShapeVector>> shapes_list;
   if (config_storage->Exists(kShapes)) {
-    shapes_list = config_storage->GetValue<std::vector<std::vector<std::vector<size_t>>>>(kShapes);
+    shapes_list = config_storage->GetValue<std::vector<std::vector<ShapeVector>>>(kShapes);
   }
   if (shapes_list.size() < keys.size()) {
-    std::vector<std::vector<size_t>> shape_tmp;
+    std::vector<ShapeVector> shape_tmp;
     (void)std::transform(shapes->begin(), shapes->end(), std::back_inserter(shape_tmp),
-                         [](const std::shared_ptr<std::vector<size_t>> &shape_ptr) { return *shape_ptr; });
+                         [](const std::shared_ptr<ShapeVector> &shape_ptr) { return *shape_ptr; });
     shapes_list.push_back(shape_tmp);
-    config_storage->PutValue<std::vector<std::vector<std::vector<size_t>>>>(kShapes, shapes_list);
+    config_storage->PutValue<std::vector<std::vector<ShapeVector>>>(kShapes, shapes_list);
   }
 
   // Persist parameter name of kernel.
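// Illustrative note (not part of the patch): with the ps/constants.h change
// above, InputsShape is std::vector<std::shared_ptr<ShapeVector>>, so a
// kernel's input-shape list is built from signed dims directly, e.g.
//
//   auto inputs_shape = std::make_shared<InputsShape>();
//   inputs_shape->push_back(std::make_shared<ShapeVector>(ShapeVector{rows, embedding_dim}));
//
// rows and embedding_dim are hypothetical int64_t values; the nested size_t
// vectors and their SizeToLong/LongToSize round-trips disappear.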
@@ -319,9 +320,9 @@ void ParameterServer::PersistInitParameters(const Key &key, const WeightPtr &par
   MS_LOG(INFO) << "Finish persist initialized parameter, key: " << key;
 }
 
-void ParameterServer::InitEmbeddingTable(
-  const Key &key, const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &shapes,
-  const ParamInitInfo &param_init_info) {
+void ParameterServer::InitEmbeddingTable(const Key &key,
+                                         const std::shared_ptr<std::vector<std::shared_ptr<ShapeVector>>> &shapes,
+                                         const ParamInitInfo &param_init_info) {
   if (EnableRecovery()) {
     while (!finish_recovery_) {
       std::this_thread::yield();
@@ -424,17 +425,15 @@ void ParameterServer::UpdateWeights() {
       const std::vector<AddressPtr> &workspaces = optim_info->workspaces();
       const std::vector<AddressPtr> &outputs = optim_info->outputs();
 
-      std::vector<std::vector<size_t>> shapes = {};
-      std::vector<size_t> indices_shape = {};
-      indices_shape.emplace_back(optim_info->indice_size());
+      std::vector<ShapeVector> shapes = {};
+      ShapeVector indices_shape = {};
+      indices_shape.emplace_back(SizeToLong(optim_info->indice_size()));
       shapes.push_back(indices_shape);
 
       if (original_optim_inputs_shape_.count(key) != 0) {
         std::transform((*(original_optim_inputs_shape_[key])).begin(), (*(original_optim_inputs_shape_[key])).end(),
                        std::back_inserter(shapes),
-                       [](const std::shared_ptr<std::vector<size_t>> &input_shapes) -> std::vector<size_t> {
-                         return *input_shapes;
-                       });
+                       [](const std::shared_ptr<ShapeVector> &input_shapes) -> ShapeVector { return *input_shapes; });
       }
       optimizer->ReInit(shapes);
       optim_info->ComputeMean(shapes, worker_num_, pserver_num_, server_node_->rank_id());
@@ -517,9 +516,9 @@ void ParameterServer::DoEmbeddingLookup(Key key, const LookupIds &lookup_ids, KV
   MS_EXCEPTION_IF_NULL(table_lookup_op);
 
   // Update shapes of lookup operator
-  std::vector<std::vector<size_t>> shapes = {};
-  std::vector<size_t> indices_shape = {};
-  indices_shape.emplace_back(lookup_ids.size());
+  std::vector<ShapeVector> shapes = {};
+  ShapeVector indices_shape = {};
+  indices_shape.emplace_back(SizeToLong(lookup_ids.size()));
   shapes.push_back(indices_shape);
   table_lookup_op->ReInit(shapes);
 
@@ -702,19 +701,18 @@ void ParameterServer::CacheEmbeddingTableParamPtr() {
 }
 
 void ParameterServer::RecoverKernels(const std::vector<size_t> &keys,
-                                     const std::vector<std::vector<std::vector<size_t>>> &shapes_list,
+                                     const std::vector<std::vector<ShapeVector>> &shapes_list,
                                      const std::vector<std::string> &param_names) {
   for (size_t i = 0; i < keys.size(); i++) {
     size_t key = keys.at(i);
     if (weights_.count(key) == 0) {
       // Recover embedding lookup kernels.
-      std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> shapes_ptr =
-        std::make_shared<std::vector<std::shared_ptr<std::vector<size_t>>>>();
+      std::shared_ptr<std::vector<std::shared_ptr<ShapeVector>>> shapes_ptr =
+        std::make_shared<std::vector<std::shared_ptr<ShapeVector>>>();
       const auto &shapes = shapes_list[i];
       for (const auto &shape : shapes) {
-        std::shared_ptr<std::vector<size_t>> shape_ptr =
-          std::make_shared<std::vector<size_t>>(shape.begin(), shape.end());
+        std::shared_ptr<ShapeVector> shape_ptr = std::make_shared<ShapeVector>(shape.begin(), shape.end());
         shapes_ptr->push_back(shape_ptr);
       }
 
@@ -778,7 +776,7 @@ void ParameterServer::RecoverParameters(const std::vector<size_t> &keys) {
 }
 
 void ParameterServer::RecoverEmbedding(const std::vector<size_t> &keys,
-                                       const std::vector<std::vector<std::vector<size_t>>> &shapes_list,
+                                       const std::vector<std::vector<ShapeVector>> &shapes_list,
                                        const std::vector<std::string> &param_names) {
   CacheEmbeddingTableParamPtr();
   size_t keys_size = keys.size();
@@ -1029,17 +1027,17 @@ void ParameterServer::ServerHandler::HandleInitEmbeddings(const void *data, size
   CHECK_RETURN_TYPE(embedding_table_meta.ParseFromArray(data, SizeToInt(size)));
   const Key &key = embedding_table_meta.key();
   MS_LOG(INFO) << "Initializing embedding table for key:" << key;
-  std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> shapes =
-    std::make_shared<std::vector<std::shared_ptr<std::vector<size_t>>>>();
+  std::shared_ptr<std::vector<std::shared_ptr<ShapeVector>>> shapes =
+    std::make_shared<std::vector<std::shared_ptr<ShapeVector>>>();
   MS_EXCEPTION_IF_NULL(shapes);
-  std::shared_ptr<std::vector<size_t>> input_shape = std::make_shared<std::vector<size_t>>(
-    embedding_table_meta.input_shape().begin(), embedding_table_meta.input_shape().end());
+  std::shared_ptr<ShapeVector> input_shape =
+    std::make_shared<ShapeVector>(embedding_table_meta.input_shape().begin(), embedding_table_meta.input_shape().end());
   MS_EXCEPTION_IF_NULL(input_shape);
-  std::shared_ptr<std::vector<size_t>> indices_shape = std::make_shared<std::vector<size_t>>(
+  std::shared_ptr<ShapeVector> indices_shape = std::make_shared<ShapeVector>(
     embedding_table_meta.indices_shape().begin(), embedding_table_meta.indices_shape().end());
   MS_EXCEPTION_IF_NULL(indices_shape);
-  std::shared_ptr<std::vector<size_t>> output_shape = std::make_shared<std::vector<size_t>>(
-    embedding_table_meta.output_shape().begin(), embedding_table_meta.output_shape().end());
+  std::shared_ptr<ShapeVector> output_shape = std::make_shared<ShapeVector>(embedding_table_meta.output_shape().begin(),
+                                                                            embedding_table_meta.output_shape().end());
   MS_EXCEPTION_IF_NULL(output_shape);
   shapes->push_back(input_shape);
   shapes->push_back(indices_shape);
@@ -1174,8 +1172,7 @@ void ParameterServer::RecoverHandler::Recover() {
 
 void ParameterServer::RecoverHandler::RecoverEmbedding() {
   MS_EXCEPTION_IF_NULL(storage_);
   std::vector<size_t> keys = storage_->GetValue<std::vector<size_t>>(kKeys);
-  std::vector<std::vector<std::vector<size_t>>> shapes_list =
-    storage_->GetValue<std::vector<std::vector<std::vector<size_t>>>>(kShapes);
+  auto shapes_list = storage_->GetValue<std::vector<std::vector<ShapeVector>>>(kShapes);
   std::vector<std::string> param_names = storage_->GetValue<std::vector<std::string>>(kParamNames);
 
   MS_EXCEPTION_IF_NULL(ps_);
diff --git a/mindspore/ccsrc/ps/parameter_server.h b/mindspore/ccsrc/ps/parameter_server.h
index d8a348670c8..68960263042 100644
--- a/mindspore/ccsrc/ps/parameter_server.h
+++ b/mindspore/ccsrc/ps/parameter_server.h
@@ -148,8 +148,7 @@ class BACKEND_EXPORT ParameterServer {
   void InitOptimInputsShape(const Keys &keys, const Values &values, const Lengths &lengths);
   void InitWeight(const Key &key, const WeightPtr &weight);
   void InitGrad(const Key &key, const GradPtr &grad);
-  void InitEmbeddingTable(const Key &key,
-                          const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &shapes,
+  void InitEmbeddingTable(const Key &key, const std::shared_ptr<std::vector<std::shared_ptr<ShapeVector>>> &shapes,
                           const ParamInitInfo &param_init_info);
   bool HasWeight(const Key &key);
   void Finalize();
@@ -176,18 +175,18 @@ class BACKEND_EXPORT ParameterServer {
   void PersistParameters();
 
   // Persist sparse network operators when receive init embedding table message.
- void PersistKernels(const Key &key, const std::shared_ptr>>> &shapes, + void PersistKernels(const Key &key, const std::shared_ptr>> &shapes, const ParamInitInfo ¶m_init_info) const; // Persist parameters store in parameter server when receive init message. void PersistInitParameters(const Key &key, const WeightPtr ¶m); // Restore sparse network operators and parameters. - void RecoverEmbedding(const std::vector &keys, const std::vector>> &shapes_list, + void RecoverEmbedding(const std::vector &keys, const std::vector> &shapes_list, const std::vector ¶m_names); // Restore sparse network operators. - void RecoverKernels(const std::vector &keys, const std::vector>> &shapes_list, + void RecoverKernels(const std::vector &keys, const std::vector> &shapes_list, const std::vector ¶m_names); // Restore parameters store in parameter server. diff --git a/mindspore/ccsrc/ps/util.cc b/mindspore/ccsrc/ps/util.cc index 2c2a729ed9c..b231efd4088 100644 --- a/mindspore/ccsrc/ps/util.cc +++ b/mindspore/ccsrc/ps/util.cc @@ -236,7 +236,7 @@ kernel::KernelBuildInfoPtr Util::GenerateKernelBuildInfo(const std::vector outputs_device_format; std::vector inputs_device_type; std::vector outputs_device_type; - std::vector> outputs_shape; + std::vector outputs_shape; kernel::KernelBuildInfo::KernelBuildInfoBuilder builder; for (size_t idx = 0; idx < node_list.size(); ++idx) { auto cnode = utils::cast(node_list[idx]); diff --git a/mindspore/ccsrc/runtime/data_queue/data_queue_mgr.cc b/mindspore/ccsrc/runtime/data_queue/data_queue_mgr.cc index 5aea858d763..99cc28281cc 100644 --- a/mindspore/ccsrc/runtime/data_queue/data_queue_mgr.cc +++ b/mindspore/ccsrc/runtime/data_queue/data_queue_mgr.cc @@ -257,7 +257,7 @@ bool PopDataFromDataQueue(const AnfNodePtr &data_kernel) { } MS_EXCEPTION_IF_CHECK_FAIL(data.size() == device_tensors.size(), "The number of data tensor popped from dynamic queue is not correct"); - std::vector> shapes; + std::vector shapes; std::vector types; std::vector output_size_list; for (size_t i = 0; i < data.size(); ++i) { @@ -265,10 +265,8 @@ bool PopDataFromDataQueue(const AnfNodePtr &data_kernel) { device_tensors[i]->SetSize(data[i].data_len_); device_tensors[i]->set_from_mem_pool(true); output_size_list.push_back(data[i].data_len_); - std::vector shape; - std::transform(data[i].shapes_.begin(), data[i].shapes_.end(), std::back_inserter(shape), LongToSize); kernel_info->SetOutputAddr(device_tensors[i], i); - shapes.push_back(shape); + shapes.push_back(data[i].shapes_); types.push_back(common::AnfAlgo::GetOutputInferDataType(data_kernel, i)); } auto kernel_mod = kernel_info->MutableKernelMod(); diff --git a/mindspore/ccsrc/runtime/device/kernel_adjust.cc b/mindspore/ccsrc/runtime/device/kernel_adjust.cc index 827aecf5ceb..67fabd78c33 100644 --- a/mindspore/ccsrc/runtime/device/kernel_adjust.cc +++ b/mindspore/ccsrc/runtime/device/kernel_adjust.cc @@ -46,7 +46,7 @@ constexpr auto kSpecifyParameter = "accu_status"; constexpr auto kSplitOverFlow = "split_overflow"; constexpr auto kLayerOverFlow = "layer_overflow"; constexpr auto kMixLayerStatusParameter = "mix_layer_status"; -size_t kNPUShape = 8; +int64_t kNPUShape = 8; constexpr size_t kLastHandleDiff = 2; } // namespace namespace mindspore { @@ -556,7 +556,7 @@ CNodePtr KernelAdjust::CreatTupleGetItemNode(const std::shared_ptrNewCNode({NewValueNode(prim::kPrimTupleGetItem), node, idx}); MS_EXCEPTION_IF_NULL(tuple_getitem); tuple_getitem->set_scope(node->scope()); - std::vector origin_shape = common::AnfAlgo::GetOutputInferShape(node, output_idx); + 
+  auto origin_shape = common::AnfAlgo::GetOutputInferShape(node, output_idx);
   TypeId origin_type = common::AnfAlgo::GetOutputInferDataType(node, output_idx);
   common::AnfAlgo::SetOutputInferTypeAndShape({origin_type}, {origin_shape}, tuple_getitem.get());
   return tuple_getitem;
@@ -731,7 +731,7 @@ CNodePtr KernelAdjust::CreateNPUAllocStatus(const std::shared_ptr<session::KernelGraph> &graph_ptr) {
   auto npu_alloc_cnode = graph_ptr->NewCNode(npu_alloc_inputs);
   MS_EXCEPTION_IF_NULL(npu_alloc_cnode);
   npu_alloc_cnode->set_scope(kDefaultScope);
-  std::vector<size_t> npu_output_shape = {kNPUShape};
+  ShapeVector npu_output_shape = {kNPUShape};
   common::AnfAlgo::SetOutputInferTypeAndShape({kNumberTypeFloat32}, {npu_output_shape}, npu_alloc_cnode.get());
 
   kernel::KernelBuildInfo::KernelBuildInfoBuilder selected_kernel_builder;
@@ -783,7 +783,7 @@ CNodePtr KernelAdjust::CreateAssign(const std::shared_ptr<session::KernelGraph> &graph_ptr,
   MS_EXCEPTION_IF_NULL(specify_para);
 
   std::vector<float> reset(kNPUShape, 0.0);
-  ShapeVector reset_shape({static_cast<int64_t>(kNPUShape)});
+  ShapeVector reset_shape({kNPUShape});
   auto shp_buf_size = sizeof(float) * reset.size();
   auto reset_tensor = std::make_shared<tensor::Tensor>(kNumberTypeFloat32, reset_shape, reset.data(), shp_buf_size);
   auto reset_value_node = std::make_shared<ValueNode>(reset_tensor);
diff --git a/mindspore/ccsrc/runtime/device/launch_mul.cc b/mindspore/ccsrc/runtime/device/launch_mul.cc
index 94cd1a1ab94..dfe75421a30 100644
--- a/mindspore/ccsrc/runtime/device/launch_mul.cc
+++ b/mindspore/ccsrc/runtime/device/launch_mul.cc
@@ -32,7 +32,7 @@ std::shared_ptr<session::KernelGraph> LaunchMul::ObtainMulKernelGraph() {
   }
   int64_t shape = SizeToLong(total_size_ / dtype_size);
   std::vector<std::vector<int64_t>> input_shapes = {{shape}, {1}};
-  std::vector<std::vector<size_t>> output_shapes = {{static_cast<size_t>(shape)}};
+  std::vector<ShapeVector> output_shapes = {{shape}};
   auto mul_graph = session::SingleKernelGraph::ConstructKernelGraphBasedOnSingleOp(
     kMulOpName, input_dtypes, input_shapes, output_dtypes, output_shapes);
   MS_EXCEPTION_IF_NULL(mul_graph);
diff --git a/mindspore/ccsrc/runtime/device/ms_device_shape_transfer.cc b/mindspore/ccsrc/runtime/device/ms_device_shape_transfer.cc
index 31ca821c708..bdc834e9005 100644
--- a/mindspore/ccsrc/runtime/device/ms_device_shape_transfer.cc
+++ b/mindspore/ccsrc/runtime/device/ms_device_shape_transfer.cc
@@ -263,8 +263,7 @@ bool IsNeedPadding(const std::string &format, size_t shape_size) {
 
 ShapeVector GetRuntimePaddingShape(const AnfNodePtr &node, size_t index) {
   MS_EXCEPTION_IF_NULL(node);
-  ShapeVector shape;
-  std::vector<size_t> host_shape;
+  ShapeVector host_shape;
   if (node->isa<ValueNode>()) {
     auto value_node = node->cast<ValueNodePtr>();
     MS_EXCEPTION_IF_NULL(value_node);
@@ -275,15 +274,15 @@ ShapeVector GetRuntimePaddingShape(const AnfNodePtr &node, size_t index) {
     MS_LOG(EXCEPTION) << " The node[ " << node->DebugString() << "]'s cannot convert ";
   }
   auto shape_temp = tensor->shape();
-  if (AnfUtils::IsShapeDynamic(shape_temp)) {
+  if (IsDynamic(shape_temp)) {
     auto base_shape = tensor->base_shape_ptr();
     MS_EXCEPTION_IF_NULL(base_shape);
     if (base_shape->cast<abstract::ShapePtr>() == nullptr) {
       MS_LOG(EXCEPTION) << "Tensor with dynamic shape should be ShapePtr type.";
     }
-    host_shape = AnfUtils::TransShapeToSizet(base_shape->cast<abstract::ShapePtr>());
+    host_shape = base_shape->cast<abstract::ShapePtr>()->shape();
   } else {
-    (void)std::transform(shape_temp.begin(), shape_temp.end(), std::back_inserter(host_shape), LongToSize);
+    host_shape = shape_temp;
   }
 
   if (host_shape.empty()) {
@@ -296,8 +295,7 @@ ShapeVector GetRuntimePaddingShape(const AnfNodePtr &node, size_t index) {
   if (IsNeedPadding(format, host_shape.size())) {
     host_shape = PaddingShape(host_shape, format, AnfAlgo::GetOutputReshapeType(node, index), node);
   }
-  (void)std::transform(host_shape.begin(), host_shape.end(), std::back_inserter(shape), SizeToLong);
-  return shape;
+  return host_shape;
 }
 
 bool TransDataType(const TypeIdArgs &args, void *result) {
diff --git a/mindspore/ccsrc/runtime/device/tensor_array.cc b/mindspore/ccsrc/runtime/device/tensor_array.cc
index 776e6e0b0cd..2465d95a81a 100644
--- a/mindspore/ccsrc/runtime/device/tensor_array.cc
+++ b/mindspore/ccsrc/runtime/device/tensor_array.cc
@@ -17,7 +17,7 @@
 
 namespace mindspore {
 namespace device {
-bool TensorArray::CheckValue(const TypeId &dtype, const std::vector<size_t> &shape) {
+bool TensorArray::CheckValue(const TypeId &dtype, const ShapeVector &shape) {
   MS_LOG(DEBUG) << "Check the data shape and type for " << name_;
   if (dtype != dtype_->type_id()) {
     MS_LOG(ERROR) << "Invalid data type " << TypeIdLabel(dtype) << " for " << name_ << ", the origin type is "
diff --git a/mindspore/ccsrc/runtime/device/tensor_array.h b/mindspore/ccsrc/runtime/device/tensor_array.h
index e4ae9d45482..65ed8f03d6e 100644
--- a/mindspore/ccsrc/runtime/device/tensor_array.h
+++ b/mindspore/ccsrc/runtime/device/tensor_array.h
@@ -30,14 +30,14 @@ namespace device {
 class TensorArray {
  public:
   // Base TensorArray. Constructed by name, dtype and shapes.
-  TensorArray(const string &name, const TypePtr &dtype, const std::vector<size_t> &shapes)
+  TensorArray(const string &name, const TypePtr &dtype, const ShapeVector &shapes)
     : name_(name), dtype_(dtype), shapes_(shapes), valid_size_(0), max_size_(0), is_dynamic_(true) {}
   virtual ~TensorArray() = default;
 
   // Check the index in valid range. Used in Read().
   virtual bool CheckReadIndexLogical(const int64_t index);
 
   // Check the dtype and shape of the input data. Used in Write().
-  virtual bool CheckValue(const TypeId &dtype, const std::vector<size_t> &shape);
+  virtual bool CheckValue(const TypeId &dtype, const ShapeVector &shape);
 
   // Function Write() is used to insert or append dev_value to the position of index.
   virtual bool Write(const int64_t index, const mindspore::kernel::AddressPtr &dev_value);
@@ -76,7 +76,7 @@ class TensorArray {
  protected:
   std::string name_;
   TypePtr dtype_;
-  std::vector<size_t> shapes_;
+  ShapeVector shapes_;
   size_t valid_size_;
   int64_t max_size_;
   bool is_dynamic_;
diff --git a/mindspore/ccsrc/runtime/graph_scheduler/actor/control_flow/control_actor.cc b/mindspore/ccsrc/runtime/graph_scheduler/actor/control_flow/control_actor.cc
index f6d943ed8ca..38af540ed46 100644
--- a/mindspore/ccsrc/runtime/graph_scheduler/actor/control_flow/control_actor.cc
+++ b/mindspore/ccsrc/runtime/graph_scheduler/actor/control_flow/control_actor.cc
@@ -459,11 +459,8 @@ void ControlActor::UpdateDynamicShapeInParameter() {
     auto node = input_device_tensors_[i]->GetNodeIndex().first;
     MS_EXCEPTION_IF_NULL(node);
     auto shape = trans::GetRuntimePaddingShape(node, input_device_tensors_[i]->GetNodeIndex().second);
-    std::vector<size_t> shape_tmp;
-    (void)std::transform(shape.begin(), shape.end(), std::back_inserter(shape_tmp), IntToSize);
-
     for (const auto &parameter : backend_parameters_[i]) {
-      common::AnfAlgo::SetOutputInferTypeAndShape({input_device_tensors_[i]->type_id()}, {shape_tmp}, parameter.get());
+      common::AnfAlgo::SetOutputInferTypeAndShape({input_device_tensors_[i]->type_id()}, {shape}, parameter.get());
     }
   }
 }
diff --git a/mindspore/ccsrc/runtime/graph_scheduler/actor/control_flow/exit_actor.cc b/mindspore/ccsrc/runtime/graph_scheduler/actor/control_flow/exit_actor.cc
index 3152c15c65a..ebca176e5d8 100644
--- a/mindspore/ccsrc/runtime/graph_scheduler/actor/control_flow/exit_actor.cc
+++ b/mindspore/ccsrc/runtime/graph_scheduler/actor/control_flow/exit_actor.cc
@@ -177,9 +177,7 @@ void ExitActor::CopyDeviceAddress(OpContext<DeviceTensor> *const context) {
       // If there is a dynamic shape, the shape in the kernel should be used.
       MS_LOG(DEBUG) << "Update dynamic shape in kernel output:" << node_with_index.first->DebugString()
                     << " for actor:" << GetAID();
-      auto shape_tmp = common::AnfAlgo::GetOutputInferShape(node_with_index.first, node_with_index.second);
-      host_shape.clear();
-      (void)std::transform(shape_tmp.begin(), shape_tmp.end(), std::back_inserter(host_shape), IntToSize);
+      host_shape = common::AnfAlgo::GetOutputInferShape(node_with_index.first, node_with_index.second);
     }
 
     // Create the new device tensor to take over the input_device_tensors which are the outputs of kernel graphs.
    auto new_device_tensor = device_contexts_[i]->device_res_manager_->CreateDeviceAddress(
diff --git a/mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc b/mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc
index 80e840b7a21..95c219540cf 100644
--- a/mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc
+++ b/mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc
@@ -304,9 +304,7 @@ void DataPrepareActor::UpdateDynamicShape(const AnfNodePtr &input_node, const Te
   }
 
   auto shape = input_tensor->shape();
-  std::vector<size_t> shape_tmp;
-  std::transform(shape.begin(), shape.end(), std::back_inserter(shape_tmp), IntToSize);
-  common::AnfAlgo::SetOutputInferTypeAndShape({common::AnfAlgo::GetOutputInferDataType(input_node, 0)}, {shape_tmp},
+  common::AnfAlgo::SetOutputInferTypeAndShape({common::AnfAlgo::GetOutputInferDataType(input_node, 0)}, {shape},
                                               input_node.get());
 }
diff --git a/mindspore/ccsrc/runtime/graph_scheduler/actor/embedding_cache/embedding_cache_prefetch_actor.cc b/mindspore/ccsrc/runtime/graph_scheduler/actor/embedding_cache/embedding_cache_prefetch_actor.cc
index 6af93f0d740..cd714e260a4 100644
--- a/mindspore/ccsrc/runtime/graph_scheduler/actor/embedding_cache/embedding_cache_prefetch_actor.cc
+++ b/mindspore/ccsrc/runtime/graph_scheduler/actor/embedding_cache/embedding_cache_prefetch_actor.cc
@@ -1746,9 +1746,8 @@ bool Receiver::ParseDynamicShapeData(const char *msg_body, size_t msg_len,
   ShapeVector shapes(pb_msg.shape_vector().begin(), pb_msg.shape_vector().end());
   TypeId data_type = static_cast<TypeId>(pb_msg.type_id());
   int64_t expected_data_len = 1;
-  std::vector<size_t> size_t_shapes(shapes.begin(), shapes.end());
-  if (!kernel::GetShapeSize(size_t_shapes, TypeIdToType(data_type), &expected_data_len)) {
-    MS_LOG(ERROR) << "Getting shape size for shape " << size_t_shapes << " failed.";
+  if (!kernel::GetShapeSize(shapes, TypeIdToType(data_type), &expected_data_len)) {
+    MS_LOG(ERROR) << "Getting shape size for shape " << shapes << " failed.";
     return false;
   }
   if (LongToSize(expected_data_len) != received_data_len) {
diff --git a/mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc b/mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc
index a622dab447b..7ee99f191ba 100644
--- a/mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc
+++ b/mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc
@@ -322,7 +322,7 @@ void KernelActor::CopyInputDeviceTensor(const OpData<DeviceTensor> *input_data,
   auto &new_device_tensor = copy_input_device_tensors_[input_data->index_];
   MS_EXCEPTION_IF_NULL(new_device_tensor);
   // Dynamic shape need update size.
-  if (AnfUtils::IsShapeDynamic(real_input_info->shape_)) {
+  if (IsDynamic(real_input_info->shape_)) {
     new_device_tensor->SetSize(input_data->data_->GetSize());
  }
   // Update the input device tensor.
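The hunks above replace the per-call-site AnfUtils::IsShapeDynamic checks and the size_t round-trips with the IsDynamic helper that this patch centralizes in shape_utils.h. The following standalone sketch is not part of the patch; it assumes only that ShapeVector is std::vector<int64_t> (as declared in shape_vector.h) and mirrors the IsDynamic/SizeOf definitions the diff itself introduces, to illustrate the convention the runtime now relies on: any negative dimension marks a shape as dynamic, and SizeOf reports zero elements for such a shape so no buffer is ever sized from a placeholder dimension.

// Standalone sketch (not the MindSpore sources): mirrors the IsDynamic/SizeOf
// helpers consolidated into shape_utils.h by this diff.
// Assumption: ShapeVector = std::vector<int64_t>.
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

using ShapeVector = std::vector<int64_t>;

// Any negative dimension (e.g. Shape::SHP_ANY == -1) marks the shape dynamic.
bool IsDynamic(const ShapeVector &shape) {
  return std::any_of(shape.begin(), shape.end(), [](int64_t s) { return s < 0; });
}

// Element count; a dynamic shape reports zero so nothing is allocated from it.
size_t SizeOf(const ShapeVector &shape) {
  int64_t data_size = 1;
  for (auto dim : shape) {
    if (dim < 0) {
      return 0;
    }
    data_size *= dim;
  }
  return static_cast<size_t>(data_size);
}

int main() {
  ShapeVector static_shape{2, 3, 4};
  ShapeVector dynamic_shape{-1, 3, 4};
  std::cout << IsDynamic(static_shape) << " " << SizeOf(static_shape) << "\n";    // prints: 0 24
  std::cout << IsDynamic(dynamic_shape) << " " << SizeOf(dynamic_shape) << "\n";  // prints: 1 0
  return 0;
}

Keeping shapes signed end to end is what makes this check cheap: a single std::any_of scan replaces the old pattern of converting to std::vector<size_t> first, which silently turned a -1 placeholder into a huge unsigned value.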
diff --git a/mindspore/ccsrc/runtime/graph_scheduler/actor/output_actor.cc b/mindspore/ccsrc/runtime/graph_scheduler/actor/output_actor.cc
index 2a560e1d6f4..b7bce2111be 100644
--- a/mindspore/ccsrc/runtime/graph_scheduler/actor/output_actor.cc
+++ b/mindspore/ccsrc/runtime/graph_scheduler/actor/output_actor.cc
@@ -51,9 +51,7 @@ void UpdateOutputTensorShape(const std::vector<TensorPtr> &output_tensors,
   for (size_t i = 0; i < output_tensors.size(); ++i) {
     MS_EXCEPTION_IF_NULL(output_tensors[i]);
     auto shape = common::AnfAlgo::GetOutputInferShape(output_nodes[i].first, output_nodes[i].second);
-    std::vector<int64_t> temp_shape;
-    (void)std::copy(shape.begin(), shape.end(), std::back_inserter(temp_shape));
-    (void)output_tensors[i]->set_shape(temp_shape);
+    (void)output_tensors[i]->set_shape(shape);
   }
 }
@@ -197,10 +195,8 @@ TensorPtr OutputActor::CreateOutputTensor(const AnfNodePtr &output_node, size_t
   // Create host tensor, the output tensor should use the infer type, it will be handed correctly by tensor data sync
   // when infer type is not equal to device type.
   auto type_id = common::AnfAlgo::GetOutputInferDataType(output_node, output_index);
-  std::vector<int64_t> temp_shape;
   auto shape = common::AnfAlgo::GetOutputInferShape(output_node, output_index);
-  (void)std::copy(shape.begin(), shape.end(), std::back_inserter(temp_shape));
-  auto tensor = std::make_shared<tensor::Tensor>(type_id, temp_shape);
+  auto tensor = std::make_shared<tensor::Tensor>(type_id, shape);
   tensor->set_padding_type(AnfAlgo::GetOutputReshapeType(output_node, output_index));
 
   if (output_position >= device_contexts_.size()) {
diff --git a/mindspore/ccsrc/runtime/graph_scheduler/actor/rpc/recv_actor.cc b/mindspore/ccsrc/runtime/graph_scheduler/actor/rpc/recv_actor.cc
index 343bab8d05c..11857d89dab 100644
--- a/mindspore/ccsrc/runtime/graph_scheduler/actor/rpc/recv_actor.cc
+++ b/mindspore/ccsrc/runtime/graph_scheduler/actor/rpc/recv_actor.cc
@@ -242,9 +242,8 @@ size_t RecvActor::ParseDynamicShapeData(const std::string &dynamic_shape_data, A
   // Step 5: get the size of real data as recv's input.
int64_t real_data_size = 1; - std::vector size_t_shape(shapes.begin(), shapes.end()); - if (!kernel::GetShapeSize(size_t_shape, TypeIdToType(data_type), &real_data_size)) { - MS_LOG(EXCEPTION) << "Getting shape size for shape " << size_t_shape << " failed."; + if (!kernel::GetShapeSize(shapes, TypeIdToType(data_type), &real_data_size)) { + MS_LOG(EXCEPTION) << "Getting shape size for shape " << shapes << " failed."; } data_to_be_parsed += real_data_size; diff --git a/mindspore/ccsrc/runtime/pynative/run_op_helper.cc b/mindspore/ccsrc/runtime/pynative/run_op_helper.cc index 5ac0ef19819..4362fdf565d 100644 --- a/mindspore/ccsrc/runtime/pynative/run_op_helper.cc +++ b/mindspore/ccsrc/runtime/pynative/run_op_helper.cc @@ -73,10 +73,8 @@ void UpdateParameterShapeFromInputTensor(const AnfNodePtr &input_node, const ten } auto shape = input_tensor->shape(); - std::vector update_shape; - std::transform(shape.begin(), shape.end(), std::back_inserter(update_shape), IntToSize); - MS_LOG(DEBUG) << "Update input node shape to:" << update_shape; - common::AnfAlgo::SetOutputInferTypeAndShape({common::AnfAlgo::GetOutputInferDataType(input_node, 0)}, {update_shape}, + MS_LOG(DEBUG) << "Update input node shape to:" << shape; + common::AnfAlgo::SetOutputInferTypeAndShape({common::AnfAlgo::GetOutputInferDataType(input_node, 0)}, {shape}, input_node.get()); } @@ -251,6 +249,10 @@ void UpdateOutputAddrSize(const AnfNodePtr &node, const std::shared_ptrGetOutputDeviceAddress(i); MS_EXCEPTION_IF_NULL(output_address); auto output_addr_size = AnfAlgo::GetOutputTensorMemSize(node, i); + auto host_shape = output_address->host_shape(); + if (std::any_of(host_shape.begin(), host_shape.end(), [](int64_t s) { return s < 0; })) { + output_address->set_host_shape(trans::GetRuntimePaddingShape(node, 0)); + } if (output_addr_size != output_address->GetSize()) { output_address->SetSize(output_addr_size); } diff --git a/mindspore/ccsrc/utils/anfalgo.cc b/mindspore/ccsrc/utils/anfalgo.cc index ef881e904fa..b45689cb1f0 100644 --- a/mindspore/ccsrc/utils/anfalgo.cc +++ b/mindspore/ccsrc/utils/anfalgo.cc @@ -577,16 +577,23 @@ ShapeVector AnfAlgo::GetOutputInferShapeSigned(const AnfNodePtr &node, size_t ou << base_shape->ToString() << " node : " << node->DebugString() << trace::DumpSourceLines(node); } -std::vector AnfAlgo::GetOutputInferShape(const AnfNodePtr &node, const abstract::BaseShapePtr &base_shape, - size_t output_idx) { +inline ShapeVector GetShape(const abstract::BaseShapePtr &base_shape) { + auto shape_ptr = base_shape->cast(); + MS_EXCEPTION_IF_NULL(shape_ptr); + return shape_ptr->shape(); +} + +ShapeVector AnfAlgo::GetOutputInferShape(const AnfNodePtr &node, const abstract::BaseShapePtr &base_shape, + size_t output_idx) { MS_EXCEPTION_IF_NULL(node); MS_EXCEPTION_IF_NULL(base_shape); if (base_shape->isa()) { - if (output_idx == 0) { - return AnfUtils::TransShapeToSizet(base_shape->cast()); + if (output_idx != 0) { + MS_LOG(EXCEPTION) << "The node " << node->DebugString() << "is a single output node but got index [" << output_idx + << trace::DumpSourceLines(node); } - MS_LOG(EXCEPTION) << "The node " << node->DebugString() << "is a single output node but got index [" << output_idx - << trace::DumpSourceLines(node); + + return GetShape(base_shape); } else if (base_shape->isa()) { auto tuple_shape = base_shape->cast(); MS_EXCEPTION_IF_NULL(tuple_shape); @@ -596,34 +603,34 @@ std::vector AnfAlgo::GetOutputInferShape(const AnfNodePtr &node, const a } auto b_shp = (*tuple_shape)[output_idx]; if (b_shp->isa()) { - return 
AnfUtils::TransShapeToSizet(b_shp->cast()); + return GetShape(b_shp); } else if (b_shp->isa()) { - return std::vector(); + return ShapeVector(); } else if (b_shp->isa()) { // Usually there is no tuple in tuple for the shape of the kernel graph parameter, but there will be such a // scenario when dump ir is in the compilation process, here return an empty shape so that dump ir can work // normally. MS_LOG(INFO) << "The output shape of node:" << node->DebugString() << " index:" << output_idx << " is a TupleShape:" << base_shape->ToString(); - return std::vector(); + return ShapeVector(); } else { MS_LOG(EXCEPTION) << "The output type of ApplyKernel index:" << output_idx << " should be a NoShape , ArrayShape or a TupleShape, but it is " << base_shape->ToString() << "node :" << node->DebugString() << "." << trace::DumpSourceLines(node); } } else if (base_shape->isa()) { - return std::vector(); + return ShapeVector(); } MS_LOG(EXCEPTION) << "The output type of ApplyKernel should be a NoShape , ArrayShape or a TupleShape, but it is " << base_shape->ToString() << " node : " << node->DebugString() << trace::DumpSourceLines(node); } -std::vector AnfAlgo::GetOutputInferShape(const AnfNodePtr &node, size_t output_idx) { +ShapeVector AnfAlgo::GetOutputInferShape(const AnfNodePtr &node, size_t output_idx) { MS_EXCEPTION_IF_NULL(node); return GetOutputInferShape(node, node->Shape(), output_idx); } -std::vector AnfAlgo::GetPrevNodeOutputInferShape(const AnfNodePtr &node, size_t input_idx) { +ShapeVector AnfAlgo::GetPrevNodeOutputInferShape(const AnfNodePtr &node, size_t input_idx) { KernelWithIndex kernel_with_index = AnfAlgo::GetPrevNodeOutput(node, input_idx); return AnfAlgo::GetOutputInferShape(kernel_with_index.first, kernel_with_index.second); } @@ -753,8 +760,8 @@ void AnfAlgo::SetOutputTypeAndDetailShape(const std::vector &types, } // set infer shapes and types of anf node -void AnfAlgo::SetOutputInferTypeAndShape(const std::vector &types, - const std::vector> &shapes, AnfNode *node) { +void AnfAlgo::SetOutputInferTypeAndShape(const std::vector &types, const std::vector &shapes, + AnfNode *node) { MS_EXCEPTION_IF_NULL(node); auto node_ptr = node->cast(); std::string node_name = ""; @@ -773,17 +780,14 @@ void AnfAlgo::SetOutputInferTypeAndShape(const std::vector &types, node->set_abstract(std::make_shared()); } else if (shapes.size() == 1 && tuple_node == kNodeTupleOutSet.end()) { // single output handle - ShapeVector shape_int; abstract::AbstractTensorPtr abstract = nullptr; if (abstract_ptr != nullptr) { auto max_shape0 = GetOutputMaxShape(node_ptr, 0); auto min_shape0 = GetOutputMinShape(node_ptr, 0); - std::transform(shapes[0].begin(), shapes[0].end(), std::back_inserter(shape_int), SizeToLong); abstract = std::make_shared(TypeIdToType(types[0]), - std::make_shared(shape_int, min_shape0, max_shape0)); + std::make_shared(shapes[0], min_shape0, max_shape0)); } else { - std::transform(shapes[0].begin(), shapes[0].end(), std::back_inserter(shape_int), SizeToLong); - abstract = std::make_shared(TypeIdToType(types[0]), shape_int); + abstract = std::make_shared(TypeIdToType(types[0]), shapes[0]); } node->set_abstract(abstract); } else { @@ -795,13 +799,11 @@ void AnfAlgo::SetOutputInferTypeAndShape(const std::vector &types, if (abstract_ptr != nullptr) { auto max_shape = GetOutputMaxShape(node_ptr, i); auto min_shape = GetOutputMinShape(node_ptr, i); - std::transform(shapes[i].begin(), shapes[i].end(), std::back_inserter(shape_int), SizeToLong); abstract = std::make_shared(TypeIdToType(types[i]), - 
std::make_shared(shape_int, min_shape, max_shape)); + std::make_shared(shapes[i], min_shape, max_shape)); } else { - std::transform(shapes[i].begin(), shapes[i].end(), std::back_inserter(shape_int), SizeToLong); abstract = - std::make_shared(TypeIdToType(types[i]), std::make_shared(shape_int)); + std::make_shared(TypeIdToType(types[i]), std::make_shared(shapes[i])); } abstract_list.emplace_back(abstract); } @@ -1279,8 +1281,8 @@ void AnfAlgo::GetRealDynamicShape(const std::vector &shape, NotNull GetShapeFromSequenceShape(const abstract::SequenceShapePtr &sequeue_shape_ptr, size_t index, - ShapeType type) { +static ShapeVector GetShapeFromSequenceShape(const abstract::SequenceShapePtr &sequeue_shape_ptr, size_t index, + ShapeType type) { MS_EXCEPTION_IF_NULL(sequeue_shape_ptr); auto shape_list = sequeue_shape_ptr->shape(); if (index >= shape_list.size()) { @@ -1314,21 +1316,21 @@ static std::vector GetOutputMinOrMaxShape(const AnfNodePtr &anf_node, s } } -std::vector AnfAlgo::GetInputMaxShape(const AnfNodePtr &anf_node, size_t index) { +ShapeVector AnfAlgo::GetInputMaxShape(const AnfNodePtr &anf_node, size_t index) { auto input_node_with_index = AnfAlgo::GetPrevNodeOutput(anf_node, index); return GetOutputMaxShape(input_node_with_index.first, input_node_with_index.second); } -std::vector AnfAlgo::GetInputMinShape(const AnfNodePtr &anf_node, size_t index) { +ShapeVector AnfAlgo::GetInputMinShape(const AnfNodePtr &anf_node, size_t index) { auto input_node_with_index = AnfAlgo::GetPrevNodeOutput(anf_node, index); return GetOutputMinShape(input_node_with_index.first, input_node_with_index.second); } -std::vector AnfAlgo::GetOutputMaxShape(const AnfNodePtr &anf_node, size_t index) { +ShapeVector AnfAlgo::GetOutputMaxShape(const AnfNodePtr &anf_node, size_t index) { return GetOutputMinOrMaxShape(anf_node, index, ShapeType::kMaxShape); } -std::vector AnfAlgo::GetOutputMinShape(const AnfNodePtr &anf_node, size_t index) { +ShapeVector AnfAlgo::GetOutputMinShape(const AnfNodePtr &anf_node, size_t index) { return GetOutputMinOrMaxShape(anf_node, index, ShapeType::kMinShape); } @@ -1484,18 +1486,6 @@ void AnfAlgo::GetRealInputs(const AnfNodePtr &node, std::vector } } -bool AnfAlgo::IsTensorBroadcast(const std::vector &lhs, const std::vector &rhs) { - if (lhs.size() != rhs.size()) { - return true; - } - for (size_t i = 0; i < lhs.size(); i++) { - if (lhs[i] != rhs[i]) { - return true; - } - } - return false; -} - bool AnfAlgo::IsControlOpExecInBackend(const AnfNodePtr &node) { if (!node->isa()) { return false; diff --git a/mindspore/core/abstract/dshape.h b/mindspore/core/abstract/dshape.h index 12c9447a254..68a500852e6 100644 --- a/mindspore/core/abstract/dshape.h +++ b/mindspore/core/abstract/dshape.h @@ -113,7 +113,7 @@ GVAR_DEF(std::shared_ptr, kNoShape, std::make_shared()); /// \brief Shape defines dimensions of tensor. class MS_CORE_API Shape final : public BaseShape { public: - static const int64_t SHP_ANY = -1; + static const ShapeValueDType SHP_ANY = -1; /// \brief Constructor of Shape. Shape() : shape_() {} @@ -121,7 +121,7 @@ class MS_CORE_API Shape final : public BaseShape { /// \brief Constructor of Shape. /// /// \param[in] list Initial shape dimensions. - Shape(const std::initializer_list &list) : shape_(list) {} + Shape(const std::initializer_list &list) : shape_(list) {} /// \brief Constructor of Shape. /// @@ -187,14 +187,12 @@ class MS_CORE_API Shape final : public BaseShape { /// \return Maximum shape dimensions. 
const ShapeVector &max_shape() const { return max_shape_; } - bool IsDynamic() const override { - return std::any_of(shape_.begin(), shape_.end(), [](int64_t s) { return s < 0; }); - } + bool IsDynamic() const override { return mindspore::IsDynamic(shape_); } bool IsDimZero() const override { return shape_.empty(); }; bool IsDimUnknown() const override { - return std::any_of(shape_.begin(), shape_.end(), [](int64_t s) { return s < -1; }); + return std::any_of(shape_.begin(), shape_.end(), [](ShapeValueDType s) { return s < -1; }); } private: diff --git a/mindspore/core/mindapi/base/shape_vector.h b/mindspore/core/mindapi/base/shape_vector.h index 4a2fdf9a888..160136a9a23 100644 --- a/mindspore/core/mindapi/base/shape_vector.h +++ b/mindspore/core/mindapi/base/shape_vector.h @@ -20,6 +20,7 @@ #include #include -using ShapeVector = std::vector; +using ShapeValueDType = int64_t; +using ShapeVector = std::vector; #endif // MINDSPORE_CORE_MINDAPI_BASE_SHAPE_VECTOR_H_ diff --git a/mindspore/core/ops/avg_pool_3d.cc b/mindspore/core/ops/avg_pool_3d.cc index 173653ce326..1a8c46f071d 100644 --- a/mindspore/core/ops/avg_pool_3d.cc +++ b/mindspore/core/ops/avg_pool_3d.cc @@ -75,12 +75,10 @@ std::vector GetOutputShape(const PrimitivePtr &primitive, const std::ve << ", stride_h: " << stride_h << ", stride_w: " << stride_w << "."; } if (ceil_mode) { - out_d = - static_cast(std::floor((in_d + pad_list[0] + pad_list[1] - kernel_d + stride_d - 1) / stride_d + 1)); - out_h = static_cast( - std::floor((in_h + pad_list[kInputIndex2] + pad_list[kInputIndex3] - kernel_h + stride_h - 1) / stride_h + 1)); - out_w = - static_cast(std::floor((in_w + pad_list[4] + pad_list[5] - kernel_w + stride_w - 1) / stride_w + 1)); + out_d = std::floor((in_d + pad_list[0] + pad_list[1] - kernel_d + stride_d - 1) / stride_d + 1); + out_h = + std::floor((in_h + pad_list[kInputIndex2] + pad_list[kInputIndex3] - kernel_h + stride_h - 1) / stride_h + 1); + out_w = std::floor((in_w + pad_list[4] + pad_list[5] - kernel_w + stride_w - 1) / stride_w + 1); if ((out_d - 1) * stride_d >= in_d + pad_list[0]) { out_d--; } @@ -91,10 +89,9 @@ std::vector GetOutputShape(const PrimitivePtr &primitive, const std::ve out_w--; } } else { - out_d = static_cast(std::floor((in_d + pad_list[0] + pad_list[1] - kernel_d) / stride_d + 1)); - out_h = static_cast(std::floor((in_h + pad_list[2] + pad_list[3] - kernel_h) / stride_h + 1)); - out_w = static_cast( - std::floor((in_w + pad_list[kInputIndex4] + pad_list[kInputIndex5] - kernel_w) / stride_w + 1)); + out_d = std::floor((in_d + pad_list[0] + pad_list[1] - kernel_d) / stride_d + 1); + out_h = std::floor((in_h + pad_list[2] + pad_list[3] - kernel_h) / stride_h + 1); + out_w = std::floor((in_w + pad_list[kInputIndex4] + pad_list[kInputIndex5] - kernel_w) / stride_w + 1); } std::vector output_shape = {in_shape[0], in_shape[1], out_d, out_h, out_w}; return output_shape; diff --git a/mindspore/core/utils/anf_utils.cc b/mindspore/core/utils/anf_utils.cc index 0a26ae1c1bd..3dfbd5897d6 100644 --- a/mindspore/core/utils/anf_utils.cc +++ b/mindspore/core/utils/anf_utils.cc @@ -136,18 +136,12 @@ bool AnfUtils::IsDimUnknown(const abstract::ShapePtr &shape) { } bool AnfUtils::IsShapeDynamic(const abstract::ShapePtr &shape) { - MS_EXCEPTION_IF_NULL(shape); + if (shape == nullptr) { + return false; + } return std::any_of(shape->shape().begin(), shape->shape().end(), [](int64_t s) { return s < 0; }); } -bool AnfUtils::IsShapeDynamic(const std::vector &shape) { - return std::any_of(shape.begin(), shape.end(), 
[](int64_t s) { return s < 0; }); -} - -bool AnfUtils::IsShapeDynamic(const std::vector &shape) { - return std::any_of(shape.begin(), shape.end(), [](int64_t s) { return s < 0; }); -} - bool AnfUtils::IsNodeOutputDynamicShape(const CNodePtr &node) { MS_EXCEPTION_IF_NULL(node); auto base_shape = node->Shape(); @@ -593,22 +587,6 @@ mindspore::HashMap> &AnfUtils::GetReal return real_input_info->real_input_nodes; } -std::vector AnfUtils::TransShapeToSizet(const abstract::ShapePtr &shape) { - MS_EXCEPTION_IF_NULL(shape); - std::vector shape_size_t; - if (AnfUtils::IsShapeDynamic(shape)) { - if (std::all_of(shape->max_shape().begin(), shape->max_shape().end(), [](int64_t s) { return s >= 0; })) { - std::transform(shape->max_shape().begin(), shape->max_shape().end(), std::back_inserter(shape_size_t), - LongToSize); - } else { - MS_LOG(EXCEPTION) << "Invalid Max Shape"; - } - } else { - std::transform(shape->shape().begin(), shape->shape().end(), std::back_inserter(shape_size_t), LongToSize); - } - return shape_size_t; -} - void FlatParameterFinder::AddParameter(const ParameterPtr ¶m) { auto tensor = dyn_cast(param->default_param()); if (tensor == nullptr) { diff --git a/mindspore/core/utils/anf_utils.h b/mindspore/core/utils/anf_utils.h index 183f104769b..2cef4436bfd 100644 --- a/mindspore/core/utils/anf_utils.h +++ b/mindspore/core/utils/anf_utils.h @@ -67,8 +67,6 @@ class MS_CORE_API AnfUtils { using CustomActorCallback = std::function; static bool IsDimUnknown(const abstract::ShapePtr &shape); static bool IsShapeDynamic(const abstract::ShapePtr &shape); - static bool IsShapeDynamic(const std::vector &shape); - static bool IsShapeDynamic(const std::vector &shape); static bool IsNodeOutputDynamicShape(const CNodePtr &node); static bool IsDimUnknown(const AnfNodePtr &node); // check whether the anf node is a real kernel that can run on device,parameter and constant is real kernel too diff --git a/mindspore/core/utils/convert_utils_base.h b/mindspore/core/utils/convert_utils_base.h index a1e97cd71c4..db37222610b 100644 --- a/mindspore/core/utils/convert_utils_base.h +++ b/mindspore/core/utils/convert_utils_base.h @@ -59,6 +59,8 @@ inline size_t IntToSize(int u) { return static_cast(u); } +inline size_t LongToSizeClipNeg(int64_t u) { return u < 0 ? 0 : static_cast(u); } + inline size_t LongToSize(int64_t u) { if (u < 0) { MS_LOG(EXCEPTION) << "The int64_t value(" << u << ") is less than 0."; @@ -230,6 +232,31 @@ inline uint8_t *AddressOffset(void *address, size_t offset) { MS_EXCEPTION_IF_NULL(address); return static_cast(address) + offset; } + +inline std::vector Convert2Int(const std::vector &v) { + std::vector result; + (void)std::transform(v.begin(), v.end(), std::back_inserter(result), SizeToInt); + return result; +} + +inline std::vector Convert2Long(const std::vector &v) { + std::vector result; + (void)std::transform(v.begin(), v.end(), std::back_inserter(result), SizeToLong); + return result; +} + +inline std::vector Convert2SizeT(const std::vector &v) { + std::vector result; + (void)std::transform(v.begin(), v.end(), std::back_inserter(result), LongToSize); + return result; +} + +inline std::vector Convert2SizeTClipNeg(const std::vector &v) { + std::vector result; + auto ConvertFunc = [](int64_t v) -> size_t { return v < 0 ? 
0 : static_cast(v); }; + (void)std::transform(v.begin(), v.end(), std::back_inserter(result), ConvertFunc); + return result; +} } // namespace mindspore #endif // MINDSPORE_CORE_UTILS_CONVERT_UTILS_BASE_H_ diff --git a/mindspore/core/utils/shape_utils.h b/mindspore/core/utils/shape_utils.h index 125849a2416..33b998f5822 100644 --- a/mindspore/core/utils/shape_utils.h +++ b/mindspore/core/utils/shape_utils.h @@ -17,11 +17,12 @@ #ifndef MINDSPORE_SHAPE_UTILS_INFO_H_ #define MINDSPORE_SHAPE_UTILS_INFO_H_ +#include #include "mindapi/base/shape_vector.h" namespace mindspore { inline size_t SizeOf(const ShapeVector &shape) { - int64_t data_size = 1; + ShapeValueDType data_size = 1; for (auto dim : shape) { if (dim < 0) { // For dynamic shape which has negative dimensions, data size should be zero. @@ -31,6 +32,10 @@ inline size_t SizeOf(const ShapeVector &shape) { } return static_cast(data_size); } + +inline bool IsDynamic(const ShapeVector &shape) { + return std::any_of(shape.begin(), shape.end(), [](ShapeValueDType s) { return s < 0; }); +} } // namespace mindspore #endif // MINDSPORE_SHAPE_UTILS_INFO_H_ diff --git a/tests/st/ps/part_ps/test_entry_ps_embedding_heterogeneous_conv2d_adam.py b/tests/st/ps/part_ps/test_entry_ps_embedding_heterogeneous_conv2d_adam.py index 6c2b07952bf..dbaa403b979 100644 --- a/tests/st/ps/part_ps/test_entry_ps_embedding_heterogeneous_conv2d_adam.py +++ b/tests/st/ps/part_ps/test_entry_ps_embedding_heterogeneous_conv2d_adam.py @@ -16,10 +16,6 @@ import os import pytest -@pytest.mark.level0 -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard def test_ps_embedding_heterogeneous_conv2d_adam(): return_code = os.system( "bash shell_run_test.sh Ascend /home/workspace/mindspore_dataset/mnist 1 1 127.0.0.1 8085" diff --git a/tests/ut/cpp/device/hccl_adapter_test.cc b/tests/ut/cpp/device/hccl_adapter_test.cc index dd54cb638f1..ed4a1bd666c 100644 --- a/tests/ut/cpp/device/hccl_adapter_test.cc +++ b/tests/ut/cpp/device/hccl_adapter_test.cc @@ -39,8 +39,7 @@ class TestHcclAdapter : public UT::Common { return all_to_all_v; } - void SetOutputs(const CNodePtr &cnode, const std::vector> &shape, - const std::vector &data_type) { + void SetOutputs(const CNodePtr &cnode, const std::vector &shape, const std::vector &data_type) { common::AnfAlgo::SetOutputInferTypeAndShape(data_type, shape, cnode.get()); kernel::KernelBuildInfo::KernelBuildInfoBuilder builder; builder.SetFusionType(kernel::FusionType::OPAQUE); @@ -54,7 +53,7 @@ class TestHcclAdapter : public UT::Common { AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), cnode.get()); } - std::vector CreateInputs(const FuncGraphPtr &graph, const std::vector> &shape, + std::vector CreateInputs(const FuncGraphPtr &graph, const std::vector &shape, const std::vector &data_type) { MS_EXCEPTION_IF_NULL(graph); if (shape.size() != data_type.size()) { @@ -63,8 +62,8 @@ class TestHcclAdapter : public UT::Common { std::vector res; for (size_t i = 0; i < shape.size(); ++i) { auto node = graph->NewCNode(std::vector{NewValueNode(std::make_shared("AnyNameOp"))}); - common::AnfAlgo::SetOutputInferTypeAndShape(std::vector{data_type[i]}, - std::vector>{shape[i]}, node.get()); + common::AnfAlgo::SetOutputInferTypeAndShape(std::vector{data_type[i]}, std::vector{shape[i]}, + node.get()); kernel::KernelBuildInfo::KernelBuildInfoBuilder builder; builder.SetFusionType(kernel::FusionType::OPAQUE); builder.SetProcessor(kernel::Processor::AICORE); diff --git 
a/tests/ut/cpp/session/anf_runtime_algorithm_test.cc b/tests/ut/cpp/session/anf_runtime_algorithm_test.cc index a5a585edefa..6950dddcbe1 100644 --- a/tests/ut/cpp/session/anf_runtime_algorithm_test.cc +++ b/tests/ut/cpp/session/anf_runtime_algorithm_test.cc @@ -247,7 +247,7 @@ TEST_F(AnfRuntimeAlgorithmTest, GetOutputFormat) { std::vector inputs = {NewValueNode(prim::kPrimAdd), kernel_graph->NewParameter(), kernel_graph->NewParameter()}; auto add = kernel_graph->NewCNode(inputs); - std::vector shape = {1, 2, 3, 4}; + ShapeVector shape = {1, 2, 3, 4}; common::AnfAlgo::SetOutputInferTypeAndShape({kNumberTypeFloat32, kNumberTypeFloat32}, {shape, shape}, add.get()); MS_EXCEPTION_IF_NULL(add); add->set_kernel_info(std::make_shared()); @@ -377,9 +377,9 @@ TEST_F(AnfRuntimeAlgorithmTest, GetOutputDeviceShape) { d_kernel_info->set_select_kernel_build_info(builder.Build()); EXPECT_EQ(AnfAlgo::GetOutputDeviceShape(add, 0)[2], 224); EXPECT_EQ(AnfAlgo::GetOutputDeviceShape(add, 1)[0], 2); - std::vector expect_shape{2, 224, 224, 32}; + ShapeVector expect_shape{2, 224, 224, 32}; EXPECT_EQ(AnfAlgo::GetOutputDeviceShape(add, 2), expect_shape); - std::vector nz_expect_shape{1, 2, 1, 1, 16, 16}; + ShapeVector nz_expect_shape{1, 2, 1, 1, 16, 16}; EXPECT_EQ(AnfAlgo::GetOutputDeviceShape(add, 3), nz_expect_shape); } @@ -409,7 +409,7 @@ TEST_F(AnfRuntimeAlgorithmTest, GetInputDeviceShape) { d_kernel_info->set_select_kernel_build_info(builder.Build()); EXPECT_EQ(AnfAlgo::GetInputDeviceShape(add, 0)[2], 224); EXPECT_EQ(AnfAlgo::GetInputDeviceShape(add, 1)[1], 32); - std::vector expect_shape{2, 224, 224, 32}; + ShapeVector expect_shape{2, 224, 224, 32}; EXPECT_EQ(AnfAlgo::GetInputDeviceShape(add, 2), expect_shape); EXPECT_THROW(common::AnfAlgo::GetPrevNodeOutputInferShape(nullptr, 0), std::runtime_error); } @@ -586,20 +586,20 @@ TEST_F(AnfRuntimeAlgorithmTest, SetOutputInferTypeAndShape) { auto add = kernel_graph->NewCNode(inputs); // set none abstract std::vector none_types = {}; - std::vector> none_shapes = {}; + std::vector none_shapes = {}; EXPECT_THROW(common::AnfAlgo::SetOutputInferTypeAndShape(none_types, none_shapes, nullptr), std::runtime_error); common::AnfAlgo::SetOutputInferTypeAndShape(none_types, none_shapes, add.get()); EXPECT_EQ((*add->abstract()), abstract::AbstractNone()); // set single input std::vector single_types = {kFloat32->type_id()}; - std::vector> single_shapes = {{2, 32, 224, 224}}; + std::vector single_shapes = {{2, 32, 224, 224}}; EXPECT_THROW(common::AnfAlgo::SetOutputInferTypeAndShape(none_types, single_shapes, add.get()), std::runtime_error); common::AnfAlgo::SetOutputInferTypeAndShape(single_types, single_shapes, add.get()); EXPECT_EQ(common::AnfAlgo::GetOutputInferDataType(add, 0), kFloat32->type_id()); EXPECT_EQ(common::AnfAlgo::GetOutputInferShape(add, 0).size(), 4); // set multiple input std::vector mutiple_types = {kFloat16->type_id(), kFloat32->type_id(), kFloat64->type_id()}; - std::vector> mutiple_shapes = {{2, 32, 224, 224}, {2, 32, 224, 224}, {2, 32, 224, 224}}; + std::vector mutiple_shapes = {{2, 32, 224, 224}, {2, 32, 224, 224}, {2, 32, 224, 224}}; common::AnfAlgo::SetOutputInferTypeAndShape(mutiple_types, mutiple_shapes, add.get()); EXPECT_EQ(common::AnfAlgo::GetOutputInferDataType(add, 0), kFloat16->type_id()); EXPECT_EQ(common::AnfAlgo::GetOutputInferDataType(add, 1), kFloat32->type_id()); @@ -616,14 +616,14 @@ TEST_F(AnfRuntimeAlgorithmTest, CopyAbstract) { auto first_add = kernel_graph->NewCNode(first_inputs); // set single input std::vector single_types = 
{kFloat32->type_id()};
-  std::vector<std::vector<size_t>> single_shapes = {{2, 32, 224, 224}};
+  std::vector<ShapeVector> single_shapes = {{2, 32, 224, 224}};
   common::AnfAlgo::SetOutputInferTypeAndShape(single_types, single_shapes, first_add.get());
   // set multiple input
   std::vector<AnfNodePtr> second_inputs;
   second_inputs.push_back(NewValueNode(prim::kPrimAdd));
   auto second_add = kernel_graph->NewCNode(second_inputs);
   std::vector<TypeId> mutiple_types = {kFloat16->type_id(), kFloat32->type_id(), kFloat64->type_id()};
-  std::vector<std::vector<size_t>> mutiple_shapes = {{2, 32, 224, 224}, {2, 32, 224, 224}, {2, 32, 224, 224}};
+  std::vector<ShapeVector> mutiple_shapes = {{2, 32, 224, 224}, {2, 32, 224, 224}, {2, 32, 224, 224}};
   common::AnfAlgo::SetOutputInferTypeAndShape(mutiple_types, mutiple_shapes, second_add.get());
   common::AnfAlgo::CopyAbstract(second_add, first_add.get());
   EXPECT_EQ(common::AnfAlgo::GetOutputInferDataType(first_add, 0), kFloat16->type_id());
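For the few call sites that still genuinely need unsigned shapes, the patch adds conversion helpers to convert_utils_base.h rather than leaving ad-hoc std::transform calls scattered around. Below is a minimal standalone sketch of those helpers; it assumes the same ShapeVector alias, and MS_LOG(EXCEPTION) is simplified to a plain exception here, so it is an approximation rather than the library code itself.

// Standalone sketch of the Convert2SizeT / Convert2SizeTClipNeg helpers this
// diff adds to convert_utils_base.h. Assumption: ShapeVector = std::vector<int64_t>;
// error handling reduced to std::runtime_error for the sake of a runnable example.
#include <algorithm>
#include <cstdint>
#include <iterator>
#include <stdexcept>
#include <vector>

using ShapeVector = std::vector<int64_t>;

// Strict conversion: a negative dimension is a programming error here.
size_t LongToSize(int64_t u) {
  if (u < 0) {
    throw std::runtime_error("negative dimension in shape");
  }
  return static_cast<size_t>(u);
}

std::vector<size_t> Convert2SizeT(const ShapeVector &v) {
  std::vector<size_t> result;
  (void)std::transform(v.begin(), v.end(), std::back_inserter(result), LongToSize);
  return result;
}

// Lenient conversion: dynamic dimensions (-1/-2) clip to 0 instead of
// wrapping around to SIZE_MAX under static_cast.
std::vector<size_t> Convert2SizeTClipNeg(const ShapeVector &v) {
  std::vector<size_t> result;
  auto convert = [](int64_t s) -> size_t { return s < 0 ? 0 : static_cast<size_t>(s); };
  (void)std::transform(v.begin(), v.end(), std::back_inserter(result), convert);
  return result;
}

int main() {
  ShapeVector dynamic_shape{-1, 3, 4};
  auto clipped = Convert2SizeTClipNeg(dynamic_shape);  // yields {0, 3, 4}
  return clipped[0] == 0 ? 0 : 1;
}

Having both variants makes the intent explicit at each call site: the strict form documents that a dynamic dimension cannot legally reach that code path, while the clipping form is the right default wherever a -1 placeholder can legitimately flow into a size computation.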