From 73ba39936423629a875924842becbb74b4fca840 Mon Sep 17 00:00:00 2001 From: Wei Luning Date: Mon, 23 Mar 2020 17:33:56 +0800 Subject: [PATCH] remove ge depend in cpu --- CMakeLists.txt | 12 +- cmake/mind_expression.cmake | 2 +- mindspore/ccsrc/CMakeLists.txt | 26 +- mindspore/ccsrc/debug/e2e_dump.cc | 2 +- .../device/ascend/kernel_build_ascend.cc | 2 +- .../ascend/profiling/profiling_manager.cc | 1 + .../device/ascend/tasksink/task_generator.cc | 4 +- .../device/ascend/tasksink/task_generator.h | 2 +- .../ccsrc/device/gpu/gpu_kernel_build.cc | 1 - .../ccsrc/device/gpu/kernel_info_setter.cc | 2 +- mindspore/ccsrc/device/kernel_adjust.cc | 1 + mindspore/ccsrc/ir/anf.cc | 17 + mindspore/ccsrc/ir/anf.h | 2 + mindspore/ccsrc/ir/meta_tensor.cc | 1 - .../ccsrc/kernel/aicpu/aicpu_kernel_mod.h | 4 +- mindspore/ccsrc/kernel/akg/akgkernelbuild.cc | 5 +- mindspore/ccsrc/kernel/ascend_kernel_mod.h | 36 + mindspore/ccsrc/kernel/common_utils.cc | 3 +- .../ccsrc/kernel/gpu/nn/lstm_gpu_kernel.h | 4 +- .../kernel/gpu/nn/lstm_grad_data_gpu_kernel.h | 6 +- .../gpu/nn/lstm_grad_weight_gpu_kernel.h | 4 +- mindspore/ccsrc/kernel/hccl/hccl_kernel.h | 4 +- mindspore/ccsrc/kernel/kernel.h | 6 - mindspore/ccsrc/kernel/mng/rt_kernel.h | 4 +- mindspore/ccsrc/kernel/oplib/oplib.cc | 4 +- mindspore/ccsrc/kernel/tbe/tbe_kernel_build.h | 33 +- mindspore/ccsrc/kernel/tbe/tbe_kernel_mod.h | 4 +- .../kernel/tbe/tbe_kernel_parallel_build.h | 5 +- .../ccsrc/kernel/tbe/tbe_kernel_select.cc | 9 +- mindspore/ccsrc/kernel/tbe/tbe_utils.cc | 1 + mindspore/ccsrc/pipeline/base.h | 64 ++ mindspore/ccsrc/pipeline/init.cc | 10 +- .../ccsrc/pipeline/parse/python_adapter.cc | 1 + .../ccsrc/pipeline/parse/python_adapter.h | 1 + mindspore/ccsrc/pipeline/pipeline.cc | 698 +++--------------- mindspore/ccsrc/pipeline/pipeline.h | 35 +- mindspore/ccsrc/pipeline/pipeline_ge.cc | 545 ++++++++++++++ mindspore/ccsrc/pipeline/pipeline_ge.h | 57 ++ mindspore/ccsrc/pipeline/resource.cc | 29 - mindspore/ccsrc/pipeline/resource.h | 7 +- .../ascend/ascend_backend_optimization.cc | 2 +- .../ascend/buffer_fusion/buffer_fusion.cc | 4 +- .../ir_fusion/allreduce_fusion.cc | 2 +- .../ir_fusion/allreduce_fusion.h | 6 +- .../ccsrc/predict/converter/kernel2ms.cc | 6 +- mindspore/ccsrc/pynative/base.h | 67 ++ mindspore/ccsrc/pynative/pynative_execute.cc | 285 +------ mindspore/ccsrc/pynative/pynative_execute.h | 43 +- .../ccsrc/pynative/pynative_execute_ge.cc | 311 ++++++++ .../ccsrc/pynative/pynative_execute_ge.h | 46 ++ mindspore/ccsrc/session/ascend_session.cc | 1 + mindspore/ccsrc/session/gpu_session.cc | 2 +- mindspore/ccsrc/transform/convert.cc | 18 - mindspore/ccsrc/transform/convert.h | 1 - mindspore/ccsrc/utils/callbacks.cc | 13 +- mindspore/ccsrc/utils/callbacks.h | 9 + mindspore/ccsrc/utils/context/ms_context.cc | 7 +- mindspore/ccsrc/utils/context/ms_context.h | 1 - mindspore/ccsrc/utils/convert_utils.cc | 41 + mindspore/ccsrc/utils/convert_utils.h | 4 + mindspore/ccsrc/vm/segment_runner.cc | 4 +- mindspore/ccsrc/vm/transform.cc | 16 +- mindspore/ccsrc/vm/transform.h | 2 - mindspore/ccsrc/vm/vmimpl.cc | 35 - mindspore/ccsrc/vm/vmimpl.h | 6 - mindspore/common/api.py | 3 +- mindspore/common/parameter.py | 12 + mindspore/common/tensor.py | 10 + mindspore/ops/functional.py | 2 +- mindspore/train/model.py | 3 + .../cpp/device/ascend_kernel_select_test.cc | 2 +- tests/ut/cpp/device/ascend_profiling_test.cc | 2 +- .../ir_fusion/allreduce_fusion_test.cc | 2 +- ...onvert_const_input_to_tensor_input_test.cc | 2 +- tests/ut/python/ir/test_tensor.py | 18 + tests/ut/python/ops/test_array_ops.py | 14 - tests/ut/python/parallel/__init__.py | 5 +- tests/ut/python/parallel/test_alltoall.py | 11 +- .../parallel/test_auto_parallel_arithmetic.py | 18 +- ...t_auto_parallel_assign_sub_with_ref_key.py | 10 +- .../parallel/test_auto_parallel_cast.py | 12 +- .../test_auto_parallel_matmul_prelu.py | 8 +- .../test_auto_parallel_parameter_cast.py | 8 +- .../parallel/test_auto_parallel_transpose.py | 10 +- .../parallel/test_auto_parallel_two_matmul.py | 5 +- .../python/parallel/test_dataset_interface.py | 2 +- tests/ut/python/parallel/test_one_dev.py | 20 +- .../python/pipeline/parse/test_create_obj.py | 3 + tests/ut/python/pipeline/parse/test_dtype.py | 3 +- .../ut/python/pynative_mode/ops/test_grad.py | 16 + .../test_summary_ops_params_valid_check.py | 21 +- tests/ut/python/utils/test_serialize.py | 3 +- 92 files changed, 1574 insertions(+), 1237 deletions(-) create mode 100644 mindspore/ccsrc/kernel/ascend_kernel_mod.h create mode 100644 mindspore/ccsrc/pipeline/base.h create mode 100644 mindspore/ccsrc/pipeline/pipeline_ge.cc create mode 100644 mindspore/ccsrc/pipeline/pipeline_ge.h rename mindspore/ccsrc/pre_activate/{ascend => common}/ir_fusion/allreduce_fusion.cc (97%) rename mindspore/ccsrc/pre_activate/{ascend => common}/ir_fusion/allreduce_fusion.h (87%) create mode 100644 mindspore/ccsrc/pynative/base.h create mode 100644 mindspore/ccsrc/pynative/pynative_execute_ge.cc create mode 100644 mindspore/ccsrc/pynative/pynative_execute_ge.h rename tests/ut/cpp/pre_activate/{ascend => common}/ir_fusion/allreduce_fusion_test.cc (99%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9cb73935ee..bdac2da46e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -42,11 +42,13 @@ else() include(${CMAKE_SOURCE_DIR}/cmake/dependency_graphengine.cmake) endif() -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/graphengine/inc) -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/graphengine/inc/external) -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/graphengine/inc/framework) -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/graphengine/third_party/fwkacllib/inc) -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/graphengine/third_party/fwkacllib/inc/toolchain) +if (ENABLE_GE OR ENABLE_D OR ENABLE_TESTCASES) + include_directories(${CMAKE_CURRENT_SOURCE_DIR}/graphengine/inc) + include_directories(${CMAKE_CURRENT_SOURCE_DIR}/graphengine/inc/external) + include_directories(${CMAKE_CURRENT_SOURCE_DIR}/graphengine/inc/framework) + include_directories(${CMAKE_CURRENT_SOURCE_DIR}/graphengine/third_party/fwkacllib/inc) + include_directories(${CMAKE_CURRENT_SOURCE_DIR}/graphengine/third_party/fwkacllib/inc/toolchain) +endif() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=hidden") add_subdirectory(mindspore/ccsrc) diff --git a/cmake/mind_expression.cmake b/cmake/mind_expression.cmake index 103775a4c2..345fd4675e 100644 --- a/cmake/mind_expression.cmake +++ b/cmake/mind_expression.cmake @@ -40,7 +40,7 @@ if (ENABLE_GE) include_directories(${CMAKE_SOURCE_DIR}/third_party/ge/include) include_directories(${CMAKE_SOURCE_DIR}/third_party/ge/include/external) include_directories(${CMAKE_SOURCE_DIR}/third_party/ge/include/external/graph) -else() +elseif(ENABLE_D OR ENABLE_TESTCASES) include_directories(${CMAKE_SOURCE_DIR}/graphengine/inc) include_directories(${CMAKE_SOURCE_DIR}/graphengine/inc/ops) include_directories(${CMAKE_SOURCE_DIR}/graphengine/inc/external) diff --git a/mindspore/ccsrc/CMakeLists.txt b/mindspore/ccsrc/CMakeLists.txt index 1c684b6736..0b4bb0d1df 100644 --- a/mindspore/ccsrc/CMakeLists.txt +++ b/mindspore/ccsrc/CMakeLists.txt @@ -34,6 +34,8 @@ if(ENABLE_GPU) "device/gpu/*.cu" "kernel/gpu/*.cu" "kernel/akg/gpu/*.cc" + "kernel/akg/akgkernelbuild.cc" + "kernel/akg/akg_kernel_attrs_process.cc" ) file(GLOB_RECURSE GPU_KERNEL_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "kernel/gpu/*.cc" @@ -100,14 +102,14 @@ file(GLOB_RECURSE MINDSPORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "debug/*.cc" "onnx/onnx_exporter.cc" "operator/*.cc" - "transform/*.cc" "session/kernel_graph.cc" "utils/node_utils.cc" "session/session_basic.cc" "session/session_factory.cc" "session/anf_runtime_algorithm.cc" "vm/*.cc" - "pynative/*.cc" + "pynative/base.cc" + "pynative/pynative_execute.cc" "pybind_api/*.cc" "device/common/*.cc" "kernel/kernel_query.cc" @@ -117,7 +119,6 @@ file(GLOB_RECURSE MINDSPORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "device/kernel_runtime.cc" "device/kernel_runtime_manager.cc" "device/convert_tensor_utils.cc" - "pre_activate/ascend/*.cc" "pre_activate/common/*.cc" "pre_activate/pass/*.cc" "pre_activate/gpu/*.cc" @@ -168,6 +169,15 @@ if(ENABLE_DUMP_PROTO) add_compile_definitions(ENABLE_DUMP_PROTO) endif() +if(ENABLE_GE) + file(GLOB_RECURSE GE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} + "transform/*.cc" + "pynative/pynative_execute_ge.cc" + "pipeline/pipeline_ge.cc" + ) + list(APPEND MINDSPORE_SRC_LIST ${GE_SRC_LIST}) +endif() + if(ENABLE_D) include_directories("${CMAKE_BINARY_DIR}/kernel/aicpu") file(GLOB_RECURSE PROTO_IN RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} @@ -188,6 +198,9 @@ if(ENABLE_D) "device/kernel_adjust.cc" "kernel/kernel_fusion.cc" "kernel/tbe/*.cc" + "pre_activate/ascend/*.cc" + "transform/*.cc" + "pipeline/pipeline_ge.cc" ) list(APPEND MINDSPORE_SRC_LIST ${D_SRC_LIST}) list(APPEND MINDSPORE_PROTO_AICPU_LIST ${PROTOSRCS}) @@ -246,9 +259,11 @@ if (ENABLE_GE) target_link_libraries(mindspore graph ge_client) endif() target_link_libraries(mindspore tsdclient) -else() +elseif(ENABLE_D) add_compile_definitions(NO_GE_CLIENT) target_link_libraries(mindspore graph) +else() + add_compile_definitions(NO_GE_CLIENT) endif() if(ENABLE_D) @@ -288,8 +303,6 @@ endif() set(PYTHON_MODULE_SOURCE pipeline/init.cc kernel/oplib/oplib.cc - kernel/akg/akgkernelbuild.cc - kernel/akg/akg_kernel_attrs_process.cc ${MS_STEPS_SRC_LIST} ${MS_CCE_SRC_LIST} ${MS_AICPU_SRC_LIST} ${MS_TASKINFO_LIST} ${MS_RT_SRC_LIST} ${GPU_NCCL_LIST} ${MS_HCCL_SRC_LIST} ${MS_PREDICT_SRC_LIST} ${CPU_SRC_LIST} ${MEM_REUSE_SRC_LIST} ${GPU_KERNEL_SRC_LIST}) @@ -350,6 +363,7 @@ if(ENABLE_GPU) assign_source_group("Include" ${GROUP_INCLUDE}) file(GLOB COMPILER_SRCS + "pre_activate/gpu/*.cc" ${TVM_DIR}/src/api/*.cc ${TVM_DIR}/src/arithmetic/*.cc ${TVM_DIR}/src/autotvm/*.cc diff --git a/mindspore/ccsrc/debug/e2e_dump.cc b/mindspore/ccsrc/debug/e2e_dump.cc index 3006bb66da..ba11eafa5f 100644 --- a/mindspore/ccsrc/debug/e2e_dump.cc +++ b/mindspore/ccsrc/debug/e2e_dump.cc @@ -49,7 +49,7 @@ bool Dump::IsKernelNeedDump(const std::string& kernel_name) { return false; } -bool Dump::ParseDumpConfig(const string& dump_config_file) { +bool Dump::ParseDumpConfig(const std::string& dump_config_file) { std::ifstream jsonFile(dump_config_file); if (!jsonFile.is_open()) { MS_LOG(ERROR) << dump_config_file << " open failed."; diff --git a/mindspore/ccsrc/device/ascend/kernel_build_ascend.cc b/mindspore/ccsrc/device/ascend/kernel_build_ascend.cc index 66ce697ffc..e7c1449360 100644 --- a/mindspore/ccsrc/device/ascend/kernel_build_ascend.cc +++ b/mindspore/ccsrc/device/ascend/kernel_build_ascend.cc @@ -94,7 +94,7 @@ static bool KernelBuildParallelCompile(const mindspore::session::KernelGraph *ke return ret; } -static vector CalCleanZerosSize(const CNodePtr &pre_node) { +static std::vector CalCleanZerosSize(const CNodePtr &pre_node) { MS_EXCEPTION_IF_NULL(pre_node); std::vector clean_size_list; // clean output diff --git a/mindspore/ccsrc/device/ascend/profiling/profiling_manager.cc b/mindspore/ccsrc/device/ascend/profiling/profiling_manager.cc index 4bc68e647a..29193e5cfa 100644 --- a/mindspore/ccsrc/device/ascend/profiling/profiling_manager.cc +++ b/mindspore/ccsrc/device/ascend/profiling/profiling_manager.cc @@ -27,6 +27,7 @@ #include "utils/log_adapter.h" #include "utils/context/ms_context.h" #include "common/utils.h" +#include "utils/convert_utils.h" using std::vector; using Json = nlohmann::json; diff --git a/mindspore/ccsrc/device/ascend/tasksink/task_generator.cc b/mindspore/ccsrc/device/ascend/tasksink/task_generator.cc index 88d00bc6c2..62cf809c21 100644 --- a/mindspore/ccsrc/device/ascend/tasksink/task_generator.cc +++ b/mindspore/ccsrc/device/ascend/tasksink/task_generator.cc @@ -121,8 +121,8 @@ bool TaskGenerator::LaunchKernel(const CNodePtr &anf_node_ptr, uint32_t stream_i LaunchAddrCleanKernel(anf_node_ptr, &kernel_inputs); } - std::vector task_info_ptrs = - kernel_mod->GenTask(kernel_inputs, kernel_workspaces, kernel_outputs, stream_id); + std::vector task_info_ptrs = dynamic_cast(kernel_mod) + ->GenTask(kernel_inputs, kernel_workspaces, kernel_outputs, stream_id); task_info_list->insert(task_info_list->end(), task_info_ptrs.begin(), task_info_ptrs.end()); return true; } diff --git a/mindspore/ccsrc/device/ascend/tasksink/task_generator.h b/mindspore/ccsrc/device/ascend/tasksink/task_generator.h index 0c56fcc744..ffedcd7930 100644 --- a/mindspore/ccsrc/device/ascend/tasksink/task_generator.h +++ b/mindspore/ccsrc/device/ascend/tasksink/task_generator.h @@ -24,7 +24,7 @@ #include #include "device/kernel_runtime.h" #include "ir/anf.h" -#include "kernel/kernel.h" +#include "kernel/ascend_kernel_mod.h" #include "framework/ge_runtime/task_info.h" namespace mindspore { diff --git a/mindspore/ccsrc/device/gpu/gpu_kernel_build.cc b/mindspore/ccsrc/device/gpu/gpu_kernel_build.cc index 0467b59e06..2a2a2be065 100644 --- a/mindspore/ccsrc/device/gpu/gpu_kernel_build.cc +++ b/mindspore/ccsrc/device/gpu/gpu_kernel_build.cc @@ -21,7 +21,6 @@ #include "kernel/gpu/gpu_kernel_factory.h" #include "operator/ops.h" #include "pybind11/stl.h" -#include "transform/convert.h" #include "session/anf_runtime_algorithm.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/gpu/kernel_info_setter.cc b/mindspore/ccsrc/device/gpu/kernel_info_setter.cc index 3faf7d01c8..05ecf380d1 100644 --- a/mindspore/ccsrc/device/gpu/kernel_info_setter.cc +++ b/mindspore/ccsrc/device/gpu/kernel_info_setter.cc @@ -91,7 +91,7 @@ std::string SupportedTypeList(const CNodePtr& kernel_node) { return supported_type_lists; } -bool SelectAkgKernel(const CNodePtr& kernel_node, const shared_ptr& selected_kernel_info) { +bool SelectAkgKernel(const CNodePtr& kernel_node, const std::shared_ptr& selected_kernel_info) { MS_EXCEPTION_IF_NULL(kernel_node); MS_EXCEPTION_IF_NULL(selected_kernel_info); std::vector> kernel_info_list; diff --git a/mindspore/ccsrc/device/kernel_adjust.cc b/mindspore/ccsrc/device/kernel_adjust.cc index a4d316d601..4d3ebf9490 100644 --- a/mindspore/ccsrc/device/kernel_adjust.cc +++ b/mindspore/ccsrc/device/kernel_adjust.cc @@ -32,6 +32,7 @@ #include "device/ascend/profiling/profiling_manager.h" #include "device/ascend/kernel_select_ascend.h" #include "device/kernel_info.h" +#include "runtime/base.h" constexpr auto kLoopCountParamName = "loop_count"; constexpr auto kIterLoopParamName = "iter_loop"; diff --git a/mindspore/ccsrc/ir/anf.cc b/mindspore/ccsrc/ir/anf.cc index c1348bf7d7..924453a7a6 100644 --- a/mindspore/ccsrc/ir/anf.cc +++ b/mindspore/ccsrc/ir/anf.cc @@ -197,6 +197,23 @@ PrimitivePtr GetCNodePrimitive(const AnfNodePtr& node) { return nullptr; } +std::string GetCNodeFuncName(const CNodePtr cnode) { + if (cnode->inputs().empty()) { + return ""; + } + + AnfNodePtr valuenode = cnode->input(0); + if (valuenode->isa()) { + auto value = GetValueNode(valuenode); + // check whether the valuenode is primitive + if (value->isa()) { + return value->cast()->name(); + } + return value->ToString(); + } + return ""; +} + bool IsPrimitive(const AnfNodePtr& node, const PrimitivePtr& value) { if (IsValueNode(node)) { PrimitivePtr fn_value = GetValueNode(node); diff --git a/mindspore/ccsrc/ir/anf.h b/mindspore/ccsrc/ir/anf.h index 9050a4ed16..e64b1329e9 100644 --- a/mindspore/ccsrc/ir/anf.h +++ b/mindspore/ccsrc/ir/anf.h @@ -384,6 +384,8 @@ static S GetValue(const ValuePtr &value) { return v; } +std::string GetCNodeFuncName(CNodePtr cnode); + // used to check whether an AnfNode is a cnode with a kind of Primitive as first input bool IsPrimitiveCNode(const AnfNodePtr &node, const PrimitivePtr &value); diff --git a/mindspore/ccsrc/ir/meta_tensor.cc b/mindspore/ccsrc/ir/meta_tensor.cc index d33bc10c27..e9221039a7 100644 --- a/mindspore/ccsrc/ir/meta_tensor.cc +++ b/mindspore/ccsrc/ir/meta_tensor.cc @@ -25,7 +25,6 @@ #include "device/device_address.h" #include "pybind_api/api_register.h" #include "pybind_api/export_flags.h" -#include "pynative/pynative_execute.h" #include "pipeline/static_analysis/abstract_value.h" namespace mindspore { diff --git a/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_mod.h b/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_mod.h index d1e6f69b23..dde2afe34a 100644 --- a/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_mod.h +++ b/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_mod.h @@ -18,11 +18,11 @@ #include #include #include -#include "kernel/kernel.h" +#include "kernel/ascend_kernel_mod.h" #include "kernel/aicpu/aicpu_util.h" namespace mindspore { namespace kernel { -class AicpuOpKernelMod : public KernelMod { +class AicpuOpKernelMod : public AscendKernelMod { public: AicpuOpKernelMod(); ~AicpuOpKernelMod() override; diff --git a/mindspore/ccsrc/kernel/akg/akgkernelbuild.cc b/mindspore/ccsrc/kernel/akg/akgkernelbuild.cc index 8413208c4d..c0759172a5 100644 --- a/mindspore/ccsrc/kernel/akg/akgkernelbuild.cc +++ b/mindspore/ccsrc/kernel/akg/akgkernelbuild.cc @@ -35,7 +35,6 @@ #include "utils/convert_utils.h" #include "utils/any.h" #include "utils/utils.h" -#include "transform/convert.h" #include "session/anf_runtime_algorithm.h" #include "kernel/akg/akg_kernel_attrs_process.h" @@ -240,8 +239,8 @@ bool AkgKernelBuild::CreateOutputDescJson(const AnfNodePtr &anf_node, nlohmann:: return true; } -void GetJson(const AnfNodePtr &anf_node, const vector &dyn_input_sizes, const shared_ptr &op_attr, - nlohmann::json *const attr_json, const ValuePtr &attr_value) { +void GetJson(const AnfNodePtr &anf_node, const std::vector &dyn_input_sizes, + const std::shared_ptr &op_attr, nlohmann::json *const attr_json, const ValuePtr &attr_value) { MS_EXCEPTION_IF_NULL(anf_node); MS_EXCEPTION_IF_NULL(op_attr); MS_EXCEPTION_IF_NULL(attr_json); diff --git a/mindspore/ccsrc/kernel/ascend_kernel_mod.h b/mindspore/ccsrc/kernel/ascend_kernel_mod.h new file mode 100644 index 0000000000..ff8595c1a2 --- /dev/null +++ b/mindspore/ccsrc/kernel/ascend_kernel_mod.h @@ -0,0 +1,36 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_KERNEL_ASCEND_KERNEL_MOD_H_ +#define MINDSPORE_CCSRC_KERNEL_ASCEND_KERNEL_MOD_H_ + +#include +#include +#include "framework/ge_runtime/task_info.h" +#include "kernel/kernel.h" + +using TaskInfoPtr = std::shared_ptr; +namespace mindspore { +namespace kernel { +class AscendKernelMod : public KernelMod { + public: + virtual std::vector GenTask(const std::vector &, const std::vector &, + const std::vector &, uint32_t) = 0; +}; +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_KERNEL_ASCEND_KERNEL_MOD_H_ diff --git a/mindspore/ccsrc/kernel/common_utils.cc b/mindspore/ccsrc/kernel/common_utils.cc index d610ea736d..c2f2638753 100644 --- a/mindspore/ccsrc/kernel/common_utils.cc +++ b/mindspore/ccsrc/kernel/common_utils.cc @@ -19,7 +19,6 @@ #include #include #include -#include "runtime/rt.h" #include "nlohmann/json.hpp" #include "session/anf_runtime_algorithm.h" #include "common/utils.h" @@ -490,7 +489,7 @@ void SaveJsonInfo(const std::string &json_name, const std::string &info) { if (!filewrite.is_open()) { return; } - filewrite << info << endl; + filewrite << info << std::endl; filewrite.close(); if (nullptr == realpath(path.c_str(), real_path)) { MS_LOG(DEBUG) << "dir " << path << " does not exit."; diff --git a/mindspore/ccsrc/kernel/gpu/nn/lstm_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/lstm_gpu_kernel.h index 2e284f72e8..51a2da8574 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/lstm_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/nn/lstm_gpu_kernel.h @@ -226,12 +226,12 @@ class LstmGpuKernel : public GpuKernel { size_t reserved_size_; // input desc - unique_ptr x_desc_; + std::unique_ptr x_desc_; cudnnTensorDescriptor_t hx_desc_; cudnnTensorDescriptor_t cx_desc_; cudnnFilterDescriptor_t w_desc_; cudnnDropoutDescriptor_t dropout_desc_; - unique_ptr y_desc_; + std::unique_ptr y_desc_; cudnnTensorDescriptor_t hy_desc_; cudnnTensorDescriptor_t cy_desc_; cudnnRNNDescriptor_t rnn_desc_; diff --git a/mindspore/ccsrc/kernel/gpu/nn/lstm_grad_data_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/lstm_grad_data_gpu_kernel.h index 2fadccb8ea..a60ab78f7d 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/lstm_grad_data_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/nn/lstm_grad_data_gpu_kernel.h @@ -258,8 +258,8 @@ class LstmGradDataGpuKernel : public GpuKernel { cudnnRNNDescriptor_t rnn_desc_; // input desc - unique_ptr y_desc_; - unique_ptr dy_desc_; + std::unique_ptr y_desc_; + std::unique_ptr dy_desc_; cudnnTensorDescriptor_t dhy_desc_; cudnnTensorDescriptor_t dcy_desc_; cudnnFilterDescriptor_t w_desc_; @@ -269,7 +269,7 @@ class LstmGradDataGpuKernel : public GpuKernel { cudnnDropoutDescriptor_t dropout_desc_; // output desc - unique_ptr dx_desc_; + std::unique_ptr dx_desc_; cudnnTensorDescriptor_t dhx_desc_; cudnnTensorDescriptor_t dcx_desc_; diff --git a/mindspore/ccsrc/kernel/gpu/nn/lstm_grad_weight_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/lstm_grad_weight_gpu_kernel.h index 6cf512f14a..b28736cc96 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/lstm_grad_weight_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/nn/lstm_grad_weight_gpu_kernel.h @@ -214,9 +214,9 @@ class LstmGradWeightGpuKernel : public GpuKernel { cudnnDropoutDescriptor_t dropout_desc_; // input desc - unique_ptr x_desc_; + std::unique_ptr x_desc_; cudnnTensorDescriptor_t hx_desc_; - unique_ptr y_desc_; + std::unique_ptr y_desc_; // output desc cudnnFilterDescriptor_t dw_desc_; diff --git a/mindspore/ccsrc/kernel/hccl/hccl_kernel.h b/mindspore/ccsrc/kernel/hccl/hccl_kernel.h index 71d9e5ba6a..24e1feec0a 100644 --- a/mindspore/ccsrc/kernel/hccl/hccl_kernel.h +++ b/mindspore/ccsrc/kernel/hccl/hccl_kernel.h @@ -23,14 +23,14 @@ #include #include #include -#include "kernel/kernel.h" +#include "kernel/ascend_kernel_mod.h" #include "kernel/hccl/hcom_util.h" #include "hccl/hcom.h" #include "common/utils.h" namespace mindspore { namespace kernel { -class HcclKernel : public KernelMod { +class HcclKernel : public AscendKernelMod { public: HcclKernel(); ~HcclKernel() override; diff --git a/mindspore/ccsrc/kernel/kernel.h b/mindspore/ccsrc/kernel/kernel.h index aecc51794c..80d831269c 100644 --- a/mindspore/ccsrc/kernel/kernel.h +++ b/mindspore/ccsrc/kernel/kernel.h @@ -25,7 +25,6 @@ #include "ir/meta_tensor.h" #include "pipeline/static_analysis/dshape.h" #include "utils/log_adapter.h" -#include "framework/ge_runtime/task_info.h" namespace mindspore { enum KernelType : int { UNKNOWN_KERNEL_TYPE = 0, AUTO_DIFF_KERNEL, AICPU_KERNEL, RT_KERNEL, HCCL_KERNEL, TBE_KERNEL }; @@ -111,7 +110,6 @@ struct Address { size_t size; }; using AddressPtr = std::shared_ptr
; -using TaskInfoPtr = std::shared_ptr; class KernelMod { public: @@ -120,10 +118,6 @@ class KernelMod { virtual const std::vector &GetWorkspaceSizeList() const = 0; virtual bool Launch(const std::vector &inputs, const std::vector &workspace, const std::vector &outputs, uintptr_t stream_ptr) = 0; - virtual std::vector GenTask(const std::vector &, const std::vector &, - const std::vector &, uint32_t) { - return {}; - } virtual std::vector GenParameters() { return {}; } virtual ~KernelMod() = default; diff --git a/mindspore/ccsrc/kernel/mng/rt_kernel.h b/mindspore/ccsrc/kernel/mng/rt_kernel.h index f86a86ef5d..54823f73cc 100644 --- a/mindspore/ccsrc/kernel/mng/rt_kernel.h +++ b/mindspore/ccsrc/kernel/mng/rt_kernel.h @@ -22,12 +22,12 @@ #include #include #include -#include "kernel/kernel.h" +#include "kernel/ascend_kernel_mod.h" #include "kernel/task_stream.h" namespace mindspore { namespace kernel { -class RtKernel : public KernelMod { +class RtKernel : public AscendKernelMod { public: RtKernel(); ~RtKernel() override; diff --git a/mindspore/ccsrc/kernel/oplib/oplib.cc b/mindspore/ccsrc/kernel/oplib/oplib.cc index 23e7014104..b20bd741f1 100644 --- a/mindspore/ccsrc/kernel/oplib/oplib.cc +++ b/mindspore/ccsrc/kernel/oplib/oplib.cc @@ -19,7 +19,7 @@ #include #include #include "utils/log_adapter.h" -#include "kernel/oplib/opinfo.h" +#include "utils/overload.h" #include "utils/context/ms_context.h" namespace mindspore { @@ -50,7 +50,7 @@ constexpr auto kNeedCompile = "need_compile"; constexpr auto kShape = "shape"; std::vector> OpLib::op_info_; -string ImplTypeToStr(OpImplyType impl_type) { +std::string ImplTypeToStr(OpImplyType impl_type) { switch (impl_type) { case kTBE: return kTbe; diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.h b/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.h index bc4895ac6f..de5ed84e41 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.h +++ b/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.h @@ -48,7 +48,7 @@ class TbeKernelBuild { private: TbeKernelBuild() = default; ~TbeKernelBuild() = default; - static bool GenFusionDataInputJson(const shared_ptr &data_input, nlohmann::json *data_str, + static bool GenFusionDataInputJson(const std::shared_ptr &data_input, nlohmann::json *data_str, size_t *index); static bool GenFusionComputeJson(const mindspore::AnfNodePtr &compute_node, std::vector>::iterator *layer_iter, @@ -56,12 +56,13 @@ class TbeKernelBuild { static bool GenFusionComputeInputeJson(const mindspore::CNodePtr &cnode, std::vector>::iterator *layer_iter, std::vector *input_desc_list, size_t *index); - static void GenDescJson(const shared_ptr &anf_node, size_t out_idx, nlohmann::json *output_desc); - static void GenReusedOutputDesc(const shared_ptr &anf_node, size_t index, size_t output_index, - nlohmann::json *output_desc); + static void GenDescJson(const std::shared_ptr &anf_node, size_t out_idx, + nlohmann::json *output_desc); + static void GenReusedOutputDesc(const std::shared_ptr &anf_node, size_t index, + size_t output_index, nlohmann::json *output_desc); static size_t GetIOSizeImpl(const nlohmann::json &desc); - static bool GetInputLayers(const vector &input_nodes, - const vector &compute_nodes, + static bool GetInputLayers(const std::vector &input_nodes, + const std::vector &compute_nodes, std::vector> *input_layers); static bool IsDynamicInput(const CNodePtr &cnode); static size_t GetOptionalInput(const CNodePtr &cnode, bool is_dynamic_input); @@ -82,15 +83,17 @@ class TbeKernelJsonCreator { bool GenTbeAttrJson(const std::shared_ptr &anf_node, const std::shared_ptr &op_info, nlohmann::json *attrs_json); void ParseAttrValue(const std::string &type, const ValuePtr &value, nlohmann::json *attr_obj); - bool GenInputDescJson(const shared_ptr &anf_node, size_t real_input_index, bool value, - const shared_ptr &input_ptr, const string &op_input_name, size_t input_i, - vector *input_list); - bool GenOutputDescJson(const shared_ptr &anf_node, const vector> &outputs_ptr, - nlohmann::json *outputs_json); - bool GenInputList(const shared_ptr &anf_node, size_t input_tensor_num, const shared_ptr &input_ptr, - size_t *real_input_index, string *op_input_name, vector *input_list); - void GenOutputList(const shared_ptr &anf_node, const size_t &output_obj_num, - const shared_ptr &output_ptr, size_t *output_idx, vector *output_list); + bool GenInputDescJson(const std::shared_ptr &anf_node, size_t real_input_index, bool value, + const std::shared_ptr &input_ptr, const string &op_input_name, size_t input_i, + std::vector *input_list); + bool GenOutputDescJson(const std::shared_ptr &anf_node, + const std::vector> &outputs_ptr, nlohmann::json *outputs_json); + bool GenInputList(const std::shared_ptr &anf_node, size_t input_tensor_num, + const std::shared_ptr &input_ptr, size_t *real_input_index, string *op_input_name, + std::vector *input_list); + void GenOutputList(const std::shared_ptr &anf_node, const size_t &output_obj_num, + const std::shared_ptr &output_ptr, size_t *output_idx, + std::vector *output_list); kCreaterType creater_type_; std::string json_name_; std::string json_info_; diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_mod.h b/mindspore/ccsrc/kernel/tbe/tbe_kernel_mod.h index 35fc7f517d..f4fb75038e 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_kernel_mod.h +++ b/mindspore/ccsrc/kernel/tbe/tbe_kernel_mod.h @@ -21,12 +21,12 @@ #include #include #include -#include "kernel/kernel.h" +#include "kernel/ascend_kernel_mod.h" #include "kernel/tbe/tbe_utils.h" namespace mindspore { namespace kernel { -class TbeKernelMod : public KernelMod { +class TbeKernelMod : public AscendKernelMod { public: explicit TbeKernelMod(KernelPackPtr kernel_pack) : kernel_pack_(std::move(kernel_pack)) {} ~TbeKernelMod() override = default; diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_parallel_build.h b/mindspore/ccsrc/kernel/tbe/tbe_kernel_parallel_build.h index 45f56fdd0b..5066e9457f 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_kernel_parallel_build.h +++ b/mindspore/ccsrc/kernel/tbe/tbe_kernel_parallel_build.h @@ -55,8 +55,9 @@ class ParallelBuildManager { bool WaitOne(int *task_id, char **task_result) const; bool IsAllTaskFinish() const; std::pair TaskFinishProcess(int32_t task_id, bool set_kernel_mod = true); - KernelModPtr GenKernelMod(const string &json_name, const string &processor, const vector &input_size_list, - const vector &output_size_list, const KernelPackPtr &kernel_pack) const; + KernelModPtr GenKernelMod(const string &json_name, const string &processor, + const std::vector &input_size_list, const std::vector &output_size_list, + const KernelPackPtr &kernel_pack) const; private: PyObject *tbe_parallel_compiler_; diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_select.cc b/mindspore/ccsrc/kernel/tbe/tbe_kernel_select.cc index 1953fd0c72..92798aa6bc 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_kernel_select.cc +++ b/mindspore/ccsrc/kernel/tbe/tbe_kernel_select.cc @@ -168,7 +168,7 @@ bool ParseDynamicFormatJson(const std::string &jsonStr, std::vector &anf_node) { +std::string OpSelectFormat(const std::shared_ptr &anf_node) { nlohmann::json kernel_json; std::string res_json_str; TbeKernelJsonCreator creator(OP_SELECT_FORMAT); @@ -182,7 +182,7 @@ std::string OpSelectFormat(const shared_ptr &anf_node) { return res_json_str; } -void SetTidyInputsInfo(const shared_ptr &anf_node, +void SetTidyInputsInfo(const std::shared_ptr &anf_node, const std::shared_ptr &builder, const std::vector> &inputs) { std::vector inputs_type; @@ -231,7 +231,7 @@ void SetTidyInputsInfo(const shared_ptr &anf_node, builder->SetInputsFormat(inputs_format); } -void SetTidyOutputsInfo(const shared_ptr &anf_node, +void SetTidyOutputsInfo(const std::shared_ptr &anf_node, const std::shared_ptr &builder, const std::vector> &outputs) { std::vector outputs_type; @@ -268,7 +268,8 @@ void SetTidyOutputsInfo(const shared_ptr &anf_node, builder->SetOutputsFormat(outputs_format); } -void GenTidyKernelBuildInfo(const shared_ptr &anf_node, const std::vector> &inputs, +void GenTidyKernelBuildInfo(const std::shared_ptr &anf_node, + const std::vector> &inputs, const std::vector> &outputs) { auto builder_tmp = std::make_shared(); builder_tmp->SetKernelType(TBE_KERNEL); diff --git a/mindspore/ccsrc/kernel/tbe/tbe_utils.cc b/mindspore/ccsrc/kernel/tbe/tbe_utils.cc index ab29ca69bb..5980a0fd88 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_utils.cc +++ b/mindspore/ccsrc/kernel/tbe/tbe_utils.cc @@ -26,6 +26,7 @@ #include #include +#include "runtime/kernel.h" #include "kernel/oplib/oplib.h" #include "utils/utils.h" #include "session/anf_runtime_algorithm.h" diff --git a/mindspore/ccsrc/pipeline/base.h b/mindspore/ccsrc/pipeline/base.h new file mode 100644 index 0000000000..d007eac294 --- /dev/null +++ b/mindspore/ccsrc/pipeline/base.h @@ -0,0 +1,64 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_PIPELINE_BASE_H_ +#define MINDSPORE_CCSRC_PIPELINE_BASE_H_ + +#include +#include +#include +#include + +#include "ir/anf.h" +#include "pipeline/resource.h" +#include "utils/context/ms_context.h" + +namespace mindspore { +namespace pipeline { + +struct ExecutorInfo { + FuncGraphPtr func_graph; + ResourcePtr resource; + std::size_t arg_list_size; +}; + +using ExecutorInfoPtr = std::shared_ptr; + +inline std::string GetPhasePrefix(const std::string& phase) { + auto pos = phase.find('.'); + if (pos == std::string::npos) { + MS_LOG(EXCEPTION) << "phase has no . for prefix" << phase; + } + return phase.substr(0, pos); +} + +inline std::string GetFilePathName(const std::string& file_name) { + std::ostringstream oss; + auto ms_context = MsContext::GetInstance(); + if (ms_context == nullptr) { + MS_LOG(EXCEPTION) << "ms_context is nullptr"; + } + auto save_graphs_path = ms_context->save_graphs_path(); + if (save_graphs_path.empty()) { + save_graphs_path = "."; + } + oss << save_graphs_path << "/" << file_name; + return oss.str(); +} +} // namespace pipeline +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_PIPELINE_BASE_H_ diff --git a/mindspore/ccsrc/pipeline/init.cc b/mindspore/ccsrc/pipeline/init.cc index f42ae0cf7b..24ead047d3 100644 --- a/mindspore/ccsrc/pipeline/init.cc +++ b/mindspore/ccsrc/pipeline/init.cc @@ -73,7 +73,7 @@ PYBIND11_MODULE(_c_expression, m) { "Get CNode Strategy Dictionary.") .def("get_allreduce_fusion", &ExecutorPy::GetAllreduceFusion, py::arg("phase") = py::str("train"), "Get Allreduce Fusion Dictionary.") - .def("build_data_graph", &ExecutorPy::BuildDFGraph, py::arg("build_params"), py::arg("phase") = py::str("train"), + .def("build_data_graph", &ExecutorPy::BuildGraph, py::arg("build_params"), py::arg("phase") = py::str("train"), py::arg("broadcast_params") = py::dict(), "Build data graph.") .def("has_compiled", &ExecutorPy::HasCompiled, py::arg("phase") = py::str(""), "get if cell compiled.") .def("run_init_graph", &ExecutorPy::RunInitGraph, "Run init Graph."); @@ -86,19 +86,17 @@ PYBIND11_MODULE(_c_expression, m) { (void)m.def("generate_key", &mindspore::pipeline::GenerateKey, "Generate the function graph key."); (void)m.def("real_run_op", &mindspore::pynative::RunOp, "Run op pynatively."); - (void)m.def("initialize_distribute", &mindspore::pipeline::InitDistribute, "Initialize for Distribute.") - .def("init_ge", &mindspore::pipeline::InitGe, "Init GE"); (void)m.def("reset_op_id", &mindspore::pipeline::ResetOpId, "Reset Operator Id"); (void)m.def("init_hccl", &mindspore::pipeline::InitHccl, "Init Hccl"); - (void)m.def("finalize_ge", &mindspore::pipeline::FinalizeGe, "Finalize Ge"); (void)m.def("finalize_hccl", &mindspore::pipeline::FinalizeHccl, "Finalize Hccl"); - (void)m.def("set_ge_option", &mindspore::pipeline::SetGeOption, "API for set ge option."); (void)m.def("verify_inputs_signature", &mindspore::pipeline::VerifyInputSignature, "Verify input signature."); (void)m.def("init_exec_dataset", &mindspore::pipeline::InitExecDataset, py::arg("queue_name"), py::arg("size"), py::arg("batch_size"), py::arg("types"), py::arg("shapes"), py::arg("input_indexs"), py::arg("phase") = py::str("dataset"), "Init and exec dataset."); (void)m.def("_set_dataset_mode_config", &mindspore::ConfigManager::SetDatasetModeConfig, "API for set dataset mode."); - (void)m.def("export_graph", &mindspore::pipeline::ExportDFGraph, "Export Graph."); + (void)m.def("init_ge", &mindspore::pipeline::InitGe, "Init GE"); + + (void)m.def("export_graph", &mindspore::pipeline::ExportGraph, "Export Graph."); (void)py::class_>(m, "MSContext") .def_static("get_instance", &mindspore::MsContext::GetInstance, "Get ms context instance.") diff --git a/mindspore/ccsrc/pipeline/parse/python_adapter.cc b/mindspore/ccsrc/pipeline/parse/python_adapter.cc index 776e33235e..db40238729 100644 --- a/mindspore/ccsrc/pipeline/parse/python_adapter.cc +++ b/mindspore/ccsrc/pipeline/parse/python_adapter.cc @@ -27,6 +27,7 @@ static std::shared_ptr scoped_ = nullptr; // true: start process from python, false: start process from c++ static bool python_env_ = false; static bool use_signature_in_resolve_ = true; +void ResetPythonScope() { scoped_ = nullptr; } void set_use_signature_in_resolve(bool use_signature) noexcept { use_signature_in_resolve_ = use_signature; } bool UseSignatureInResolve() { return use_signature_in_resolve_; } void set_python_env_flag(bool python_env) noexcept { python_env_ = python_env; } diff --git a/mindspore/ccsrc/pipeline/parse/python_adapter.h b/mindspore/ccsrc/pipeline/parse/python_adapter.h index 4b9cbff251..12cfc27186 100644 --- a/mindspore/ccsrc/pipeline/parse/python_adapter.h +++ b/mindspore/ccsrc/pipeline/parse/python_adapter.h @@ -55,6 +55,7 @@ void set_use_signature_in_resolve(bool use_signature) noexcept; bool UseSignatureInResolve(); std::shared_ptr set_python_scoped(); +void ResetPythonScope(); bool IsPythonEnv(); void SetPythonPath(const std::string& path); void set_python_env_flag(bool python_env) noexcept; diff --git a/mindspore/ccsrc/pipeline/pipeline.cc b/mindspore/ccsrc/pipeline/pipeline.cc index 3c3478e89d..861862b849 100644 --- a/mindspore/ccsrc/pipeline/pipeline.cc +++ b/mindspore/ccsrc/pipeline/pipeline.cc @@ -27,11 +27,6 @@ #include "pipeline/pass.h" #include "pipeline/parse/data_converter.h" #include "optimizer/ad/dfunctor.h" -#include "ir/meta_tensor.h" -#include "transform/convert.h" -#include "transform/df_graph_manager.h" -#include "transform/graph_builder.h" -#include "transform/graph_runner.h" #include "debug/anf_ir_dump.h" #include "debug/anf_ir_utils.h" #include "utils/config_manager.h" @@ -44,6 +39,12 @@ #include "device/kernel_runtime_manager.h" #include "debug/trace.h" +#if (ENABLE_GE || ENABLE_D) +#include "pipeline/pipeline_ge.h" +#include "transform/convert.h" +#include "transform/df_graph_manager.h" +#endif + namespace mindspore { // namespace to support intermediate representation definition namespace pipeline { @@ -54,12 +55,6 @@ using mindspore::abstract::AbstractTensor; using mindspore::abstract::AbstractTensorPtr; using mindspore::abstract::AbstractTuple; using mindspore::abstract::AbstractTuplePtr; -using mindspore::transform::DfGraphConvertor; -using mindspore::transform::DfGraphManager; -using mindspore::transform::GeTensorPtr; -using mindspore::transform::MeTensorPtr; -using mindspore::transform::Status; -using mindspore::transform::TransformUtil; const char IR_TYPE_ANF[] = "anf_ir"; const char IR_TYPE_ONNX[] = "onnx_ir"; @@ -85,65 +80,8 @@ std::string GetBaseNameForIR(int stage_idx, const std::string& action_name) { oss << save_graphs_path << "/" << stage_idx << "_" << action_name; return oss.str(); } - -std::string GetFilePathName(const std::string& file_name) { - std::ostringstream oss; - auto ms_context = MsContext::GetInstance(); - if (ms_context == nullptr) { - MS_LOG(EXCEPTION) << "ms_context is nullptr"; - } - auto save_graphs_path = ms_context->save_graphs_path(); - if (save_graphs_path.empty()) { - save_graphs_path = "."; - } - oss << save_graphs_path << "/" << file_name; - return oss.str(); -} } // namespace -// We will not execute graph when output is constant or just input itself. -static bool IsGraphOutputValueNodeOrParameter(const AnfNodePtr& output, const py::tuple& args, - const std::shared_ptr& ret_val) { - if (output->isa()) { - MS_LOG(INFO) << "Graph's output is a constant. No need to execute."; - ValuePtr value = GetValueNode(output); - *ret_val = ValuePtrToPyData(value); - return true; - } - - // Adapter will transform values in __init__() and construct() to parameters, this could cause - // inputs (a.k.a args in current function) size less than parameters'. - if (output->isa()) { - MS_LOG(INFO) << "Graph's output is a parameter. If all params are inputs, no need to execute."; - if (args.empty()) { - MS_LOG(EXCEPTION) << "Inputs size is 0, let graph to be executed."; - } - // Find the right parameter as ret_val. - auto func_graph = output->func_graph(); - MS_EXCEPTION_IF_NULL(func_graph); - auto params = func_graph->parameters(); - if (params.empty()) { - MS_EXCEPTION(UnknownError) << "Graph's parameters size is 0"; - } - if (args.size() != params.size()) { - MS_LOG(EXCEPTION) << "Input size " << args.size() << " not equal to params size " << params.size() - << ", let graph to be executed."; - } - - auto it = std::find(params.begin(), params.end(), output); - if (it == params.end()) { - MS_EXCEPTION(UnknownError) << "When graph output is Parameter, it should be found in graph parameters"; - } - size_t index = it - params.cbegin(); - if (index >= args.size()) { - MS_EXCEPTION(UnknownError) << "Index " << index << " equal or larger than args size " << args.size() << "."; - } - *ret_val = args[index]; - return true; - } - return false; -} - py::tuple GenerateKey(const std::string& name, const std::unordered_map& defaults) { MS_LOG(DEBUG) << "GenerateKey args size:" << defaults.size(); abstract::AbstractBasePtrList args_spec; @@ -207,11 +145,7 @@ py::bool_ VerifyInputSignature(const py::list input_signature, const py::tuple i return true; } -ExecutorPy::ExecutorPy() { - // because Ge only support one Session exist at the same time ,so we delete the old one - DfGraphManager::GetInstance().DeleteGraphRunner(); - DfGraphManager::GetInstance().DeleteGeSession(); -} +ExecutorPy::ExecutorPy() {} ResourcePtr ExecutorPy::GetResource(const std::string& phase) { MS_LOG(DEBUG) << "phase size:" << info_.size(); @@ -221,14 +155,6 @@ ResourcePtr ExecutorPy::GetResource(const std::string& phase) { return info_[phase]->resource; } -std::string GetPhasePrefix(const std::string& phase) { - auto pos = phase.find('.'); - if (pos == std::string::npos) { - MS_LOG(EXCEPTION) << "phase has no . for prefix" << phase; - } - return phase.substr(0, pos); -} - FuncGraphPtr ExecutorPy::GetFuncGraph(const std::string& phase) { if (info_.count(phase) == 0) { MS_LOG(EXCEPTION) << "no phase in executor:" << GetPhasePrefix(phase); @@ -323,11 +249,15 @@ void ExecutorPy::DelNetRes(const std::string& id) { } } + MS_LOG(INFO) << "Delete flag:" << flag; +#ifdef ENABLE_GE if (flag && info_.size() == 0) { - DfGraphManager::GetInstance().DeleteGraphRunner(); - DfGraphManager::GetInstance().EraseAnfGraph(); - DfGraphManager::GetInstance().DeleteGeSession(); + // because Ge only support one Session exist at the same time ,so we delete the old one + transform::DfGraphManager::GetInstance().DeleteGraphRunner(); + transform::DfGraphManager::GetInstance().EraseAnfGraph(); + transform::DfGraphManager::GetInstance().DeleteGeSession(); } +#endif } } @@ -405,7 +335,8 @@ bool ExecutorPy::CompileInner(const py::object& obj, const py::tuple& args, cons use_vm = ChangeExportGeirUseVmFlag(use_vm, phase_s); - if (use_vm) { + std::string backend = MsContext::GetInstance()->backend_policy(); + if (use_vm && backend != "ge") { // Create backend and session resource->results()[kBackend] = compile::CreateBackend(); p_actions = VmPipeline(); @@ -497,30 +428,6 @@ bool ExecutorPy::Compile(const py::object& obj, const py::tuple& args, const py: return ret_value; } -void SetGeOption(const std::map& options) { - ConfigManager::GetInstance().set_ge_initialize_options(options); -} - -bool InitDistribute(const std::map& options) { - ConfigManager::GetInstance().set_parallel_strategy(ParallelStrategy::DISTRIBUTION); - MS_LOG(INFO) << "ME run in DISTRIBUTION strategy mode"; - - SetGeOption(options); -#ifdef ENABLE_GE - auto ge_options = ConfigManager::GetInstance().ge_initialize_options(); - { - // Release GIL before calling into (potentially long-running) C++ code - py::gil_scoped_release release; - if (ge::GEInitialize(ge_options) != ge::GRAPH_SUCCESS) { - MS_LOG(ERROR) << "Initialize GE failed!"; - return false; - } - } -#endif - MS_LOG(DEBUG) << "Initialize Ge success"; - return true; -} - #ifdef ENABLE_LOAD_ANF_IR // get MindSpore Intermediate Representation File std::string GetMsIrFile(void) { @@ -704,9 +611,25 @@ py::object ExecutorPy::Run(const py::tuple& args, const py::object& phase) { } auto phase_s = py::cast(phase); std::string backend = MsContext::GetInstance()->backend_policy(); +#ifdef ENABLE_GE if (backend == "ge") { - return ExecDFGraph(args, phase_s); + return ExecDFGraph(info_, args, phase_s); } +#else + MS_LOG(WARNING) << "In ut test " << size << phase_s; + if (backend == "ge") { + std::shared_ptr ret_val = std::make_shared(); + if (info_.count(phase_s) != 0 && info_[phase_s]->func_graph != nullptr) { + if (IsGraphOutputValueNodeOrParameter(info_[phase_s]->func_graph->output(), args, ret_val)) { + return *ret_val; + } + } + if (args.size() > 0) { + return args[0]; + } + return args; + } +#endif std::size_t full_arg_size = ArgListSize(phase_s); if (size > full_arg_size) { MS_LOG(WARNING) << "The arg num : size = " << size << ". full_arg_size = " << full_arg_size; @@ -719,435 +642,25 @@ py::object ExecutorPy::Run(const py::tuple& args, const py::object& phase) { MS_LOG(EXCEPTION) << "Can't find run graph func for " << phase_s; } - MS_LOG(DEBUG) << "eval run"; + MS_LOG(DEBUG) << "eval run" << backend; BaseRef value = (*run)(arg_list); MS_LOG(DEBUG) << "run end"; return BaseRefToPyData(value); } -py::object ExtractGeneralCnodeRet(const AbstractBasePtr& cnode_data, const py::tuple& data, size_t* count) { - MS_EXCEPTION_IF_NULL(cnode_data); - if (*count >= data.size()) { - MS_LOG(EXCEPTION) << "The number of elements in the outputs : " << data.size() - << " less than the number of elements required. "; - } - - if (cnode_data->isa()) { - BaseShapePtr shape = cnode_data->BuildShape(); - auto shape_act = shape->cast()->shape(); - Tensor tensor_exp = py::cast(data[*count]); - if (shape_act != tensor_exp.shape()) { - MS_LOG(EXCEPTION) << "The shape of the tensor returned from GE is not the same as " - "the shape of the tensor derived from ME."; - } - return data[(*count)++]; - } - - if (!cnode_data->isa()) { - MS_LOG(EXCEPTION) << "The output of operator in the final anf graph could " - << "only be a tensor or a tuple of tensor, but got " << cnode_data->BuildValue()->ToString() - << "."; - } - auto data_tp = cnode_data->cast(); - auto elements = data_tp->elements(); - size_t size = data_tp->size(); - py::tuple tp = py::tuple(size); - for (size_t i = 0; i < size; i++) { - tp[i] = ExtractGeneralCnodeRet(elements[i], data, count); - } - return std::move(tp); -} - -py::object StructureOutput(const AnfNodePtr& output_node, const py::tuple& data, size_t* count) { - MS_EXCEPTION_IF_NULL(output_node); - - if (output_node->isa()) { - return ValuePtrToPyData(GetValueNode(output_node)); - } - - if (*count >= data.size()) { - MS_LOG(EXCEPTION) << "The number of elements in the outputs : " << data.size() - << " less than the number of elements required. "; - } - if (output_node->isa()) { - return data[(*count)++]; - } - - auto output_c = output_node->cast(); - if (output_c == nullptr) { - MS_LOG(EXCEPTION) << "The final anf graph could only have constant, parameter, and operator, but got " - << output_node->ToString(); - } - - if (output_c->IsApply(prim::kPrimMakeTuple)) { - auto input_list = output_c->inputs(); - size_t size = input_list.size(); - py::tuple tp = py::tuple(size - 1); - for (size_t i = 1; i < size; i++) { - tp[i - 1] = StructureOutput(input_list[i], data, count); - } - return std::move(tp); - } - if (output_c->IsApply(prim::kPrimDepend)) { - return StructureOutput(output_c->input(1), data, count); - } - - return ExtractGeneralCnodeRet(output_c->abstract(), data, count); -} - -std::shared_ptr DoExecGraph(const FuncGraphPtr& graph, const std::vector& inputs, - const std::string& phase) { - std::vector ge_tensors = TransformUtil::ConvertInputTensors(inputs, kOpFormat_NCHW); - if (ge_tensors.size() != inputs.size()) { - MS_LOG(ERROR) << "args convert to ge tensor error"; - return nullptr; - } - - std::vector ge_outputs; - transform::RunOptions run_options; - - run_options.name = phase; - - auto graph_runner = DfGraphManager::GetInstance().GetGraphRunner(); - - if (graph_runner == nullptr) { - MS_LOG(ERROR) << "Can not found GraphRunner"; - return nullptr; - } - - { - // Release GIL before calling into (potentially long-running) C++ code - py::gil_scoped_release release; - MS_LOG(DEBUG) << "Run graph begin, inputs size is: " << inputs.size(); - Status ret = graph_runner->RunGraph(run_options, ge_tensors, &ge_outputs); - MS_LOG(DEBUG) << "Run graph finish, outputs size is: " << ge_outputs.size(); - if (ret != Status::SUCCESS) { - MS_LOG(ERROR) << "Exec graph failed"; - return nullptr; - } - } - - std::vector me_outputs = TransformUtil::ConvertGeTensors(ge_outputs); - if (me_outputs.size() != ge_outputs.size()) { - MS_LOG(ERROR) << "Convert output Ge tensor to Me tensor failed"; - } - - py::tuple outputs(me_outputs.size()); - for (std::size_t i = 0; i < outputs.size(); i++) { - outputs[i] = *me_outputs[i]; - } - - std::shared_ptr ret = nullptr; - -#ifdef ENABLE_GE - AnfNodePtr output_node = graph->get_return()->input(1); - MS_EXCEPTION_IF_NULL(output_node); - size_t count = 0; - py::object oj = StructureOutput(output_node, outputs, &count); - ret = std::make_shared(oj); +FuncGraphPtr ExecutorPy::BuildGraph(const py::dict& init_params, const std::string& phase, + const py::object& broadcast_params) { +#if (ENABLE_GE || ENABLE_D) + return BuildDFGraph(info_, init_params, phase, broadcast_params); #else - if (outputs.size() == 1) { - ret = std::make_shared(outputs[0]); - } else { - ret = std::make_shared(outputs); - } + return nullptr; #endif - - return ret; -} - -void DoExecNonInputGraph(const std::string& phase) { - std::vector ge_tensors; - std::vector ge_outputs; - transform::RunOptions run_options; - run_options.name = phase; - auto graph_runner = DfGraphManager::GetInstance().GetGraphRunner(); - - if (graph_runner == nullptr) { - MS_LOG(ERROR) << "Can not found GraphRunner"; - return; - } - { - // Release GIL before calling into (potentially long-running) C++ code - py::gil_scoped_release release; - Status ret = graph_runner->RunGraph(run_options, ge_tensors, &ge_outputs); - if (ret != Status::SUCCESS) { - MS_LOG(ERROR) << "Exec graph:" << run_options.name << " failed"; - return; - } - } -} - -void ExecutorPy::ProcessGeArg(const py::tuple& args, const std::string& phase, std::vector* inputs) { - // check the arg and use the ExecutorPy args - std::size_t size = args.size(); - if (size != ArgListSize(phase)) { - MS_LOG(EXCEPTION) << "The real arg num : size = " << size << ". graph_arg_size = " << ArgListSize(phase); - } - - // process the first args of tensor - // only in Dataset Feed Mode, fp_bp graph need input tensors - if (ConfigManager::GetInstance().dataset_mode() == DS_FEED_MODE) { - for (std::size_t i = 0; i < size; i++) { - ValuePtr converted = nullptr; - bool succ = parse::ConvertData(args[i], &converted); - if (!succ) { - MS_LOG(EXCEPTION) << "args convert error"; - } - if (converted->isa()) { - (*inputs).push_back(converted->cast()); - } else { - MS_LOG(EXCEPTION) << "args, " << converted->ToString() << " is not tensor"; - } - } - } -} - -py::object ExecutorPy::ExecDFGraph(const py::tuple& args, const std::string& phase) { - std::string phase_prefix = GetPhasePrefix(phase); - - if (phase_prefix == "save") { - DoExecNonInputGraph(phase); - ConfigManager::GetInstance().ResetConfig(); - return py::none(); - } - - if (info_.count(phase) == 0) { - MS_LOG(EXCEPTION) << "has no phase:" << phase; - } - -#if (!defined ENABLE_GE) || (defined ENABLE_INFER) - // Now don't use the graph because the exec ge function don't take effect - MS_EXCEPTION_IF_NULL(info_[phase]->func_graph); - if (ENABLE_TRAIN != info_[phase]->func_graph->flags()["training"]) { - MS_LOG(ERROR) << "Graph training mode mismatch mode of libraries"; - ConfigManager::GetInstance().ResetConfig(); - return py::none(); - } -#endif - - std::shared_ptr ret_val = std::make_shared(); - if (IsGraphOutputValueNodeOrParameter(info_[phase]->func_graph->output(), args, ret_val)) { - ConfigManager::GetInstance().ResetConfig(); - return *ret_val; - } - - std::vector inputs; - ProcessGeArg(args, phase, &inputs); - - std::shared_ptr ret = DoExecGraph(GetFuncGraph(phase), inputs, phase); - ConfigManager::GetInstance().ResetConfig(); - if (ret != nullptr) { - return *ret; - } else { - MS_LOG(EXCEPTION) << "exec graph failed"; - } } void ExecutorPy::RunInitGraph(const py::dict& init_params, const std::string& phase) { - MS_LOG(DEBUG) << "ExecInitGraph start."; - TensorOrderMap inputs_with_name{}; - ConvertObjectToTensors(init_params, &inputs_with_name); - std::vector inputs; - (void)std::transform(inputs_with_name.begin(), inputs_with_name.end(), std::back_inserter(inputs), - [](const std::pair& item) { return item.second; }); - - std::vector ge_tensors = TransformUtil::ConvertInputTensors(inputs, kOpFormat_NCHW); - if (ge_tensors.size() != inputs.size()) { - MS_LOG(ERROR) << "Args convert to ge tensor error."; - return; - } - MS_LOG(DEBUG) << "Run graph begin, inputs size is: " << inputs.size() << "."; - - std::vector ge_outputs; - transform::RunOptions run_options; - - run_options.name = phase; - if (DfGraphManager::GetInstance().GetGraphByName(phase) == nullptr) { - MS_LOG(WARNING) << "Can not find " << phase << " sub graph, don't need data init subgraph in INFER mode."; - return; - } - auto graph_runner = DfGraphManager::GetInstance().GetGraphRunner(); - if (graph_runner == nullptr) { - MS_LOG(EXCEPTION) << "Can not found GraphRunner."; - } - { - // Release GIL before calling into (potentially long-running) C++ code - py::gil_scoped_release release; - Status ret = graph_runner->RunGraph(run_options, ge_tensors, &ge_outputs); - if (ret != Status::SUCCESS) { - MS_LOG(EXCEPTION) << "Exec " << phase << " graph failed."; - } - - MS_LOG(INFO) << "Exec " << phase << " graph success."; - - if ((ConfigManager::GetInstance().parallel_strategy() == ParallelStrategy::DISTRIBUTION) && - (DfGraphManager::GetInstance().GetGraphByName(BROADCAST_GRAPH_NAME) != nullptr)) { - run_options.name = BROADCAST_GRAPH_NAME; - ret = graph_runner->RunGraph(run_options, ge_tensors, &ge_outputs); - if (ret != Status::SUCCESS) { - MS_LOG(EXCEPTION) << "Exec BROADCAST_GRAPH_NAME failed."; - } - MS_LOG(INFO) << "Exec broadcast graph success."; - } - } -} - -Status CreateSessionAndGraphRunner(bool is_training = true) { - std::shared_ptr sess = DfGraphManager::GetInstance().GetGeSession(); - if (sess == nullptr) { - transform::SessionOptions options; - if (is_training) { - options["ge.trainFlag"] = "1"; - options["ge.streamNum"] = "100"; - options["ge.enabledLocalFmkop"] = "1"; - options["ge.hcomParallel"] = "1"; - } else { - options["ge.trainFlag"] = "0"; - } - - options["ge.enablePrintOpPass"] = "0"; - sess = transform::GraphRunner::NewSession(options); - if (sess == nullptr) { - MS_LOG(ERROR) << "Init data graph failed, because of create Ge session failed"; - return Status::FAILED; - } else { - DfGraphManager::GetInstance().SetGeSession(sess); - } - } - - transform::GraphRunnerOptions options; - options.sess_ptr = sess; - auto graph_runner = std::make_shared(options); - if (graph_runner == nullptr) { - MS_LOG(ERROR) << "Create new graph runner failed"; - return Status::FAILED; - } else { - DfGraphManager::GetInstance().SetGraphRunner(graph_runner); - } - - return Status::SUCCESS; -} - -void ExecutorPy::ConvertObjectToTensors(const py::dict& dict, TensorOrderMap* const tensors) { - for (auto item : dict) { - if ((!py::isinstance(item.first))) { - MS_LOG(WARNING) << "Type of key of py_dict is not string, ignore it."; - continue; - } - std::shared_ptr tensor; - std::string name = py::cast(item.first); - if (py::isinstance(item.second.attr("default_input"))) { - // convert float to tensor with shape([1]) - tensor = std::make_shared(kNumberTypeFloat32, std::vector({1})); - *(static_cast(tensor->data_c(true))) = py::cast(item.second.attr("default_input")); - } else if (py::isinstance(item.second.attr("default_input"))) { - // convert int to tensor with shape([1]) - tensor = std::make_shared(kNumberTypeInt32, std::vector({1})); - *(static_cast(tensor->data_c(true))) = py::cast(item.second.attr("default_input")); - } else if (py::hasattr(item.second.attr("default_input"), PYTHON_TENSOR_FLAG)) { - // cast tensor - tensor = py::cast>(item.second.attr("default_input")); - } - - if (tensor == nullptr) { - MS_LOG(EXCEPTION) << "Get default value for " << name << " failed"; - } - (void)tensors->emplace(name, tensor); - } -} - -bool ExecutorPy::AddDFGraph(const py::dict& init_params, const std::string& phase, const py::object& broadcast_params) { - FuncGraphPtr anf_graph = info_[phase]->func_graph; - DfGraphConvertor convertor(anf_graph); - - size_t pos = phase.find('.'); - std::string net_id = ((pos == std::string::npos || pos == phase.size() - 1) ? phase : phase.substr(pos + 1)); - std::string phase_prefix = phase.substr(0, pos); - - if (phase_prefix == "export") { - MS_LOG(INFO) << "Set DfGraphConvertor training : false"; - convertor.set_training(false); - } - - TensorOrderMap init_tensors{}; - ConvertObjectToTensors(init_params, &init_tensors); - (void)convertor.ConvertAllNode().InitParam(init_tensors).BuildGraph(); - - if (broadcast_params != py::none()) { - if (!py::isinstance(broadcast_params)) { - MS_LOG(ERROR) << "Invalid broadcast params, it must be py::dict type"; - return false; - } - py::dict broadcast = broadcast_params.cast(); - if (broadcast.empty()) { - (void)convertor.GenerateBroadcastGraph(init_tensors); - } else { - TensorOrderMap broadcast_tensors{}; - ConvertObjectToTensors(broadcast, &broadcast_tensors); - (void)convertor.GenerateBroadcastGraph(broadcast_tensors); - } - MS_LOG(INFO) << "Generate broadcast graph with params and broadcast_empty is " << broadcast.empty(); - } - - (void)convertor.GenerateCheckpointGraph(); - if (convertor.ErrCode() != 0) { - DfGraphManager::GetInstance().ClearGraph(); - MS_LOG(ERROR) << "convert df graph failed, err:" << convertor.ErrCode(); - return false; - } - - if (MsContext::GetInstance()->save_graphs_flag()) { - convertor.DrawComputeGraph(GetFilePathName("ge_graph.dot")); // for debug - convertor.DrawInitGraph(GetFilePathName("init_graph.dot")); // for debug - convertor.DrawSaveCheckpointGraph(GetFilePathName("save_checkpoint_graph.dot")); // for debug - } - std::string init_graph = "init_subgraph." + net_id; - std::string checkpoint_name = "save." + net_id; - if (phase.find("train") != std::string::npos) { - (void)DfGraphManager::GetInstance().AddGraph(phase, convertor.GetComputeGraph(), {{"ge.exec.variable_acc", "1"}}); - } else { - (void)DfGraphManager::GetInstance().AddGraph(phase, convertor.GetComputeGraph()); - } - (void)DfGraphManager::GetInstance().AddGraph(init_graph, convertor.GetInitGraph()); - (void)DfGraphManager::GetInstance().AddGraph(BROADCAST_GRAPH_NAME, convertor.GetBroadcastGraph()); - Status ret = DfGraphManager::GetInstance().AddGraph(checkpoint_name, convertor.GetSaveCheckpointGraph()); - if (ret == Status::SUCCESS) { - DfGraphManager::GetInstance().SetAnfGraph(checkpoint_name, anf_graph); - } - - return true; -} - -FuncGraphPtr ExecutorPy::BuildDFGraph(const py::dict& init_params, const std::string& phase, - const py::object& broadcast_params) { - if (info_.count(phase) == 0) { - MS_LOG(EXCEPTION) << "no phase in executor:" << GetPhasePrefix(phase); - } - FuncGraphPtr anf_graph = info_[phase]->func_graph; - - if (MsContext::GetInstance()->save_graphs_flag()) { - draw::Draw(GetFilePathName("anf_graph.dot"), anf_graph); // for debug - DumpIR(GetFilePathName("anf_graph.ir"), anf_graph, true); - } - - if (!AddDFGraph(init_params, phase, broadcast_params)) { - MS_LOG(ERROR) << "GenConvertor failed"; - return nullptr; - } - -#if ENABLE_TRAIN - (void)setenv("GE_TRAIN", "1", 1); -#else - (void)setenv("GE_TRAIN", "0", 1); +#if ENABLE_GE + RunGEInitGraph(init_params, phase); #endif - - if (CreateSessionAndGraphRunner(static_cast(ENABLE_TRAIN)) != Status::SUCCESS) { - MS_LOG(ERROR) << "Create GE Session or GraphRunner failed."; - return nullptr; - } - - return anf_graph; } bool InitExecDataset(const std::string& queue_name, int64_t iter_num, int64_t batch_size, @@ -1156,47 +669,16 @@ bool InitExecDataset(const std::string& queue_name, int64_t iter_num, int64_t ba std::string name = MsContext::GetInstance()->backend_policy(); if (name == kMsConvert || name == kMsVm) { return InitExecDatasetVm(queue_name, iter_num, batch_size, types, shapes, input_indexes); - } else { - return InitExecDatasetGe(queue_name, iter_num, batch_size, types, shapes, input_indexes, phase); } -} - -bool InitExecDatasetGe(const std::string& queue_name, int64_t size, int64_t batch_size, - const std::vector& types, const std::vector>& shapes, - const std::vector& input_indexes, const std::string& phase) { - // Convert types to GE types and TF types - std::vector ge_types; - (void)std::transform(types.begin(), types.end(), std::back_inserter(ge_types), [](const TypePtr& i) -> int64_t { - return transform::TransformUtil::ConvertDataType(i->type_id()); - }); - - ConfigManager::GetInstance().set_dataset_mode(DatasetMode::DS_GRAPH_MODE); - ConfigManager::GetInstance().set_iter_num(size); - ConfigManager::GetInstance().set_dataset_phase(phase); - - DatasetGraphParam param(queue_name, size, batch_size, ge_types, shapes, input_indexes); - ConfigManager::GetInstance().set_dataset_param(param); - - if (transform::BuildDatasetGraph(param, phase) != transform::SUCCESS) { - MS_LOG(ERROR) << "Build dateset graph failed."; - return false; - } - -#if ENABLE_TRAIN - (void)setenv("GE_TRAIN", "1", 1); +#if ENABLE_GE + return InitExecDatasetGe(queue_name, iter_num, batch_size, types, shapes, input_indexes, phase); #else - (void)setenv("GE_TRAIN", "0", 1); -#endif - - if (CreateSessionAndGraphRunner(static_cast(ENABLE_TRAIN)) != Status::SUCCESS) { - MS_LOG(ERROR) << "Create GE Session or GraphRunner failed."; - return false; + std::string backend = MsContext::GetInstance()->backend_policy(); + if (backend == "ge") { + return true; } - - MS_LOG(INFO) << "DoExecNonInputGraph:" << phase; - DoExecNonInputGraph(phase); - - return true; +#endif + return false; } bool InitExecDatasetVm(const std::string& queue_name, int64_t size, int64_t batch_size, @@ -1259,25 +741,6 @@ bool InitExecDatasetVm(const std::string& queue_name, int64_t size, int64_t batc return true; } -void InitGe() { - // set python env flag - mindspore::parse::python_adapter::set_python_env_flag(true); - // open tsd before ge initialize - auto ms_context = MsContext::GetInstance(); - MS_EXCEPTION_IF_NULL(ms_context); - if (!ms_context->OpenTsd()) { - MS_LOG(EXCEPTION) << "open tsd failed"; - } - (void)ms_context->InitGe(); -} - -void FinalizeGe() { - auto context_ptr = MsContext::GetInstance(); - MS_EXCEPTION_IF_NULL(context_ptr); - (void)context_ptr->FinalizeGe(); - (void)context_ptr->CloseTsd(); -} - void ResetOpId() { mindspore::id_generator::reset_id(); } void InitHccl() { @@ -1309,24 +772,57 @@ void FinalizeHccl() { device::KernelRuntimeManager::Instance().ClearRuntimeResource(); #endif } -void ExportDFGraph(const std::string& file_name, const std::string&, const std::string& phase) { - MS_LOG(DEBUG) << "ExportGraph Begin"; - transform::DfGraphWrapperPtr wrap_ptr = DfGraphManager::GetInstance().GetGraphByName(phase); - if (wrap_ptr == nullptr) { - MS_LOG(ERROR) << "Get graph form DfGraphManager failed!"; - return; - } - transform::DfGraphPtr ge_graph = wrap_ptr->graph_ptr_; - if (nullptr == ge_graph) { - MS_LOG(ERROR) << "The export graph is null"; - return; - } - - (void)ge_graph->SaveToFile(file_name); - - MS_LOG(DEBUG) << "ExportGraph End"; +void ExportGraph(const std::string& file_name, const std::string&, const std::string& phase) { +#if (ENABLE_GE || ENABLE_D) + ExportDFGraph(file_name, phase); +#endif + MS_LOG(WARNING) << "In ut test no export_graph"; } +void ReleaseGeTsd() { + auto context_ptr = MsContext::GetInstance(); + if (context_ptr != nullptr) { + (void)context_ptr->FinalizeGe(true); + (void)context_ptr->CloseTsd(true); + } +} + +void InitGe() { + // set python env flag + mindspore::parse::python_adapter::set_python_env_flag(true); + // open tsd before ge initialize + auto ms_context = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(ms_context); + if (!ms_context->OpenTsd()) { + MS_LOG(EXCEPTION) << "open tsd failed"; + } + (void)ms_context->InitGe(); +} + +void FinalizeGe() { + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + (void)context_ptr->FinalizeGe(); + (void)context_ptr->CloseTsd(); +} + +void ClearResAtexit() { + MS_LOG(DEBUG) << "Pipeline clear all resource"; + device::KernelRuntimeManager::Instance().ClearRuntimeResource(); + + ad::g_k_prims.clear(); + + abstract::ClearPrimEvaluatorMap(); + compile::ClearConvertCache(); + pipeline::GetMethodMap().clear(); + pipeline::ExecutorPy::ClearRes(); +#ifdef ENABLE_GE + transform::DfGraphManager::GetInstance().ClearGraph(); + transform::DfGraphConvertor::get_adpt_map().clear(); +#endif + ReleaseGeTsd(); + parse::python_adapter::ResetPythonScope(); +} } // namespace pipeline } // namespace mindspore diff --git a/mindspore/ccsrc/pipeline/pipeline.h b/mindspore/ccsrc/pipeline/pipeline.h index b075306682..a0d7a19198 100644 --- a/mindspore/ccsrc/pipeline/pipeline.h +++ b/mindspore/ccsrc/pipeline/pipeline.h @@ -30,6 +30,7 @@ #include "pipeline/action.h" #include "vm/segment_runner.h" #include "vm/transform.h" +#include "pipeline/base.h" namespace mindspore { extern const char kMsConvert[]; @@ -55,14 +56,6 @@ class Pipeline { std::vector actions_; }; -struct ExecutorInfo { - FuncGraphPtr func_graph; - ResourcePtr resource; - std::size_t arg_list_size; -}; - -using ExecutorInfoPtr = std::shared_ptr; - // A function pipeline. class ExecutorPy : public std::enable_shared_from_this { public: @@ -80,11 +73,7 @@ class ExecutorPy : public std::enable_shared_from_this { bool CompileInner(const py::object& obj, const py::tuple& args, const py::object& phase, bool use_vm); bool Compile(const py::object& obj, const py::tuple& args, const py::object& phase, bool use_vm); - // for graph mode - py::object ExecDFGraph(const py::tuple& args, const std::string& phase = "train"); - void ProcessVmArg(const py::tuple& args, const std::string& phase, VectorRef* arg_list); - void ProcessGeArg(const py::tuple& args, const std::string& phase, std::vector* inputs); // for pynative mode when use_vm is on py::object Run(const py::tuple& args, const py::object& phase); @@ -95,9 +84,8 @@ class ExecutorPy : public std::enable_shared_from_this { compile::VmEvalFuncPtr GetVmEvalFunc(const std::string& phase); bool HasCompiled(const std::string& phase) const; - bool AddDFGraph(const py::dict& init_params, const std::string& phase, const py::object& broadcast_params); - FuncGraphPtr BuildDFGraph(const py::dict& init_params, const std::string& phase, - const py::object& broadcast_params = {}); + FuncGraphPtr BuildGraph(const py::dict& init_params, const std::string& phase, + const py::object& broadcast_params = {}); void RunInitGraph(const py::dict& init_params, const std::string& phase); py::dict GetParameterLayout(const std::string& phase); py::dict GetCNodeStrategy(const std::string& phase); @@ -122,32 +110,29 @@ using ExecutorPyPtr = std::shared_ptr; py::tuple GenerateKey(const std::string& name, const std::unordered_map& defaults); py::bool_ VerifyInputSignature(const py::list input_signature, const py::tuple inputs); -void SetGeOption(const std::map& options); bool InitDistribute(const std::map& options); void ResetOpId(); -void InitGe(); -void FinalizeGe(); void InitHccl(); void FinalizeHccl(); +void InitGe(); +void FinalizeGe(); + +void ClearResAtexit(); +void ReleaseGeTsd(); + +void ExportGraph(const std::string& file_name, const std::string&, const std::string& phase); // init and exec dataset sub graph bool InitExecDataset(const std::string& queue_name, int64_t iter_num, int64_t batch_size, const std::vector& types, const std::vector>& shapes, const std::vector& input_indexes, const std::string& phase); -// init and exec dataset sub graph for GE backend -bool InitExecDatasetGe(const std::string& queue_name, int64_t size, int64_t batch_size, - const std::vector& types, const std::vector>& shapes, - const std::vector& input_indexes, const std::string& phase); - // Build and run dataset subgraph for ms backend bool InitExecDatasetVm(const std::string& queue_name, int64_t size, int64_t batch_size, const std::vector& types, const std::vector>& shapes, const std::vector& input_indexes); -void ExportDFGraph(const std::string& file_name, const std::string&, const std::string& phase); - } // namespace pipeline } // namespace mindspore diff --git a/mindspore/ccsrc/pipeline/pipeline_ge.cc b/mindspore/ccsrc/pipeline/pipeline_ge.cc new file mode 100644 index 0000000000..4617884ca0 --- /dev/null +++ b/mindspore/ccsrc/pipeline/pipeline_ge.cc @@ -0,0 +1,545 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "pipeline/pipeline_ge.h" + +#include +#include +#include +#include +#include + +#include "debug/anf_ir_dump.h" +#include "ir/meta_tensor.h" +#include "transform/convert.h" +#include "transform/df_graph_manager.h" +#include "transform/graph_builder.h" +#include "transform/graph_runner.h" +#include "debug/draw.h" +#include "pipeline/static_analysis/abstract_value.h" + +namespace mindspore { +namespace pipeline { +using Tensor = mindspore::tensor::Tensor; +using MetaTensor = mindspore::tensor::MetaTensor; +using TensorOrderMap = std::map>; +using mindspore::abstract::AbstractTensor; +using mindspore::abstract::AbstractTuple; +using mindspore::abstract::AbstractTuplePtr; +using mindspore::transform::DfGraphConvertor; +using mindspore::transform::DfGraphManager; +using mindspore::transform::GeTensorPtr; +using mindspore::transform::MeTensorPtr; +using mindspore::transform::Status; +using mindspore::transform::TransformUtil; + +void DoExecNonInputGraph(const std::string& phase) { + std::vector ge_tensors; + std::vector ge_outputs; + transform::RunOptions run_options; + run_options.name = phase; + auto graph_runner = DfGraphManager::GetInstance().GetGraphRunner(); + + if (graph_runner == nullptr) { + MS_LOG(ERROR) << "Can not found GraphRunner"; + return; + } + { + // Release GIL before calling into (potentially long-running) C++ code + py::gil_scoped_release release; + Status ret = graph_runner->RunGraph(run_options, ge_tensors, &ge_outputs); + if (ret != Status::SUCCESS) { + MS_LOG(ERROR) << "Exec graph:" << run_options.name << " failed"; + return; + } + } +} + +void SetGeOption(const std::map& options) { + ConfigManager::GetInstance().set_ge_initialize_options(options); +} + +Status CreateSessionAndGraphRunner(bool is_training = true) { + std::shared_ptr sess = DfGraphManager::GetInstance().GetGeSession(); + if (sess == nullptr) { + transform::SessionOptions options; + if (is_training) { + options["ge.trainFlag"] = "1"; + options["ge.streamNum"] = "100"; + options["ge.enabledLocalFmkop"] = "1"; + options["ge.hcomParallel"] = "1"; + } else { + options["ge.trainFlag"] = "0"; + } + + options["ge.enablePrintOpPass"] = "0"; + sess = transform::GraphRunner::NewSession(options); + if (sess == nullptr) { + MS_LOG(ERROR) << "Init data graph failed, because of create Ge session failed"; + return Status::FAILED; + } else { + DfGraphManager::GetInstance().SetGeSession(sess); + } + } + + transform::GraphRunnerOptions options; + options.sess_ptr = sess; + auto graph_runner = std::make_shared(options); + if (graph_runner == nullptr) { + MS_LOG(ERROR) << "Create new graph runner failed"; + return Status::FAILED; + } else { + DfGraphManager::GetInstance().SetGraphRunner(graph_runner); + } + + return Status::SUCCESS; +} + +bool InitExecDatasetGe(const std::string& queue_name, int64_t size, int64_t batch_size, + const std::vector& types, const std::vector>& shapes, + const std::vector& input_indexes, const std::string& phase) { + std::vector ge_types; + (void)std::transform(types.begin(), types.end(), std::back_inserter(ge_types), [](const TypePtr& i) -> int64_t { + return transform::TransformUtil::ConvertDataType(i->type_id()); + }); + + ConfigManager::GetInstance().set_dataset_mode(DatasetMode::DS_GRAPH_MODE); + ConfigManager::GetInstance().set_iter_num(size); + ConfigManager::GetInstance().set_dataset_phase(phase); + + DatasetGraphParam param(queue_name, size, batch_size, ge_types, shapes, input_indexes); + ConfigManager::GetInstance().set_dataset_param(param); + + if (transform::BuildDatasetGraph(param, phase) != transform::SUCCESS) { + MS_LOG(ERROR) << "Build dateset graph failed."; + return false; + } + +#if ENABLE_TRAIN + (void)setenv("GE_TRAIN", "1", 1); +#else + (void)setenv("GE_TRAIN", "0", 1); +#endif + + if (CreateSessionAndGraphRunner(static_cast(ENABLE_TRAIN)) != Status::SUCCESS) { + MS_LOG(ERROR) << "Create GE Session or GraphRunner failed."; + return false; + } + + MS_LOG(INFO) << "DoExecNonInputGraph:" << phase; + DoExecNonInputGraph(phase); + + return true; +} + +void ConvertObjectToTensors(const py::dict& dict, TensorOrderMap* const tensors) { + for (auto item : dict) { + if ((!py::isinstance(item.first))) { + MS_LOG(WARNING) << "Type of key of py_dict is not string, ignore it."; + continue; + } + std::shared_ptr tensor; + std::string name = py::cast(item.first); + if (py::isinstance(item.second.attr("default_input"))) { + // convert float to tensor with shape([1]) + tensor = std::make_shared(kNumberTypeFloat32, std::vector({1})); + *(static_cast(tensor->data_c(true))) = py::cast(item.second.attr("default_input")); + } else if (py::isinstance(item.second.attr("default_input"))) { + // convert int to tensor with shape([1]) + tensor = std::make_shared(kNumberTypeInt32, std::vector({1})); + *(static_cast(tensor->data_c(true))) = py::cast(item.second.attr("default_input")); + } else if (py::hasattr(item.second.attr("default_input"), PYTHON_TENSOR_FLAG)) { + // cast tensor + tensor = py::cast>(item.second.attr("default_input")); + } + + if (tensor == nullptr) { + MS_LOG(EXCEPTION) << "Get default value for " << name << " failed"; + } + (void)tensors->emplace(name, tensor); + } +} + +bool AddDFGraph(const std::map& info, const py::dict& init_params, + const std::string& phase, const py::object& broadcast_params) { + FuncGraphPtr anf_graph = info.at(phase)->func_graph; + DfGraphConvertor convertor(anf_graph); + + size_t pos = phase.find('.'); + std::string net_id = ((pos == std::string::npos || pos == phase.size() - 1) ? phase : phase.substr(pos + 1)); + std::string phase_prefix = phase.substr(0, pos); + + if (phase_prefix == "export") { + MS_LOG(INFO) << "Set DfGraphConvertor training : false"; + convertor.set_training(false); + } + + TensorOrderMap init_tensors{}; + ConvertObjectToTensors(init_params, &init_tensors); + (void)convertor.ConvertAllNode().InitParam(init_tensors).BuildGraph(); + + if (broadcast_params != py::none()) { + if (!py::isinstance(broadcast_params)) { + MS_LOG(ERROR) << "Invalid broadcast params, it must be py::dict type"; + return false; + } + py::dict broadcast = broadcast_params.cast(); + if (broadcast.empty()) { + (void)convertor.GenerateBroadcastGraph(init_tensors); + } else { + TensorOrderMap broadcast_tensors{}; + ConvertObjectToTensors(broadcast, &broadcast_tensors); + (void)convertor.GenerateBroadcastGraph(broadcast_tensors); + } + MS_LOG(INFO) << "Generate broadcast graph with params and broadcast_empty is " << broadcast.empty(); + } + + (void)convertor.GenerateCheckpointGraph(); + if (convertor.ErrCode() != 0) { + DfGraphManager::GetInstance().ClearGraph(); + MS_LOG(ERROR) << "convert df graph failed, err:" << convertor.ErrCode(); + return false; + } + + if (MsContext::GetInstance()->save_graphs_flag()) { + convertor.DrawComputeGraph(GetFilePathName("ge_graph.dot")); // for debug + convertor.DrawInitGraph(GetFilePathName("init_graph.dot")); // for debug + convertor.DrawSaveCheckpointGraph(GetFilePathName("save_checkpoint_graph.dot")); // for debug + } + std::string init_graph = "init_subgraph." + net_id; + std::string checkpoint_name = "save." + net_id; + if (phase.find("train") != std::string::npos) { + (void)DfGraphManager::GetInstance().AddGraph(phase, convertor.GetComputeGraph(), {{"ge.exec.variable_acc", "1"}}); + } else { + (void)DfGraphManager::GetInstance().AddGraph(phase, convertor.GetComputeGraph()); + } + (void)DfGraphManager::GetInstance().AddGraph(init_graph, convertor.GetInitGraph()); + (void)DfGraphManager::GetInstance().AddGraph(checkpoint_name, convertor.GetSaveCheckpointGraph()); + (void)DfGraphManager::GetInstance().AddGraph(BROADCAST_GRAPH_NAME, convertor.GetBroadcastGraph()); + + DfGraphManager::GetInstance().SetAnfGraph(checkpoint_name, anf_graph); + + return true; +} + +FuncGraphPtr BuildDFGraph(const std::map& info, const py::dict& init_params, + const std::string& phase, const py::object& broadcast_params) { + if (info.count(phase) == 0) { + MS_LOG(EXCEPTION) << "no phase in executor:" << GetPhasePrefix(phase); + } + FuncGraphPtr anf_graph = info.at(phase)->func_graph; + + if (MsContext::GetInstance()->save_graphs_flag()) { + draw::Draw(GetFilePathName("anf_graph.dot"), anf_graph); // for debug + DumpIR(GetFilePathName("anf_graph.ir"), anf_graph, true); + } + + if (!AddDFGraph(info, init_params, phase, broadcast_params)) { + MS_LOG(ERROR) << "GenConvertor failed"; + return nullptr; + } + +#if ENABLE_TRAIN + (void)setenv("GE_TRAIN", "1", 1); +#else + (void)setenv("GE_TRAIN", "0", 1); +#endif + + if (CreateSessionAndGraphRunner(static_cast(ENABLE_TRAIN)) != Status::SUCCESS) { + MS_LOG(ERROR) << "Create GE Session or GraphRunner failed."; + return nullptr; + } + + return anf_graph; +} + +void RunGEInitGraph(const py::dict& init_params, const std::string& phase) { + MS_LOG(DEBUG) << "ExecInitGraph start."; + TensorOrderMap inputs_with_name{}; + ConvertObjectToTensors(init_params, &inputs_with_name); + std::vector inputs; + (void)std::transform(inputs_with_name.begin(), inputs_with_name.end(), std::back_inserter(inputs), + [](const std::pair& item) { return item.second; }); + + std::vector ge_tensors = TransformUtil::ConvertInputTensors(inputs, kOpFormat_NCHW); + if (ge_tensors.size() != inputs.size()) { + MS_LOG(ERROR) << "Args convert to ge tensor error."; + return; + } + MS_LOG(DEBUG) << "Run graph begin, inputs size is: " << inputs.size() << "."; + + std::vector ge_outputs; + transform::RunOptions run_options; + + run_options.name = phase; + if (DfGraphManager::GetInstance().GetGraphByName(phase) == nullptr) { + MS_LOG(WARNING) << "Can not find " << phase << " sub graph, don't need data init subgraph in INFER mode."; + return; + } + auto graph_runner = DfGraphManager::GetInstance().GetGraphRunner(); + if (graph_runner == nullptr) { + MS_LOG(EXCEPTION) << "Can not found GraphRunner."; + } + { + // Release GIL before calling into (potentially long-running) C++ code + py::gil_scoped_release release; + Status ret = graph_runner->RunGraph(run_options, ge_tensors, &ge_outputs); + if (ret != Status::SUCCESS) { + MS_LOG(EXCEPTION) << "Exec " << phase << " graph failed."; + } + + MS_LOG(INFO) << "Exec " << phase << " graph success."; + + if ((ConfigManager::GetInstance().parallel_strategy() == ParallelStrategy::DISTRIBUTION) && + (DfGraphManager::GetInstance().GetGraphByName(BROADCAST_GRAPH_NAME) != nullptr)) { + run_options.name = BROADCAST_GRAPH_NAME; + ret = graph_runner->RunGraph(run_options, ge_tensors, &ge_outputs); + if (ret != Status::SUCCESS) { + MS_LOG(EXCEPTION) << "Exec BROADCAST_GRAPH_NAME failed."; + } + MS_LOG(INFO) << "Exec broadcast graph success."; + } + } +} + +py::object ExtractGeneralCnodeRet(const AbstractBasePtr& cnode_data, const py::tuple& data, size_t* count) { + MS_EXCEPTION_IF_NULL(cnode_data); + if (*count >= data.size()) { + MS_LOG(EXCEPTION) << "The number of elements in the outputs : " << data.size() + << " less than the number of elements required. "; + } + + if (cnode_data->isa()) { + BaseShapePtr shape = cnode_data->BuildShape(); + auto shape_act = shape->cast()->shape(); + Tensor tensor_exp = py::cast(data[*count]); + if (shape_act != tensor_exp.shape()) { + MS_LOG(EXCEPTION) << "The shape of the tensor returned from GE is not the same as " + "the shape of the tensor derived from ME."; + } + return data[(*count)++]; + } + + if (!cnode_data->isa()) { + MS_LOG(EXCEPTION) << "The output of operator in the final anf graph could " + << "only be a tensor or a tuple of tensor, but got " << cnode_data->BuildValue()->ToString() + << "."; + } + auto data_tp = cnode_data->cast(); + auto elements = data_tp->elements(); + size_t size = data_tp->size(); + py::tuple tp = py::tuple(size); + for (size_t i = 0; i < size; i++) { + tp[i] = ExtractGeneralCnodeRet(elements[i], data, count); + } + return std::move(tp); +} + +py::object StructureOutput(const AnfNodePtr& output_node, const py::tuple& data, size_t* count) { + MS_EXCEPTION_IF_NULL(output_node); + + if (output_node->isa()) { + return ValuePtrToPyData(GetValueNode(output_node)); + } + + if (*count >= data.size()) { + MS_LOG(EXCEPTION) << "The number of elements in the outputs : " << data.size() + << " less than the number of elements required. "; + } + if (output_node->isa()) { + return data[(*count)++]; + } + + auto output_c = output_node->cast(); + if (output_c == nullptr) { + MS_LOG(EXCEPTION) << "The final anf graph could only have constant, parameter, and operator, but got " + << output_node->ToString(); + } + + if (output_c->IsApply(prim::kPrimMakeTuple)) { + auto input_list = output_c->inputs(); + size_t size = input_list.size(); + py::tuple tp = py::tuple(size - 1); + for (size_t i = 1; i < size; i++) { + tp[i - 1] = StructureOutput(input_list[i], data, count); + } + return std::move(tp); + } + if (output_c->IsApply(prim::kPrimDepend)) { + return StructureOutput(output_c->input(1), data, count); + } + + return ExtractGeneralCnodeRet(output_c->abstract(), data, count); +} + +std::shared_ptr DoExecGraph(const FuncGraphPtr& graph, const std::vector& inputs, + const std::string& phase) { + std::vector ge_tensors = TransformUtil::ConvertInputTensors(inputs, kOpFormat_NCHW); + if (ge_tensors.size() != inputs.size()) { + MS_LOG(ERROR) << "args convert to ge tensor error"; + return nullptr; + } + + std::vector ge_outputs; + transform::RunOptions run_options; + + run_options.name = phase; + + auto graph_runner = DfGraphManager::GetInstance().GetGraphRunner(); + + if (graph_runner == nullptr) { + MS_LOG(ERROR) << "Can not found GraphRunner"; + return nullptr; + } + + { + // Release GIL before calling into (potentially long-running) C++ code + py::gil_scoped_release release; + MS_LOG(DEBUG) << "Run graph begin, inputs size is: " << inputs.size(); + Status ret = graph_runner->RunGraph(run_options, ge_tensors, &ge_outputs); + MS_LOG(DEBUG) << "Run graph finish, outputs size is: " << ge_outputs.size(); + if (ret != Status::SUCCESS) { + MS_LOG(ERROR) << "Exec graph failed"; + return nullptr; + } + } + + std::vector me_outputs = TransformUtil::ConvertGeTensors(ge_outputs); + if (me_outputs.size() != ge_outputs.size()) { + MS_LOG(ERROR) << "Convert output Ge tensor to Me tensor failed"; + } + + py::tuple outputs(me_outputs.size()); + for (std::size_t i = 0; i < outputs.size(); i++) { + outputs[i] = *me_outputs[i]; + } + + std::shared_ptr ret = nullptr; + +#ifdef ENABLE_GE + AnfNodePtr root = graph->get_return(); + MS_EXCEPTION_IF_NULL(root); + AbstractBasePtr output = root->abstract(); + size_t count = 0; + py::object oj = StructureOutput(output, outputs, &count); + ret = std::make_shared(oj); +#else + if (outputs.size() == 1) { + ret = std::make_shared(outputs[0]); + } else { + ret = std::make_shared(outputs); + } +#endif + + return ret; +} + +void ProcessGeArg(const std::map& info, const py::tuple& args, const std::string& phase, + std::vector* inputs) { + // check the arg and use the ExecutorPy args + std::size_t size = args.size(); + + if (info.count(phase) == 0) { + MS_LOG(EXCEPTION) << "no phase in executor:" << GetPhasePrefix(phase); + } + + auto arg_size = info.at(phase)->arg_list_size; + if (size != arg_size) { + MS_LOG(EXCEPTION) << "The real arg num : size = " << size << ". graph_arg_size = " << arg_size; + } + + // process the first args of tensor + // only in Dataset Feed Mode, fp_bp graph need input tensors + if (ConfigManager::GetInstance().dataset_mode() == DS_FEED_MODE) { + for (std::size_t i = 0; i < size; i++) { + ValuePtr converted = nullptr; + bool succ = parse::ConvertData(args[i], &converted); + if (!succ) { + MS_LOG(EXCEPTION) << "args convert error"; + } + if (converted->isa()) { + (*inputs).push_back(converted->cast()); + } else { + MS_LOG(EXCEPTION) << "args, " << converted->ToString() << " is not tensor"; + } + } + } +} + +py::object ExecDFGraph(const std::map& info, const py::tuple& args, + const std::string& phase) { + std::string phase_prefix = GetPhasePrefix(phase); + + if (phase_prefix == "save") { + DoExecNonInputGraph(phase); + ConfigManager::GetInstance().ResetConfig(); + return py::none(); + } + + if (info.count(phase) == 0) { + MS_LOG(EXCEPTION) << "has no phase:" << phase; + } + + FuncGraphPtr anf_graph = info.at(phase)->func_graph; + +#if (!defined ENABLE_GE) || (defined ENABLE_INFER) + // Now don't use the graph because the exec ge function don't take effect + MS_EXCEPTION_IF_NULL(info.at(phase)->func_graph); + if (ENABLE_TRAIN != info.at(phase)->func_graph->flags()["training"]) { + MS_LOG(ERROR) << "Graph training mode mismatch mode of libraries"; + ConfigManager::GetInstance().ResetConfig(); + return py::none(); + } +#endif + + std::shared_ptr ret_val = std::make_shared(); + // We will not execute graph when output is constant or just input itself. + if (IsGraphOutputValueNodeOrParameter(info.at(phase)->func_graph->output(), args, ret_val)) { + ConfigManager::GetInstance().ResetConfig(); + return *ret_val; + } + + std::vector inputs; + ProcessGeArg(info, args, phase, &inputs); + + std::shared_ptr ret = DoExecGraph(anf_graph, inputs, phase); + ConfigManager::GetInstance().ResetConfig(); + if (ret != nullptr) { + return *ret; + } else { + MS_LOG(EXCEPTION) << "exec graph failed"; + } +} +void ExportDFGraph(const std::string& file_name, const std::string& phase) { + MS_LOG(DEBUG) << "ExportGraph Begin"; + transform::DfGraphWrapperPtr wrap_ptr = DfGraphManager::GetInstance().GetGraphByName(phase); + if (wrap_ptr == nullptr) { + MS_LOG(ERROR) << "Get graph form DfGraphManager failed!"; + return; + } + + transform::DfGraphPtr ge_graph = wrap_ptr->graph_ptr_; + if (nullptr == ge_graph) { + MS_LOG(ERROR) << "The export graph is null"; + return; + } + + (void)ge_graph->SaveToFile(file_name); + + MS_LOG(DEBUG) << "ExportGraph End"; +} +} // namespace pipeline +} // namespace mindspore diff --git a/mindspore/ccsrc/pipeline/pipeline_ge.h b/mindspore/ccsrc/pipeline/pipeline_ge.h new file mode 100644 index 0000000000..c3779fd982 --- /dev/null +++ b/mindspore/ccsrc/pipeline/pipeline_ge.h @@ -0,0 +1,57 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_PIPELINE_PIPELINE_GE_H_ +#define MINDSPORE_CCSRC_PIPELINE_PIPELINE_GE_H_ + +#include +#include +#include +#include +#include +#include +#include + +#include "pybind11/pybind11.h" +#include "pipeline/base.h" +#include "operator/ops.h" + +namespace mindspore { +namespace pipeline { + +namespace py = pybind11; + +void SetGeOption(const std::map& options); + +void RunGEInitGraph(const py::dict& init_params, const std::string& phase); + +py::object ExecDFGraph(const std::map& info, const py::tuple& args, + const std::string& phase = "train"); + +FuncGraphPtr BuildDFGraph(const std::map& info, const py::dict& init_params, + const std::string& phase, const py::object& broadcast_params = {}); + +// init and exec dataset sub graph for GE backend +bool InitExecDatasetGe(const std::string& queue_name, int64_t size, int64_t batch_size, + const std::vector& types, const std::vector>& shapes, + const std::vector& input_indexes, const std::string& phase); + +void ExportDFGraph(const std::string& file_name, const std::string& phase); + +} // namespace pipeline +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_PIPELINE_PIPELINE_GE_H_ diff --git a/mindspore/ccsrc/pipeline/resource.cc b/mindspore/ccsrc/pipeline/resource.cc index 2998ff1dbb..59ee04ad15 100644 --- a/mindspore/ccsrc/pipeline/resource.cc +++ b/mindspore/ccsrc/pipeline/resource.cc @@ -25,19 +25,13 @@ #include "pipeline/parse/data_converter.h" #include "operator/ops.h" #include "utils/graph_utils.h" -#include "transform/convert.h" #include "optimizer/ad/dfunctor.h" #include "vm/segment_runner.h" -#include "utils/context/ms_context.h" -#include "transform/df_graph_manager.h" -#include "device/kernel_runtime_manager.h" namespace mindspore { // namespace to support opmap definition namespace pipeline { -using MethodMap = std::unordered_map>; - MethodMap& GetMethodMap() { static MethodMap method_map = {{kObjectTypeString, { @@ -255,28 +249,5 @@ void Resource::Clean() { trace::ClearTraceStack(); is_cleaned_ = true; } - -void ReleaseGeTsd() { - auto context_ptr = MsContext::GetInstance(); - if (context_ptr != nullptr) { - (void)context_ptr->FinalizeGe(true); - (void)context_ptr->CloseTsd(true); - } -} - -void ClearResAtexit() { - MS_LOG(DEBUG) << "pipeline clear all resource"; - device::KernelRuntimeManager::Instance().ClearRuntimeResource(); - transform::DfGraphManager::GetInstance().ClearGraph(); - ad::g_k_prims.clear(); - - abstract::ClearPrimEvaluatorMap(); - compile::ClearConvertCache(); - transform::DfGraphConvertor::get_adpt_map().clear(); - pipeline::GetMethodMap().clear(); - pipeline::ExecutorPy::ClearRes(); - - ReleaseGeTsd(); -} } // namespace pipeline } // namespace mindspore diff --git a/mindspore/ccsrc/pipeline/resource.h b/mindspore/ccsrc/pipeline/resource.h index 43159ddbdd..15ab70db14 100644 --- a/mindspore/ccsrc/pipeline/resource.h +++ b/mindspore/ccsrc/pipeline/resource.h @@ -44,6 +44,10 @@ const char kOutput[] = "output"; class InferenceResource; +using MethodMap = std::unordered_map>; + +MethodMap& GetMethodMap(); + class ResourceBase { public: ResourceBase() { manager_ = MakeManager(); } @@ -110,9 +114,6 @@ class Resource : public ResourceBase { using ResourcePtr = std::shared_ptr; -void ClearResAtexit(); -void ReleaseGeTsd(); - } // namespace pipeline } // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc b/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc index b9b324e5dd..8212d64c27 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc +++ b/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc @@ -21,7 +21,7 @@ #include "pre_activate/ascend/ir_fission/bn_grad_split.h" #include "pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.h" #include "pre_activate/ascend/ir_fission/layer_norm_grad_split.h" -#include "pre_activate/ascend/ir_fusion/allreduce_fusion.h" +#include "pre_activate/common/ir_fusion/allreduce_fusion.h" #include "pre_activate/ascend/ir_fusion/square_sum_fusion.h" #include "pre_activate/ascend/ir_fusion/clip_by_norm_no_div_square_sum_fusion.h" #include "pre_activate/ascend/ir_fusion/lamb_update_with_lr_rule_fusion.h" diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.cc b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.cc index 49c5e89641..58b8a93516 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.cc +++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.cc @@ -237,11 +237,11 @@ CNodePtr CreateFusionOp(const std::vector &inputs_list, const std::v std::vector input_names; for (uint8_t i = 0; i < inputs_list.size(); i++) { - input_names.emplace_back("input" + to_string(i)); + input_names.emplace_back("input" + std::to_string(i)); } std::vector output_names; for (uint8_t i = 0; i < outputs_list.size(); i++) { - output_names.emplace_back("output" + to_string(i)); + output_names.emplace_back("output" + std::to_string(i)); } ValuePtr input_names_v = MakeValue(input_names); diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/allreduce_fusion.cc b/mindspore/ccsrc/pre_activate/common/ir_fusion/allreduce_fusion.cc similarity index 97% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/allreduce_fusion.cc rename to mindspore/ccsrc/pre_activate/common/ir_fusion/allreduce_fusion.cc index 7b862b21b9..55efcf9058 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/allreduce_fusion.cc +++ b/mindspore/ccsrc/pre_activate/common/ir_fusion/allreduce_fusion.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/ir_fusion/allreduce_fusion.h" +#include "pre_activate/common/ir_fusion/allreduce_fusion.h" #include #include diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/allreduce_fusion.h b/mindspore/ccsrc/pre_activate/common/ir_fusion/allreduce_fusion.h similarity index 87% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/allreduce_fusion.h rename to mindspore/ccsrc/pre_activate/common/ir_fusion/allreduce_fusion.h index c26dbc20d9..b49b8373c6 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/allreduce_fusion.h +++ b/mindspore/ccsrc/pre_activate/common/ir_fusion/allreduce_fusion.h @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_ALLREDUCE_FUSION_H_ -#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_ALLREDUCE_FUSION_H_ +#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_COMMON_IR_FUSION_ALLREDUCE_FUSION_H_ +#define MINDSPORE_CCSRC_PRE_ACTIVATE_COMMON_IR_FUSION_ALLREDUCE_FUSION_H_ #include #include "pre_activate/common/pass.h" @@ -46,4 +46,4 @@ class AllReduceFusion : public Pass { }; } // namespace opt } // namespace mindspore -#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_ALLREDUCE_FUSION_H_ +#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_COMMON_IR_FUSION_ALLREDUCE_FUSION_H_ diff --git a/mindspore/ccsrc/predict/converter/kernel2ms.cc b/mindspore/ccsrc/predict/converter/kernel2ms.cc index 30b1960e41..32cdee1350 100644 --- a/mindspore/ccsrc/predict/converter/kernel2ms.cc +++ b/mindspore/ccsrc/predict/converter/kernel2ms.cc @@ -16,7 +16,7 @@ #include "predict/converter/kernel2ms.h" #include -#include "transform/convert.h" +#include "ir/anf.h" #include "predict/converter/lite_model/op_attr_packer.h" #include "mindspore/ccsrc/operator/ops.h" @@ -135,7 +135,7 @@ void Kernel2Ms::GetRealInpoutsPtr(const AnfNodePtr &node, std::vectorisa()) { auto c_node = node->cast(); MS_EXCEPTION_IF_NULL(c_node); - std::string c_node_name = transform::GetCNodeFuncName(c_node); + std::string c_node_name = GetCNodeFuncName(c_node); if (c_node_name == prim::kPrimTupleGetItem->name()) { auto v_node = c_node->inputs()[kTupleGetItemIndex]->cast(); MS_EXCEPTION_IF_NULL(v_node); @@ -321,7 +321,7 @@ bool Kernel2Ms::SetGraphInputTensors(const KernelGraphPtr &kernel_graph_ptr, con } for (const auto &input_node : kernel_graph_ptr->inputs()) { if (input_node->isa()) { - ParameterPtr pk_node = dynamic_pointer_cast(input_node); + ParameterPtr pk_node = std::dynamic_pointer_cast(input_node); TensorPtr device_tensor; if (convert_mode_ == kConvertCpuMode) { device_tensor = predict::utils::GetParaCpuTensor(input_node); diff --git a/mindspore/ccsrc/pynative/base.h b/mindspore/ccsrc/pynative/base.h new file mode 100644 index 0000000000..7405f621cb --- /dev/null +++ b/mindspore/ccsrc/pynative/base.h @@ -0,0 +1,67 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_PYNATIVE_BASE_H_ +#define MINDSPORE_CCSRC_PYNATIVE_BASE_H_ + +#include +#include +#include +#include +#include +#include + +#include "pybind11/pybind11.h" +#include "ir/primitive.h" +#include "pipeline/static_analysis/abstract_value.h" + +namespace mindspore { +namespace pynative { + +namespace py = pybind11; + +enum PynativeStatusCode { + PYNATIVE_SUCCESS = 0, + PYNATIVE_OP_NOT_IMPLEMENTED_ERR = 1, + PYNATIVE_OP_INPUTS_ERR = 2, + PYNATIVE_OP_PARAMS_ERR = 3, + PYNATIVE_OP_ATTRS_ERR = 4, + PYNATIVE_GRAPH_MANAGER_ERR = 5, + PYNATIVE_GRAPH_GE_BUILD_ERR = 6, + PYNATIVE_GRAPH_GE_RUN_ERR = 7, + PYNATIVE_UNKNOWN_STATE = 0XFF +}; + +enum RunOpArgsEnum { PY_PRIM = 0, PY_NAME, PY_INPUTS, PY_INPUT_MASK, PY_ARGS_NUM }; + +struct OpExecInfo { + PrimitivePyPtr py_primitive; + std::string op_name; + AbstractBasePtr abstract; + + py::tuple op_inputs; + py::tuple inputs_mask; + py::dict op_attrs; +}; +using OpExecInfoPtr = std::shared_ptr; +OpExecInfoPtr GenerateOpExecInfo(const py::args& args); + +const std::unordered_set ignore_infer_prim = {"partial"}; + +} // namespace pynative +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_PYNATIVE_BASE_H_ diff --git a/mindspore/ccsrc/pynative/pynative_execute.cc b/mindspore/ccsrc/pynative/pynative_execute.cc index 27cfd89106..927e768bbe 100644 --- a/mindspore/ccsrc/pynative/pynative_execute.cc +++ b/mindspore/ccsrc/pynative/pynative_execute.cc @@ -29,16 +29,18 @@ #include "pipeline/static_analysis/prim.h" #include "session/session_factory.h" +#include "pynative/base.h" + +#ifdef ENABLE_GE +#include "pynative/pynative_execute_ge.h" +#endif + const char SINGLE_OP_GRAPH[] = "single_op_graph"; // primitive unable to infer value for constant input in pynative mode -const std::unordered_set ignore_infer_prim = {"partial"}; const std::unordered_set vm_operators = {"partial", "depend"}; namespace mindspore { namespace pynative { -using transform::GraphRunner; -using transform::GraphRunnerOptions; -using transform::OperatorPtr; inline ValuePtr PyAttrValue(const py::object& obj) { ValuePtr converted_ret = nullptr; bool converted = parse::ConvertData(obj, &converted_ret); @@ -48,32 +50,12 @@ inline ValuePtr PyAttrValue(const py::object& obj) { return converted_ret; } -MeTensorPtr ConvertPyObjToTensor(const py::object& obj) { - MeTensorPtr me_tensor_ptr = nullptr; - if (py::isinstance(obj)) { - me_tensor_ptr = py::cast(obj); - } else if (py::isinstance(obj)) { - me_tensor_ptr = std::make_shared(py::cast(obj), nullptr); - } else if (py::isinstance(obj)) { - me_tensor_ptr = std::make_shared(py::cast(obj), nullptr); - } else if (py::isinstance(obj)) { - me_tensor_ptr = std::make_shared(py::cast(obj), nullptr); - } else if (py::isinstance(obj)) { - me_tensor_ptr = std::make_shared(py::cast(obj), nullptr); - } else if (py::isinstance(obj)) { - me_tensor_ptr = std::make_shared(py::cast(obj), nullptr); - } else { - MS_LOG(EXCEPTION) << "run op inputs type is invalid!"; - } - return me_tensor_ptr; -} - void PynativeInfer(const PrimitivePyPtr& prim, const py::tuple& py_args, OpExecInfo* const op_exec_info) { size_t size = py_args.size(); AbstractBasePtrList args_spec_list; for (size_t i = 0; i < size; i++) { ValuePtr input_value = PyAttrValue(py_args[i]); - if (py::isinstance(py_args[i])) { + if (py::isinstance(py_args[i])) { args_spec_list.emplace_back(abstract::FromValueInside(input_value, true)); } else { args_spec_list.emplace_back(abstract::FromValueInside(input_value, false)); @@ -140,241 +122,6 @@ std::string GetSingleOpGraphInfo(const OpExecInfoPtr& op_exec_info) { return graph_info; } -bool SetInputsForSingleOpGraph(const OpExecInfoPtr& op_exec_info, const std::vector& inputs, - const OperatorPtr& op, std::vector* graph_input_nodes) { - MS_EXCEPTION_IF_NULL(op_exec_info); - MS_EXCEPTION_IF_NULL(graph_input_nodes); - auto op_inputs = op_exec_info->op_inputs; - std::string op_name = op_exec_info->op_name; - transform::OpAdapterPtr adapter = transform::DfGraphConvertor::FindAdapter(op_name, true); - if (adapter == nullptr) { - return false; - } - - int op_input_idx = 1; - size_t size = inputs.size(); - for (size_t i = 0; i < size; i++) { - if (inputs[i] == nullptr) { - continue; - } - auto const_op = std::make_shared(); - MS_EXCEPTION_IF_NULL(const_op); - (void)const_op->set_attr_value(*inputs[i]); - MeTensorPtr me_tensor_ptr = ConvertPyObjToTensor(op_inputs[i]); - MS_EXCEPTION_IF_NULL(me_tensor_ptr); - auto const_op_desc = - transform::TransformUtil::GetGeTensorDesc(me_tensor_ptr->shape_c(), me_tensor_ptr->data_type(), kOpFormat_NCHW); - if (const_op_desc == nullptr) { - MS_LOG(ERROR) << "Create variable " << op_name << " ouptut descriptor failed!"; - return false; - } - auto pointer_cast_const_op = std::static_pointer_cast(const_op); - MS_EXCEPTION_IF_NULL(pointer_cast_const_op); - (void)pointer_cast_const_op->update_output_desc_y(*const_op_desc); - auto& input_map = adapter->getInputMap(); - if (input_map.find(op_input_idx) == input_map.end()) { - continue; - } - if (adapter->setInput(op, op_input_idx++, const_op)) { - MS_LOG(ERROR) << "fail to set params, index is " << op_input_idx; - return false; - } - graph_input_nodes->push_back(*const_op); - } - return true; -} - -bool BuildSingleOpGraph(const OpExecInfoPtr& op_exec_info, const std::vector& inputs, - const std::unordered_map& attrs, const GeGraphPtr& graph) { - MS_EXCEPTION_IF_NULL(op_exec_info); - std::string op_name = op_exec_info->op_name; - auto op_inputs = op_exec_info->op_inputs; - transform::OpAdapterPtr adapter = transform::DfGraphConvertor::FindAdapter(op_name, true); - if (adapter == nullptr) { - MS_LOG(ERROR) << "Unable to find Adapter for " << ((std::string)py::str(op_name)); - return false; - } - OperatorPtr op = adapter->generate(op_name); - MS_EXCEPTION_IF_NULL(op); - - std::vector graph_input_nodes; - // hold param nodes after setting input and output for the graph - // set input - if (!SetInputsForSingleOpGraph(op_exec_info, inputs, op, &graph_input_nodes)) { - return false; - } - // set attributes - for (auto attr : attrs) { - (void)adapter->setAttr(op, attr.first, attr.second); - } - // set default attributes - auto extra_attrs = adapter->GetExtraAttr(); - for (auto attr : extra_attrs) { - (void)adapter->setAttr(op, attr.first, attr.second); - } - // set input attributes - auto& input_attr_map = adapter->getInputAttrMap(); - for (auto& it : input_attr_map) { - if (op_inputs.size() < it.first) { - continue; - } - auto const_value = PyAttrValue(op_inputs[it.first - 1]); - if (const_value->isa()) { - continue; - } - it.second.set_attr(op, const_value); - } - // construct output data nodes - std::vector graph_outputs{*op}; - // set input and output nodes for the graph - MS_EXCEPTION_IF_NULL(graph); - (void)graph->SetInputs(graph_input_nodes).SetOutputs(graph_outputs); - MS_LOG(INFO) << "BuildSingleOpGraph done"; - return true; -} - -void ToTensorPtr(const OpExecInfoPtr op_exec_info, std::vector* const inputs) { - MS_EXCEPTION_IF_NULL(inputs); - MS_EXCEPTION_IF_NULL(op_exec_info); - auto op_inputs = op_exec_info->op_inputs; - size_t size = op_inputs.size(); - for (size_t i = 0; i < size; i++) { - if (py::isinstance(op_inputs[i])) { - inputs->emplace_back(nullptr); - continue; - } - MeTensorPtr me_tensor_ptr = ConvertPyObjToTensor(op_inputs[i]); - auto ge_tensor_ptr = transform::TransformUtil::ConvertTensor(me_tensor_ptr, kOpFormat_NCHW); - if (ge_tensor_ptr == nullptr) { - MS_LOG(EXCEPTION) << "convert inputs to GE tensor failed in op " << op_exec_info->op_name << "."; - } - // set inputs for operator to build single node graph - inputs->push_back(ge_tensor_ptr); - } -} - -PynativeStatusCode ConvertAttributes(const OpExecInfoPtr& op_exec_info, const std::vector& inputs) { - MS_EXCEPTION_IF_NULL(op_exec_info); - auto op_attrs = op_exec_info->op_attrs; - std::unordered_map attrs{}; - - for (auto& item : op_attrs) { - if (!py::isinstance(item.first)) { - MS_LOG(ERROR) << "type error in py dict convert"; - return PYNATIVE_OP_ATTRS_ERR; - } - std::string name = py::cast(item.first); - auto attr_value = PyAttrValue(py::cast(item.second)); - (void)attrs.emplace(name, attr_value); - } - - // build graph - GeGraphPtr graph = std::make_shared(op_exec_info->op_name); - if (BuildSingleOpGraph(op_exec_info, inputs, attrs, graph) == false) { - MS_LOG(ERROR) << "Fail to BuildSingleOpGraph"; - return PYNATIVE_GRAPH_GE_BUILD_ERR; - } - - // add the single op graph into the graph manager, which will be iterated by session. - transform::Status ret = - transform::DfGraphManager::GetInstance().AddGraph(SINGLE_OP_GRAPH, std::shared_ptr(graph)); - if (ret != transform::SUCCESS) { - MS_LOG(ERROR) << "Fail to AddGraph into graph manager"; - return PYNATIVE_GRAPH_MANAGER_ERR; - } - - return PYNATIVE_SUCCESS; -} - -std::vector ConvertOutputTensors(const OpExecInfoPtr& op_exec_info, - const std::vector& ge_tensors) { - std::vector outputs; - AbstractBasePtr abs_base = op_exec_info->abstract; - std::vector> shapes; - if (abs_base != nullptr && abs_base->isa()) { - auto arg_tensor = dyn_cast(abs_base); - shapes.emplace_back(arg_tensor->shape()->shape()); - outputs = transform::TransformUtil::ConvertGeTensors(ge_tensors, shapes); - return outputs; - } - if (abs_base != nullptr && abs_base->isa()) { - auto arg_tuple = dyn_cast(abs_base); - size_t len = arg_tuple->size(); - - for (size_t i = 0; i < len; i++) { - if (arg_tuple->elements()[i]->isa()) { - auto arg_tensor = dyn_cast(arg_tuple->elements()[i]); - shapes.emplace_back(arg_tensor->shape()->shape()); - } - } - outputs = transform::TransformUtil::ConvertGeTensors(ge_tensors, shapes); - return outputs; - } - for (auto& it : ge_tensors) { - auto tensor = transform::TransformUtil::ConvertGeTensor(it); - if (tensor != nullptr) { - outputs.emplace_back(tensor); - } - } - return outputs; -} - -py::object RunOpInGE(const OpExecInfoPtr& op_exec_info, PynativeStatusCode* status) { - MS_LOG(INFO) << "RunOpInGe start"; - MS_EXCEPTION_IF_NULL(op_exec_info); - MS_EXCEPTION_IF_NULL(status); - - // returns a null py::tuple on error - py::tuple err_ret(0); - auto op_name = op_exec_info->op_name; - transform::OpAdapterPtr adapter = transform::DfGraphConvertor::FindAdapter(op_name, true); - if (adapter == nullptr) { - MS_LOG(ERROR) << "Unable to find GE Adapter for " << ((std::string)py::str(op_name)); - *status = PYNATIVE_OP_NOT_IMPLEMENTED_ERR; - return std::move(err_ret); - } - - std::vector inputs{}; - ToTensorPtr(op_exec_info, &inputs); - // convert me attr to ge AttrValue - PynativeStatusCode ret = ConvertAttributes(op_exec_info, inputs); - if (ret != PYNATIVE_SUCCESS) { - *status = ret; - return std::move(err_ret); - } - // run graph - transform::RunOptions run_options; - run_options.name = SINGLE_OP_GRAPH; - std::vector ge_inputs; - std::vector ge_outputs; - transform::GraphRunnerOptions graph_runner_options; - graph_runner_options.options["ge.trainFlag"] = "1"; - auto graph_runner = std::make_shared(graph_runner_options); - transform::Status run_ret; - { - // Release GIL before calling into (potentially long-running) C++ code - py::gil_scoped_release release; - run_ret = graph_runner->RunGraph(run_options, ge_inputs, &ge_outputs); - } - if (run_ret != transform::Status::SUCCESS) { - MS_LOG(ERROR) << "GraphRunner Fails to Run Graph"; - *status = PYNATIVE_GRAPH_GE_RUN_ERR; - return std::move(err_ret); - } - - std::vector graph_outputs = ConvertOutputTensors(op_exec_info, ge_outputs); - size_t output_size = graph_outputs.size(); - py::tuple result(output_size); - for (size_t i = 0; i < output_size; i++) { - MS_EXCEPTION_IF_NULL(graph_outputs[i]); - result[i] = *graph_outputs[i]; - } - - *status = PYNATIVE_SUCCESS; - MS_LOG(INFO) << "RunOpInGe end"; - return std::move(result); -} - py::object RunOpInVM(const OpExecInfoPtr& op_exec_info, PynativeStatusCode* status) { MS_LOG(INFO) << "RunOpInVM start"; @@ -423,12 +170,6 @@ py::object RunOpWithBackendPolicy(MsBackendPolicy backend_policy, const OpExecIn MS_EXCEPTION_IF_NULL(status); py::object result; switch (backend_policy) { - case kMsBackendGeOnly: { - // use GE only - MS_LOG(INFO) << "RunOp use GE only backend"; - result = RunOpInGE(op_exec_info, status); - break; - } case kMsBackendVmOnly: { // use vm only MS_LOG(INFO) << "RunOp use VM only backend"; @@ -436,22 +177,14 @@ py::object RunOpWithBackendPolicy(MsBackendPolicy backend_policy, const OpExecIn break; } case kMsBackendGePrior: { +#ifdef ENABLE_GE // use GE first, use vm when GE fails MS_LOG(INFO) << "RunOp use GE first backend"; result = RunOpInGE(op_exec_info, status); if (*status != PYNATIVE_SUCCESS) { result = RunOpInVM(op_exec_info, status); } - break; - } - case kMsBackendVmPrior: { - // GE_VM_SILENT - // (should not use this policy) use vm first, use GE when vm fails - MS_LOG(INFO) << "RunOp use VM first backend"; - result = RunOpInVM(op_exec_info, status); - if (*status != PYNATIVE_SUCCESS) { - result = RunOpInGE(op_exec_info, status); - } +#endif break; } case kMsBackendMsPrior: { diff --git a/mindspore/ccsrc/pynative/pynative_execute.h b/mindspore/ccsrc/pynative/pynative_execute.h index e3d7649106..17b5610bfd 100644 --- a/mindspore/ccsrc/pynative/pynative_execute.h +++ b/mindspore/ccsrc/pynative/pynative_execute.h @@ -25,55 +25,14 @@ #include "pybind11/pybind11.h" -#include "transform/convert.h" -#include "transform/graph_runner.h" -#include "transform/types.h" +#include "pynative/base.h" #include "utils/context/ms_context.h" namespace mindspore { namespace pynative { -using MeTensor = mindspore::tensor::Tensor; -using MeTensorPtr = mindspore::tensor::TensorPtr; -using GeTensor = ge::Tensor; -using GeTensorPtr = std::shared_ptr; -using GeGraph = ge::Graph; -using GeGraphPtr = std::shared_ptr; -using GeOperator = ge::Operator; -using GeOperatorPtr = std::shared_ptr; - namespace py = pybind11; -enum PynativeStatusCode { - PYNATIVE_SUCCESS = 0, - PYNATIVE_OP_NOT_IMPLEMENTED_ERR = 1, - PYNATIVE_OP_INPUTS_ERR = 2, - PYNATIVE_OP_PARAMS_ERR = 3, - PYNATIVE_OP_ATTRS_ERR = 4, - PYNATIVE_GRAPH_MANAGER_ERR = 5, - PYNATIVE_GRAPH_GE_BUILD_ERR = 6, - PYNATIVE_GRAPH_GE_RUN_ERR = 7, - PYNATIVE_UNKNOWN_STATE = 0XFF -}; - -enum RunOpArgsEnum { PY_PRIM = 0, PY_NAME, PY_INPUTS, PY_INPUT_MASK, PY_ARGS_NUM }; - -struct OpExecInfo { - PrimitivePyPtr py_primitive; - std::string op_name; - AbstractBasePtr abstract; - - py::tuple op_inputs; - py::tuple inputs_mask; - py::dict op_attrs; -}; -using OpExecInfoPtr = std::shared_ptr; -OpExecInfoPtr GenerateOpExecInfo(const py::args& args); -bool BuildSingleOpGraph(const OpExecInfoPtr& op_exec_info, const std::vector& inputs, - const std::unordered_map& attrs, const GeGraphPtr& graph); - -py::object RunOpInGE(const OpExecInfoPtr& op_exec_info, PynativeStatusCode* status); - py::object RunOpInVM(const OpExecInfoPtr& op_exec_info, PynativeStatusCode* status); py::tuple RunOp(const py::args& args); diff --git a/mindspore/ccsrc/pynative/pynative_execute_ge.cc b/mindspore/ccsrc/pynative/pynative_execute_ge.cc new file mode 100644 index 0000000000..4ed6088494 --- /dev/null +++ b/mindspore/ccsrc/pynative/pynative_execute_ge.cc @@ -0,0 +1,311 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "pynative/pynative_execute_ge.h" + +#include +#include +#include +#include + +#include "utils/any.h" +#include "utils/utils.h" +#include "utils/context/ms_context.h" +#include "operator/ops.h" +#include "pipeline/parse/data_converter.h" +#include "pipeline/static_analysis/prim.h" +#include "session/session_factory.h" + +const char SINGLE_OP_GRAPH[] = "single_op_graph"; + +namespace mindspore { +namespace pynative { + +using MeTensor = mindspore::tensor::Tensor; +using MeTensorPtr = mindspore::tensor::TensorPtr; +using GeOperator = ge::Operator; +using GeOperatorPtr = std::shared_ptr; + +using transform::GraphRunner; +using transform::GraphRunnerOptions; +using transform::OperatorPtr; +static std::shared_ptr session = nullptr; +inline ValuePtr PyAttrValue(const py::object& obj) { + ValuePtr converted_ret = nullptr; + bool converted = parse::ConvertData(obj, &converted_ret); + if (!converted) { + MS_LOG(EXCEPTION) << "attribute convert error with type:" << std::string(py::str(obj)); + } + return converted_ret; +} + +MeTensorPtr ConvertPyObjToTensor(const py::object& obj) { + MeTensorPtr me_tensor_ptr = nullptr; + if (py::isinstance(obj)) { + me_tensor_ptr = py::cast(obj); + } else if (py::isinstance(obj)) { + me_tensor_ptr = std::make_shared(py::cast(obj), nullptr); + } else if (py::isinstance(obj)) { + me_tensor_ptr = std::make_shared(py::cast(obj), nullptr); + } else if (py::isinstance(obj)) { + me_tensor_ptr = std::make_shared(py::cast(obj), nullptr); + } else if (py::isinstance(obj)) { + me_tensor_ptr = std::make_shared(py::cast(obj), nullptr); + } else if (py::isinstance(obj)) { + me_tensor_ptr = std::make_shared(py::cast(obj), nullptr); + } else { + MS_LOG(EXCEPTION) << "run op inputs type is invalid!"; + } + return me_tensor_ptr; +} + +bool SetInputsForSingleOpGraph(const OpExecInfoPtr& op_exec_info, const std::vector& inputs, + const OperatorPtr& op, std::vector* graph_input_nodes) { + MS_EXCEPTION_IF_NULL(op_exec_info); + MS_EXCEPTION_IF_NULL(graph_input_nodes); + auto op_inputs = op_exec_info->op_inputs; + std::string op_name = op_exec_info->op_name; + transform::OpAdapterPtr adapter = transform::DfGraphConvertor::FindAdapter(op_name, true); + if (adapter == nullptr) { + return false; + } + + int op_input_idx = 1; + size_t size = inputs.size(); + for (size_t i = 0; i < size; i++) { + if (inputs[i] == nullptr) { + continue; + } + auto const_op = std::make_shared(); + MS_EXCEPTION_IF_NULL(const_op); + (void)const_op->set_attr_value(*inputs[i]); + MeTensorPtr me_tensor_ptr = ConvertPyObjToTensor(op_inputs[i]); + MS_EXCEPTION_IF_NULL(me_tensor_ptr); + auto const_op_desc = + transform::TransformUtil::GetGeTensorDesc(me_tensor_ptr->shape_c(), me_tensor_ptr->data_type(), kOpFormat_NCHW); + if (const_op_desc == nullptr) { + MS_LOG(ERROR) << "Create variable " << op_name << " ouptut descriptor failed!"; + return false; + } + auto pointer_cast_const_op = std::static_pointer_cast(const_op); + MS_EXCEPTION_IF_NULL(pointer_cast_const_op); + (void)pointer_cast_const_op->update_output_desc_y(*const_op_desc); + auto& input_map = adapter->getInputMap(); + if (input_map.find(op_input_idx) == input_map.end()) { + continue; + } + if (adapter->setInput(op, op_input_idx++, const_op)) { + MS_LOG(ERROR) << "fail to set params, index is " << op_input_idx; + return false; + } + graph_input_nodes->push_back(*const_op); + } + return true; +} + +bool BuildSingleOpGraph(const OpExecInfoPtr& op_exec_info, const std::vector& inputs, + const std::unordered_map& attrs, const GeGraphPtr& graph) { + MS_EXCEPTION_IF_NULL(op_exec_info); + std::string op_name = op_exec_info->op_name; + auto op_inputs = op_exec_info->op_inputs; + transform::OpAdapterPtr adapter = transform::DfGraphConvertor::FindAdapter(op_name, true); + if (adapter == nullptr) { + MS_LOG(ERROR) << "Unable to find Adapter for " << ((std::string)py::str(op_name)); + return false; + } + OperatorPtr op = adapter->generate(op_name); + MS_EXCEPTION_IF_NULL(op); + + std::vector graph_input_nodes; + // hold param nodes after setting input and output for the graph + // set input + if (!SetInputsForSingleOpGraph(op_exec_info, inputs, op, &graph_input_nodes)) { + return false; + } + // set attributes + for (auto attr : attrs) { + (void)adapter->setAttr(op, attr.first, attr.second); + } + // set default attributes + auto extra_attrs = adapter->GetExtraAttr(); + for (auto attr : extra_attrs) { + (void)adapter->setAttr(op, attr.first, attr.second); + } + // set input attributes + auto& input_attr_map = adapter->getInputAttrMap(); + for (auto& it : input_attr_map) { + if (op_inputs.size() < it.first) { + continue; + } + auto const_value = PyAttrValue(op_inputs[it.first - 1]); + if (const_value->isa()) { + continue; + } + it.second.set_attr(op, const_value); + } + // construct output data nodes + std::vector graph_outputs{*op}; + // set input and output nodes for the graph + MS_EXCEPTION_IF_NULL(graph); + (void)graph->SetInputs(graph_input_nodes).SetOutputs(graph_outputs); + MS_LOG(INFO) << "BuildSingleOpGraph done"; + return true; +} + +void ToTensorPtr(const OpExecInfoPtr op_exec_info, std::vector* const inputs) { + MS_EXCEPTION_IF_NULL(inputs); + MS_EXCEPTION_IF_NULL(op_exec_info); + auto op_inputs = op_exec_info->op_inputs; + size_t size = op_inputs.size(); + for (size_t i = 0; i < size; i++) { + if (py::isinstance(op_inputs[i])) { + inputs->emplace_back(nullptr); + continue; + } + MeTensorPtr me_tensor_ptr = ConvertPyObjToTensor(op_inputs[i]); + auto ge_tensor_ptr = transform::TransformUtil::ConvertTensor(me_tensor_ptr, kOpFormat_NCHW); + if (ge_tensor_ptr == nullptr) { + MS_LOG(EXCEPTION) << "convert inputs to GE tensor failed in op " << op_exec_info->op_name << "."; + } + // set inputs for operator to build single node graph + inputs->push_back(ge_tensor_ptr); + } +} + +PynativeStatusCode ConvertAttributes(const OpExecInfoPtr& op_exec_info, const std::vector& inputs) { + MS_EXCEPTION_IF_NULL(op_exec_info); + auto op_attrs = op_exec_info->op_attrs; + std::unordered_map attrs{}; + + for (auto& item : op_attrs) { + if (!py::isinstance(item.first)) { + MS_LOG(ERROR) << "type error in py dict convert"; + return PYNATIVE_OP_ATTRS_ERR; + } + std::string name = py::cast(item.first); + auto attr_value = PyAttrValue(py::cast(item.second)); + (void)attrs.emplace(name, attr_value); + } + + // build graph + GeGraphPtr graph = std::make_shared(op_exec_info->op_name); + if (BuildSingleOpGraph(op_exec_info, inputs, attrs, graph) == false) { + MS_LOG(ERROR) << "Fail to BuildSingleOpGraph"; + return PYNATIVE_GRAPH_GE_BUILD_ERR; + } + + // add the single op graph into the graph manager, which will be iterated by session. + transform::Status ret = + transform::DfGraphManager::GetInstance().AddGraph(SINGLE_OP_GRAPH, std::shared_ptr(graph)); + if (ret != transform::SUCCESS) { + MS_LOG(ERROR) << "Fail to AddGraph into graph manager"; + return PYNATIVE_GRAPH_MANAGER_ERR; + } + + return PYNATIVE_SUCCESS; +} + +std::vector ConvertOutputTensors(const OpExecInfoPtr& op_exec_info, + const std::vector& ge_tensors) { + std::vector outputs; + AbstractBasePtr abs_base = op_exec_info->abstract; + std::vector> shapes; + if (abs_base != nullptr && abs_base->isa()) { + auto arg_tensor = dyn_cast(abs_base); + shapes.emplace_back(arg_tensor->shape()->shape()); + outputs = transform::TransformUtil::ConvertGeTensors(ge_tensors, shapes); + return outputs; + } + if (abs_base != nullptr && abs_base->isa()) { + auto arg_tuple = dyn_cast(abs_base); + size_t len = arg_tuple->size(); + + for (size_t i = 0; i < len; i++) { + if (arg_tuple->elements()[i]->isa()) { + auto arg_tensor = dyn_cast(arg_tuple->elements()[i]); + shapes.emplace_back(arg_tensor->shape()->shape()); + } + } + outputs = transform::TransformUtil::ConvertGeTensors(ge_tensors, shapes); + return outputs; + } + for (auto& it : ge_tensors) { + auto tensor = transform::TransformUtil::ConvertGeTensor(it); + if (tensor != nullptr) { + outputs.emplace_back(tensor); + } + } + return outputs; +} + +py::object RunOpInGE(const OpExecInfoPtr& op_exec_info, PynativeStatusCode* status) { + MS_LOG(INFO) << "RunOpInGe start"; + MS_EXCEPTION_IF_NULL(op_exec_info); + MS_EXCEPTION_IF_NULL(status); + + // returns a null py::tuple on error + py::tuple err_ret(0); + auto op_name = op_exec_info->op_name; + transform::OpAdapterPtr adapter = transform::DfGraphConvertor::FindAdapter(op_name, true); + if (adapter == nullptr) { + MS_LOG(ERROR) << "Unable to find GE Adapter for " << ((std::string)py::str(op_name)); + *status = PYNATIVE_OP_NOT_IMPLEMENTED_ERR; + return std::move(err_ret); + } + + std::vector inputs{}; + ToTensorPtr(op_exec_info, &inputs); + // convert me attr to ge AttrValue + PynativeStatusCode ret = ConvertAttributes(op_exec_info, inputs); + if (ret != PYNATIVE_SUCCESS) { + *status = ret; + return std::move(err_ret); + } + // run graph + transform::RunOptions run_options; + run_options.name = SINGLE_OP_GRAPH; + std::vector ge_inputs; + std::vector ge_outputs; + transform::GraphRunnerOptions graph_runner_options; + graph_runner_options.options["ge.trainFlag"] = "1"; + auto graph_runner = std::make_shared(graph_runner_options); + transform::Status run_ret; + { + // Release GIL before calling into (potentially long-running) C++ code + py::gil_scoped_release release; + run_ret = graph_runner->RunGraph(run_options, ge_inputs, &ge_outputs); + } + if (run_ret != transform::Status::SUCCESS) { + MS_LOG(ERROR) << "GraphRunner Fails to Run Graph"; + *status = PYNATIVE_GRAPH_GE_RUN_ERR; + return std::move(err_ret); + } + + std::vector graph_outputs = ConvertOutputTensors(op_exec_info, ge_outputs); + size_t output_size = graph_outputs.size(); + py::tuple result(output_size); + for (size_t i = 0; i < output_size; i++) { + MS_EXCEPTION_IF_NULL(graph_outputs[i]); + result[i] = *graph_outputs[i]; + } + + *status = PYNATIVE_SUCCESS; + MS_LOG(INFO) << "RunOpInGe end"; + return std::move(result); +} +} // namespace pynative + +} // namespace mindspore diff --git a/mindspore/ccsrc/pynative/pynative_execute_ge.h b/mindspore/ccsrc/pynative/pynative_execute_ge.h new file mode 100644 index 0000000000..af0efec3e3 --- /dev/null +++ b/mindspore/ccsrc/pynative/pynative_execute_ge.h @@ -0,0 +1,46 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_PYNATIVE_PYNATIVE_EXECUTE_GE_H_ +#define MINDSPORE_CCSRC_PYNATIVE_PYNATIVE_EXECUTE_GE_H_ + +#include +#include +#include +#include +#include + +#include "pynative/base.h" +#include "transform/convert.h" +#include "transform/graph_runner.h" +#include "transform/types.h" +#include "utils/context/ms_context.h" + +using GeTensor = ge::Tensor; +using GeTensorPtr = std::shared_ptr; +using GeGraph = ge::Graph; +using GeGraphPtr = std::shared_ptr; + +namespace mindspore { +namespace pynative { +bool BuildSingleOpGraph(const OpExecInfoPtr& op_exec_info, const std::vector& inputs, + const std::unordered_map& attrs, const GeGraphPtr& graph); + +py::object RunOpInGE(const OpExecInfoPtr& op_exec_info, PynativeStatusCode* status); +} // namespace pynative +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_PYNATIVE_PYNATIVE_EXECUTE_GE_H_ diff --git a/mindspore/ccsrc/session/ascend_session.cc b/mindspore/ccsrc/session/ascend_session.cc index 1a29450313..64647cd036 100644 --- a/mindspore/ccsrc/session/ascend_session.cc +++ b/mindspore/ccsrc/session/ascend_session.cc @@ -35,6 +35,7 @@ #include "pre_activate/common/helper.h" #include "device/kernel_runtime_manager.h" #include "kernel/tbe/tbe_python_funcs.h" +#include "utils/config_manager.h" namespace mindspore { namespace session { diff --git a/mindspore/ccsrc/session/gpu_session.cc b/mindspore/ccsrc/session/gpu_session.cc index 196a2f300f..293ca4f2ba 100644 --- a/mindspore/ccsrc/session/gpu_session.cc +++ b/mindspore/ccsrc/session/gpu_session.cc @@ -19,7 +19,7 @@ #include "device/gpu/gpu_kernel_runtime.h" #include "pre_activate/common/optimizer.h" #include "pre_activate/common/pass_manager.h" -#include "pre_activate/ascend/ir_fusion/allreduce_fusion.h" +#include "pre_activate/common/ir_fusion/allreduce_fusion.h" #include "device/kernel_runtime_manager.h" #include "predict/predict.h" #include "common/utils.h" diff --git a/mindspore/ccsrc/transform/convert.cc b/mindspore/ccsrc/transform/convert.cc index 2b50d2328a..bbfe60859a 100755 --- a/mindspore/ccsrc/transform/convert.cc +++ b/mindspore/ccsrc/transform/convert.cc @@ -373,24 +373,6 @@ std::unordered_map &DfGraphConvertor::get_adpt_ma } // ---------------implement of DfGraphConvertor------------- -std::string GetCNodeFuncName(const CNodePtr cnode) { - if (cnode->inputs().empty()) { - return ""; - } - - AnfNodePtr valuenode = cnode->input(0); - if (valuenode->isa()) { - auto value = GetValueNode(valuenode); - // check whether the valuenode is primitive - if (value->isa()) { - return value->cast()->name(); - } else { - return value->ToString(); - } - } - return ""; -} - PrimType GetCNodeFuncType(const CNodePtr cnode) { if (cnode->inputs().empty()) { return kPrimTypeUnknown; diff --git a/mindspore/ccsrc/transform/convert.h b/mindspore/ccsrc/transform/convert.h index e38b0b2b3a..556db5acee 100644 --- a/mindspore/ccsrc/transform/convert.h +++ b/mindspore/ccsrc/transform/convert.h @@ -253,7 +253,6 @@ class DfGraphConvertor { bool distribute_ = false; }; -extern std::string GetCNodeFuncName(CNodePtr cnode); } // namespace transform } // namespace mindspore diff --git a/mindspore/ccsrc/utils/callbacks.cc b/mindspore/ccsrc/utils/callbacks.cc index ab3f7d883b..cdee0be82d 100644 --- a/mindspore/ccsrc/utils/callbacks.cc +++ b/mindspore/ccsrc/utils/callbacks.cc @@ -20,16 +20,16 @@ #include #include #include "pybind11/pybind11.h" +#ifdef ENABLE_GE #include "transform/df_graph_manager.h" #include "transform/util.h" +#endif #include "pipeline/parse/data_converter.h" #include "pipeline/parse/python_adapter.h" #include "utils/visible.h" namespace mindspore { namespace callbacks { -using mindspore::transform::Status; -using mindspore::transform::TransformUtil; const char PYTHON_MOD_CALLBACK_MODULE[] = "mindspore.train.callback"; const char PYTHON_FUN_PROCESS_CHECKPOINT[] = "_checkpoint_cb_for_save_op"; @@ -38,6 +38,10 @@ const char kSummary[] = "Summary"; const char kCheckPoint[] = "Save"; const int ONE_SHAPE = 1; +#ifdef ENABLE_GE +using mindspore::transform::Status; +using mindspore::transform::TransformUtil; + bool GetParameterShape(const FuncGraphPtr& graph, const std::string& param_name, const std::shared_ptr>& shape) { if (graph == nullptr) { @@ -181,6 +185,7 @@ uint32_t MS_EXPORT SummarySaveCallback(uint32_t graph_id, const std::map(ret); if (!bool_ret) { MS_LOG(ERROR) << "Python checkpoint return false during callback"; - return Status::FAILED; + return kCallbackFalied; } MS_LOG(DEBUG) << "End the summary save callback function."; - return Status::SUCCESS; + return kCallbackOk; } } // namespace callbacks } // namespace mindspore diff --git a/mindspore/ccsrc/utils/callbacks.h b/mindspore/ccsrc/utils/callbacks.h index 2a18b21b16..778b0a9ba2 100644 --- a/mindspore/ccsrc/utils/callbacks.h +++ b/mindspore/ccsrc/utils/callbacks.h @@ -20,8 +20,11 @@ #include #include #include +#include "ir/meta_tensor.h" +#ifdef ENABLE_GE #include "transform/types.h" #include "transform/util.h" +#endif namespace mindspore { namespace callbacks { @@ -36,10 +39,16 @@ extern const char kSummary[]; extern const char kCheckPoint[]; extern const std::string kPythonCheckpointModuleName; extern const std::string kPythonCheckpointFuncName; + +const int kCallbackOk = 0; +const int kCallbackFalied = 1; + bool GetParameterShape(const FuncGraphPtr& anf_graph, const std::string& param_name, const std::shared_ptr>& shape); +#ifdef ENABLE_GE uint32_t CheckpointSaveCallback(uint32_t, const std::map&); uint32_t SummarySaveCallback(uint32_t, const std::map&); +#endif uint32_t SummarySaveCallback(uint32_t, const std::map&); } // namespace callbacks diff --git a/mindspore/ccsrc/utils/context/ms_context.cc b/mindspore/ccsrc/utils/context/ms_context.cc index bf05af9858..7b531536ac 100644 --- a/mindspore/ccsrc/utils/context/ms_context.cc +++ b/mindspore/ccsrc/utils/context/ms_context.cc @@ -26,13 +26,15 @@ #include "tdt/tdt_host_interface.h" #include "tdt/data_common.h" #endif +#ifdef ENABLE_GE #include "transform/df_graph_manager.h" +#endif #include "ir/meta_tensor.h" namespace mindspore { +#ifdef ENABLE_GE using mindspore::transform::DfGraphManager; -using transform::GraphRunner; -using transform::GraphRunnerOptions; +#endif std::atomic thread_1_must_end(false); @@ -81,6 +83,7 @@ MsContext::MsContext(const std::string& policy, const std::string& target) { std::shared_ptr MsContext::GetInstance() { if (inst_context_ == nullptr) { + MS_LOG(DEBUG) << "Create new mindspore context"; #ifdef ENABLE_GE inst_context_.reset(new (std::nothrow) MsContext("ge", kAscendDevice)); #elif defined(ENABLE_D) diff --git a/mindspore/ccsrc/utils/context/ms_context.h b/mindspore/ccsrc/utils/context/ms_context.h index e7d8dc769f..06704ff9c6 100644 --- a/mindspore/ccsrc/utils/context/ms_context.h +++ b/mindspore/ccsrc/utils/context/ms_context.h @@ -23,7 +23,6 @@ #include #include #include -#include "transform/graph_runner.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/utils/convert_utils.cc b/mindspore/ccsrc/utils/convert_utils.cc index ccd21f6801..e840ff8734 100644 --- a/mindspore/ccsrc/utils/convert_utils.cc +++ b/mindspore/ccsrc/utils/convert_utils.cc @@ -373,4 +373,45 @@ AbstractBasePtr PyListDtype2AbstractTensor(const py::object &shape_obj, const py MS_LOG(EXCEPTION) << "Python evaluator return invalid shape or type. " << (std::string)py::str(type_obj); } } +bool IsGraphOutputValueNodeOrParameter(const AnfNodePtr &output, const py::tuple &args, + const std::shared_ptr &ret_val) { + if (output->isa()) { + MS_LOG(INFO) << "Graph's output is a constant. No need to execute."; + ValuePtr value = GetValueNode(output); + *ret_val = ValuePtrToPyData(value); + return true; + } + + // Adapter will transform values in __init__() and construct() to parameters, this could cause + // inputs (a.k.a args in current function) size less than parameters'. + if (output->isa()) { + MS_LOG(INFO) << "Graph's output is a parameter. If all params are inputs, no need to execute."; + if (args.empty()) { + MS_LOG(EXCEPTION) << "Inputs size is 0, let graph to be executed."; + } + // Find the right parameter as ret_val. + auto func_graph = output->func_graph(); + MS_EXCEPTION_IF_NULL(func_graph); + auto params = func_graph->parameters(); + if (params.empty()) { + MS_EXCEPTION(UnknownError) << "Graph's parameters size is 0"; + } + if (args.size() != params.size()) { + MS_LOG(EXCEPTION) << "Input size " << args.size() << " not equal to params size " << params.size() + << ", let graph to be executed."; + } + + auto it = std::find(params.begin(), params.end(), output); + if (it == params.end()) { + MS_EXCEPTION(UnknownError) << "When graph output is Parameter, it should be found in graph parameters"; + } + size_t index = it - params.cbegin(); + if (index >= args.size()) { + MS_EXCEPTION(UnknownError) << "Index " << index << " equal or larger than args size " << args.size() << "."; + } + *ret_val = args[index]; + return true; + } + return false; +} } // namespace mindspore diff --git a/mindspore/ccsrc/utils/convert_utils.h b/mindspore/ccsrc/utils/convert_utils.h index f190f98e6a..fbd4485a3f 100644 --- a/mindspore/ccsrc/utils/convert_utils.h +++ b/mindspore/ccsrc/utils/convert_utils.h @@ -18,6 +18,7 @@ #define MINDSPORE_CCSRC_UTILS_CONVERT_UTILS_H_ #include +#include #include "pybind11/pybind11.h" #include "utils/any.h" @@ -120,6 +121,9 @@ inline uint8_t *AddressOffset(void *address, size_t offset) { AbstractBasePtr PyListDtype2AbstractTensor(const py::object &shape_obj, const py::object &type_obj); +bool IsGraphOutputValueNodeOrParameter(const AnfNodePtr &output, const py::tuple &args, + const std::shared_ptr &ret_val); + } // namespace mindspore #endif // MINDSPORE_CCSRC_UTILS_CONVERT_UTILS_H_ diff --git a/mindspore/ccsrc/vm/segment_runner.cc b/mindspore/ccsrc/vm/segment_runner.cc index 82a61c010d..d7d5a4c096 100644 --- a/mindspore/ccsrc/vm/segment_runner.cc +++ b/mindspore/ccsrc/vm/segment_runner.cc @@ -178,14 +178,12 @@ LinConvertResult Convert(const AnfNodePtrList& lst) { } LinkFuncType MsVmConvert = Convert; -LinkFuncType GeVmConvert = Convert; -std::unordered_map backends = {{kMsVm, MsVmConvert}, {kGeVm, GeVmConvert}}; +std::unordered_map backends = {{kMsVm, MsVmConvert}}; std::set backend_list = { kMsConvert, kMsVm, - kGeVm, }; } // namespace compile diff --git a/mindspore/ccsrc/vm/transform.cc b/mindspore/ccsrc/vm/transform.cc index d5933db1ab..be7aaf5baa 100644 --- a/mindspore/ccsrc/vm/transform.cc +++ b/mindspore/ccsrc/vm/transform.cc @@ -24,7 +24,9 @@ #include #include "pipeline/static_analysis/abstract_value.h" +#ifdef ENABLE_GE #include "transform/convert.h" +#endif #include "utils/graph_utils.h" #include "utils/context/ms_context.h" #include "debug/trace.h" @@ -55,7 +57,6 @@ CompileGraph::CompileGraph(const BackendPtr& backend, const std::vectorIsGraphCut()) { - return nullptr; - } -#endif - FinalVMPtr rt = Link(graph); Reset(); MS_LOG(DEBUG) << "End"; diff --git a/mindspore/ccsrc/vm/transform.h b/mindspore/ccsrc/vm/transform.h index 206fd00431..f862444a82 100644 --- a/mindspore/ccsrc/vm/transform.h +++ b/mindspore/ccsrc/vm/transform.h @@ -55,7 +55,6 @@ class CompileGraph { InstSet Run(const FuncGraphPtr& func_graph); InstSet GenMultiGraphsSinkInst(const FuncGraphPtr& graph); - bool IsGraphCut() const { return is_graph_cut; } bool IsCut(const AnfNodePtr& node); void Push(const AnfNodePtr& node); void Tie(const AnfNodePtr& n1, const AnfNodePtr& n2) { slots_[n2] = slots_[n1]; } @@ -101,7 +100,6 @@ class CompileGraph { BackendPtr backend_; LinkFuncType lin_convert_; bool is_gevm_convert_; - bool is_graph_cut; int height_{0}; int max_height_{0}; std::vector cut_list_; diff --git a/mindspore/ccsrc/vm/vmimpl.cc b/mindspore/ccsrc/vm/vmimpl.cc index e64cd16fcf..ee9a817dd8 100644 --- a/mindspore/ccsrc/vm/vmimpl.cc +++ b/mindspore/ccsrc/vm/vmimpl.cc @@ -26,8 +26,6 @@ #include #include -#include "transform/graph_runner.h" -#include "transform/convert.h" #include "ir/meta_tensor.h" #include "operator/ops.h" #include "ir/manager.h" @@ -40,39 +38,6 @@ namespace compile { using PrimitivePyPtr = std::shared_ptr; -static const char SEGMENT_GRAPH_NAME[] = "runnable_segment"; - -VectorRef GeVM::RunGraph(const FuncGraphPtr& anf_graph, const VectorRef& args) { - // Convert graph - transform::DfGraphConvertor convertor(anf_graph); - - (void)convertor.ConvertAllNode().BuildGraph(); - if (convertor.ErrCode() == 0) { - (void)transform::DfGraphManager::GetInstance().AddGraph(SEGMENT_GRAPH_NAME, convertor.GetComputeGraph()); - } else { - MS_LOG(EXCEPTION) << "convert df graph failed"; - } - - // Run graph - transform::GraphRunnerOptions options; - transform::GraphRunner graph_runner(options); - transform::RunOptions run_options; - run_options.name = SEGMENT_GRAPH_NAME; - - std::vector inputs; - (void)std::transform(std::begin(args), std::end(args), std::back_inserter(inputs), - [](const BaseRef& arg) -> tensor::TensorPtr { - auto value_ref = utils::cast(arg); - auto value = value_ref.object_; - return py::cast(value); - }); - std::vector outputs; - (void)graph_runner.RunGraph(run_options, inputs, &outputs); - std::vector ret; - (void)std::copy(outputs.begin(), outputs.end(), std::back_inserter(ret)); - return VectorRef(ret); -} - // Indicate a call to a new frame. struct CallWrap : public Base { explicit CallWrap(const VMFramePtr& vm_frame) : frame(vm_frame) {} diff --git a/mindspore/ccsrc/vm/vmimpl.h b/mindspore/ccsrc/vm/vmimpl.h index 8ff02ae946..4ef507af82 100644 --- a/mindspore/ccsrc/vm/vmimpl.h +++ b/mindspore/ccsrc/vm/vmimpl.h @@ -64,12 +64,6 @@ class VMImpl { virtual ~VMImpl() = default; }; -class GeVM : public VMImpl { - public: - VectorRef RunGraph(const FuncGraphPtr& fg, const VectorRef& args) override; - ~GeVM() override = default; -}; - // An execution frame. // This holds the state for an application of a graph. The nodes list // must contain free variables of graphs encountered before the diff --git a/mindspore/common/api.py b/mindspore/common/api.py index 8e23e9184d..9ee95ef772 100644 --- a/mindspore/common/api.py +++ b/mindspore/common/api.py @@ -22,7 +22,7 @@ from mindspore import context from mindspore import log as logger from mindspore.parallel._utils import _get_parallel_mode from .._c_expression import generate_key, Executor_, Tensor, MetaTensor -from .._c_expression import verify_inputs_signature, init_exec_dataset, export_graph, _set_dataset_mode_config, init_ge +from .._c_expression import verify_inputs_signature, init_exec_dataset, _set_dataset_mode_config, init_ge from .tensor import Tensor as MsTensor # store ms_function class compiled pipeline cache @@ -501,6 +501,7 @@ class _Executor: file_name (str): File name of model to export file_format (str): MindSpore currently support 'GEIR' and 'ONNX' format for exported model """ + from .._c_expression import export_graph phase = 'export' + '.' + str(net.create_time) export_graph(file_name, file_format, phase) diff --git a/mindspore/common/parameter.py b/mindspore/common/parameter.py index b1992c8b82..c8ddf0eac6 100644 --- a/mindspore/common/parameter.py +++ b/mindspore/common/parameter.py @@ -155,6 +155,18 @@ class Parameter: def data(self): return self.default_input + def __add__(self, other): + return self.default_input + other + + def __sub__(self, other): + return self.default_input - other + + def __mul__(self, other): + return self.default_input * other + + def __truediv__(self, other): + return self.default_input / other + def set_parameter_data(self, data): if isinstance(data, (Tensor, list, int, float, np.float16, np.float32, np.int32, np.int16, np.ndarray)) and not isinstance(data, bool): diff --git a/mindspore/common/tensor.py b/mindspore/common/tensor.py index 4c7f31921b..d17661595f 100644 --- a/mindspore/common/tensor.py +++ b/mindspore/common/tensor.py @@ -89,6 +89,16 @@ class Tensor(Tensor_): out = self.__mul__(other) return out + def __truediv__(self, other): + if isinstance(other, (int, float)): + other_tensor = Tensor(other, self.dtype()) + elif isinstance(other, Tensor): + other_tensor = other + else: + raise TypeError("unsupported type for div operation") + out = tensor_operator_registry.get('__div__')(self, other_tensor) + return out + def __sub__(self, other): if not isinstance(other, Tensor): raise TypeError("input_data must be a tensor") diff --git a/mindspore/ops/functional.py b/mindspore/ops/functional.py index 5adb6fac57..b8411d42c1 100644 --- a/mindspore/ops/functional.py +++ b/mindspore/ops/functional.py @@ -125,5 +125,5 @@ shape_mul = Primitive("shape_mul") stop_gradient = Primitive("stop_gradient") tensor_operator_registry.register('__add__', tensor_add) - tensor_operator_registry.register('__mul__', tensor_mul) +tensor_operator_registry.register('__div__', tensor_div) diff --git a/mindspore/train/model.py b/mindspore/train/model.py index fe655433fa..65a9837fe6 100755 --- a/mindspore/train/model.py +++ b/mindspore/train/model.py @@ -161,6 +161,9 @@ class Model: def _update_metrics(self, outputs): """Update metrics local values.""" + if not isinstance(outputs, tuple): + raise ValueError("The `outputs` is not tuple.") + if self._eval_indexes is not None and len(outputs) < 3: raise ValueError("The length of `outputs` must be greater than or equal to 3, \ but got {}".format(len(outputs))) diff --git a/tests/ut/cpp/device/ascend_kernel_select_test.cc b/tests/ut/cpp/device/ascend_kernel_select_test.cc index d522a5adc0..79986d375d 100644 --- a/tests/ut/cpp/device/ascend_kernel_select_test.cc +++ b/tests/ut/cpp/device/ascend_kernel_select_test.cc @@ -231,7 +231,7 @@ void test_select(const CNodePtr &kernel_node, std::vector parent_list, std::vector> shapes, +void SetParentAbstract(std::vector parent_list, std::vector> shapes, std::vector types) { for (const auto &node : parent_list) { AnfAlgo::SetOutputInferTypeAndShape(types, shapes, node.get()); diff --git a/tests/ut/cpp/device/ascend_profiling_test.cc b/tests/ut/cpp/device/ascend_profiling_test.cc index 2bfdc9fcae..2829a5fd4a 100644 --- a/tests/ut/cpp/device/ascend_profiling_test.cc +++ b/tests/ut/cpp/device/ascend_profiling_test.cc @@ -16,10 +16,10 @@ #include #include +#include "./prof_reporter.h" #include "common/common_test.h" #include "device/ascend/profiling/profiling_manager.h" #include "./common.h" -#include "./prof_reporter.h" #define private public #include "device/ascend/profiling/plugin_impl.h" #undef private diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/allreduce_fusion_test.cc b/tests/ut/cpp/pre_activate/common/ir_fusion/allreduce_fusion_test.cc similarity index 99% rename from tests/ut/cpp/pre_activate/ascend/ir_fusion/allreduce_fusion_test.cc rename to tests/ut/cpp/pre_activate/common/ir_fusion/allreduce_fusion_test.cc index 1a7a103a99..79a1cf1a8a 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/allreduce_fusion_test.cc +++ b/tests/ut/cpp/pre_activate/common/ir_fusion/allreduce_fusion_test.cc @@ -20,7 +20,7 @@ #include "ir/manager.h" #include "debug/anf_ir_dump.h" #include "session/anf_runtime_algorithm.h" -#include "pre_activate/ascend/ir_fusion/allreduce_fusion.h" +#include "pre_activate/common/ir_fusion/allreduce_fusion.h" #include "pre_activate/common/optimizer.h" #include "device/kernel_info.h" #include "pre_activate/common/pass_manager.h" diff --git a/tests/ut/cpp/pre_activate/pass/convert_const_input_to_tensor_input_test.cc b/tests/ut/cpp/pre_activate/pass/convert_const_input_to_tensor_input_test.cc index 083e4168ce..014b0aed55 100644 --- a/tests/ut/cpp/pre_activate/pass/convert_const_input_to_tensor_input_test.cc +++ b/tests/ut/cpp/pre_activate/pass/convert_const_input_to_tensor_input_test.cc @@ -105,7 +105,7 @@ TEST_F(TestHWConstInputToTensorInput, test_value_tuple_tensor_input) { auto tensor = input1->cast()->value()->cast(); ASSERT_TRUE(tensor != nullptr); auto data = tensor->data_c(false); - EXPECT_EQ(vector((int *)data, (int *)data + 4), vector({2, 4, 2, 2})); + EXPECT_EQ(std::vector((int *)data, (int *)data + 4), std::vector({2, 4, 2, 2})); } } // namespace opt } // namespace mindspore diff --git a/tests/ut/python/ir/test_tensor.py b/tests/ut/python/ir/test_tensor.py index d4f96b54eb..1757567db5 100644 --- a/tests/ut/python/ir/test_tensor.py +++ b/tests/ut/python/ir/test_tensor.py @@ -24,6 +24,8 @@ import pytest import mindspore as ms import mindspore.common.api as me import mindspore.nn as nn +from mindspore.common.parameter import Parameter +from mindspore.common.initializer import initializer from ..ut_filter import non_graph_engine @@ -199,6 +201,21 @@ def test_sub(): z = x - y assert isinstance(z, ms.Tensor) +@non_graph_engine +def test_div(): + x = ms.Tensor(np.array([[2,6,10],[12, 4, 8]]).astype(np.float32)) + y = ms.Tensor(np.array([[2,2,5],[6, 1, 2]]).astype(np.float32)) + z = x / y + z2 = x / 2 + assert isinstance(z, ms.Tensor) + assert isinstance(z2, ms.Tensor) + +@non_graph_engine +def test_parameter(): + x = Parameter(initializer(1, [1], ms.float32), name="beta1_power") + z = x / 2 + print(z) + class Net(nn.Cell): """Net definition""" @@ -378,3 +395,4 @@ def test_tensor_dtype_fp32_to_bool(): input = np.random.randn(2, 3, 4, 5).astype(np.float32) input = ms.Tensor(input) input_me = ms.Tensor(input, dtype=ms.bool_) + diff --git a/tests/ut/python/ops/test_array_ops.py b/tests/ut/python/ops/test_array_ops.py index 4164ce6e8f..1c4895465f 100644 --- a/tests/ut/python/ops/test_array_ops.py +++ b/tests/ut/python/ops/test_array_ops.py @@ -97,20 +97,6 @@ def test_select(): assert np.all(output.asnumpy() == expect) -def test_scalar_cast_grad(): - """ test_scalar_cast_grad """ - input_x = 255.5 - input_t = get_py_obj_dtype(ms.int8) - - def fx_cast(x): - output = F.scalar_cast(x, input_t) - return output - - gfn = C.grad(fx_cast)(input_x) - expect_dx = 1 - assert gfn == expect_dx - - class CustomOP(PrimitiveWithInfer): __mindspore_signature__ = (sig_dtype.T, sig_dtype.T, sig_dtype.T1, sig_dtype.T1, sig_dtype.T2, sig_dtype.T2, diff --git a/tests/ut/python/parallel/__init__.py b/tests/ut/python/parallel/__init__.py index ffe03896ab..c08f8e247b 100644 --- a/tests/ut/python/parallel/__init__.py +++ b/tests/ut/python/parallel/__init__.py @@ -13,11 +13,14 @@ # limitations under the License. import mindspore.context as context +from mindspore.parallel._utils import _reset_op_id def setup_module(module): - context.set_context(mode=context.GRAPH_MODE) + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=False) + _reset_op_id() def teardown_module(): context.reset_auto_parallel_context() + _reset_op_id() diff --git a/tests/ut/python/parallel/test_alltoall.py b/tests/ut/python/parallel/test_alltoall.py index ddceece8ec..7365dd941c 100644 --- a/tests/ut/python/parallel/test_alltoall.py +++ b/tests/ut/python/parallel/test_alltoall.py @@ -97,13 +97,10 @@ def test_all_to_all(): strategys = all_to_all_common(strategy1) print(strategys) expect_dict = {'Default/network-_VirtualDatasetCell/_backbone-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits' - '/SoftmaxCrossEntropyWithLogits-op43': [[8, 1], [8, 1]], - 'Default/network-_VirtualDatasetCell/_backbone-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits' - '/OneHot-op44': [[8, 1], [], []], - 'Default/network-_VirtualDatasetCell/_backbone-WithLossCell/_backbone-AllToAllNet/Transpose-op1': - [[8, 1]], - 'Default/network-_VirtualDatasetCell/_backbone-WithLossCell/_backbone-AllToAllNet/MatMul-op0': - [[1, 1], [1, 8]]} + '/SoftmaxCrossEntropyWithLogits-op3': [[8, 1], [8, 1]], + 'Default/network-_VirtualDatasetCell/_backbone-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/OneHot-op4': [[8, 1], [], []], + 'Default/network-_VirtualDatasetCell/_backbone-WithLossCell/_backbone-AllToAllNet/Transpose-op1': [[8, 1]], + 'Default/network-_VirtualDatasetCell/_backbone-WithLossCell/_backbone-AllToAllNet/MatMul-op0': [[1, 1], [1, 8]]} assert (strategys == expect_dict) context.set_context(save_graphs=False) diff --git a/tests/ut/python/parallel/test_auto_parallel_arithmetic.py b/tests/ut/python/parallel/test_auto_parallel_arithmetic.py index e6f72d8019..7c928be376 100644 --- a/tests/ut/python/parallel/test_auto_parallel_arithmetic.py +++ b/tests/ut/python/parallel/test_auto_parallel_arithmetic.py @@ -65,8 +65,8 @@ def test_auto_parallel_arithmetic(): b = Tensor(np.ones([64, 128]), dtype=ms.float32) _executor.compile(net, x, y, b, phase='train') strategies = _executor._get_strategy(net) - expected_strategies = {'Default/network-Net/FloorDiv-op2': [[2, 4], [2, 4]], - 'Default/network-Net/MatMul-op3': [[2, 1], [1, 4]]} + expected_strategies = {'Default/network-Net/FloorDiv-op0': [[2, 4], [2, 4]], + 'Default/network-Net/MatMul-op1': [[2, 1], [1, 4]]} assert strategies == expected_strategies def test_auto_parallel_arithmetic_broadcast_both(): @@ -91,8 +91,8 @@ def test_auto_parallel_arithmetic_broadcast_both(): b = Tensor(np.ones([1, 64]), dtype=ms.float32) _executor.compile(net, x, y, b, phase='train') strategies = _executor._get_strategy(net) - expected_strategies = {'Default/network-Net/FloorDiv-op2': [[8, 1], [1, 1]], - 'Default/network-Net/MatMul-op3': [[8, 1], [1, 1]]} + expected_strategies = {'Default/network-Net/FloorDiv-op0': [[8, 1], [1, 1]], + 'Default/network-Net/MatMul-op1': [[8, 1], [1, 1]]} assert strategies == expected_strategies @@ -118,8 +118,8 @@ def test_auto_parallel_arithmetic_broadcast_right(): b = Tensor(np.ones([32]), dtype=ms.float32) _executor.compile(net, x, y, b, phase='train') strategies = _executor._get_strategy(net) - expected_strategies = {'Default/network-Net/FloorDiv-op2': [[4, 2], [2]], - 'Default/network-Net/MatMul-op3': [[4, 1], [1, 2]]} + expected_strategies = {'Default/network-Net/FloorDiv-op0': [[4, 2], [2]], + 'Default/network-Net/MatMul-op1': [[4, 1], [1, 2]]} assert strategies == expected_strategies @@ -145,6 +145,6 @@ def test_auto_parallel_arithmetic_broadcast_left(): b = Tensor(np.ones([128, 64, 32]), dtype=ms.float32) _executor.compile(net, x, y, b, phase="train") strategies = _executor._get_strategy(net) - expected_strategies = {'Default/network-Net/FloorDiv-op2': [[4, 2], [1, 4, 2]], - 'Default/network-Net/MatMul-op3': [[4, 1], [1, 2]]} - assert strategies == expected_strategies \ No newline at end of file + expected_strategies = {'Default/network-Net/FloorDiv-op0': [[4, 2], [1, 4, 2]], + 'Default/network-Net/MatMul-op1': [[4, 1], [1, 2]]} + assert strategies == expected_strategies diff --git a/tests/ut/python/parallel/test_auto_parallel_assign_sub_with_ref_key.py b/tests/ut/python/parallel/test_auto_parallel_assign_sub_with_ref_key.py index e436f9faf7..5364263695 100755 --- a/tests/ut/python/parallel/test_auto_parallel_assign_sub_with_ref_key.py +++ b/tests/ut/python/parallel/test_auto_parallel_assign_sub_with_ref_key.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import re import numpy as np from mindspore import context import mindspore.nn as nn @@ -55,6 +56,9 @@ def test_auto_parallel_assign_sub_with_ref_key(): _executor.compile(net, x, phase="train") strategies = _executor._get_strategy(net) - expected_strategies = {'Default/network-PReLU/PReLU-op2': [[1, 1, 1, 8], [1]], - 'Default/network-PReLU/ReLU-op3': [[1]]} - assert strategies == expected_strategies + for (k, v) in strategies.items(): + if re.search('PReLU-op', k) is not None: + assert v == [[1, 1, 1, 8], [1]] + elif re.search('ReLU-op', k) is not None: + assert v == [[1]] + diff --git a/tests/ut/python/parallel/test_auto_parallel_cast.py b/tests/ut/python/parallel/test_auto_parallel_cast.py index be7b5082d6..4cfeb59fc1 100644 --- a/tests/ut/python/parallel/test_auto_parallel_cast.py +++ b/tests/ut/python/parallel/test_auto_parallel_cast.py @@ -75,9 +75,9 @@ def test_double_star_graph(): _executor.compile(net, x, y, z, w, phase='train') strategies = _executor._get_strategy(net) - expected_strategies = {'Default/network-Net/MatMul-op0': [[1, 8], [8, 1]], - 'Default/network-Net/Cast-op7': [[8, 1]], - 'Default/network-Net/MatMul-op8': [[8, 1], [1, 1]], - 'Default/network-Net/Cast-op9': [[1, 8]], - 'Default/network-Net/MatMul-op10': [[1, 1], [1, 8]]} - assert strategies == expected_strategies \ No newline at end of file + expected_strategies = {'Default/network-Net/Cast-op1': [[8, 1]], + 'Default/network-Net/Cast-op3': [[1, 8]], + 'Default/network-Net/MatMul-op2': [[8, 1], [1, 1]], + 'Default/network-Net/MatMul-op4': [[1, 1], [1, 8]], + 'Default/network-Net/MatMul-op0': [[1, 8], [8, 1]]} + assert strategies == expected_strategies diff --git a/tests/ut/python/parallel/test_auto_parallel_matmul_prelu.py b/tests/ut/python/parallel/test_auto_parallel_matmul_prelu.py index ca9b561145..4d68a7f883 100644 --- a/tests/ut/python/parallel/test_auto_parallel_matmul_prelu.py +++ b/tests/ut/python/parallel/test_auto_parallel_matmul_prelu.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import re import numpy as np from mindspore import context import mindspore.nn as nn @@ -66,7 +67,10 @@ def test_matmul_prelu(): _executor.compile(net, x, y, b, phase='train') strategies = _executor._get_strategy(net) - assert strategies['Default/network-Net/PReLU-op2'] == [[16, 1, 1, 1], [1]] - assert strategies['Default/network-Net/Mul-op3'] == [[16, 1, 1, 1], [16, 1, 1, 1]] + for (k, v) in strategies.items(): + if re.search('PReLU-op', k) is not None: + assert v == [[16, 1, 1, 1], [1]] + elif re.search('Mul-op', k) is not None: + assert v == [[16, 1, 1, 1], [16, 1, 1, 1]] diff --git a/tests/ut/python/parallel/test_auto_parallel_parameter_cast.py b/tests/ut/python/parallel/test_auto_parallel_parameter_cast.py index b7a3255f7c..29e81f7f90 100644 --- a/tests/ut/python/parallel/test_auto_parallel_parameter_cast.py +++ b/tests/ut/python/parallel/test_auto_parallel_parameter_cast.py @@ -80,9 +80,9 @@ def test_common_parameter(): _executor.compile(net, x, y, z, w, phase='train') strategies = _executor._get_strategy(net) - expected_strategies = {'Default/network-Net/MatMul-op6': [[8, 1], [1, 1]], - 'Default/network-Net/MatMul-op8': [[8, 1], [1, 1]], - 'Default/network-Net/Cast-op7': [[1, 1]], + expected_strategies = {'Default/network-Net/MatMul-op1': [[8, 1], [1, 1]], + 'Default/network-Net/MatMul-op3': [[8, 1], [1, 1]], + 'Default/network-Net/Cast-op2': [[1, 1]], 'Default/network-Net/MatMul-op0': [[8, 1], [1, 1]], - 'Default/network-Net/Cast-op9': [[1, 1]]} + 'Default/network-Net/Cast-op4': [[1, 1]]} assert strategies == expected_strategies diff --git a/tests/ut/python/parallel/test_auto_parallel_transpose.py b/tests/ut/python/parallel/test_auto_parallel_transpose.py index 62fdc11120..2d4dcbae81 100644 --- a/tests/ut/python/parallel/test_auto_parallel_transpose.py +++ b/tests/ut/python/parallel/test_auto_parallel_transpose.py @@ -71,8 +71,8 @@ def test_two_matmul_transpose(): _executor.compile(net, x, y, b, phase='train') strategies = _executor._get_strategy(net) - expected_strategies = {'Default/network-Net/Transpose-op4': [[1, 16]], - 'Default/network-Net/Transpose-op5': [[16, 1]], - 'Default/network-Net/MatMul-op6': [[16, 1], [1, 1]], - 'Default/network-Net/MatMul-op7': [[16, 1], [1, 1]]} - assert strategies == expected_strategies \ No newline at end of file + expected_strategies = {'Default/network-Net/Transpose-op0': [[1, 16]], + 'Default/network-Net/Transpose-op1': [[16, 1]], + 'Default/network-Net/MatMul-op2': [[16, 1], [1, 1]], + 'Default/network-Net/MatMul-op3': [[16, 1], [1, 1]]} + assert strategies == expected_strategies diff --git a/tests/ut/python/parallel/test_auto_parallel_two_matmul.py b/tests/ut/python/parallel/test_auto_parallel_two_matmul.py index e7beed384e..bd6639a501 100644 --- a/tests/ut/python/parallel/test_auto_parallel_two_matmul.py +++ b/tests/ut/python/parallel/test_auto_parallel_two_matmul.py @@ -135,7 +135,6 @@ def test_two_matmul(): _executor.compile(net, x, y, b, phase='train') strategies = _executor._get_strategy(net) - expected_strategies = {'Default/network-Net/MatMul-op2': [[16, 1], [1, 1]], - 'Default/network-Net/MatMul-op3': [[16, 1], [1, 1]]} + expected_strategies = {'Default/network-Net/MatMul-op0': [[16, 1], [1, 1]], + 'Default/network-Net/MatMul-op1': [[16, 1], [1, 1]]} assert strategies == expected_strategies - diff --git a/tests/ut/python/parallel/test_dataset_interface.py b/tests/ut/python/parallel/test_dataset_interface.py index da8821199e..17b8d3cc6d 100644 --- a/tests/ut/python/parallel/test_dataset_interface.py +++ b/tests/ut/python/parallel/test_dataset_interface.py @@ -84,7 +84,7 @@ def loss_scale_manager_common(strategy1): opt = Momentum(net.trainable_params(), learning_rate, momentum) scale_manager = DynamicLossScaleManager(32, 2, 2000) model = Model(net, loss, opt, loss_scale_manager=scale_manager) - # if no GE exists, outputs = self._train_network(*next_element) outputs is None, TypeError is caught. + # if no GE exists, outputs = self._train_network(*next_element) outputs inputs tensor. try: model.train(epoch_size, dataset, dataset_sink_mode=False) except TypeError: diff --git a/tests/ut/python/parallel/test_one_dev.py b/tests/ut/python/parallel/test_one_dev.py index a5867eb5f4..efd4889ce6 100644 --- a/tests/ut/python/parallel/test_one_dev.py +++ b/tests/ut/python/parallel/test_one_dev.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import re from mindspore.train import Model, ParallelMode from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits from mindspore.nn.optim.momentum import Momentum @@ -89,16 +90,13 @@ def all_to_all_common(): def test_one_dev(): - _reset_op_id() - strategys = all_to_all_common() - expect_dict = {'Default/network-_VirtualDatasetCell/_backbone-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits' - '/SoftmaxCrossEntropyWithLogits-op9': [[1, 1], [1, 1]], - 'Default/network-_VirtualDatasetCell/_backbone-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits' - '/OneHot-op10': [[1, 1], [], []], - 'Default/network-_VirtualDatasetCell/_backbone-WithLossCell/_backbone-AllToAllNet/Transpose-op11': - [[1, 1]], - 'Default/network-_VirtualDatasetCell/_backbone-WithLossCell/_backbone-AllToAllNet/MatMul-op12': - [[1, 1], [1, 1]]} - assert (strategys == expect_dict) + strategies = all_to_all_common() + for (k, v) in strategies.items(): + if re.search('SoftmaxCrossEntropyWithLogits-op', k) is not None: + assert v == [[1, 1], [1, 1]] + elif re.search('Transpose-op', k) is not None: + assert v == [[1, 1]] + elif re.search('MatMul-op', k) is not None: + assert v == [[1, 1], [1, 1]] diff --git a/tests/ut/python/pipeline/parse/test_create_obj.py b/tests/ut/python/pipeline/parse/test_create_obj.py index a702f37e0b..370445cf99 100644 --- a/tests/ut/python/pipeline/parse/test_create_obj.py +++ b/tests/ut/python/pipeline/parse/test_create_obj.py @@ -24,6 +24,7 @@ import logging import numpy as np import mindspore.nn as nn +from mindspore import context from mindspore.ops import operations as P from mindspore.common.api import ms_function from mindspore.common.tensor import Tensor @@ -50,6 +51,7 @@ class Net(nn.Cell): def test_create_cell_object_on_construct(): """ test_create_cell_object_on_construct """ log.debug("begin test_create_object_on_construct") + context.set_context(mode=context.GRAPH_MODE) np1 = np.random.randn(2, 3, 4, 5).astype(np.float32) input_me = Tensor(np1) @@ -118,6 +120,7 @@ class NetC(nn.Cell): def test_create_cell_object_on_construct_use_many_parameter(): """ test_create_cell_object_on_construct_use_many_parameter """ log.debug("begin test_create_object_on_construct") + context.set_context(mode=context.GRAPH_MODE) np1 = np.random.randn(2, 3, 4, 5).astype(np.float32) input_me = Tensor(np1) diff --git a/tests/ut/python/pipeline/parse/test_dtype.py b/tests/ut/python/pipeline/parse/test_dtype.py index a282c82f83..645eba8004 100644 --- a/tests/ut/python/pipeline/parse/test_dtype.py +++ b/tests/ut/python/pipeline/parse/test_dtype.py @@ -28,5 +28,4 @@ def try_type(): def test_dtype_convert(): - with pytest.raises(RuntimeError): - try_type() + try_type() diff --git a/tests/ut/python/pynative_mode/ops/test_grad.py b/tests/ut/python/pynative_mode/ops/test_grad.py index 25db6b15d2..b927da5b04 100644 --- a/tests/ut/python/pynative_mode/ops/test_grad.py +++ b/tests/ut/python/pynative_mode/ops/test_grad.py @@ -19,8 +19,10 @@ from mindspore.common.api import ms_function from mindspore import Tensor from mindspore.ops import composite as C from mindspore.ops.composite import grad_all_with_sens +from mindspore.common.dtype import get_py_obj_dtype import mindspore.nn as nn import mindspore.ops.operations as P +from mindspore.ops import functional as F from ...ut_filter import non_graph_engine @@ -78,6 +80,20 @@ def test_cast_grad(): assert np.all(gout[0].asnumpy() == expect) +def test_scalar_cast_grad(): + """ test_scalar_cast_grad """ + input_x = 255.5 + input_t = get_py_obj_dtype(ms.int8) + + def fx_cast(x): + output = F.scalar_cast(x, input_t) + return output + + gfn = C.grad(fx_cast)(input_x) + expect_dx = 1 + assert gfn == expect_dx + + @non_graph_engine def test_reshape_grad(): """ test_reshape_grad """ diff --git a/tests/ut/python/train/summary/test_summary_ops_params_valid_check.py b/tests/ut/python/train/summary/test_summary_ops_params_valid_check.py index b6e1ab992c..98dfd6aaef 100644 --- a/tests/ut/python/train/summary/test_summary_ops_params_valid_check.py +++ b/tests/ut/python/train/summary/test_summary_ops_params_valid_check.py @@ -163,12 +163,7 @@ def test_scalar_summary_use_invalid_tag_None(): def test_scalar_summary_use_invalid_tag_Bool(): log.debug("begin test_scalar_summary_use_invalid_tag_Bool") net = SummaryDemoTag(True, True, True) - try: - run_case(net) - except: - assert True - else: - assert False + run_case(net) log.debug("finished test_scalar_summary_use_invalid_tag_Bool") @@ -176,12 +171,7 @@ def test_scalar_summary_use_invalid_tag_Bool(): def test_scalar_summary_use_invalid_tag_null(): log.debug("begin test_scalar_summary_use_invalid_tag_null") net = SummaryDemoTag("", "", "") - try: - run_case(net) - except: - assert True - else: - assert False + run_case(net) log.debug("finished test_scalar_summary_use_invalid_tag_null") @@ -189,12 +179,7 @@ def test_scalar_summary_use_invalid_tag_null(): def test_scalar_summary_use_invalid_tag_Int(): log.debug("begin test_scalar_summary_use_invalid_tag_Int") net = SummaryDemoTag(1, 2, 3) - try: - run_case(net) - except: - assert True - else: - assert False + run_case(net) log.debug("finished test_scalar_summary_use_invalid_tag_Int") diff --git a/tests/ut/python/utils/test_serialize.py b/tests/ut/python/utils/test_serialize.py index 12937e5a83..41da45ab25 100644 --- a/tests/ut/python/utils/test_serialize.py +++ b/tests/ut/python/utils/test_serialize.py @@ -30,7 +30,7 @@ from mindspore.nn import WithLossCell, TrainOneStepCell from mindspore.train.callback import _CheckpointManager from mindspore.train.serialization import save_checkpoint, load_checkpoint,load_param_into_net, \ _exec_save_checkpoint, export, _save_graph -from ..ut_filter import run_on_onnxruntime +from ..ut_filter import run_on_onnxruntime, non_graph_engine from mindspore import context @@ -306,6 +306,7 @@ class MYNET(nn.Cell): return out +@non_graph_engine def test_export(): net = MYNET() input_data = Tensor(np.random.randint(0, 255, [1, 3, 224, 224]).astype(np.float32))