enable mindRT

2021-06-29 12:08:43 +08:00 · 2021-06-29 12:08:43 +08:00 · 406c252834
parent 29e7da4c3e
commit 406c252834
14 changed files with 56 additions and 39 deletions
--- a/mindspore/ccsrc/debug/debugger/debugger.cc
+++ b/mindspore/ccsrc/debug/debugger/debugger.cc
@ -35,7 +35,6 @@
 #include "utils/config_manager.h"
 #include "debug/env_config_parser.h"
 #include "utils/comm_manager.h"
-#include "runtime/framework/actor/actor_common.h"
 #include "runtime/hardware/device_context_manager.h"
 #include "debug/anf_ir_dump.h"
 #ifdef ENABLE_DEBUGGER
@ -237,7 +236,7 @@ bool Debugger::CheckDebuggerDumpEnabled() const {
  // see if dump is enabled
  if (device_target_ == kGPUDevice) {
    return device::KernelRuntime::DumpDataEnabled();
-  } else if (IsMindRTUsed()) {
+  } else if (MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT)) {
    auto &dump_json_parser = DumpJsonParser::GetInstance();
    return dump_json_parser.e2e_dump_enabled();
  }
@ -1292,7 +1291,7 @@ void Debugger::LoadSingleAnfnode(const AnfNodePtr &anf_node, const size_t output
    return;
  }
  // When MindRT is used, only ValueNodes and ParameterWeights can be loaded from device to host
-  if (IsMindRTUsed() && (device_target_ == kGPUDevice)) {
+  if (MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT) && (device_target_ == kGPUDevice)) {
    if (!anf_node->isa<ValueNode>() &&
        !(anf_node->isa<Parameter>() && AnfAlgo::IsParameterWeight(anf_node->cast<ParameterPtr>()))) {
      return;
--- a/mindspore/ccsrc/frontend/parallel/group_manager.cc
+++ b/mindspore/ccsrc/frontend/parallel/group_manager.cc
@ -20,7 +20,6 @@
 #include <utility>
 #if !defined(NO_DLIB) || defined(ENABLE_GPU)
 #include "backend/session/executor_manager.h"
-#include "runtime/framework/actor/actor_common.h"
 #else
 #include "frontend/parallel/parallel_stub/executor_manager_stub.h"
 #endif
@ -74,7 +73,7 @@ GroupManager::GroupManager() { groups_.clear(); }
 #if !defined(NO_DLIB) || defined(ENABLE_GPU)
 bool GroupManager::CreateGroupByExecutor(const std::string &device_name, const std::string &group_name,
                                         const std::vector<uint32_t> ranks, int device_id) {
-  if (IsMindRTUsed()) {
+  if (MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT)) {
    return CommManager::GetInstance().CreateGroupSync(group_name, ranks);
  } else {
    auto executor = session::ExecutorManager::Instance().GetExecutor(device_name, device_id);
@ -85,7 +84,7 @@ bool GroupManager::CreateGroupByExecutor(const std::string &device_name, const s

 bool GroupManager::DestroyGroupByExecutor(const std::string &device_name, const std::string &group_name,
                                          int device_id) {
-  if (IsMindRTUsed()) {
+  if (MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT)) {
    return CommManager::GetInstance().DestroyGroup(group_name);
  } else {
    auto executor = session::ExecutorManager::Instance().GetExecutor(device_name, device_id);
@ -104,7 +103,7 @@ Status CreateGroups(const std::vector<std::pair<std::string, std::vector<uint32_
  MS_EXCEPTION_IF_NULL(executor);
  for (auto &group : group_info) {
    bool ret = true;
-    if (IsMindRTUsed()) {
+    if (context_ptr->get_param<bool>(MS_CTX_ENABLE_MINDRT)) {
      ret = CommManager::GetInstance().CreateGroupSync(group.first, group.second);
    } else {
      ret = executor->CreateCommGroup(group.first, group.second);
--- a/mindspore/ccsrc/pipeline/jit/action.cc
+++ b/mindspore/ccsrc/pipeline/jit/action.cc
@ -44,7 +44,6 @@
 #include "vm/transform.h"
 #include "parse/python_adapter.h"
 #include "frontend/optimizer/py_pass_manager.h"
-#include "runtime/framework/actor/actor_common.h"
 #if (ENABLE_CPU && !_WIN32)
 #include "ps/parameter_server.h"
 #include "ps/scheduler.h"
@ -565,7 +564,7 @@ bool TaskEmitAction(const ResourcePtr &res) {
  }

  // The graph compiling of mindRT.
-  if ((backend == kMsConvert) && IsMindRTUsed()) {
+  if ((backend == kMsConvert) && context_ptr->get_param<bool>(MS_CTX_ENABLE_MINDRT)) {
    TaskEmitActionForMindRT(res);
    return true;
  }
@ -595,7 +594,7 @@ bool ExecuteAction(const ResourcePtr &res) {
  std::string backend = MsContext::GetInstance()->backend_policy();

  // The graph running of mindRT.
-  if ((backend == kMsConvert) && IsMindRTUsed()) {
+  if ((backend == kMsConvert) && MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT)) {
    ExecuteActionForMindRT(res);
    return true;
  }
--- a/mindspore/ccsrc/pipeline/jit/pipeline.cc
+++ b/mindspore/ccsrc/pipeline/jit/pipeline.cc
@ -53,7 +53,6 @@
 #include "load_mindir/load_model.h"
 #include "pipeline/jit/prim_bprop_optimizer.h"
 #include "runtime/hardware/device_context_manager.h"
-#include "runtime/framework/actor/actor_common.h"
 #include "utils/crypto.h"

 #if ((defined ENABLE_CPU) && (!defined _WIN32))
@ -168,7 +167,7 @@ void SetGpuLoopSink(const ResourcePtr &resource) {
    auto manager = func_graph->manager();
    size_t graph_nums = manager->func_graphs().size();
    int64_t sinksize = ConfigManager::GetInstance().iter_num();
-    if (graph_nums == 1 || IsMindRTUsed()) {
+    if (graph_nums == 1 || MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT)) {
      resource->set_gpu_loopsink(true, sinksize);
    } else {
      resource->set_gpu_loopsink(false, sinksize);
@ -633,7 +632,8 @@ std::vector<ActionItem> GetPipeline(const ResourcePtr &resource, const std::stri
 #endif

  if (use_vm && backend != "ge" && !is_air) {
-    // Create backend and session
+    compile::SetMindRTEnable();
+    // Create backend.
    auto backend_ptr = compile::CreateBackend();
    // Connect session to debugger
    backend_ptr->SetDebugger();
@ -1115,10 +1115,11 @@ bool InitExecDatasetVm(const std::string &queue_name, int64_t size, int64_t batc
  // Before the graph compiling, need reset the iter num.
  ConfigManager::GetInstance().ResetIterNum();

+  compile::SetMindRTEnable();
  auto backend = compile::CreateBackend();
  MS_EXCEPTION_IF_NULL(backend);
  // The data set graph compiling and running of mindRT.
-  if (IsMindRTUsed()) {
+  if (MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT)) {
    const auto &mindrt_backend = std::dynamic_pointer_cast<compile::MindRTBackend>(backend);
    MS_EXCEPTION_IF_NULL(mindrt_backend);
    auto &actor_info = mindrt_backend->CompileGraphs(func_graph);
--- a/mindspore/ccsrc/pipeline/pynative/pynative_execute.cc
+++ b/mindspore/ccsrc/pipeline/pynative/pynative_execute.cc
@ -65,7 +65,6 @@
 #endif

 #include "debug/anf_ir_dump.h"
-#include "runtime/framework/actor/actor_common.h"
 #include "runtime/hardware/device_context_manager.h"

 using mindspore::tensor::TensorPy;
@ -1691,8 +1690,9 @@ py::object ForwardExecutor::RunOpInMs(const OpExecInfoPtr &op_exec_info, Pynativ
  MS_LOG(DEBUG) << "Start run op [" << op_exec_info->op_name << "] with backend policy ms";
  auto ms_context = MsContext::GetInstance();
  ms_context->set_param<bool>(MS_CTX_ENABLE_PYNATIVE_INFER, true);
+  compile::SetMindRTEnable();

-  if (kSession == nullptr && !IsMindRTUsed()) {
+  if (kSession == nullptr && !ms_context->get_param<bool>(MS_CTX_ENABLE_MINDRT)) {
    std::string device_target = ms_context->get_param<std::string>(MS_CTX_DEVICE_TARGET);
    kSession = session::SessionFactory::Get().Create(device_target);
    MS_EXCEPTION_IF_NULL(kSession);
@ -1723,7 +1723,7 @@ py::object ForwardExecutor::RunOpInMs(const OpExecInfoPtr &op_exec_info, Pynativ
                                    op_exec_info->next_input_index};
 #endif
  VectorRef outputs;
-  if (!IsMindRTUsed()) {
+  if (!ms_context->get_param<bool>(MS_CTX_ENABLE_MINDRT)) {
    kSession->RunOp(&op_run_info, graph_info, &input_tensors, &outputs, tensors_mask);
  } else {
    if (mind_rt_backend == nullptr) {
@ -2834,17 +2834,17 @@ void PynativeExecutor::GradNet(const prim::GradOperationPtr &grad, const py::obj
 }

 void PynativeExecutor::Sync() {
-  if (!IsMindRTUsed()) {
+  auto ms_context = MsContext::GetInstance();
+  MS_EXCEPTION_IF_NULL(ms_context);
+
+  if (!ms_context->get_param<bool>(MS_CTX_ENABLE_MINDRT)) {
    if (kSession == nullptr) {
      MS_EXCEPTION(NotExistsError) << "No session has been created!";
    }
    kSession->SyncStream();
  } else {
-    auto ms_context = MsContext::GetInstance();
-    MS_EXCEPTION_IF_NULL(ms_context);
    std::string device_target = ms_context->get_param<std::string>(MS_CTX_DEVICE_TARGET);
    uint32_t device_id = ms_context->get_param<uint32_t>(MS_CTX_DEVICE_ID);
-
    const auto &device_context =
      device::DeviceContextManager::GetInstance().GetOrCreateDeviceContext({device_target, device_id});
    MS_EXCEPTION_IF_NULL(device_context);
--- a/mindspore/ccsrc/profiler/device/gpu/gpu_data_saver.cc
+++ b/mindspore/ccsrc/profiler/device/gpu/gpu_data_saver.cc
@ -19,7 +19,7 @@
 #include "sys/stat.h"
 #include "utils/log_adapter.h"
 #include "utils/ms_utils.h"
-#include "runtime/framework/actor/actor_common.h"
+#include "utils/ms_context.h"

 namespace mindspore {
 namespace profiler {
@ -122,10 +122,11 @@ void GpuDataSaver::WriteFile(std::string out_path_dir, const BaseTime &start_tim
  WriteActivity(out_path_dir);
  WriteOpTimestamp(out_path_dir);
  WriteStartTime(out_path_dir, start_time);
-  if (IsMindRTUsed())
+  if (MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT)) {
    WriteStepTraceAsyncLaunchKernel(out_path_dir);
-  else
+  } else {
    WriteStepTrace(out_path_dir);
+  }
 }

 void GpuDataSaver::WriteActivity(const std::string &saver_base_dir) {
--- a/mindspore/ccsrc/profiler/device/gpu/gpu_profiling.cc
+++ b/mindspore/ccsrc/profiler/device/gpu/gpu_profiling.cc
@ -25,7 +25,7 @@
 #include "pybind_api/api_register.h"
 #include "utils/log_adapter.h"
 #include "utils/utils.h"
-#include "runtime/framework/actor/actor_common.h"
+#include "utils/ms_context.h"

 namespace mindspore {
 namespace profiler {
@ -434,7 +434,7 @@ void GPUProfiler::OpDataProducerBegin(const std::string op_name, void *stream) {
  }
  SetRunTimeData(op_name, stream);

-  if (IsMindRTUsed()) RecordOneStepStartEndInfo(op_name);
+  if (MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT)) RecordOneStepStartEndInfo(op_name);
 }

 void GPUProfiler::OpDataProducerEnd() {
--- a/mindspore/ccsrc/runtime/framework/actor/actor_common.cc
+++ b/mindspore/ccsrc/runtime/framework/actor/actor_common.cc
@ -109,8 +109,4 @@ bool IsGatherActor(const AnfNodePtr &front_node,
  return false;
 }
 }  // namespace runtime
-
-// Judge whether to use mindRT. GPU and CPU use mindRT currently, and other hardwares will use it in the future.
-// Return false in the transitional stage.
-bool IsMindRTUsed() { return false; }
 }  // namespace mindspore
--- a/mindspore/ccsrc/runtime/framework/actor/actor_common.h
+++ b/mindspore/ccsrc/runtime/framework/actor/actor_common.h
@ -70,9 +70,6 @@ bool IsPersistentDeviceTensor(const AnfNodePtr &node);
 bool IsGatherActor(const AnfNodePtr &front_node,
                   const std::unordered_map<std::string, OpActor<DeviceTensor> *> &actor_name_to_actor);
 }  // namespace runtime
-
-// Judge whether to use mindRT. GPU and CPU use mindRT currently, and other hardwares will use it in the future.
-bool IsMindRTUsed();
 }  // namespace mindspore

 #endif  // MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_ACTOR_ACTOR_COMMON_H_
--- a/mindspore/ccsrc/vm/transform.cc
+++ b/mindspore/ccsrc/vm/transform.cc
@ -32,7 +32,9 @@
 #include "utils/ms_context.h"
 #include "debug/trace.h"
 #include "debug/anf_ir_dump.h"
-#include "runtime/framework/actor/actor_common.h"
+#if (ENABLE_CPU && !_WIN32)
+#include "ps/ps_context.h"
+#endif

 namespace mindspore {
 namespace compile {
@ -540,13 +542,13 @@ BackendPtr CreateBackend() {
    uint32_t device_id = context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID);
    BackendPtr backend = nullptr;
    // Create MindRTBackend or MsBackend according to whether mindrt is used.
-    if (IsMindRTUsed()) {
+    if (context_ptr->get_param<bool>(MS_CTX_ENABLE_MINDRT)) {
      backend = std::make_shared<MindRTBackend>(name, target, device_id);
    } else {
      backend = std::make_shared<MsBackend>(name, target, device_id);
    }
-    std::string device_target = MsContext::GetInstance()->get_param<std::string>(MS_CTX_DEVICE_TARGET);
-    if (device_target == kAscendDevice) {
+
+    if (target == kAscendDevice) {
      if (MsContext::GetInstance()->get_param<int>(MS_CTX_EXECUTION_MODE) == kPynativeMode) {
        backend->set_is_multi_graph_sink(false);
        context_ptr->set_param<bool>(MS_CTX_IS_MULTI_GRAPH_SINK, false);
@ -560,5 +562,23 @@ BackendPtr CreateBackend() {

  return std::make_shared<Backend>(name);
 }
+
+void SetMindRTEnable() {
+  auto context_ptr = MsContext::GetInstance();
+  MS_EXCEPTION_IF_NULL(context_ptr);
+  std::string target = context_ptr->get_param<std::string>(MS_CTX_DEVICE_TARGET);
+  if (target != kGPUDevice) {
+    return;
+  }
+#if (ENABLE_CPU && !_WIN32)
+  if (ps::PSContext::instance()->is_ps_mode()) {
+    return;
+  }
+#endif
+
+  MS_LOG(INFO) << "Enable mindRT.";
+  context_ptr->set_param<bool>(MS_CTX_ENABLE_MINDRT, true);
+}
+
 }  // namespace compile
 }  // namespace mindspore
--- a/mindspore/ccsrc/vm/transform.h
+++ b/mindspore/ccsrc/vm/transform.h
@ -134,6 +134,9 @@ class CompileGraphs {

 BackendPtr CreateBackend();

+// Enable mindRT in MsContext. Currently only for the GPU target outside PS mode; other hardware will follow.
+void SetMindRTEnable();
+
 }  // namespace compile
 }  // namespace mindspore

--- a/mindspore/core/utils/ms_context.cc
+++ b/mindspore/core/utils/ms_context.cc
@ -86,6 +86,7 @@ MsContext::MsContext(const std::string &policy, const std::string &target) {
  set_param<bool>(MS_CTX_GRAD_FOR_SCALAR, false);
  set_param<bool>(MS_CTX_SAVE_COMPILE_CACHE, false);
  set_param<bool>(MS_CTX_LOAD_COMPILE_CACHE, false);
+  set_param<bool>(MS_CTX_ENABLE_MINDRT, false);

  backend_policy_ = policy_map_[policy];
 }
--- a/mindspore/core/utils/ms_context.h
+++ b/mindspore/core/utils/ms_context.h
@ -88,6 +88,7 @@ enum MsCtxParam : unsigned {
  MS_CTX_GRAD_FOR_SCALAR,
  MS_CTX_SAVE_COMPILE_CACHE,
  MS_CTX_LOAD_COMPILE_CACHE,
+  MS_CTX_ENABLE_MINDRT,
  MS_CTX_TYPE_BOOL_END,

  // parameter of type int
--- a/tests/st/auto_monad/test_auto_monad_gpu.py
+++ b/tests/st/auto_monad/test_auto_monad_gpu.py
@ -226,7 +226,7 @@ class SideEffectTwoAssignTwoAddnDependencyNet(Cell):
        return grad_out


-@pytest.mark.level0
+@pytest.mark.level1
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
 def test_ctrl_while_by_while_and_if_in_first_while():
@ -262,7 +262,7 @@ def test_ctrl_while_by_while_and_if_in_first_while():
    net(input_me_a)


-@pytest.mark.level0
+@pytest.mark.level1
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
 def test_ctrl_while_by_while_and_while_in_first_while():