!18843 New runtime: support running a kernel graph that contains more than one kernel in PyNative mode.

Merge pull request !18843 from zyli2020/mindrt_debug
This commit is contained in:
i-robot 2021-06-25 01:32:06 +00:00 committed by Gitee
commit 7f5b507190
3 changed files with 28 additions and 8 deletions

View File

@ -53,11 +53,17 @@ bool IsHostQueueDSActor(const AnfNodePtr &node, const KernelGraphPtr &graph, con
const std::vector<AnfNodePtr> &host_parameters, GraphExecutionStrategy strategy) {
MS_EXCEPTION_IF_NULL(node);
if (node->isa<Parameter>() && (!AnfAlgo::IsParameterWeight(node->cast<ParameterPtr>()))) {
// There is device address in tensor, indicating the input tensor is certain kernel's output,
// so it's unnecessary to put the input node to host queue data source actor.
if (strategy == GraphExecutionStrategy::kStep && tensor != nullptr &&
std::dynamic_pointer_cast<DeviceTensor>(tensor->device_address()) != nullptr) {
return false;
if (strategy == GraphExecutionStrategy::kStep) {
// In step mode, if the number of kernel actors in the actor set is greater than one, the actor set needs to be
// driven to run by the data source actor.
if (graph != nullptr && graph->execution_order().size() > 1) {
return true;
}
// There is device address in tensor, indicating the input tensor is certain kernel's output,
// so it's unnecessary to put the input node to host queue data source actor.
if (tensor != nullptr && std::dynamic_pointer_cast<DeviceTensor>(tensor->device_address()) != nullptr) {
return false;
}
}
if (graph == nullptr) {

View File

@ -321,6 +321,11 @@ void PrepareDataForHostDataSourceActor(const std::unordered_map<AnfNodePtr, size
}
}
}
// Checks whether the given actor set holds exactly one kernel actor, i.e. it
// corresponds to a single-op execution. Raises via MS_EXCEPTION_IF_NULL when
// actor_set is null.
inline bool IsSingleOpActorSet(const ActorSet *actor_set) {
  MS_EXCEPTION_IF_NULL(actor_set);
  const auto kernel_actor_num = actor_set->kernel_actors_.size();
  return kernel_actor_num == 1;
}
} // namespace
void GraphScheduler::Clear() {
@ -611,7 +616,7 @@ bool GraphScheduler::Run(const ActorSet *actor_set, GraphExecutionStrategy strat
}
// Trigger kernel actor running in the step execution strategy.
if (strategy == GraphExecutionStrategy::kStep) {
if (strategy == GraphExecutionStrategy::kStep && IsSingleOpActorSet(actor_set)) {
MS_EXCEPTION_IF_NULL(input_tensors);
for (auto &kernel_actor : actor_set->kernel_actors_) {
MS_EXCEPTION_IF_NULL(kernel_actor);
@ -730,7 +735,7 @@ void GraphScheduler::Link(ActorSet *actor_set, const GraphCompilerInfo &graph_co
KernelWithIndex from_kernel_with_output_idx = AnfAlgo::VisitKernelWithReturnType(input_node, 0, false);
KernelWithIndex to_kernel_with_input_idx = std::make_pair(kernel, i);
const auto &tensor = FetchInputTensor(graph_compiler_info, index, i);
TensorPtr tensor = IsSingleOpActorSet(actor_set) ? FetchInputTensor(graph_compiler_info, index, i) : nullptr;
// The gather of linking data arrows of kernel by the different from kernel type.
LinkDataArrow(kernel_actor, graph_compiler_info, graph, from_kernel_with_output_idx, to_kernel_with_input_idx,
tensor);
@ -959,7 +964,7 @@ std::vector<KernelActorPtr> GraphScheduler::BuildNoInputKernelActor(const ActorS
for (auto &kernel_actor : actor_set->kernel_actors_) {
MS_EXCEPTION_IF_NULL(kernel_actor);
// Framework will trigger kernel actor running in the step execution strategy.
if (strategy == GraphExecutionStrategy::kStep) {
if (strategy == GraphExecutionStrategy::kStep && IsSingleOpActorSet(actor_set)) {
kernel_actor->input_controls_num_++;
continue;
}

View File

@ -316,6 +316,15 @@ void GPUDeviceContext::SetOperatorInfo(const std::vector<CNodePtr> &nodes) const
void GPUDeviceContext::CreateKernel(const std::vector<CNodePtr> &nodes) const { CreateGPUKernel(nodes); }
void GPUDeviceContext::UpdateDynamicShape(const CNodePtr &kernel) const {
auto ms_context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(ms_context);
bool is_pynative_infer = ms_context->get_param<bool>(MS_CTX_ENABLE_PYNATIVE_INFER);
bool is_pynative_mode = ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) == kPynativeMode;
if (is_pynative_infer || is_pynative_mode) {
return;
}
auto kernel_mod = AnfAlgo::GetKernelMod(kernel);
MS_EXCEPTION_IF_NULL(kernel_mod);