Fix unified runtime bug of host device and graph kernel

limingqi107 2021-07-29 11:02:28 +08:00
parent 8257b469f5
commit 86c69d116b
4 changed files with 13 additions and 8 deletions


@@ -1216,7 +1216,6 @@ void KernelGraph::UpdateGraphOutputMap(const std::vector<AnfWithOutIndex> &old_o
     if (old_output == new_output) {
       continue;
     }
     // Update the graph output map.
     if (graph_output_to_front_node_map_.count(old_output) > 0) {
       MS_LOG(INFO) << "Replace backend output node " << old_output.first->fullname_with_scope() << " with index "
@@ -1226,11 +1225,6 @@ void KernelGraph::UpdateGraphOutputMap(const std::vector<AnfWithOutIndex> &old_o
       graph_output_to_front_node_map_.erase(old_output);
     }
-    // Update the internal output map.
-    if (IsInternalOutput(old_output.first, old_output.second)) {
-      ReplaceInternalOutput(old_output.first, new_output.first, old_output.second, new_output.second);
-    }
     if (old_output.first == new_output.first) {
       continue;
     }
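The two hunks above drop the duplicated internal-output update and keep only the graph-output rekeying. That rekeying is a plain rekey-then-erase pattern on an ordered map. A minimal, self-contained sketch of the pattern, with a (name, index) pair standing in for the real AnfWithOutIndex key (the stand-in types are assumptions, not the real MindSpore definitions):

#include <cstddef>
#include <iostream>
#include <map>
#include <string>
#include <utility>

// Stand-in for AnfWithOutIndex: a node name plus an output index.
using AnfWithOutIndex = std::pair<std::string, size_t>;

int main() {
  std::map<AnfWithOutIndex, std::string> graph_output_to_front_node_map{{{"old_kernel", 0}, "front_node"}};
  AnfWithOutIndex old_output{"old_kernel", 0};
  AnfWithOutIndex new_output{"new_kernel", 0};

  // Same pattern as the diff: copy the mapped value under the new key,
  // then erase the old key, so the front node now resolves via new_output.
  if (graph_output_to_front_node_map.count(old_output) > 0) {
    graph_output_to_front_node_map[new_output] = graph_output_to_front_node_map[old_output];
    graph_output_to_front_node_map.erase(old_output);
  }
  std::cout << graph_output_to_front_node_map.begin()->first.first << std::endl;  // prints new_kernel
  return 0;
}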


@@ -32,7 +32,7 @@ void ComputeThreadNums(size_t *actor_thread_num, size_t *OMP_thread_num) {
   auto context_ptr = MsContext::GetInstance();
   MS_EXCEPTION_IF_NULL(context_ptr);
   // The pyNative mode is the step execution strategy, so only need the kActorThreadMinNum.
-  if (context_ptr->get_param<bool>(MS_CTX_SAVE_GRAPHS_FLAG) == kPynativeMode) {
+  if (context_ptr->get_param<int>(MS_CTX_EXECUTION_MODE) == kPynativeMode) {
     *actor_thread_num = kActorThreadMinNum;
   } else {
     *actor_thread_num = cpu_core_num < kActorThreadMinNum ? kActorThreadMinNum : cpu_core_num;
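The one-line fix above is the core of the change: the old code read the boolean save-graphs flag and compared it against kPynativeMode, so the mode could be misclassified whenever the flag's integer value happened to equal the mode constant; the execution mode itself is an int parameter. A minimal sketch of the corrected selection logic (the constant values here are assumptions for illustration):

#include <algorithm>
#include <cstddef>

constexpr int kPynativeMode = 1;          // assumed value of the mode enum
constexpr size_t kActorThreadMinNum = 2;  // assumed minimum thread count

// PyNative mode executes step by step, so the minimum number of actor
// threads suffices; graph mode scales with the available cores.
size_t SelectActorThreadNum(int execution_mode, size_t cpu_core_num) {
  if (execution_mode == kPynativeMode) {
    return kActorThreadMinNum;
  }
  // Equivalent to the diff's ternary: never go below the minimum.
  return std::max(cpu_core_num, kActorThreadMinNum);
}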
@@ -117,6 +117,10 @@ bool IsGatherActor(const AnfNodePtr &front_node,
 bool Copy(DeviceTensor *dst_device_tensor, const DeviceTensor *src_device_tensor) {
   MS_EXCEPTION_IF_NULL(dst_device_tensor);
   MS_EXCEPTION_IF_NULL(src_device_tensor);
+  if (src_device_tensor->GetSize() != dst_device_tensor->GetSize()) {
+    MS_LOG(WARNING) << " Copy size is not equal, input size:" << src_device_tensor->GetSize()
+                    << ", output size:" << dst_device_tensor->GetSize();
+  }
   // Exist the size alignment in some device, so get the min device size.
   size_t copy_size = std::min(src_device_tensor->GetSize(), dst_device_tensor->GetSize());
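Clamping the copy to the smaller of the two sizes is what makes the new warning non-fatal: some devices align allocation sizes upward, so the two endpoints can legitimately disagree. A standalone sketch of the clamped copy, with raw buffers standing in for DeviceTensor (names are illustrative):

#include <algorithm>
#include <cstddef>
#include <cstring>

// Copy at most min(src_size, dst_size) bytes so that a size mismatch
// caused by device-side alignment never reads or writes out of bounds.
void ClampedCopy(void *dst, size_t dst_size, const void *src, size_t src_size) {
  size_t copy_size = std::min(src_size, dst_size);
  std::memcpy(dst, src, copy_size);
}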


@@ -72,6 +72,13 @@ void CopyActor::SendMemoryFreeReq(OpContext<DeviceTensor> *context) {
 void CopyActor::OnMemoryAllocFinish(OpContext<DeviceTensor> *context) {
   MS_EXCEPTION_IF_NULL(context);
   MS_EXCEPTION_IF_NULL(output_device_tensor_[0]);
+  MS_EXCEPTION_IF_NULL(input_device_tensor_[0]);
+  if (input_device_tensor_[0]->GetSize() != output_device_tensor_[0]->GetSize()) {
+    MS_LOG(WARNING) << GetAID().Name() << " copy size is not equal, input size:" << input_device_tensor_[0]->GetSize()
+                    << ", output size:" << output_device_tensor_[0]->GetSize();
+  }
   if (!Copy(output_device_tensor_[0], input_device_tensor_[0])) {
     std::string error_info = "Copy device tensor failed: " + GetAID().Name();
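The actor-side check mirrors the one in Copy: a size mismatch is logged as a warning rather than treated as an error, because alignment padding can make the sizes differ while the clamped copy stays safe; only a failed copy is escalated. A sketch of that design choice (the function and names are illustrative, not the real actor API):

#include <cstddef>
#include <cstdio>

// Report a mismatch but let the (clamped) copy proceed; the caller
// escalates only an actual copy failure to an error.
void WarnIfSizeMismatch(size_t input_size, size_t output_size, const char *actor_name) {
  if (input_size != output_size) {
    std::fprintf(stderr, "%s copy size is not equal, input size:%zu, output size:%zu\n", actor_name, input_size,
                 output_size);
  }
}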


@@ -18,7 +18,7 @@
 #include "thread/core_affinity.h"
 namespace mindspore {
-constexpr size_t MAX_READY_ACTOR_NR = 1024;
+constexpr size_t MAX_READY_ACTOR_NR = 4096;
 void ActorWorker::CreateThread(ActorThreadPool *pool) {
   THREAD_RETURN_IF_NULL(pool);
   pool_ = pool;
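Raising MAX_READY_ACTOR_NR from 1024 to 4096 widens the bound on how many actors may be queued as ready at once; large fused graphs can exceed the old bound. A sketch of why such a constant matters, assuming the ready list behaves like a fixed-capacity ring queue (the real ActorThreadPool structure may differ):

#include <array>
#include <cstddef>

template <typename T, size_t Capacity>
class ReadyQueue {
 public:
  // Fails when the queue is full: with too small a Capacity, a burst of
  // ready actors from a big graph cannot all be enqueued.
  bool Enqueue(const T &item) {
    if (size_ == Capacity) {
      return false;
    }
    buffer_[(head_ + size_) % Capacity] = item;
    ++size_;
    return true;
  }

  bool Dequeue(T *item) {
    if (size_ == 0) {
      return false;
    }
    *item = buffer_[head_];
    head_ = (head_ + 1) % Capacity;
    --size_;
    return true;
  }

 private:
  std::array<T, Capacity> buffer_{};
  size_t head_{0};
  size_t size_{0};
};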