From b25d00731cfb2a82d386d971401e76eb04d608ed Mon Sep 17 00:00:00 2001
From: limingqi107 <limingqi@huawei.com>
Date: Sat, 19 Jun 2021 17:35:03 +0800
Subject: [PATCH] fix bugs in the CPU actor runtime

---
 .../cpu/sparse_tensor_dense_matmul_cpu_kernel.cc    |  3 +--
 .../cpu/sparse_to_dense_cpu_kernal.cc               |  3 +--
 .../ccsrc/runtime/device/cpu/cpu_device_address.cc  | 13 +++++++++++--
 .../ccsrc/runtime/device/gpu/gpu_device_address.cc  | 11 +++++++++++
 .../runtime/framework/actor/data_source_actor.cc    |  9 +++++++--
 .../ccsrc/runtime/framework/actor/kernel_actor.cc   |  9 +++++++--
 .../ccsrc/runtime/framework/actor/output_actor.cc   |  8 +++-----
 .../ccsrc/runtime/framework/graph_scheduler.cc      |  7 ++-----
 8 files changed, 43 insertions(+), 20 deletions(-)

diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_tensor_dense_matmul_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_tensor_dense_matmul_cpu_kernel.cc
index 68e5aeb1d2b..843665d6bc2 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_tensor_dense_matmul_cpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_tensor_dense_matmul_cpu_kernel.cc
@@ -60,11 +60,10 @@ bool SparseTensorDenseMatmulCPUKernel<I, T>::Launch(const std::vector<kernel::Ad
   auto a_values = reinterpret_cast<T *>(inputs[1]->addr);
   auto b = reinterpret_cast<T *>(inputs[3]->addr);
   auto out = reinterpret_cast<T *>(outputs[0]->addr);
-  const size_t output_length = outputs[0]->size / sizeof(T);
   const size_t indices_length = inputs[0]->size / sizeof(I);
   const size_t values_length = inputs[1]->size / sizeof(T);
   const size_t b_length = inputs[3]->size / sizeof(T);
-  if (memset_s(out, output_length, 0, output_length) != EOK) {
+  if (memset_s(out, outputs[0]->size, 0, outputs[0]->size) != EOK) {
     MS_LOG(EXCEPTION) << "Memset Failed!";
   }
 
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_to_dense_cpu_kernal.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_to_dense_cpu_kernal.cc
index 9aab399bd44..7c9661d4a53 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_to_dense_cpu_kernal.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_to_dense_cpu_kernal.cc
@@ -55,10 +55,9 @@ bool SparseToDenseCPUKernel<I, T>::Launch(const std::vector<kernel::AddressPtr>
   auto indices_addr = reinterpret_cast<I *>(inputs[0]->addr);
   auto values_addr = reinterpret_cast<T *>(inputs[1]->addr);
   auto output_addr = reinterpret_cast<T *>(outputs[0]->addr);
-  const size_t output_length = outputs[0]->size / sizeof(T);
   const size_t indices_length = inputs[0]->size / sizeof(I);
   const size_t values_length = inputs[1]->size / sizeof(T);
-  if (memset_s(output_addr, output_length, 0, output_length) != EOK) {
+  if (memset_s(output_addr, outputs[0]->size, 0, outputs[0]->size) != EOK) {
     MS_LOG(EXCEPTION) << "Memset Failed!";
   }
 
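Note on the two hunks above: the removed code passed the element count (outputs[0]->size / sizeof(T)) as both the destination bound and the fill count of memset_s, so for multi-byte element types only a fraction of the output buffer was actually cleared; the fix passes the byte size. A minimal standalone sketch of the difference (plain C++, not MindSpore code; it assumes the same securec library these kernels already link against for memset_s and EOK):

#include <cstddef>
#include <iostream>
#include <vector>

#include "securec.h"  // assumed: securec provides memset_s and EOK, as in the kernels above

int main() {
  std::vector<float> out(8, 1.0f);
  const std::size_t output_length = out.size();                 // 8 elements
  const std::size_t output_bytes = out.size() * sizeof(float);  // 32 bytes

  // Passing the element count clears only the first 8 of the 32 bytes, so most
  // of the output keeps its old contents; this is what the removed code did.
  (void)memset_s(out.data(), output_bytes, 0, output_length);
  std::cout << "after element-count memset: out[4] = " << out[4] << std::endl;  // prints 1

  // Passing the byte size, as the patch now does, clears the whole buffer.
  if (memset_s(out.data(), output_bytes, 0, output_bytes) != EOK) {
    std::cerr << "Memset failed!" << std::endl;
    return 1;
  }
  std::cout << "after byte-size memset:     out[4] = " << out[4] << std::endl;  // prints 0
  return 0;
}
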
diff --git a/mindspore/ccsrc/runtime/device/cpu/cpu_device_address.cc b/mindspore/ccsrc/runtime/device/cpu/cpu_device_address.cc
index 03e11b159ca..410de8ac867 100644
--- a/mindspore/ccsrc/runtime/device/cpu/cpu_device_address.cc
+++ b/mindspore/ccsrc/runtime/device/cpu/cpu_device_address.cc
@@ -44,8 +44,13 @@ bool CPUDeviceAddress::SyncDeviceToHost(const ShapeVector &, size_t size, TypeId
     MS_LOG(DEBUG) << "host_ptr is equal to ptr_, request ignored.";
     return true;
   }
+
   if (type == type_id_) {
-    auto ret_code = memcpy_s(host_ptr, size, ptr_, size_);
+    if ((size == 0) || (size_ == 0) || (size > size_)) {
+      MS_LOG(INFO) << "No need to sync, host size: " << size << ", device size: " << size_;
+      return true;
+    }
+    auto ret_code = memcpy_s(host_ptr, size, ptr_, size);
     if (ret_code != EOK) {
       MS_LOG(ERROR) << "Failed to copy tensor!";
       return false;
@@ -78,7 +83,11 @@ bool CPUDeviceAddress::SyncHostToDevice(const ShapeVector & /* shape */, size_t
   }
 
   if (type == type_id_) {
-    auto ret_code = memcpy_s(ptr_, size_, host_ptr, size);
+    if ((size == 0) || (size_ == 0) || (size > size_)) {
+      MS_LOG(INFO) << "No need to sync, host size: " << size << ", device size: " << size_;
+      return true;
+    }
+    auto ret_code = memcpy_s(ptr_, size, host_ptr, size);
     if (ret_code != EOK) {
       MS_LOG(ERROR) << "Failed to copy tensor!";
       return false;
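
Note on this file: SyncDeviceToHost and SyncHostToDevice now skip the copy when either side is empty or the requested host size exceeds the device allocation, and memcpy_s is bounded by the requested size rather than by size_; the GPU device address below gets the same guard plus an explicit null-pointer check. A small sketch of the guarded copy in isolation (CopyDeviceToHost is an illustrative helper, not the MindSpore API; securec.h and EOK are assumed from the library the runtime already uses):

#include <cstddef>
#include <iostream>

#include "securec.h"  // assumed: securec provides memcpy_s and EOK

// Illustrative helper mirroring the guard added to CPUDeviceAddress::SyncDeviceToHost.
bool CopyDeviceToHost(void *host_ptr, std::size_t host_size, const void *device_ptr, std::size_t device_size) {
  if (host_ptr == nullptr || device_ptr == nullptr) {
    return false;
  }
  // Nothing to copy for empty buffers, and never read more than the device owns.
  if (host_size == 0 || device_size == 0 || host_size > device_size) {
    std::cout << "No need to sync, host size: " << host_size << ", device size: " << device_size << std::endl;
    return true;
  }
  // Both the destination bound and the copy count are the host size, so memcpy_s
  // is never asked to move device_size bytes into a smaller host buffer.
  return memcpy_s(host_ptr, host_size, device_ptr, host_size) == EOK;
}

int main() {
  float device_buffer[4] = {1.0f, 2.0f, 3.0f, 4.0f};
  float host_buffer[2] = {0.0f, 0.0f};
  // The host asks for fewer bytes than the device holds: the copy proceeds,
  // bounded by the host size.
  bool ok = CopyDeviceToHost(host_buffer, sizeof(host_buffer), device_buffer, sizeof(device_buffer));
  std::cout << std::boolalpha << "copied: " << ok << ", host_buffer[1] = " << host_buffer[1] << std::endl;
  return 0;
}
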
diff --git a/mindspore/ccsrc/runtime/device/gpu/gpu_device_address.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_device_address.cc
index 5efdf9ca65a..8cf419c713e 100644
--- a/mindspore/ccsrc/runtime/device/gpu/gpu_device_address.cc
+++ b/mindspore/ccsrc/runtime/device/gpu/gpu_device_address.cc
@@ -33,10 +33,16 @@ namespace device {
 namespace gpu {
 bool GPUDeviceAddress::SyncDeviceToHost(size_t size, void *host_ptr) const {
   MS_EXCEPTION_IF_NULL(host_ptr);
+  if (ptr_ == nullptr) {
+    MS_LOG(ERROR) << "The device address is null!";
+    return false;
+  }
   bool need_sync = (size != 0) && (size_ != 0) && (size <= size_);
   if (!need_sync) {
+    MS_LOG(INFO) << "No need to sync, host size: " << size << ", device size: " << size_;
     return true;
   }
+
   auto &stream = GPUDeviceManager::GetInstance().default_stream();
   MS_EXCEPTION_IF_NULL(stream);
   auto ret = GPUDeviceManager::GetInstance().SyncStream(stream);
@@ -53,8 +59,13 @@ bool GPUDeviceAddress::SyncDeviceToHost(size_t size, void *host_ptr) const {
 
 bool GPUDeviceAddress::SyncHostToDevice(size_t size, const void *host_ptr) const {
   MS_EXCEPTION_IF_NULL(host_ptr);
+  if (ptr_ == nullptr) {
+    MS_LOG(ERROR) << "The device address is null!";
+    return false;
+  }
   bool need_sync = (size != 0) && (size_ != 0) && (size <= size_);
   if (!need_sync) {
+    MS_LOG(INFO) << "No need to sync, host size: " << size << ", device size: " << size_;
     return true;
   }
 
diff --git a/mindspore/ccsrc/runtime/framework/actor/data_source_actor.cc b/mindspore/ccsrc/runtime/framework/actor/data_source_actor.cc
index d33781c9b59..96f9ae7721c 100644
--- a/mindspore/ccsrc/runtime/framework/actor/data_source_actor.cc
+++ b/mindspore/ccsrc/runtime/framework/actor/data_source_actor.cc
@@ -140,8 +140,13 @@ void DeviceQueueDataSourceActor::OnMemoryAllocFinish(OpContext<DeviceTensor> *co
   }
 
   // Copy data from device queue by data kernel launching.
-  auto ret =
-    device_context_->LaunchKernel(data_kernel_, launch_info_.inputs_, launch_info_.workspaces_, launch_info_.outputs_);
+  bool ret = true;
+  try {
+    ret = device_context_->LaunchKernel(data_kernel_, launch_info_.inputs_, launch_info_.workspaces_,
+                                        launch_info_.outputs_);
+  } catch (const std::exception &e) {
+    MsException::Instance().SetException();
+  }
   if (!ret) {
     std::string error_info = "Launch kernel failed: " + data_kernel_->ToString();
     SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), error_info);
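
Note on this hunk (and the matching one in kernel_actor.cc below): LaunchKernel runs inside an actor callback on a worker thread. If an exception thrown during the launch escaped that callback, no caller could catch it, and with plain std::thread semantics the process would simply abort. A toy illustration of that failure mode (not MindSpore code, and assuming an ordinary worker thread; this program deliberately aborts):

#include <stdexcept>
#include <thread>

int main() {
  // If the launch throws on the worker thread and nothing catches it, the
  // exception escapes the thread function, std::terminate() runs, and the whole
  // process aborts; no error ever reaches the caller.
  std::thread actor_thread([] { throw std::runtime_error("Launch kernel failed"); });
  actor_thread.join();
  return 0;
}
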
diff --git a/mindspore/ccsrc/runtime/framework/actor/kernel_actor.cc b/mindspore/ccsrc/runtime/framework/actor/kernel_actor.cc
index 93b7650ba54..3c529b3cbf2 100644
--- a/mindspore/ccsrc/runtime/framework/actor/kernel_actor.cc
+++ b/mindspore/ccsrc/runtime/framework/actor/kernel_actor.cc
@@ -147,8 +147,13 @@ void KernelActor::OnMemoryAllocFinish(OpContext<DeviceTensor> *context) {
 
   PreLaunchKernel(context);
 
-  auto ret = device_context_->LaunchKernel(kernel_, launch_info_.inputs_, launch_info_.workspaces_,
-                                           launch_info_.outputs_, is_dynamic_shape_);
+  bool ret = true;
+  try {
+    ret = device_context_->LaunchKernel(kernel_, launch_info_.inputs_, launch_info_.workspaces_, launch_info_.outputs_,
+                                        is_dynamic_shape_);
+  } catch (const std::exception &e) {
+    MsException::Instance().SetException();
+  }
   if (!ret) {
     std::string error_info = "Launch kernel failed: " + kernel_->ToString();
     SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), error_info);
diff --git a/mindspore/ccsrc/runtime/framework/actor/output_actor.cc b/mindspore/ccsrc/runtime/framework/actor/output_actor.cc
index 55d2b5d96e4..36b343fda13 100644
--- a/mindspore/ccsrc/runtime/framework/actor/output_actor.cc
+++ b/mindspore/ccsrc/runtime/framework/actor/output_actor.cc
@@ -25,11 +25,9 @@ TensorPtr CreateOutputTensor(const AnfNodePtr &output_node, size_t output_index,
   MS_LOG(INFO) << "Create output tensor, output node: " << output_node->fullname_with_scope()
                << ", output index: " << output_index << ", output position: " << output_position;
 
-  // Create host tensor.
-  auto type_id = AnfAlgo::GetOutputDeviceDataType(output_node, output_index);
-  if (type_id == kTypeUnknown) {
-    type_id = AnfAlgo::GetOutputInferDataType(output_node, output_index);
-  }
+  // Create the host tensor. The output tensor should use the inferred type; the tensor data sync handles the case
+  // where the inferred type differs from the device type.
+  auto type_id = AnfAlgo::GetOutputInferDataType(output_node, output_index);
   std::vector<int64_t> temp_shape;
   auto shape = AnfAlgo::GetOutputInferShape(output_node, output_index);
   (void)std::copy(shape.begin(), shape.end(), std::back_inserter(temp_shape));
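
Note on this hunk: the host output tensor is now always created with the inferred data type, and any mismatch with the device type is left to the tensor data sync, as the comment above says. A toy sketch of that idea (illustrative only, not the MindSpore sync code): the device buffer holds float data while the inferred output type is double, so the sync converts element by element instead of copying raw bytes of mismatched width:

#include <algorithm>
#include <iostream>
#include <vector>

int main() {
  // The kernel wrote float data (device type), but the graph's inferred output
  // type is double, so the host tensor is allocated as double.
  std::vector<float> device_data = {1.5f, 2.5f, 3.5f};
  std::vector<double> host_tensor(device_data.size());

  // In the spirit of the device-address sync: when the host type differs from
  // the device type, convert per element rather than doing a raw byte copy.
  std::transform(device_data.begin(), device_data.end(), host_tensor.begin(),
                 [](float value) { return static_cast<double>(value); });

  for (double value : host_tensor) {
    std::cout << value << " ";
  }
  std::cout << std::endl;
  return 0;
}
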
diff --git a/mindspore/ccsrc/runtime/framework/graph_scheduler.cc b/mindspore/ccsrc/runtime/framework/graph_scheduler.cc
index 2a947a5148a..b307b34d485 100644
--- a/mindspore/ccsrc/runtime/framework/graph_scheduler.cc
+++ b/mindspore/ccsrc/runtime/framework/graph_scheduler.cc
@@ -579,11 +579,8 @@ bool GraphScheduler::Run(const ActorSet *actor_set, GraphExecutionStrategy strat
   // Get the run result.
   auto result_future = result[0].GetFuture();
   result_future.Wait();
-  if (!result_future.IsOK()) {
-    return false;
-  }
-
-  return true;
+  MsException::Instance().CheckException();
+  return result_future.IsOK();
 }
 
 ActorSet *GraphScheduler::Fetch(const ActorInfo &actor_info) const {
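
Note on the runtime hunks as a whole: the two actors park the launch exception in MsException::Instance().SetException(), and GraphScheduler::Run re-raises it on the calling thread with CheckException() once the result future completes, so the failure surfaces to the front end instead of killing the worker. A self-contained sketch of that deferred-exception pattern built on std::exception_ptr (ExceptionKeeper, LaunchOnActorThread, and the std::thread are stand-ins for MsException and the actor runtime, not the real classes):

#include <exception>
#include <iostream>
#include <mutex>
#include <stdexcept>
#include <thread>
#include <utility>

// Stand-in for MsException: keeps the first exception raised on a worker thread
// so the thread that launched the graph can re-raise it later.
class ExceptionKeeper {
 public:
  static ExceptionKeeper &Instance() {
    static ExceptionKeeper keeper;
    return keeper;
  }
  // Called inside a catch block on the worker thread.
  void SetException() {
    std::lock_guard<std::mutex> lock(mutex_);
    if (exception_ == nullptr) {
      exception_ = std::current_exception();
    }
  }
  // Called on the launching thread; re-raises and clears the stored exception.
  void CheckException() {
    std::exception_ptr exception;
    {
      std::lock_guard<std::mutex> lock(mutex_);
      std::swap(exception, exception_);
    }
    if (exception != nullptr) {
      std::rethrow_exception(exception);
    }
  }

 private:
  std::exception_ptr exception_;
  std::mutex mutex_;
};

// Plays the role of KernelActor::OnMemoryAllocFinish: the launch may throw, but
// the worker thread must not die, so the exception is parked instead.
void LaunchOnActorThread() {
  try {
    throw std::runtime_error("Launch kernel failed");
  } catch (const std::exception &) {
    ExceptionKeeper::Instance().SetException();
  }
}

int main() {
  std::thread actor_thread(LaunchOnActorThread);
  actor_thread.join();  // Stands in for result_future.Wait().

  // Plays the role of GraphScheduler::Run: surface the worker's failure here.
  try {
    ExceptionKeeper::Instance().CheckException();
  } catch (const std::exception &e) {
    std::cout << "caught on the calling thread: " << e.what() << std::endl;
  }
  return 0;
}

The keeper stores only the first exception and clears it once re-raised, so one failed launch yields one error on the calling side; this mirrors the shape of the pattern rather than MsException's exact behavior.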