From b25d00731cfb2a82d386d971401e76eb04d608ed Mon Sep 17 00:00:00 2001 From: limingqi107 <limingqi@huawei.com> Date: Sat, 19 Jun 2021 17:35:03 +0800 Subject: [PATCH] fix bug of CPU actor runtime --- .../cpu/sparse_tensor_dense_matmul_cpu_kernel.cc | 3 +-- .../cpu/sparse_to_dense_cpu_kernal.cc | 3 +-- .../ccsrc/runtime/device/cpu/cpu_device_address.cc | 13 +++++++++++-- .../ccsrc/runtime/device/gpu/gpu_device_address.cc | 11 +++++++++++ .../runtime/framework/actor/data_source_actor.cc | 9 +++++++-- .../ccsrc/runtime/framework/actor/kernel_actor.cc | 9 +++++++-- .../ccsrc/runtime/framework/actor/output_actor.cc | 8 +++----- .../ccsrc/runtime/framework/graph_scheduler.cc | 7 ++----- 8 files changed, 43 insertions(+), 20 deletions(-) diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_tensor_dense_matmul_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_tensor_dense_matmul_cpu_kernel.cc index 68e5aeb1d2b..843665d6bc2 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_tensor_dense_matmul_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_tensor_dense_matmul_cpu_kernel.cc @@ -60,11 +60,10 @@ bool SparseTensorDenseMatmulCPUKernel<I, T>::Launch(const std::vector<kernel::Ad auto a_values = reinterpret_cast<T *>(inputs[1]->addr); auto b = reinterpret_cast<T *>(inputs[3]->addr); auto out = reinterpret_cast<T *>(outputs[0]->addr); - const size_t output_length = outputs[0]->size / sizeof(T); const size_t indices_length = inputs[0]->size / sizeof(I); const size_t values_length = inputs[1]->size / sizeof(T); const size_t b_length = inputs[3]->size / sizeof(T); - if (memset_s(out, output_length, 0, output_length) != EOK) { + if (memset_s(out, outputs[0]->size, 0, outputs[0]->size) != EOK) { MS_LOG(EXCEPTION) << "Memset Failed!"; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_to_dense_cpu_kernal.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_to_dense_cpu_kernal.cc index 
9aab399bd44..7c9661d4a53 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_to_dense_cpu_kernal.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_to_dense_cpu_kernal.cc @@ -55,10 +55,9 @@ bool SparseToDenseCPUKernel<I, T>::Launch(const std::vector<kernel::AddressPtr> auto indices_addr = reinterpret_cast<I *>(inputs[0]->addr); auto values_addr = reinterpret_cast<T *>(inputs[1]->addr); auto output_addr = reinterpret_cast<T *>(outputs[0]->addr); - const size_t output_length = outputs[0]->size / sizeof(T); const size_t indices_length = inputs[0]->size / sizeof(I); const size_t values_length = inputs[1]->size / sizeof(T); - if (memset_s(output_addr, output_length, 0, output_length) != EOK) { + if (memset_s(output_addr, outputs[0]->size, 0, outputs[0]->size) != EOK) { MS_LOG(EXCEPTION) << "Memset Failed!"; } diff --git a/mindspore/ccsrc/runtime/device/cpu/cpu_device_address.cc b/mindspore/ccsrc/runtime/device/cpu/cpu_device_address.cc index 03e11b159ca..410de8ac867 100644 --- a/mindspore/ccsrc/runtime/device/cpu/cpu_device_address.cc +++ b/mindspore/ccsrc/runtime/device/cpu/cpu_device_address.cc @@ -44,8 +44,13 @@ bool CPUDeviceAddress::SyncDeviceToHost(const ShapeVector &, size_t size, TypeId MS_LOG(DEBUG) << "host_ptr is equal to ptr_, request ignored."; return true; } + if (type == type_id_) { - auto ret_code = memcpy_s(host_ptr, size, ptr_, size_); + if ((size == 0) || (size_ == 0) || (size > size_)) { + MS_LOG(INFO) << "No need sync, host size: " << size << ", device size: " << size_; + return true; + } + auto ret_code = memcpy_s(host_ptr, size, ptr_, size); if (ret_code != EOK) { MS_LOG(ERROR) << "Failed to copy tensor!"; return false; @@ -78,7 +83,11 @@ bool CPUDeviceAddress::SyncHostToDevice(const ShapeVector & /* shape */, size_t } if (type == type_id_) { - auto ret_code = memcpy_s(ptr_, size_, host_ptr, size); + if ((size == 0) || (size_ == 0) || (size > size_)) { + MS_LOG(INFO) << "No need sync, host size: " << size << ", device size: " 
<< size_; + return true; + } + auto ret_code = memcpy_s(ptr_, size, host_ptr, size); if (ret_code != EOK) { MS_LOG(ERROR) << "Failed to copy tensor!"; return false; diff --git a/mindspore/ccsrc/runtime/device/gpu/gpu_device_address.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_device_address.cc index 5efdf9ca65a..8cf419c713e 100644 --- a/mindspore/ccsrc/runtime/device/gpu/gpu_device_address.cc +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_device_address.cc @@ -33,10 +33,16 @@ namespace device { namespace gpu { bool GPUDeviceAddress::SyncDeviceToHost(size_t size, void *host_ptr) const { MS_EXCEPTION_IF_NULL(host_ptr); + if (ptr_ == nullptr) { + MS_LOG(ERROR) << "The device address is null!"; + return false; + } bool need_sync = (size != 0) && (size_ != 0) && (size <= size_); if (!need_sync) { + MS_LOG(INFO) << "No need sync, host size: " << size << ", device size: " << size_; return true; } + auto &stream = GPUDeviceManager::GetInstance().default_stream(); MS_EXCEPTION_IF_NULL(stream); auto ret = GPUDeviceManager::GetInstance().SyncStream(stream); @@ -53,8 +59,13 @@ bool GPUDeviceAddress::SyncDeviceToHost(size_t size, void *host_ptr) const { bool GPUDeviceAddress::SyncHostToDevice(size_t size, const void *host_ptr) const { MS_EXCEPTION_IF_NULL(host_ptr); + if (ptr_ == nullptr) { + MS_LOG(ERROR) << "The device address is null!"; + return false; + } bool need_sync = (size != 0) && (size_ != 0) && (size <= size_); if (!need_sync) { + MS_LOG(INFO) << "No need sync, host size: " << size << ", device size: " << size_; return true; } diff --git a/mindspore/ccsrc/runtime/framework/actor/data_source_actor.cc b/mindspore/ccsrc/runtime/framework/actor/data_source_actor.cc index d33781c9b59..96f9ae7721c 100644 --- a/mindspore/ccsrc/runtime/framework/actor/data_source_actor.cc +++ b/mindspore/ccsrc/runtime/framework/actor/data_source_actor.cc @@ -140,8 +140,13 @@ void DeviceQueueDataSourceActor::OnMemoryAllocFinish(OpContext<DeviceTensor> *co } // Copy data from device queue by 
data kernel launching. - auto ret = - device_context_->LaunchKernel(data_kernel_, launch_info_.inputs_, launch_info_.workspaces_, launch_info_.outputs_); + bool ret = true; + try { + ret = device_context_->LaunchKernel(data_kernel_, launch_info_.inputs_, launch_info_.workspaces_, + launch_info_.outputs_); + } catch (const std::exception &e) { + MsException::Instance().SetException(); + } if (!ret) { std::string error_info = "Launch kernel failed: " + data_kernel_->ToString(); SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), error_info); diff --git a/mindspore/ccsrc/runtime/framework/actor/kernel_actor.cc b/mindspore/ccsrc/runtime/framework/actor/kernel_actor.cc index 93b7650ba54..3c529b3cbf2 100644 --- a/mindspore/ccsrc/runtime/framework/actor/kernel_actor.cc +++ b/mindspore/ccsrc/runtime/framework/actor/kernel_actor.cc @@ -147,8 +147,13 @@ void KernelActor::OnMemoryAllocFinish(OpContext<DeviceTensor> *context) { PreLaunchKernel(context); - auto ret = device_context_->LaunchKernel(kernel_, launch_info_.inputs_, launch_info_.workspaces_, - launch_info_.outputs_, is_dynamic_shape_); + bool ret = true; + try { + ret = device_context_->LaunchKernel(kernel_, launch_info_.inputs_, launch_info_.workspaces_, launch_info_.outputs_, + is_dynamic_shape_); + } catch (const std::exception &e) { + MsException::Instance().SetException(); + } if (!ret) { std::string error_info = "Launch kernel failed: " + kernel_->ToString(); SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), error_info); diff --git a/mindspore/ccsrc/runtime/framework/actor/output_actor.cc b/mindspore/ccsrc/runtime/framework/actor/output_actor.cc index 55d2b5d96e4..36b343fda13 100644 --- a/mindspore/ccsrc/runtime/framework/actor/output_actor.cc +++ b/mindspore/ccsrc/runtime/framework/actor/output_actor.cc @@ -25,11 +25,9 @@ TensorPtr CreateOutputTensor(const AnfNodePtr &output_node, size_t output_index, MS_LOG(INFO) << "Create output tensor, output node: " << output_node->fullname_with_scope() << ", output index: " << 
output_index << ", output position: " << output_position; - // Create host tensor. - auto type_id = AnfAlgo::GetOutputDeviceDataType(output_node, output_index); - if (type_id == kTypeUnknown) { - type_id = AnfAlgo::GetOutputInferDataType(output_node, output_index); - } + // Create host tensor, the output tensor should use the infer type, it will be handled correctly by tensor data sync + // when infer type is not equal to device type. + auto type_id = AnfAlgo::GetOutputInferDataType(output_node, output_index); std::vector<int64_t> temp_shape; auto shape = AnfAlgo::GetOutputInferShape(output_node, output_index); (void)std::copy(shape.begin(), shape.end(), std::back_inserter(temp_shape)); diff --git a/mindspore/ccsrc/runtime/framework/graph_scheduler.cc b/mindspore/ccsrc/runtime/framework/graph_scheduler.cc index 2a947a5148a..b307b34d485 100644 --- a/mindspore/ccsrc/runtime/framework/graph_scheduler.cc +++ b/mindspore/ccsrc/runtime/framework/graph_scheduler.cc @@ -579,11 +579,8 @@ bool GraphScheduler::Run(const ActorSet *actor_set, GraphExecutionStrategy strat // Get the run result. auto result_future = result[0].GetFuture(); result_future.Wait(); - if (!result_future.IsOK()) { - return false; - } - - return true; + MsException::Instance().CheckException(); + return result_future.IsOK(); } ActorSet *GraphScheduler::Fetch(const ActorInfo &actor_info) const {