Fix bugs in the CPU actor runtime

This commit is contained in:
limingqi107 2021-06-19 17:35:03 +08:00
parent c3a9b08624
commit b25d00731c
8 changed files with 43 additions and 20 deletions

View File

@ -60,11 +60,10 @@ bool SparseTensorDenseMatmulCPUKernel<I, T>::Launch(const std::vector<kernel::Ad
auto a_values = reinterpret_cast<T *>(inputs[1]->addr);
auto b = reinterpret_cast<T *>(inputs[3]->addr);
auto out = reinterpret_cast<T *>(outputs[0]->addr);
const size_t output_length = outputs[0]->size / sizeof(T);
const size_t indices_length = inputs[0]->size / sizeof(I);
const size_t values_length = inputs[1]->size / sizeof(T);
const size_t b_length = inputs[3]->size / sizeof(T);
if (memset_s(out, output_length, 0, output_length) != EOK) {
if (memset_s(out, outputs[0]->size, 0, outputs[0]->size) != EOK) {
MS_LOG(EXCEPTION) << "Memset Failed!";
}

View File

@ -55,10 +55,9 @@ bool SparseToDenseCPUKernel<I, T>::Launch(const std::vector<kernel::AddressPtr>
auto indices_addr = reinterpret_cast<I *>(inputs[0]->addr);
auto values_addr = reinterpret_cast<T *>(inputs[1]->addr);
auto output_addr = reinterpret_cast<T *>(outputs[0]->addr);
const size_t output_length = outputs[0]->size / sizeof(T);
const size_t indices_length = inputs[0]->size / sizeof(I);
const size_t values_length = inputs[1]->size / sizeof(T);
if (memset_s(output_addr, output_length, 0, output_length) != EOK) {
if (memset_s(output_addr, outputs[0]->size, 0, outputs[0]->size) != EOK) {
MS_LOG(EXCEPTION) << "Memset Failed!";
}

View File

@ -44,8 +44,13 @@ bool CPUDeviceAddress::SyncDeviceToHost(const ShapeVector &, size_t size, TypeId
MS_LOG(DEBUG) << "host_ptr is equal to ptr_, request ignored.";
return true;
}
if (type == type_id_) {
auto ret_code = memcpy_s(host_ptr, size, ptr_, size_);
if ((size == 0) || (size_ == 0) || (size > size_)) {
MS_LOG(INFO) << "No need sync, host size: " << size << ", device size: " << size_;
return true;
}
auto ret_code = memcpy_s(host_ptr, size, ptr_, size);
if (ret_code != EOK) {
MS_LOG(ERROR) << "Failed to copy tensor!";
return false;
@ -78,7 +83,11 @@ bool CPUDeviceAddress::SyncHostToDevice(const ShapeVector & /* shape */, size_t
}
if (type == type_id_) {
auto ret_code = memcpy_s(ptr_, size_, host_ptr, size);
if ((size == 0) || (size_ == 0) || (size > size_)) {
MS_LOG(INFO) << "No need sync, host size: " << size << ", device size: " << size_;
return true;
}
auto ret_code = memcpy_s(ptr_, size, host_ptr, size);
if (ret_code != EOK) {
MS_LOG(ERROR) << "Failed to copy tensor!";
return false;

View File

@ -33,10 +33,16 @@ namespace device {
namespace gpu {
bool GPUDeviceAddress::SyncDeviceToHost(size_t size, void *host_ptr) const {
MS_EXCEPTION_IF_NULL(host_ptr);
if (ptr_ == nullptr) {
MS_LOG(ERROR) << "The device address is null!";
return false;
}
bool need_sync = (size != 0) && (size_ != 0) && (size <= size_);
if (!need_sync) {
MS_LOG(INFO) << "No need sync, host size: " << size << ", device size: " << size_;
return true;
}
auto &stream = GPUDeviceManager::GetInstance().default_stream();
MS_EXCEPTION_IF_NULL(stream);
auto ret = GPUDeviceManager::GetInstance().SyncStream(stream);
@ -53,8 +59,13 @@ bool GPUDeviceAddress::SyncDeviceToHost(size_t size, void *host_ptr) const {
bool GPUDeviceAddress::SyncHostToDevice(size_t size, const void *host_ptr) const {
MS_EXCEPTION_IF_NULL(host_ptr);
if (ptr_ == nullptr) {
MS_LOG(ERROR) << "The device address is null!";
return false;
}
bool need_sync = (size != 0) && (size_ != 0) && (size <= size_);
if (!need_sync) {
MS_LOG(INFO) << "No need sync, host size: " << size << ", device size: " << size_;
return true;
}

View File

@ -140,8 +140,13 @@ void DeviceQueueDataSourceActor::OnMemoryAllocFinish(OpContext<DeviceTensor> *co
}
// Copy data from device queue by data kernel launching.
auto ret =
device_context_->LaunchKernel(data_kernel_, launch_info_.inputs_, launch_info_.workspaces_, launch_info_.outputs_);
bool ret = true;
try {
ret = device_context_->LaunchKernel(data_kernel_, launch_info_.inputs_, launch_info_.workspaces_,
launch_info_.outputs_);
} catch (const std::exception &e) {
MsException::Instance().SetException();
}
if (!ret) {
std::string error_info = "Launch kernel failed: " + data_kernel_->ToString();
SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), error_info);

View File

@ -147,8 +147,13 @@ void KernelActor::OnMemoryAllocFinish(OpContext<DeviceTensor> *context) {
PreLaunchKernel(context);
auto ret = device_context_->LaunchKernel(kernel_, launch_info_.inputs_, launch_info_.workspaces_,
launch_info_.outputs_, is_dynamic_shape_);
bool ret = true;
try {
ret = device_context_->LaunchKernel(kernel_, launch_info_.inputs_, launch_info_.workspaces_, launch_info_.outputs_,
is_dynamic_shape_);
} catch (const std::exception &e) {
MsException::Instance().SetException();
}
if (!ret) {
std::string error_info = "Launch kernel failed: " + kernel_->ToString();
SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), error_info);

View File

@ -25,11 +25,9 @@ TensorPtr CreateOutputTensor(const AnfNodePtr &output_node, size_t output_index,
MS_LOG(INFO) << "Create output tensor, output node: " << output_node->fullname_with_scope()
<< ", output index: " << output_index << ", output position: " << output_position;
// Create host tensor.
auto type_id = AnfAlgo::GetOutputDeviceDataType(output_node, output_index);
if (type_id == kTypeUnknown) {
type_id = AnfAlgo::GetOutputInferDataType(output_node, output_index);
}
// Create host tensor, the output tensor should use the infer type, it will be handed correctly by tensor data sync
// when infer type is not equal to device type.
auto type_id = AnfAlgo::GetOutputInferDataType(output_node, output_index);
std::vector<int64_t> temp_shape;
auto shape = AnfAlgo::GetOutputInferShape(output_node, output_index);
(void)std::copy(shape.begin(), shape.end(), std::back_inserter(temp_shape));

View File

@ -579,11 +579,8 @@ bool GraphScheduler::Run(const ActorSet *actor_set, GraphExecutionStrategy strat
// Get the run result.
auto result_future = result[0].GetFuture();
result_future.Wait();
if (!result_future.IsOK()) {
return false;
}
return true;
MsException::Instance().CheckException();
return result_future.IsOK();
}
ActorSet *GraphScheduler::Fetch(const ActorInfo &actor_info) const {