Fix bugs in CPU actor runtime
commit b25d00731c
parent c3a9b08624
@@ -60,11 +60,10 @@ bool SparseTensorDenseMatmulCPUKernel<I, T>::Launch(const std::vector<kernel::Ad
   auto a_values = reinterpret_cast<T *>(inputs[1]->addr);
   auto b = reinterpret_cast<T *>(inputs[3]->addr);
   auto out = reinterpret_cast<T *>(outputs[0]->addr);
-  const size_t output_length = outputs[0]->size / sizeof(T);
   const size_t indices_length = inputs[0]->size / sizeof(I);
   const size_t values_length = inputs[1]->size / sizeof(T);
   const size_t b_length = inputs[3]->size / sizeof(T);
-  if (memset_s(out, output_length, 0, output_length) != EOK) {
+  if (memset_s(out, outputs[0]->size, 0, outputs[0]->size) != EOK) {
     MS_LOG(EXCEPTION) << "Memset Failed!";
   }
@@ -55,10 +55,9 @@ bool SparseToDenseCPUKernel<I, T>::Launch(const std::vector<kernel::AddressPtr>
   auto indices_addr = reinterpret_cast<I *>(inputs[0]->addr);
   auto values_addr = reinterpret_cast<T *>(inputs[1]->addr);
   auto output_addr = reinterpret_cast<T *>(outputs[0]->addr);
-  const size_t output_length = outputs[0]->size / sizeof(T);
   const size_t indices_length = inputs[0]->size / sizeof(I);
   const size_t values_length = inputs[1]->size / sizeof(T);
-  if (memset_s(output_addr, output_length, 0, output_length) != EOK) {
+  if (memset_s(output_addr, outputs[0]->size, 0, outputs[0]->size) != EOK) {
     MS_LOG(EXCEPTION) << "Memset Failed!";
   }
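Both kernels above had the same unit mismatch: output_length counts elements (bytes divided by sizeof(T)), while the securec memset_s(dest, destMax, c, count) takes both its destination capacity and its fill count in bytes. For any element type wider than one byte, the old calls therefore zeroed only 1/sizeof(T) of the output buffer and left stale data behind. A minimal standalone sketch of the effect, with std::memset standing in for memset_s and an illustrative buffer (not MindSpore code):

    #include <cstddef>
    #include <cstdio>
    #include <cstring>

    int main() {
      float out[8];
      std::memset(out, 0xFF, sizeof(out));  // pretend the output buffer holds stale data

      // Old call shape: count was the ELEMENT count (8), so only the first
      // 8 bytes -- two floats -- are zeroed; out[2..7] keep their stale bits.
      const std::size_t output_length = sizeof(out) / sizeof(float);
      std::memset(out, 0, output_length);
      std::printf("after element-count fill: out[2] = %f\n", out[2]);  // non-zero (NaN bit pattern)

      // Fixed call shape: count is the BYTE count (outputs[0]->size here
      // would be 32), so the whole buffer is cleared.
      std::memset(out, 0, sizeof(out));
      std::printf("after byte-count fill:    out[2] = %f\n", out[2]);  // 0.000000
      return 0;
    }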
@@ -44,8 +44,13 @@ bool CPUDeviceAddress::SyncDeviceToHost(const ShapeVector &, size_t size, TypeId
     MS_LOG(DEBUG) << "host_ptr is equal to ptr_, request ignored.";
     return true;
   }

   if (type == type_id_) {
-    auto ret_code = memcpy_s(host_ptr, size, ptr_, size_);
+    if ((size == 0) || (size_ == 0) || (size > size_)) {
+      MS_LOG(INFO) << "No need sync, host size: " << size << ", device size: " << size_;
+      return true;
+    }
+    auto ret_code = memcpy_s(host_ptr, size, ptr_, size);
     if (ret_code != EOK) {
       MS_LOG(ERROR) << "Failed to copy tensor!";
       return false;
@@ -78,7 +83,11 @@ bool CPUDeviceAddress::SyncHostToDevice(const ShapeVector & /* shape */, size_t
   }

   if (type == type_id_) {
-    auto ret_code = memcpy_s(ptr_, size_, host_ptr, size);
+    if ((size == 0) || (size_ == 0) || (size > size_)) {
+      MS_LOG(INFO) << "No need sync, host size: " << size << ", device size: " << size_;
+      return true;
+    }
+    auto ret_code = memcpy_s(ptr_, size, host_ptr, size);
     if (ret_code != EOK) {
       MS_LOG(ERROR) << "Failed to copy tensor!";
       return false;
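The two CPUDeviceAddress hunks make the same pair of changes. First, a guard turns empty or oversized requests into a logged no-op instead of an error. Second, the copy count changes from the device-side size_ to the requested size: securec's memcpy_s(dest, destMax, src, count) rejects any count larger than destMax, so the old SyncDeviceToHost call failed outright whenever the device allocation was larger than the host tensor. A sketch of the fixed control flow, with std::memcpy standing in for memcpy_s and illustrative parameter names (host_size/device_size are not MindSpore API):

    #include <cstddef>
    #include <cstring>
    #include <iostream>

    // Sketch only: mirrors the guard + copy order of the fixed SyncDeviceToHost.
    bool SyncDeviceToHostSketch(void *host_ptr, std::size_t host_size,
                                const void *device_ptr, std::size_t device_size) {
      // Guard added by the commit: nothing to copy, or the request cannot fit.
      if (host_size == 0 || device_size == 0 || host_size > device_size) {
        std::cout << "No need sync, host size: " << host_size
                  << ", device size: " << device_size << "\n";
        return true;  // treated as success, not failure
      }
      // Copy exactly the bytes the host asked for (the old code passed the
      // device size as the count, which memcpy_s rejects whenever it exceeds
      // the host buffer's capacity).
      std::memcpy(host_ptr, device_ptr, host_size);
      return true;
    }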
@@ -33,10 +33,16 @@ namespace device {
 namespace gpu {
 bool GPUDeviceAddress::SyncDeviceToHost(size_t size, void *host_ptr) const {
   MS_EXCEPTION_IF_NULL(host_ptr);
   if (ptr_ == nullptr) {
     MS_LOG(ERROR) << "The device address is null!";
     return false;
   }
+  bool need_sync = (size != 0) && (size_ != 0) && (size <= size_);
+  if (!need_sync) {
+    MS_LOG(INFO) << "No need sync, host size: " << size << ", device size: " << size_;
+    return true;
+  }
+
   auto &stream = GPUDeviceManager::GetInstance().default_stream();
   MS_EXCEPTION_IF_NULL(stream);
   auto ret = GPUDeviceManager::GetInstance().SyncStream(stream);
@@ -53,8 +59,13 @@ bool GPUDeviceAddress::SyncDeviceToHost(size_t size, void *host_ptr) const {

 bool GPUDeviceAddress::SyncHostToDevice(size_t size, const void *host_ptr) const {
   MS_EXCEPTION_IF_NULL(host_ptr);
   if (ptr_ == nullptr) {
     MS_LOG(ERROR) << "The device address is null!";
     return false;
   }
+  bool need_sync = (size != 0) && (size_ != 0) && (size <= size_);
+  if (!need_sync) {
+    MS_LOG(INFO) << "No need sync, host size: " << size << ", device size: " << size_;
+    return true;
+  }
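The GPU hunks add the same size check, spelled as an explicit need_sync boolean and placed before the default-stream synchronization, so a degenerate request returns without forcing a full SyncStream. The predicate is now duplicated across the four CPU and GPU sync paths; a hypothetical consolidation (not part of the commit, just a sketch of the shared rule) could look like:

    #include <cstddef>

    // Hypothetical helper: the size rule shared by the CPU and GPU
    // SyncDeviceToHost/SyncHostToDevice paths after this commit.
    inline bool NeedSync(std::size_t host_size, std::size_t device_size) {
      return host_size != 0 && device_size != 0 && host_size <= device_size;
    }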
@@ -140,8 +140,13 @@ void DeviceQueueDataSourceActor::OnMemoryAllocFinish(OpContext<DeviceTensor> *co
   }

   // Copy data from device queue by data kernel launching.
-  auto ret =
-    device_context_->LaunchKernel(data_kernel_, launch_info_.inputs_, launch_info_.workspaces_, launch_info_.outputs_);
+  bool ret = true;
+  try {
+    ret = device_context_->LaunchKernel(data_kernel_, launch_info_.inputs_, launch_info_.workspaces_,
+                                        launch_info_.outputs_);
+  } catch (const std::exception &e) {
+    MsException::Instance().SetException();
+  }
   if (!ret) {
     std::string error_info = "Launch kernel failed: " + data_kernel_->ToString();
     SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), error_info);
@@ -147,8 +147,13 @@ void KernelActor::OnMemoryAllocFinish(OpContext<DeviceTensor> *context) {

   PreLaunchKernel(context);

-  auto ret = device_context_->LaunchKernel(kernel_, launch_info_.inputs_, launch_info_.workspaces_,
-                                           launch_info_.outputs_, is_dynamic_shape_);
+  bool ret = true;
+  try {
+    ret = device_context_->LaunchKernel(kernel_, launch_info_.inputs_, launch_info_.workspaces_, launch_info_.outputs_,
+                                        is_dynamic_shape_);
+  } catch (const std::exception &e) {
+    MsException::Instance().SetException();
+  }
   if (!ret) {
     std::string error_info = "Launch kernel failed: " + kernel_->ToString();
     SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), error_info);
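Both actor hunks wrap LaunchKernel the same way. Actors run on worker threads, and a C++ exception that escapes a thread's entry function terminates the process, so the catch block parks the exception via MsException::Instance().SetException() rather than letting it unwind. Note that ret is initialized to true, so a throwing launch does not take the SET_OPCONTEXT_FAIL_RET_WITH_ERROR path here; the stored exception surfaces later on the launching thread, as the GraphScheduler::Run hunk below shows (see the sketch after it).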
@@ -25,11 +25,9 @@ TensorPtr CreateOutputTensor(const AnfNodePtr &output_node, size_t output_index,
   MS_LOG(INFO) << "Create output tensor, output node: " << output_node->fullname_with_scope()
                << ", output index: " << output_index << ", output position: " << output_position;

-  // Create host tensor.
-  auto type_id = AnfAlgo::GetOutputDeviceDataType(output_node, output_index);
-  if (type_id == kTypeUnknown) {
-    type_id = AnfAlgo::GetOutputInferDataType(output_node, output_index);
-  }
+  // Create host tensor. The output tensor should use the infer type; it will be handled correctly by tensor data sync
+  // when the infer type is not equal to the device type.
+  auto type_id = AnfAlgo::GetOutputInferDataType(output_node, output_index);
   std::vector<int64_t> temp_shape;
   auto shape = AnfAlgo::GetOutputInferShape(output_node, output_index);
   (void)std::copy(shape.begin(), shape.end(), std::back_inserter(temp_shape));
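Dropping the GetOutputDeviceDataType branch means the host tensor is always created with the front-end infer type. This matters when kernel selection picked a different physical type on the device (for example, a float16 kernel for an output the graph infers as float32): the device-to-host data sync performs the conversion, so building the tensor with the device type would have exposed the wrong dtype to the caller.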
@@ -579,11 +579,8 @@ bool GraphScheduler::Run(const ActorSet *actor_set, GraphExecutionStrategy strat
   // Get the run result.
   auto result_future = result[0].GetFuture();
   result_future.Wait();
-  if (!result_future.IsOK()) {
-    return false;
-  }
-
-  return true;
+  MsException::Instance().CheckException();
+  return result_future.IsOK();
 }

 ActorSet *GraphScheduler::Fetch(const ActorInfo &actor_info) const {
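Run now rethrows whatever the actors parked: after the future completes, MsException::Instance().CheckException() is called before the future's status is returned directly. A minimal, self-contained sketch of this capture-and-rethrow pattern; MsExceptionLike is an illustrative stand-in for MindSpore's MsException, which may differ in detail:

    #include <exception>
    #include <iostream>
    #include <mutex>
    #include <stdexcept>
    #include <thread>

    // Illustrative stand-in for MsException: one slot that carries an
    // exception from a worker thread to the thread that checks results.
    class MsExceptionLike {
     public:
      static MsExceptionLike &Instance() {
        static MsExceptionLike instance;
        return instance;
      }
      // Worker side (the actors' catch blocks): remember the in-flight exception.
      void SetException() {
        std::lock_guard<std::mutex> lock(mutex_);
        exception_ptr_ = std::current_exception();
      }
      // Scheduler side (GraphScheduler::Run): rethrow the stored exception, if any.
      void CheckException() {
        std::exception_ptr tmp;
        {
          std::lock_guard<std::mutex> lock(mutex_);
          tmp = exception_ptr_;
          exception_ptr_ = nullptr;
        }
        if (tmp) std::rethrow_exception(tmp);
      }

     private:
      std::mutex mutex_;
      std::exception_ptr exception_ptr_;
    };

    int main() {
      std::thread worker([] {
        try {
          throw std::runtime_error("Launch kernel failed");  // stands in for a throwing LaunchKernel
        } catch (const std::exception &) {
          MsExceptionLike::Instance().SetException();  // park it; never let it escape the thread
        }
      });
      worker.join();  // stands in for result_future.Wait()
      try {
        MsExceptionLike::Instance().CheckException();  // rethrows on this thread
      } catch (const std::exception &e) {
        std::cout << "propagated: " << e.what() << "\n";
      }
      return 0;
    }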