forked from mindspore-Ecosystem/mindspore
Fix NCCL kernel memory alignment bug
This commit is contained in:
parent
e06e9cd772
commit
fcaf86f5d9
|
@ -122,6 +122,8 @@ class NcclGpuKernel : public GpuKernel {
|
|||
}
|
||||
bool Init(const CNodePtr &kernel_node) override {
|
||||
nccl_data_type_ = kNcclDtypeMap[TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))];
|
||||
InferCommType(kernel_node);
|
||||
|
||||
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
|
||||
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
|
||||
for (size_t i = 0; i < input_num; ++i) {
|
||||
|
@ -130,7 +132,7 @@ class NcclGpuKernel : public GpuKernel {
|
|||
for (size_t j = 0; j < shape.size(); j++) {
|
||||
size *= IntToSize(shape[j]);
|
||||
}
|
||||
size_t aligned_size = AlignMemorySize(size);
|
||||
size_t aligned_size = (nccl_kernel_type_ != NCCL_ALL_REDUCE) ? size : AlignMemorySize(size);
|
||||
input_size_list_.push_back(aligned_size);
|
||||
input_size_ += aligned_size;
|
||||
}
|
||||
|
@ -140,12 +142,11 @@ class NcclGpuKernel : public GpuKernel {
|
|||
for (size_t j = 0; j < shape.size(); j++) {
|
||||
size *= IntToSize(shape[j]);
|
||||
}
|
||||
size_t aligned_size = AlignMemorySize(size);
|
||||
size_t aligned_size = (nccl_kernel_type_ != NCCL_ALL_REDUCE) ? size : AlignMemorySize(size);
|
||||
output_size_list_.push_back(aligned_size);
|
||||
output_size_ += aligned_size;
|
||||
}
|
||||
|
||||
InferCommType(kernel_node);
|
||||
group_name_ = GetAttr<std::string>(kernel_node, kAttrGroup);
|
||||
MS_LOG(INFO) << AnfAlgo::GetCNodeName(kernel_node) << " for group " << group_name_;
|
||||
auto comm_stream_attr = AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("stream_id");
|
||||
|
|
|
@ -40,8 +40,8 @@ bool GPUDeviceAddress::SyncDeviceToHost(const std::vector<int> &, size_t size, T
|
|||
return ret;
|
||||
}
|
||||
if (size != size_) {
|
||||
// nccl kernel input and outpu memory size is aligned, may lead to sync memory size is inconformity
|
||||
MS_LOG(INFO) << "Sync memory size is inconformity, host size: " << size << ", device size " << size_;
|
||||
// NCCL kernel input and output device addresses are aligned, so the host size may not be equal to the device size
|
||||
MS_LOG(INFO) << "Sync memory size is inconsistent, host size: " << size << ", device size " << size_;
|
||||
}
|
||||
return GPUDeviceManager::GetInstance().CopyDeviceMemToHost(host_ptr, ptr_, size);
|
||||
}
|
||||
|
@ -51,8 +51,8 @@ bool GPUDeviceAddress::SyncHostToDevice(const std::vector<int> &, size_t size, T
|
|||
auto &stream = GPUDeviceManager::GetInstance().default_stream();
|
||||
MS_EXCEPTION_IF_NULL(stream);
|
||||
if (size != size_) {
|
||||
// nccl kernel input and outpu memory size is aligned, may lead to sync memory size is inconformity
|
||||
MS_LOG(INFO) << "Sync memory size is inconformity, host size: " << size << ", device size " << size_;
|
||||
// NCCL kernel input and output device addresses are aligned, so the host size may not be equal to the device size
|
||||
MS_LOG(INFO) << "Sync memory size is inconsistent, host size: " << size << ", device size " << size_;
|
||||
}
|
||||
if (!GPUDeviceManager::GetInstance().CopyHostMemToDeviceAsync(ptr_, host_ptr, size, stream)) {
|
||||
MS_LOG(ERROR) << "CopyHostMemToDeviceAsync failed";
|
||||
|
|
Loading…
Reference in New Issue