diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/rl/tensor_array_stack_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/rl/tensor_array_stack_kernel.cc index 23de73ffb86..c078d4c9a0c 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/rl/tensor_array_stack_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/rl/tensor_array_stack_kernel.cc @@ -45,7 +45,7 @@ void TensorArrayCPUStackKernel::InitKernel(const CNodePtr &kernel_node) { type_ = AnfAlgo::GetNodeAttr<TypePtr>(kernel_node, "dtype"); ele_size_ = GetTypeByte(type_); for (auto i : shapes_) { - ele_size_ *= LongToSize(i); + ele_size_ *= i; } value_size_ = ele_size_ * LongToSize(max_element); output_size_list_.push_back(value_size_); diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/other/dynamic_broadcast_grad_args_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/other/dynamic_broadcast_grad_args_gpu_kernel.h index 5805979c256..d8c504038c4 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/other/dynamic_broadcast_grad_args_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/other/dynamic_broadcast_grad_args_gpu_kernel.h @@ -127,10 +127,21 @@ class DynamicBroadcastGradientArgsGpuKernel : public GpuKernel { grad_reduce_idx = GetGradIndex(reverse_shapes, max_rank); return grad_reduce_idx; } + + void AddGradReduceIdx(std::vector<std::vector<int64_t>> *grad_reduce_idx, std::vector<bool> cur_one, bool none_one, + const size_t max_rank, size_t j) { + MS_EXCEPTION_IF_NULL(grad_reduce_idx); + for (size_t i = 0; i < kInputNum; i++) { + if (cur_one[i] && !none_one) { + (void)(*grad_reduce_idx)[i].emplace_back(SizeToLong(max_rank - 1 - j)); + } + } + } + std::vector<std::vector<int64_t>> GetGradIndex(const std::vector<std::vector<int64_t>> &revers_shapes, const size_t max_rank) { std::vector<std::vector<int64_t>> grad_reduce_index(kInputNum); - bool pre_one[kInputNum]; - bool cur_one[kInputNum]; + std::vector<bool> pre_one(kInputNum); + std::vector<bool> cur_one(kInputNum); for (size_t i = 0; i < kInputNum; i++) { pre_one[i] = false; cur_one[i] = false; @@ -159,18 +170,10 
@@ class DynamicBroadcastGradientArgsGpuKernel : public GpuKernel { (void)grad_reduce_index[i].emplace_back(max_rank - 1 - j); } continue; - } else if (std::equal(cur_one, cur_one + kInputNum, pre_one) && set_one) { - for (size_t i = 0; i < kInputNum; i++) { - if (cur_one[i] && !none_one) { - (void)grad_reduce_index[i].emplace_back(max_rank - 1 - j); - } - } + } else if (std::equal(cur_one.begin(), cur_one.end(), pre_one.begin()) && set_one) { + AddGradReduceIdx(&grad_reduce_index, cur_one, none_one, max_rank, j); } else { - for (size_t i = 0; i < kInputNum; i++) { - if (cur_one[i] && !none_one) { - (void)grad_reduce_index[i].emplace_back(max_rank - 1 - j); - } - } + AddGradReduceIdx(&grad_reduce_index, cur_one, none_one, max_rank, j); } set_one = true; for (size_t i = 0; i < kInputNum; i++) { diff --git a/mindspore/ccsrc/runtime/device/cpu/cpu_tensor_array.cc b/mindspore/ccsrc/runtime/device/cpu/cpu_tensor_array.cc index 6fe1fdba096..a8749f8fbce 100644 --- a/mindspore/ccsrc/runtime/device/cpu/cpu_tensor_array.cc +++ b/mindspore/ccsrc/runtime/device/cpu/cpu_tensor_array.cc @@ -27,7 +27,7 @@ void *CPUTensorArray::CreateMemory(const size_t size) { return CPUMemoryPool::GetInstance().AllocTensorMem(size); } void CPUTensorArray::ClearMemory(void *addr, const size_t size) { (void)memset_s(addr, size, 0, size); } -void CPUTensorArray::ReleaseMemory(void *addr) { CPUMemoryPool::GetInstance().FreeTensorMem(addr); } +void CPUTensorArray::ReleaseMemory(const DeviceMemPtr addr) { CPUMemoryPool::GetInstance().FreeTensorMem(addr); } } // namespace cpu } // namespace device } // namespace mindspore diff --git a/mindspore/ccsrc/runtime/device/cpu/cpu_tensor_array.h b/mindspore/ccsrc/runtime/device/cpu/cpu_tensor_array.h index eaf0181236f..dd1075074a2 100644 --- a/mindspore/ccsrc/runtime/device/cpu/cpu_tensor_array.h +++ b/mindspore/ccsrc/runtime/device/cpu/cpu_tensor_array.h @@ -30,7 +30,7 @@ class CPUTensorArray : public TensorArray { CPUTensorArray(const string &name, const TypePtr &dtype, const std::vector<size_t> 
&shapes) : TensorArray(name, dtype, shapes) {} ~CPUTensorArray() override = default; - void ReleaseMemory(void *addr) override; + void ReleaseMemory(const DeviceMemPtr addr) override; void *CreateMemory(const size_t size) override; void ClearMemory(void *addr, const size_t size) override; }; diff --git a/mindspore/ccsrc/runtime/device/gpu/gpu_tensor_array.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_tensor_array.cc index 634dd4aa4d3..7cdf11538a4 100644 --- a/mindspore/ccsrc/runtime/device/gpu/gpu_tensor_array.cc +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_tensor_array.cc @@ -26,7 +26,9 @@ namespace mindspore { namespace device { namespace gpu { // ReleaseMemory() used in Free() in TensorArray. -void GPUTensorArray::ReleaseMemory(void *addr) { device::gpu::GPUMemoryAllocator::GetInstance().FreeTensorMem(addr); } +void GPUTensorArray::ReleaseMemory(const DeviceMemPtr addr) { + device::gpu::GPUMemoryAllocator::GetInstance().FreeTensorMem(addr); +} void GPUTensorArray::ClearMemory(void *addr, const size_t size) { CHECK_CUDA_RET_WITH_EXCEPT_NOTRACE(cudaMemsetAsync(addr, 0, size), "failed to set cuda memory with zeros."); diff --git a/mindspore/ccsrc/runtime/device/gpu/gpu_tensor_array.h b/mindspore/ccsrc/runtime/device/gpu/gpu_tensor_array.h index 68cc99348ce..2e1f35ef765 100644 --- a/mindspore/ccsrc/runtime/device/gpu/gpu_tensor_array.h +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_tensor_array.h @@ -31,7 +31,7 @@ class GPUTensorArray : public TensorArray { GPUTensorArray(const string &name, const TypePtr &dtype, const std::vector<size_t> &shapes) : TensorArray(name, dtype, shapes) {} ~GPUTensorArray() override = default; - void ReleaseMemory(void *addr) override; + void ReleaseMemory(const DeviceMemPtr addr) override; void *CreateMemory(const size_t size) override; void ClearMemory(void *addr, const size_t size) override; }; diff --git a/mindspore/ccsrc/runtime/device/tensor_array.cc b/mindspore/ccsrc/runtime/device/tensor_array.cc index 3ce38dbd76f..a2295e028ae 100644 --- 
a/mindspore/ccsrc/runtime/device/tensor_array.cc +++ b/mindspore/ccsrc/runtime/device/tensor_array.cc @@ -99,7 +99,7 @@ void TensorArray::Free() { MS_LOG(DEBUG) << "Free device memory for " << name_; for (const auto &addr : tensors_) { if (addr != nullptr) { - ReleaseMemory(static_cast<void *>(addr->addr)); + ReleaseMemory(static_cast<const DeviceMemPtr>(addr->addr)); } } } diff --git a/mindspore/ccsrc/runtime/device/tensor_array.h b/mindspore/ccsrc/runtime/device/tensor_array.h index 825e2465850..84f969251f5 100644 --- a/mindspore/ccsrc/runtime/device/tensor_array.h +++ b/mindspore/ccsrc/runtime/device/tensor_array.h @@ -50,7 +50,7 @@ class TensorArray { // These three func should by implied for different device due to the difference in memory usage. // Create/Release Memory is used for malloc/free a device memory, used in function Write(). // ClearMemory is used to reset the input addr with zeros, used in function Free(). - virtual void ReleaseMemory(void *addr) = 0; + virtual void ReleaseMemory(const DeviceMemPtr addr) = 0; virtual void *CreateMemory(const size_t size) = 0; virtual void ClearMemory(void *addr, const size_t size) = 0;