fix codex
parent 5c643a207f
commit 87c4e20513
@@ -45,7 +45,7 @@ void TensorArrayCPUStackKernel::InitKernel(const CNodePtr &kernel_node) {
   type_ = AnfAlgo::GetNodeAttr<TypePtr>(kernel_node, "dtype");
   ele_size_ = GetTypeByte(type_);
   for (auto i : shapes_) {
-    ele_size_ *= LongToSize(i);
+    ele_size_ *= i;
   }
   value_size_ = ele_size_ * LongToSize(max_element);
   output_size_list_.push_back(value_size_);
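For context on the size bookkeeping in this hunk: the kernel multiplies the byte width of the dtype by every dimension of the element shape to get ele_size_, then by the maximum element count to get the stacked output size pushed into output_size_list_. A minimal standalone sketch of that arithmetic (hypothetical helper name and plain std types, not the kernel's own API):

#include <cstddef>
#include <vector>

// Sketch: bytes for one element tensor and for the whole stacked output.
// `type_bytes` stands in for GetTypeByte(type_); `max_element` is the
// TensorArray capacity used above.
size_t StackOutputBytes(size_t type_bytes, const std::vector<size_t> &shape, size_t max_element) {
  size_t ele_size = type_bytes;   // bytes per scalar of the stored dtype
  for (size_t dim : shape) {
    ele_size *= dim;              // bytes per element tensor
  }
  return ele_size * max_element;  // bytes for the stacked output buffer
}

For example, a float32 element of shape {2, 3} with max_element = 4 gives 4 * 2 * 3 * 4 = 96 bytes.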
@@ -127,10 +127,21 @@ class DynamicBroadcastGradientArgsGpuKernel : public GpuKernel {
     grad_reduce_idx = GetGradIndex(reverse_shapes, max_rank);
     return grad_reduce_idx;
   }
 
+  void AddGradReduceIdx(std::vector<std::vector<T>> *grad_reduce_idx, std::vector<bool> cur_one, bool none_one,
+                        const size_t max_rank, size_t j) {
+    MS_EXCEPTION_IF_NULL(grad_reduce_idx);
+    for (size_t i = 0; i < kInputNum; i++) {
+      if (cur_one[i] && !none_one) {
+        (void)(*grad_reduce_idx)[i].emplace_back(SizeToLong(max_rank - 1 - j));
+      }
+    }
+  }
+
   std::vector<std::vector<T>> GetGradIndex(const std::vector<std::vector<T>> &revers_shapes, const size_t max_rank) {
     std::vector<std::vector<T>> grad_reduce_index(kInputNum);
-    bool pre_one[kInputNum];
-    bool cur_one[kInputNum];
+    std::vector<bool> pre_one(kInputNum);
+    std::vector<bool> cur_one(kInputNum);
     for (size_t i = 0; i < kInputNum; i++) {
       pre_one[i] = false;
       cur_one[i] = false;
@@ -159,18 +170,10 @@ class DynamicBroadcastGradientArgsGpuKernel : public GpuKernel {
           (void)grad_reduce_index[i].emplace_back(max_rank - 1 - j);
         }
         continue;
-      } else if (std::equal(cur_one, cur_one + kInputNum, pre_one) && set_one) {
-        for (size_t i = 0; i < kInputNum; i++) {
-          if (cur_one[i] && !none_one) {
-            (void)grad_reduce_index[i].emplace_back(max_rank - 1 - j);
-          }
-        }
+      } else if (std::equal(cur_one.begin(), cur_one.end(), pre_one.begin()) && set_one) {
+        AddGradReduceIdx(&grad_reduce_index, cur_one, none_one, max_rank, j);
       } else {
-        for (size_t i = 0; i < kInputNum; i++) {
-          if (cur_one[i] && !none_one) {
-            (void)grad_reduce_index[i].emplace_back(max_rank - 1 - j);
-          }
-        }
+        AddGradReduceIdx(&grad_reduce_index, cur_one, none_one, max_rank, j);
       }
       set_one = true;
       for (size_t i = 0; i < kInputNum; i++) {
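The two hunks above deduplicate the gradient-reduction bookkeeping: the repeated inner loops are replaced by calls to the new AddGradReduceIdx helper, and the raw bool arrays become std::vector<bool> so std::equal can work on iterators. A minimal sketch of the same helper outside the kernel, assuming two inputs (kInputNum = 2) and T = int64_t, and omitting the MS_EXCEPTION_IF_NULL macro:

#include <cstddef>
#include <cstdint>
#include <vector>

constexpr size_t kInputNum = 2;  // assumed input count

// Mirrors the added helper: for each input whose current dimension is 1
// (and none_one is false), record axis max_rank - 1 - j for that input.
void AddGradReduceIdx(std::vector<std::vector<int64_t>> *grad_reduce_idx, const std::vector<bool> &cur_one,
                      bool none_one, size_t max_rank, size_t j) {
  for (size_t i = 0; i < kInputNum; i++) {
    if (cur_one[i] && !none_one) {
      (*grad_reduce_idx)[i].push_back(static_cast<int64_t>(max_rank - 1 - j));
    }
  }
}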
@@ -27,7 +27,7 @@ void *CPUTensorArray::CreateMemory(const size_t size) { return CPUMemoryPool::Ge
 
 void CPUTensorArray::ClearMemory(void *addr, const size_t size) { (void)memset_s(addr, size, 0, size); }
 
-void CPUTensorArray::ReleaseMemory(void *addr) { CPUMemoryPool::GetInstance().FreeTensorMem(addr); }
+void CPUTensorArray::ReleaseMemory(const DeviceMemPtr addr) { CPUMemoryPool::GetInstance().FreeTensorMem(addr); }
 }  // namespace cpu
 }  // namespace device
 }  // namespace mindspore
@@ -30,7 +30,7 @@ class CPUTensorArray : public TensorArray {
   CPUTensorArray(const string &name, const TypePtr &dtype, const std::vector<size_t> &shapes)
       : TensorArray(name, dtype, shapes) {}
   ~CPUTensorArray() override = default;
-  void ReleaseMemory(void *addr) override;
+  void ReleaseMemory(const DeviceMemPtr addr) override;
   void *CreateMemory(const size_t size) override;
   void ClearMemory(void *addr, const size_t size) override;
 };
@@ -26,7 +26,9 @@ namespace mindspore {
 namespace device {
 namespace gpu {
 // ReleaseMemory() used in Free() in TensorArray.
-void GPUTensorArray::ReleaseMemory(void *addr) { device::gpu::GPUMemoryAllocator::GetInstance().FreeTensorMem(addr); }
+void GPUTensorArray::ReleaseMemory(const DeviceMemPtr addr) {
+  device::gpu::GPUMemoryAllocator::GetInstance().FreeTensorMem(addr);
+}
 
 void GPUTensorArray::ClearMemory(void *addr, const size_t size) {
   CHECK_CUDA_RET_WITH_EXCEPT_NOTRACE(cudaMemsetAsync(addr, 0, size), "failed to set cuda memory with zeros.");
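The GPU path above keeps zero-filling the buffer through cudaMemsetAsync and frees it through the allocator. A minimal standalone sketch of that clear-then-release pattern against the plain CUDA runtime (simplified error handling; not the kernel's CHECK_CUDA_RET_WITH_EXCEPT_NOTRACE macro or GPUMemoryAllocator):

#include <cuda_runtime.h>
#include <cstddef>
#include <cstdio>

int main() {
  void *dev_ptr = nullptr;
  const size_t size = 1024;
  if (cudaMalloc(&dev_ptr, size) != cudaSuccess) {
    std::fprintf(stderr, "cudaMalloc failed\n");
    return 1;
  }
  cudaStream_t stream;
  cudaStreamCreate(&stream);
  // ClearMemory analogue: zero the buffer asynchronously on a stream.
  if (cudaMemsetAsync(dev_ptr, 0, size, stream) != cudaSuccess) {
    std::fprintf(stderr, "failed to set cuda memory with zeros.\n");
  }
  cudaStreamSynchronize(stream);
  cudaStreamDestroy(stream);
  // ReleaseMemory analogue: hand the device pointer back to the allocator.
  cudaFree(dev_ptr);
  return 0;
}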
@@ -31,7 +31,7 @@ class GPUTensorArray : public TensorArray {
   GPUTensorArray(const string &name, const TypePtr &dtype, const std::vector<size_t> &shapes)
       : TensorArray(name, dtype, shapes) {}
   ~GPUTensorArray() override = default;
-  void ReleaseMemory(void *addr) override;
+  void ReleaseMemory(const DeviceMemPtr addr) override;
   void *CreateMemory(const size_t size) override;
   void ClearMemory(void *addr, const size_t size) override;
 };
@@ -99,7 +99,7 @@ void TensorArray::Free() {
   MS_LOG(DEBUG) << "Free device memory for " << name_;
   for (const auto &addr : tensors_) {
     if (addr != nullptr) {
-      ReleaseMemory(static_cast<void *>(addr->addr));
+      ReleaseMemory(static_cast<DeviceMemPtr>(addr->addr));
     }
   }
 }
@@ -50,7 +50,7 @@ class TensorArray {
   // These three func should by implied for different device due to the difference in memory usage.
   // Create/Release Memory is used for malloc/free a device memory, used in function Write().
   // ClearMemory is used to reset the input addr with zeros, used in function Free().
-  virtual void ReleaseMemory(void *addr) = 0;
+  virtual void ReleaseMemory(const DeviceMemPtr addr) = 0;
   virtual void *CreateMemory(const size_t size) = 0;
   virtual void ClearMemory(void *addr, const size_t size) = 0;
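Taken together, the TensorArray hunks change the release hook from void *addr to const DeviceMemPtr addr across the base class, the CPU/GPU backends, and the call site in Free(). A minimal sketch of that override pattern, assuming DeviceMemPtr is an alias for a raw device pointer (the real alias and the memory pools live elsewhere in MindSpore):

#include <cstdlib>

using DeviceMemPtr = void *;  // stand-in for MindSpore's actual alias

// Base interface: each backend decides how its device memory is released.
class TensorArrayBase {
 public:
  virtual ~TensorArrayBase() = default;
  virtual void ReleaseMemory(const DeviceMemPtr addr) = 0;
};

// Toy CPU backend: releases host memory with std::free; the real backends
// return the address to CPUMemoryPool / GPUMemoryAllocator as shown above.
class ToyCpuTensorArray : public TensorArrayBase {
 public:
  void ReleaseMemory(const DeviceMemPtr addr) override { std::free(addr); }
};

int main() {
  ToyCpuTensorArray ta;
  DeviceMemPtr addr = std::malloc(64);
  ta.ReleaseMemory(addr);  // matches the new const DeviceMemPtr signature
  return 0;
}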