fix codex

This commit is contained in:
VectorSL 2022-01-11 11:45:09 +08:00
parent 5c643a207f
commit 87c4e20513
8 changed files with 25 additions and 20 deletions

View File

@ -45,7 +45,7 @@ void TensorArrayCPUStackKernel::InitKernel(const CNodePtr &kernel_node) {
type_ = AnfAlgo::GetNodeAttr<TypePtr>(kernel_node, "dtype");
ele_size_ = GetTypeByte(type_);
for (auto i : shapes_) {
ele_size_ *= LongToSize(i);
ele_size_ *= i;
}
value_size_ = ele_size_ * LongToSize(max_element);
output_size_list_.push_back(value_size_);

View File

@ -127,10 +127,21 @@ class DynamicBroadcastGradientArgsGpuKernel : public GpuKernel {
grad_reduce_idx = GetGradIndex(reverse_shapes, max_rank);
return grad_reduce_idx;
}
// Appends axis index (max_rank - 1 - j) — counted from the innermost
// dimension — to (*grad_reduce_idx)[i] for every input i whose cur_one[i]
// flag is set, provided none_one is false.
// NOTE(review): based on the caller visible in this diff (GetGradIndex),
// cur_one[i] appears to mean "input i has dimension 1 at the current axis"
// and none_one "no input has dimension 1 here" — confirm against the full
// DynamicBroadcastGradientArgsGpuKernel source.
void AddGradReduceIdx(std::vector<std::vector<T>> *grad_reduce_idx, std::vector<bool> cur_one, bool none_one,
const size_t max_rank, size_t j) {
// Output pointer must be valid; the caller passes &grad_reduce_index.
MS_EXCEPTION_IF_NULL(grad_reduce_idx);
for (size_t i = 0; i < kInputNum; i++) {
if (cur_one[i] && !none_one) {
// SizeToLong converts the size_t axis index to the (signed) element type T.
(void)(*grad_reduce_idx)[i].emplace_back(SizeToLong(max_rank - 1 - j));
}
}
}
std::vector<std::vector<T>> GetGradIndex(const std::vector<std::vector<T>> &revers_shapes, const size_t max_rank) {
std::vector<std::vector<T>> grad_reduce_index(kInputNum);
bool pre_one[kInputNum];
bool cur_one[kInputNum];
std::vector<bool> pre_one(kInputNum);
std::vector<bool> cur_one(kInputNum);
for (size_t i = 0; i < kInputNum; i++) {
pre_one[i] = false;
cur_one[i] = false;
@ -159,18 +170,10 @@ class DynamicBroadcastGradientArgsGpuKernel : public GpuKernel {
(void)grad_reduce_index[i].emplace_back(max_rank - 1 - j);
}
continue;
} else if (std::equal(cur_one, cur_one + kInputNum, pre_one) && set_one) {
for (size_t i = 0; i < kInputNum; i++) {
if (cur_one[i] && !none_one) {
(void)grad_reduce_index[i].emplace_back(max_rank - 1 - j);
}
}
} else if (std::equal(cur_one.begin(), cur_one.end(), pre_one.begin()) && set_one) {
AddGradReduceIdx(&grad_reduce_index, cur_one, none_one, max_rank, j);
} else {
for (size_t i = 0; i < kInputNum; i++) {
if (cur_one[i] && !none_one) {
(void)grad_reduce_index[i].emplace_back(max_rank - 1 - j);
}
}
AddGradReduceIdx(&grad_reduce_index, cur_one, none_one, max_rank, j);
}
set_one = true;
for (size_t i = 0; i < kInputNum; i++) {

View File

@ -27,7 +27,7 @@ void *CPUTensorArray::CreateMemory(const size_t size) { return CPUMemoryPool::Ge
void CPUTensorArray::ClearMemory(void *addr, const size_t size) { (void)memset_s(addr, size, 0, size); }
void CPUTensorArray::ReleaseMemory(void *addr) { CPUMemoryPool::GetInstance().FreeTensorMem(addr); }
void CPUTensorArray::ReleaseMemory(const DeviceMemPtr addr) { CPUMemoryPool::GetInstance().FreeTensorMem(addr); }
} // namespace cpu
} // namespace device
} // namespace mindspore

View File

@ -30,7 +30,7 @@ class CPUTensorArray : public TensorArray {
CPUTensorArray(const string &name, const TypePtr &dtype, const std::vector<size_t> &shapes)
: TensorArray(name, dtype, shapes) {}
~CPUTensorArray() override = default;
void ReleaseMemory(void *addr) override;
void ReleaseMemory(const DeviceMemPtr addr) override;
void *CreateMemory(const size_t size) override;
void ClearMemory(void *addr, const size_t size) override;
};

View File

@ -26,7 +26,9 @@ namespace mindspore {
namespace device {
namespace gpu {
// ReleaseMemory() used in Free() in TensorArray.
void GPUTensorArray::ReleaseMemory(void *addr) { device::gpu::GPUMemoryAllocator::GetInstance().FreeTensorMem(addr); }
void GPUTensorArray::ReleaseMemory(const DeviceMemPtr addr) {
device::gpu::GPUMemoryAllocator::GetInstance().FreeTensorMem(addr);
}
void GPUTensorArray::ClearMemory(void *addr, const size_t size) {
CHECK_CUDA_RET_WITH_EXCEPT_NOTRACE(cudaMemsetAsync(addr, 0, size), "failed to set cuda memory with zeros.");

View File

@ -31,7 +31,7 @@ class GPUTensorArray : public TensorArray {
GPUTensorArray(const string &name, const TypePtr &dtype, const std::vector<size_t> &shapes)
: TensorArray(name, dtype, shapes) {}
~GPUTensorArray() override = default;
void ReleaseMemory(void *addr) override;
void ReleaseMemory(const DeviceMemPtr addr) override;
void *CreateMemory(const size_t size) override;
void ClearMemory(void *addr, const size_t size) override;
};

View File

@ -99,7 +99,7 @@ void TensorArray::Free() {
MS_LOG(DEBUG) << "Free device memory for " << name_;
for (const auto &addr : tensors_) {
if (addr != nullptr) {
ReleaseMemory(static_cast<void *>(addr->addr));
ReleaseMemory(static_cast<DeviceMemPtr>(addr->addr));
}
}
}

View File

@ -50,7 +50,7 @@ class TensorArray {
// These three func should by implied for different device due to the difference in memory usage.
// Create/Release Memory is used for malloc/free a device memory, used in function Write().
// ClearMemory is used to reset the input addr with zeros, used in function Free().
virtual void ReleaseMemory(void *addr) = 0;
virtual void ReleaseMemory(const DeviceMemPtr addr) = 0;
virtual void *CreateMemory(const size_t size) = 0;
virtual void ClearMemory(void *addr, const size_t size) = 0;