!48371 修复sparsedense add 内存问题

Merge pull request !48371 from melody/master
This commit is contained in:
i-robot 2023-02-09 13:41:03 +00:00 committed by Gitee
commit 34788b5a4b
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
2 changed files with 33 additions and 2 deletions

View File

@@ -42,8 +42,6 @@ __global__ void SparseTensorDenseAddKernelFunc(size_t input_elements, size_t ran
int out_index = 0;
for (size_t j = 0; j < rank; j++) {
int index = x1_indices_addr[pos * rank + j];
CUDA_KERNEL_ASSERT(x2_shape[j] == x1_shape_addr[j] && "The input x1_shape does not equal x2_shape!");
CUDA_KERNEL_ASSERT(index < x1_shape_addr[j] && "The input x1_indices is out of bounds!");
int count = 1;
for (size_t k = j + 1; k < rank; k++) {
count *= x1_shape_addr[k];

View File

@@ -203,6 +203,39 @@ bool SparseTensorDenseAddGpuKernelMod::LaunchKernel(const std::vector<kernel::Ad
CHECK_CUDA_RET_WITH_EXCEPT_NOTRACE(cudaMemcpyAsync(x2_shape, &x2_shape_[0], workspace_size_, cudaMemcpyHostToDevice,
reinterpret_cast<cudaStream_t>(cuda_stream_)),
"cudaMemcpyAsync x2_shape failed");
constexpr int X1_SHAPE_INDICES = 2;
std::vector<I> x1_shape(inputs[X1_SHAPE_INDICES]->size / sizeof(I));
CHECK_CUDA_RET_WITH_EXCEPT_NOTRACE(
cudaMemcpyAsync(x1_shape.data(), x1_shape_addr, inputs[X1_SHAPE_INDICES]->size, cudaMemcpyDeviceToHost,
reinterpret_cast<cudaStream_t>(cuda_stream_)),
"cudaMemcpyAsync x1_shape failed");
std::vector<I> x1_indices_host(inputs[0]->size / sizeof(I));
CHECK_CUDA_RET_WITH_EXCEPT_NOTRACE(
cudaMemcpyAsync(x1_indices_host.data(), x1_indices_addr, inputs[0]->size, cudaMemcpyDeviceToHost,
reinterpret_cast<cudaStream_t>(cuda_stream_)),
"cudaMemcpyAsync x1_indices failed");
if (x1_shape.size() != x2_shape_.size()) {
MS_LOG(ERROR) << "For '" << kernel_name_ << " The input x1_shape size does not equal x2_shape size! "
<< "tensor shape of 'sparse': " << x1_shape.size()
<< ",and the tensor shape of 'dense':" << x2_shape_.size();
return false;
}
for (size_t idx = 0; idx < x2_shape_.size(); ++idx) {
if (x1_shape[idx] != x2_shape_[idx]) {
MS_LOG(ERROR) << "For '" << kernel_name_ << " The input x1_shape dim does not equal x2_shape dim! "
<< "tensor dim of 'sparse': " << x1_shape[idx]
<< ",and the tensor dim of 'dense':" << x2_shape_[idx];
return false;
}
if (x1_indices_host[idx] >= x1_shape[idx]) {
MS_LOG(ERROR) << "For '" << kernel_name_ << " The input x1_indices is out of bounds! "
<< "x1_indices is : " << x1_indices_host[idx] << ", tensor bounds is:" << x1_shape[idx];
return false;
}
}
CHECK_CUDA_RET_WITH_EXCEPT_NOTRACE(
cudaMemcpyAsync(y_addr, x2_values_addr, output_elements_ * sizeof(T), cudaMemcpyDeviceToDevice,
reinterpret_cast<cudaStream_t>(cuda_stream_)),