forked from mindspore-Ecosystem/mindspore
!48371 修复sparsedense add 内存问题
Merge pull request !48371 from melody/master
This commit is contained in:
commit
34788b5a4b
|
@ -42,8 +42,6 @@ __global__ void SparseTensorDenseAddKernelFunc(size_t input_elements, size_t ran
|
|||
int out_index = 0;
|
||||
for (size_t j = 0; j < rank; j++) {
|
||||
int index = x1_indices_addr[pos * rank + j];
|
||||
CUDA_KERNEL_ASSERT(x2_shape[j] == x1_shape_addr[j] && "The input x1_shape does not equal x2_shape!");
|
||||
CUDA_KERNEL_ASSERT(index < x1_shape_addr[j] && "The input x1_indices is out of bounds!");
|
||||
int count = 1;
|
||||
for (size_t k = j + 1; k < rank; k++) {
|
||||
count *= x1_shape_addr[k];
|
||||
|
|
|
@ -203,6 +203,39 @@ bool SparseTensorDenseAddGpuKernelMod::LaunchKernel(const std::vector<kernel::Ad
|
|||
CHECK_CUDA_RET_WITH_EXCEPT_NOTRACE(cudaMemcpyAsync(x2_shape, &x2_shape_[0], workspace_size_, cudaMemcpyHostToDevice,
|
||||
reinterpret_cast<cudaStream_t>(cuda_stream_)),
|
||||
"cudaMemcpyAsync x2_shape failed");
|
||||
constexpr int X1_SHAPE_INDICES = 2;
|
||||
std::vector<I> x1_shape(inputs[X1_SHAPE_INDICES]->size / sizeof(I));
|
||||
CHECK_CUDA_RET_WITH_EXCEPT_NOTRACE(
|
||||
cudaMemcpyAsync(x1_shape.data(), x1_shape_addr, inputs[X1_SHAPE_INDICES]->size, cudaMemcpyDeviceToHost,
|
||||
reinterpret_cast<cudaStream_t>(cuda_stream_)),
|
||||
"cudaMemcpyAsync x1_shape failed");
|
||||
|
||||
std::vector<I> x1_indices_host(inputs[0]->size / sizeof(I));
|
||||
CHECK_CUDA_RET_WITH_EXCEPT_NOTRACE(
|
||||
cudaMemcpyAsync(x1_indices_host.data(), x1_indices_addr, inputs[0]->size, cudaMemcpyDeviceToHost,
|
||||
reinterpret_cast<cudaStream_t>(cuda_stream_)),
|
||||
"cudaMemcpyAsync x1_indices failed");
|
||||
|
||||
if (x1_shape.size() != x2_shape_.size()) {
|
||||
MS_LOG(ERROR) << "For '" << kernel_name_ << " The input x1_shape size does not equal x2_shape size! "
|
||||
<< "tensor shape of 'sparse': " << x1_shape.size()
|
||||
<< ",and the tensor shape of 'dense':" << x2_shape_.size();
|
||||
return false;
|
||||
}
|
||||
|
||||
for (size_t idx = 0; idx < x2_shape_.size(); ++idx) {
|
||||
if (x1_shape[idx] != x2_shape_[idx]) {
|
||||
MS_LOG(ERROR) << "For '" << kernel_name_ << " The input x1_shape dim does not equal x2_shape dim! "
|
||||
<< "tensor dim of 'sparse': " << x1_shape[idx]
|
||||
<< ",and the tensor dim of 'dense':" << x2_shape_[idx];
|
||||
return false;
|
||||
}
|
||||
if (x1_indices_host[idx] >= x1_shape[idx]) {
|
||||
MS_LOG(ERROR) << "For '" << kernel_name_ << " The input x1_indices is out of bounds! "
|
||||
<< "x1_indices is : " << x1_indices_host[idx] << ", tensor bounds is:" << x1_shape[idx];
|
||||
return false;
|
||||
}
|
||||
}
|
||||
CHECK_CUDA_RET_WITH_EXCEPT_NOTRACE(
|
||||
cudaMemcpyAsync(y_addr, x2_values_addr, output_elements_ * sizeof(T), cudaMemcpyDeviceToDevice,
|
||||
reinterpret_cast<cudaStream_t>(cuda_stream_)),
|
||||
|
|
Loading…
Reference in New Issue