From 558fac28580c479045c2a528b527e51bf44a27ea Mon Sep 17 00:00:00 2001 From: tom__chen Date: Fri, 8 Jan 2021 12:38:03 -0500 Subject: [PATCH] add one_hot and one_like int64_t requirement --- .../gpu/arrays/one_hot_gpu_kernel.h | 34 +++++++++---------- .../gpu/arrays/oneslike_gpu_kernel.h | 3 +- .../gpu/cuda_impl/one_hot_impl.cu | 12 +++---- .../gpu/cuda_impl/oneslike_impl.cu | 12 +++---- .../gpu/cuda_impl/oneslike_impl.cuh | 2 +- 5 files changed, 31 insertions(+), 32 deletions(-) diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/one_hot_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/one_hot_gpu_kernel.h index 10c922d30ed..a6e25cac507 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/one_hot_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/one_hot_gpu_kernel.h @@ -45,36 +45,36 @@ class OneHotGpuFwdKernel : public GpuKernel { return true; } bool Init(const CNodePtr &kernel_node) override { - int axis = static_cast(GetAttr(kernel_node, "axis")); - auto input = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); - auto output = AnfAlgo::GetOutputInferShape(kernel_node, 0); - int input_size = SizeToInt(input.size()); - const int default_axis = -1; + int64_t axis = GetAttr(kernel_node, "axis"); + auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0); + int64_t input_dims = static_cast(input_shape.size()); + if (axis >= input_dims) { + MS_LOG(ERROR) << "invalid one hot axis value: " << axis << " for input dims size: " << input_shape.size(); + return false; + } + const int64_t default_axis = -1; // Compress arbitrary tensor dimensions into three dimensions (left_dims, depth, right_dims). - for (int i = 0; i < input_size; i++) { - auto dim_size = input[IntToSize(i)]; - if (axis == default_axis || i < axis) { + for (size_t i = 0; i < input_shape.size(); i++) { + auto dim_size = input_shape[i]; + if (axis == default_axis || i < IntToSize(axis)) { left_dim_size_ *= dim_size; } - if (axis != default_axis && i >= axis) { + if (axis != default_axis && i >= IntToSize(axis)) { right_dim_size_ *= dim_size; } } - for (auto size : input) { + for (auto size : input_shape) { input_size_ *= size; } - for (auto size : output) { + for (auto size : output_shape) { output_size_ *= size; } - if (axis >= input_size) { - MS_LOG(ERROR) << "invalid one hot axis value: " << axis << " for input dims size: " << input.size(); - return false; - } if (axis == default_axis) { - depth_ = output[output.size() - 1]; + depth_ = output_shape[output_shape.size() - 1]; } else { - depth_ = output[IntToSize(axis)]; + depth_ = output_shape[IntToSize(axis)]; } InitSizeLists(); return true; diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/oneslike_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/oneslike_gpu_kernel.h index 11f972f07ee..02a6287913a 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/oneslike_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/oneslike_gpu_kernel.h @@ -55,11 +55,10 @@ class OnesLikeGpuKernel : public GpuKernel { auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); size_t shape_size = input_shape.size(); - input_size_ = 1; + input_size_ = sizeof(T); for (size_t i = 0; i < shape_size; i++) { input_size_ *= input_shape[i]; } - input_size_ *= sizeof(T); output_size_ = input_size_; InitSizeLists(); return true; diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/one_hot_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/one_hot_impl.cu index 6dc4d676f22..b0adf756e1a 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/one_hot_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/one_hot_impl.cu @@ -21,14 +21,14 @@ __global__ void OneHotKernel(size_t size, const S *indices, size_t depth, const size_t left_dim_size, size_t right_dim_size, T *output) { T on_v = *on_value; T off_v = *off_value; - for (int thread_idx = blockIdx.x * blockDim.x + threadIdx.x; thread_idx < size; + for (size_t thread_idx = blockIdx.x * blockDim.x + threadIdx.x; thread_idx < size; thread_idx += blockDim.x * gridDim.x) { if (thread_idx < size) { - int left_idx = (thread_idx / (depth * right_dim_size)) % left_dim_size; - int d_idx = thread_idx / right_dim_size % depth; - int right_idx = thread_idx % right_dim_size; - int input_idx = left_idx * right_dim_size + right_idx; - int output_idx = left_idx * depth * right_dim_size + d_idx * right_dim_size + right_idx; + size_t left_idx = (thread_idx / (depth * right_dim_size)) % left_dim_size; + size_t d_idx = thread_idx / right_dim_size % depth; + size_t right_idx = thread_idx % right_dim_size; + size_t input_idx = left_idx * right_dim_size + right_idx; + size_t output_idx = left_idx * depth * right_dim_size + d_idx * right_dim_size + right_idx; if (indices[input_idx] == d_idx) { output[output_idx] = on_v; } else { diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/oneslike_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/oneslike_impl.cu index dc1d9cd206e..a2fa7d90b12 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/oneslike_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/oneslike_impl.cu @@ -18,20 +18,20 @@ #include "oneslike_impl.cuh" #include "runtime/device/gpu/cuda_common.h" template -__global__ void OnesLike(const int size, const T* input, T* output) { +__global__ void OnesLike(const size_t size, const T* input, T* output) { int one = 1; T val = static_cast(one); - for (int pos = blockIdx.x * blockDim.x + threadIdx.x; pos < size; pos += blockDim.x * gridDim.x) { + for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < size; pos += blockDim.x * gridDim.x) { output[pos] = val; } return; } template -void CalOnesLike(const int size, const T* input, T* output, cudaStream_t cuda_stream) { +void CalOnesLike(const size_t size, const T* input, T* output, cudaStream_t cuda_stream) { OnesLike<<>>(size, input, output); return; } -template void CalOnesLike(const int size, const float* input, float* output, cudaStream_t cuda_stream); -template void CalOnesLike(const int size, const half* input, half* output, cudaStream_t cuda_stream); -template void CalOnesLike(const int size, const int* input, int* output, cudaStream_t cuda_stream); +template void CalOnesLike(const size_t size, const float* input, float* output, cudaStream_t cuda_stream); +template void CalOnesLike(const size_t size, const half* input, half* output, cudaStream_t cuda_stream); +template void CalOnesLike(const size_t size, const int* input, int* output, cudaStream_t cuda_stream); diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/oneslike_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/oneslike_impl.cuh index 81e92c1d099..21bd995fb7b 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/oneslike_impl.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/oneslike_impl.cuh @@ -18,6 +18,6 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_ONESLIKE_H_ template -void CalOnesLike(const int size, const T* input, T* output, cudaStream_t cuda_stream); +void CalOnesLike(const size_t size, const T* input, T* output, cudaStream_t cuda_stream); #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_ONESLIKE_H_