forked from mindspore-Ecosystem/mindspore
Update tensor shape from int to size_t in scatterop
This commit is contained in:
parent
1d94569d04
commit
1ab6f73d49
|
@ -96,10 +96,10 @@ class ScatterAddKernel : public GpuKernel {
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
int input_size_;
|
size_t input_size_;
|
||||||
int inner_size_;
|
size_t inner_size_;
|
||||||
int indices_size_;
|
size_t indices_size_;
|
||||||
int updates_size_;
|
size_t updates_size_;
|
||||||
std::vector<size_t> input_size_list_;
|
std::vector<size_t> input_size_list_;
|
||||||
std::vector<size_t> output_size_list_;
|
std::vector<size_t> output_size_list_;
|
||||||
std::vector<size_t> workspace_size_list_;
|
std::vector<size_t> workspace_size_list_;
|
||||||
|
|
|
@ -96,10 +96,10 @@ class ScatterUpdateKernel : public GpuKernel {
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
int input_size_;
|
size_t input_size_;
|
||||||
int inner_size_;
|
size_t inner_size_;
|
||||||
int indices_size_;
|
size_t indices_size_;
|
||||||
int updates_size_;
|
size_t updates_size_;
|
||||||
std::vector<size_t> input_size_list_;
|
std::vector<size_t> input_size_list_;
|
||||||
std::vector<size_t> output_size_list_;
|
std::vector<size_t> output_size_list_;
|
||||||
std::vector<size_t> workspace_size_list_;
|
std::vector<size_t> workspace_size_list_;
|
||||||
|
|
|
@ -18,7 +18,7 @@
|
||||||
#include "backend/kernel_compiler/gpu/cuda_impl/scatter_add_impl.cuh"
|
#include "backend/kernel_compiler/gpu/cuda_impl/scatter_add_impl.cuh"
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
__global__ void ScatterAdd(const int inner_size, const int updates_size, const int *indices, const T *updates,
|
__global__ void ScatterAdd(const size_t inner_size, const size_t updates_size, const int *indices, const T *updates,
|
||||||
T *input) {
|
T *input) {
|
||||||
for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < updates_size; pos += blockDim.x * gridDim.x) {
|
for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < updates_size; pos += blockDim.x * gridDim.x) {
|
||||||
const size_t index = pos / inner_size;
|
const size_t index = pos / inner_size;
|
||||||
|
@ -29,16 +29,16 @@ __global__ void ScatterAdd(const int inner_size, const int updates_size, const i
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void CalScatterAdd(const int &inner_size, const int &indices_size, const int *indices, const T *updates, T *input,
|
void CalScatterAdd(const size_t &inner_size, const size_t &indices_size, const int *indices, const T *updates, T *input,
|
||||||
cudaStream_t cuda_stream) {
|
cudaStream_t cuda_stream) {
|
||||||
const int updates_size = inner_size * indices_size;
|
const size_t updates_size = inner_size * indices_size;
|
||||||
ScatterAdd<<<GET_BLOCKS(updates_size), GET_THREADS, 0, cuda_stream>>>(inner_size, updates_size, indices, updates,
|
ScatterAdd<<<GET_BLOCKS(updates_size), GET_THREADS, 0, cuda_stream>>>(inner_size, updates_size, indices, updates,
|
||||||
input);
|
input);
|
||||||
}
|
}
|
||||||
|
|
||||||
template void CalScatterAdd<float>(const int &inner_size, const int &indices_size, const int *indices,
|
template void CalScatterAdd<float>(const size_t &inner_size, const size_t &indices_size, const int *indices,
|
||||||
const float *updates, float *input, cudaStream_t cuda_stream);
|
const float *updates, float *input, cudaStream_t cuda_stream);
|
||||||
template void CalScatterAdd<half>(const int &inner_size, const int &indices_size, const int *indices,
|
template void CalScatterAdd<half>(const size_t &inner_size, const size_t &indices_size, const int *indices,
|
||||||
const half *updates, half *input, cudaStream_t cuda_stream);
|
const half *updates, half *input, cudaStream_t cuda_stream);
|
||||||
template void CalScatterAdd<int>(const int &inner_size, const int &indices_size, const int *indices, const int *updates,
|
template void CalScatterAdd<int>(const size_t &inner_size, const size_t &indices_size, const int *indices,
|
||||||
int *input, cudaStream_t cuda_stream);
|
const int *updates, int *input, cudaStream_t cuda_stream);
|
||||||
|
|
|
@ -20,7 +20,7 @@
|
||||||
#include "runtime/device/gpu/cuda_common.h"
|
#include "runtime/device/gpu/cuda_common.h"
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void CalScatterAdd(const int &inner_size, const int &indices_size, const int *indices, const T *updates, T *input,
|
void CalScatterAdd(const size_t &inner_size, const size_t &indices_size, const int *indices, const T *updates, T *input,
|
||||||
cudaStream_t cuda_stream);
|
cudaStream_t cuda_stream);
|
||||||
|
|
||||||
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_SCATTER_ADD_IMPL_CUH_
|
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_SCATTER_ADD_IMPL_CUH_
|
||||||
|
|
|
@ -17,32 +17,32 @@
|
||||||
#include "backend/kernel_compiler/gpu/cuda_impl/scatter_update_impl.cuh"
|
#include "backend/kernel_compiler/gpu/cuda_impl/scatter_update_impl.cuh"
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
__global__ void ScatterUpdate(const int inner_size, const int updates_size, const int *indices, const T *updates,
|
__global__ void ScatterUpdate(const size_t inner_size, const size_t updates_size, const int *indices, const T *updates,
|
||||||
T *input) {
|
T *input) {
|
||||||
for (int pos = blockIdx.x * blockDim.x + threadIdx.x; pos < updates_size; pos += blockDim.x * gridDim.x) {
|
for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < updates_size; pos += blockDim.x * gridDim.x) {
|
||||||
const int index = pos / inner_size;
|
const size_t index = pos / inner_size;
|
||||||
const int offset = pos % inner_size;
|
const size_t offset = pos % inner_size;
|
||||||
const int current_pos = indices[index] * inner_size + offset;
|
const size_t current_pos = indices[index] * inner_size + offset;
|
||||||
input[current_pos] = updates[pos];
|
input[current_pos] = updates[pos];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void CalScatterUpdate(const int &inner_size, const int &indices_size, const int *indices, const T *updates, T *input,
|
void CalScatterUpdate(const size_t &inner_size, const size_t &indices_size, const int *indices, const T *updates,
|
||||||
cudaStream_t cuda_stream) {
|
T *input, cudaStream_t cuda_stream) {
|
||||||
const int updates_size = inner_size * indices_size;
|
const size_t updates_size = inner_size * indices_size;
|
||||||
ScatterUpdate<<<GET_BLOCKS(updates_size), GET_THREADS, 0, cuda_stream>>>(inner_size, updates_size, indices, updates,
|
ScatterUpdate<<<GET_BLOCKS(updates_size), GET_THREADS, 0, cuda_stream>>>(inner_size, updates_size, indices, updates,
|
||||||
input);
|
input);
|
||||||
}
|
}
|
||||||
|
|
||||||
template void CalScatterUpdate<float>(const int &inner_size, const int &indices_size, const int *indices,
|
template void CalScatterUpdate<float>(const size_t &inner_size, const size_t &indices_size, const int *indices,
|
||||||
const float *updates, float *input, cudaStream_t cuda_stream);
|
const float *updates, float *input, cudaStream_t cuda_stream);
|
||||||
template void CalScatterUpdate<half>(const int &inner_size, const int &indices_size, const int *indices,
|
template void CalScatterUpdate<half>(const size_t &inner_size, const size_t &indices_size, const int *indices,
|
||||||
const half *updates, half *input, cudaStream_t cuda_stream);
|
const half *updates, half *input, cudaStream_t cuda_stream);
|
||||||
template void CalScatterUpdate<int>(const int &inner_size, const int &indices_size, const int *indices,
|
template void CalScatterUpdate<int>(const size_t &inner_size, const size_t &indices_size, const int *indices,
|
||||||
const int *updates, int *input, cudaStream_t cuda_stream);
|
const int *updates, int *input, cudaStream_t cuda_stream);
|
||||||
template void CalScatterUpdate<unsigned char>(const int &inner_size, const int &indices_size, const int *indices,
|
template void CalScatterUpdate<unsigned char>(const size_t &inner_size, const size_t &indices_size, const int *indices,
|
||||||
const unsigned char *updates, unsigned char *input,
|
const unsigned char *updates, unsigned char *input,
|
||||||
cudaStream_t cuda_stream);
|
cudaStream_t cuda_stream);
|
||||||
template void CalScatterUpdate<int8_t>(const int &inner_size, const int &indices_size, const int *indices,
|
template void CalScatterUpdate<int8_t>(const size_t &inner_size, const size_t &indices_size, const int *indices,
|
||||||
const int8_t *updates, int8_t *input, cudaStream_t cuda_stream);
|
const int8_t *updates, int8_t *input, cudaStream_t cuda_stream);
|
||||||
|
|
|
@ -20,7 +20,7 @@
|
||||||
#include "runtime/device/gpu/cuda_common.h"
|
#include "runtime/device/gpu/cuda_common.h"
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void CalScatterUpdate(const int &inner_size, const int &indices_size, const int *indices, const T *updates, T *input,
|
void CalScatterUpdate(const size_t &inner_size, const size_t &indices_size, const int *indices, const T *updates,
|
||||||
cudaStream_t cuda_stream);
|
T *input, cudaStream_t cuda_stream);
|
||||||
|
|
||||||
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_SCATTER_UPDATE_IMPL_CUH_
|
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_SCATTER_UPDATE_IMPL_CUH_
|
||||||
|
|
Loading…
Reference in New Issue