!19843 fix naming collision for gpu op cumsum and cumprod [r1.3]
Merge pull request !19843 from 杨林枫/fix_cumsum_cumprod_naming_collision_r1.3
This commit is contained in:
commit 88a84a6fbb
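Background: before this change, the CumSum and CumProd CUDA implementation files each defined template kernels named LeftMove and RightMove. Template instantiations such as LeftMove<float> have weak linkage, so when both translation units are linked into one binary the linker keeps only one definition, an ODR violation: one op can silently launch the other op's shift kernel and pad its exclusive scan with the wrong identity element. Below is a minimal sketch of the renamed kernels, using assumed simplified 1-D signatures (the real kernels take dim0/dim1/dim2/stride/stride2, as the hunks show):

#include <cstdio>
#include <cuda_runtime.h>

// Hypothetical 1-D simplification of the renamed kernels. Each op now owns a
// uniquely named exclusive-scan shift, so linking the CumSum and CumProd
// translation units into one binary can no longer merge two different-bodied
// templates that happen to share a name.
template <typename T>
__global__ void LeftMoveSum(const T *input, T *output, size_t n) {
  size_t i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) output[i] = (i == 0) ? T(0) : input[i - 1];  // additive identity
}

template <typename T>
__global__ void LeftMoveProd(const T *input, T *output, size_t n) {
  size_t i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) output[i] = (i == 0) ? T(1) : input[i - 1];  // multiplicative identity
}

int main() {
  const size_t n = 4;
  float host[n] = {2.f, 3.f, 4.f, 5.f}, shifted[n];
  float *d_in, *d_out;
  cudaMalloc(&d_in, n * sizeof(float));
  cudaMalloc(&d_out, n * sizeof(float));
  cudaMemcpy(d_in, host, n * sizeof(float), cudaMemcpyHostToDevice);

  LeftMoveSum<<<1, 32>>>(d_in, d_out, n);
  cudaMemcpy(shifted, d_out, n * sizeof(float), cudaMemcpyDeviceToHost);
  printf("sum shift : %g %g %g %g\n", shifted[0], shifted[1], shifted[2], shifted[3]);

  LeftMoveProd<<<1, 32>>>(d_in, d_out, n);
  cudaMemcpy(shifted, d_out, n * sizeof(float), cudaMemcpyDeviceToHost);
  printf("prod shift: %g %g %g %g\n", shifted[0], shifted[1], shifted[2], shifted[3]);

  cudaFree(d_in);
  cudaFree(d_out);
  return 0;
}

Compiled with nvcc, the two launches print 0 2 3 4 and 1 2 3 4, the correct identity padding for each op; before the rename, whichever weak definition the linker kept would have been used for both.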
CumProd CUDA implementation:

@@ -26,7 +26,7 @@ __global__ void Copy(T *input, T *output, size_t size) {
 }
 
 template <typename T>
-__global__ void LeftMove(const T *input, T *output, size_t dim0, size_t dim1, size_t dim2, size_t stride,
+__global__ void LeftMoveProd(const T *input, T *output, size_t dim0, size_t dim1, size_t dim2, size_t stride,
                          size_t stride2) {
   size_t num = dim0 * dim2;
   size_t i, k, offset;
@@ -48,7 +48,7 @@ __global__ void LeftMove(const T *input, T *output, size_t dim0, size_t dim1, si
 }
 
 template <typename T>
-__global__ void RightMove(const T *input, T *output, size_t dim0, size_t dim1, size_t dim2, size_t stride,
+__global__ void RightMoveProd(const T *input, T *output, size_t dim0, size_t dim1, size_t dim2, size_t stride,
                           size_t stride2) {
   size_t num = dim0 * dim2;
   size_t i, k, offset;
@@ -117,12 +117,12 @@ void CumProd(const T *input, T *output, T *workspace, size_t dim0, size_t dim1,
   int size = dim0 * dim2;
   if (exclusive_) {
     if (reverse_) {
-      RightMove<<<GET_BLOCKS(size), GET_THREADS, 0, stream>>>(input, output, dim0, dim1, dim2, stride, stride2);
+      RightMoveProd<<<GET_BLOCKS(size), GET_THREADS, 0, stream>>>(input, output, dim0, dim1, dim2, stride, stride2);
       Copy<<<GET_BLOCKS(size * dim1), GET_THREADS, 0, stream>>>(workspace, output, size * dim1);
       CumProdKernelReverse<<<GET_BLOCKS(size), GET_THREADS, 0, stream>>>(workspace, output, dim0, dim1, dim2, stride,
                                                                         stride2);
     } else {
-      LeftMove<<<GET_BLOCKS(size), GET_THREADS, 0, stream>>>(input, output, dim0, dim1, dim2, stride, stride2);
+      LeftMoveProd<<<GET_BLOCKS(size), GET_THREADS, 0, stream>>>(input, output, dim0, dim1, dim2, stride, stride2);
       Copy<<<GET_BLOCKS(size * dim1), GET_THREADS, 0, stream>>>(workspace, output, size * dim1);
       CumProdKernel<<<GET_BLOCKS(size), GET_THREADS, 0, stream>>>(workspace, output, dim0, dim1, dim2, stride, stride2);
     }
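The hunks above rename only signatures and call sites; the kernel bodies are unchanged by the PR. For context, here is a hedged reconstruction of what such a shift kernel does. Only the signature, the num computation, and the i/k/offset locals appear in the hunks; everything inside the loops is an assumption modeled on the exclusive-scan shift the call sites imply:

template <typename T>
__global__ void LeftMoveProd(const T *input, T *output, size_t dim0, size_t dim1, size_t dim2, size_t stride,
                             size_t stride2) {
  size_t num = dim0 * dim2;                  // one thread per (dim0, dim2) column
  for (size_t tid = blockIdx.x * blockDim.x + threadIdx.x; tid < num; tid += blockDim.x * gridDim.x) {
    size_t i = tid / dim2 % dim0;            // coordinate along dim0
    size_t k = tid % dim2;                   // coordinate along dim2
    size_t offset = i * stride + k;
    for (size_t j = 0; j < dim1; ++j) {      // walk the cumulated axis, shifting by one
      size_t write_index = j * stride2 + offset;
      output[write_index] = (j == 0) ? T(1)  // multiplicative identity; the Sum variant writes T(0)
                                     : input[(j - 1) * stride2 + offset];
    }
  }
}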
CumSum CUDA implementation:

@@ -26,7 +26,7 @@ __global__ void Copy(T *input, T *output, size_t size) {
 }
 
 template <typename T>
-__global__ void LeftMove(const T *input, T *output, size_t dim0, size_t dim1, size_t dim2, size_t stride,
+__global__ void LeftMoveSum(const T *input, T *output, size_t dim0, size_t dim1, size_t dim2, size_t stride,
                          size_t stride2) {
   size_t num = dim0 * dim2;
   size_t i, k, offset;
@@ -48,7 +48,7 @@ __global__ void LeftMove(const T *input, T *output, size_t dim0, size_t dim1, si
 }
 
 template <typename T>
-__global__ void RightMove(const T *input, T *output, size_t dim0, size_t dim1, size_t dim2, size_t stride,
+__global__ void RightMoveSum(const T *input, T *output, size_t dim0, size_t dim1, size_t dim2, size_t stride,
                           size_t stride2) {
   size_t num = dim0 * dim2;
   size_t i, k, offset;
@@ -117,12 +117,12 @@ void CumSum(const T *input, T *output, T *workspace, size_t dim0, size_t dim1, s
   int size = dim0 * dim2;
   if (exclusive_) {
     if (reverse_) {
-      RightMove<<<GET_BLOCKS(size), GET_THREADS, 0, stream>>>(input, output, dim0, dim1, dim2, stride, stride2);
+      RightMoveSum<<<GET_BLOCKS(size), GET_THREADS, 0, stream>>>(input, output, dim0, dim1, dim2, stride, stride2);
       Copy<<<GET_BLOCKS(size * dim1), GET_THREADS, 0, stream>>>(workspace, output, size * dim1);
       CumSumKernelReverse<<<GET_BLOCKS(size), GET_THREADS, 0, stream>>>(workspace, output, dim0, dim1, dim2, stride,
                                                                        stride2);
     } else {
-      LeftMove<<<GET_BLOCKS(size), GET_THREADS, 0, stream>>>(input, output, dim0, dim1, dim2, stride, stride2);
+      LeftMoveSum<<<GET_BLOCKS(size), GET_THREADS, 0, stream>>>(input, output, dim0, dim1, dim2, stride, stride2);
       Copy<<<GET_BLOCKS(size * dim1), GET_THREADS, 0, stream>>>(workspace, output, size * dim1);
       CumSumKernel<<<GET_BLOCKS(size), GET_THREADS, 0, stream>>>(workspace, output, dim0, dim1, dim2, stride, stride2);
     }
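A note on the design choice: suffixing each helper kernel with its op name (LeftMoveSum/LeftMoveProd, RightMoveSum/RightMoveProd) is the smallest, release-branch-friendly fix. A hypothetical alternative, shown here purely as a sketch and not what this PR does, would be to scope each file's helpers in a per-op namespace so identical local names could never collide across translation units:

// Illustration only; the namespace name is assumed, not from the source.
namespace cumsum_impl {
template <typename T>
__global__ void LeftMove(const T *input, T *output, size_t n) {
  size_t i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) output[i] = (i == 0) ? T(0) : input[i - 1];
}
}  // namespace cumsum_impl

// A call site would then qualify the kernel explicitly:
//   cumsum_impl::LeftMove<<<GET_BLOCKS(n), GET_THREADS, 0, stream>>>(d_in, d_out, n);

Either way, the two translation units stop emitting two different weak definitions of the same symbol.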