forked from mindspore-Ecosystem/mindspore
allow unsigned char types during registration, convert char to unsigned char
fixed some bugs fixed cpplint fix cpplint
This commit is contained in:
parent
7d70fb4dc4
commit
7baf5352ca
|
@ -32,7 +32,7 @@ MS_REG_GPU_KERNEL_ONE(Concat,
|
|||
ConcatV2GpuFwdKernel, short) // NOLINT
|
||||
MS_REG_GPU_KERNEL_ONE(Concat,
|
||||
KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeUInt8).AddOutputAttr(kNumberTypeUInt8),
|
||||
ConcatV2GpuFwdKernel, char)
|
||||
ConcatV2GpuFwdKernel, uchar)
|
||||
MS_REG_GPU_KERNEL_ONE(Concat,
|
||||
KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool),
|
||||
ConcatV2GpuFwdKernel, bool)
|
||||
|
|
|
@ -34,7 +34,7 @@ MS_REG_GPU_KERNEL_TWO(
|
|||
GatherNdGpuFwdKernel, short, int) // NOLINT
|
||||
MS_REG_GPU_KERNEL_TWO(
|
||||
GatherNd, KernelAttr().AddInputAttr(kNumberTypeUInt8).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeUInt8),
|
||||
GatherNdGpuFwdKernel, char, int)
|
||||
GatherNdGpuFwdKernel, uchar, int)
|
||||
MS_REG_GPU_KERNEL_TWO(
|
||||
GatherNd, KernelAttr().AddInputAttr(kNumberTypeBool).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeBool),
|
||||
GatherNdGpuFwdKernel, bool, int)
|
||||
|
|
|
@ -27,7 +27,7 @@ MS_REG_GPU_KERNEL_ONE(StridedSlice, KernelAttr().AddInputAttr(kNumberTypeInt32).
|
|||
MS_REG_GPU_KERNEL_ONE(StridedSlice, KernelAttr().AddInputAttr(kNumberTypeInt16).AddOutputAttr(kNumberTypeInt16),
|
||||
StridedSliceGpuKernel, short) // NOLINT
|
||||
MS_REG_GPU_KERNEL_ONE(StridedSlice, KernelAttr().AddInputAttr(kNumberTypeUInt8).AddOutputAttr(kNumberTypeUInt8),
|
||||
StridedSliceGpuKernel, char)
|
||||
StridedSliceGpuKernel, uchar)
|
||||
MS_REG_GPU_KERNEL_ONE(StridedSlice, KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool),
|
||||
StridedSliceGpuKernel, bool)
|
||||
} // namespace kernel
|
||||
|
|
|
@ -27,7 +27,7 @@ MS_REG_GPU_KERNEL_ONE(StridedSliceGrad, KernelAttr().AddInputAttr(kNumberTypeInt
|
|||
MS_REG_GPU_KERNEL_ONE(StridedSliceGrad, KernelAttr().AddInputAttr(kNumberTypeInt16).AddOutputAttr(kNumberTypeInt16),
|
||||
StridedSliceGradGpuKernel, short) // NOLINT
|
||||
MS_REG_GPU_KERNEL_ONE(StridedSliceGrad, KernelAttr().AddInputAttr(kNumberTypeUInt8).AddOutputAttr(kNumberTypeUInt8),
|
||||
StridedSliceGradGpuKernel, char)
|
||||
StridedSliceGradGpuKernel, uchar)
|
||||
MS_REG_GPU_KERNEL_ONE(StridedSliceGrad, KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool),
|
||||
StridedSliceGradGpuKernel, bool)
|
||||
} // namespace kernel
|
||||
|
|
|
@ -37,7 +37,8 @@ __global__ void CheckValidKernel(const size_t size, const T *box, const T *img_m
|
|||
}
|
||||
|
||||
template <typename S>
|
||||
__global__ void CheckValidKernel(const size_t size, const char *box, const char *img_metas, S *valid) {
|
||||
__global__ void CheckValidKernel(const size_t size, const unsigned char *box,
|
||||
const unsigned char *img_metas, S *valid) {
|
||||
for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += gridDim.x * blockDim.x) {
|
||||
const size_t left_x = i * 4;
|
||||
const size_t left_y = i * 4 + 1;
|
||||
|
@ -45,10 +46,8 @@ __global__ void CheckValidKernel(const size_t size, const char *box, const char
|
|||
const size_t right_y = i * 4 + 3;
|
||||
|
||||
S valid_flag = false;
|
||||
valid_flag |= !((unsigned int)box[left_x] >= 0);
|
||||
valid_flag |= !((unsigned int)box[left_y] >= 0);
|
||||
valid_flag |= !((unsigned int)img_metas[0] * (unsigned int)img_metas[2] - 1 >= (unsigned int)box[right_x]);
|
||||
valid_flag |= !((unsigned int)img_metas[1] * (unsigned int)img_metas[2] - 1 >= (unsigned int)box[right_y]);
|
||||
valid_flag |= !(img_metas[0] * img_metas[2] >= box[right_x] + 1);
|
||||
valid_flag |= !(img_metas[1] * img_metas[2] >= box[right_y] + 1);
|
||||
|
||||
valid[i] = !valid_flag;
|
||||
}
|
||||
|
@ -67,5 +66,5 @@ template void CheckValid(const size_t &size, const half *box, const half *img_me
|
|||
cudaStream_t cuda_stream);
|
||||
template void CheckValid(const size_t &size, const short *box, const short *img_metas, bool *valid, // NOLINT
|
||||
cudaStream_t cuda_stream);
|
||||
template void CheckValid(const size_t &size, const char *box, const char *img_metas, bool *valid,
|
||||
template void CheckValid(const size_t &size, const unsigned char *box, const unsigned char *img_metas, bool *valid,
|
||||
cudaStream_t cuda_stream);
|
||||
|
|
|
@ -73,7 +73,7 @@ template void ConcatKernel(const size_t size, const int input_num,
|
|||
cudaStream_t cuda_stream);
|
||||
template void ConcatKernel(const size_t size, const int input_num,
|
||||
const int all_size_before_axis, const int all_size_axis,
|
||||
int* len_axis, char** inputs, char* output,
|
||||
int* len_axis, unsigned char** inputs, unsigned char* output,
|
||||
cudaStream_t cuda_stream);
|
||||
template void ConcatKernel(const size_t size, const int input_num,
|
||||
const int all_size_before_axis, const int all_size_axis,
|
||||
|
|
|
@ -64,11 +64,12 @@ template void GatherNd<int, int>(int *input, int *indices, int *output, const si
|
|||
const size_t &output_dim1, const size_t &indices_dim1, int *batch_indices,
|
||||
int *batch_strides, cudaStream_t stream);
|
||||
template void GatherNd<short, int>(short *input, int *indices, short *output, const size_t &output_dim0, // NOLINT
|
||||
const size_t &output_dim1, const size_t &indices_dim1, int *batch_indices,
|
||||
int *batch_strides, cudaStream_t stream);
|
||||
template void GatherNd<char, int>(char *input, int *indices, char *output, const size_t &output_dim0,
|
||||
const size_t &output_dim1, const size_t &indices_dim1, int *batch_indices,
|
||||
int *batch_strides, cudaStream_t stream);
|
||||
const size_t &output_dim1, const size_t &indices_dim1, int *batch_indices,
|
||||
int *batch_strides, cudaStream_t stream);
|
||||
template void GatherNd<unsigned char, int>(unsigned char *input, int *indices, unsigned char *output,
|
||||
const size_t &output_dim0, const size_t &output_dim1,
|
||||
const size_t &indices_dim1, int *batch_indices, int *batch_strides,
|
||||
cudaStream_t stream);
|
||||
template void GatherNd<bool, int>(bool *input, int *indices, bool *output, const size_t &output_dim0,
|
||||
const size_t &output_dim1, const size_t &indices_dim1, int *batch_indices,
|
||||
int *batch_strides, cudaStream_t stream);
|
||||
|
|
|
@ -180,8 +180,7 @@ template void CalSliceGrad<int>(const size_t input_size, const int *dy, const st
|
|||
const std::vector<int> begin, const std::vector<int> size, int *output,
|
||||
cudaStream_t cuda_stream);
|
||||
|
||||
// NOLINTNEXTLINE
|
||||
template void FillDeviceArray<short>(const size_t input_size, short *addr, const float value, cudaStream_t cuda_stream);
|
||||
template void FillDeviceArray<short>(const size_t input_size, short *addr, const float value, cudaStream_t cuda_stream); // NOLINT
|
||||
template void Slice4DKernel(const int s1, const int s2, const int s3, const int s4, const int l1, const int l2,
|
||||
const int l3, const int l4, const int d1, const int d2, const int d3, const int d4,
|
||||
const short *input, short *output, cudaStream_t stream); // NOLINT
|
||||
|
@ -189,13 +188,14 @@ template void CalSliceGrad<short>(const size_t input_size, const short *dy, cons
|
|||
const std::vector<int> begin, const std::vector<int> size, short *output, // NOLINT
|
||||
cudaStream_t cuda_stream);
|
||||
|
||||
template void FillDeviceArray<char>(const size_t input_size, char *addr, const float value, cudaStream_t cuda_stream);
|
||||
template void FillDeviceArray<unsigned char>(const size_t input_size, unsigned char *addr, const float value,
|
||||
cudaStream_t cuda_stream);
|
||||
template void Slice4DKernel(const int s1, const int s2, const int s3, const int s4, const int l1, const int l2,
|
||||
const int l3, const int l4, const int d1, const int d2, const int d3, const int d4,
|
||||
const char *input, char *output, cudaStream_t stream);
|
||||
template void CalSliceGrad<char>(const size_t input_size, const char *dy, const std::vector<int> in_shape,
|
||||
const std::vector<int> begin, const std::vector<int> size, char *output,
|
||||
cudaStream_t cuda_stream);
|
||||
const unsigned char *input, unsigned char *output, cudaStream_t stream);
|
||||
template void CalSliceGrad<unsigned char>(const size_t input_size, const unsigned char *dy,
|
||||
const std::vector<int> in_shape, const std::vector<int> begin,
|
||||
const std::vector<int> size, unsigned char *output, cudaStream_t cuda_stream);
|
||||
|
||||
template void FillDeviceArray<bool>(const size_t input_size, bool *addr, const float value, cudaStream_t cuda_stream);
|
||||
template void Slice4DKernel(const int s1, const int s2, const int s3, const int s4, const int l1, const int l2,
|
||||
|
@ -215,12 +215,11 @@ template void StridedSlice(const std::vector<size_t> &input_shape, const std::ve
|
|||
const std::vector<int> &strides, const std::vector<int> &output_shape, const int *input,
|
||||
int *output, cudaStream_t cuda_stream);
|
||||
template void StridedSlice(const std::vector<size_t> &input_shape, const std::vector<int> &begin,
|
||||
// NOLINTNEXTLINE
|
||||
const std::vector<int> &strides, const std::vector<int> &output_shape, const short *input,
|
||||
short *output, cudaStream_t cuda_stream); // NOLINT
|
||||
const std::vector<int> &strides, const std::vector<int> &output_shape,
|
||||
const short *input, short *output, cudaStream_t cuda_stream); // NOLINT
|
||||
template void StridedSlice(const std::vector<size_t> &input_shape, const std::vector<int> &begin,
|
||||
const std::vector<int> &strides, const std::vector<int> &output_shape, const char *input,
|
||||
char *output, cudaStream_t cuda_stream);
|
||||
const std::vector<int> &strides, const std::vector<int> &output_shape,
|
||||
const unsigned char *input, unsigned char *output, cudaStream_t cuda_stream);
|
||||
template void StridedSlice(const std::vector<size_t> &input_shape, const std::vector<int> &begin,
|
||||
const std::vector<int> &strides, const std::vector<int> &output_shape, const bool *input,
|
||||
bool *output, cudaStream_t cuda_stream);
|
||||
|
@ -235,12 +234,11 @@ template void StridedSliceGrad(const std::vector<int> &dy_shape, const std::vect
|
|||
const std::vector<int> &strides, const std::vector<int> &dx_shape, const int *dy,
|
||||
int *dx, cudaStream_t cuda_stream);
|
||||
template void StridedSliceGrad(const std::vector<int> &dy_shape, const std::vector<int> &begin,
|
||||
// NOLINTNEXTLINE
|
||||
const std::vector<int> &strides, const std::vector<int> &dx_shape, const short *dy,
|
||||
const std::vector<int> &strides, const std::vector<int> &dx_shape, const short *dy, // NOLINT
|
||||
short *dx, cudaStream_t cuda_stream); // NOLINT
|
||||
template void StridedSliceGrad(const std::vector<int> &dy_shape, const std::vector<int> &begin,
|
||||
const std::vector<int> &strides, const std::vector<int> &dx_shape, const char *dy,
|
||||
char *dx, cudaStream_t cuda_stream);
|
||||
const std::vector<int> &strides, const std::vector<int> &dx_shape,
|
||||
const unsigned char *dy, unsigned char *dx, cudaStream_t cuda_stream);
|
||||
template void StridedSliceGrad(const std::vector<int> &dy_shape, const std::vector<int> &begin,
|
||||
const std::vector<int> &strides, const std::vector<int> &dx_shape, const bool *dy,
|
||||
bool *dx, cudaStream_t cuda_stream);
|
||||
|
|
|
@ -64,6 +64,14 @@ class GpuKernelRegister {
|
|||
}
|
||||
};
|
||||
|
||||
// This is necessary for gpu kernels to support uint8 data type. In cuda, an unsigned,
|
||||
// 8 bit integral type is represented by an unsigned char, but the MS_REG_GPU_KERNEL
|
||||
// macros defined below will create compilation errors when datatype T contains a space,
|
||||
// because the variable created by the macro will also contain a space. So, we solve this
|
||||
// problem by writing uchar when calling these macros, and expanding uchar after the
|
||||
// variable has been created.
|
||||
#define uchar unsigned char
|
||||
|
||||
#define MS_REG_GPU_KERNEL(OPNAME, OPCLASS) \
|
||||
static_assert(std::is_base_of<GpuKernel, OPCLASS>::value, " must be base of GpuKernel"); \
|
||||
static const GpuKernelRegister g_##OPNAME##_gpu_kernel_reg(#OPNAME, KernelAttr(), []() { return new OPCLASS(); });
|
||||
|
@ -88,7 +96,6 @@ class GpuKernelRegister {
|
|||
static_assert(std::is_base_of<GpuKernel, OPCLASS<T, S>>::value, " must be base of GpuKernel"); \
|
||||
static const GpuKernelRegister g_##OPNAME##_##T##_##S##_gpu_kernel_reg(#OPNAME, ATTR, \
|
||||
[]() { return new OPCLASS<T, S>(); });
|
||||
|
||||
// register of mixed accuracy kernels which use template and maintain three typename
|
||||
#define MS_REG_GPU_KERNEL_THREE(OPNAME, ATTR, OPCLASS, T, S, G) \
|
||||
static_assert(std::is_base_of<GpuKernel, OPCLASS<T, S, G>>::value, " must be base of GpuKernel"); \
|
||||
|
|
|
@ -31,6 +31,6 @@ MS_REG_GPU_KERNEL_TWO(
|
|||
CheckValidGpuKernel, short, bool) // NOLINT
|
||||
MS_REG_GPU_KERNEL_TWO(
|
||||
CheckValid, KernelAttr().AddInputAttr(kNumberTypeUInt8).AddInputAttr(kNumberTypeUInt8).AddOutputAttr(kNumberTypeBool),
|
||||
CheckValidGpuKernel, char, bool)
|
||||
CheckValidGpuKernel, uchar, bool)
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
|
Loading…
Reference in New Issue