allow unsigned char types during registration, convert char to unsigned char

fixed some bugs

fixed cpplint

fix cpplint
This commit is contained in:
Peilin Wang 2020-08-14 15:57:57 -04:00
parent 7d70fb4dc4
commit 7baf5352ca
10 changed files with 39 additions and 34 deletions

View File

@ -32,7 +32,7 @@ MS_REG_GPU_KERNEL_ONE(Concat,
ConcatV2GpuFwdKernel, short) // NOLINT
MS_REG_GPU_KERNEL_ONE(Concat,
KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeUInt8).AddOutputAttr(kNumberTypeUInt8),
ConcatV2GpuFwdKernel, char)
ConcatV2GpuFwdKernel, uchar)
MS_REG_GPU_KERNEL_ONE(Concat,
KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool),
ConcatV2GpuFwdKernel, bool)

View File

@ -34,7 +34,7 @@ MS_REG_GPU_KERNEL_TWO(
GatherNdGpuFwdKernel, short, int) // NOLINT
MS_REG_GPU_KERNEL_TWO(
GatherNd, KernelAttr().AddInputAttr(kNumberTypeUInt8).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeUInt8),
GatherNdGpuFwdKernel, char, int)
GatherNdGpuFwdKernel, uchar, int)
MS_REG_GPU_KERNEL_TWO(
GatherNd, KernelAttr().AddInputAttr(kNumberTypeBool).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeBool),
GatherNdGpuFwdKernel, bool, int)

View File

@ -27,7 +27,7 @@ MS_REG_GPU_KERNEL_ONE(StridedSlice, KernelAttr().AddInputAttr(kNumberTypeInt32).
MS_REG_GPU_KERNEL_ONE(StridedSlice, KernelAttr().AddInputAttr(kNumberTypeInt16).AddOutputAttr(kNumberTypeInt16),
StridedSliceGpuKernel, short) // NOLINT
MS_REG_GPU_KERNEL_ONE(StridedSlice, KernelAttr().AddInputAttr(kNumberTypeUInt8).AddOutputAttr(kNumberTypeUInt8),
StridedSliceGpuKernel, char)
StridedSliceGpuKernel, uchar)
MS_REG_GPU_KERNEL_ONE(StridedSlice, KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool),
StridedSliceGpuKernel, bool)
} // namespace kernel

View File

@ -27,7 +27,7 @@ MS_REG_GPU_KERNEL_ONE(StridedSliceGrad, KernelAttr().AddInputAttr(kNumberTypeInt
MS_REG_GPU_KERNEL_ONE(StridedSliceGrad, KernelAttr().AddInputAttr(kNumberTypeInt16).AddOutputAttr(kNumberTypeInt16),
StridedSliceGradGpuKernel, short) // NOLINT
MS_REG_GPU_KERNEL_ONE(StridedSliceGrad, KernelAttr().AddInputAttr(kNumberTypeUInt8).AddOutputAttr(kNumberTypeUInt8),
StridedSliceGradGpuKernel, char)
StridedSliceGradGpuKernel, uchar)
MS_REG_GPU_KERNEL_ONE(StridedSliceGrad, KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool),
StridedSliceGradGpuKernel, bool)
} // namespace kernel

View File

@ -37,7 +37,8 @@ __global__ void CheckValidKernel(const size_t size, const T *box, const T *img_m
}
template <typename S>
__global__ void CheckValidKernel(const size_t size, const char *box, const char *img_metas, S *valid) {
__global__ void CheckValidKernel(const size_t size, const unsigned char *box,
const unsigned char *img_metas, S *valid) {
for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += gridDim.x * blockDim.x) {
const size_t left_x = i * 4;
const size_t left_y = i * 4 + 1;
@ -45,10 +46,8 @@ __global__ void CheckValidKernel(const size_t size, const char *box, const char
const size_t right_y = i * 4 + 3;
S valid_flag = false;
valid_flag |= !((unsigned int)box[left_x] >= 0);
valid_flag |= !((unsigned int)box[left_y] >= 0);
valid_flag |= !((unsigned int)img_metas[0] * (unsigned int)img_metas[2] - 1 >= (unsigned int)box[right_x]);
valid_flag |= !((unsigned int)img_metas[1] * (unsigned int)img_metas[2] - 1 >= (unsigned int)box[right_y]);
valid_flag |= !(img_metas[0] * img_metas[2] >= box[right_x] + 1);
valid_flag |= !(img_metas[1] * img_metas[2] >= box[right_y] + 1);
valid[i] = !valid_flag;
}
@ -67,5 +66,5 @@ template void CheckValid(const size_t &size, const half *box, const half *img_me
cudaStream_t cuda_stream);
template void CheckValid(const size_t &size, const short *box, const short *img_metas, bool *valid, // NOLINT
cudaStream_t cuda_stream);
template void CheckValid(const size_t &size, const char *box, const char *img_metas, bool *valid,
template void CheckValid(const size_t &size, const unsigned char *box, const unsigned char *img_metas, bool *valid,
cudaStream_t cuda_stream);

View File

@ -73,7 +73,7 @@ template void ConcatKernel(const size_t size, const int input_num,
cudaStream_t cuda_stream);
template void ConcatKernel(const size_t size, const int input_num,
const int all_size_before_axis, const int all_size_axis,
int* len_axis, char** inputs, char* output,
int* len_axis, unsigned char** inputs, unsigned char* output,
cudaStream_t cuda_stream);
template void ConcatKernel(const size_t size, const int input_num,
const int all_size_before_axis, const int all_size_axis,

View File

@ -64,11 +64,12 @@ template void GatherNd<int, int>(int *input, int *indices, int *output, const si
const size_t &output_dim1, const size_t &indices_dim1, int *batch_indices,
int *batch_strides, cudaStream_t stream);
template void GatherNd<short, int>(short *input, int *indices, short *output, const size_t &output_dim0, // NOLINT
const size_t &output_dim1, const size_t &indices_dim1, int *batch_indices,
int *batch_strides, cudaStream_t stream);
template void GatherNd<char, int>(char *input, int *indices, char *output, const size_t &output_dim0,
const size_t &output_dim1, const size_t &indices_dim1, int *batch_indices,
int *batch_strides, cudaStream_t stream);
const size_t &output_dim1, const size_t &indices_dim1, int *batch_indices,
int *batch_strides, cudaStream_t stream);
template void GatherNd<unsigned char, int>(unsigned char *input, int *indices, unsigned char *output,
const size_t &output_dim0, const size_t &output_dim1,
const size_t &indices_dim1, int *batch_indices, int *batch_strides,
cudaStream_t stream);
template void GatherNd<bool, int>(bool *input, int *indices, bool *output, const size_t &output_dim0,
const size_t &output_dim1, const size_t &indices_dim1, int *batch_indices,
int *batch_strides, cudaStream_t stream);

View File

@ -180,8 +180,7 @@ template void CalSliceGrad<int>(const size_t input_size, const int *dy, const st
const std::vector<int> begin, const std::vector<int> size, int *output,
cudaStream_t cuda_stream);
// NOLINTNEXTLINE
template void FillDeviceArray<short>(const size_t input_size, short *addr, const float value, cudaStream_t cuda_stream);
template void FillDeviceArray<short>(const size_t input_size, short *addr, const float value, cudaStream_t cuda_stream); // NOLINT
template void Slice4DKernel(const int s1, const int s2, const int s3, const int s4, const int l1, const int l2,
const int l3, const int l4, const int d1, const int d2, const int d3, const int d4,
const short *input, short *output, cudaStream_t stream); // NOLINT
@ -189,13 +188,14 @@ template void CalSliceGrad<short>(const size_t input_size, const short *dy, cons
const std::vector<int> begin, const std::vector<int> size, short *output, // NOLINT
cudaStream_t cuda_stream);
template void FillDeviceArray<char>(const size_t input_size, char *addr, const float value, cudaStream_t cuda_stream);
template void FillDeviceArray<unsigned char>(const size_t input_size, unsigned char *addr, const float value,
cudaStream_t cuda_stream);
template void Slice4DKernel(const int s1, const int s2, const int s3, const int s4, const int l1, const int l2,
const int l3, const int l4, const int d1, const int d2, const int d3, const int d4,
const char *input, char *output, cudaStream_t stream);
template void CalSliceGrad<char>(const size_t input_size, const char *dy, const std::vector<int> in_shape,
const std::vector<int> begin, const std::vector<int> size, char *output,
cudaStream_t cuda_stream);
const unsigned char *input, unsigned char *output, cudaStream_t stream);
template void CalSliceGrad<unsigned char>(const size_t input_size, const unsigned char *dy,
const std::vector<int> in_shape, const std::vector<int> begin,
const std::vector<int> size, unsigned char *output, cudaStream_t cuda_stream);
template void FillDeviceArray<bool>(const size_t input_size, bool *addr, const float value, cudaStream_t cuda_stream);
template void Slice4DKernel(const int s1, const int s2, const int s3, const int s4, const int l1, const int l2,
@ -215,12 +215,11 @@ template void StridedSlice(const std::vector<size_t> &input_shape, const std::ve
const std::vector<int> &strides, const std::vector<int> &output_shape, const int *input,
int *output, cudaStream_t cuda_stream);
template void StridedSlice(const std::vector<size_t> &input_shape, const std::vector<int> &begin,
// NOLINTNEXTLINE
const std::vector<int> &strides, const std::vector<int> &output_shape, const short *input,
short *output, cudaStream_t cuda_stream); // NOLINT
const std::vector<int> &strides, const std::vector<int> &output_shape,
const short *input, short *output, cudaStream_t cuda_stream); // NOLINT
template void StridedSlice(const std::vector<size_t> &input_shape, const std::vector<int> &begin,
const std::vector<int> &strides, const std::vector<int> &output_shape, const char *input,
char *output, cudaStream_t cuda_stream);
const std::vector<int> &strides, const std::vector<int> &output_shape,
const unsigned char *input, unsigned char *output, cudaStream_t cuda_stream);
template void StridedSlice(const std::vector<size_t> &input_shape, const std::vector<int> &begin,
const std::vector<int> &strides, const std::vector<int> &output_shape, const bool *input,
bool *output, cudaStream_t cuda_stream);
@ -235,12 +234,11 @@ template void StridedSliceGrad(const std::vector<int> &dy_shape, const std::vect
const std::vector<int> &strides, const std::vector<int> &dx_shape, const int *dy,
int *dx, cudaStream_t cuda_stream);
template void StridedSliceGrad(const std::vector<int> &dy_shape, const std::vector<int> &begin,
// NOLINTNEXTLINE
const std::vector<int> &strides, const std::vector<int> &dx_shape, const short *dy,
const std::vector<int> &strides, const std::vector<int> &dx_shape, const short *dy, // NOLINT
short *dx, cudaStream_t cuda_stream); // NOLINT
template void StridedSliceGrad(const std::vector<int> &dy_shape, const std::vector<int> &begin,
const std::vector<int> &strides, const std::vector<int> &dx_shape, const char *dy,
char *dx, cudaStream_t cuda_stream);
const std::vector<int> &strides, const std::vector<int> &dx_shape,
const unsigned char *dy, unsigned char *dx, cudaStream_t cuda_stream);
template void StridedSliceGrad(const std::vector<int> &dy_shape, const std::vector<int> &begin,
const std::vector<int> &strides, const std::vector<int> &dx_shape, const bool *dy,
bool *dx, cudaStream_t cuda_stream);

View File

@ -64,6 +64,14 @@ class GpuKernelRegister {
}
};
// This is necessary for gpu kernels to support uint8 data type. In cuda, an unsigned,
// 8 bit integral type is represented by an unsigned char, but the MS_REG_GPU_KERNEL
// macros defined below will create compilation errors when datatype T contains a space,
// because the variable created by the macro will also contain a space. So, we solve this
// problem by writing uchar when calling these macros, and expanding uchar after the
// variable has been created.
#define uchar unsigned char
#define MS_REG_GPU_KERNEL(OPNAME, OPCLASS) \
static_assert(std::is_base_of<GpuKernel, OPCLASS>::value, " must be base of GpuKernel"); \
static const GpuKernelRegister g_##OPNAME##_gpu_kernel_reg(#OPNAME, KernelAttr(), []() { return new OPCLASS(); });
@ -88,7 +96,6 @@ class GpuKernelRegister {
static_assert(std::is_base_of<GpuKernel, OPCLASS<T, S>>::value, " must be base of GpuKernel"); \
static const GpuKernelRegister g_##OPNAME##_##T##_##S##_gpu_kernel_reg(#OPNAME, ATTR, \
[]() { return new OPCLASS<T, S>(); });
// register of mixed accuracy kernels which use template and maintain three typename
#define MS_REG_GPU_KERNEL_THREE(OPNAME, ATTR, OPCLASS, T, S, G) \
static_assert(std::is_base_of<GpuKernel, OPCLASS<T, S, G>>::value, " must be base of GpuKernel"); \

View File

@ -31,6 +31,6 @@ MS_REG_GPU_KERNEL_TWO(
CheckValidGpuKernel, short, bool) // NOLINT
MS_REG_GPU_KERNEL_TWO(
CheckValid, KernelAttr().AddInputAttr(kNumberTypeUInt8).AddInputAttr(kNumberTypeUInt8).AddOutputAttr(kNumberTypeBool),
CheckValidGpuKernel, char, bool)
CheckValidGpuKernel, uchar, bool)
} // namespace kernel
} // namespace mindspore