forked from mindspore-Ecosystem/mindspore

!24269 add vector size check, input shape check and divide-by-zero check for GPU operators.

Merge pull request !24269 from wangshuide/wsd_master

This commit is contained in commit 2c1d3baace.
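Almost every kernel in this change receives the same three additions: an is_null_input_ flag initialized in the constructor, a CHECK_NULL_INPUT test in Init() that warns and publishes empty size lists, and an early return true in Launch() so a zero-sized input becomes a no-op instead of a CUDA launch over empty buffers. Below is a condensed, self-contained sketch of that pattern; the simplified interface and the CheckNullInput stand-in are illustrative assumptions, not MindSpore's actual GpuKernel base class or CHECK_NULL_INPUT macro.

#include <cstdio>
#include <vector>

// Stand-in for CHECK_NULL_INPUT: treat a shape as "null" when it is empty
// or any dimension is zero (an assumption; the real macro is not in this diff).
static bool CheckNullInput(const std::vector<size_t> &shape) {
  if (shape.empty()) return true;
  for (size_t d : shape) {
    if (d == 0) return true;
  }
  return false;
}

class ExampleKernel {
 public:
  bool Init(const std::vector<size_t> &input_shape) {
    is_null_input_ = CheckNullInput(input_shape);
    if (is_null_input_) {
      // Degenerate input: warn, publish (empty) size lists, report success.
      std::fprintf(stderr, "For 'ExampleKernel', input is null.\n");
      return true;
    }
    // ... normal rank validation and size computation would go here ...
    return true;
  }

  bool Launch() {
    if (is_null_input_) {
      return true;  // nothing to compute; skip the CUDA kernel entirely
    }
    // ... fetch device addresses and launch the CUDA kernel ...
    return true;
  }

 private:
  bool is_null_input_ = false;
};

int main() {
  ExampleKernel k;
  k.Init({0, 8, 8, 3});       // zero-sized batch is flagged, not crashed on
  return k.Launch() ? 0 : 1;  // Launch degenerates to a successful no-op
}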
@@ -41,13 +41,17 @@ class CropAndResizeGpuKernel : public GpuKernel {
         input_width_(0),
         final_height_(0),
         final_width_(0),
-        channel_(0) {}
+        channel_(0),
+        is_null_input_(false) {}
   ~CropAndResizeGpuKernel() override = default;
   const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
   const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
   const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
   bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
               const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
+    if (is_null_input_) {
+      return true;
+    }
     VARIABLE_NOT_USED(workspace);
     T *input_image = GetDeviceAddress<T>(inputs, 0);
     float *input_boxes = GetDeviceAddress<float>(inputs, 1);

@@ -72,6 +76,18 @@ class CropAndResizeGpuKernel : public GpuKernel {
     }
     // input image
     auto input_image_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
+    auto input_boxes_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
+    auto input_box_index_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
+    auto input_crop_size_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
+    auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
+    is_null_input_ = CHECK_NULL_INPUT(input_image_shape) || CHECK_NULL_INPUT(input_boxes_shape) ||
+                     CHECK_NULL_INPUT(input_box_index_shape) || CHECK_NULL_INPUT(input_crop_size_shape) ||
+                     CHECK_NULL_INPUT(output_shape);
+    if (is_null_input_) {
+      MS_LOG(WARNING) << "For 'CropAndResizeGpuKernel', input or output is null.";
+      InitSizeLists();
+      return true;
+    }
     size_t input_image_shape_len = input_image_shape.size();
     if (input_image_shape_len != 4) {
       MS_LOG(ERROR) << " image tensor is " << input_image_shape_len << "-D, but CropAndResize supports only " << 4
@@ -86,7 +102,6 @@ class CropAndResizeGpuKernel : public GpuKernel {
     input_height_ = input_image_shape[1];
     input_width_ = input_image_shape[2];
     // input boxes
-    auto input_boxes_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
     size_t input_boxes_shape_len = input_boxes_shape.size();
     if (input_boxes_shape_len != 2) {
       MS_LOG(ERROR) << "Boxes is rank " << input_boxes_shape_len << " but CropAndResize supports only rank " << 2

@@ -99,7 +114,6 @@ class CropAndResizeGpuKernel : public GpuKernel {
     }
     input_boxes_size_ *= sizeof(float);
     // input box_index
-    auto input_box_index_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
     size_t input_box_index_shape_len = input_box_index_shape.size();
     if (input_box_index_shape_len != 1) {
       MS_LOG(ERROR) << "Box_index is rank " << input_box_index_shape_len << " but CropAndResize supports only rank "

@@ -110,7 +124,6 @@ class CropAndResizeGpuKernel : public GpuKernel {
     input_box_ind_size_ *= input_box_index_shape[0];  // single dim required
     input_box_ind_size_ *= sizeof(int);
     // input crop_size
-    auto input_crop_size_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
     size_t input_crop_size_shape_len = input_crop_size_shape.size();
     if (input_crop_size_shape_len != 1) {
       MS_LOG(ERROR) << "Crop_size is rank " << input_crop_size_shape_len << "-D, but CropAndResize supports only rank "

@@ -126,8 +139,11 @@ class CropAndResizeGpuKernel : public GpuKernel {
     input_crop_size_ *= input_crop_size_shape[0];
     input_crop_size_ *= sizeof(int);
     // output
-    auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
     auto output_shape_len = output_shape.size();
+    if (output_shape_len != 4) {
+      MS_LOG(ERROR) << "For 'CropAndResize', the rank of output should be 4, but got " << output_shape_len;
+      return false;
+    }
     output_size_ = 1;
     for (size_t i = 0; i < output_shape_len; i++) {
       output_size_ *= output_shape[i];

@@ -175,6 +191,7 @@ class CropAndResizeGpuKernel : public GpuKernel {
   int final_height_;
   int final_width_;
   int channel_;
+  bool is_null_input_;
   std::vector<size_t> input_size_list_;
   std::vector<size_t> output_size_list_;
   std::vector<size_t> workspace_size_list_;
@@ -1,4 +1,3 @@
-
 /**
  * Copyright 2021 Huawei Technologies Co., Ltd
  *

@@ -37,6 +36,9 @@ class DepthToSpaceFwdKernel : public GpuKernel {

   bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
               const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
+    if (is_null_input_) {
+      return true;
+    }
     // get device buffer ptr
     T *input = GetDeviceAddress<T>(inputs, 0);
     T *output = GetDeviceAddress<T>(outputs, 0);

@@ -71,6 +73,12 @@ class DepthToSpaceFwdKernel : public GpuKernel {
     }
     // check input_shape
     auto input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
+    is_null_input_ = CHECK_NULL_INPUT(input_shape);
+    if (is_null_input_) {
+      MS_LOG(WARNING) << "For 'DepthToSpaceGpuKernel', input is null.";
+      InitSizeLists();
+      return true;
+    }
     shape_size_ = input_shape.size();
     if (shape_size_ != DEPTHTOSPACE_BUFFER_DIMENSION) {
       MS_LOG(EXCEPTION) << "Input is " << shape_size_ << "-D, but DepthToSpace supports 4-D tensor.";

@@ -111,6 +119,7 @@ class DepthToSpaceFwdKernel : public GpuKernel {
     oc_ = 0;
     oh_ = 0;
     ow_ = 0;
+    is_null_input_ = false;

     input_size_list_.clear();
     output_size_list_.clear();

@@ -140,6 +149,7 @@ class DepthToSpaceFwdKernel : public GpuKernel {
   size_t oc_;
   size_t oh_;
   size_t ow_;
+  bool is_null_input_;
 };
 }  // namespace kernel
 }  // namespace mindspore
@@ -37,6 +37,9 @@ class EmbeddingLookupKernel : public GpuKernel {

   bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
               const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
+    if (is_null_input_) {
+      return true;
+    }
     VARIABLE_NOT_USED(workspace);
     T *input_addr = GetDeviceAddress<T>(inputs, 0);
     S *indices_addr = GetDeviceAddress<S>(inputs, 1);

@@ -69,6 +72,16 @@ class EmbeddingLookupKernel : public GpuKernel {
     input_shapes_ = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
     indices_shapes_ = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 1);
     output_shapes_ = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node, 0);
+    is_null_input_ =
+      CHECK_NULL_INPUT(input_shapes_) || CHECK_NULL_INPUT(indices_shapes_) || CHECK_NULL_INPUT(output_shapes_);
+    if (is_null_input_) {
+      MS_LOG(WARNING) << "For 'EmbeddingLookupGpuKernel', input or output is null.";
+      InitSizeLists();
+      return true;
+    }
+    if (input_shapes_.size() < 1) {
+      MS_LOG(EXCEPTION) << "For 'EmbeddingLookupGpuKernel', the rank of input cannot be less than 1.";
+    }
     if (!is_dynamic_shape_) {
       offset_ = GetAttr<int64_t>(kernel_node, "offset");
     }

@@ -78,6 +91,7 @@ class EmbeddingLookupKernel : public GpuKernel {
   }
   void ResetResource() noexcept override {
     is_dynamic_shape_ = false;
+    is_null_input_ = false;
     input_shapes_.clear();
     indices_shapes_.clear();
     output_shapes_.clear();

@@ -138,6 +152,7 @@ class EmbeddingLookupKernel : public GpuKernel {
   size_t dims_[3] = {};
   int64_t offset_;
   bool is_dynamic_shape_;
+  bool is_null_input_;
   std::vector<size_t> input_size_list_;
   std::vector<size_t> output_size_list_;
   std::vector<size_t> workspace_size_list_;
@@ -40,6 +40,9 @@ class ExtractImagePatchesKernel : public GpuKernel {

   bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
               const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
+    if (is_null_input_) {
+      return true;
+    }
     T *input = GetDeviceAddress<T>(inputs, 0);
     T *output = GetDeviceAddress<T>(outputs, 0);
     T *t_input = GetDeviceAddress<T>(workspace, 0);

@@ -92,16 +95,28 @@ class ExtractImagePatchesKernel : public GpuKernel {
       MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but ExtractImagePatches has 1 output.";
     }
     auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
+    auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
+    is_null_input_ = CHECK_NULL_INPUT(input_shape) || CHECK_NULL_INPUT(output_shape);
+    if (is_null_input_) {
+      MS_LOG(WARNING) << "For 'ExtractImagePatchesGpuKernel', input or output is null.";
+      InitSizeLists();
+      return true;
+    }
     input_size_ = 1;
     for (size_t i = 0; i < input_shape.size(); i++) {
       input_size_ *= input_shape[i];
       input_shape_.push_back(input_shape[i]);
     }
-    auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);

     output_size_ = 1;
     for (size_t i = 0; i < output_shape.size(); i++) {
       output_size_ *= output_shape[i];
     }
+    if (input_shape.size() != 4 || output_shape.size() != 4) {
+      MS_LOG(EXCEPTION) << "For 'ExtractImagePatchesGpuKernel', the rank of input and output should be 4, "
+                        << "but got the rank of input: " << input_shape.size()
+                        << ", the rank of output: " << output_shape.size();
+    }
     // transposed NHWC shape
     t_output_shape_ = {output_shape[0], output_shape[2], output_shape[3], output_shape[1]};

@@ -109,6 +124,11 @@ class ExtractImagePatchesKernel : public GpuKernel {
     auto ksizes = GetAttr<std::vector<int64_t>>(kernel_node, "ksizes");
     auto strides = GetAttr<std::vector<int64_t>>(kernel_node, "strides");
     auto rates = GetAttr<std::vector<int64_t>>(kernel_node, "rates");
+    if (ksizes.size() != 4 || strides.size() != 4 || rates.size() != 4) {
+      MS_LOG(EXCEPTION) << "For 'ExtractImagePatchesGpuKernel', the rank of ksizes, strides and rates should be 4, "
+                        << "but got the rank of ksizes: " << ksizes.size()
+                        << ", the rank of strides: " << strides.size() << ", the rank of rates: " << rates.size();
+    }

     ksize_row_ = ksizes[2];
     ksize_col_ = ksizes[3];

@@ -127,6 +147,9 @@ class ExtractImagePatchesKernel : public GpuKernel {
     int64_t patch_rows_eff = ksize_row_ + (ksize_row_ - 1) * (rate_row_ - 1);
     int64_t patch_cols_eff = ksize_col_ + (ksize_col_ - 1) * (rate_col_ - 1);

+    MS_EXCEPTION_IF_ZERO("stride row", stride_row_);
+    MS_EXCEPTION_IF_ZERO("stride col", stride_col_);
+
     if (padding == "VALID") {
       output_rows_ = std::ceil((input_row_size_ - patch_rows_eff + 1.f) / static_cast<float>(stride_row_));
       output_cols_ = std::ceil((input_col_size_ - patch_cols_eff + 1.f) / static_cast<float>(stride_col_));
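The two new guards protect the divisions that follow: stride_row_ and stride_col_ are divisors in the std::ceil output-size computations just below, and other_stride_ divides output_size_ - 1 in the next hunk. The macro's definition is not part of this diff; a plausible stand-in for how such a divide-by-zero guard behaves (the real MS_EXCEPTION_IF_ZERO raises a MindSpore exception rather than aborting):

#include <cstdlib>
#include <iostream>

// Illustrative stand-in only; not MindSpore's actual macro.
#define EXCEPTION_IF_ZERO(name, value)                        \
  do {                                                        \
    if ((value) == 0) {                                       \
      std::cerr << "divisor '" << (name) << "' is zero\n";    \
      std::abort();                                           \
    }                                                         \
  } while (0)

int main() {
  long long stride_row = 2;  // would be 0 for a malformed "strides" attribute
  EXCEPTION_IF_ZERO("stride row", stride_row);
  // Only reached when the divisor is non-zero:
  long long output_rows = (100 + stride_row - 1) / stride_row;  // ceil(100 / 2)
  std::cout << output_rows << "\n";  // 50
  return 0;
}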
@@ -148,6 +171,7 @@ class ExtractImagePatchesKernel : public GpuKernel {
     row_input_stride_ = input_depth * input_col_size_;
     patch_input_stride_ = input_depth * input_col_size_ * input_row_size_;
     output_depth_ = input_depth;
+    MS_EXCEPTION_IF_ZERO("other stride", other_stride_);
     need_batch_ = (output_size_ - 1) / other_stride_;

     InitSizeLists();

@@ -177,6 +201,7 @@ class ExtractImagePatchesKernel : public GpuKernel {
     row_input_stride_ = 1;
     patch_input_stride_ = 1;
     output_depth_ = 1;
+    is_null_input_ = false;
     input_shape_.clear();
     t_output_shape_.clear();
     input_size_list_.clear();

@@ -208,6 +233,7 @@ class ExtractImagePatchesKernel : public GpuKernel {
   int64_t output_rows_;
   int64_t output_cols_;
   bool need_batch_;
+  bool is_null_input_;
   int64_t row_stride_;
   int64_t patch_stride_;
   int64_t other_stride_;
@@ -40,6 +40,9 @@ class InTopKGpuKernel : public GpuKernel {

   bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
               const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
+    if (is_null_input_) {
+      return true;
+    }
     T *predictions_device = GetDeviceAddress<T>(inputs, 0);
     int32_t *targets_device = GetDeviceAddress<int32_t>(inputs, 1);

@@ -106,6 +109,16 @@ class InTopKGpuKernel : public GpuKernel {
     }

     input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
+    if (input_shape_.size() < 2) {
+      MS_LOG(EXCEPTION) << "For 'InTopKGpuKernel', the rank of input cannot be less than 2, but got "
+                        << input_shape_.size();
+    }
+    is_null_input_ = CHECK_NULL_INPUT(input_shape_);
+    if (is_null_input_) {
+      MS_LOG(WARNING) << "For 'InTopKGpuKernel', input is null.";
+      InitSizeLists();
+      return true;
+    }
     input_rank_ = input_shape_.size();
     input_size_ = 1;
     for (size_t i = 0; i < input_rank_; i++) {

@@ -136,6 +149,7 @@ class InTopKGpuKernel : public GpuKernel {
     input_rank_ = 0;
     outer_size_ = 0;
     inner_size_ = 0;
+    is_null_input_ = false;
     top_k_init_ = static_cast<T>(0.);
     input_size_list_.clear();
     output_size_list_.clear();

@@ -171,6 +185,7 @@ class InTopKGpuKernel : public GpuKernel {
   // for topk
   size_t outer_size_;
   size_t inner_size_;
+  bool is_null_input_;

   std::vector<size_t> input_size_list_;
   std::vector<size_t> output_size_list_;
@@ -41,6 +41,9 @@ class MeshgridGpuKernel : public GpuKernel {

   bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
               const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
+    if (is_null_input_) {
+      return true;
+    }
     T *ones_device = GetDeviceAddress<T>(workspace, 0);
     CalOnesLike(output_size_, static_cast<T *>(nullptr), ones_device, reinterpret_cast<cudaStream_t>(stream_ptr));

@@ -79,9 +82,14 @@ class MeshgridGpuKernel : public GpuKernel {
     input_size_ = 1;
     input_count_ = AnfAlgo::GetInputTensorNum(kernel_node);
     for (size_t i = 0; i < input_count_; i++) {
-      size_t input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, i)[0];
-      input_shapes_.push_back(input_shape);
-      input_size_ *= input_shape;
+      auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, i);
+      if (input_shape.size() < 1) {
+        MS_LOG(ERROR) << "For 'MeshGridGpuKernel', the rank of input" << i << " cannot be less than 1.";
+        return false;
+      }
+      size_t input_size = input_shape[0];
+      input_shapes_.push_back(input_size);
+      input_size_ *= input_size;
     }

     output_size_ = 1;

@@ -89,6 +97,12 @@ class MeshgridGpuKernel : public GpuKernel {

     // inferred shape swaps output shape for us if needed
     output_shape_ = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
+    is_null_input_ = CHECK_NULL_INPUT(output_shape_);
+    if (is_null_input_) {
+      MS_LOG(WARNING) << "For 'MeshGridGpuKernel', output is null.";
+      InitSizeLists();
+      return true;
+    }

     if (output_count_ != input_count_) {
       MS_LOG(ERROR) << "output count is " << output_count_ << ", but MeshgridGpuKernel needs " << input_count_

@@ -101,7 +115,7 @@ class MeshgridGpuKernel : public GpuKernel {
     }

     // need to pad output shape with ones for broadcast kernel
-    for (size_t i = 0; i < output_shape_.size() - MAX_DIMS; i++) {
+    for (size_t i = 0; i < MAX_DIMS - output_shape_.size(); i++) {
       output_shape_.push_back(1);
     }
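The one-line change above fixes a real bug: both operands are size_t, so when the output rank is below MAX_DIMS the old bound output_shape_.size() - MAX_DIMS wraps around to a number near 2^64 and the loop pads essentially forever. A minimal demonstration (MAX_DIMS = 7 is assumed here; its actual value is not shown in this diff):

#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  const size_t MAX_DIMS = 7;  // assumed value for illustration
  std::vector<size_t> shape = {3, 4};  // rank 2 < MAX_DIMS

  // Old bound: unsigned 2 - 7 wraps instead of going negative.
  std::cout << shape.size() - MAX_DIMS << "\n";  // 18446744073709551611

  // Safe padding: hoist the count so the bound is computed once.
  const size_t pad = MAX_DIMS > shape.size() ? MAX_DIMS - shape.size() : 0;
  for (size_t i = 0; i < pad; i++) {
    shape.push_back(1);
  }
  std::cout << shape.size() << "\n";  // 7
  return 0;
}

The committed loop re-evaluates MAX_DIMS - output_shape_.size() on every iteration as the vector grows; the hoisted count in the sketch is just one way to keep the intended bound explicit.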
@@ -118,6 +132,7 @@ class MeshgridGpuKernel : public GpuKernel {
     output_size_ = 0;
     output_count_ = 0;
     swap_indexing_ = true;
+    is_null_input_ = false;

     input_size_list_.clear();
     output_size_list_.clear();

@@ -145,6 +160,7 @@ class MeshgridGpuKernel : public GpuKernel {
   size_t output_size_;
   size_t output_count_;
   bool swap_indexing_;
+  bool is_null_input_;

   std::vector<size_t> input_size_list_;
   std::vector<size_t> output_size_list_;
@@ -36,6 +36,7 @@ class ReverseSequenceGpuFwdKernel : public GpuKernel {
         input_size_(0),
         batch_dim_(0),
         seq_dim_(0),
+        is_null_input_(false),
         seq_len_size_(0),
         total_index_dim_(0),
         output_size_(0),

@@ -46,6 +47,9 @@ class ReverseSequenceGpuFwdKernel : public GpuKernel {
   const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
   bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
               const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
+    if (is_null_input_) {
+      return true;
+    }
     T *input = GetDeviceAddress<T>(inputs, 0);
     S *seq_len = GetDeviceAddress<S>(inputs, 1);
     size_t *input_shape_ptr = GetDeviceAddress<size_t>(workspace, 0);

@@ -75,23 +79,23 @@ class ReverseSequenceGpuFwdKernel : public GpuKernel {
       return false;
     }
     input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
-    if (CHECK_NULL_INPUT(input_shape_)) {
-      MS_LOG(WARNING) << "ReverseSequence input is null";
+    auto seq_len_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
+    is_null_input_ = CHECK_NULL_INPUT(input_shape_) || CHECK_NULL_INPUT(seq_len_shape);
+    if (is_null_input_) {
+      MS_LOG(WARNING) << "For 'ReverseSequenceGpuKernel', input is null.";
       InitSizeLists();
       return true;
     }
+    if (input_shape_.size() < 1) {
+      MS_LOG(EXCEPTION) << "For 'ReverseSequenceGpuKernel', the rank of input cannot be less than 1, but got "
+                        << input_shape_.size();
+    }
     input_size_ = 1;
     shape_size_ = input_shape_.size();  // required for calls
     for (size_t i = 0; i < shape_size_; i++) {
       input_size_ *= input_shape_[i];
     }
-    // get seq len shape
-    auto seq_len_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
-    if (CHECK_NULL_INPUT(seq_len_shape)) {
-      MS_LOG(WARNING) << "ReverseSequence seq lengths input is null";
-      InitSizeLists();
-      return true;
-    }
     seq_len_size_ = seq_len_shape.size();
     output_size_ = input_size_;  // size does not change
     // Allocate workspace memory to use for storing indices for each thread to compute with

@@ -116,6 +120,7 @@ class ReverseSequenceGpuFwdKernel : public GpuKernel {
   size_t input_size_;
   int64_t batch_dim_;
   int64_t seq_dim_;
+  bool is_null_input_;
   size_t seq_len_size_;
   size_t total_index_dim_;
   size_t output_size_;
@@ -38,6 +38,9 @@ class ReverseV2GpuKernel : public GpuKernel {

   bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
               const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
+    if (is_null_input_) {
+      return true;
+    }
     T *input_device = GetDeviceAddress<T>(inputs, 0);
     T *output_device = GetDeviceAddress<T>(outputs, 0);
     size_t *input_shape_device = GetDeviceAddress<size_t>(workspace, 0);

@@ -79,7 +82,16 @@ class ReverseV2GpuKernel : public GpuKernel {
     }

     input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
+    is_null_input_ = CHECK_NULL_INPUT(input_shape_);
+    if (is_null_input_) {
+      MS_LOG(WARNING) << "For 'ReverseV2GpuKernel', input is null.";
+      InitSizeLists();
+      return true;
+    }
     input_rank_ = input_shape_.size();
+    if (input_rank_ < 1) {
+      MS_LOG(EXCEPTION) << "For 'ReverseV2GpuKernel', the rank of input cannot be less than 1, but got " << input_rank_;
+    }
     input_size_ = 1;
     for (size_t i = 0; i < input_rank_; i++) {
       input_size_ *= input_shape_[i];

@@ -92,6 +104,9 @@ class ReverseV2GpuKernel : public GpuKernel {
     }

     axis_ = GetAttr<std::vector<int64_t>>(kernel_node, "axis");
+    if (axis_.size() < 1) {
+      MS_LOG(EXCEPTION) << "For 'ReverseV2GpuKernel', the rank of axis cannot be less than 1, but got " << axis_.size();
+    }
     for (int64_t &dimension : axis_) {
       if (dimension < 0) {
         dimension += input_rank_;

@@ -106,6 +121,7 @@ class ReverseV2GpuKernel : public GpuKernel {
   void ResetResource() noexcept override {
     input_size_ = 0;
     input_rank_ = 0;
+    is_null_input_ = false;
     input_shape_.clear();
     strides_.clear();
     axis_.clear();

@@ -131,6 +147,7 @@ class ReverseV2GpuKernel : public GpuKernel {
   std::vector<size_t> input_shape_;
   std::vector<int64_t> strides_;
   std::vector<int64_t> axis_;
+  bool is_null_input_;

   std::vector<size_t> input_size_list_;
   std::vector<size_t> output_size_list_;
@@ -42,6 +42,9 @@ class SortGpuKernel : public GpuKernel {

   bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
               const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
+    if (is_null_input_) {
+      return true;
+    }
     T *input_device = GetDeviceAddress<T>(inputs, 0);

     T *output_device = GetDeviceAddress<T>(outputs, 0);

@@ -127,10 +130,17 @@ class SortGpuKernel : public GpuKernel {
     }

     input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
+    is_null_input_ = CHECK_NULL_INPUT(input_shape_);
+    if (is_null_input_) {
+      MS_LOG(WARNING) << "For 'SortGpuKernel', input is null.";
+      InitSizeLists();
+      return true;
+    }

     input_rank_ = input_shape_.size();
-    if (input_rank_ > TRANSPOSE_MAX_DIMENSION) {
-      MS_LOG(ERROR) << "Sort cannot support input that has more than " << TRANSPOSE_MAX_DIMENSION << " dimensions.";
+    if (input_rank_ > TRANSPOSE_MAX_DIMENSION || input_rank_ < 1) {
+      MS_LOG(ERROR) << "For 'SortGpuKernel', the rank of input cannot be more than " << TRANSPOSE_MAX_DIMENSION
+                    << " dimensions or less than 1 dimension.";
       return false;
     }

@@ -145,6 +155,11 @@ class SortGpuKernel : public GpuKernel {
     if (axis_ < 0) {
       axis_ += input_rank_;
     }
+    if ((size_t)axis_ >= input_rank_) {
+      MS_LOG(ERROR) << "For 'SortGpuKernel', axis should be less than the rank of input, but got axis: " << axis_
+                    << ", the rank of input: " << input_rank_;
+      return false;
+    }

     perm_.resize(input_rank_);
     std::iota(perm_.begin(), perm_.end(), 0);
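The added guard completes the usual negative-axis convention: an axis in [-rank, rank) is normalized into [0, rank), and anything still out of range afterwards is rejected. A self-contained version of that check:

#include <cstdint>
#include <iostream>

// Returns false when the axis is out of range after normalization,
// mirroring the "axis should be less than the rank of input" error above.
bool NormalizeAxis(int64_t axis, size_t rank, size_t *out) {
  if (axis < 0) {
    axis += static_cast<int64_t>(rank);  // e.g. -1 -> rank - 1
  }
  if (axis < 0 || static_cast<size_t>(axis) >= rank) {
    return false;
  }
  *out = static_cast<size_t>(axis);
  return true;
}

int main() {
  size_t a = 0;
  std::cout << NormalizeAxis(-1, 3, &a) << " " << a << "\n";  // 1 2
  std::cout << NormalizeAxis(3, 3, &a) << "\n";               // 0 (rejected)
  return 0;
}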
@@ -172,6 +187,7 @@ class SortGpuKernel : public GpuKernel {
     input_size_ = 0;
     axis_ = 0;
     descending_ = false;
+    is_null_input_ = false;
     input_shape_.clear();
     input_rank_ = 0;
     transposed_shape_.clear();

@@ -206,6 +222,7 @@ class SortGpuKernel : public GpuKernel {
   size_t input_size_;
   int64_t axis_;
   bool descending_;
+  bool is_null_input_;
   std::vector<size_t> input_shape_;
   size_t input_rank_;
@@ -37,6 +37,9 @@ class SpaceToDepthFwdKernel : public GpuKernel {

   bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
               const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
+    if (is_null_input_) {
+      return true;
+    }
     // get device buffer ptr
     T *input = GetDeviceAddress<T>(inputs, 0);
     T *output = GetDeviceAddress<T>(outputs, 0);

@@ -71,6 +74,12 @@ class SpaceToDepthFwdKernel : public GpuKernel {
     }
     // check input_shape
     auto input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
+    is_null_input_ = CHECK_NULL_INPUT(input_shape);
+    if (is_null_input_) {
+      MS_LOG(WARNING) << "For 'SpaceToDepthGpuKernel', input is null.";
+      InitSizeLists();
+      return true;
+    }
     shape_size_ = input_shape.size();
     if (shape_size_ != SPACETODEPTH_BUFFER_DIMENSION) {
       MS_LOG(EXCEPTION) << "Input is " << shape_size_ << "-D, but SpaceToDepth supports 4-D tensor.";

@@ -102,6 +111,7 @@ class SpaceToDepthFwdKernel : public GpuKernel {
     input_size_ = 0;
     output_size_ = 0;
     block_size_ = 0;
+    is_null_input_ = false;
     in_ = 0;
     ic_ = 0;
     ih_ = 0;

@@ -131,6 +141,7 @@ class SpaceToDepthFwdKernel : public GpuKernel {
   size_t input_size_;
   size_t output_size_;
   size_t block_size_;
+  bool is_null_input_;
   size_t in_;
   size_t ic_;
   size_t ih_;
@@ -31,11 +31,14 @@ namespace kernel {
 template <typename T>
 class TensorCopySlicesGpuKernel : public GpuKernel {
  public:
-  TensorCopySlicesGpuKernel() : input_size_(0), update_size_(0), output_size_(0) {}
+  TensorCopySlicesGpuKernel() : input_size_(0), update_size_(0), output_size_(0), is_null_input_(false) {}
   ~TensorCopySlicesGpuKernel() {}

   bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
               const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
+    if (is_null_input_) {
+      return true;
+    }
     T *input_addr = GetDeviceAddress<T>(inputs, 0);
     T *update_addr = GetDeviceAddress<T>(inputs, 1);
     T *output_addr = GetDeviceAddress<T>(outputs, 0);

@@ -69,7 +72,13 @@ class TensorCopySlicesGpuKernel : public GpuKernel {
     }

     input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
+    auto update_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
+    is_null_input_ = CHECK_NULL_INPUT(input_shape_) || CHECK_NULL_INPUT(update_shape);
+    if (is_null_input_) {
+      MS_LOG(WARNING) << "For 'TensorCopySlicesGpuKernel', input or output is null.";
+      InitSizeLists();
+      return true;
+    }
     if (input_shape_.size() > kMaxDims) {
       MS_LOG(ERROR) << "StridedSlice supports dims no more than " << kMaxDims << ", but the input shape is "
                     << input_shape_.size();

@@ -80,6 +89,13 @@ class TensorCopySlicesGpuKernel : public GpuKernel {
     end_ = GetAttr<std::vector<int64_t>>(kernel_node, kAttrEnd);
     strides_ = GetAttr<std::vector<int64_t>>(kernel_node, kAttrStrides);

+    if (begin_.size() > input_shape_.size()) {
+      MS_LOG(ERROR) << "For 'TensorCopySlicesGpuKernel', the rank of begin attr cannot be more than the rank of input, "
+                    << "but got the rank of begin attr: " << begin_.size()
+                    << ", the rank of input: " << input_shape_.size();
+      return false;
+    }
+
     FillEmptyDims(kernel_node);
     output_shape_ = input_shape_;
     FillUpdateDim();

@@ -100,6 +116,7 @@ class TensorCopySlicesGpuKernel : public GpuKernel {
     auto len = begin_.size();
     size_t total_input_num = 1;
     for (size_t i = 0; i < len; ++i) {
+      MS_EXCEPTION_IF_ZERO("strides_[i]", strides_[i]);
      total_input_num *= ((end_[i] - begin_[i]) / strides_[i]);
     }
     if (total_input_num != total_update_num) {

@@ -161,6 +178,7 @@ class TensorCopySlicesGpuKernel : public GpuKernel {
     if (begin_[i] <= end_[i] && strides_[i] > 0) {
       update_shape_.push_back((end_[i] - 1 - begin_[i]) / strides_[i] + 1);
     } else if (begin_[i] > end_[i] && strides_[i] < 0) {
+      MS_EXCEPTION_IF_ZERO("strides_[i] + 1", strides_[i] + 1);
       update_shape_.push_back((end_[i] - begin_[i] + 1) / strides_[i] + 1);
     } else {
       update_shape_.push_back(0);
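The two formulas above count the elements a strided slice visits, for the ascending and descending cases respectively. A quick numeric check with hypothetical values matching each branch:

#include <cstdint>
#include <iostream>

// begin <= end, stride > 0: elements begin, begin+stride, ... below end.
int64_t SliceLenPos(int64_t begin, int64_t end, int64_t stride) {
  return (end - 1 - begin) / stride + 1;
}

// begin > end, stride < 0: elements begin, begin+stride, ... above end.
int64_t SliceLenNeg(int64_t begin, int64_t end, int64_t stride) {
  return (end - begin + 1) / stride + 1;
}

int main() {
  std::cout << SliceLenPos(2, 7, 2) << "\n";   // 3: visits 2, 4, 6
  std::cout << SliceLenPos(0, 5, 1) << "\n";   // 5: dense slice [0, 5)
  std::cout << SliceLenNeg(7, 2, -2) << "\n";  // 3: visits 7, 5, 3
  return 0;
}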
@@ -185,6 +203,7 @@ class TensorCopySlicesGpuKernel : public GpuKernel {
   size_t update_size_;
   size_t output_size_;
   inline static size_t kMaxDims = 8;
+  bool is_null_input_;
 };
 }  // namespace kernel
 }  // namespace mindspore
@@ -38,7 +38,8 @@ class TensorScatterAddGpuFwdKernel : public GpuKernel {
         work_shape_(nullptr),
         indices_dim_0_(0),
         indices_dim_1_(0),
-        memcpy_flag_(false) {}
+        memcpy_flag_(false),
+        is_null_input_(false) {}
   ~TensorScatterAddGpuFwdKernel() {
     if (indices_stride_ != nullptr) {
       device::gpu::GPUMemoryAllocator::GetInstance().FreeTensorMem(static_cast<void *>(indices_stride_));

@@ -54,6 +55,9 @@ class TensorScatterAddGpuFwdKernel : public GpuKernel {

   bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
               const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
+    if (is_null_input_) {
+      return true;
+    }
     VARIABLE_NOT_USED(workspace);
     T *input = GetDeviceAddress<T>(inputs, 0);
     S *indices = GetDeviceAddress<S>(inputs, 1);

@@ -77,10 +81,10 @@ class TensorScatterAddGpuFwdKernel : public GpuKernel {
     const size_t update_size = update_size_ / sizeof(T);
     const size_t output_size = output_size_ / sizeof(T);

-    CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_,
-                               cudaMemcpyAsync(&output[0], &input[0], input_size_, cudaMemcpyDeviceToDevice,
-                                               reinterpret_cast<cudaStream_t>(stream_ptr)),
-                               "cudaMemcpyAsync output failed");
+    CHECK_CUDA_RET_WITH_EXCEPT(
+      kernel_node_,
+      cudaMemcpyAsync(output, input, input_size_, cudaMemcpyDeviceToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)),
+      "cudaMemcpyAsync output failed");

     TensorScatterAdd(input, indices, update, output, block_size_, update_size, output_size, indices_dim_0_,
                      indices_dim_1_, indices_stride_, work_shape_, reinterpret_cast<cudaStream_t>(stream_ptr));

@@ -106,6 +110,13 @@ class TensorScatterAddGpuFwdKernel : public GpuKernel {
     indices_shapes_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
     input_shapes_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
     output_shapes_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);
+    is_null_input_ = CHECK_NULL_INPUT(update_shapes_) || CHECK_NULL_INPUT(indices_shapes_) ||
+                     CHECK_NULL_INPUT(input_shapes_) || CHECK_NULL_INPUT(output_shapes_);
+    if (is_null_input_) {
+      MS_LOG(WARNING) << "For 'TensorScatterAddGpuKernel', input or output is null.";
+      InitSizeLists();
+      return true;
+    }

     std::vector<size_t> shape_me = input_shapes_;
     (void)std::transform(shape_me.begin(), shape_me.end(), std::back_inserter(vec_work_shape_),

@@ -126,6 +137,10 @@ class TensorScatterAddGpuFwdKernel : public GpuKernel {
       MS_LOG(EXCEPTION) << "Failed to alloc work_shape_work, size: " << vec_work_len;
     }
     work_shape_ = static_cast<S *>(work_shape_work);
+    if (vec_work_shape_.size() < 1) {
+      MS_LOG(EXCEPTION) << "For 'TensorScatterAddGpuKernel', the rank of vec work cannot be less than 1, but got "
+                        << vec_work_shape_.size();
+    }

     InitSizeLists();

@@ -160,6 +175,10 @@ class TensorScatterAddGpuFwdKernel : public GpuKernel {
       output_size_ *= output_shapes_[i];
     }

+    if (indices_shapes_.size() < 1) {
+      MS_LOG(EXCEPTION) << "For 'TensorScatterAddGpuKernel', the rank of indices cannot be less than 1, but got "
+                        << indices_shapes_.size();
+    }
     // calculate indices dim 0/1
     indices_dim_0_ = indices_shapes_[0];
     indices_dim_1_ = indices_shapes_[indices_shapes_.size() - 1];

@@ -169,6 +188,11 @@ class TensorScatterAddGpuFwdKernel : public GpuKernel {
       block_size_ *= output_shapes_[i];
     }

+    if (indices_dim_1_ < 1 || indices_dim_1_ > output_shapes_.size()) {
+      MS_LOG(EXCEPTION) << "For 'TensorScatterAddGpuKernel', indices_shapes[-1] cannot be less than 1 or greater than "
+                        << "the rank of output_shapes, but got indices_shapes[-1]: " << indices_dim_1_
+                        << ", rank of output_shapes: " << output_shapes_.size();
+    }
     // calculate indices_stride
     vec_indices_stride_.resize(indices_dim_1_, 0);
     vec_indices_stride_[indices_dim_1_ - 1] = block_size_;
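vec_indices_stride_ is the lookup table that converts an N-dimensional scatter index into a flat offset into the output. The commit shows only its first two statements; the backward fill sketched below is the conventional row-major continuation and is an assumption, not code from this change.

#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  std::vector<size_t> output_shape = {4, 5, 6};
  size_t index_depth = 2;  // indices_shapes[-1]: how many leading dims an index addresses
  size_t block_size = 6;   // product of the output dims beyond index_depth

  std::vector<size_t> indices_stride(index_depth, 0);
  indices_stride[index_depth - 1] = block_size;
  // Conventional backward fill: each stride is the next stride times the
  // corresponding output dimension (assumed continuation, not in the diff).
  for (size_t i = index_depth - 1; i > 0; --i) {
    indices_stride[i - 1] = indices_stride[i] * output_shape[i];
  }
  for (size_t s : indices_stride) std::cout << s << " ";  // 30 6
  std::cout << "\n";
  return 0;
}

With these strides an index (i0, i1) lands at flat offset i0 * 30 + i1 * 6, the start of a contiguous block of block_size elements.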
@@ -201,6 +225,7 @@ class TensorScatterAddGpuFwdKernel : public GpuKernel {
   size_t indices_dim_0_;
   size_t indices_dim_1_;
   bool memcpy_flag_;
+  bool is_null_input_;
 };
 }  // namespace kernel
 }  // namespace mindspore
@@ -63,12 +63,10 @@ class TileGpuKernel : public GpuKernel {
     size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
     if (input_num != 1) {
       MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but Tile needs 1 input.";
-      return false;
     }
     size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
     if (output_num != 1) {
       MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but Tile has 1 output.";
-      return false;
     }
     input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
     output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);

@@ -78,6 +76,10 @@ class TileGpuKernel : public GpuKernel {
       InitSizeLists();
       return true;
     }
+    if (output_shape_.size() < 1) {
+      MS_LOG(EXCEPTION) << "For 'TileGpuKernel', the rank of output cannot be less than 1, but got "
+                        << output_shape_.size();
+    }
     input_size_ = 1;
     for (size_t i = 0; i < input_shape_.size(); i++) {
       input_size_ *= input_shape_[i];
@@ -140,6 +140,9 @@ std::pair<bool, size_t> GpuKernelFactory::GpuKernelAttrCheck(const std::string &
   }
   bool flag = true;
   auto attr_size = (&(iter->second))->at(attr_index).first.GetInputSize();
+  if (kernel_info->GetInputNum() > 0) {
+    MS_EXCEPTION_IF_ZERO("attr size", attr_size);
+  }
   // data type matching check of all input parameters of kernel
   for (size_t input_index = 0; input_index < kernel_info->GetInputNum(); input_index++) {
     GpuKernelFactory::CheckSM(kernel_info, input_index);

@@ -153,6 +156,9 @@ std::pair<bool, size_t> GpuKernelFactory::GpuKernelAttrCheck(const std::string &
       continue;
     }
     attr_size = (&(iter->second))->at(attr_index).first.GetOutputSize();
+    if (kernel_info->GetOutputNum() > 0) {
+      MS_EXCEPTION_IF_ZERO("attr size", attr_size);
+    }
     // data type matching check of all output parameters of kernel
     for (size_t output_index = 0; output_index < kernel_info->GetOutputNum(); output_index++) {
       if (kernel_info->GetOutputDeviceType(output_index) !=
@@ -28,7 +28,14 @@ constexpr int kMaxDimsSize = 3;
 template <typename T>
 class CumProdGpuKernel : public GpuKernel {
  public:
-  CumProdGpuKernel() : exclusive_(false), reverse_(false), axis_(0), input_size_0_(0), stride_(0), stride2_(0) {}
+  CumProdGpuKernel()
+      : exclusive_(false),
+        reverse_(false),
+        is_null_input_(false),
+        axis_(0),
+        input_size_0_(0),
+        stride_(0),
+        stride2_(0) {}
   ~CumProdGpuKernel() = default;

   const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }

@@ -37,6 +44,9 @@ class CumProdGpuKernel : public GpuKernel {

   bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
               const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
+    if (is_null_input_) {
+      return true;
+    }
     T *input_addr = GetDeviceAddress<T>(inputs, 0);
     T *output_addr = GetDeviceAddress<T>(outputs, 0);
     T *ws_addr = GetDeviceAddress<T>(workspace, 0);

@@ -51,6 +61,12 @@ class CumProdGpuKernel : public GpuKernel {
     }
     input_size_0_ = sizeof(T);
     shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
+    is_null_input_ = CHECK_NULL_INPUT(shape_);
+    if (is_null_input_) {
+      MS_LOG(WARNING) << "For 'CumProdGpuKernel', input is null.";
+      InitSizeLists();
+      return true;
+    }
     axis_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "axis"));
     exclusive_ = GetAttr<bool>(kernel_node, "exclusive");
     reverse_ = GetAttr<bool>(kernel_node, "reverse");

@@ -93,6 +109,7 @@ class CumProdGpuKernel : public GpuKernel {
   }
   bool exclusive_;
   bool reverse_;
+  bool is_null_input_;
   int axis_;
   size_t input_size_0_;
   size_t stride_;
@@ -39,6 +39,9 @@ class IdentityGpuKernel : public GpuKernel {

   bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
               const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
+    if (is_null_input_) {
+      return true;
+    }
     T *input_addr = GetDeviceAddress<T>(inputs, 0);
     T *output_addr = GetDeviceAddress<T>(outputs, 0);
@@ -36,6 +36,9 @@ class ApplyGradientDescentKernel : public GpuKernel {

   bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
               const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
+    if (is_null_input_) {
+      return true;
+    }
     VARIABLE_NOT_USED(workspace);
     T *var = GetDeviceAddress<T>(inputs, 0);
     T *alpha = GetDeviceAddress<T>(inputs, 1);

@@ -50,14 +53,18 @@ class ApplyGradientDescentKernel : public GpuKernel {
     size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
     if (input_num != 3) {
       MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but ApplyGradientDescent needs 3 inputs.";
-      return false;
     }
     size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
     if (output_num != 1) {
       MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but ApplyGradientDescent has 1 output.";
-      return false;
     }
     auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
+    is_null_input_ = CHECK_NULL_INPUT(input_shape);
+    if (is_null_input_) {
+      MS_LOG(WARNING) << "For 'ApplyGradientDescentGpuKernel', input is null.";
+      InitSizeLists();
+      return true;
+    }
     input_size_ = 1;
     for (size_t i = 0; i < input_shape.size(); i++) {
       input_size_ *= input_shape[i];

@@ -68,6 +75,7 @@ class ApplyGradientDescentKernel : public GpuKernel {

   void ResetResource() noexcept override {
     input_size_ = 1;
+    is_null_input_ = false;
     input_size_list_.clear();
     output_size_list_.clear();
     workspace_size_list_.clear();

@@ -83,6 +91,7 @@ class ApplyGradientDescentKernel : public GpuKernel {

  private:
   size_t input_size_;
+  bool is_null_input_;
   std::vector<size_t> input_size_list_;
   std::vector<size_t> output_size_list_;
   std::vector<size_t> workspace_size_list_;
@@ -36,6 +36,9 @@ class BCEWithLogitsLossKernel : public GpuKernel {

   bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
               const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
+    if (is_null_input_) {
+      return true;
+    }
     T *predict = GetDeviceAddress<T>(inputs, 0);
     T *target = GetDeviceAddress<T>(inputs, 1);
     T *weight = GetDeviceAddress<T>(inputs, 2);

@@ -69,25 +72,42 @@ class BCEWithLogitsLossKernel : public GpuKernel {
     size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
     if (input_num != 4) {
       MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but BCEWithLogitsLoss needs 4 inputs.";
-      return false;
     }
     size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
     if (output_num != 1) {
       MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but BCEWithLogitsLoss has 1 output.";
-      return false;
     }
     input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
+    weight_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
+    pos_weight_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
+    is_null_input_ =
+      CHECK_NULL_INPUT(input_shape_) || CHECK_NULL_INPUT(weight_shape_) || CHECK_NULL_INPUT(pos_weight_shape_);
+    if (is_null_input_) {
+      MS_LOG(WARNING) << "For 'BCEWithLogitsLossGpuKernel', input is null.";
+      InitSizeLists();
+      return true;
+    }
+    if (input_shape_.size() < 1) {
+      MS_LOG(EXCEPTION) << "For 'BCEWithLogitsLossGpuKernel', the rank of input cannot be less than 1, but got "
+                        << input_shape_.size();
+    }
+    if (weight_shape_.size() < 1) {
+      MS_LOG(EXCEPTION) << "For 'BCEWithLogitsLossGpuKernel', the rank of weight cannot be less than 1, but got "
+                        << weight_shape_.size();
+    }
+    if (pos_weight_shape_.size() < 1) {
+      MS_LOG(EXCEPTION) << "For 'BCEWithLogitsLossGpuKernel', the rank of pos_weight cannot be less than 1, but got "
+                        << pos_weight_shape_.size();
+    }
     input_size_ = 1;
     if (input_shape_.size() > MAX_LOGITS_DIMENSION) {
       MS_LOG(EXCEPTION) << "Input dimension is " << input_shape_.size()
                         << ", but BCEWithLogitsLoss can only support up to " << MAX_LOGITS_DIMENSION << "-D.";
-      return false;
     }
     for (size_t i = 0; i < input_shape_.size(); i++) {
       input_size_ *= input_shape_[i];
     }
     // weight shape
-    weight_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
     weight_size_ = 1;
     for (size_t i = 0; i < weight_shape_.size(); i++) {
       weight_size_ *= weight_shape_[i];

@@ -95,7 +115,6 @@ class BCEWithLogitsLossKernel : public GpuKernel {
     weight_need_broadcast_ = NeedBroadcast(&weight_shape_, input_shape_);
     // pos_weight shape
     pos_weight_size_ = 1;
-    pos_weight_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
     for (size_t i = 0; i < pos_weight_shape_.size(); i++) {
       pos_weight_size_ *= pos_weight_shape_[i];
     }

@@ -110,6 +129,7 @@ class BCEWithLogitsLossKernel : public GpuKernel {
     pos_weight_size_ = 1;
     weight_need_broadcast_ = false;
     pos_weight_need_broadcast_ = false;
+    is_null_input_ = false;
     input_shape_.clear();
     weight_shape_.clear();
     pos_weight_shape_.clear();

@@ -137,7 +157,7 @@ class BCEWithLogitsLossKernel : public GpuKernel {
   bool NeedBroadcast(std::vector<size_t> *shape, const std::vector<size_t> &result_shape) {
     // result_shape is larger than shape
     // and shape is able to be broadcast to result_shape
-    if (shape->size() != result_shape.size()) {
+    if (shape->size() < result_shape.size()) {
       size_t fill_size = result_shape.size() - shape->size();
       (void)shape->insert(shape->begin(), fill_size, 1);
       return true;
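Relaxing != to < matters for two reasons: a shorter weight shape is still left-padded with leading ones (numpy-style broadcasting), while a weight of higher rank than the logits no longer feeds result_shape.size() - shape->size(), an unsigned subtraction that would wrap, into insert(). That wrap is likely the motivating bug; the diff itself does not say so. A minimal illustration of the padding:

#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  std::vector<size_t> weight = {5};        // rank 1
  std::vector<size_t> logits = {2, 3, 5};  // rank 3
  if (weight.size() < logits.size()) {     // strictly-lower rank only
    size_t fill = logits.size() - weight.size();
    weight.insert(weight.begin(), fill, 1);  // -> {1, 1, 5}
  }
  for (size_t d : weight) std::cout << d << " ";
  std::cout << "\n";
  return 0;
}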
@@ -155,6 +175,7 @@ class BCEWithLogitsLossKernel : public GpuKernel {
   size_t pos_weight_size_;
   bool weight_need_broadcast_;
   bool pos_weight_need_broadcast_;
+  bool is_null_input_;
   std::vector<size_t> input_shape_;
   std::vector<size_t> weight_shape_;
   std::vector<size_t> pos_weight_shape_;
@@ -77,7 +77,7 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
     const float beta = 0;

     if (use_pad_) {
-      T *padded = GetPossiblyNullDeviceAddress<T>(workspace, 1);
+      T *padded = GetDeviceAddress<T>(workspace, 1);
       if (data_format_ == kOpFormat_NHWC) {
         CalPadNHWC(padded_size_ / sizeof(T), x, n_, old_height_, old_width_, c_, old_height_ + pad_height_,
                    old_width_ + pad_width_, pad_top_, pad_left_, pad_value_, padded,
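The same one-line change recurs in the next three convolution kernels. Judging by the names alone (their definitions are not in this diff), GetDeviceAddress is the strict accessor and GetPossiblyNullDeviceAddress the tolerant one; on the use_pad_ path the padded buffer is always written and read, so the strict form is the right choice. A self-contained sketch of that presumed contract:

#include <cassert>
#include <cstddef>
#include <vector>

struct Address { void *ptr = nullptr; size_t size = 0; };

// Tolerant accessor: a missing or empty slot yields nullptr (an assumption
// about the real helper's behavior, inferred from its name).
template <typename T>
T *GetPossiblyNullAddr(const std::vector<Address> &v, size_t i) {
  if (i >= v.size() || v[i].ptr == nullptr) return nullptr;
  return static_cast<T *>(v[i].ptr);
}

// Strict accessor: a missing buffer is a hard failure.
template <typename T>
T *GetAddr(const std::vector<Address> &v, size_t i) {
  T *p = GetPossiblyNullAddr<T>(v, i);
  assert(p != nullptr && "required buffer is missing");
  return p;
}

int main() {
  float buf[4] = {0};
  std::vector<Address> ws = {{buf, sizeof(buf)}};
  assert(GetAddr<float>(ws, 0) == buf);                  // present: OK
  assert(GetPossiblyNullAddr<float>(ws, 1) == nullptr);  // absent: tolerated
  return 0;
}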
@@ -50,7 +50,7 @@ class Conv3dGpuKernel : public GpuKernel {
     const float alpha = 1;
     const float beta = 0;
     if (use_pad_) {
-      T *padded_addr = GetPossiblyNullDeviceAddress<T>(workspace, 1);
+      T *padded_addr = GetDeviceAddress<T>(workspace, 1);
       CalPad3d(padded_size_ / sizeof(T), input_addr, n_, c_, old_depth_, old_height_, old_width_,
                old_depth_ + pad_depth_, old_height_ + pad_height_, old_width_ + pad_width_, pad_head_, pad_top_,
                pad_left_, pad_value_, padded_addr, reinterpret_cast<cudaStream_t>(stream_ptr));
@@ -61,7 +61,7 @@ class Conv3dGradFilterGpuKernel : public GpuKernel {
     const float alpha = 1;
     const float beta = 0;
     if (use_pad_) {
-      T *padded = GetPossiblyNullDeviceAddress<T>(workspace, 1);
+      T *padded = GetDeviceAddress<T>(workspace, 1);
       CalPad3d(padded_size_ / sizeof(T), x, n_, c_, old_depth_, old_height_, old_width_, old_depth_ + pad_depth_,
                old_height_ + pad_height_, old_width_ + pad_width_, pad_head_, pad_top_, pad_left_, pad_value_, padded,
                reinterpret_cast<cudaStream_t>(stream_ptr));
@@ -72,7 +72,7 @@ class Conv3dTransposeGpuFwdKernel : public GpuKernel {
       }
     } else {
       if (greater_stride_) {
-        T *stride_padded = GetPossiblyNullDeviceAddress<T>(workspace, 1);
+        T *stride_padded = GetDeviceAddress<T>(workspace, 1);
         CHECK_CUDNN_RET_WITH_EXCEPT(
           kernel_node_,
           cudnnConvolutionBackwardData(cudnn_handle_, &alpha, filter_desc_, filter_addr, input_desc_, input_addr,

@@ -93,6 +93,23 @@ class Conv3dTransposeGpuFwdKernel : public GpuKernel {
     return true;
   }

+  bool CheckNull(const std::vector<size_t> filter_shape, const std::vector<size_t> input_shape) {
+    is_null_input_ = CHECK_NULL_INPUT(filter_shape) || CHECK_NULL_INPUT(input_shape);
+    if (is_null_input_) {
+      MS_LOG(WARNING) << "For 'Conv3dTransposeGpuKernel', input is null.";
+      InitSizeLists();
+      return true;
+    }
+    return false;
+  }
+
+  void CheckSize(const size_t value, const size_t expect_value, const string arg_name) {
+    if (value != expect_value) {
+      MS_LOG(EXCEPTION) << "For 'Conv3dTransposeGpuKernel', the length of " << arg_name << " must be " << expect_value
+                        << ", but got " << value;
+    }
+  }
+
   bool Init(const CNodePtr &kernel_node) override {
     kernel_node_ = kernel_node;
     InitResource();

@@ -105,10 +122,7 @@ class Conv3dTransposeGpuFwdKernel : public GpuKernel {
     }
     auto filter_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
     auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
-    is_null_input_ = CHECK_NULL_INPUT(input_shape);
-    if (is_null_input_) {
-      MS_LOG(WARNING) << "Conv3dTransposeGpuBkwKernel input is null.";
-      InitSizeLists();
+    if (CheckNull(filter_shape, input_shape)) {
       return true;
     }
     std::vector<size_t> output_shape;

@@ -133,6 +147,8 @@ class Conv3dTransposeGpuFwdKernel : public GpuKernel {
     pad_mode_ = GetAttr<std::string>(kernel_node, "pad_mode");
     SetStrideAndDilation(kernel_node);
     std::vector<int> stride_pad_list(6, 0);
+    (void)CheckSize(filter_shape.size(), 5, "filter_shape");
+    (void)CheckSize(pad_list.size(), 6, "pad_list");
     if (pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase) {  // pad_mode_ = same
       UpdatePaddingAndDilation(input_shape, filter_shape, pad_list.data(), stride_pad_list.data());
     }
@@ -101,6 +101,7 @@ class Dropout3DGpuFwdKernel : public GpuKernel {
     }

     num_chan_ = n_ * c_;
+    MS_EXCEPTION_IF_ZERO("num channel", num_chan_);
     num_per_chan_ = num_count_ / num_chan_;  // number of elements per channel

     keep_prob_ = GetAttr<float>(kernel_node, "keep_prob");
@@ -51,12 +51,10 @@ class HSigmoidKernel : public GpuKernel {
     size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
     if (input_num != 1) {
       MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but HSigmoid needs 1 input.";
-      return false;
     }
     size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
     if (output_num != 1) {
       MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but HSigmoid has 1 output.";
-      return false;
     }
     auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
     is_null_input_ = CHECK_NULL_INPUT(input_shape);

@@ -52,12 +52,10 @@ class HSigmoidGradKernel : public GpuKernel {
     size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
     if (input_num != 2) {
       MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but HSigmoidGrad needs 2 inputs.";
-      return false;
     }
     size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
     if (output_num != 1) {
       MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but HSigmoidGrad has 1 output.";
-      return false;
     }
     auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
     is_null_input_ = CHECK_NULL_INPUT(input_shape);
@@ -38,6 +38,9 @@ class NLLLossGpuKernel : public GpuKernel {

   bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
               const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
+    if (is_null_input_) {
+      return true;
+    }
     T *input_device = GetDeviceAddress<T>(inputs, 0);
     int32_t *target_device = GetDeviceAddress<int32_t>(inputs, 1);  // nll_loss only supports int32 target
     S *weight_device = GetDeviceAddress<S>(inputs, 2);

@@ -45,7 +48,9 @@ class NLLLossGpuKernel : public GpuKernel {
     T *loss_device = GetDeviceAddress<T>(outputs, 0);
     S *total_weight_device = GetDeviceAddress<S>(outputs, 1);

-    T *tmp_loss_device = GetPossiblyNullDeviceAddress<T>(workspace, 0);
+    T *tmp_loss_device =
+      reduction_ != 0 ? GetDeviceAddress<T>(workspace, 0) : GetPossiblyNullDeviceAddress<T>(workspace, 0);

     S *tmp_target_weight_device = GetDeviceAddress<S>(workspace, 1);

     NLLLoss(n_, c_, reduction_, input_device, target_device, weight_device, loss_device, total_weight_device,
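The ternary makes the requirement conditional: when a reduction is applied (reduction_ != 0) the temporary loss buffer will be dereferenced, so the strict accessor runs; otherwise a null workspace slot is tolerated. That reduction_ == 0 corresponds to "no reduction" is an assumption based on this kernel's usage, not something the diff states. A self-contained illustration of the pattern:

#include <cassert>
#include <cstddef>
#include <vector>

// Demand a workspace buffer only on paths that will actually use it.
float *MaybeRequire(const std::vector<float *> &ws, size_t i, bool required) {
  float *p = (i < ws.size()) ? ws[i] : nullptr;
  if (required) {
    assert(p != nullptr && "reduction path needs the tmp_loss buffer");
  }
  return p;  // may legitimately be nullptr when not required
}

int main() {
  std::vector<float *> workspace = {nullptr};
  MaybeRequire(workspace, 0, /*required=*/false);  // fine: buffer unused
  // MaybeRequire(workspace, 0, true);  // would assert: buffer is needed
  return 0;
}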
@@ -55,6 +60,16 @@ class NLLLossGpuKernel : public GpuKernel {

   bool Init(const CNodePtr &kernel_node) override {
     std::vector<size_t> input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
+    is_null_input_ = CHECK_NULL_INPUT(input_shape);
+    if (is_null_input_) {
+      MS_LOG(WARNING) << "For 'NllLossGpuKernel', input is null.";
+      InitSizeLists();
+      return true;
+    }
+    if (input_shape.size() < 2) {
+      MS_LOG(EXCEPTION) << "For 'NllLossGpuKernel', the rank of input cannot be less than 2, but got "
+                        << input_shape.size();
+    }
     n_ = static_cast<int>(input_shape[0]);
     c_ = static_cast<int>(input_shape[1]);
     for (size_t i = 0; i < input_shape.size(); i++) {

@@ -75,6 +90,7 @@ class NLLLossGpuKernel : public GpuKernel {
     input_size_ = 1;
     n_ = 0;
     c_ = 0;
+    is_null_input_ = false;
     reduction_ = 1;  // default value
     tmp_loss_size_ = 0;
     tmp_target_weight_size_ = 0;  // tmp_target_weight (N,) array

@@ -106,6 +122,7 @@ class NLLLossGpuKernel : public GpuKernel {
   size_t tmp_target_weight_size_;
   int n_;
   int c_;
+  bool is_null_input_;
   std::vector<size_t> input_size_list_;
   std::vector<size_t> output_size_list_;
   std::vector<size_t> workspace_size_list_;
@@ -38,6 +38,9 @@ class NLLLossGradGpuKernel : public GpuKernel {

   bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
               const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
+    if (is_null_input_) {
+      return true;
+    }
     T *input_device = GetDeviceAddress<T>(inputs, 0);
     T *dloss_device = GetDeviceAddress<T>(inputs, 1);
     int32_t *target_device = GetDeviceAddress<int32_t>(inputs, 2);  // nll_loss_grad only supports int32 target

@@ -54,6 +57,16 @@ class NLLLossGradGpuKernel : public GpuKernel {

   bool Init(const CNodePtr &kernel_node) override {
     std::vector<size_t> input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
+    is_null_input_ = CHECK_NULL_INPUT(input_shape);
+    if (is_null_input_) {
+      MS_LOG(WARNING) << "For 'NllLossGradGpuKernel', input is null.";
+      InitSizeLists();
+      return true;
+    }
+    if (input_shape.size() < 2) {
+      MS_LOG(EXCEPTION) << "For 'NllLossGradGpuKernel', the rank of input cannot be less than 2, but got "
+                        << input_shape.size();
+    }
     n_ = static_cast<int>(input_shape[0]);
     c_ = static_cast<int>(input_shape[1]);
     for (size_t i = 0; i < input_shape.size(); i++) {

@@ -73,6 +86,7 @@ class NLLLossGradGpuKernel : public GpuKernel {
     input_size_ = 1;
     n_ = 0;
     c_ = 0;
+    is_null_input_ = false;
     reduction_ = 1;  // default value
     num_dloss_ = 1;  // default size (scalar)
     input_size_list_.clear();

@@ -96,6 +110,7 @@ class NLLLossGradGpuKernel : public GpuKernel {
   int reduction_;
   int n_;
   int c_;
+  bool is_null_input_;
   int num_dloss_;
   std::vector<size_t> input_size_list_;
   std::vector<size_t> output_size_list_;
@@ -36,6 +36,9 @@ class ResizeBilinearGpuKernel : public GpuKernel {

   bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
               const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
+    if (is_null_input_) {
+      return true;
+    }
     T *input = GetDeviceAddress<T>(inputs, 0);
     T *output = GetDeviceAddress<T>(outputs, 0);
     float h_scale = Scaling(input_h_, output_h_, align_corners_);
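Scaling() is not defined in this diff; resize kernels conventionally compute the output-to-input coordinate scale as shown below, so this is a sketch under that assumption rather than MindSpore's actual helper.

#include <iostream>

// Conventional resize scale factor: with align_corners the grid endpoints
// of input and output coincide, otherwise the ratio of extents is used.
float Scaling(int in_size, int out_size, bool align_corners) {
  if (align_corners && out_size > 1) {
    return static_cast<float>(in_size - 1) / static_cast<float>(out_size - 1);
  }
  return static_cast<float>(in_size) / static_cast<float>(out_size);
}

int main() {
  std::cout << Scaling(4, 8, false) << "\n";  // 0.5
  std::cout << Scaling(4, 8, true) << "\n";   // 3/7, approximately 0.428571
  return 0;
}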
@@ -58,8 +61,15 @@ class ResizeBilinearGpuKernel : public GpuKernel {
     }
     std::vector<size_t> input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
     std::vector<size_t> output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
-    if (input_shape.size() != 4) {
-      MS_LOG(ERROR) << "Input is " << input_shape.size() << "-D, but ResizeBilinear supports only 4-D inputs.";
+    is_null_input_ = CHECK_NULL_INPUT(input_shape) || CHECK_NULL_INPUT(output_shape);
+    if (is_null_input_) {
+      MS_LOG(WARNING) << "For 'ResizeBilinearGpuKernel', input or output is null.";
+      InitSizeLists();
+      return true;
+    }
+    if (input_shape.size() != 4 || output_shape.size() != 4) {
+      MS_LOG(ERROR) << "For 'ResizeBilinear', the rank of input and output must be 4, but got the rank of input: "
+                    << input_shape.size() << ", the rank of output: " << output_shape.size();
       return false;
     }
     n_ = SizeToInt(input_shape[0]);

@@ -83,6 +93,7 @@ class ResizeBilinearGpuKernel : public GpuKernel {

   void ResetResource() noexcept override {
     align_corners_ = false;
+    is_null_input_ = false;
     n_ = 0;
     c_ = 0;
     input_h_ = 0;

@@ -110,6 +121,7 @@ class ResizeBilinearGpuKernel : public GpuKernel {
   }

   bool align_corners_;
+  bool is_null_input_;
   int n_;
   int c_;
   int input_h_;
@@ -36,6 +36,9 @@ class ResizeBilinearGradGpuKernel : public GpuKernel {

   bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
               const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
+    if (is_null_input_) {
+      return true;
+    }
     T *dy = GetDeviceAddress<T>(inputs, 0);
     float *interim = GetDeviceAddress<float>(workspace, 0);
     T *dx = GetDeviceAddress<T>(outputs, 0);

@@ -67,6 +70,12 @@ class ResizeBilinearGradGpuKernel : public GpuKernel {
     std::vector<size_t> dy_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
     std::vector<size_t> x_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
     std::vector<size_t> dx_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
+    is_null_input_ = CHECK_NULL_INPUT(dy_shape) || CHECK_NULL_INPUT(x_shape) || CHECK_NULL_INPUT(dx_shape);
+    if (is_null_input_) {
+      MS_LOG(WARNING) << "For 'ResizeBilinearGradGpuKernel', input or output is null.";
+      InitSizeLists();
+      return true;
+    }
     if (dy_shape.size() != 4) {
       MS_LOG(ERROR) << "Input is " << dy_shape.size() << "-D, but ResizeBilinearGrad supports only 4-D inputs.";
       return false;

@@ -75,6 +84,10 @@ class ResizeBilinearGradGpuKernel : public GpuKernel {
       MS_LOG(ERROR) << "Input is " << x_shape.size() << "-D, but ResizeBilinearGrad supports only 4-D inputs.";
       return false;
     }
+    if (dx_shape.size() != 4) {
+      MS_LOG(ERROR) << "For 'ResizeBilinearGradGpuKernel', the rank of output must be 4, but got " << dx_shape.size();
+      return false;
+    }
     n_ = SizeToInt(dy_shape[0]);
     c_ = SizeToInt(dy_shape[1]);
     dy_h_ = SizeToInt(dy_shape[2]);

@@ -97,6 +110,7 @@ class ResizeBilinearGradGpuKernel : public GpuKernel {

   void ResetResource() noexcept override {
     align_corners_ = false;
+    is_null_input_ = false;
     n_ = 0;
     c_ = 0;
     dy_h_ = 0;

@@ -125,6 +139,7 @@ class ResizeBilinearGradGpuKernel : public GpuKernel {
   }

   bool align_corners_;
+   bool is_null_input_;
   int n_;
   int c_;
   int dy_h_;
@@ -84,6 +84,10 @@ class RandpermGpuKernel : public GpuKernel {
     }

     max_length_ = static_cast<size_t>(GetAttr<int64_t>(kernel_node, "max_length"));
+    if (max_length_ < 1) {
+      MS_LOG(ERROR) << "For 'RandpermGpuKernel', the max_length cannot be less than 1, but got " << max_length_;
+      return false;
+    }
     pad_ = static_cast<T>(GetAttr<int64_t>(kernel_node, "pad"));

     InitSizeLists();