!24269 Add vector size check, input shape check and divide-by-zero check for GPU operators.

Merge pull request !24269 from wangshuide/wsd_master
i-robot 2021-10-08 02:44:11 +00:00 committed by Gitee
commit 2c1d3baace
30 changed files with 381 additions and 54 deletions
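Before the per-file diffs, a note on the recurring pattern: nearly every kernel below gains an is_null_input_ flag that Init() sets after querying the input/output shapes, and that Launch() consults to turn the kernel into a no-op when any tensor dimension is zero. A minimal self-contained sketch of that pattern follows; CHECK_NULL_INPUT is approximated here by a hand-rolled IsNullShape helper, and all names besides is_null_input_ are illustrative, not MindSpore's.

#include <cstddef>
#include <iostream>
#include <vector>

// Stand-in for MindSpore's CHECK_NULL_INPUT: a shape is treated as "null"
// when any dimension is zero, i.e. the tensor holds no elements.
bool IsNullShape(const std::vector<size_t> &shape) {
  for (size_t dim : shape) {
    if (dim == 0) {
      return true;
    }
  }
  return false;
}

class SketchKernel {
 public:
  // Init caches the flag once so every later Launch can bail out cheaply.
  bool Init(const std::vector<size_t> &input_shape, const std::vector<size_t> &output_shape) {
    is_null_input_ = IsNullShape(input_shape) || IsNullShape(output_shape);
    if (is_null_input_) {
      std::cerr << "warning: input or output is null, kernel becomes a no-op\n";
      return true;  // still a successful Init; size lists would be set up here
    }
    // normal rank/size validation would follow here
    return true;
  }

  bool Launch() {
    if (is_null_input_) {
      return true;  // nothing to compute on an empty tensor
    }
    // real device work would go here
    return true;
  }

 private:
  bool is_null_input_ = false;
};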

View File

@ -41,13 +41,17 @@ class CropAndResizeGpuKernel : public GpuKernel {
input_width_(0),
final_height_(0),
final_width_(0),
channel_(0) {}
channel_(0),
is_null_input_(false) {}
~CropAndResizeGpuKernel() override = default;
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (is_null_input_) {
return true;
}
VARIABLE_NOT_USED(workspace);
T *input_image = GetDeviceAddress<T>(inputs, 0);
float *input_boxes = GetDeviceAddress<float>(inputs, 1);
@ -72,6 +76,18 @@ class CropAndResizeGpuKernel : public GpuKernel {
}
// input image
auto input_image_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto input_boxes_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
auto input_box_index_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
auto input_crop_size_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_image_shape) || CHECK_NULL_INPUT(input_boxes_shape) ||
CHECK_NULL_INPUT(input_box_index_shape) || CHECK_NULL_INPUT(input_crop_size_shape) ||
CHECK_NULL_INPUT(output_shape);
if (is_null_input_) {
MS_LOG(WARNING) << "For 'CropAndResizeGpuKernel', input or output is null.";
InitSizeLists();
return true;
}
size_t input_image_shape_len = input_image_shape.size();
if (input_image_shape_len != 4) {
MS_LOG(ERROR) << " image tensor is " << input_image_shape_len << "-D, but CropAndResize supports only " << 4
@ -86,7 +102,6 @@ class CropAndResizeGpuKernel : public GpuKernel {
input_height_ = input_image_shape[1];
input_width_ = input_image_shape[2];
// input boxes
auto input_boxes_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
size_t input_boxes_shape_len = input_boxes_shape.size();
if (input_boxes_shape_len != 2) {
MS_LOG(ERROR) << "Boxes is rank" << input_boxes_shape_len << " but CropAndResize supports only rank " << 2
@ -99,7 +114,6 @@ class CropAndResizeGpuKernel : public GpuKernel {
}
input_boxes_size_ *= sizeof(float);
// input box_index
auto input_box_index_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
size_t input_box_index_shape_len = input_box_index_shape.size();
if (input_box_index_shape_len != 1) {
MS_LOG(ERROR) << "Box_index is rank " << input_box_index_shape_len << " but CropAndResize supports only rank "
@ -110,7 +124,6 @@ class CropAndResizeGpuKernel : public GpuKernel {
input_box_ind_size_ *= input_box_index_shape[0]; // single dim required
input_box_ind_size_ *= sizeof(int);
// input crop_size
auto input_crop_size_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
size_t input_crop_size_shape_len = input_crop_size_shape.size();
if (input_crop_size_shape_len != 1) {
MS_LOG(ERROR) << "Crop_size is rank " << input_crop_size_shape_len << "-D, but CropAndResize supports only rank "
@ -126,8 +139,11 @@ class CropAndResizeGpuKernel : public GpuKernel {
input_crop_size_ *= input_crop_size_shape[0];
input_crop_size_ *= sizeof(int);
// output
auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
auto output_shape_len = output_shape.size();
if (output_shape_len != 4) {
MS_LOG(ERROR) << "For 'CropAndResize', the rank of output should be 4, but got " << output_shape_len;
return false;
}
output_size_ = 1;
for (size_t i = 0; i < output_shape_len; i++) {
output_size_ *= output_shape[i];
@ -175,6 +191,7 @@ class CropAndResizeGpuKernel : public GpuKernel {
int final_height_;
int final_width_;
int channel_;
bool is_null_input_;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
std::vector<size_t> workspace_size_list_;

View File

@ -1,4 +1,3 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
@ -37,6 +36,9 @@ class DepthToSpaceFwdKernel : public GpuKernel {
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (is_null_input_) {
return true;
}
// get device buffer ptr
T *input = GetDeviceAddress<T>(inputs, 0);
T *output = GetDeviceAddress<T>(outputs, 0);
@ -71,6 +73,12 @@ class DepthToSpaceFwdKernel : public GpuKernel {
}
// check input_shape
auto input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
if (is_null_input_) {
MS_LOG(WARNING) << "For 'DepthToSpaceGpuKernel', input is null.";
InitSizeLists();
return true;
}
shape_size_ = input_shape.size();
if (shape_size_ != DEPTHTOSPACE_BUFFER_DIMENSION) {
MS_LOG(EXCEPTION) << "Input is " << shape_size_ << "-D, but DepthToSpace supports 4-D tensor.";
@ -111,6 +119,7 @@ class DepthToSpaceFwdKernel : public GpuKernel {
oc_ = 0;
oh_ = 0;
ow_ = 0;
is_null_input_ = false;
input_size_list_.clear();
output_size_list_.clear();
@ -140,6 +149,7 @@ class DepthToSpaceFwdKernel : public GpuKernel {
size_t oc_;
size_t oh_;
size_t ow_;
bool is_null_input_;
};
} // namespace kernel
} // namespace mindspore

View File

@ -37,6 +37,9 @@ class EmbeddingLookupKernel : public GpuKernel {
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (is_null_input_) {
return true;
}
VARIABLE_NOT_USED(workspace);
T *input_addr = GetDeviceAddress<T>(inputs, 0);
S *indices_addr = GetDeviceAddress<S>(inputs, 1);
@ -69,6 +72,16 @@ class EmbeddingLookupKernel : public GpuKernel {
input_shapes_ = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
indices_shapes_ = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 1);
output_shapes_ = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node, 0);
is_null_input_ =
CHECK_NULL_INPUT(input_shapes_) || CHECK_NULL_INPUT(indices_shapes_) || CHECK_NULL_INPUT(output_shapes_);
if (is_null_input_) {
MS_LOG(WARNING) << "For 'EmbeddingLookupGpuKernel', input or output is null.";
InitSizeLists();
return true;
}
if (input_shapes_.size() < 1) {
MS_LOG(EXCEPTION) << "For 'EmbeddingLookupGpuKernel', the rank of input cannot be less than 1.";
}
if (!is_dynamic_shape_) {
offset_ = GetAttr<int64_t>(kernel_node, "offset");
}
@ -78,6 +91,7 @@ class EmbeddingLookupKernel : public GpuKernel {
}
void ResetResource() noexcept override {
is_dynamic_shape_ = false;
is_null_input_ = false;
input_shapes_.clear();
indices_shapes_.clear();
output_shapes_.clear();
@ -138,6 +152,7 @@ class EmbeddingLookupKernel : public GpuKernel {
size_t dims_[3] = {};
int64_t offset_;
bool is_dynamic_shape_;
bool is_null_input_;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
std::vector<size_t> workspace_size_list_;

View File

@ -40,6 +40,9 @@ class ExtractImagePatchesKernel : public GpuKernel {
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (is_null_input_) {
return true;
}
T *input = GetDeviceAddress<T>(inputs, 0);
T *output = GetDeviceAddress<T>(outputs, 0);
T *t_input = GetDeviceAddress<T>(workspace, 0);
@ -92,16 +95,28 @@ class ExtractImagePatchesKernel : public GpuKernel {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but ExtractImagePatches has 1 output.";
}
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape) || CHECK_NULL_INPUT(output_shape);
if (is_null_input_) {
MS_LOG(WARNING) << "For 'ExtractImagePatchesGpuKernel', input or output is null.";
InitSizeLists();
return true;
}
input_size_ = 1;
for (size_t i = 0; i < input_shape.size(); i++) {
input_size_ *= input_shape[i];
input_shape_.push_back(input_shape[i]);
}
auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
output_size_ = 1;
for (size_t i = 0; i < output_shape.size(); i++) {
output_size_ *= output_shape[i];
}
if (input_shape.size() != 4 || output_shape.size() != 4) {
MS_LOG(EXCEPTION) << "For 'ExtractImagePatchesGpuKernel', the rank of input and output should be 4, "
<< "but got the rank of input: " << input_shape.size()
<< ", the rank of output: " << output_shape.size();
}
// transposed NHWC shape
t_output_shape_ = {output_shape[0], output_shape[2], output_shape[3], output_shape[1]};
@ -109,6 +124,11 @@ class ExtractImagePatchesKernel : public GpuKernel {
auto ksizes = GetAttr<std::vector<int64_t>>(kernel_node, "ksizes");
auto strides = GetAttr<std::vector<int64_t>>(kernel_node, "strides");
auto rates = GetAttr<std::vector<int64_t>>(kernel_node, "rates");
if (ksizes.size() != 4 || strides.size() != 4 || rates.size() != 4) {
MS_LOG(EXCEPTION) << "For 'ExtractImagePatchesGpuKernel', the rank of ksizes, strides and rates should be 4, "
<< "but got the rank of ksizes: " << ksizes.size()
<< ", the rank of strides: " << strides.size() << ", the rank of rates: " << rates.size();
}
ksize_row_ = ksizes[2];
ksize_col_ = ksizes[3];
@ -127,6 +147,9 @@ class ExtractImagePatchesKernel : public GpuKernel {
int64_t patch_rows_eff = ksize_row_ + (ksize_row_ - 1) * (rate_row_ - 1);
int64_t patch_cols_eff = ksize_col_ + (ksize_col_ - 1) * (rate_col_ - 1);
MS_EXCEPTION_IF_ZERO("stride row", stride_row_);
MS_EXCEPTION_IF_ZERO("stride col", stride_col_);
if (padding == "VALID") {
output_rows_ = std::ceil((input_row_size_ - patch_rows_eff + 1.f) / static_cast<float>(stride_row_));
output_cols_ = std::ceil((input_col_size_ - patch_cols_eff + 1.f) / static_cast<float>(stride_col_));
@ -148,6 +171,7 @@ class ExtractImagePatchesKernel : public GpuKernel {
row_input_stride_ = input_depth * input_col_size_;
patch_input_stride_ = input_depth * input_col_size_ * input_row_size_;
output_depth_ = input_depth;
MS_EXCEPTION_IF_ZERO("other stride", other_stride_);
need_batch_ = (output_size_ - 1) / other_stride_;
InitSizeLists();
@ -177,6 +201,7 @@ class ExtractImagePatchesKernel : public GpuKernel {
row_input_stride_ = 1;
patch_input_stride_ = 1;
output_depth_ = 1;
is_null_input_ = false;
input_shape_.clear();
t_output_shape_.clear();
input_size_list_.clear();
@ -208,6 +233,7 @@ class ExtractImagePatchesKernel : public GpuKernel {
int64_t output_rows_;
int64_t output_cols_;
bool need_batch_;
bool is_null_input_;
int64_t row_stride_;
int64_t patch_stride_;
int64_t other_stride_;
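The two MS_EXCEPTION_IF_ZERO guards above exist because stride_row_, stride_col_ and other_stride_ feed divisions a few lines later. A rough sketch of what such a guard amounts to, assuming the macro simply raises with the offending name (the real MindSpore macro may differ in detail):

#include <cmath>
#include <cstdint>
#include <sstream>
#include <stdexcept>
#include <string>

// Hypothetical stand-in for MS_EXCEPTION_IF_ZERO: fail loudly with a named
// value instead of letting a division by zero reach the hardware.
#define SKETCH_EXCEPTION_IF_ZERO(name, value)           \
  do {                                                  \
    if ((value) == 0) {                                 \
      std::ostringstream oss;                           \
      oss << "The " << (name) << " must be non-zero."; \
      throw std::runtime_error(oss.str());              \
    }                                                   \
  } while (0)

// Mirrors the "VALID" padding arithmetic guarded in the diff above.
int64_t ValidOutputRows(int64_t input_rows, int64_t patch_rows_eff, int64_t stride_row) {
  SKETCH_EXCEPTION_IF_ZERO("stride row", stride_row);
  return static_cast<int64_t>(std::ceil((input_rows - patch_rows_eff + 1.0f) / static_cast<float>(stride_row)));
}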

View File

@ -40,6 +40,9 @@ class InTopKGpuKernel : public GpuKernel {
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (is_null_input_) {
return true;
}
T *predictions_device = GetDeviceAddress<T>(inputs, 0);
int32_t *targets_device = GetDeviceAddress<int32_t>(inputs, 1);
@ -106,6 +109,16 @@ class InTopKGpuKernel : public GpuKernel {
}
input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape_);
if (is_null_input_) {
MS_LOG(WARNING) << "For 'InTopKGpuKernel', input is null.";
InitSizeLists();
return true;
}
if (input_shape_.size() < 2) {
MS_LOG(EXCEPTION) << "For 'InTopKGpuKernel', the rank of input cannot be less than 2, but got "
<< input_shape_.size();
}
input_rank_ = input_shape_.size();
input_size_ = 1;
for (size_t i = 0; i < input_rank_; i++) {
@ -136,6 +149,7 @@ class InTopKGpuKernel : public GpuKernel {
input_rank_ = 0;
outer_size_ = 0;
inner_size_ = 0;
is_null_input_ = false;
top_k_init_ = static_cast<T>(0.);
input_size_list_.clear();
output_size_list_.clear();
@ -171,6 +185,7 @@ class InTopKGpuKernel : public GpuKernel {
// for topk
size_t outer_size_;
size_t inner_size_;
bool is_null_input_;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;

View File

@ -41,6 +41,9 @@ class MeshgridGpuKernel : public GpuKernel {
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (is_null_input_) {
return true;
}
T *ones_device = GetDeviceAddress<T>(workspace, 0);
CalOnesLike(output_size_, static_cast<T *>(nullptr), ones_device, reinterpret_cast<cudaStream_t>(stream_ptr));
@ -79,9 +82,14 @@ class MeshgridGpuKernel : public GpuKernel {
input_size_ = 1;
input_count_ = AnfAlgo::GetInputTensorNum(kernel_node);
for (size_t i = 0; i < input_count_; i++) {
size_t input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, i)[0];
input_shapes_.push_back(input_shape);
input_size_ *= input_shape;
auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, i);
if (input_shape.size() < 1) {
MS_LOG(ERROR) << "For 'MeshGridGpuKernel', the rank of input" << i << " cannot be less than 1.";
return false;
}
size_t input_size = input_shape[0];
input_shapes_.push_back(input_size);
input_size_ *= input_size;
}
output_size_ = 1;
@ -89,6 +97,12 @@ class MeshgridGpuKernel : public GpuKernel {
// inferred shape swaps output shape for us if needed
output_shape_ = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(output_shape_);
if (is_null_input_) {
MS_LOG(WARNING) << "For 'MeshGridGpuKernel', output is null.";
InitSizeLists();
return true;
}
if (output_count_ != input_count_) {
MS_LOG(ERROR) << "output count is " << output_count_ << ", but MeshgridGpuKernel needs " << input_count_
@ -101,7 +115,7 @@ class MeshgridGpuKernel : public GpuKernel {
}
// need to pad output shape with ones for broadcast kernel
for (size_t i = 0; i < output_shape_.size() - MAX_DIMS; i++) {
for (size_t i = 0; i < MAX_DIMS - output_shape_.size(); i++) {
output_shape_.push_back(1);
}
@ -118,6 +132,7 @@ class MeshgridGpuKernel : public GpuKernel {
output_size_ = 0;
output_count_ = 0;
swap_indexing_ = true;
is_null_input_ = false;
input_size_list_.clear();
output_size_list_.clear();
@ -145,6 +160,7 @@ class MeshgridGpuKernel : public GpuKernel {
size_t output_size_;
size_t output_count_;
bool swap_indexing_;
bool is_null_input_;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
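The loop-bound change from output_shape_.size() - MAX_DIMS to MAX_DIMS - output_shape_.size() fixes an unsigned-underflow bug: size() returns size_t, so subtracting a larger constant wraps around to a huge value instead of going negative, and the padding loop would push 1s essentially forever. A tiny demonstration (MAX_DIMS set to 7 here purely for illustration):

#include <cstddef>
#include <iostream>

int main() {
  const size_t MAX_DIMS = 7;  // illustrative value, not necessarily MindSpore's
  size_t rank = 2;            // e.g. a 2-D meshgrid output before padding
  // Old bound: rank - MAX_DIMS wraps to 18446744073709551611 on 64-bit,
  // so the padding loop would run until the process dies.
  std::cout << rank - MAX_DIMS << "\n";
  // New bound: pads exactly the missing dimensions.
  std::cout << MAX_DIMS - rank << "\n";  // prints 5
  return 0;
}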

View File

@ -36,6 +36,7 @@ class ReverseSequenceGpuFwdKernel : public GpuKernel {
input_size_(0),
batch_dim_(0),
seq_dim_(0),
is_null_input_(false),
seq_len_size_(0),
total_index_dim_(0),
output_size_(0),
@ -46,6 +47,9 @@ class ReverseSequenceGpuFwdKernel : public GpuKernel {
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (is_null_input_) {
return true;
}
T *input = GetDeviceAddress<T>(inputs, 0);
S *seq_len = GetDeviceAddress<S>(inputs, 1);
size_t *input_shape_ptr = GetDeviceAddress<size_t>(workspace, 0);
@ -75,23 +79,23 @@ class ReverseSequenceGpuFwdKernel : public GpuKernel {
return false;
}
input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
if (CHECK_NULL_INPUT(input_shape_)) {
MS_LOG(WARNING) << "ReverseSequence input is null";
auto seq_len_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
is_null_input_ = CHECK_NULL_INPUT(input_shape_) || CHECK_NULL_INPUT(seq_len_shape);
if (is_null_input_) {
MS_LOG(WARNING) << "For 'ReverseSequenceGpuKernel', input is null.";
InitSizeLists();
return true;
}
if (input_shape_.size() < 1) {
MS_LOG(EXCEPTION) << "For 'ReverseSequenceGpuKernel', the rank of input cannot be less than 1, but got "
<< input_shape_.size();
}
input_size_ = 1;
shape_size_ = input_shape_.size(); // required for calls
for (size_t i = 0; i < shape_size_; i++) {
input_size_ *= input_shape_[i];
}
// get seq len shape
auto seq_len_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
if (CHECK_NULL_INPUT(seq_len_shape)) {
MS_LOG(WARNING) << "ReverseSequence seq lengths input is null";
InitSizeLists();
return true;
}
seq_len_size_ = seq_len_shape.size();
output_size_ = input_size_; // size does not change
// Allocate workspace memory to use for storing indices for each thread to compute with
@ -116,6 +120,7 @@ class ReverseSequenceGpuFwdKernel : public GpuKernel {
size_t input_size_;
int64_t batch_dim_;
int64_t seq_dim_;
bool is_null_input_;
size_t seq_len_size_;
size_t total_index_dim_;
size_t output_size_;

View File

@ -38,6 +38,9 @@ class ReverseV2GpuKernel : public GpuKernel {
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (is_null_input_) {
return true;
}
T *input_device = GetDeviceAddress<T>(inputs, 0);
T *output_device = GetDeviceAddress<T>(outputs, 0);
size_t *input_shape_device = GetDeviceAddress<size_t>(workspace, 0);
@ -79,7 +82,16 @@ class ReverseV2GpuKernel : public GpuKernel {
}
input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape_);
if (is_null_input_) {
MS_LOG(WARNING) << "For 'ReverseV2GpuKernel', input is null.";
InitSizeLists();
return true;
}
input_rank_ = input_shape_.size();
if (input_rank_ < 1) {
MS_LOG(EXCEPTION) << "For 'ReverseV2GpuKernel', the rank of input cannot be less than 1, bot got " << input_rank_;
}
input_size_ = 1;
for (size_t i = 0; i < input_rank_; i++) {
input_size_ *= input_shape_[i];
@ -92,6 +104,9 @@ class ReverseV2GpuKernel : public GpuKernel {
}
axis_ = GetAttr<std::vector<int64_t>>(kernel_node, "axis");
if (axis_.size() < 1) {
MS_LOG(EXCEPTION) << "For 'ReverseV2GpuKernel', the rank of axis cannot be less than 1, bot got " << axis_.size();
}
for (int64_t &dimension : axis_) {
if (dimension < 0) {
dimension += input_rank_;
@ -106,6 +121,7 @@ class ReverseV2GpuKernel : public GpuKernel {
void ResetResource() noexcept override {
input_size_ = 0;
input_rank_ = 0;
is_null_input_ = false;
input_shape_.clear();
strides_.clear();
axis_.clear();
@ -131,6 +147,7 @@ class ReverseV2GpuKernel : public GpuKernel {
std::vector<size_t> input_shape_;
std::vector<int64_t> strides_;
std::vector<int64_t> axis_;
bool is_null_input_;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;

View File

@ -42,6 +42,9 @@ class SortGpuKernel : public GpuKernel {
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (is_null_input_) {
return true;
}
T *input_device = GetDeviceAddress<T>(inputs, 0);
T *output_device = GetDeviceAddress<T>(outputs, 0);
@ -127,10 +130,17 @@ class SortGpuKernel : public GpuKernel {
}
input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape_);
if (is_null_input_) {
MS_LOG(WARNING) << "For 'SortGpuKernel', input is null.";
InitSizeLists();
return true;
}
input_rank_ = input_shape_.size();
if (input_rank_ > TRANSPOSE_MAX_DIMENSION) {
MS_LOG(ERROR) << "Sort cannot support input that has more than " << TRANSPOSE_MAX_DIMENSION << " dimensions.";
if (input_rank_ > TRANSPOSE_MAX_DIMENSION || input_rank_ < 1) {
MS_LOG(ERROR) << "For 'SortGpuKernel', the rank of input cannot be more than " << TRANSPOSE_MAX_DIMENSION
<< " dimensions or less than 1 dimension.";
return false;
}
@ -145,6 +155,11 @@ class SortGpuKernel : public GpuKernel {
if (axis_ < 0) {
axis_ += input_rank_;
}
if (static_cast<size_t>(axis_) >= input_rank_) {
MS_LOG(ERROR) << "For 'SortGpuKernel', axis should be less than the rank of input, but got axis: " << axis_
<< ", the rank of input: " << input_rank_;
return false;
}
perm_.resize(input_rank_);
std::iota(perm_.begin(), perm_.end(), 0);
@ -172,6 +187,7 @@ class SortGpuKernel : public GpuKernel {
input_size_ = 0;
axis_ = 0;
descending_ = false;
is_null_input_ = false;
input_shape_.clear();
input_rank_ = 0;
transposed_shape_.clear();
@ -206,6 +222,7 @@ class SortGpuKernel : public GpuKernel {
size_t input_size_;
int64_t axis_;
bool descending_;
bool is_null_input_;
std::vector<size_t> input_shape_;
size_t input_rank_;
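The added axis bound check completes the usual normalize-then-validate sequence for a dimension attribute: fold a negative axis into [0, rank), then reject anything still out of range. A compact sketch with illustrative names:

#include <cstddef>
#include <cstdint>
#include <stdexcept>

// Normalize a possibly negative axis attribute into [0, rank), as the Sort
// kernel now does.
size_t NormalizeAxis(int64_t axis, size_t rank) {
  if (axis < 0) {
    axis += static_cast<int64_t>(rank);  // e.g. axis -1 becomes rank - 1
  }
  if (axis < 0 || static_cast<size_t>(axis) >= rank) {
    throw std::out_of_range("axis is out of range for the input rank");
  }
  return static_cast<size_t>(axis);
}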

View File

@ -37,6 +37,9 @@ class SpaceToDepthFwdKernel : public GpuKernel {
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (is_null_input_) {
return true;
}
// get device buffer ptr
T *input = GetDeviceAddress<T>(inputs, 0);
T *output = GetDeviceAddress<T>(outputs, 0);
@ -71,6 +74,12 @@ class SpaceToDepthFwdKernel : public GpuKernel {
}
// check input_shape
auto input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
if (is_null_input_) {
MS_LOG(WARNING) << "For 'SpaceToDepthGpuKernel', input is null.";
InitSizeLists();
return true;
}
shape_size_ = input_shape.size();
if (shape_size_ != SPACETODEPTH_BUFFER_DIMENSION) {
MS_LOG(EXCEPTION) << "Input is " << shape_size_ << "-D, but SpaceToDepth supports 4-D tensor.";
@ -102,6 +111,7 @@ class SpaceToDepthFwdKernel : public GpuKernel {
input_size_ = 0;
output_size_ = 0;
block_size_ = 0;
is_null_input_ = false;
in_ = 0;
ic_ = 0;
ih_ = 0;
@ -131,6 +141,7 @@ class SpaceToDepthFwdKernel : public GpuKernel {
size_t input_size_;
size_t output_size_;
size_t block_size_;
bool is_null_input_;
size_t in_;
size_t ic_;
size_t ih_;

View File

@ -31,11 +31,14 @@ namespace kernel {
template <typename T>
class TensorCopySlicesGpuKernel : public GpuKernel {
public:
TensorCopySlicesGpuKernel() : input_size_(0), update_size_(0), output_size_(0) {}
TensorCopySlicesGpuKernel() : input_size_(0), update_size_(0), output_size_(0), is_null_input_(false) {}
~TensorCopySlicesGpuKernel() {}
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (is_null_input_) {
return true;
}
T *input_addr = GetDeviceAddress<T>(inputs, 0);
T *update_addr = GetDeviceAddress<T>(inputs, 1);
T *output_addr = GetDeviceAddress<T>(outputs, 0);
@ -69,7 +72,13 @@ class TensorCopySlicesGpuKernel : public GpuKernel {
}
input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto update_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
is_null_input_ = CHECK_NULL_INPUT(input_shape_) || CHECK_NULL_INPUT(update_shape);
if (is_null_input_) {
MS_LOG(WARNING) << "For 'TensorCopySlicesGpuKernel', input or output is null.";
InitSizeLists();
return true;
}
if (input_shape_.size() > kMaxDims) {
MS_LOG(ERROR) << "StridedSlice support dims no more than " << kMaxDims << ", but the input shape is "
<< input_shape_.size();
@ -80,6 +89,13 @@ class TensorCopySlicesGpuKernel : public GpuKernel {
end_ = GetAttr<std::vector<int64_t>>(kernel_node, kAttrEnd);
strides_ = GetAttr<std::vector<int64_t>>(kernel_node, kAttrStrides);
if (begin_.size() > input_shape_.size()) {
MS_LOG(ERROR) << "For 'TensorCopySlicesGpuKernel', the rank of begin attr cannot be more than the rank of input, "
<< "but got the rank of begin attr: " << begin_.size()
<< ", the rank of input: " << input_shape_.size();
return false;
}
FillEmptyDims(kernel_node);
output_shape_ = input_shape_;
FillUpdateDim();
@ -100,6 +116,7 @@ class TensorCopySlicesGpuKernel : public GpuKernel {
auto len = begin_.size();
size_t total_input_num = 1;
for (size_t i = 0; i < len; ++i) {
MS_EXCEPTION_IF_ZERO("strides_[i]", strides_[i]);
total_input_num *= ((end_[i] - begin_[i]) / strides_[i]);
}
if (total_input_num != total_update_num) {
@ -161,6 +178,7 @@ class TensorCopySlicesGpuKernel : public GpuKernel {
if (begin_[i] <= end_[i] && strides_[i] > 0) {
update_shape_.push_back((end_[i] - 1 - begin_[i]) / strides_[i] + 1);
} else if (begin_[i] > end_[i] && strides_[i] < 0) {
MS_EXCEPTION_IF_ZERO("strides_[i] + 1", strides_[i] + 1);
update_shape_.push_back((end_[i] - begin_[i] + 1) / strides_[i] + 1);
} else {
update_shape_.push_back(0);
@ -185,6 +203,7 @@ class TensorCopySlicesGpuKernel : public GpuKernel {
size_t update_size_;
size_t output_size_;
inline static size_t kMaxDims = 8;
bool is_null_input_;
};
} // namespace kernel
} // namespace mindspore
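For reference, the update_shape_ arithmetic above counts slice elements for both stride signs; a small self-checking sketch (not the kernel code itself) works the formulas through on concrete numbers:

#include <cassert>
#include <cstdint>

// Element count of a strided slice, split by stride sign as in the diff.
int64_t LenPos(int64_t begin, int64_t end, int64_t stride) {
  return (end - 1 - begin) / stride + 1;  // elements at begin, begin+stride, ... < end
}
int64_t LenNeg(int64_t begin, int64_t end, int64_t stride) {
  return (end - begin + 1) / stride + 1;  // elements at begin, begin+stride, ... > end
}

int main() {
  assert(LenPos(0, 5, 2) == 3);   // indices 0, 2, 4
  assert(LenNeg(5, 0, -2) == 3);  // indices 5, 3, 1
  return 0;
}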

View File

@ -38,7 +38,8 @@ class TensorScatterAddGpuFwdKernel : public GpuKernel {
work_shape_(nullptr),
indices_dim_0_(0),
indices_dim_1_(0),
memcpy_flag_(false) {}
memcpy_flag_(false),
is_null_input_(false) {}
~TensorScatterAddGpuFwdKernel() {
if (indices_stride_ != nullptr) {
device::gpu::GPUMemoryAllocator::GetInstance().FreeTensorMem(static_cast<void *>(indices_stride_));
@ -54,6 +55,9 @@ class TensorScatterAddGpuFwdKernel : public GpuKernel {
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (is_null_input_) {
return true;
}
VARIABLE_NOT_USED(workspace);
T *input = GetDeviceAddress<T>(inputs, 0);
S *indices = GetDeviceAddress<S>(inputs, 1);
@ -77,10 +81,10 @@ class TensorScatterAddGpuFwdKernel : public GpuKernel {
const size_t update_size = update_size_ / sizeof(T);
const size_t output_size = output_size_ / sizeof(T);
CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_,
cudaMemcpyAsync(&output[0], &input[0], input_size_, cudaMemcpyDeviceToDevice,
reinterpret_cast<cudaStream_t>(stream_ptr)),
"cudaMemcpyAsync output failed");
CHECK_CUDA_RET_WITH_EXCEPT(
kernel_node_,
cudaMemcpyAsync(output, input, input_size_, cudaMemcpyDeviceToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)),
"cudaMemcpyAsync output failed");
TensorScatterAdd(input, indices, update, output, block_size_, update_size, output_size, indices_dim_0_,
indices_dim_1_, indices_stride_, work_shape_, reinterpret_cast<cudaStream_t>(stream_ptr));
@ -106,6 +110,13 @@ class TensorScatterAddGpuFwdKernel : public GpuKernel {
indices_shapes_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
input_shapes_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
output_shapes_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(update_shapes_) || CHECK_NULL_INPUT(indices_shapes_) ||
CHECK_NULL_INPUT(input_shapes_) || CHECK_NULL_INPUT(output_shapes_);
if (is_null_input_) {
MS_LOG(WARNING) << "For 'TensorScatterAddGpuKernel', input or output is null.";
InitSizeLists();
return true;
}
std::vector<size_t> shape_me = input_shapes_;
(void)std::transform(shape_me.begin(), shape_me.end(), std::back_inserter(vec_work_shape_),
@ -126,6 +137,10 @@ class TensorScatterAddGpuFwdKernel : public GpuKernel {
MS_LOG(EXCEPTION) << "Failed to alloc work_shape_work, size: " << vec_work_len;
}
work_shape_ = static_cast<S *>(work_shape_work);
if (vec_work_shape_.size() < 1) {
MS_LOG(EXCEPTION) << "For 'TensorScatterAddGpuKernel', the rank of vec work cannot be less than 1, but got "
<< vec_work_shape_.size();
}
InitSizeLists();
@ -160,6 +175,10 @@ class TensorScatterAddGpuFwdKernel : public GpuKernel {
output_size_ *= output_shapes_[i];
}
if (indices_shapes_.size() < 1) {
MS_LOG(EXCEPTION) << "For 'TensorScatterAddGpuKernel', the rank of indices cannot be less than 1, but got "
<< indices_shapes_.size();
}
// calculate indices dim 0/1
indices_dim_0_ = indices_shapes_[0];
indices_dim_1_ = indices_shapes_[indices_shapes_.size() - 1];
@ -169,6 +188,11 @@ class TensorScatterAddGpuFwdKernel : public GpuKernel {
block_size_ *= output_shapes_[i];
}
if (indices_dim_1_ < 1 || indices_dim_1_ > output_shapes_.size()) {
MS_LOG(EXCEPTION) << "For 'TensorScatterAddGpuKernel', indices_shapes[-1] cannot be less than 1 and greater than "
<< "the rank of output_shapes, but got indices_shapes[-1]: " << indices_dim_1_
<< ", rank of output_shapes: " << output_shapes_.size();
}
// calculate indices_stride
vec_indices_stride_.resize(indices_dim_1_, 0);
vec_indices_stride_[indices_dim_1_ - 1] = block_size_;
@ -201,6 +225,7 @@ class TensorScatterAddGpuFwdKernel : public GpuKernel {
size_t indices_dim_0_;
size_t indices_dim_1_;
bool memcpy_flag_;
bool is_null_input_;
};
} // namespace kernel
} // namespace mindspore

View File

@ -63,12 +63,10 @@ class TileGpuKernel : public GpuKernel {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but Tile needs 1 input.";
return false;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but Tile has 1 output.";
return false;
}
input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);
@ -78,6 +76,10 @@ class TileGpuKernel : public GpuKernel {
InitSizeLists();
return true;
}
if (output_shape_.size() < 1) {
MS_LOG(EXCEPTION) << "For 'TileGpuKernel', the rank of output cannot be less than 1, but got "
<< output_shape_.size();
}
input_size_ = 1;
for (size_t i = 0; i < input_shape_.size(); i++) {
input_size_ *= input_shape_[i];

View File

@ -140,6 +140,9 @@ std::pair<bool, size_t> GpuKernelFactory::GpuKernelAttrCheck(const std::string &
}
bool flag = true;
auto attr_size = (&(iter->second))->at(attr_index).first.GetInputSize();
if (kernel_info->GetInputNum() > 0) {
MS_EXCEPTION_IF_ZERO("attr size", attr_size);
}
// data type matching check of all input parameters of kernel
for (size_t input_index = 0; input_index < kernel_info->GetInputNum(); input_index++) {
GpuKernelFactory::CheckSM(kernel_info, input_index);
@ -153,6 +156,9 @@ std::pair<bool, size_t> GpuKernelFactory::GpuKernelAttrCheck(const std::string &
continue;
}
attr_size = (&(iter->second))->at(attr_index).first.GetOutputSize();
if (kernel_info->GetOutputNum() > 0) {
MS_EXCEPTION_IF_ZERO("attr size", attr_size);
}
// data type matching check of all output parameters of kernel
for (size_t output_index = 0; output_index < kernel_info->GetOutputNum(); output_index++) {
if (kernel_info->GetOutputDeviceType(output_index) !=

View File

@ -28,7 +28,14 @@ constexpr int kMaxDimsSize = 3;
template <typename T>
class CumProdGpuKernel : public GpuKernel {
public:
CumProdGpuKernel() : exclusive_(false), reverse_(false), axis_(0), input_size_0_(0), stride_(0), stride2_(0) {}
CumProdGpuKernel()
: exclusive_(false),
reverse_(false),
is_null_input_(false),
axis_(0),
input_size_0_(0),
stride_(0),
stride2_(0) {}
~CumProdGpuKernel() = default;
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
@ -37,6 +44,9 @@ class CumProdGpuKernel : public GpuKernel {
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (is_null_input_) {
return true;
}
T *input_addr = GetDeviceAddress<T>(inputs, 0);
T *output_addr = GetDeviceAddress<T>(outputs, 0);
T *ws_addr = GetDeviceAddress<T>(workspace, 0);
@ -51,6 +61,12 @@ class CumProdGpuKernel : public GpuKernel {
}
input_size_0_ = sizeof(T);
shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(shape_);
if (is_null_input_) {
MS_LOG(WARNING) << "For 'CumProdGpuKernel', input is null.";
InitSizeLists();
return true;
}
axis_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "axis"));
exclusive_ = GetAttr<bool>(kernel_node, "exclusive");
reverse_ = GetAttr<bool>(kernel_node, "reverse");
@ -93,6 +109,7 @@ class CumProdGpuKernel : public GpuKernel {
}
bool exclusive_;
bool reverse_;
bool is_null_input_;
int axis_;
size_t input_size_0_;
size_t stride_;

View File

@ -39,6 +39,9 @@ class IdentityGpuKernel : public GpuKernel {
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (is_null_input_) {
return true;
}
T *input_addr = GetDeviceAddress<T>(inputs, 0);
T *output_addr = GetDeviceAddress<T>(outputs, 0);

View File

@ -36,6 +36,9 @@ class ApplyGradientDescentKernel : public GpuKernel {
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (is_null_input_) {
return true;
}
VARIABLE_NOT_USED(workspace);
T *var = GetDeviceAddress<T>(inputs, 0);
T *alpha = GetDeviceAddress<T>(inputs, 1);
@ -50,14 +53,18 @@ class ApplyGradientDescentKernel : public GpuKernel {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 3) {
MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but ApplyGradientDescent needs 3 inputs.";
return false;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but ApplyGradientDescent has 1 output.";
return false;
}
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
if (is_null_input_) {
MS_LOG(WARNING) << "For 'ApplyGradientDescentGpuKernel', input is null.";
InitSizeLists();
return true;
}
input_size_ = 1;
for (size_t i = 0; i < input_shape.size(); i++) {
input_size_ *= input_shape[i];
@ -68,6 +75,7 @@ class ApplyGradientDescentKernel : public GpuKernel {
void ResetResource() noexcept override {
input_size_ = 1;
is_null_input_ = false;
input_size_list_.clear();
output_size_list_.clear();
workspace_size_list_.clear();
@ -83,6 +91,7 @@ class ApplyGradientDescentKernel : public GpuKernel {
private:
size_t input_size_;
bool is_null_input_;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
std::vector<size_t> workspace_size_list_;

View File

@ -36,6 +36,9 @@ class BCEWithLogitsLossKernel : public GpuKernel {
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (is_null_input_) {
return true;
}
T *predict = GetDeviceAddress<T>(inputs, 0);
T *target = GetDeviceAddress<T>(inputs, 1);
T *weight = GetDeviceAddress<T>(inputs, 2);
@ -69,25 +72,42 @@ class BCEWithLogitsLossKernel : public GpuKernel {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 4) {
MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but BCEWithLogitsLoss needs 4 inputs.";
return false;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but BCEWithLogitsLoss has 1 output.";
return false;
}
input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
weight_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
pos_weight_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
is_null_input_ =
CHECK_NULL_INPUT(input_shape_) || CHECK_NULL_INPUT(weight_shape_) || CHECK_NULL_INPUT(pos_weight_shape_);
if (is_null_input_) {
MS_LOG(WARNING) << "For 'BCEWithLogitsLossGpuKernel', input is null.";
InitSizeLists();
return true;
}
if (input_shape_.size() < 1) {
MS_LOG(EXCEPTION) << "For 'BCEWithLogitsLossGpuKernel', the rank of input cannot be less than 1, but got "
<< input_shape_.size();
}
if (weight_shape_.size() < 1) {
MS_LOG(EXCEPTION) << "For 'BCEWithLogitsLossGpuKernel', the rank of weight cannot be less than 1, but got "
<< weight_shape_.size();
}
if (pos_weight_shape_.size() < 1) {
MS_LOG(EXCEPTION) << "For 'BCEWithLogitsLossGpuKernel', the rank of pos_weight cannot be less than 1, but got "
<< pos_weight_shape_.size();
}
input_size_ = 1;
if (input_shape_.size() > MAX_LOGITS_DIMENSION) {
MS_LOG(EXCEPTION) << "Input dimension is " << input_shape_.size()
<< ", but BCEWithLogitsLoss can only support up to " << MAX_LOGITS_DIMENSION << "-D.";
return false;
}
for (size_t i = 0; i < input_shape_.size(); i++) {
input_size_ *= input_shape_[i];
}
// weight shape
weight_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
weight_size_ = 1;
for (size_t i = 0; i < weight_shape_.size(); i++) {
weight_size_ *= weight_shape_[i];
@ -95,7 +115,6 @@ class BCEWithLogitsLossKernel : public GpuKernel {
weight_need_broadcast_ = NeedBroadcast(&weight_shape_, input_shape_);
// pos_weight shape
pos_weight_size_ = 1;
pos_weight_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
for (size_t i = 0; i < pos_weight_shape_.size(); i++) {
pos_weight_size_ *= pos_weight_shape_[i];
}
@ -110,6 +129,7 @@ class BCEWithLogitsLossKernel : public GpuKernel {
pos_weight_size_ = 1;
weight_need_broadcast_ = false;
pos_weight_need_broadcast_ = false;
is_null_input_ = false;
input_shape_.clear();
weight_shape_.clear();
pos_weight_shape_.clear();
@ -137,7 +157,7 @@ class BCEWithLogitsLossKernel : public GpuKernel {
bool NeedBroadcast(std::vector<size_t> *shape, const std::vector<size_t> &result_shape) {
// result_shape is larger than shape
// and shape can be broadcast to result_shape
if (shape->size() != result_shape.size()) {
if (shape->size() < result_shape.size()) {
size_t fill_size = result_shape.size() - shape->size();
(void)shape->insert(shape->begin(), fill_size, 1);
return true;
@ -155,6 +175,7 @@ class BCEWithLogitsLossKernel : public GpuKernel {
size_t pos_weight_size_;
bool weight_need_broadcast_;
bool pos_weight_need_broadcast_;
bool is_null_input_;
std::vector<size_t> input_shape_;
std::vector<size_t> weight_shape_;
std::vector<size_t> pos_weight_shape_;
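The NeedBroadcast change above relaxes the rank test from != to <, so a lower-rank weight is left-padded with leading 1s up to the logits rank, which is the standard broadcasting convention. A trimmed sketch of the padding step (the element-wise dimension comparison for equal ranks is omitted):

#include <cstddef>
#include <vector>

// Left-pad `shape` with 1s until it matches result_shape's rank; trimmed
// from the NeedBroadcast helper in the diff.
bool NeedBroadcastSketch(std::vector<size_t> *shape, const std::vector<size_t> &result_shape) {
  if (shape->size() < result_shape.size()) {
    size_t fill_size = result_shape.size() - shape->size();
    (void)shape->insert(shape->begin(), fill_size, 1);  // e.g. {5} -> {1, 1, 5}
    return true;
  }
  return false;
}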

View File

@ -77,7 +77,7 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
const float beta = 0;
if (use_pad_) {
T *padded = GetPossiblyNullDeviceAddress<T>(workspace, 1);
T *padded = GetDeviceAddress<T>(workspace, 1);
if (data_format_ == kOpFormat_NHWC) {
CalPadNHWC(padded_size_ / sizeof(T), x, n_, old_height_, old_width_, c_, old_height_ + pad_height_,
old_width_ + pad_width_, pad_top_, pad_left_, pad_value_, padded,

View File

@ -50,7 +50,7 @@ class Conv3dGpuKernel : public GpuKernel {
const float alpha = 1;
const float beta = 0;
if (use_pad_) {
T *padded_addr = GetPossiblyNullDeviceAddress<T>(workspace, 1);
T *padded_addr = GetDeviceAddress<T>(workspace, 1);
CalPad3d(padded_size_ / sizeof(T), input_addr, n_, c_, old_depth_, old_height_, old_width_,
old_depth_ + pad_depth_, old_height_ + pad_height_, old_width_ + pad_width_, pad_head_, pad_top_,
pad_left_, pad_value_, padded_addr, reinterpret_cast<cudaStream_t>(stream_ptr));

View File

@ -61,7 +61,7 @@ class Conv3dGradFilterGpuKernel : public GpuKernel {
const float alpha = 1;
const float beta = 0;
if (use_pad_) {
T *padded = GetPossiblyNullDeviceAddress<T>(workspace, 1);
T *padded = GetDeviceAddress<T>(workspace, 1);
CalPad3d(padded_size_ / sizeof(T), x, n_, c_, old_depth_, old_height_, old_width_, old_depth_ + pad_depth_,
old_height_ + pad_height_, old_width_ + pad_width_, pad_head_, pad_top_, pad_left_, pad_value_, padded,
reinterpret_cast<cudaStream_t>(stream_ptr));
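These convolution kernels all make the same substitution inside the if (use_pad_) branch: once padding is in use, workspace slot 1 must have been allocated, so the strict accessor that rejects a missing buffer is the appropriate one. A rough sketch of the two accessor contracts as their names suggest (the actual MindSpore helpers have different signatures and may differ in behavior):

#include <cstddef>
#include <stdexcept>
#include <vector>

struct Address {
  void *ptr = nullptr;
  size_t size = 0;
};

// Strict accessor: a missing or empty slot is a hard error.
template <typename T>
T *StrictDeviceAddress(const std::vector<Address> &list, size_t idx) {
  if (idx >= list.size() || list[idx].ptr == nullptr || list[idx].size == 0) {
    throw std::runtime_error("workspace slot is missing or empty");
  }
  return static_cast<T *>(list[idx].ptr);
}

// Permissive accessor: a missing slot legitimately yields nullptr, for
// paths where the buffer is optional.
template <typename T>
T *PossiblyNullDeviceAddress(const std::vector<Address> &list, size_t idx) {
  if (idx >= list.size() || list[idx].ptr == nullptr) {
    return nullptr;
  }
  return static_cast<T *>(list[idx].ptr);
}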

View File

@ -72,7 +72,7 @@ class Conv3dTransposeGpuFwdKernel : public GpuKernel {
}
} else {
if (greater_stride_) {
T *stride_padded = GetPossiblyNullDeviceAddress<T>(workspace, 1);
T *stride_padded = GetDeviceAddress<T>(workspace, 1);
CHECK_CUDNN_RET_WITH_EXCEPT(
kernel_node_,
cudnnConvolutionBackwardData(cudnn_handle_, &alpha, filter_desc_, filter_addr, input_desc_, input_addr,
@ -93,6 +93,23 @@ class Conv3dTransposeGpuFwdKernel : public GpuKernel {
return true;
}
bool CheckNull(const std::vector<size_t> filter_shape, const std::vector<size_t> input_shape) {
is_null_input_ = CHECK_NULL_INPUT(filter_shape) || CHECK_NULL_INPUT(input_shape);
if (is_null_input_) {
MS_LOG(WARNING) << "For 'Conv3dTransposeGpuKernel', input is null.";
InitSizeLists();
return true;
}
return false;
}
void CheckSize(const size_t value, const size_t expect_value, const std::string &arg_name) {
if (value != expect_value) {
MS_LOG(EXCEPTION) << "For 'Conv3dTransposeGpuKernel', the length of " << arg_name << " must be " << expect_value
<< ", but got " << value;
}
}
bool Init(const CNodePtr &kernel_node) override {
kernel_node_ = kernel_node;
InitResource();
@ -105,10 +122,7 @@ class Conv3dTransposeGpuFwdKernel : public GpuKernel {
}
auto filter_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
if (is_null_input_) {
MS_LOG(WARNING) << "Conv3dTransposeGpuBkwKernel input is null.";
InitSizeLists();
if (CheckNull(filter_shape, input_shape)) {
return true;
}
std::vector<size_t> output_shape;
@ -133,6 +147,8 @@ class Conv3dTransposeGpuFwdKernel : public GpuKernel {
pad_mode_ = GetAttr<std::string>(kernel_node, "pad_mode");
SetStrideAndDilation(kernel_node);
std::vector<int> stride_pad_list(6, 0);
(void)CheckSize(filter_shape.size(), 5, "filter_shape");
(void)CheckSize(pad_list.size(), 6, "pad_list");
if (pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase) { // pad_mode_ = same
UpdatePaddingAndDilation(input_shape, filter_shape, pad_list.data(), stride_pad_list.data());
}

View File

@ -101,6 +101,7 @@ class Dropout3DGpuFwdKernel : public GpuKernel {
}
num_chan_ = n_ * c_;
MS_EXCEPTION_IF_ZERO("num channel", num_chan_);
num_per_chan_ = num_count_ / num_chan_; // number of elements per channel
keep_prob_ = GetAttr<float>(kernel_node, "keep_prob");

View File

@ -51,12 +51,10 @@ class HSigmoidKernel : public GpuKernel {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but HSigmoid needs 1 inputs.";
return false;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but HSigmoid has 1 output.";
return false;
}
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);

View File

@ -52,12 +52,10 @@ class HSigmoidGradKernel : public GpuKernel {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but HSigmoidGrad needs 2 inputs.";
return false;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but HSigmoidGrad has 1 output.";
return false;
}
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);

View File

@ -38,6 +38,9 @@ class NLLLossGpuKernel : public GpuKernel {
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (is_null_input_) {
return true;
}
T *input_device = GetDeviceAddress<T>(inputs, 0);
int32_t *target_device = GetDeviceAddress<int32_t>(inputs, 1); // nll_loss only supports int32 target
S *weight_device = GetDeviceAddress<S>(inputs, 2);
@ -45,7 +48,9 @@ class NLLLossGpuKernel : public GpuKernel {
T *loss_device = GetDeviceAddress<T>(outputs, 0);
S *total_weight_device = GetDeviceAddress<S>(outputs, 1);
T *tmp_loss_device = GetPossiblyNullDeviceAddress<T>(workspace, 0);
T *tmp_loss_device =
reduction_ != 0 ? GetDeviceAddress<T>(workspace, 0) : GetPossiblyNullDeviceAddress<T>(workspace, 0);
S *tmp_target_weight_device = GetDeviceAddress<S>(workspace, 1);
NLLLoss(n_, c_, reduction_, input_device, target_device, weight_device, loss_device, total_weight_device,
@ -55,6 +60,16 @@ class NLLLossGpuKernel : public GpuKernel {
bool Init(const CNodePtr &kernel_node) override {
std::vector<size_t> input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
if (is_null_input_) {
MS_LOG(WARNING) << "For 'NllLossGpuKernel', input is null";
InitSizeLists();
return true;
}
if (input_shape.size() < 2) {
MS_LOG(EXCEPTION) << "For 'NllLossGpuKernel', the rank of input cannot less than 2, but got "
<< input_shape.size();
}
n_ = static_cast<int>(input_shape[0]);
c_ = static_cast<int>(input_shape[1]);
for (size_t i = 0; i < input_shape.size(); i++) {
@ -75,6 +90,7 @@ class NLLLossGpuKernel : public GpuKernel {
input_size_ = 1;
n_ = 0;
c_ = 0;
is_null_input_ = false;
reduction_ = 1; // default value
tmp_loss_size_ = 0;
tmp_target_weight_size_ = 0; // tmp_target_weight (N,) array
@ -106,6 +122,7 @@ class NLLLossGpuKernel : public GpuKernel {
size_t tmp_target_weight_size_;
int n_;
int c_;
bool is_null_input_;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
std::vector<size_t> workspace_size_list_;
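One more accessor note: tmp_loss_device above now uses the strict accessor only when reduction_ != 0. A hedged sketch of the matching workspace sizing; the assumption that reduction_ == 0 encodes "no reduction" (so the temporary loss buffer is empty) is inferred from this conditional, not from code shown in the diff:

#include <cstddef>
#include <vector>

// Sketch: workspace sizing that would mirror the conditional accessor above.
// Assumption: reduction == 0 means no reduction, so no temporary loss
// buffer is needed and the slot is sized zero (hence the permissive accessor).
std::vector<size_t> WorkspaceSizes(int reduction, size_t n, size_t elem_size) {
  std::vector<size_t> sizes;
  sizes.push_back(reduction != 0 ? n * elem_size : 0);  // tmp loss buffer
  sizes.push_back(n * elem_size);                       // tmp target weights
  return sizes;
}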

View File

@ -38,6 +38,9 @@ class NLLLossGradGpuKernel : public GpuKernel {
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (is_null_input_) {
return true;
}
T *input_device = GetDeviceAddress<T>(inputs, 0);
T *dloss_device = GetDeviceAddress<T>(inputs, 1);
int32_t *target_device = GetDeviceAddress<int32_t>(inputs, 2); // nll_loss_grad only supports int32 target
@ -54,6 +57,16 @@ class NLLLossGradGpuKernel : public GpuKernel {
bool Init(const CNodePtr &kernel_node) override {
std::vector<size_t> input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
if (is_null_input_) {
MS_LOG(WARNING) << "For 'NllLossGradGpuKernel', input is null";
InitSizeLists();
return true;
}
if (input_shape.size() < 2) {
MS_LOG(EXCEPTION) << "For 'NllLossGradGpuKernel', the rank of input cannot less than 2, but got "
<< input_shape.size();
}
n_ = static_cast<int>(input_shape[0]);
c_ = static_cast<int>(input_shape[1]);
for (size_t i = 0; i < input_shape.size(); i++) {
@ -73,6 +86,7 @@ class NLLLossGradGpuKernel : public GpuKernel {
input_size_ = 1;
n_ = 0;
c_ = 0;
is_null_input_ = false;
reduction_ = 1; // default value
num_dloss_ = 1; // default size (scalar)
input_size_list_.clear();
@ -96,6 +110,7 @@ class NLLLossGradGpuKernel : public GpuKernel {
int reduction_;
int n_;
int c_;
bool is_null_input_;
int num_dloss_;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;

View File

@ -36,6 +36,9 @@ class ResizeBilinearGpuKernel : public GpuKernel {
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (is_null_input_) {
return true;
}
T *input = GetDeviceAddress<T>(inputs, 0);
T *output = GetDeviceAddress<T>(outputs, 0);
float h_scale = Scaling(input_h_, output_h_, align_corners_);
@ -58,8 +61,15 @@ class ResizeBilinearGpuKernel : public GpuKernel {
}
std::vector<size_t> input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
std::vector<size_t> output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
if (input_shape.size() != 4) {
MS_LOG(ERROR) << "Input is " << input_shape.size() << "-D, but ResizeBilinear supports only 4-D inputs.";
is_null_input_ = CHECK_NULL_INPUT(input_shape) || CHECK_NULL_INPUT(output_shape);
if (is_null_input_) {
MS_LOG(WARNING) << "For 'ResizeBilinearGpuKernel', input or output is null.";
InitSizeLists();
return true;
}
if (input_shape.size() != 4 || output_shape.size() != 4) {
MS_LOG(ERROR) << "For 'ResizeBilinear', the rank of input and output must be 4, but got the rank of input: "
<< input_shape.size() << ", the rank of output: " << output_shape.size();
return false;
}
n_ = SizeToInt(input_shape[0]);
@ -83,6 +93,7 @@ class ResizeBilinearGpuKernel : public GpuKernel {
void ResetResource() noexcept override {
align_corners_ = false;
is_null_input_ = false;
n_ = 0;
c_ = 0;
input_h_ = 0;
@ -110,6 +121,7 @@ class ResizeBilinearGpuKernel : public GpuKernel {
}
bool align_corners_;
bool is_null_input_;
int n_;
int c_;
int input_h_;
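The h_scale/w_scale computation above calls Scaling, whose body is not shown in this diff. For orientation, the conventional resize-scale formula it presumably follows (an assumption, flagged as such):

#include <cstddef>

// The conventional bilinear-resize scale. Assumption: MindSpore's Scaling
// follows the same convention; the diff does not show its body.
float ScalingSketch(size_t in_size, size_t out_size, bool align_corners) {
  if (out_size <= 1) {
    return 0.0f;
  }
  return align_corners ? static_cast<float>(in_size - 1) / static_cast<float>(out_size - 1)
                       : static_cast<float>(in_size) / static_cast<float>(out_size);
}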

View File

@ -36,6 +36,9 @@ class ResizeBilinearGradGpuKernel : public GpuKernel {
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (is_null_input_) {
return true;
}
T *dy = GetDeviceAddress<T>(inputs, 0);
float *interim = GetDeviceAddress<float>(workspace, 0);
T *dx = GetDeviceAddress<T>(outputs, 0);
@ -67,6 +70,12 @@ class ResizeBilinearGradGpuKernel : public GpuKernel {
std::vector<size_t> dy_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
std::vector<size_t> x_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
std::vector<size_t> dx_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(dy_shape) || CHECK_NULL_INPUT(x_shape) || CHECK_NULL_INPUT(dx_shape);
if (is_null_input_) {
MS_LOG(WARNING) << "For 'ResizeBilinearGradGpuKernel', input or output is null.";
InitSizeLists();
return true;
}
if (dy_shape.size() != 4) {
MS_LOG(ERROR) << "Input is " << dy_shape.size() << "-D, but ResizeBilinearGrad supports only 4-D inputs.";
return false;
@ -75,6 +84,10 @@ class ResizeBilinearGradGpuKernel : public GpuKernel {
MS_LOG(ERROR) << "Input is " << x_shape.size() << "-D, but ResizeBilinearGrad supports only 4-D inputs.";
return false;
}
if (dx_shape.size() != 4) {
MS_LOG(ERROR) << "For 'ResizeBilinearGradGpuKernel', the rank of output must be 4, but got " << dx_shape.size();
return false;
}
n_ = SizeToInt(dy_shape[0]);
c_ = SizeToInt(dy_shape[1]);
dy_h_ = SizeToInt(dy_shape[2]);
@ -97,6 +110,7 @@ class ResizeBilinearGradGpuKernel : public GpuKernel {
void ResetResource() noexcept override {
align_corners_ = false;
is_null_input_ = false;
n_ = 0;
c_ = 0;
dy_h_ = 0;
@ -125,6 +139,7 @@ class ResizeBilinearGradGpuKernel : public GpuKernel {
}
bool align_corners_;
bool is_null_input_;
int n_;
int c_;
int dy_h_;

View File

@ -84,6 +84,10 @@ class RandpermGpuKernel : public GpuKernel {
}
max_length_ = static_cast<size_t>(GetAttr<int64_t>(kernel_node, "max_length"));
if (max_length_ < 1) {
MS_LOG(ERROR) << "For 'RandpermGpuKernel', the max_length cannot be less than 1, but got " << max_length_;
return false;
}
pad_ = static_cast<T>(GetAttr<int64_t>(kernel_node, "pad"));
InitSizeLists();