forked from mindspore-Ecosystem/mindspore

!24269 add vector size check, input shape check and divide-by-zero check for GPU operators.

Merge pull request !24269 from wangshuide/wsd_master

This commit is contained in commit 2c1d3baace.
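Almost every kernel in this change receives the same three additions: an is_null_input_ flag initialized in the constructor, a CHECK_NULL_INPUT test in Init() that warns and publishes empty size lists, and an early return true in Launch() so a zero-sized input becomes a no-op instead of a CUDA launch over empty buffers. Below is a condensed, self-contained sketch of that pattern; the simplified interface and the CheckNullInput stand-in are illustrative assumptions, not MindSpore's actual GpuKernel base class or CHECK_NULL_INPUT macro.

#include <cstdio>
#include <vector>

// Stand-in for CHECK_NULL_INPUT: treat a shape as "null" when it is empty
// or any dimension is zero (an assumption; the real macro is not in this diff).
static bool CheckNullInput(const std::vector<size_t> &shape) {
  if (shape.empty()) return true;
  for (size_t d : shape) {
    if (d == 0) return true;
  }
  return false;
}

class ExampleKernel {
 public:
  bool Init(const std::vector<size_t> &input_shape) {
    is_null_input_ = CheckNullInput(input_shape);
    if (is_null_input_) {
      // Degenerate input: warn, publish (empty) size lists, report success.
      std::fprintf(stderr, "For 'ExampleKernel', input is null.\n");
      return true;
    }
    // ... normal rank validation and size computation would go here ...
    return true;
  }

  bool Launch() {
    if (is_null_input_) {
      return true;  // nothing to compute; skip the CUDA kernel entirely
    }
    // ... fetch device addresses and launch the CUDA kernel ...
    return true;
  }

 private:
  bool is_null_input_ = false;
};

int main() {
  ExampleKernel k;
  k.Init({0, 8, 8, 3});       // zero-sized batch is flagged, not crashed on
  return k.Launch() ? 0 : 1;  // Launch degenerates to a successful no-op
}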
@@ -41,13 +41,17 @@ class CropAndResizeGpuKernel : public GpuKernel {
         input_width_(0),
         final_height_(0),
         final_width_(0),
-        channel_(0) {}
+        channel_(0),
+        is_null_input_(false) {}
   ~CropAndResizeGpuKernel() override = default;
   const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
   const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
   const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
   bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
               const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
+    if (is_null_input_) {
+      return true;
+    }
     VARIABLE_NOT_USED(workspace);
     T *input_image = GetDeviceAddress<T>(inputs, 0);
     float *input_boxes = GetDeviceAddress<float>(inputs, 1);

@@ -72,6 +76,18 @@ class CropAndResizeGpuKernel : public GpuKernel {
     }
     // input image
     auto input_image_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
+    auto input_boxes_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
+    auto input_box_index_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
+    auto input_crop_size_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
+    auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
+    is_null_input_ = CHECK_NULL_INPUT(input_image_shape) || CHECK_NULL_INPUT(input_boxes_shape) ||
+                     CHECK_NULL_INPUT(input_box_index_shape) || CHECK_NULL_INPUT(input_crop_size_shape) ||
+                     CHECK_NULL_INPUT(output_shape);
+    if (is_null_input_) {
+      MS_LOG(WARNING) << "For 'CropAndResizeGpuKernel', input or output is null.";
+      InitSizeLists();
+      return true;
+    }
     size_t input_image_shape_len = input_image_shape.size();
     if (input_image_shape_len != 4) {
       MS_LOG(ERROR) << " image tensor is " << input_image_shape_len << "-D, but CropAndResize supports only " << 4
@@ -86,7 +102,6 @@ class CropAndResizeGpuKernel : public GpuKernel {
     input_height_ = input_image_shape[1];
     input_width_ = input_image_shape[2];
     // input boxes
-    auto input_boxes_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
     size_t input_boxes_shape_len = input_boxes_shape.size();
     if (input_boxes_shape_len != 2) {
       MS_LOG(ERROR) << "Boxes is rank " << input_boxes_shape_len << " but CropAndResize supports only rank " << 2

@@ -99,7 +114,6 @@ class CropAndResizeGpuKernel : public GpuKernel {
     }
     input_boxes_size_ *= sizeof(float);
     // input box_index
-    auto input_box_index_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
     size_t input_box_index_shape_len = input_box_index_shape.size();
     if (input_box_index_shape_len != 1) {
       MS_LOG(ERROR) << "Box_index is rank " << input_box_index_shape_len << " but CropAndResize supports only rank "

@@ -110,7 +124,6 @@ class CropAndResizeGpuKernel : public GpuKernel {
     input_box_ind_size_ *= input_box_index_shape[0];  // single dim required
     input_box_ind_size_ *= sizeof(int);
     // input crop_size
-    auto input_crop_size_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
     size_t input_crop_size_shape_len = input_crop_size_shape.size();
     if (input_crop_size_shape_len != 1) {
       MS_LOG(ERROR) << "Crop_size is rank " << input_crop_size_shape_len << "-D, but CropAndResize supports only rank "

@@ -126,8 +139,11 @@ class CropAndResizeGpuKernel : public GpuKernel {
     input_crop_size_ *= input_crop_size_shape[0];
     input_crop_size_ *= sizeof(int);
     // output
-    auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
     auto output_shape_len = output_shape.size();
+    if (output_shape_len != 4) {
+      MS_LOG(ERROR) << "For 'CropAndResize', the rank of output should be 4, but got " << output_shape_len;
+      return false;
+    }
     output_size_ = 1;
     for (size_t i = 0; i < output_shape_len; i++) {
       output_size_ *= output_shape[i];

@@ -175,6 +191,7 @@ class CropAndResizeGpuKernel : public GpuKernel {
   int final_height_;
   int final_width_;
   int channel_;
+  bool is_null_input_;
   std::vector<size_t> input_size_list_;
   std::vector<size_t> output_size_list_;
   std::vector<size_t> workspace_size_list_;
@@ -1,4 +1,3 @@
-
 /**
  * Copyright 2021 Huawei Technologies Co., Ltd
  *

@@ -37,6 +36,9 @@ class DepthToSpaceFwdKernel : public GpuKernel {

   bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
               const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
+    if (is_null_input_) {
+      return true;
+    }
     // get device buffer ptr
     T *input = GetDeviceAddress<T>(inputs, 0);
     T *output = GetDeviceAddress<T>(outputs, 0);

@@ -71,6 +73,12 @@ class DepthToSpaceFwdKernel : public GpuKernel {
     }
     // check input_shape
     auto input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
+    is_null_input_ = CHECK_NULL_INPUT(input_shape);
+    if (is_null_input_) {
+      MS_LOG(WARNING) << "For 'DepthToSpaceGpuKernel', input is null.";
+      InitSizeLists();
+      return true;
+    }
     shape_size_ = input_shape.size();
     if (shape_size_ != DEPTHTOSPACE_BUFFER_DIMENSION) {
       MS_LOG(EXCEPTION) << "Input is " << shape_size_ << "-D, but DepthToSpace supports 4-D tensor.";

@@ -111,6 +119,7 @@ class DepthToSpaceFwdKernel : public GpuKernel {
     oc_ = 0;
     oh_ = 0;
     ow_ = 0;
+    is_null_input_ = false;

     input_size_list_.clear();
     output_size_list_.clear();

@@ -140,6 +149,7 @@ class DepthToSpaceFwdKernel : public GpuKernel {
   size_t oc_;
   size_t oh_;
   size_t ow_;
+  bool is_null_input_;
 };
 }  // namespace kernel
 }  // namespace mindspore
@@ -37,6 +37,9 @@ class EmbeddingLookupKernel : public GpuKernel {

   bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
               const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
+    if (is_null_input_) {
+      return true;
+    }
     VARIABLE_NOT_USED(workspace);
     T *input_addr = GetDeviceAddress<T>(inputs, 0);
     S *indices_addr = GetDeviceAddress<S>(inputs, 1);

@@ -69,6 +72,16 @@ class EmbeddingLookupKernel : public GpuKernel {
     input_shapes_ = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
     indices_shapes_ = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 1);
     output_shapes_ = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node, 0);
+    is_null_input_ =
+      CHECK_NULL_INPUT(input_shapes_) || CHECK_NULL_INPUT(indices_shapes_) || CHECK_NULL_INPUT(output_shapes_);
+    if (is_null_input_) {
+      MS_LOG(WARNING) << "For 'EmbeddingLookupGpuKernel', input or output is null.";
+      InitSizeLists();
+      return true;
+    }
+    if (input_shapes_.size() < 1) {
+      MS_LOG(EXCEPTION) << "For 'EmbeddingLookupGpuKernel', the rank of input cannot be less than 1.";
+    }
     if (!is_dynamic_shape_) {
       offset_ = GetAttr<int64_t>(kernel_node, "offset");
     }

@@ -78,6 +91,7 @@ class EmbeddingLookupKernel : public GpuKernel {
   }
   void ResetResource() noexcept override {
     is_dynamic_shape_ = false;
+    is_null_input_ = false;
     input_shapes_.clear();
     indices_shapes_.clear();
     output_shapes_.clear();

@@ -138,6 +152,7 @@ class EmbeddingLookupKernel : public GpuKernel {
   size_t dims_[3] = {};
   int64_t offset_;
   bool is_dynamic_shape_;
+  bool is_null_input_;
   std::vector<size_t> input_size_list_;
   std::vector<size_t> output_size_list_;
   std::vector<size_t> workspace_size_list_;
@@ -40,6 +40,9 @@ class ExtractImagePatchesKernel : public GpuKernel {

   bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
               const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
+    if (is_null_input_) {
+      return true;
+    }
     T *input = GetDeviceAddress<T>(inputs, 0);
     T *output = GetDeviceAddress<T>(outputs, 0);
     T *t_input = GetDeviceAddress<T>(workspace, 0);

@@ -92,16 +95,28 @@ class ExtractImagePatchesKernel : public GpuKernel {
       MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but ExtractImagePatches has 1 output.";
     }
     auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
+    auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
+    is_null_input_ = CHECK_NULL_INPUT(input_shape) || CHECK_NULL_INPUT(output_shape);
+    if (is_null_input_) {
+      MS_LOG(WARNING) << "For 'ExtractImagePatchesGpuKernel', input or output is null.";
+      InitSizeLists();
+      return true;
+    }
     input_size_ = 1;
     for (size_t i = 0; i < input_shape.size(); i++) {
       input_size_ *= input_shape[i];
       input_shape_.push_back(input_shape[i]);
     }
-    auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);

     output_size_ = 1;
     for (size_t i = 0; i < output_shape.size(); i++) {
       output_size_ *= output_shape[i];
     }
+    if (input_shape.size() != 4 || output_shape.size() != 4) {
+      MS_LOG(EXCEPTION) << "For 'ExtractImagePatchesGpuKernel', the rank of input and output should be 4, "
+                        << "but got the rank of input: " << input_shape.size()
+                        << ", the rank of output: " << output_shape.size();
+    }
     // transposed NHWC shape
     t_output_shape_ = {output_shape[0], output_shape[2], output_shape[3], output_shape[1]};

@@ -109,6 +124,11 @@ class ExtractImagePatchesKernel : public GpuKernel {
     auto ksizes = GetAttr<std::vector<int64_t>>(kernel_node, "ksizes");
     auto strides = GetAttr<std::vector<int64_t>>(kernel_node, "strides");
     auto rates = GetAttr<std::vector<int64_t>>(kernel_node, "rates");
+    if (ksizes.size() != 4 || strides.size() != 4 || rates.size() != 4) {
+      MS_LOG(EXCEPTION) << "For 'ExtractImagePatchesGpuKernel', the rank of ksizes, strides and rates should be 4, "
+                        << "but got the rank of ksizes: " << ksizes.size()
+                        << ", the rank of strides: " << strides.size() << ", the rank of rates: " << rates.size();
+    }

     ksize_row_ = ksizes[2];
     ksize_col_ = ksizes[3];

@@ -127,6 +147,9 @@ class ExtractImagePatchesKernel : public GpuKernel {
     int64_t patch_rows_eff = ksize_row_ + (ksize_row_ - 1) * (rate_row_ - 1);
     int64_t patch_cols_eff = ksize_col_ + (ksize_col_ - 1) * (rate_col_ - 1);

+    MS_EXCEPTION_IF_ZERO("stride row", stride_row_);
+    MS_EXCEPTION_IF_ZERO("stride col", stride_col_);
+
     if (padding == "VALID") {
       output_rows_ = std::ceil((input_row_size_ - patch_rows_eff + 1.f) / static_cast<float>(stride_row_));
       output_cols_ = std::ceil((input_col_size_ - patch_cols_eff + 1.f) / static_cast<float>(stride_col_));
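The two new guards protect the divisions that follow: stride_row_ and stride_col_ are divisors in the std::ceil output-size computations just below, and other_stride_ divides output_size_ - 1 in the next hunk. The macro's definition is not part of this diff; a plausible stand-in for how such a divide-by-zero guard behaves (the real MS_EXCEPTION_IF_ZERO raises a MindSpore exception rather than aborting):

#include <cstdlib>
#include <iostream>

// Illustrative stand-in only; not MindSpore's actual macro.
#define EXCEPTION_IF_ZERO(name, value)                        \
  do {                                                        \
    if ((value) == 0) {                                       \
      std::cerr << "divisor '" << (name) << "' is zero\n";    \
      std::abort();                                           \
    }                                                         \
  } while (0)

int main() {
  long long stride_row = 2;  // would be 0 for a malformed "strides" attribute
  EXCEPTION_IF_ZERO("stride row", stride_row);
  // Only reached when the divisor is non-zero:
  long long output_rows = (100 + stride_row - 1) / stride_row;  // ceil(100 / 2)
  std::cout << output_rows << "\n";  // 50
  return 0;
}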
@@ -148,6 +171,7 @@ class ExtractImagePatchesKernel : public GpuKernel {
     row_input_stride_ = input_depth * input_col_size_;
     patch_input_stride_ = input_depth * input_col_size_ * input_row_size_;
     output_depth_ = input_depth;
+    MS_EXCEPTION_IF_ZERO("other stride", other_stride_);
     need_batch_ = (output_size_ - 1) / other_stride_;

     InitSizeLists();

@@ -177,6 +201,7 @@ class ExtractImagePatchesKernel : public GpuKernel {
     row_input_stride_ = 1;
     patch_input_stride_ = 1;
     output_depth_ = 1;
+    is_null_input_ = false;
     input_shape_.clear();
     t_output_shape_.clear();
     input_size_list_.clear();

@@ -208,6 +233,7 @@ class ExtractImagePatchesKernel : public GpuKernel {
   int64_t output_rows_;
   int64_t output_cols_;
   bool need_batch_;
+  bool is_null_input_;
   int64_t row_stride_;
   int64_t patch_stride_;
   int64_t other_stride_;
@@ -40,6 +40,9 @@ class InTopKGpuKernel : public GpuKernel {

   bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
               const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
+    if (is_null_input_) {
+      return true;
+    }
     T *predictions_device = GetDeviceAddress<T>(inputs, 0);
     int32_t *targets_device = GetDeviceAddress<int32_t>(inputs, 1);

@@ -106,6 +109,16 @@ class InTopKGpuKernel : public GpuKernel {
     }

     input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
+    if (input_shape_.size() < 2) {
+      MS_LOG(EXCEPTION) << "For 'InTopKGpuKernel', the rank of input cannot be less than 2, but got "
+                        << input_shape_.size();
+    }
+    is_null_input_ = CHECK_NULL_INPUT(input_shape_);
+    if (is_null_input_) {
+      MS_LOG(WARNING) << "For 'InTopKGpuKernel', input is null.";
+      InitSizeLists();
+      return true;
+    }
     input_rank_ = input_shape_.size();
     input_size_ = 1;
     for (size_t i = 0; i < input_rank_; i++) {

@@ -136,6 +149,7 @@ class InTopKGpuKernel : public GpuKernel {
     input_rank_ = 0;
     outer_size_ = 0;
     inner_size_ = 0;
+    is_null_input_ = false;
     top_k_init_ = static_cast<T>(0.);
     input_size_list_.clear();
     output_size_list_.clear();

@@ -171,6 +185,7 @@ class InTopKGpuKernel : public GpuKernel {
   // for topk
   size_t outer_size_;
   size_t inner_size_;
+  bool is_null_input_;

   std::vector<size_t> input_size_list_;
   std::vector<size_t> output_size_list_;
@@ -41,6 +41,9 @@ class MeshgridGpuKernel : public GpuKernel {

   bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
               const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
+    if (is_null_input_) {
+      return true;
+    }
     T *ones_device = GetDeviceAddress<T>(workspace, 0);
     CalOnesLike(output_size_, static_cast<T *>(nullptr), ones_device, reinterpret_cast<cudaStream_t>(stream_ptr));

@@ -79,9 +82,14 @@ class MeshgridGpuKernel : public GpuKernel {
     input_size_ = 1;
     input_count_ = AnfAlgo::GetInputTensorNum(kernel_node);
     for (size_t i = 0; i < input_count_; i++) {
-      size_t input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, i)[0];
-      input_shapes_.push_back(input_shape);
-      input_size_ *= input_shape;
+      auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, i);
+      if (input_shape.size() < 1) {
+        MS_LOG(ERROR) << "For 'MeshGridGpuKernel', the rank of input" << i << " cannot be less than 1.";
+        return false;
+      }
+      size_t input_size = input_shape[0];
+      input_shapes_.push_back(input_size);
+      input_size_ *= input_size;
     }

     output_size_ = 1;

@@ -89,6 +97,12 @@ class MeshgridGpuKernel : public GpuKernel {

     // inferred shape swaps output shape for us if needed
     output_shape_ = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
+    is_null_input_ = CHECK_NULL_INPUT(output_shape_);
+    if (is_null_input_) {
+      MS_LOG(WARNING) << "For 'MeshGridGpuKernel', output is null.";
+      InitSizeLists();
+      return true;
+    }

     if (output_count_ != input_count_) {
       MS_LOG(ERROR) << "output count is " << output_count_ << ", but MeshgridGpuKernel needs " << input_count_

@@ -101,7 +115,7 @@ class MeshgridGpuKernel : public GpuKernel {
     }

     // need to pad output shape with ones for broadcast kernel
-    for (size_t i = 0; i < output_shape_.size() - MAX_DIMS; i++) {
+    for (size_t i = 0; i < MAX_DIMS - output_shape_.size(); i++) {
       output_shape_.push_back(1);
     }
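The one-line change above fixes a real bug: both operands are size_t, so when the output rank is below MAX_DIMS the old bound output_shape_.size() - MAX_DIMS wraps around to a number near 2^64 and the loop pads essentially forever. A minimal demonstration (MAX_DIMS = 7 is assumed here; its actual value is not shown in this diff):

#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  const size_t MAX_DIMS = 7;  // assumed value for illustration
  std::vector<size_t> shape = {3, 4};  // rank 2 < MAX_DIMS

  // Old bound: unsigned 2 - 7 wraps instead of going negative.
  std::cout << shape.size() - MAX_DIMS << "\n";  // 18446744073709551611

  // Safe padding: hoist the count so the bound is computed once.
  const size_t pad = MAX_DIMS > shape.size() ? MAX_DIMS - shape.size() : 0;
  for (size_t i = 0; i < pad; i++) {
    shape.push_back(1);
  }
  std::cout << shape.size() << "\n";  // 7
  return 0;
}

The committed loop re-evaluates MAX_DIMS - output_shape_.size() on every iteration as the vector grows; the hoisted count in the sketch is just one way to keep the intended bound explicit.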
@@ -118,6 +132,7 @@ class MeshgridGpuKernel : public GpuKernel {
     output_size_ = 0;
     output_count_ = 0;
     swap_indexing_ = true;
+    is_null_input_ = false;

     input_size_list_.clear();
     output_size_list_.clear();

@@ -145,6 +160,7 @@ class MeshgridGpuKernel : public GpuKernel {
   size_t output_size_;
   size_t output_count_;
   bool swap_indexing_;
+  bool is_null_input_;

   std::vector<size_t> input_size_list_;
   std::vector<size_t> output_size_list_;
@@ -36,6 +36,7 @@ class ReverseSequenceGpuFwdKernel : public GpuKernel {
         input_size_(0),
         batch_dim_(0),
         seq_dim_(0),
+        is_null_input_(false),
         seq_len_size_(0),
         total_index_dim_(0),
         output_size_(0),

@@ -46,6 +47,9 @@ class ReverseSequenceGpuFwdKernel : public GpuKernel {
   const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
   bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
               const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
+    if (is_null_input_) {
+      return true;
+    }
     T *input = GetDeviceAddress<T>(inputs, 0);
     S *seq_len = GetDeviceAddress<S>(inputs, 1);
     size_t *input_shape_ptr = GetDeviceAddress<size_t>(workspace, 0);

@@ -75,23 +79,23 @@ class ReverseSequenceGpuFwdKernel : public GpuKernel {
       return false;
     }
     input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
-    if (CHECK_NULL_INPUT(input_shape_)) {
-      MS_LOG(WARNING) << "ReverseSequence input is null";
+    auto seq_len_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
+    is_null_input_ = CHECK_NULL_INPUT(input_shape_) || CHECK_NULL_INPUT(seq_len_shape);
+    if (is_null_input_) {
+      MS_LOG(WARNING) << "For 'ReverseSequenceGpuKernel', input is null.";
       InitSizeLists();
       return true;
     }
+    if (input_shape_.size() < 1) {
+      MS_LOG(EXCEPTION) << "For 'ReverseSequenceGpuKernel', the rank of input cannot be less than 1, but got "
+                        << input_shape_.size();
+    }
     input_size_ = 1;
     shape_size_ = input_shape_.size();  // required for calls
     for (size_t i = 0; i < shape_size_; i++) {
       input_size_ *= input_shape_[i];
     }
-    // get seq len shape
-    auto seq_len_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
-    if (CHECK_NULL_INPUT(seq_len_shape)) {
-      MS_LOG(WARNING) << "ReverseSequence seq lengths input is null";
-      InitSizeLists();
-      return true;
-    }
     seq_len_size_ = seq_len_shape.size();
     output_size_ = input_size_;  // size does not change
     // Allocate workspace memory to use for storing indices for each thread to compute with

@@ -116,6 +120,7 @@ class ReverseSequenceGpuFwdKernel : public GpuKernel {
   size_t input_size_;
   int64_t batch_dim_;
   int64_t seq_dim_;
+  bool is_null_input_;
   size_t seq_len_size_;
   size_t total_index_dim_;
   size_t output_size_;
@@ -38,6 +38,9 @@ class ReverseV2GpuKernel : public GpuKernel {

   bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
               const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
+    if (is_null_input_) {
+      return true;
+    }
     T *input_device = GetDeviceAddress<T>(inputs, 0);
     T *output_device = GetDeviceAddress<T>(outputs, 0);
     size_t *input_shape_device = GetDeviceAddress<size_t>(workspace, 0);

@@ -79,7 +82,16 @@ class ReverseV2GpuKernel : public GpuKernel {
     }

     input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
+    is_null_input_ = CHECK_NULL_INPUT(input_shape_);
+    if (is_null_input_) {
+      MS_LOG(WARNING) << "For 'ReverseV2GpuKernel', input is null.";
+      InitSizeLists();
+      return true;
+    }
     input_rank_ = input_shape_.size();
+    if (input_rank_ < 1) {
+      MS_LOG(EXCEPTION) << "For 'ReverseV2GpuKernel', the rank of input cannot be less than 1, but got " << input_rank_;
+    }
     input_size_ = 1;
     for (size_t i = 0; i < input_rank_; i++) {
       input_size_ *= input_shape_[i];

@@ -92,6 +104,9 @@ class ReverseV2GpuKernel : public GpuKernel {
     }

     axis_ = GetAttr<std::vector<int64_t>>(kernel_node, "axis");
+    if (axis_.size() < 1) {
+      MS_LOG(EXCEPTION) << "For 'ReverseV2GpuKernel', the rank of axis cannot be less than 1, but got " << axis_.size();
+    }
     for (int64_t &dimension : axis_) {
       if (dimension < 0) {
         dimension += input_rank_;

@@ -106,6 +121,7 @@ class ReverseV2GpuKernel : public GpuKernel {
   void ResetResource() noexcept override {
     input_size_ = 0;
     input_rank_ = 0;
+    is_null_input_ = false;
     input_shape_.clear();
     strides_.clear();
     axis_.clear();

@@ -131,6 +147,7 @@ class ReverseV2GpuKernel : public GpuKernel {
   std::vector<size_t> input_shape_;
   std::vector<int64_t> strides_;
   std::vector<int64_t> axis_;
+  bool is_null_input_;

   std::vector<size_t> input_size_list_;
   std::vector<size_t> output_size_list_;
@@ -42,6 +42,9 @@ class SortGpuKernel : public GpuKernel {

   bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
               const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
+    if (is_null_input_) {
+      return true;
+    }
     T *input_device = GetDeviceAddress<T>(inputs, 0);

     T *output_device = GetDeviceAddress<T>(outputs, 0);

@@ -127,10 +130,17 @@ class SortGpuKernel : public GpuKernel {
     }

     input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
+    is_null_input_ = CHECK_NULL_INPUT(input_shape_);
+    if (is_null_input_) {
+      MS_LOG(WARNING) << "For 'SortGpuKernel', input is null.";
+      InitSizeLists();
+      return true;
+    }

     input_rank_ = input_shape_.size();
-    if (input_rank_ > TRANSPOSE_MAX_DIMENSION) {
-      MS_LOG(ERROR) << "Sort cannot support input that has more than " << TRANSPOSE_MAX_DIMENSION << " dimensions.";
+    if (input_rank_ > TRANSPOSE_MAX_DIMENSION || input_rank_ < 1) {
+      MS_LOG(ERROR) << "For 'SortGpuKernel', the rank of input cannot be more than " << TRANSPOSE_MAX_DIMENSION
+                    << " dimensions or less than 1 dimension.";
       return false;
     }

@@ -145,6 +155,11 @@ class SortGpuKernel : public GpuKernel {
     if (axis_ < 0) {
       axis_ += input_rank_;
     }
+    if ((size_t)axis_ >= input_rank_) {
+      MS_LOG(ERROR) << "For 'SortGpuKernel', axis should be less than the rank of input, but got axis: " << axis_
+                    << ", the rank of input: " << input_rank_;
+      return false;
+    }

     perm_.resize(input_rank_);
     std::iota(perm_.begin(), perm_.end(), 0);
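The added guard completes the usual negative-axis convention: an axis in [-rank, rank) is normalized into [0, rank), and anything still out of range afterwards is rejected. A self-contained version of that check:

#include <cstdint>
#include <iostream>

// Returns false when the axis is out of range after normalization,
// mirroring the "axis should be less than the rank of input" error above.
bool NormalizeAxis(int64_t axis, size_t rank, size_t *out) {
  if (axis < 0) {
    axis += static_cast<int64_t>(rank);  // e.g. -1 -> rank - 1
  }
  if (axis < 0 || static_cast<size_t>(axis) >= rank) {
    return false;
  }
  *out = static_cast<size_t>(axis);
  return true;
}

int main() {
  size_t a = 0;
  std::cout << NormalizeAxis(-1, 3, &a) << " " << a << "\n";  // 1 2
  std::cout << NormalizeAxis(3, 3, &a) << "\n";               // 0 (rejected)
  return 0;
}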
@@ -172,6 +187,7 @@ class SortGpuKernel : public GpuKernel {
     input_size_ = 0;
     axis_ = 0;
     descending_ = false;
+    is_null_input_ = false;
     input_shape_.clear();
     input_rank_ = 0;
     transposed_shape_.clear();

@@ -206,6 +222,7 @@ class SortGpuKernel : public GpuKernel {
   size_t input_size_;
   int64_t axis_;
   bool descending_;
+  bool is_null_input_;
   std::vector<size_t> input_shape_;
   size_t input_rank_;
@@ -37,6 +37,9 @@ class SpaceToDepthFwdKernel : public GpuKernel {

   bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
               const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
+    if (is_null_input_) {
+      return true;
+    }
     // get device buffer ptr
     T *input = GetDeviceAddress<T>(inputs, 0);
     T *output = GetDeviceAddress<T>(outputs, 0);

@@ -71,6 +74,12 @@ class SpaceToDepthFwdKernel : public GpuKernel {
     }
     // check input_shape
     auto input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
+    is_null_input_ = CHECK_NULL_INPUT(input_shape);
+    if (is_null_input_) {
+      MS_LOG(WARNING) << "For 'SpaceToDepthGpuKernel', input is null.";
+      InitSizeLists();
+      return true;
+    }
     shape_size_ = input_shape.size();
     if (shape_size_ != SPACETODEPTH_BUFFER_DIMENSION) {
       MS_LOG(EXCEPTION) << "Input is " << shape_size_ << "-D, but SpaceToDepth supports 4-D tensor.";

@@ -102,6 +111,7 @@ class SpaceToDepthFwdKernel : public GpuKernel {
     input_size_ = 0;
     output_size_ = 0;
     block_size_ = 0;
+    is_null_input_ = false;
     in_ = 0;
     ic_ = 0;
     ih_ = 0;

@@ -131,6 +141,7 @@ class SpaceToDepthFwdKernel : public GpuKernel {
   size_t input_size_;
   size_t output_size_;
   size_t block_size_;
+  bool is_null_input_;
   size_t in_;
   size_t ic_;
   size_t ih_;
@@ -31,11 +31,14 @@ namespace kernel {
 template <typename T>
 class TensorCopySlicesGpuKernel : public GpuKernel {
  public:
-  TensorCopySlicesGpuKernel() : input_size_(0), update_size_(0), output_size_(0) {}
+  TensorCopySlicesGpuKernel() : input_size_(0), update_size_(0), output_size_(0), is_null_input_(false) {}
   ~TensorCopySlicesGpuKernel() {}

   bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
               const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
+    if (is_null_input_) {
+      return true;
+    }
     T *input_addr = GetDeviceAddress<T>(inputs, 0);
     T *update_addr = GetDeviceAddress<T>(inputs, 1);
     T *output_addr = GetDeviceAddress<T>(outputs, 0);

@@ -69,7 +72,13 @@ class TensorCopySlicesGpuKernel : public GpuKernel {
     }

     input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
+    auto update_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
+    is_null_input_ = CHECK_NULL_INPUT(input_shape_) || CHECK_NULL_INPUT(update_shape);
+    if (is_null_input_) {
+      MS_LOG(WARNING) << "For 'TensorCopySlicesGpuKernel', input or output is null.";
+      InitSizeLists();
+      return true;
+    }
     if (input_shape_.size() > kMaxDims) {
       MS_LOG(ERROR) << "StridedSlice supports dims no more than " << kMaxDims << ", but the input shape is "
                     << input_shape_.size();

@@ -80,6 +89,13 @@ class TensorCopySlicesGpuKernel : public GpuKernel {
     end_ = GetAttr<std::vector<int64_t>>(kernel_node, kAttrEnd);
     strides_ = GetAttr<std::vector<int64_t>>(kernel_node, kAttrStrides);

+    if (begin_.size() > input_shape_.size()) {
+      MS_LOG(ERROR) << "For 'TensorCopySlicesGpuKernel', the rank of begin attr cannot be more than the rank of input, "
+                    << "but got the rank of begin attr: " << begin_.size()
+                    << ", the rank of input: " << input_shape_.size();
+      return false;
+    }
+
     FillEmptyDims(kernel_node);
     output_shape_ = input_shape_;
     FillUpdateDim();

@@ -100,6 +116,7 @@ class TensorCopySlicesGpuKernel : public GpuKernel {
     auto len = begin_.size();
     size_t total_input_num = 1;
     for (size_t i = 0; i < len; ++i) {
+      MS_EXCEPTION_IF_ZERO("strides_[i]", strides_[i]);
      total_input_num *= ((end_[i] - begin_[i]) / strides_[i]);
     }
     if (total_input_num != total_update_num) {

@@ -161,6 +178,7 @@ class TensorCopySlicesGpuKernel : public GpuKernel {
     if (begin_[i] <= end_[i] && strides_[i] > 0) {
       update_shape_.push_back((end_[i] - 1 - begin_[i]) / strides_[i] + 1);
     } else if (begin_[i] > end_[i] && strides_[i] < 0) {
+      MS_EXCEPTION_IF_ZERO("strides_[i] + 1", strides_[i] + 1);
       update_shape_.push_back((end_[i] - begin_[i] + 1) / strides_[i] + 1);
     } else {
       update_shape_.push_back(0);
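The two formulas above count the elements a strided slice visits, for the ascending and descending cases respectively. A quick numeric check with hypothetical values matching each branch:

#include <cstdint>
#include <iostream>

// begin <= end, stride > 0: elements begin, begin+stride, ... below end.
int64_t SliceLenPos(int64_t begin, int64_t end, int64_t stride) {
  return (end - 1 - begin) / stride + 1;
}

// begin > end, stride < 0: elements begin, begin+stride, ... above end.
int64_t SliceLenNeg(int64_t begin, int64_t end, int64_t stride) {
  return (end - begin + 1) / stride + 1;
}

int main() {
  std::cout << SliceLenPos(2, 7, 2) << "\n";   // 3: visits 2, 4, 6
  std::cout << SliceLenPos(0, 5, 1) << "\n";   // 5: dense slice [0, 5)
  std::cout << SliceLenNeg(7, 2, -2) << "\n";  // 3: visits 7, 5, 3
  return 0;
}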
@@ -185,6 +203,7 @@ class TensorCopySlicesGpuKernel : public GpuKernel {
   size_t update_size_;
   size_t output_size_;
   inline static size_t kMaxDims = 8;
+  bool is_null_input_;
 };
 }  // namespace kernel
 }  // namespace mindspore
@@ -38,7 +38,8 @@ class TensorScatterAddGpuFwdKernel : public GpuKernel {
         work_shape_(nullptr),
         indices_dim_0_(0),
         indices_dim_1_(0),
-        memcpy_flag_(false) {}
+        memcpy_flag_(false),
+        is_null_input_(false) {}
   ~TensorScatterAddGpuFwdKernel() {
     if (indices_stride_ != nullptr) {
       device::gpu::GPUMemoryAllocator::GetInstance().FreeTensorMem(static_cast<void *>(indices_stride_));

@@ -54,6 +55,9 @@ class TensorScatterAddGpuFwdKernel : public GpuKernel {

   bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
               const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
+    if (is_null_input_) {
+      return true;
+    }
     VARIABLE_NOT_USED(workspace);
     T *input = GetDeviceAddress<T>(inputs, 0);
     S *indices = GetDeviceAddress<S>(inputs, 1);

@@ -77,10 +81,10 @@ class TensorScatterAddGpuFwdKernel : public GpuKernel {
     const size_t update_size = update_size_ / sizeof(T);
     const size_t output_size = output_size_ / sizeof(T);

-    CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_,
-                               cudaMemcpyAsync(&output[0], &input[0], input_size_, cudaMemcpyDeviceToDevice,
-                                               reinterpret_cast<cudaStream_t>(stream_ptr)),
-                               "cudaMemcpyAsync output failed");
+    CHECK_CUDA_RET_WITH_EXCEPT(
+      kernel_node_,
+      cudaMemcpyAsync(output, input, input_size_, cudaMemcpyDeviceToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)),
+      "cudaMemcpyAsync output failed");

     TensorScatterAdd(input, indices, update, output, block_size_, update_size, output_size, indices_dim_0_,
                      indices_dim_1_, indices_stride_, work_shape_, reinterpret_cast<cudaStream_t>(stream_ptr));

@@ -106,6 +110,13 @@ class TensorScatterAddGpuFwdKernel : public GpuKernel {
     indices_shapes_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
     input_shapes_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
     output_shapes_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);
+    is_null_input_ = CHECK_NULL_INPUT(update_shapes_) || CHECK_NULL_INPUT(indices_shapes_) ||
+                     CHECK_NULL_INPUT(input_shapes_) || CHECK_NULL_INPUT(output_shapes_);
+    if (is_null_input_) {
+      MS_LOG(WARNING) << "For 'TensorScatterAddGpuKernel', input or output is null.";
+      InitSizeLists();
+      return true;
+    }

     std::vector<size_t> shape_me = input_shapes_;
     (void)std::transform(shape_me.begin(), shape_me.end(), std::back_inserter(vec_work_shape_),

@@ -126,6 +137,10 @@ class TensorScatterAddGpuFwdKernel : public GpuKernel {
       MS_LOG(EXCEPTION) << "Failed to alloc work_shape_work, size: " << vec_work_len;
     }
     work_shape_ = static_cast<S *>(work_shape_work);
+    if (vec_work_shape_.size() < 1) {
+      MS_LOG(EXCEPTION) << "For 'TensorScatterAddGpuKernel', the rank of vec work cannot be less than 1, but got "
+                        << vec_work_shape_.size();
+    }

     InitSizeLists();

@@ -160,6 +175,10 @@ class TensorScatterAddGpuFwdKernel : public GpuKernel {
       output_size_ *= output_shapes_[i];
     }

+    if (indices_shapes_.size() < 1) {
+      MS_LOG(EXCEPTION) << "For 'TensorScatterAddGpuKernel', the rank of indices cannot be less than 1, but got "
+                        << indices_shapes_.size();
+    }
     // calculate indices dim 0/1
     indices_dim_0_ = indices_shapes_[0];
     indices_dim_1_ = indices_shapes_[indices_shapes_.size() - 1];

@@ -169,6 +188,11 @@ class TensorScatterAddGpuFwdKernel : public GpuKernel {
       block_size_ *= output_shapes_[i];
     }

+    if (indices_dim_1_ < 1 || indices_dim_1_ > output_shapes_.size()) {
+      MS_LOG(EXCEPTION) << "For 'TensorScatterAddGpuKernel', indices_shapes[-1] cannot be less than 1 or greater than "
+                        << "the rank of output_shapes, but got indices_shapes[-1]: " << indices_dim_1_
+                        << ", rank of output_shapes: " << output_shapes_.size();
+    }
     // calculate indices_stride
     vec_indices_stride_.resize(indices_dim_1_, 0);
     vec_indices_stride_[indices_dim_1_ - 1] = block_size_;
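vec_indices_stride_ is the lookup table that converts an N-dimensional scatter index into a flat offset into the output. The commit shows only its first two statements; the backward fill sketched below is the conventional row-major continuation and is an assumption, not code from this change.

#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  std::vector<size_t> output_shape = {4, 5, 6};
  size_t index_depth = 2;  // indices_shapes[-1]: how many leading dims an index addresses
  size_t block_size = 6;   // product of the output dims beyond index_depth

  std::vector<size_t> indices_stride(index_depth, 0);
  indices_stride[index_depth - 1] = block_size;
  // Conventional backward fill: each stride is the next stride times the
  // corresponding output dimension (assumed continuation, not in the diff).
  for (size_t i = index_depth - 1; i > 0; --i) {
    indices_stride[i - 1] = indices_stride[i] * output_shape[i];
  }
  for (size_t s : indices_stride) std::cout << s << " ";  // 30 6
  std::cout << "\n";
  return 0;
}

With these strides an index (i0, i1) lands at flat offset i0 * 30 + i1 * 6, the start of a contiguous block of block_size elements.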
@@ -201,6 +225,7 @@ class TensorScatterAddGpuFwdKernel : public GpuKernel {
   size_t indices_dim_0_;
   size_t indices_dim_1_;
   bool memcpy_flag_;
+  bool is_null_input_;
 };
 }  // namespace kernel
 }  // namespace mindspore
@@ -63,12 +63,10 @@ class TileGpuKernel : public GpuKernel {
     size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
     if (input_num != 1) {
       MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but Tile needs 1 input.";
-      return false;
     }
     size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
     if (output_num != 1) {
       MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but Tile has 1 output.";
-      return false;
     }
     input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
     output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);

@@ -78,6 +76,10 @@ class TileGpuKernel : public GpuKernel {
       InitSizeLists();
       return true;
     }
+    if (output_shape_.size() < 1) {
+      MS_LOG(EXCEPTION) << "For 'TileGpuKernel', the rank of output cannot be less than 1, but got "
+                        << output_shape_.size();
+    }
     input_size_ = 1;
     for (size_t i = 0; i < input_shape_.size(); i++) {
       input_size_ *= input_shape_[i];
@@ -140,6 +140,9 @@ std::pair<bool, size_t> GpuKernelFactory::GpuKernelAttrCheck(const std::string &
   }
   bool flag = true;
   auto attr_size = (&(iter->second))->at(attr_index).first.GetInputSize();
+  if (kernel_info->GetInputNum() > 0) {
+    MS_EXCEPTION_IF_ZERO("attr size", attr_size);
+  }
   // data type matching check of all input parameters of kernel
   for (size_t input_index = 0; input_index < kernel_info->GetInputNum(); input_index++) {
     GpuKernelFactory::CheckSM(kernel_info, input_index);

@@ -153,6 +156,9 @@ std::pair<bool, size_t> GpuKernelFactory::GpuKernelAttrCheck(const std::string &
       continue;
     }
     attr_size = (&(iter->second))->at(attr_index).first.GetOutputSize();
+    if (kernel_info->GetOutputNum() > 0) {
+      MS_EXCEPTION_IF_ZERO("attr size", attr_size);
+    }
     // data type matching check of all output parameters of kernel
     for (size_t output_index = 0; output_index < kernel_info->GetOutputNum(); output_index++) {
       if (kernel_info->GetOutputDeviceType(output_index) !=
@@ -28,7 +28,14 @@ constexpr int kMaxDimsSize = 3;
 template <typename T>
 class CumProdGpuKernel : public GpuKernel {
  public:
-  CumProdGpuKernel() : exclusive_(false), reverse_(false), axis_(0), input_size_0_(0), stride_(0), stride2_(0) {}
+  CumProdGpuKernel()
+      : exclusive_(false),
+        reverse_(false),
+        is_null_input_(false),
+        axis_(0),
+        input_size_0_(0),
+        stride_(0),
+        stride2_(0) {}
   ~CumProdGpuKernel() = default;

   const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }

@@ -37,6 +44,9 @@ class CumProdGpuKernel : public GpuKernel {

   bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
               const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
+    if (is_null_input_) {
+      return true;
+    }
     T *input_addr = GetDeviceAddress<T>(inputs, 0);
     T *output_addr = GetDeviceAddress<T>(outputs, 0);
     T *ws_addr = GetDeviceAddress<T>(workspace, 0);

@@ -51,6 +61,12 @@ class CumProdGpuKernel : public GpuKernel {
     }
     input_size_0_ = sizeof(T);
     shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
+    is_null_input_ = CHECK_NULL_INPUT(shape_);
+    if (is_null_input_) {
+      MS_LOG(WARNING) << "For 'CumProdGpuKernel', input is null.";
+      InitSizeLists();
+      return true;
+    }
     axis_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "axis"));
     exclusive_ = GetAttr<bool>(kernel_node, "exclusive");
     reverse_ = GetAttr<bool>(kernel_node, "reverse");

@@ -93,6 +109,7 @@ class CumProdGpuKernel : public GpuKernel {
   }
   bool exclusive_;
   bool reverse_;
+  bool is_null_input_;
   int axis_;
   size_t input_size_0_;
   size_t stride_;
@@ -39,6 +39,9 @@ class IdentityGpuKernel : public GpuKernel {

   bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
               const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
+    if (is_null_input_) {
+      return true;
+    }
     T *input_addr = GetDeviceAddress<T>(inputs, 0);
     T *output_addr = GetDeviceAddress<T>(outputs, 0);
@@ -36,6 +36,9 @@ class ApplyGradientDescentKernel : public GpuKernel {

   bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
               const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
+    if (is_null_input_) {
+      return true;
+    }
     VARIABLE_NOT_USED(workspace);
     T *var = GetDeviceAddress<T>(inputs, 0);
     T *alpha = GetDeviceAddress<T>(inputs, 1);

@@ -50,14 +53,18 @@ class ApplyGradientDescentKernel : public GpuKernel {
     size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
     if (input_num != 3) {
       MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but ApplyGradientDescent needs 3 inputs.";
-      return false;
     }
     size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
     if (output_num != 1) {
       MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but ApplyGradientDescent has 1 output.";
-      return false;
     }
     auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
+    is_null_input_ = CHECK_NULL_INPUT(input_shape);
+    if (is_null_input_) {
+      MS_LOG(WARNING) << "For 'ApplyGradientDescentGpuKernel', input is null.";
+      InitSizeLists();
+      return true;
+    }
     input_size_ = 1;
     for (size_t i = 0; i < input_shape.size(); i++) {
       input_size_ *= input_shape[i];

@@ -68,6 +75,7 @@ class ApplyGradientDescentKernel : public GpuKernel {

   void ResetResource() noexcept override {
     input_size_ = 1;
+    is_null_input_ = false;
     input_size_list_.clear();
     output_size_list_.clear();
     workspace_size_list_.clear();

@@ -83,6 +91,7 @@ class ApplyGradientDescentKernel : public GpuKernel {

  private:
   size_t input_size_;
+  bool is_null_input_;
   std::vector<size_t> input_size_list_;
   std::vector<size_t> output_size_list_;
   std::vector<size_t> workspace_size_list_;
@@ -36,6 +36,9 @@ class BCEWithLogitsLossKernel : public GpuKernel {

   bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
               const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
+    if (is_null_input_) {
+      return true;
+    }
     T *predict = GetDeviceAddress<T>(inputs, 0);
     T *target = GetDeviceAddress<T>(inputs, 1);
     T *weight = GetDeviceAddress<T>(inputs, 2);

@@ -69,25 +72,42 @@ class BCEWithLogitsLossKernel : public GpuKernel {
     size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
     if (input_num != 4) {
       MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but BCEWithLogitsLoss needs 4 inputs.";
-      return false;
     }
     size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
     if (output_num != 1) {
       MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but BCEWithLogitsLoss has 1 output.";
-      return false;
     }
     input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
+    weight_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
+    pos_weight_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
+    is_null_input_ =
+      CHECK_NULL_INPUT(input_shape_) || CHECK_NULL_INPUT(weight_shape_) || CHECK_NULL_INPUT(pos_weight_shape_);
+    if (is_null_input_) {
+      MS_LOG(WARNING) << "For 'BCEWithLogitsLossGpuKernel', input is null.";
+      InitSizeLists();
+      return true;
+    }
+    if (input_shape_.size() < 1) {
+      MS_LOG(EXCEPTION) << "For 'BCEWithLogitsLossGpuKernel', the rank of input cannot be less than 1, but got "
+                        << input_shape_.size();
+    }
+    if (weight_shape_.size() < 1) {
+      MS_LOG(EXCEPTION) << "For 'BCEWithLogitsLossGpuKernel', the rank of weight cannot be less than 1, but got "
+                        << weight_shape_.size();
+    }
+    if (pos_weight_shape_.size() < 1) {
+      MS_LOG(EXCEPTION) << "For 'BCEWithLogitsLossGpuKernel', the rank of pos_weight cannot be less than 1, but got "
+                        << pos_weight_shape_.size();
+    }
     input_size_ = 1;
     if (input_shape_.size() > MAX_LOGITS_DIMENSION) {
       MS_LOG(EXCEPTION) << "Input dimension is " << input_shape_.size()
                         << ", but BCEWithLogitsLoss can only support up to " << MAX_LOGITS_DIMENSION << "-D.";
-      return false;
     }
     for (size_t i = 0; i < input_shape_.size(); i++) {
       input_size_ *= input_shape_[i];
     }
     // weight shape
-    weight_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
     weight_size_ = 1;
     for (size_t i = 0; i < weight_shape_.size(); i++) {
       weight_size_ *= weight_shape_[i];

@@ -95,7 +115,6 @@ class BCEWithLogitsLossKernel : public GpuKernel {
     weight_need_broadcast_ = NeedBroadcast(&weight_shape_, input_shape_);
     // pos_weight shape
     pos_weight_size_ = 1;
-    pos_weight_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
     for (size_t i = 0; i < pos_weight_shape_.size(); i++) {
       pos_weight_size_ *= pos_weight_shape_[i];
     }

@@ -110,6 +129,7 @@ class BCEWithLogitsLossKernel : public GpuKernel {
     pos_weight_size_ = 1;
     weight_need_broadcast_ = false;
     pos_weight_need_broadcast_ = false;
+    is_null_input_ = false;
     input_shape_.clear();
     weight_shape_.clear();
     pos_weight_shape_.clear();

@@ -137,7 +157,7 @@ class BCEWithLogitsLossKernel : public GpuKernel {
   bool NeedBroadcast(std::vector<size_t> *shape, const std::vector<size_t> &result_shape) {
     // result_shape is larger than shape
     // and shape is able to be broadcast to result_shape
-    if (shape->size() != result_shape.size()) {
+    if (shape->size() < result_shape.size()) {
       size_t fill_size = result_shape.size() - shape->size();
       (void)shape->insert(shape->begin(), fill_size, 1);
       return true;
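Relaxing != to < matters for two reasons: a shorter weight shape is still left-padded with leading ones (numpy-style broadcasting), while a weight of higher rank than the logits no longer feeds result_shape.size() - shape->size(), an unsigned subtraction that would wrap, into insert(). That wrap is likely the motivating bug; the diff itself does not say so. A minimal illustration of the padding:

#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  std::vector<size_t> weight = {5};        // rank 1
  std::vector<size_t> logits = {2, 3, 5};  // rank 3
  if (weight.size() < logits.size()) {     // strictly-lower rank only
    size_t fill = logits.size() - weight.size();
    weight.insert(weight.begin(), fill, 1);  // -> {1, 1, 5}
  }
  for (size_t d : weight) std::cout << d << " ";
  std::cout << "\n";
  return 0;
}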
@@ -155,6 +175,7 @@ class BCEWithLogitsLossKernel : public GpuKernel {
   size_t pos_weight_size_;
   bool weight_need_broadcast_;
   bool pos_weight_need_broadcast_;
+  bool is_null_input_;
   std::vector<size_t> input_shape_;
   std::vector<size_t> weight_shape_;
   std::vector<size_t> pos_weight_shape_;
@@ -77,7 +77,7 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
     const float beta = 0;

     if (use_pad_) {
-      T *padded = GetPossiblyNullDeviceAddress<T>(workspace, 1);
+      T *padded = GetDeviceAddress<T>(workspace, 1);
       if (data_format_ == kOpFormat_NHWC) {
         CalPadNHWC(padded_size_ / sizeof(T), x, n_, old_height_, old_width_, c_, old_height_ + pad_height_,
                    old_width_ + pad_width_, pad_top_, pad_left_, pad_value_, padded,
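The same one-line change recurs in the next three convolution kernels. Judging by the names alone (their definitions are not in this diff), GetDeviceAddress is the strict accessor and GetPossiblyNullDeviceAddress the tolerant one; on the use_pad_ path the padded buffer is always written and read, so the strict form is the right choice. A self-contained sketch of that presumed contract:

#include <cassert>
#include <cstddef>
#include <vector>

struct Address { void *ptr = nullptr; size_t size = 0; };

// Tolerant accessor: a missing or empty slot yields nullptr (an assumption
// about the real helper's behavior, inferred from its name).
template <typename T>
T *GetPossiblyNullAddr(const std::vector<Address> &v, size_t i) {
  if (i >= v.size() || v[i].ptr == nullptr) return nullptr;
  return static_cast<T *>(v[i].ptr);
}

// Strict accessor: a missing buffer is a hard failure.
template <typename T>
T *GetAddr(const std::vector<Address> &v, size_t i) {
  T *p = GetPossiblyNullAddr<T>(v, i);
  assert(p != nullptr && "required buffer is missing");
  return p;
}

int main() {
  float buf[4] = {0};
  std::vector<Address> ws = {{buf, sizeof(buf)}};
  assert(GetAddr<float>(ws, 0) == buf);                  // present: OK
  assert(GetPossiblyNullAddr<float>(ws, 1) == nullptr);  // absent: tolerated
  return 0;
}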
@@ -50,7 +50,7 @@ class Conv3dGpuKernel : public GpuKernel {
     const float alpha = 1;
     const float beta = 0;
     if (use_pad_) {
-      T *padded_addr = GetPossiblyNullDeviceAddress<T>(workspace, 1);
+      T *padded_addr = GetDeviceAddress<T>(workspace, 1);
       CalPad3d(padded_size_ / sizeof(T), input_addr, n_, c_, old_depth_, old_height_, old_width_,
                old_depth_ + pad_depth_, old_height_ + pad_height_, old_width_ + pad_width_, pad_head_, pad_top_,
                pad_left_, pad_value_, padded_addr, reinterpret_cast<cudaStream_t>(stream_ptr));
@@ -61,7 +61,7 @@ class Conv3dGradFilterGpuKernel : public GpuKernel {
     const float alpha = 1;
     const float beta = 0;
     if (use_pad_) {
-      T *padded = GetPossiblyNullDeviceAddress<T>(workspace, 1);
+      T *padded = GetDeviceAddress<T>(workspace, 1);
       CalPad3d(padded_size_ / sizeof(T), x, n_, c_, old_depth_, old_height_, old_width_, old_depth_ + pad_depth_,
                old_height_ + pad_height_, old_width_ + pad_width_, pad_head_, pad_top_, pad_left_, pad_value_, padded,
                reinterpret_cast<cudaStream_t>(stream_ptr));
@@ -72,7 +72,7 @@ class Conv3dTransposeGpuFwdKernel : public GpuKernel {
       }
     } else {
       if (greater_stride_) {
-        T *stride_padded = GetPossiblyNullDeviceAddress<T>(workspace, 1);
+        T *stride_padded = GetDeviceAddress<T>(workspace, 1);
         CHECK_CUDNN_RET_WITH_EXCEPT(
           kernel_node_,
           cudnnConvolutionBackwardData(cudnn_handle_, &alpha, filter_desc_, filter_addr, input_desc_, input_addr,

@@ -93,6 +93,23 @@ class Conv3dTransposeGpuFwdKernel : public GpuKernel {
     return true;
   }

+  bool CheckNull(const std::vector<size_t> filter_shape, const std::vector<size_t> input_shape) {
+    is_null_input_ = CHECK_NULL_INPUT(filter_shape) || CHECK_NULL_INPUT(input_shape);
+    if (is_null_input_) {
+      MS_LOG(WARNING) << "For 'Conv3dTransposeGpuKernel', input is null.";
+      InitSizeLists();
+      return true;
+    }
+    return false;
+  }
+
+  void CheckSize(const size_t value, const size_t expect_value, const string arg_name) {
+    if (value != expect_value) {
+      MS_LOG(EXCEPTION) << "For 'Conv3dTransposeGpuKernel', the length of " << arg_name << " must be " << expect_value
+                        << ", but got " << value;
+    }
+  }
+
   bool Init(const CNodePtr &kernel_node) override {
     kernel_node_ = kernel_node;
     InitResource();

@@ -105,10 +122,7 @@ class Conv3dTransposeGpuFwdKernel : public GpuKernel {
     }
     auto filter_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
     auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
-    is_null_input_ = CHECK_NULL_INPUT(input_shape);
-    if (is_null_input_) {
-      MS_LOG(WARNING) << "Conv3dTransposeGpuBkwKernel input is null.";
-      InitSizeLists();
+    if (CheckNull(filter_shape, input_shape)) {
       return true;
     }
     std::vector<size_t> output_shape;

@@ -133,6 +147,8 @@ class Conv3dTransposeGpuFwdKernel : public GpuKernel {
     pad_mode_ = GetAttr<std::string>(kernel_node, "pad_mode");
     SetStrideAndDilation(kernel_node);
     std::vector<int> stride_pad_list(6, 0);
+    (void)CheckSize(filter_shape.size(), 5, "filter_shape");
+    (void)CheckSize(pad_list.size(), 6, "pad_list");
     if (pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase) {  // pad_mode_ = same
       UpdatePaddingAndDilation(input_shape, filter_shape, pad_list.data(), stride_pad_list.data());
     }
@@ -101,6 +101,7 @@ class Dropout3DGpuFwdKernel : public GpuKernel {
     }

     num_chan_ = n_ * c_;
+    MS_EXCEPTION_IF_ZERO("num channel", num_chan_);
     num_per_chan_ = num_count_ / num_chan_;  // number of elements per channel

     keep_prob_ = GetAttr<float>(kernel_node, "keep_prob");
@@ -51,12 +51,10 @@ class HSigmoidKernel : public GpuKernel {
     size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
     if (input_num != 1) {
       MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but HSigmoid needs 1 input.";
-      return false;
     }
     size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
     if (output_num != 1) {
       MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but HSigmoid has 1 output.";
-      return false;
     }
     auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
     is_null_input_ = CHECK_NULL_INPUT(input_shape);

@@ -52,12 +52,10 @@ class HSigmoidGradKernel : public GpuKernel {
     size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
     if (input_num != 2) {
       MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but HSigmoidGrad needs 2 inputs.";
-      return false;
     }
     size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
     if (output_num != 1) {
       MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but HSigmoidGrad has 1 output.";
-      return false;
     }
     auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
     is_null_input_ = CHECK_NULL_INPUT(input_shape);
@@ -38,6 +38,9 @@ class NLLLossGpuKernel : public GpuKernel {

   bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
               const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
+    if (is_null_input_) {
+      return true;
+    }
     T *input_device = GetDeviceAddress<T>(inputs, 0);
     int32_t *target_device = GetDeviceAddress<int32_t>(inputs, 1);  // nll_loss only supports int32 target
     S *weight_device = GetDeviceAddress<S>(inputs, 2);

@@ -45,7 +48,9 @@ class NLLLossGpuKernel : public GpuKernel {
     T *loss_device = GetDeviceAddress<T>(outputs, 0);
     S *total_weight_device = GetDeviceAddress<S>(outputs, 1);

-    T *tmp_loss_device = GetPossiblyNullDeviceAddress<T>(workspace, 0);
+    T *tmp_loss_device =
+      reduction_ != 0 ? GetDeviceAddress<T>(workspace, 0) : GetPossiblyNullDeviceAddress<T>(workspace, 0);

     S *tmp_target_weight_device = GetDeviceAddress<S>(workspace, 1);

     NLLLoss(n_, c_, reduction_, input_device, target_device, weight_device, loss_device, total_weight_device,
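The ternary makes the requirement conditional: when a reduction is applied (reduction_ != 0) the temporary loss buffer will be dereferenced, so the strict accessor runs; otherwise a null workspace slot is tolerated. That reduction_ == 0 corresponds to "no reduction" is an assumption based on this kernel's usage, not something the diff states. A self-contained illustration of the pattern:

#include <cassert>
#include <cstddef>
#include <vector>

// Demand a workspace buffer only on paths that will actually use it.
float *MaybeRequire(const std::vector<float *> &ws, size_t i, bool required) {
  float *p = (i < ws.size()) ? ws[i] : nullptr;
  if (required) {
    assert(p != nullptr && "reduction path needs the tmp_loss buffer");
  }
  return p;  // may legitimately be nullptr when not required
}

int main() {
  std::vector<float *> workspace = {nullptr};
  MaybeRequire(workspace, 0, /*required=*/false);  // fine: buffer unused
  // MaybeRequire(workspace, 0, true);  // would assert: buffer is needed
  return 0;
}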
@@ -55,6 +60,16 @@ class NLLLossGpuKernel : public GpuKernel {

   bool Init(const CNodePtr &kernel_node) override {
     std::vector<size_t> input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
+    is_null_input_ = CHECK_NULL_INPUT(input_shape);
+    if (is_null_input_) {
+      MS_LOG(WARNING) << "For 'NllLossGpuKernel', input is null.";
+      InitSizeLists();
+      return true;
+    }
+    if (input_shape.size() < 2) {
+      MS_LOG(EXCEPTION) << "For 'NllLossGpuKernel', the rank of input cannot be less than 2, but got "
+                        << input_shape.size();
+    }
     n_ = static_cast<int>(input_shape[0]);
     c_ = static_cast<int>(input_shape[1]);
     for (size_t i = 0; i < input_shape.size(); i++) {

@@ -75,6 +90,7 @@ class NLLLossGpuKernel : public GpuKernel {
     input_size_ = 1;
     n_ = 0;
     c_ = 0;
+    is_null_input_ = false;
     reduction_ = 1;  // default value
     tmp_loss_size_ = 0;
     tmp_target_weight_size_ = 0;  // tmp_target_weight (N,) array

@@ -106,6 +122,7 @@ class NLLLossGpuKernel : public GpuKernel {
   size_t tmp_target_weight_size_;
   int n_;
   int c_;
+  bool is_null_input_;
   std::vector<size_t> input_size_list_;
   std::vector<size_t> output_size_list_;
   std::vector<size_t> workspace_size_list_;
@@ -38,6 +38,9 @@ class NLLLossGradGpuKernel : public GpuKernel {

   bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
               const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
+    if (is_null_input_) {
+      return true;
+    }
     T *input_device = GetDeviceAddress<T>(inputs, 0);
     T *dloss_device = GetDeviceAddress<T>(inputs, 1);
     int32_t *target_device = GetDeviceAddress<int32_t>(inputs, 2);  // nll_loss_grad only supports int32 target

@@ -54,6 +57,16 @@ class NLLLossGradGpuKernel : public GpuKernel {

   bool Init(const CNodePtr &kernel_node) override {
     std::vector<size_t> input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
+    is_null_input_ = CHECK_NULL_INPUT(input_shape);
+    if (is_null_input_) {
+      MS_LOG(WARNING) << "For 'NllLossGradGpuKernel', input is null.";
+      InitSizeLists();
+      return true;
+    }
+    if (input_shape.size() < 2) {
+      MS_LOG(EXCEPTION) << "For 'NllLossGradGpuKernel', the rank of input cannot be less than 2, but got "
+                        << input_shape.size();
+    }
     n_ = static_cast<int>(input_shape[0]);
     c_ = static_cast<int>(input_shape[1]);
     for (size_t i = 0; i < input_shape.size(); i++) {

@@ -73,6 +86,7 @@ class NLLLossGradGpuKernel : public GpuKernel {
     input_size_ = 1;
     n_ = 0;
     c_ = 0;
+    is_null_input_ = false;
     reduction_ = 1;  // default value
     num_dloss_ = 1;  // default size (scalar)
     input_size_list_.clear();

@@ -96,6 +110,7 @@ class NLLLossGradGpuKernel : public GpuKernel {
   int reduction_;
   int n_;
   int c_;
+  bool is_null_input_;
   int num_dloss_;
   std::vector<size_t> input_size_list_;
   std::vector<size_t> output_size_list_;
@@ -36,6 +36,9 @@ class ResizeBilinearGpuKernel : public GpuKernel {

   bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
               const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
+    if (is_null_input_) {
+      return true;
+    }
     T *input = GetDeviceAddress<T>(inputs, 0);
     T *output = GetDeviceAddress<T>(outputs, 0);
     float h_scale = Scaling(input_h_, output_h_, align_corners_);
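Scaling() is not defined in this diff; resize kernels conventionally compute the output-to-input coordinate scale as shown below, so this is a sketch under that assumption rather than MindSpore's actual helper.

#include <iostream>

// Conventional resize scale factor: with align_corners the grid endpoints
// of input and output coincide, otherwise the ratio of extents is used.
float Scaling(int in_size, int out_size, bool align_corners) {
  if (align_corners && out_size > 1) {
    return static_cast<float>(in_size - 1) / static_cast<float>(out_size - 1);
  }
  return static_cast<float>(in_size) / static_cast<float>(out_size);
}

int main() {
  std::cout << Scaling(4, 8, false) << "\n";  // 0.5
  std::cout << Scaling(4, 8, true) << "\n";   // 3/7, approximately 0.428571
  return 0;
}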
@@ -58,8 +61,15 @@ class ResizeBilinearGpuKernel : public GpuKernel {
     }
     std::vector<size_t> input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
     std::vector<size_t> output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
-    if (input_shape.size() != 4) {
-      MS_LOG(ERROR) << "Input is " << input_shape.size() << "-D, but ResizeBilinear supports only 4-D inputs.";
+    is_null_input_ = CHECK_NULL_INPUT(input_shape) || CHECK_NULL_INPUT(output_shape);
+    if (is_null_input_) {
+      MS_LOG(WARNING) << "For 'ResizeBilinearGpuKernel', input or output is null.";
+      InitSizeLists();
+      return true;
+    }
+    if (input_shape.size() != 4 || output_shape.size() != 4) {
+      MS_LOG(ERROR) << "For 'ResizeBilinear', the rank of input and output must be 4, but got the rank of input: "
+                    << input_shape.size() << ", the rank of output: " << output_shape.size();
       return false;
     }
     n_ = SizeToInt(input_shape[0]);

@@ -83,6 +93,7 @@ class ResizeBilinearGpuKernel : public GpuKernel {

   void ResetResource() noexcept override {
     align_corners_ = false;
+    is_null_input_ = false;
     n_ = 0;
     c_ = 0;
     input_h_ = 0;

@@ -110,6 +121,7 @@ class ResizeBilinearGpuKernel : public GpuKernel {
   }

   bool align_corners_;
+  bool is_null_input_;
   int n_;
   int c_;
   int input_h_;
@@ -36,6 +36,9 @@ class ResizeBilinearGradGpuKernel : public GpuKernel {

   bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
               const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
+    if (is_null_input_) {
+      return true;
+    }
     T *dy = GetDeviceAddress<T>(inputs, 0);
     float *interim = GetDeviceAddress<float>(workspace, 0);
     T *dx = GetDeviceAddress<T>(outputs, 0);

@@ -67,6 +70,12 @@ class ResizeBilinearGradGpuKernel : public GpuKernel {
     std::vector<size_t> dy_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
     std::vector<size_t> x_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
     std::vector<size_t> dx_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
+    is_null_input_ = CHECK_NULL_INPUT(dy_shape) || CHECK_NULL_INPUT(x_shape) || CHECK_NULL_INPUT(dx_shape);
+    if (is_null_input_) {
+      MS_LOG(WARNING) << "For 'ResizeBilinearGradGpuKernel', input or output is null.";
+      InitSizeLists();
+      return true;
+    }
     if (dy_shape.size() != 4) {
       MS_LOG(ERROR) << "Input is " << dy_shape.size() << "-D, but ResizeBilinearGrad supports only 4-D inputs.";
       return false;

@@ -75,6 +84,10 @@ class ResizeBilinearGradGpuKernel : public GpuKernel {
       MS_LOG(ERROR) << "Input is " << x_shape.size() << "-D, but ResizeBilinearGrad supports only 4-D inputs.";
       return false;
     }
+    if (dx_shape.size() != 4) {
+      MS_LOG(ERROR) << "For 'ResizeBilinearGradGpuKernel', the rank of output must be 4, but got " << dx_shape.size();
+      return false;
+    }
     n_ = SizeToInt(dy_shape[0]);
     c_ = SizeToInt(dy_shape[1]);
     dy_h_ = SizeToInt(dy_shape[2]);

@@ -97,6 +110,7 @@ class ResizeBilinearGradGpuKernel : public GpuKernel {

   void ResetResource() noexcept override {
     align_corners_ = false;
+    is_null_input_ = false;
     n_ = 0;
     c_ = 0;
     dy_h_ = 0;

@@ -125,6 +139,7 @@ class ResizeBilinearGradGpuKernel : public GpuKernel {
   }

   bool align_corners_;
+   bool is_null_input_;
   int n_;
   int c_;
   int dy_h_;
@@ -84,6 +84,10 @@ class RandpermGpuKernel : public GpuKernel {
     }

     max_length_ = static_cast<size_t>(GetAttr<int64_t>(kernel_node, "max_length"));
+    if (max_length_ < 1) {
+      MS_LOG(ERROR) << "For 'RandpermGpuKernel', the max_length cannot be less than 1, but got " << max_length_;
+      return false;
+    }
     pad_ = static_cast<T>(GetAttr<int64_t>(kernel_node, "pad"));

     InitSizeLists();