!26751 optimizes the kernel error descriptions of Adagrad, Adam, Conv2d, etc.

Merge pull request !26751 from wangshuide/wsd_master
i-robot 2021-11-26 07:05:10 +00:00 committed by Gitee
commit d66f811022
50 changed files with 494 additions and 370 deletions
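The recurring change in every hunk below follows one pattern: an MS_LOG(ERROR) followed by "return false" becomes a single MS_LOG(EXCEPTION) that names the kernel ("For '<kernel_name>' ..."), states the expected value, and reports the actual one; likewise CHECK_NULL_INPUT becomes CHECK_SHAPE_NULL, which also takes the kernel and argument names so a null-shape warning points at the offending input. The following is a minimal self-contained sketch of the new pattern; MsLogException, CheckShapeNull, and CheckInputNum are simplified stand-ins for the MindSpore macros and helpers, not the real implementations.

#include <algorithm>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

// Stand-in for MS_LOG(EXCEPTION): build the message, then throw.
[[noreturn]] void MsLogException(const std::string &msg) { throw std::runtime_error(msg); }

// Stand-in for CHECK_SHAPE_NULL: a shape counts as "null" if it is empty or
// contains a zero dimension; kernel_name and arg_name let the real helper
// emit a warning that names the kernel and the offending argument.
bool CheckShapeNull(const std::vector<size_t> &shape, const std::string &kernel_name,
                    const std::string &arg_name) {
  (void)kernel_name;
  (void)arg_name;
  return shape.empty() ||
         std::any_of(shape.begin(), shape.end(), [](size_t d) { return d == 0; });
}

// The new input-count check: name the kernel, state the expectation,
// report the actual value, and throw instead of returning false.
void CheckInputNum(const std::string &kernel_name, size_t input_num, size_t expect_num) {
  if (input_num != expect_num) {
    std::ostringstream oss;
    oss << "For '" << kernel_name << "', the number of input should be " << expect_num
        << ", but got " << input_num;
    MsLogException(oss.str());
  }
}

int main() {
  CheckInputNum("ApplyAdagrad", 4, 4);            // matches: no exception thrown
  std::vector<size_t> grad_shape{32, 0, 28, 28};  // degenerate shape with a zero dim
  return CheckShapeNull(grad_shape, "ApplyAdagrad", "grad") ? 0 : 1;
}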

View File

@ -58,7 +58,7 @@ class ActivationGpuFwdKernel : public GpuKernel {
auto node_name = AnfAlgo::GetCNodeName(kernel_node);
auto iter = kernel_map.find(node_name);
if (iter == kernel_map.end()) {
MS_LOG(EXCEPTION) << "Kernel: " << node_name << " not support.";
MS_LOG(EXCEPTION) << "Only support these activations: ReLU6, Tanh, Elu, Sigmoid currently, but got " << node_name;
}
mode_ = iter->second;
@ -66,13 +66,11 @@ class ActivationGpuFwdKernel : public GpuKernel {
cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0)));
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(ERROR) << "Argument number is " << input_num << ", but ActivationGpuFwdKernel needs 1.";
return false;
MS_LOG(EXCEPTION) << "For '" << node_name << "', the number of input should be 1, but got " << input_num;
}
auto input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, node_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "ActivationGpuFwdKernel input is null.";
InitSizeLists();
return true;
}

View File

@ -67,7 +67,7 @@ class ActivationGradGpuKernel : public GpuKernel {
auto node_name = AnfAlgo::GetCNodeName(kernel_node);
auto iter = kernel_map.find(node_name);
if (iter == kernel_map.end()) {
MS_LOG(EXCEPTION) << "Kernel: " << node_name << " not support.";
MS_LOG(EXCEPTION) << "Only support these activations: ReLU6, Tanh, Elu, Sigmoid currently, but got " << node_name;
}
mode_ = iter->second;
@ -75,13 +75,11 @@ class ActivationGradGpuKernel : public GpuKernel {
cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0)));
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(ERROR) << "Argument number is " << input_num << ", but ActivationGradGpuKernel needs 2.";
return false;
MS_LOG(EXCEPTION) << "For '" << node_name << "', the number of input should be 2, but got " << input_num;
}
auto input_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, node_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "ActivationGradGpuKernel input is null.";
InitSizeLists();
return true;
}

View File

@ -18,6 +18,7 @@
#define MINDSPORE_ADAGRAD_GPU_KERNEL_H
#include <vector>
#include <string>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "backend/kernel_compiler/gpu/cuda_impl/adagrad_impl.cuh"
@ -33,7 +34,8 @@ class AdagradGpuKernel : public GpuKernel {
learning_rate_size_(0),
gradient_size_(0),
update_slots(true),
is_null_input_(false) {}
is_null_input_(false),
kernel_name_("ApplyAdagrad") {}
~AdagradGpuKernel() override = default;
@ -68,11 +70,11 @@ class AdagradGpuKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
update_slots = AnfAlgo::GetNodeAttr<bool>(kernel_node, "update_slots");
if (input_num != 4) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but adagrad needs 4 inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be 4, but got " << input_num;
}
variable_size_ = sizeof(T);
accumulation_size_ = sizeof(T);
@ -82,10 +84,10 @@ class AdagradGpuKernel : public GpuKernel {
auto variable_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto accumulation_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
auto gradient_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
is_null_input_ =
CHECK_NULL_INPUT(variable_shape) || CHECK_NULL_INPUT(accumulation_shape) || CHECK_NULL_INPUT(gradient_shape);
is_null_input_ = CHECK_SHAPE_NULL(variable_shape, kernel_name_, "var") ||
CHECK_SHAPE_NULL(accumulation_shape, kernel_name_, "accum") ||
CHECK_SHAPE_NULL(gradient_shape, kernel_name_, "grad");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'AdagradGpuKernel', input is null";
InitSizeLists();
return true;
}
@ -121,6 +123,7 @@ class AdagradGpuKernel : public GpuKernel {
size_t gradient_size_;
bool update_slots;
bool is_null_input_;
std::string kernel_name_;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;

View File

@ -18,6 +18,7 @@
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_NN_ADAM_GPU_KERNEL_H_
#include <vector>
#include <string>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "backend/kernel_compiler/gpu/cuda_impl/adam_impl.cuh"
@ -38,7 +39,8 @@ class AdamGpuKernel : public GpuKernel {
beta2_size_(0),
epsilon_size_(0),
gradient_size_(0),
is_null_input_(false) {}
is_null_input_(false),
kernel_name_("Adam") {}
~AdamGpuKernel() override = default;
@ -67,10 +69,11 @@ class AdamGpuKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != INPUT_NUM) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but adam needs " << INPUT_NUM << " inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be " << INPUT_NUM << ", but got "
<< input_num;
}
variable_size_ = sizeof(T);
@ -88,10 +91,10 @@ class AdamGpuKernel : public GpuKernel {
auto m_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
auto v_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
auto gradient_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 9);
is_null_input_ = CHECK_NULL_INPUT(variable_shape) || CHECK_NULL_INPUT(m_shape) || CHECK_NULL_INPUT(v_shape) ||
CHECK_NULL_INPUT(gradient_shape);
is_null_input_ = CHECK_SHAPE_NULL(variable_shape, kernel_name_, "var") ||
CHECK_SHAPE_NULL(m_shape, kernel_name_, "m") || CHECK_SHAPE_NULL(v_shape, kernel_name_, "v") ||
CHECK_SHAPE_NULL(gradient_shape, kernel_name_, "gradient");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'AdamGpuKernel', input is null.";
InitSizeLists();
return true;
}
@ -144,6 +147,7 @@ class AdamGpuKernel : public GpuKernel {
size_t epsilon_size_;
size_t gradient_size_;
bool is_null_input_;
std::string kernel_name_;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;

View File

@ -18,6 +18,7 @@
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_NN_ADAM_WEIGHT_DECAY_GPU_KERNEL_H_
#include <vector>
#include <string>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "backend/kernel_compiler/gpu/cuda_impl/adam_impl.cuh"
@ -37,7 +38,8 @@ class AdamWeightDecayGpuKernel : public GpuKernel {
epsilon_size_(0),
decay_size_(0),
gradient_size_(0),
is_null_input_(false) {}
is_null_input_(false),
kernel_name_("AdamWeightDecay") {}
~AdamWeightDecayGpuKernel() override = default;
@ -65,10 +67,11 @@ class AdamWeightDecayGpuKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != INPUT_NUM) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but adam needs " << INPUT_NUM << " inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be " << INPUT_NUM << ", but got "
<< input_num;
}
variable_size_ = sizeof(T);
@ -85,10 +88,10 @@ class AdamWeightDecayGpuKernel : public GpuKernel {
auto m_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
auto v_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
auto gradient_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 8);
is_null_input_ = CHECK_NULL_INPUT(variable_shape) || CHECK_NULL_INPUT(m_shape) || CHECK_NULL_INPUT(v_shape) ||
CHECK_NULL_INPUT(gradient_shape);
is_null_input_ = CHECK_SHAPE_NULL(variable_shape, kernel_name_, "var") ||
CHECK_SHAPE_NULL(m_shape, kernel_name_, "m") || CHECK_SHAPE_NULL(v_shape, kernel_name_, "v") ||
CHECK_SHAPE_NULL(gradient_shape, kernel_name_, "gradient");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'AdamWeightDecayGpuKernel', input is null";
InitSizeLists();
return true;
}
@ -139,6 +142,7 @@ class AdamWeightDecayGpuKernel : public GpuKernel {
size_t decay_size_;
size_t gradient_size_;
bool is_null_input_;
std::string kernel_name_;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;

View File

@ -38,7 +38,8 @@ class AdaptiveAvgPool2DKernel : public GpuKernel {
output_height(0),
output_width(0),
size(0),
is_null_input_(false) {}
is_null_input_(false),
kernel_name_("AdaptiveAvgPool2D") {}
~AdaptiveAvgPool2DKernel() override = default;
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
@ -60,6 +61,7 @@ class AdaptiveAvgPool2DKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
auto shape_addr = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, "output_size");
if (shape_addr.size() == 1) {
output_height = shape_addr[0];
@ -68,14 +70,13 @@ class AdaptiveAvgPool2DKernel : public GpuKernel {
output_height = static_cast<uint>(shape_addr[0]);
output_width = static_cast<uint>(shape_addr[1]);
} else {
MS_LOG(ERROR) << "Input Error.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'output_size' should be 1 or 2, but got "
<< shape_addr.size();
}
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but adaptive_avg_pool2d needs 1 inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be 1, but got " << input_num;
}
input_size_ = sizeof(T);
@ -83,17 +84,17 @@ class AdaptiveAvgPool2DKernel : public GpuKernel {
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape) || CHECK_NULL_INPUT(output_shape);
is_null_input_ =
CHECK_SHAPE_NULL(input_shape, kernel_name_, "input") || CHECK_SHAPE_NULL(output_shape, kernel_name_, "output");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'AdaptiveAvgPool2dGpuKernel', input or output is null";
InitSizeLists();
return true;
}
len = static_cast<uint>(input_shape.size());
if (len < 2) {
MS_LOG(ERROR) << "The input should have rank at least 2.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input cannot be less than 2, but got "
<< len;
}
input_height = static_cast<uint>(input_shape[len - 2]);
@ -127,6 +128,7 @@ class AdaptiveAvgPool2DKernel : public GpuKernel {
uint output_width;
uint size;
bool is_null_input_;
std::string kernel_name_;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;

View File

@ -39,7 +39,9 @@ class AdaptiveAvgPool2DGradKernel : public GpuKernel {
input_width_(0),
output_height_(0),
output_width_(0),
size_(0) {}
size_(0),
is_null_input_(false),
kernel_name_("AdaptiveAvgPool2DGrad") {}
~AdaptiveAvgPool2DGradKernel() override = default;
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
@ -48,6 +50,9 @@ class AdaptiveAvgPool2DGradKernel : public GpuKernel {
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (is_null_input_) {
return true;
}
T *dy_addr = GetDeviceAddress<T>(inputs, 1);
T *dx_addr = GetDeviceAddress<T>(outputs, 0);
@ -58,11 +63,11 @@ class AdaptiveAvgPool2DGradKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != kAdaptiveAvgPool2dGradInputNum) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but AdaptiveAvgPool2DGrad needs "
<< kAdaptiveAvgPool2dGradInputNum << " inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be "
<< kAdaptiveAvgPool2dGradInputNum << ", but got " << input_num;
}
input_size_ = sizeof(T);
@ -70,12 +75,22 @@ class AdaptiveAvgPool2DGradKernel : public GpuKernel {
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); // dy
auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0); // dx
is_null_input_ =
CHECK_SHAPE_NULL(input_shape, kernel_name_, "input") || CHECK_SHAPE_NULL(output_shape, kernel_name_, "output");
if (is_null_input_) {
InitSizeLists();
return true;
}
auto input_rank = input_shape.size();
auto output_rank = output_shape.size();
if (input_rank < kAdaptiveAvgPool2dGradMinRank || output_rank < kAdaptiveAvgPool2dGradMinRank) {
MS_LOG(ERROR) << "The input or output should have rank at least 2.";
return false;
if (input_rank < kAdaptiveAvgPool2dGradMinRank) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input cannot be less than "
<< kAdaptiveAvgPool2dGradMinRank << ", but got " << input_rank;
}
if (output_rank < kAdaptiveAvgPool2dGradMinRank) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of output cannot be less than "
<< kAdaptiveAvgPool2dGradMinRank << ", but got " << output_rank;
}
input_height_ = static_cast<uint>(input_shape[input_rank - 2]);
input_width_ = static_cast<uint>(input_shape[input_rank - 1]);
@ -109,6 +124,8 @@ class AdaptiveAvgPool2DGradKernel : public GpuKernel {
uint output_height_;
uint output_width_;
uint size_;
bool is_null_input_;
std::string kernel_name_;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;

View File

@ -18,6 +18,7 @@
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_NN_APPLY_GRADIENT_DESCENT_GPU_KERNEL_H_
#include <vector>
#include <string>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "backend/kernel_compiler/gpu/cuda_impl/apply_gradient_descent_impl.cuh"
@ -49,19 +50,19 @@ class ApplyGradientDescentKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 3) {
MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but ApplyGradientDescent needs 3 inputs.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be 3, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but ApplyGradientDescent has 1 output.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of output should be 1, but got " << output_num;
}
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name_, "var");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'ApplyGradientDescentGpuKernel', input is null.";
InitSizeLists();
return true;
}
@ -79,6 +80,7 @@ class ApplyGradientDescentKernel : public GpuKernel {
input_size_list_.clear();
output_size_list_.clear();
workspace_size_list_.clear();
kernel_name_ = "ApplyGradientDescent";
}
protected:
@ -92,6 +94,7 @@ class ApplyGradientDescentKernel : public GpuKernel {
private:
size_t input_size_;
bool is_null_input_;
std::string kernel_name_;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
std::vector<size_t> workspace_size_list_;

View File

@ -92,7 +92,8 @@ class BatchNormGpuKernel : public GpuKernel {
} else if (kernel_name == kBatchNormWithAddAndActivation) {
bn_ops_ = CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION;
} else {
MS_LOG(EXCEPTION) << "Invalid kernel name: " << kernel_name;
MS_LOG(EXCEPTION) << "Only support these kernel names: " << kBatchNorm << ", " << kBatchNormWithActivation << ", "
<< kBatchNormWithAddAndActivation << ", but got " << kernel_name;
}
InitResource();
@ -104,23 +105,23 @@ class BatchNormGpuKernel : public GpuKernel {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (bn_ops_ == CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION) {
if (input_num != CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION_INPUT_NUM) {
MS_LOG(EXCEPTION) << "input tensor size is " << input_num << ", " << kernel_name << " should be "
<< CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION_INPUT_NUM;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of input should be "
<< CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION_INPUT_NUM << ", but got " << input_num;
}
} else {
if (input_num != NO_CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION_INPUT_NUM) {
MS_LOG(EXCEPTION) << "input tensor size is " << input_num << ", " << kernel_name << " should be "
<< NO_CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION_INPUT_NUM;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of input should be "
<< NO_CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION_INPUT_NUM << ", but got " << input_num;
}
}
auto shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
if (shape.size() != 4 && shape.size() != 2) {
MS_LOG(EXCEPTION) << "tensor shape is " << shape.size() << ", BatchNormGpuKernel should be 2D or 4D";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input should be 2 or 4, but got "
<< shape.size();
}
is_null_input_ = CHECK_NULL_INPUT(shape);
is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'BatchNormGpuKernel', input is null";
InitSizeLists();
return true;
}

View File

@ -124,7 +124,9 @@ class BatchNormGradGpuKernel : public GpuKernel {
} else if (kernel_name == kBatchNormGradWithAddAndActivation) {
bn_ops_ = CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION;
} else {
MS_LOG(EXCEPTION) << "Invalid kernel name: " << kernel_name;
MS_LOG(EXCEPTION) << "Only support these kernel names: " << kBatchNormGradOpName << ", "
<< kBatchNormGradWithActivation << ", " << kBatchNormGradWithAddAndActivation << ", but got "
<< kernel_name;
}
InitResource();
@ -134,23 +136,23 @@ class BatchNormGradGpuKernel : public GpuKernel {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (bn_ops_ == CUDNN_BATCHNORM_OPS_BN) {
if (input_num != CUDNN_BATCHNORM_OPS_BN_INPUT_NUM) {
MS_LOG(EXCEPTION) << "input tensor size is " << input_num << ", " << kernel_name << " should be "
<< CUDNN_BATCHNORM_OPS_BN_INPUT_NUM;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of input should be "
<< CUDNN_BATCHNORM_OPS_BN_INPUT_NUM << ", but got " << input_num;
}
} else {
if (input_num != NO_CUDNN_BATCHNORM_OPS_BN_INPUT_NUM) {
MS_LOG(EXCEPTION) << "input tensor size is " << input_num << ", " << kernel_name << " should be "
<< NO_CUDNN_BATCHNORM_OPS_BN_INPUT_NUM;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of input should be "
<< NO_CUDNN_BATCHNORM_OPS_BN_INPUT_NUM << ", but got " << input_num;
}
}
auto shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
if (shape.size() != 4 && shape.size() != 2) {
MS_LOG(EXCEPTION) << "tensor shape is " << shape.size() << ", BatchNormGradGpuKernel should be 2D or 4D";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input should be 2 or 4, but got "
<< shape.size();
}
is_null_input_ = CHECK_NULL_INPUT(shape);
is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'BatchNormGradGpuKernel', input is null";
InitSizeLists();
return true;
}

View File

@ -18,6 +18,7 @@
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_NN_BCE_WITH_LOGITS_LOSS_KERNEL_H_
#include <vector>
#include <string>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "backend/kernel_compiler/gpu/cuda_impl/bce_with_logits_loss_impl.cuh"
@ -68,41 +69,42 @@ class BCEWithLogitsLossKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 4) {
MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but BCEWithLogitsLoss needs 4 inputs.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be 4, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but BCEWithLogitsLoss has 1 output.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of output should be 1, but got " << output_num;
}
input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
weight_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
pos_weight_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
is_null_input_ =
CHECK_NULL_INPUT(input_shape_) || CHECK_NULL_INPUT(weight_shape_) || CHECK_NULL_INPUT(pos_weight_shape_);
is_null_input_ = CHECK_SHAPE_NULL(input_shape_, kernel_name_, "logits") ||
CHECK_SHAPE_NULL(weight_shape_, kernel_name_, "weight") ||
CHECK_SHAPE_NULL(pos_weight_shape_, kernel_name_, "pos_weight");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'BCEWithLogitsLossGpuKernel', input is null.";
InitSizeLists();
return true;
}
if (input_shape_.size() < 1) {
MS_LOG(EXCEPTION) << "For 'BCEWithLogitsLossGpuKernel', the rank of input cannot be less than 1, but got "
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of logits cannot be less than 1, but got "
<< input_shape_.size();
}
if (weight_shape_.size() < 1) {
MS_LOG(EXCEPTION) << "For 'BCEWithLogitsLossGpuKernel', the rank of weight cannot be less than 1, but got "
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of weight cannot be less than 1, but got "
<< weight_shape_.size();
}
if (pos_weight_shape_.size() < 1) {
MS_LOG(EXCEPTION) << "For 'BCEWithLogitsLossGpuKernel', the rank of pos_weight cannot be less than 1, but got "
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of pos_weight cannot be less than 1, but got "
<< pos_weight_shape_.size();
}
input_size_ = 1;
if (input_shape_.size() > MAX_LOGITS_DIMENSION) {
MS_LOG(EXCEPTION) << "Input dimension is " << input_shape_.size()
<< ", but BCEWithLogitsLoss can only support up to " << MAX_LOGITS_DIMENSION << "-D.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of logits cannot be greater than "
<< MAX_LOGITS_DIMENSION << ", but got " << input_shape_.size();
}
for (size_t i = 0; i < input_shape_.size(); i++) {
input_size_ *= input_shape_[i];
@ -130,6 +132,7 @@ class BCEWithLogitsLossKernel : public GpuKernel {
weight_need_broadcast_ = false;
pos_weight_need_broadcast_ = false;
is_null_input_ = false;
kernel_name_ = "BCEWithLogitsLoss";
input_shape_.clear();
weight_shape_.clear();
pos_weight_shape_.clear();
@ -176,6 +179,7 @@ class BCEWithLogitsLossKernel : public GpuKernel {
bool weight_need_broadcast_;
bool pos_weight_need_broadcast_;
bool is_null_input_;
std::string kernel_name_;
std::vector<size_t> input_shape_;
std::vector<size_t> weight_shape_;
std::vector<size_t> pos_weight_shape_;

View File

@ -61,26 +61,27 @@ class BiasAddGpuKernel : public GpuKernel {
return true;
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
InitResource();
cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0)));
auto x_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto num_dims = x_shape.size();
is_null_input_ = CHECK_NULL_INPUT(x_shape);
is_null_input_ = CHECK_SHAPE_NULL(x_shape, kernel_name_, "input_x");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'BiasAddGpuKernel', input is null";
InitSizeLists();
return true;
}
if (num_dims < 2) {
MS_LOG(EXCEPTION) << "input dims must be at least 2, but got " << num_dims;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input_x cannot be less than 2, but got "
<< num_dims;
}
std::string format = GetAttr<std::string>(kernel_node, "format");
string::size_type pos = format.find("C");
if (pos == std::string::npos || pos >= num_dims) {
MS_LOG(EXCEPTION) << "format '" << format << "' invalid";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', 'C' character should be in 'format', but got " << format;
}
// Expand to 4 dims for cudnnSetTensorNdDescriptorEx.
@ -118,6 +119,7 @@ class BiasAddGpuKernel : public GpuKernel {
b_desc_ = nullptr;
op_desc_ = nullptr;
is_null_input_ = false;
kernel_name_ = "BiasAdd";
input_size_list_.clear();
output_size_list_.clear();
workspace_size_list_.clear();
@ -161,6 +163,7 @@ class BiasAddGpuKernel : public GpuKernel {
cudnnTensorDescriptor_t b_desc_;
cudnnOpTensorDescriptor_t op_desc_;
bool is_null_input_;
std::string kernel_name_;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
std::vector<size_t> workspace_size_list_;

View File

@ -33,6 +33,8 @@ class BiasAddGradGpuKernel : public GpuKernel {
public:
BiasAddGradGpuKernel()
: same_dims_(true),
is_null_input_(false),
kernel_name_("BiasAddGrad"),
use_cudnn_(false),
dy_num_(1),
db_num_(1),
@ -83,11 +85,11 @@ class BiasAddGradGpuKernel : public GpuKernel {
return true;
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
auto dy_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(dy_shape);
is_null_input_ = CHECK_SHAPE_NULL(dy_shape, kernel_name_, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'BiasAddGradGpuKernel', input is null";
InitSizeLists();
return true;
}
@ -96,12 +98,13 @@ class BiasAddGradGpuKernel : public GpuKernel {
cudnn_compute_format_ = (input_device_format == kOpFormat_NHWC) ? CUDNN_TENSOR_NHWC : CUDNN_TENSOR_NCHW;
num_dims_ = dy_shape.size();
if (num_dims_ < 2) {
MS_LOG(EXCEPTION) << "input dims must be at least 2, but got " << num_dims_;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input cannot be less than 2, but got "
<< num_dims_;
}
std::string format = GetAttr<std::string>(kernel_node, "format");
string::size_type pos = format.find("C");
if (pos == std::string::npos || pos >= num_dims_) {
MS_LOG(EXCEPTION) << "format '" << format << "' invalid";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', 'C' character should be in 'format', but got " << format;
}
bias_size_ = dy_shape[pos];
auto num_dims_fix = std::max(num_dims_, 4UL);
@ -215,6 +218,7 @@ class BiasAddGradGpuKernel : public GpuKernel {
private:
bool same_dims_;
bool is_null_input_;
std::string kernel_name_;
bool use_cudnn_;
size_t dy_num_; // for own implementation
size_t db_num_;

View File

@ -29,7 +29,13 @@ namespace kernel {
template <typename T>
class BinaryCrossEntropyGpuKernel : public GpuKernel {
public:
BinaryCrossEntropyGpuKernel() : weight_defined_(false), input_size_(1), reduction_(1), workspace_size_(1) {}
BinaryCrossEntropyGpuKernel()
: weight_defined_(false),
is_null_input_(false),
kernel_name_("BinaryCrossEntropy"),
input_size_(1),
reduction_(1),
workspace_size_(1) {}
~BinaryCrossEntropyGpuKernel() override = default;
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
@ -55,10 +61,10 @@ class BinaryCrossEntropyGpuKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name_, "logits");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'BinaryCrossEntropyGpuKernel', input is null";
InitSizeLists();
return true;
}
@ -95,6 +101,7 @@ class BinaryCrossEntropyGpuKernel : public GpuKernel {
private:
bool weight_defined_; // true: there are 3 inputs, false: there are 2 inputs(no [weight])
bool is_null_input_;
std::string kernel_name_;
size_t input_size_;
int reduction_;
size_t workspace_size_;

View File

@ -29,7 +29,12 @@ namespace kernel {
template <typename T>
class BinaryCrossEntropyGradGpuKernel : public GpuKernel {
public:
BinaryCrossEntropyGradGpuKernel() : input_size_(1), reduction_(1), weight_defined_(false), is_null_input_(false) {}
BinaryCrossEntropyGradGpuKernel()
: input_size_(1),
reduction_(1),
weight_defined_(false),
is_null_input_(false),
kernel_name_("BinaryCrossEntropyGrad") {}
~BinaryCrossEntropyGradGpuKernel() override = default;
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
@ -57,10 +62,10 @@ class BinaryCrossEntropyGradGpuKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name_, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'BinaryCrossEntropyGradGpuKernel', input is null";
InitSizeLists();
return true;
}
@ -95,6 +100,7 @@ class BinaryCrossEntropyGradGpuKernel : public GpuKernel {
int reduction_;
bool weight_defined_; // true: there are 4 inputs, false: there are 3 inputs(no [weight])
bool is_null_input_;
std::string kernel_name_;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
std::vector<size_t> workspace_size_list_;

View File

@ -18,6 +18,7 @@
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_NN_FUSED_SCALE_MOMENTUM_GPU_KERNEL_H_
#include <vector>
#include <string>
#include <memory>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
@ -27,7 +28,8 @@ namespace kernel {
template <typename T, typename S>
class CombineMomentumGpuKernel : public GpuKernel {
public:
CombineMomentumGpuKernel() : element_num_(1), num_(0), input_num_(6), is_null_input_(false) {}
CombineMomentumGpuKernel()
: element_num_(1), num_(0), input_num_(6), is_null_input_(false), kernel_name_("CombineMomentum") {}
~CombineMomentumGpuKernel() override = default;
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
@ -62,6 +64,7 @@ class CombineMomentumGpuKernel : public GpuKernel {
return true;
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
num_ = GetAttr<size_t>(kernel_node, "n");
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
if (kernel_name == "CombineMomentum") {
@ -72,9 +75,9 @@ class CombineMomentumGpuKernel : public GpuKernel {
for (size_t i = 0; i < num_; i++) {
element_num_ = 1;
auto variable_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, i * input_num_ + input_num_ - 5);
is_null_input_ = CHECK_NULL_INPUT(variable_shape);
is_null_input_ = CHECK_SHAPE_NULL(variable_shape, kernel_name_,
"input[" + std::to_string(i * input_num_ + input_num_ - 5) + "]");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'CombineMomentumGpuKernel', input is null";
InitSizeLists();
return true;
}
@ -107,6 +110,7 @@ class CombineMomentumGpuKernel : public GpuKernel {
size_t num_;
int input_num_;
bool is_null_input_;
std::string kernel_name_;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
std::vector<size_t> workspace_size_list_;

View File

@ -77,11 +77,10 @@ class Conv2dGpuFwdKernel : public GpuKernel {
return true;
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
InitResource();
if (!CheckParam(kernel_node)) {
return false;
}
(void)CheckParam(kernel_node);
cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0)));
data_format_ = AnfAlgo::GetInputFormat(kernel_node, 0);
auto format_attr = GetAttr<std::string>(kernel_node, "format");
@ -91,9 +90,10 @@ class Conv2dGpuFwdKernel : public GpuKernel {
auto in_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
auto filter_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
auto output_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(in_shape) || CHECK_NULL_INPUT(filter_shape) || CHECK_NULL_INPUT(output_shape);
is_null_input_ = CHECK_SHAPE_NULL(in_shape, kernel_name_, "x") ||
CHECK_SHAPE_NULL(filter_shape, kernel_name_, "weight") ||
CHECK_SHAPE_NULL(output_shape, kernel_name_, "output");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'Conv2dGpuFwdKernel', input or output is null.";
InitSizeLists();
return true;
}
@ -111,7 +111,7 @@ class Conv2dGpuFwdKernel : public GpuKernel {
(void)std::transform(pad_list_me.begin(), pad_list_me.end(), std::back_inserter(pad_list),
[](const int64_t &value) { return static_cast<int>(value); });
if (pad_list.size() != 4) {
MS_LOG(EXCEPTION) << "Conv2dGpuFwdKernel pad_list must have length 4.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'pad' should be 4, but got " << pad_list.size();
}
pad_height_ = pad_list[0];
@ -196,6 +196,7 @@ class Conv2dGpuFwdKernel : public GpuKernel {
dilation_.clear();
group_ = 1;
is_null_input_ = false;
kernel_name_ = "Conv2d";
input_size_ = 0;
filter_size_ = 0;
output_size_ = 0;
@ -275,18 +276,15 @@ class Conv2dGpuFwdKernel : public GpuKernel {
}
private:
bool CheckParam(const CNodePtr &kernel_node) {
void CheckParam(const CNodePtr &kernel_node) {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but conv2d needs 2 inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be 2, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but conv2d needs 1 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of output should be 1, but got " << output_num;
}
return true;
}
void Set4DDesc(const std::vector<size_t> &in_shape, const std::vector<size_t> &filter_shape,
@ -345,16 +343,20 @@ class Conv2dGpuFwdKernel : public GpuKernel {
(void)std::transform(dilation_me.begin(), dilation_me.end(), std::back_inserter(dilation_),
[](const int64_t &value) { return static_cast<int>(value); });
if (stride_.size() != 4) {
MS_LOG(EXCEPTION) << "Conv2d's' stride must be 4d!";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'stride' should be 4, but got "
<< stride_.size();
}
if (stride_[0] != 1 || stride_[1] != 1) {
MS_LOG(EXCEPTION) << "Conv2d stride only support 1 in N axis and C axis!";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the value of 'stride' at 0 and 1 axis should be 1, but got "
<< "stride[0]: " << stride_[0] << ", stride[1]: " << stride_[1];
}
if (dilation_.size() != 4) {
MS_LOG(EXCEPTION) << "Conv2d's dilation must be 4d!";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'dilation' should be 4, but got "
<< dilation_.size();
}
if (dilation_[0] != 1 || dilation_[1] != 1) {
MS_LOG(EXCEPTION) << "Conv2d dilation only support 1 in N axis and C axis!";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the value of 'dilation' at 0 and 1 axis should be 1, but got "
<< "dilation[0]: " << dilation_[0] << ", dilation[1]: " << dilation_[1];
}
}
cudnnHandle_t cudnn_handle_;
@ -384,6 +386,7 @@ class Conv2dGpuFwdKernel : public GpuKernel {
std::vector<int> dilation_;
int group_;
bool is_null_input_;
std::string kernel_name_;
size_t input_size_;
size_t filter_size_;
size_t output_size_;

View File

@ -52,6 +52,7 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
c_(0),
group_(1),
is_null_input_(false),
kernel_name_("Conv2dGradFilter"),
input_size_(0),
dy_size_(0),
output_size_(0),
@ -103,17 +104,15 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
return true;
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
InitResource();
if (!CheckParam(kernel_node)) {
return false;
}
(void)CheckParam(kernel_node);
cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0)));
auto dy_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
auto in_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
is_null_input_ = CHECK_NULL_INPUT(dy_shape) || CHECK_NULL_INPUT(in_shape);
is_null_input_ = CHECK_SHAPE_NULL(dy_shape, kernel_name_, "dy") || CHECK_SHAPE_NULL(in_shape, kernel_name_, "x");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'ConvGradFilterGpuBkwKernel', input is null.";
InitSizeLists();
return true;
}
@ -139,7 +138,7 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
(void)std::transform(pad_list_me.begin(), pad_list_me.end(), std::back_inserter(pad_list),
[](const int64_t &value) { return static_cast<int>(value); });
if (pad_list.size() != 4) {
MS_LOG(EXCEPTION) << "For 'Conv2dGradFilterGpuBkwKernel', the length of pad_list must be 4.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'pad' should be 4, but got " << pad_list.size();
}
pad_height_ = pad_list[0];
pad_width_ = pad_list[2];
@ -270,18 +269,15 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
}
private:
bool CheckParam(const CNodePtr &kernel_node) {
void CheckParam(const CNodePtr &kernel_node) {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but ConvGradFilter needs 2 inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be 2, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but ConvGradFilter needs 1 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of output should be 1, but got " << output_num;
}
return true;
}
void SelectAlgorithm(cudnnTensorDescriptor_t x_desc_real) {
constexpr int requested_algo_count = 1;
@ -345,13 +341,16 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
(void)std::transform(dilation_me.begin(), dilation_me.end(), std::back_inserter(dilation_),
[](const int64_t &value) { return static_cast<int>(value); });
if (stride_.size() != 2) {
MS_LOG(EXCEPTION) << "ConvGradFilterGpuBkwKernel's stride must be 2d!";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'stride' should be 2, but got "
<< stride_.size();
}
if (dilation_.size() != 4) {
MS_LOG(EXCEPTION) << "ConvGradFilterGpuBkwKernel's dilation must be 4d!";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'dilation' should be 4, but got "
<< dilation_.size();
}
if (dilation_[0] != 1 || dilation_[1] != 1) {
MS_LOG(EXCEPTION) << "ConvGradFilterGpuBkwKernel dilation only support 1 in N axis and C axis!";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the value of 'dilation' at 0 and 1 axis should be 1, but got "
<< "dilation[0]: " << dilation_[0] << ", dilation[1]: " << dilation_[1];
}
}
cudnnHandle_t cudnn_handle_;
@ -382,6 +381,7 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
std::vector<int> dilation_;
int group_;
bool is_null_input_;
std::string kernel_name_;
size_t input_size_;
size_t dy_size_;
size_t output_size_;

View File

@ -54,6 +54,7 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
c_(0),
group_(1),
is_null_input_(false),
kernel_name_("Conv2dGradInput"),
dy_size_(0),
w_size_(0),
output_size_(0),
@ -104,9 +105,9 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
}
bool CheckNull(const std::vector<size_t> dy_shape, const std::vector<size_t> filter_shape) {
is_null_input_ = CHECK_NULL_INPUT(dy_shape) || CHECK_NULL_INPUT(filter_shape);
is_null_input_ =
CHECK_SHAPE_NULL(dy_shape, kernel_name_, "dy") || CHECK_SHAPE_NULL(filter_shape, kernel_name_, "weight");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'Conv2dGradInputGpuKernel', input is null.";
InitSizeLists();
return true;
}
@ -114,11 +115,10 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
InitResource();
if (!CheckParam(kernel_node)) {
return false;
}
(void)CheckParam(kernel_node);
cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0)));
data_format_ = AnfAlgo::GetInputFormat(kernel_node, 0);
auto format_attr = GetAttr<std::string>(kernel_node, "format");
@ -152,7 +152,7 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
(void)std::transform(pad_list_me.begin(), pad_list_me.end(), std::back_inserter(pad_list),
[](const int64_t &value) { return static_cast<int>(value); });
if (pad_list.size() != 4) {
MS_LOG(EXCEPTION) << "For 'Conv2dGradInputGpuKernel', the length of pad_list must be 4.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'pad' should be 4, but got " << pad_list.size();
}
pad_height_ = pad_list[0];
pad_width_ = pad_list[2];
@ -279,18 +279,15 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
}
private:
bool CheckParam(const CNodePtr &kernel_node) {
void CheckParam(const CNodePtr &kernel_node) {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but ConvGradInput needs 2 inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be 2, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but ConvGradInput needs 1 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of output should be 1, but got " << output_num;
}
return true;
}
void SetPad(const std::vector<int> &input_shape, const CNodePtr &kernel_node) {
std::vector<int> pad_list;
@ -369,13 +366,16 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
(void)std::transform(dilation_me.begin(), dilation_me.end(), std::back_inserter(dilation_),
[](const int64_t &value) { return static_cast<int>(value); });
if (stride_.size() != 2) {
MS_LOG(EXCEPTION) << "ConvGradInputGpuBkwKernel's stride must be 2d!";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'stride' should be 2, but got "
<< stride_.size();
}
if (dilation_.size() != 4) {
MS_LOG(EXCEPTION) << "ConvGradInputGpuBkwKernel's dilation must be 4d!";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'dilation' should be 4, but got "
<< dilation_.size();
}
if (dilation_[0] != 1 || dilation_[1] != 1) {
MS_LOG(EXCEPTION) << "ConvGradInputGpuBkwKernel dilation only support 1 in N axis and C axis!";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the value of 'dilation' at 0 and 1 axis should be 1, but got "
<< "dilation[0]: " << dilation_[0] << ", dilation[1]: " << dilation_[1];
}
}
cudnnHandle_t cudnn_handle_;
@ -404,6 +404,7 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
std::vector<int> dilation_;
int group_;
bool is_null_input_;
std::string kernel_name_;
size_t dy_size_;
size_t w_size_;
size_t output_size_;

View File

@ -69,35 +69,33 @@ class Conv3dGpuKernel : public GpuKernel {
return true;
}
bool CheckNull(const std::vector<size_t> in_shape, const std::vector<size_t> filter_shape,
const std::vector<size_t> output_shape) {
is_null_input_ = CHECK_NULL_INPUT(in_shape) || CHECK_NULL_INPUT(filter_shape) || CHECK_NULL_INPUT(output_shape);
if (is_null_input_) {
MS_LOG(WARNING) << "For 'Conv3dGpuKernel', input or output is null.";
InitSizeLists();
return true;
void CheckSize(const size_t value, const size_t expect_value, const string arg_name) {
if (value != expect_value) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of " << arg_name << " should be "
<< expect_value << ", but got " << value;
}
return false;
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
InitResource();
if (!CheckParam(kernel_node)) {
return false;
}
(void)CheckParam(kernel_node);
cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0)));
data_format_ = kOpFormat_NCDHW;
auto in_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
auto filter_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
auto output_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
if (CheckNull(in_shape, filter_shape, output_shape)) {
is_null_input_ = CHECK_SHAPE_NULL(in_shape, kernel_name_, "x") ||
CHECK_SHAPE_NULL(filter_shape, kernel_name_, "weight") ||
CHECK_SHAPE_NULL(output_shape, kernel_name_, "output");
if (is_null_input_) {
InitSizeLists();
return true;
}
CheckTensorSize({in_shape});
if (in_shape.size() != 5) {
MS_LOG(EXCEPTION) << "Conv3dGpuKernel input must have rank 5.";
}
(void)CheckSize(in_shape.size(), 5, "x");
n_ = SizeToInt(in_shape[0]);
c_ = SizeToInt(in_shape[1]);
old_depth_ = SizeToInt(in_shape[2]);
@ -113,7 +111,7 @@ class Conv3dGpuKernel : public GpuKernel {
(void)std::transform(pad_list_me.begin(), pad_list_me.end(), std::back_inserter(pad_list),
[](const int64_t &value) { return static_cast<int>(value); });
if (pad_list.size() != 6) {
MS_LOG(EXCEPTION) << "For 'Conv3dGpuBkwKernel', the length of pad_list must be 6, but got " << pad_list.size();
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'pad' should be 6, but got " << pad_list.size();
}
pad_depth_ = pad_list[0];
pad_height_ = pad_list[2];
@ -137,7 +135,8 @@ class Conv3dGpuKernel : public GpuKernel {
int dimA[kNumDims];
int strideApadded[kNumDims];
if (data_format_ != kOpFormat_NCDHW) {
MS_LOG(EXCEPTION) << "Conv3d only support NCDHW format right now.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the value of 'data_format' only support 'NCDHW' right now "
<< ", but got " << data_format_;
}
auto padded_shape = {IntToSize(n_), IntToSize(c_), IntToSize(old_depth_ + pad_depth_),
IntToSize(old_height_ + pad_height_), IntToSize(old_width_ + pad_width_)};
@ -203,6 +202,7 @@ class Conv3dGpuKernel : public GpuKernel {
dilation_.clear();
group_ = 1;
is_null_input_ = false;
kernel_name_ = "Conv3d";
input_size_ = 0;
filter_size_ = 0;
output_size_ = 0;
@ -282,18 +282,15 @@ class Conv3dGpuKernel : public GpuKernel {
}
private:
bool CheckParam(const CNodePtr &kernel_node) {
void CheckParam(const CNodePtr &kernel_node) {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but conv3d needs 2 inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be 2, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but conv3d needs 1 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of output should be 1, but got " << output_num;
}
return true;
}
void SetNDDesc(const std::vector<size_t> &in_shape, const std::vector<size_t> &filter_shape,
@ -341,16 +338,20 @@ class Conv3dGpuKernel : public GpuKernel {
(void)std::transform(dilation_me.begin(), dilation_me.end(), std::back_inserter(dilation_),
[](const int64_t &value) { return static_cast<int>(value); });
if (stride_.size() != 5) {
MS_LOG(EXCEPTION) << "Conv3d's' stride must be 5d, but got " << stride_.size();
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'stride' should be 5, but got "
<< stride_.size();
}
if (stride_[0] != 1 || stride_[1] != 1) {
MS_LOG(EXCEPTION) << "Conv3d stride only support 1 in N axis and C axis!";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the value of 'stride' at 0 and 1 axis should be 1, but got "
<< "stride[0]: " << stride_[0] << ", stride[1]: " << stride_[1];
}
if (dilation_.size() != 5) {
MS_LOG(EXCEPTION) << "Conv3d's dilation must be 5d!";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'dilation' should be 5, but got "
<< dilation_.size();
}
if (dilation_[0] != 1 || dilation_[1] != 1) {
MS_LOG(EXCEPTION) << "Conv3d dilation only support 1 in N axis and C axis!";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the value of 'dilation' at 0 and 1 axis should be 1, but got "
<< "dilation[0]: " << dilation_[0] << ", dilation[1]: " << dilation_[1];
}
}
@ -384,6 +385,7 @@ class Conv3dGpuKernel : public GpuKernel {
std::vector<int> dilation_;
int group_;
bool is_null_input_;
std::string kernel_name_;
size_t input_size_;
size_t filter_size_;
size_t output_size_;

View File

@ -84,33 +84,24 @@ class Conv3dGradFilterGpuKernel : public GpuKernel {
return true;
}
bool CheckNull(const std::vector<size_t> dy_shape, const std::vector<size_t> in_shape) {
is_null_input_ = CHECK_NULL_INPUT(dy_shape) || CHECK_NULL_INPUT(in_shape);
if (is_null_input_) {
MS_LOG(WARNING) << "For 'Conv3dGradInputGpuKernel', input is null.";
InitSizeLists();
return true;
}
return false;
}
void CheckSize(const size_t value, const size_t expect_value, const string arg_name) {
if (value != expect_value) {
MS_LOG(EXCEPTION) << "For 'Conv3dGradFilterGpuKernel', the length of " << arg_name << " must be " << expect_value
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of " << arg_name << " should be " << expect_value
<< ", but got " << value;
}
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
InitResource();
if (!CheckParam(kernel_node)) {
return false;
}
(void)CheckParam(kernel_node);
cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0)));
auto in_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
auto dy_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
if (CheckNull(dy_shape, in_shape)) {
is_null_input_ = CHECK_SHAPE_NULL(in_shape, kernel_name_, "x") || CHECK_SHAPE_NULL(dy_shape, kernel_name_, "dy");
if (is_null_input_) {
InitSizeLists();
return true;
}
CheckTensorSize({in_shape});
@ -124,7 +115,7 @@ class Conv3dGradFilterGpuKernel : public GpuKernel {
}
compute_format_ = CUDNN_TENSOR_NCHW;
(void)CheckSize(in_shape.size(), 5, "in_shape");
(void)CheckSize(in_shape.size(), 5, "input shape");
n_ = SizeToInt(in_shape[0]);
c_ = SizeToInt(in_shape[1]);
old_depth_ = SizeToInt(in_shape[2]);
@ -138,7 +129,7 @@ class Conv3dGradFilterGpuKernel : public GpuKernel {
std::vector<int64_t> pad_list_me = GetAttr<std::vector<int64_t>>(kernel_node, "pad_list");
(void)std::transform(pad_list_me.begin(), pad_list_me.end(), std::back_inserter(pad_list),
[](const int64_t &value) { return static_cast<int>(value); });
(void)CheckSize(pad_list.size(), 6, "pad_list");
(void)CheckSize(pad_list.size(), 6, "pad");
pad_depth_ = pad_list[0];
pad_height_ = pad_list[2];
pad_width_ = pad_list[4];
@ -161,7 +152,8 @@ class Conv3dGradFilterGpuKernel : public GpuKernel {
int dimA[kNumDims];
int strideApadded[kNumDims];
if (data_format_ != kOpFormat_NCDHW) {
MS_LOG(EXCEPTION) << "Conv3dGradFilterGpuKernel only support NCDHW format right now.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the value of 'data_format' only support 'NCDHW' right now "
<< ", but got " << data_format_;
}
auto padded_shape = {IntToSize(n_), IntToSize(c_), IntToSize(old_depth_ + pad_depth_),
IntToSize(old_height_ + pad_height_), IntToSize(old_width_ + pad_width_)};
@ -224,6 +216,7 @@ class Conv3dGradFilterGpuKernel : public GpuKernel {
c_ = 0;
group_ = 1;
is_null_input_ = false;
kernel_name_ = "Conv3dGradFilter";
input_size_ = 0;
dy_size_ = 0;
output_size_ = 0;
@ -306,18 +299,15 @@ class Conv3dGradFilterGpuKernel : public GpuKernel {
}
private:
bool CheckParam(const CNodePtr &kernel_node) {
void CheckParam(const CNodePtr &kernel_node) {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but Conv3dGradFilterGpuKernel needs 2 inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be 2, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but Conv3dGradFilterGpuKernel needs 1 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of output should be 1, but got " << output_num;
}
return true;
}
void SelectAlgorithm(cudnnTensorDescriptor_t x_desc_real) {
@ -375,16 +365,20 @@ class Conv3dGradFilterGpuKernel : public GpuKernel {
(void)std::transform(dilation_me.begin(), dilation_me.end(), std::back_inserter(dilation_),
[](const int64_t &value) { return static_cast<int>(value); });
if (stride_.size() != 5) {
MS_LOG(EXCEPTION) << "Conv3dGradFilterGpuKernel stride must be 5d, but got " << stride_.size();
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'stride' should be 5, but got "
<< stride_.size();
}
if (stride_[0] != 1 || stride_[1] != 1) {
MS_LOG(EXCEPTION) << "Conv3dGradFilterGpuKernel stride only support 1 in N axis and C axis!";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the value of 'stride' at 0 and 1 axis should be 1, but got "
<< "stride[0]: " << stride_[0] << ", stride[1]: " << stride_[1];
}
if (dilation_.size() != 5) {
MS_LOG(EXCEPTION) << "Conv3dGradFilterGpuKernel dilation must be 5d!";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'dilation' should be 5, but got "
<< dilation_.size();
}
if (dilation_[0] != 1 || dilation_[1] != 1) {
MS_LOG(EXCEPTION) << "Conv3dGradFilterGpuKernel dilation only support 1 in N axis and C axis!";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the value of 'dilation' at 0 and 1 axis should be 1, but got "
<< "dilation[0]: " << dilation_[0] << ", dilation[1]: " << dilation_[1];
}
}
@ -418,6 +412,7 @@ class Conv3dGradFilterGpuKernel : public GpuKernel {
std::vector<int> dilation_;
int group_;
bool is_null_input_;
std::string kernel_name_;
size_t input_size_;
size_t dy_size_;
size_t output_size_;

View File

@ -70,36 +70,33 @@ class Conv3dGradInputGpuKernel : public GpuKernel {
return true;
}
bool CheckNull(const std::vector<size_t> dy_shape, const std::vector<size_t> filter_shape) {
is_null_input_ = CHECK_NULL_INPUT(dy_shape) || CHECK_NULL_INPUT(filter_shape);
if (is_null_input_) {
MS_LOG(WARNING) << "For 'Conv3dGradInputGpuKernel', input is null.";
InitSizeLists();
return true;
void CheckSize(const size_t value, const size_t expect_value, const string arg_name) {
if (value != expect_value) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of " << arg_name << " should be "
<< expect_value << ", but got " << value;
}
return false;
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
InitResource();
if (!CheckParam(kernel_node)) {
return false;
}
(void)CheckParam(kernel_node);
cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0)));
data_format_ = kOpFormat_NCDHW;
auto filter_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
auto dy_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
if (CheckNull(dy_shape, filter_shape)) {
is_null_input_ =
CHECK_SHAPE_NULL(filter_shape, kernel_name_, "weight") || CHECK_SHAPE_NULL(dy_shape, kernel_name_, "dy");
if (is_null_input_) {
InitSizeLists();
return true;
}
std::vector<size_t> input_shape;
GetInputShape(kernel_node, &input_shape);
compute_format_ = CUDNN_TENSOR_NCHW;
CheckTensorSize({input_shape});
if (input_shape.size() != 5) {
MS_LOG(EXCEPTION) << "For 'Conv3dGradInputGpuBkwKernel', the dimension of input must be 5.";
}
(void)CheckSize(input_shape.size(), 5, "input");
n_ = SizeToInt(input_shape[0]);
c_ = SizeToInt(input_shape[1]);
old_depth_ = SizeToInt(input_shape[2]);
@ -114,7 +111,7 @@ class Conv3dGradInputGpuKernel : public GpuKernel {
(void)std::transform(pad_list_me.begin(), pad_list_me.end(), std::back_inserter(pad_list),
[](const int64_t &value) { return static_cast<int>(value); });
if (pad_list.size() != 6) {
MS_LOG(EXCEPTION) << "For 'Conv3dGradInputGpuBkwKernel', the length of pad_list must be 6.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'pad' should be 6, but got " << pad_list.size();
}
pad_depth_ = pad_list[0];
pad_height_ = pad_list[2];
@ -138,7 +135,8 @@ class Conv3dGradInputGpuKernel : public GpuKernel {
int dimA[kNumDims];
int strideApadded[kNumDims];
if (data_format_ != kOpFormat_NCDHW) {
MS_LOG(EXCEPTION) << "Conv3dGradInputGpuKernel only support NCDHW format right now.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the value of 'data_format' only support 'NCDHW' right now "
<< ", but got " << data_format_;
}
auto padded_shape = {IntToSize(n_), IntToSize(c_), IntToSize(old_depth_ + pad_depth_),
IntToSize(old_height_ + pad_height_), IntToSize(old_width_ + pad_width_)};
@ -203,6 +201,7 @@ class Conv3dGradInputGpuKernel : public GpuKernel {
c_ = 0;
group_ = 1;
is_null_input_ = false;
kernel_name_ = "Conv3dGradInput";
dy_size_ = 0;
w_size_ = 0;
output_size_ = 0;
@ -276,18 +275,15 @@ class Conv3dGradInputGpuKernel : public GpuKernel {
}
private:
bool CheckParam(const CNodePtr &kernel_node) {
void CheckParam(const CNodePtr &kernel_node) {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but Conv3dGradInputGpuKernel needs 2 inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be 2, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but Conv3dGradInputGpuKernel needs 1 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of output should be 1, but got " << output_num;
}
return true;
}
void SetPad(const std::vector<int> &input_shape, const CNodePtr &kernel_node) {
@ -350,16 +346,20 @@ class Conv3dGradInputGpuKernel : public GpuKernel {
(void)std::transform(dilation_me.begin(), dilation_me.end(), std::back_inserter(dilation_),
[](const int64_t &value) { return static_cast<int>(value); });
if (stride_.size() != 5) {
MS_LOG(EXCEPTION) << "Conv3dGradInputGpuKernel stride must be 5d, but got " << stride_.size();
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'stride' should be 5, but got "
<< stride_.size();
}
if (stride_[0] != 1 || stride_[1] != 1) {
MS_LOG(EXCEPTION) << "Conv3dGradInputGpuKernel stride only support 1 in N axis and C axis!";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the value of 'stride' at 0 and 1 axis should be 1, but got "
<< "stride[0]: " << stride_[0] << ", stride[1]: " << stride_[1];
}
if (dilation_.size() != 5) {
MS_LOG(EXCEPTION) << "Conv3dGradInputGpuKernel dilation must be 5d!";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'dilation' should be 5, but got "
<< dilation_.size();
}
if (dilation_[0] != 1 || dilation_[1] != 1) {
MS_LOG(EXCEPTION) << "Conv3dGradInputGpuKernel dilation only support 1 in N axis and C axis!";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the value of 'dilation' at 0 and 1 axis should be 1, but got "
<< "dilation[0]: " << dilation_[0] << ", dilation[1]: " << dilation_[1];
}
}
@ -392,6 +392,7 @@ class Conv3dGradInputGpuKernel : public GpuKernel {
std::vector<int> dilation_;
int group_;
bool is_null_input_;
std::string kernel_name_;
size_t dy_size_;
size_t w_size_;
size_t output_size_;
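
A refactor that recurs through these files replaces bool-returning checks, which forced "if (!CheckParam(kernel_node)) return false;" in Init, with void functions that throw, so each failure carries a kernel-specific message instead of a bare false. A hedged before/after sketch with hypothetical names:

#include <cstddef>
#include <stdexcept>
#include <string>

// Before: the caller must remember to test and propagate the result.
bool CheckInputNumOld(size_t input_num) { return input_num == 2; }

// After: the check throws a descriptive error and the call site shrinks to one line.
void CheckInputNum(const std::string &kernel_name, size_t input_num) {
  if (input_num != 2) {
    throw std::runtime_error("For '" + kernel_name + "', the number of inputs should be 2, but got " +
                             std::to_string(input_num));
  }
}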

View File

@ -68,7 +68,8 @@ class Conv3dTransposeGpuFwdKernel : public GpuKernel {
old_width_ + (1 + stride_[4]) * pad_width_, pad_head_, pad_top_, pad_left_, output_addr,
reinterpret_cast<cudaStream_t>(stream_ptr));
} else {
MS_LOG(EXCEPTION) << "ConvTranspose3d only support NCDHW format right now.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the value of 'data_format' only support 'NCDHW' right now "
<< ", but got " << data_format_;
}
} else {
if (greater_stride_) {
@ -94,9 +95,9 @@ class Conv3dTransposeGpuFwdKernel : public GpuKernel {
}
bool CheckNull(const std::vector<size_t> filter_shape, const std::vector<size_t> input_shape) {
is_null_input_ = CHECK_NULL_INPUT(filter_shape) || CHECK_NULL_INPUT(input_shape);
is_null_input_ =
CHECK_SHAPE_NULL(filter_shape, kernel_name_, "weight") || CHECK_SHAPE_NULL(input_shape, kernel_name_, "dout");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'Conv3dTransposeGpuKernel', input is null.";
InitSizeLists();
return true;
}
@ -105,15 +106,16 @@ class Conv3dTransposeGpuFwdKernel : public GpuKernel {
void CheckSize(const size_t value, const size_t expect_value, const string arg_name) {
if (value != expect_value) {
MS_LOG(EXCEPTION) << "For 'Conv3dTransposeGpuKernel', the length of " << arg_name << " must be " << expect_value
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of " << arg_name << " must be " << expect_value
<< ", but got " << value;
}
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
InitResource();
if (!CheckParam(kernel_node)) return false;
(void)CheckParam(kernel_node);
cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0)));
data_format_ = AnfAlgo::GetInputFormat(kernel_node, 0);
auto format_attr = GetAttr<std::string>(kernel_node, "format");
@ -147,8 +149,8 @@ class Conv3dTransposeGpuFwdKernel : public GpuKernel {
pad_mode_ = GetAttr<std::string>(kernel_node, "pad_mode");
SetStrideAndDilation(kernel_node);
std::vector<int> stride_pad_list(6, 0);
(void)CheckSize(filter_shape.size(), 5, "filter_shape");
(void)CheckSize(pad_list.size(), 6, "pad_list");
(void)CheckSize(filter_shape.size(), 5, "weight shape");
(void)CheckSize(pad_list.size(), 6, "pad");
if (pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase) { // pad_mode_ = same
UpdatePaddingAndDilation(input_shape, filter_shape, pad_list.data(), stride_pad_list.data());
}
@ -255,6 +257,7 @@ class Conv3dTransposeGpuFwdKernel : public GpuKernel {
dilation_.clear();
group_ = 1;
is_null_input_ = false;
kernel_name_ = "Conv3dTranspose";
input_size_ = 0;
filter_size_ = 0;
output_size_ = 0;
@ -355,18 +358,15 @@ class Conv3dTransposeGpuFwdKernel : public GpuKernel {
}
private:
bool CheckParam(const CNodePtr &kernel_node) {
void CheckParam(const CNodePtr &kernel_node) {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but Conv3dTranspose needs 2 inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be 2, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but Conv3dTranspose needs 1 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of output should be 1, but got " << output_num;
}
return true;
}
void SetPad(const std::vector<int> &output_shape, const CNodePtr &kernel_node) {
@ -430,16 +430,20 @@ class Conv3dTransposeGpuFwdKernel : public GpuKernel {
(void)std::transform(dilation_me.begin(), dilation_me.end(), std::back_inserter(dilation_),
[](const int64_t &value) { return static_cast<int>(value); });
if (stride_.size() != 5) {
MS_LOG(EXCEPTION) << "Conv3dTransposeGpuFwdKernel's stride must be 5d!";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'stride' should be 5, but got "
<< stride_.size();
}
if (stride_[0] != 1 || stride_[1] != 1) {
MS_LOG(EXCEPTION) << "Conv3dTransposeGpuFwdKernel stride only support 1 in N axis and C axis!";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the value of 'stride' at 0 and 1 axis should be 1, but got "
<< "stride[0]: " << stride_[0] << ", stride[1]: " << stride_[1];
}
if (dilation_.size() != 5) {
MS_LOG(EXCEPTION) << "Conv3dTransposeGpuFwdKernel's dilation must be 5d!";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'dilation' should be 5, but got "
<< dilation_.size();
}
if (dilation_[0] != 1 || dilation_[1] != 1) {
MS_LOG(EXCEPTION) << "Conv3dTransposeGpuFwdKernel dilation only support 1 in N axis and C axis!";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the value of 'dilation' at 0 and 1 axis should be 1, but got "
<< "dilation[0]: " << dilation_[0] << ", dilation[1]: " << dilation_[1];
}
}
void UpdatePaddingAndDilation(const std::vector<size_t> &input_shape, const std::vector<size_t> &filter_shape,
@ -562,6 +566,7 @@ class Conv3dTransposeGpuFwdKernel : public GpuKernel {
std::vector<int> dilation_;
int group_;
bool is_null_input_;
std::string kernel_name_;
size_t input_size_;
size_t filter_size_;
size_t output_size_;

View File

@ -19,6 +19,7 @@
#include <cuda_runtime_api.h>
#include <vector>
#include <string>
#include <limits>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
@ -37,6 +38,7 @@ class CtcLossGpuKernel : public GpuKernel {
ctc_merge_repeated_(true),
ignore_longer_outputs_than_inputs_(false),
is_null_input_(false),
kernel_name_("CTCLoss"),
probs(nullptr),
label_indices(nullptr),
label_values(nullptr),
@ -77,31 +79,36 @@ class CtcLossGpuKernel : public GpuKernel {
return true;
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
InitResource();
auto probs_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto indice_dims = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
auto labels_dims = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
auto sequence_length_dims = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
is_null_input_ = CHECK_NULL_INPUT(probs_shape) || CHECK_NULL_INPUT(indice_dims) || CHECK_NULL_INPUT(labels_dims) ||
CHECK_NULL_INPUT(sequence_length_dims);
is_null_input_ = CHECK_SHAPE_NULL(probs_shape, kernel_name_, "x") ||
CHECK_SHAPE_NULL(indice_dims, kernel_name_, "labels_indices") ||
CHECK_SHAPE_NULL(labels_dims, kernel_name_, "labels_values") ||
CHECK_SHAPE_NULL(sequence_length_dims, kernel_name_, "sequence_length");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'CtclossGpuKernel', input is null.";
InitSizeLists();
return true;
}
if (probs_shape.size() != 3) {
MS_LOG(EXCEPTION) << "probs dims: " << probs_shape.size() << " not support.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of x should be 3, but got "
<< probs_shape.size();
}
probs_dims_[0] = probs_shape[0];
probs_dims_[1] = probs_shape[1];
probs_dims_[2] = probs_shape[2];
if (labels_dims.size() != 1) {
MS_LOG(EXCEPTION) << "labels dims: " << labels_dims.size() << " not support.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of labels_values should be 1, but got "
<< labels_dims.size();
}
if (indice_dims.size() != 2) {
MS_LOG(EXCEPTION) << "labels indice dims: " << indice_dims.size() << " not support.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of labels_indices should be 2, but got "
<< indice_dims.size();
}
label_size_ = sizeof(int);
for (auto i : labels_dims) {
@ -157,7 +164,8 @@ class CtcLossGpuKernel : public GpuKernel {
"cudaMemcpyAsync failed.");
CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_, cudaStreamSynchronize(stream), "cudaStreamSynchronize failed.");
if (max_time < max_sequence) {
MS_LOG(EXCEPTION) << "max_time should be greater than sequence length.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the x[0] should be equal to or greater than max_sequence, "
<< "but got x[0]: " << max_time << ", max_sequence: " << max_sequence;
}
InnerSoftMax(probs, softmax_probs, sequence_length, max_time, batch, numclass, stream);
MemsetForWS(label_value_pcr, cum_labels_length, label_squence_length, costs, grads, stream);
@ -169,7 +177,9 @@ class CtcLossGpuKernel : public GpuKernel {
"cudaMemcpyAsync failed.");
CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_, cudaStreamSynchronize(stream), "cudaStreamSynchronize failed.");
if (batch != batch_label + 1) {
MS_LOG(EXCEPTION) << "label batch should be equal to input batch.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_
<< "', the x[0] should be equal to batch_label + 1, but got x[0]: " << max_time
<< ", batch_label: " << batch_label;
}
GenLabelValue(label_value_sp, label_indices, label_values, label_squence_length, cum_labels_length,
max_labels_length, label_size_ / sizeof(int), numclass - 1, batch, stream);
@ -288,6 +298,7 @@ class CtcLossGpuKernel : public GpuKernel {
bool ctc_merge_repeated_;
bool ignore_longer_outputs_than_inputs_;
bool is_null_input_;
std::string kernel_name_;
T kLogZero_ = -std::numeric_limits<T>::infinity();
// Heap parameter
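
For context, the rank checks above encode the sparse-label layout CTCLoss consumes: x is [max_time, batch, num_classes], labels_indices is a 2-D index matrix, labels_values is 1-D, and sequence_length holds one length per batch element. A small sketch of the same rank validation, hedged as an approximation of the kernel's checks:

#include <stdexcept>
#include <vector>

// Check CTCLoss input ranks; throws on the first mismatch.
void CheckCtcRanks(const std::vector<size_t> &x, const std::vector<size_t> &labels_indices,
                   const std::vector<size_t> &labels_values) {
  if (x.size() != 3) throw std::invalid_argument("the dimension of x should be 3");
  if (labels_indices.size() != 2) throw std::invalid_argument("the dimension of labels_indices should be 2");
  if (labels_values.size() != 1) throw std::invalid_argument("the dimension of labels_values should be 1");
}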

View File

@ -18,6 +18,7 @@
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_NN_DROPOUT3D_GPU_KERNEL_H_
#include <vector>
#include <string>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "backend/kernel_compiler/gpu/cuda_impl/dropout3d_impl.cuh"
@ -69,17 +70,17 @@ class Dropout3DGpuFwdKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
cudnn_handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCudnnHandle();
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(EXCEPTION) << "Argument number is " << input_num << ", but Dropout3DGpuFwdKernel needs 1.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be 1, but got " << input_num;
}
std::vector<size_t> input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name_, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "Dropout3DGpuKernel input is null.";
InitSizeLists();
return true;
}
@ -88,7 +89,8 @@ class Dropout3DGpuFwdKernel : public GpuKernel {
size_t dims = input_shape.size();
if (dims != 5) {
MS_LOG(EXCEPTION) << "Input dims " << dims << "not supported. Must be in NCDHW format.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input should be 5, but got "
<< input_shape.size();
}
// get N and C values from 5 dim input tensor
@ -106,7 +108,8 @@ class Dropout3DGpuFwdKernel : public GpuKernel {
keep_prob_ = GetAttr<float>(kernel_node, "keep_prob");
if ((keep_prob_ < 0.0) || (keep_prob_ > 1.0)) {
MS_LOG(EXCEPTION) << "keep_prob is out of range [0.0, 1.0]";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the value of 'keep_prob' should be in range [0.0, 1.0], "
<< "but got " << keep_prob_;
}
InitSizeLists();
@ -116,6 +119,7 @@ class Dropout3DGpuFwdKernel : public GpuKernel {
void ResetResource() noexcept override {
cudnn_handle_ = nullptr;
is_null_input_ = false;
kernel_name_ = "Dropout3D";
num_count_ = 0;
keep_prob_ = 0.0;
states_init_ = false;
@ -145,6 +149,7 @@ class Dropout3DGpuFwdKernel : public GpuKernel {
cudnnHandle_t cudnn_handle_;
curandGenerator_t curand_generator_;
bool is_null_input_;
std::string kernel_name_;
bool states_init_;
size_t num_count_;
size_t n_;
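
The keep_prob validation above is a simple closed-interval range check; a standalone sketch with a hypothetical helper name:

#include <stdexcept>
#include <string>

// Reject probabilities outside the closed interval [0.0, 1.0].
void CheckKeepProb(float keep_prob) {
  if (keep_prob < 0.0f || keep_prob > 1.0f) {
    throw std::out_of_range("the value of 'keep_prob' should be in range [0.0, 1.0], but got " +
                            std::to_string(keep_prob));
  }
}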

View File

@ -18,6 +18,7 @@
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_NN_DROPOUT_GPU_KERNEL_H_
#include <vector>
#include <string>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "backend/kernel_compiler/gpu/cuda_impl/dropout_impl.cuh"
@ -65,17 +66,17 @@ class DropoutGpuFwdKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
InitResource();
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(EXCEPTION) << "Argument number is " << input_num << ", but DropoutGpuFwdKernel needs 1.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be 1, but got " << input_num;
}
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name_, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'DropoutGpuKernel', input is null.";
InitSizeLists();
return true;
}
@ -101,6 +102,7 @@ class DropoutGpuFwdKernel : public GpuKernel {
void ResetResource() noexcept override {
cudnn_handle_ = nullptr;
is_null_input_ = false;
kernel_name_ = "Dropout";
num_count_ = 0;
keep_prob_ = 0.0;
seed_ = 0;
@ -125,6 +127,7 @@ class DropoutGpuFwdKernel : public GpuKernel {
private:
cudnnHandle_t cudnn_handle_;
bool is_null_input_;
std::string kernel_name_;
size_t num_count_;
float keep_prob_;
bool states_init_;

View File

@ -18,6 +18,7 @@
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_NN_DROPOUT_GRAD_KERNEL_H_
#include <vector>
#include <string>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "backend/kernel_compiler/gpu/cuda_impl/dropout_impl.cuh"
@ -27,7 +28,8 @@ namespace kernel {
template <typename T>
class DropoutGradGpuBwdKernel : public GpuKernel {
public:
DropoutGradGpuBwdKernel() : cudnn_handle_(nullptr), is_null_input_(false), num_count_(0), keep_prob_(0.0) {}
DropoutGradGpuBwdKernel()
: cudnn_handle_(nullptr), is_null_input_(false), kernel_name_("DropoutGrad"), num_count_(0), keep_prob_(0.0) {}
~DropoutGradGpuBwdKernel() override = default;
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
@ -48,18 +50,17 @@ class DropoutGradGpuBwdKernel : public GpuKernel {
return true;
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
InitResource();
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(ERROR) << "Argument number is " << input_num << ", but DropoutGradGpuBwdKernel needs 2.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be 2, but got " << input_num;
}
auto input_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name_, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'DropoutGradGpuKernel', input is null.";
InitSizeLists();
return true;
}
@ -89,6 +90,7 @@ class DropoutGradGpuBwdKernel : public GpuKernel {
private:
cudnnHandle_t cudnn_handle_;
bool is_null_input_;
std::string kernel_name_;
size_t num_count_;
float keep_prob_;
std::vector<size_t> input_size_list_;

View File

@ -19,6 +19,7 @@
#include <cuda_runtime_api.h>
#include <vector>
#include <string>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
@ -50,10 +51,10 @@ class FlattenGpuFwdKernel : public GpuKernel {
return true;
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
auto shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(shape);
is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name_, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'FlattenGpuKernel', input is null.";
InitSizeLists();
return true;
}
@ -67,6 +68,7 @@ class FlattenGpuFwdKernel : public GpuKernel {
void ResetResource() noexcept override {
input_size_ = 0;
is_null_input_ = false;
kernel_name_ = "Flatten";
input_size_list_.clear();
output_size_list_.clear();
workspace_size_list_.clear();
@ -85,6 +87,7 @@ class FlattenGpuFwdKernel : public GpuKernel {
size_t input_size_;
bool is_null_input_;
std::string kernel_name_;
};
} // namespace kernel
} // namespace mindspore

View File

@ -19,6 +19,7 @@
#include <cuda_runtime_api.h>
#include <vector>
#include <string>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
@ -51,16 +52,15 @@ class FlattenGardGpuBkwKernel : public GpuKernel {
return true;
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(ERROR) << "Argument number is " << input_num << ", but FlattenGardGpuFwdKernel needs 1.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be 1, but got " << input_num;
}
auto shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(shape);
is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name_, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'FlattenGradGpuKernel', input is null.";
InitSizeLists();
return true;
}
@ -79,6 +79,7 @@ class FlattenGardGpuBkwKernel : public GpuKernel {
void ResetResource() noexcept override {
input_size_ = 0;
is_null_input_ = false;
kernel_name_ = "FlattenGrad";
input_size_list_.clear();
output_size_list_.clear();
workspace_size_list_.clear();
@ -97,6 +98,7 @@ class FlattenGardGpuBkwKernel : public GpuKernel {
size_t input_size_;
bool is_null_input_;
std::string kernel_name_;
};
} // namespace kernel
} // namespace mindspore

View File

@ -18,6 +18,7 @@
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_NN_FTRL_GPU_KERNEL_H_
#include <vector>
#include <string>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "backend/kernel_compiler/gpu/cuda_impl/ftrl_impl.cuh"
@ -36,7 +37,8 @@ class FtrlGpuKernel : public GpuKernel {
l1_regularization_size_(0),
l2_regularization_size_(0),
learning_rate_power_size_(0),
is_null_input_(false) {}
is_null_input_(false),
kernel_name_("ApplyFtrl") {}
~FtrlGpuKernel() override = default;
@ -63,10 +65,11 @@ class FtrlGpuKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != INPUT_NUM) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but ftrl needs " << INPUT_NUM << " inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be " << INPUT_NUM << ", but got "
<< input_num;
}
variable_size_ = sizeof(T);
@ -82,10 +85,11 @@ class FtrlGpuKernel : public GpuKernel {
auto accumulation_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
auto linear_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
auto gradient_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
is_null_input_ = CHECK_NULL_INPUT(variable_shape) || CHECK_NULL_INPUT(accumulation_shape) ||
CHECK_NULL_INPUT(linear_shape) || CHECK_NULL_INPUT(gradient_shape);
is_null_input_ = CHECK_SHAPE_NULL(variable_shape, kernel_name_, "var") ||
CHECK_SHAPE_NULL(accumulation_shape, kernel_name_, "accum") ||
CHECK_SHAPE_NULL(linear_shape, kernel_name_, "linear") ||
CHECK_SHAPE_NULL(gradient_shape, kernel_name_, "grad");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'FtrlGpuKernel', input is null.";
InitSizeLists();
return true;
}
@ -132,6 +136,7 @@ class FtrlGpuKernel : public GpuKernel {
size_t l2_regularization_size_;
size_t learning_rate_power_size_;
bool is_null_input_;
std::string kernel_name_;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;

View File

@ -38,9 +38,8 @@ class FusedAdamWeightDecayGpuKernel : public GpuKernel {
}
auto shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7);
is_null_input_ = CHECK_NULL_INPUT(shape);
is_null_input_ = CHECK_SHAPE_NULL(shape, node_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'FusedAdamWeightDecayGpuKernel', input is null.";
InitSizeLists();
return true;
}

View File

@ -52,10 +52,10 @@ class FusedAddReluGradV2GpuKernel : public GpuKernel {
bool Init(const CNodePtr &kernel_node) override {
MS_EXCEPTION_IF_NULL(kernel_node);
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
auto shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(shape);
is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'FusedAddReluGradV2GpuKernel', input is null.";
InitSizeLists();
return true;
}

View File

@ -52,10 +52,10 @@ class FusedAddReluV2GpuKernel : public GpuKernel {
bool Init(const CNodePtr &kernel_node) override {
MS_EXCEPTION_IF_NULL(kernel_node);
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
auto shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(shape);
is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'FusedAddReluV2GpuKernel', input is null.";
InitSizeLists();
return true;
}

View File

@ -50,16 +50,16 @@ class FusedScaleMomentumGpuKernel : public GpuKernel {
return true;
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != INPUT_NUM) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but FusedMomentum needs " << INPUT_NUM << " inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of input should be " << INPUT_NUM << ", but got "
<< input_num;
}
auto variable_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
is_null_input_ = CHECK_NULL_INPUT(variable_shape);
is_null_input_ = CHECK_SHAPE_NULL(variable_shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'FusedScaleMomentumGpuKernel', input is null.";
InitSizeLists();
return true;
}

View File

@ -50,16 +50,16 @@ class FusedWeightDecayMomentumGpuKernel : public GpuKernel {
return true;
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != INPUT_NUM) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but FusedMomentum needs " << INPUT_NUM << " inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of input should be " << INPUT_NUM << ", but got "
<< input_num;
}
auto variable_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
is_null_input_ = CHECK_NULL_INPUT(variable_shape);
is_null_input_ = CHECK_SHAPE_NULL(variable_shape, kernel_name, "variable");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'FusedWeightDecayMomentumGpuKernel', input is null.";
InitSizeLists();
return true;
}

View File

@ -51,16 +51,16 @@ class FusedWeightDecayScaleMomentumGpuKernel : public GpuKernel {
return true;
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != INPUT_NUM) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but FusedMomentum needs " << INPUT_NUM << " inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of input should be " << INPUT_NUM << ", but got "
<< input_num;
}
auto variable_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
is_null_input_ = CHECK_NULL_INPUT(variable_shape);
is_null_input_ = CHECK_SHAPE_NULL(variable_shape, kernel_name, "variable");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'FusedWeightDecayScaleMomentumGpuKernel', input is null.";
InitSizeLists();
return true;
}

View File

@ -48,12 +48,12 @@ class GeLUGpuGradKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
InitResource();
input_size_ = sizeof(T);
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'GeluGradGpuKernel', input is null.";
InitSizeLists();
return true;
}

View File

@ -47,12 +47,12 @@ class GeluGpuKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
InitResource();
input_size_ = sizeof(T);
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'GeluGpuKernel', input is null.";
InitSizeLists();
return true;
}

View File

@ -47,19 +47,19 @@ class HSigmoidKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but HSigmoid needs 1 inputs.";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of input should be 1, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but HSigmoid has 1 output.";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of output should be 1, but got " << output_num;
}
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'HsigmoidGpuKernel', input is null.";
InitSizeLists();
return true;
}

View File

@ -48,19 +48,19 @@ class HSigmoidGradKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but HSigmoidGrad needs 2 inputs.";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of input should be 2, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but HSigmoidGrad has 1 output.";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of output should be 1, but got " << output_num;
}
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'HsigmoidGradGpuKernel', input is null.";
InitSizeLists();
return true;
}

View File

@ -47,19 +47,19 @@ class HSwishKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but HSwish needs 1 inputs.";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of input should be 1, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but HSwish needs 1 output.";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of output should be 1, but got " << output_num;
}
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'HswishGpuKernel', input is null.";
InitSizeLists();
return true;
}

View File

@ -48,19 +48,19 @@ class HSwishGradKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but HSwishGrad needs 2 inputs.";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of input should be 2, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but HSwishGrad has 1 output.";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of output should be 1, but got " << output_num;
}
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'HswishGradGpuKernel', input is null.";
InitSizeLists();
return true;
}

View File

@ -48,6 +48,7 @@ class Im2ColGpuFwdKernel : public GpuKernel {
n_(0),
c_(0),
is_null_input_(false),
kernel_name_("Im2col"),
input_size_(0),
output_size_(0),
padded_size_(0),
@ -82,21 +83,21 @@ class Im2ColGpuFwdKernel : public GpuKernel {
return true;
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
InitResource();
if (!CheckParam(kernel_node)) {
return false;
}
(void)CheckParam(kernel_node);
auto in_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(in_shape) || CHECK_NULL_INPUT(output_shape);
is_null_input_ =
CHECK_SHAPE_NULL(in_shape, kernel_name_, "input") || CHECK_SHAPE_NULL(output_shape, kernel_name_, "output");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'Im2ColGpuKernel', input or output is null.";
InitSizeLists();
return true;
}
if (in_shape.size() != 4) {
MS_LOG(EXCEPTION) << "For 'Im2ColGpuKernel', the dimension of input must be 4, but got " << in_shape.size();
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input should be 4, but got "
<< in_shape.size();
}
std::vector<int> filter_shape;
std::vector<int64_t> filter_shape_me = GetAttr<std::vector<int64_t>>(kernel_node, "kernel_size");
@ -188,20 +189,17 @@ class Im2ColGpuFwdKernel : public GpuKernel {
}
private:
bool CheckParam(const CNodePtr &kernel_node) {
void CheckParam(const CNodePtr &kernel_node) {
cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0)));
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but Im2Col needs 1 inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be 1, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but Im2Col needs 1 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of output should be 1, but got " << output_num;
}
return true;
}
void SetPad(const std::vector<size_t> &in_shape, const CNodePtr &kernel_node) {
std::vector<int> pad_list;
@ -215,7 +213,7 @@ class Im2ColGpuFwdKernel : public GpuKernel {
old_width_ = SizeToInt(in_shape[3]);
if (pad_list.size() != 4) {
MS_LOG(EXCEPTION) << "For 'Im2ColGpuKernel', the length of pad_list must be 4, but got " << pad_list.size();
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'pad' should be 4, but got " << pad_list.size();
}
pad_height_ = pad_list[0] + pad_list[1];
pad_width_n = pad_list[2] + pad_list[3];
@ -267,16 +265,20 @@ class Im2ColGpuFwdKernel : public GpuKernel {
(void)std::transform(dilation_me.begin(), dilation_me.end(), std::back_inserter(dilation_),
[](const int64_t &value) { return static_cast<int>(value); });
if (stride_.size() != 4) {
MS_LOG(EXCEPTION) << "Im2Col's stride must be 4d!";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'stride' should be 4, but got "
<< stride_.size();
}
if (stride_[0] != 1 || stride_[1] != 1) {
MS_LOG(EXCEPTION) << "Im2Col's stride only support 1 in N axis and C axis!";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the value of 'stride' at 0 and 1 axis should be 1, but got "
<< "stride[0]: " << stride_[0] << ", stride[1]: " << stride_[1];
}
if (dilation_.size() != 4) {
MS_LOG(EXCEPTION) << "Im2Col's dilation must be 4d!";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'dilation' should be 4, but got "
<< dilation_.size();
}
if (dilation_[0] != 1 || dilation_[1] != 1) {
MS_LOG(EXCEPTION) << "Im2Col's dilation only support 1 in N axis and C axis!";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the value of 'dilation' at 0 and 1 axis should be 1, but got "
<< "dilation[0]: " << dilation_[0] << ", dilation[1]: " << dilation_[1];
}
}
@ -304,6 +306,7 @@ class Im2ColGpuFwdKernel : public GpuKernel {
std::vector<int> stride_;
std::vector<int> dilation_;
bool is_null_input_;
std::string kernel_name_;
size_t input_size_;
size_t output_size_;
size_t padded_size_;

View File

@ -109,15 +109,15 @@ class InstanceNormGpuKernel : public GpuKernel {
cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0)));
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 5) {
MS_LOG(EXCEPTION) << "input tensor size is " << input_num << ", " << kernel_name << " should be 5";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of input should be 5, but got " << input_num;
}
input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
if (input_shape_.size() != 4) {
MS_LOG(EXCEPTION) << "tensor shape is " << input_shape_.size() << ", InstanceNormGpuKernel should be 4";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input_x should be 4, but got "
<< input_shape_.size();
}
is_null_input_ = CHECK_NULL_INPUT(input_shape_);
is_null_input_ = CHECK_SHAPE_NULL(input_shape_, kernel_name, "input_x");
if (is_null_input_) {
MS_LOG(WARNING) << "InstanceNormGpuKernel input is null";
InitSizeLists();
return true;
}

View File

@ -113,16 +113,16 @@ class InstanceNormGradGpuKernel : public GpuKernel {
cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0)));
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 5) {
MS_LOG(EXCEPTION) << "input tensor size is " << input_num << ", " << kernel_name << " should be 5";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of input should be 5, but got " << input_num;
}
input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
if (input_shape_.size() != 4) {
MS_LOG(EXCEPTION) << "tensor shape is " << input_shape_.size() << ", InstanceNormGradGpuKernel should be 4";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input should be 4, but got "
<< input_shape_.size();
}
is_null_input_ = CHECK_NULL_INPUT(input_shape_);
is_null_input_ = CHECK_SHAPE_NULL(input_shape_, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "InstanceNormGradGpuKernel input is null";
InitSizeLists();
return true;
}

View File

@ -50,10 +50,10 @@ class KLDivLossGpuKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "logits");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'KlDivLossGpuKernel', input is null.";
InitSizeLists();
return true;
}

View File

@ -51,10 +51,10 @@ class KLDivLossGradGpuKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'KlDivLossGradGpuKernel', input is null.";
InitSizeLists();
return true;
}

View File

@ -41,6 +41,7 @@ class L2NormalizeGpuKernel : public GpuKernel {
outputC_descriptor_(nullptr),
all_match_(false),
is_null_input_(false),
kernel_name_("L2Normalize"),
input_size_(0),
output_size_(0),
workspace_size_(0),
@ -86,12 +87,11 @@ class L2NormalizeGpuKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
InitResource();
data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0)));
if (!CheckIONumber(kernel_node)) {
return false;
}
(void)CheckIONumber(kernel_node);
int input_dim_length = SizeToInt(AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0).size());
int axis = LongToInt(GetAttr<int64_t>(kernel_node, "axis"));
@ -100,9 +100,9 @@ class L2NormalizeGpuKernel : public GpuKernel {
auto inputA_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(inputA_shape) || CHECK_NULL_INPUT(output_shape);
is_null_input_ =
CHECK_SHAPE_NULL(inputA_shape, kernel_name_, "input") || CHECK_SHAPE_NULL(output_shape, kernel_name_, "output");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'L2NormalizeGpuKernel', input or output is null.";
InitSizeLists();
return true;
}
@ -110,15 +110,10 @@ class L2NormalizeGpuKernel : public GpuKernel {
for (auto dim : output_shape) {
output_size_ *= dim;
}
is_null_input_ = CHECK_NULL_INPUT(inputA_shape);
if (is_null_input_) {
MS_LOG(WARNING) << "L2NormalizeGPUKernel input is null";
InitSizeLists();
return true;
}
CheckTensorSize({inputA_shape, output_shape});
if (inputA_shape.size() > MAX_DIMS) {
MS_LOG(EXCEPTION) << "Broadcast operation not support dim greater than " << MAX_DIMS;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input cannot be greater than " << MAX_DIMS
<< ", but got " << inputA_shape.size();
}
std::vector<size_t> outputC_shape = output_shape;
@ -129,8 +124,9 @@ class L2NormalizeGpuKernel : public GpuKernel {
outputC_shape[axis_] = 1;
if (inputA_shape.size() != output_shape.size() || inputA_shape.size() != outputC_shape.size()) {
MS_LOG(ERROR) << "Input shape size need equal to output shape size";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input and output should be the same, but "
<< "got the dimension of input: " << inputA_shape.size()
<< ", the dimension of output: " << output_shape.size();
}
lhs_shape_.resize(MAX_DIMS, 1);
@ -183,18 +179,15 @@ class L2NormalizeGpuKernel : public GpuKernel {
}
private:
bool CheckIONumber(const CNodePtr &kernel_node) {
void CheckIONumber(const CNodePtr &kernel_node) {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but l2normalize op needs 1 inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be 1, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but l2normalize op needs 1 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of output should be 1, but got " << output_num;
}
return true;
}
void DestroyResource() noexcept {
CHECK_CUDNN_RET_WITH_ERROR(kernel_node_, cudnnDestroyReduceTensorDescriptor(reduce_tensor_descriptor_),
@ -255,6 +248,7 @@ class L2NormalizeGpuKernel : public GpuKernel {
bool all_match_;
bool is_null_input_;
std::string kernel_name_;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
std::vector<size_t> workspace_size_list_;
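
Both L2Normalize kernels fold a possibly negative axis attribute into range with the expression axis < 0 ? axis + input_dim_length : axis. A minimal sketch of that wrap-around rule, with the bounds check the kernels rely on implicitly (illustrative only):

#include <stdexcept>

// Map a Python-style negative axis to its non-negative equivalent.
int NormalizeAxis(int axis, int ndim) {
  const int wrapped = axis < 0 ? axis + ndim : axis;
  if (wrapped < 0 || wrapped >= ndim) {
    throw std::out_of_range("axis out of range");
  }
  return wrapped;  // e.g. NormalizeAxis(-1, 4) == 3
}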

View File

@ -43,6 +43,7 @@ class L2NormalizeGradGpuKernel : public GpuKernel {
outputC_descriptor_(nullptr),
all_match_(false),
is_null_input_(false),
kernel_name_("L2NormalizeGrad"),
output_size_(0),
workspace_size_(0),
epsilon_(0.0),
@ -112,28 +113,29 @@ class L2NormalizeGradGpuKernel : public GpuKernel {
bool CheckInputShape(const std::vector<size_t> &output_shape) {
for (auto &shape : input_shape_list_) {
if (output_shape != shape) {
MS_LOG(EXCEPTION) << "Input shape and output shape should be same!";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the shape of input and output should be the same, but "
<< "got the shape of input: " << CONVERT_VECTOR_TO_STRING(shape)
<< ", the shape of output: " << CONVERT_VECTOR_TO_STRING(output_shape);
}
}
is_null_input_ = CHECK_NULL_INPUT(input_shape_list_[0]);
is_null_input_ = CHECK_SHAPE_NULL(input_shape_list_[0], kernel_name_, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "L2NormalizeGPUKernel input is null";
InitSizeLists();
return false;
}
if (input_shape_list_[0].size() > MAX_DIMS) {
MS_LOG(EXCEPTION) << "Broadcast operation not support dim greater than " << MAX_DIMS;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input cannot be greater than " << MAX_DIMS
<< ", but got " << input_shape_list_[0].size();
}
return true;
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
InitResource();
data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0)));
if (!CheckIONumber(kernel_node)) {
return false;
}
(void)CheckIONumber(kernel_node);
int input_dim_length = SizeToInt(AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0).size());
int axis = static_cast<int>(GetAttr<int64_t>(kernel_node, "axis"));
axis_ = axis < 0 ? (axis + input_dim_length) : axis;
@ -144,9 +146,8 @@ class L2NormalizeGradGpuKernel : public GpuKernel {
input_shape_list_.emplace_back(AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, i));
}
auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(output_shape);
is_null_input_ = CHECK_SHAPE_NULL(output_shape, kernel_name_, "output");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'L2NormalizeGradGpuKernel', output is null.";
InitSizeLists();
return true;
}
@ -187,18 +188,16 @@ class L2NormalizeGradGpuKernel : public GpuKernel {
}
protected:
bool CheckIONumber(const CNodePtr &kernel_node) {
void CheckIONumber(const CNodePtr &kernel_node) {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != INPUT_SIZE) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but l2normalize op needs " << INPUT_SIZE << " inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be " << INPUT_SIZE << ", but got "
<< input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but l2normalize op needs 1 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of output should be 1, but got " << output_num;
}
return true;
}
void InitResource() override {
cudnn_handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCudnnHandle();
@ -315,6 +314,7 @@ class L2NormalizeGradGpuKernel : public GpuKernel {
bool all_match_;
bool is_null_input_;
std::string kernel_name_;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
std::vector<size_t> workspace_size_list_;

View File

@ -20,8 +20,10 @@
#include <cublas_v2.h>
#include <iostream>
#include <vector>
#include <string>
#include <algorithm>
#include <map>
#include <sstream>
#include "utils/log_adapter.h"
#include "utils/trace_base.h"
#include "include/curand.h"
@ -214,6 +216,34 @@ inline bool CheckNullInput(const std::vector<size_t> &input_shape) {
}
#define CHECK_NULL_INPUT(input_shape) mindspore::device::gpu::CheckNullInput(input_shape)
inline std::string ConvertVectorToString(const std::vector<size_t> &value) {
std::stringstream ss;
ss << "(";
for (auto it = value.begin(); it != value.end(); it++) {
if (it == value.begin()) {
ss << *it;
} else {
ss << ", " << *it;
}
}
ss << ")";
return ss.str();
}
#define CONVERT_VECTOR_TO_STRING(value) mindspore::device::gpu::ConvertVectorToString(value)
inline bool CheckShapeNull(const std::vector<size_t> &shape, std::string kernel_name, std::string param_name) {
if (CHECK_NULL_INPUT(shape)) {
MS_LOG(WARNING) << "For '" << kernel_name << "', the shape of " << param_name << " cannot contain zero, but got "
<< CONVERT_VECTOR_TO_STRING(shape);
return true;
}
return false;
}
#define CHECK_SHAPE_NULL(shape, kernel_name, param_name) \
mindspore::device::gpu::CheckShapeNull(shape, kernel_name, param_name)
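// Hedged usage sketch (not part of this header): how a kernel's Init typically
// pairs CHECK_SHAPE_NULL with InitSizeLists. Given a shape of (4, 0, 3), the
// macro logs a warning such as
//   For 'Conv3d', the shape of input cannot contain zero, but got (4, 0, 3)
// and returns true, so the kernel allocates nothing and exits early:
//
//   is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name_, "input");
//   if (is_null_input_) {
//     InitSizeLists();
//     return true;
//   }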
inline const char *CurandGetErrorString(curandStatus_t status) {
switch (status) {
case CURAND_STATUS_VERSION_MISMATCH: