!26751 optimizes the kernel error description of Adagrad, Adam, Conv2d, etc.
Merge pull request !26751 from wangshuide/wsd_master

commit d66f811022
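The change applies one convention across all the GPU kernels touched below: parameter checks now raise MS_LOG(EXCEPTION) naming the operator, the expected value, and the actual value, instead of logging MS_LOG(ERROR) and returning false. A minimal sketch of the convention, distilled from the hunks that follow (CheckInputNumber is an illustrative name, not code from this commit):

    // Sketch only: the "For '<kernel>', ... should be X, but got Y" message pattern.
    void CheckInputNumber(const std::string &kernel_name, size_t input_num, size_t expect_num) {
      if (input_num != expect_num) {
        MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of input should be " << expect_num
                          << ", but got " << input_num;
      }
    }

Throwing instead of returning false also lets Init drop the error-propagation boilerplate, which is why several "return false;" lines disappear in the hunks below.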
@@ -58,7 +58,7 @@ class ActivationGpuFwdKernel : public GpuKernel {
     auto node_name = AnfAlgo::GetCNodeName(kernel_node);
     auto iter = kernel_map.find(node_name);
     if (iter == kernel_map.end()) {
-      MS_LOG(EXCEPTION) << "Kernel: " << node_name << " not support.";
+      MS_LOG(EXCEPTION) << "Only support these activations: ReLU6, Tanh, Elu, Sigmoid currently, but got " << node_name;
     }
     mode_ = iter->second;

@@ -66,13 +66,11 @@ class ActivationGpuFwdKernel : public GpuKernel {
     cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0)));
     size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
     if (input_num != 1) {
-      MS_LOG(ERROR) << "Argument number is " << input_num << ", but ActivationGpuFwdKernel needs 1.";
-      return false;
+      MS_LOG(EXCEPTION) << "For '" << node_name << "', the number of input should be 1, but got " << input_num;
     }
     auto input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
-    is_null_input_ = CHECK_NULL_INPUT(input_shape);
+    is_null_input_ = CHECK_SHAPE_NULL(input_shape, node_name, "input");
     if (is_null_input_) {
-      MS_LOG(WARNING) << "ActivationGpuFwdKernel input is null.";
       InitSizeLists();
       return true;
     }
@@ -67,7 +67,7 @@ class ActivationGradGpuKernel : public GpuKernel {
     auto node_name = AnfAlgo::GetCNodeName(kernel_node);
     auto iter = kernel_map.find(node_name);
     if (iter == kernel_map.end()) {
-      MS_LOG(EXCEPTION) << "Kernel: " << node_name << " not support.";
+      MS_LOG(EXCEPTION) << "Only support these activations: ReLU6, Tanh, Elu, Sigmoid currently, but got " << node_name;
     }
     mode_ = iter->second;

@@ -75,13 +75,11 @@ class ActivationGradGpuKernel : public GpuKernel {
     cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0)));
     size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
     if (input_num != 2) {
-      MS_LOG(ERROR) << "Argument number is " << input_num << ", but ActivationGradGpuKernel needs 2.";
-      return false;
+      MS_LOG(EXCEPTION) << "For '" << node_name << "', the number of input should be 2, but got " << input_num;
     }
     auto input_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
-    is_null_input_ = CHECK_NULL_INPUT(input_shape);
+    is_null_input_ = CHECK_SHAPE_NULL(input_shape, node_name, "input");
     if (is_null_input_) {
-      MS_LOG(WARNING) << "ActivationGradGpuKernel input is null.";
       InitSizeLists();
       return true;
     }
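Every file below also swaps CHECK_NULL_INPUT(shape) for CHECK_SHAPE_NULL(shape, kernel_name, arg_name), which reports the offending operator and argument itself, so the per-kernel MS_LOG(WARNING) lines before InitSizeLists() can be dropped. The macro's definition is not part of this diff; judging only from its call sites, a helper with equivalent behavior might look like this sketch (ShapeIsNull is a hypothetical name):

    // Illustrative only: the real CHECK_SHAPE_NULL definition is not shown in this commit.
    inline bool ShapeIsNull(const std::vector<size_t> &shape, const std::string &kernel_name,
                            const std::string &arg_name) {
      // An empty shape or any zero dimension means there is nothing to compute.
      bool is_null = shape.empty() || std::any_of(shape.begin(), shape.end(), [](size_t d) { return d == 0; });
      if (is_null) {
        MS_LOG(WARNING) << "For '" << kernel_name << "', the shape of " << arg_name << " is null.";
      }
      return is_null;
    }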
@@ -18,6 +18,7 @@
 #define MINDSPORE_ADAGRAD_GPU_KERNEL_H

 #include <vector>
+#include <string>
 #include "backend/kernel_compiler/gpu/gpu_kernel.h"
 #include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
 #include "backend/kernel_compiler/gpu/cuda_impl/adagrad_impl.cuh"

@@ -33,7 +34,8 @@ class AdagradGpuKernel : public GpuKernel {
         learning_rate_size_(0),
         gradient_size_(0),
         update_slots(true),
-        is_null_input_(false) {}
+        is_null_input_(false),
+        kernel_name_("ApplyAdagrad") {}

   ~AdagradGpuKernel() override = default;

@@ -68,11 +70,11 @@ class AdagradGpuKernel : public GpuKernel {
   }

   bool Init(const CNodePtr &kernel_node) override {
+    kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
     size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
     update_slots = AnfAlgo::GetNodeAttr<bool>(kernel_node, "update_slots");
     if (input_num != 4) {
-      MS_LOG(ERROR) << "Input number is " << input_num << ", but adagrad needs 4 inputs.";
-      return false;
+      MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be 4, but got " << input_num;
     }
     variable_size_ = sizeof(T);
     accumulation_size_ = sizeof(T);

@@ -82,10 +84,10 @@ class AdagradGpuKernel : public GpuKernel {
     auto variable_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
     auto accumulation_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
     auto gradient_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
-    is_null_input_ =
-      CHECK_NULL_INPUT(variable_shape) || CHECK_NULL_INPUT(accumulation_shape) || CHECK_NULL_INPUT(gradient_shape);
+    is_null_input_ = CHECK_SHAPE_NULL(variable_shape, kernel_name_, "var") ||
+                     CHECK_SHAPE_NULL(accumulation_shape, kernel_name_, "accum") ||
+                     CHECK_SHAPE_NULL(gradient_shape, kernel_name_, "grad");
     if (is_null_input_) {
-      MS_LOG(WARNING) << "For 'AdagradGpuKernel', input is null";
       InitSizeLists();
       return true;
     }

@@ -121,6 +123,7 @@ class AdagradGpuKernel : public GpuKernel {
   size_t gradient_size_;
   bool update_slots;
   bool is_null_input_;
+  std::string kernel_name_;

   std::vector<size_t> input_size_list_;
   std::vector<size_t> output_size_list_;
@@ -18,6 +18,7 @@
 #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_NN_ADAM_GPU_KERNEL_H_

 #include <vector>
+#include <string>
 #include "backend/kernel_compiler/gpu/gpu_kernel.h"
 #include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
 #include "backend/kernel_compiler/gpu/cuda_impl/adam_impl.cuh"

@@ -38,7 +39,8 @@ class AdamGpuKernel : public GpuKernel {
         beta2_size_(0),
         epsilon_size_(0),
         gradient_size_(0),
-        is_null_input_(false) {}
+        is_null_input_(false),
+        kernel_name_("Adam") {}

   ~AdamGpuKernel() override = default;

@@ -67,10 +69,11 @@ class AdamGpuKernel : public GpuKernel {
   }

   bool Init(const CNodePtr &kernel_node) override {
+    kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
     size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
     if (input_num != INPUT_NUM) {
-      MS_LOG(ERROR) << "Input number is " << input_num << ", but adam needs " << INPUT_NUM << " inputs.";
-      return false;
+      MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be " << INPUT_NUM << ", but got "
+                        << input_num;
     }

     variable_size_ = sizeof(T);

@@ -88,10 +91,10 @@ class AdamGpuKernel : public GpuKernel {
     auto m_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
     auto v_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
     auto gradient_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 9);
-    is_null_input_ = CHECK_NULL_INPUT(variable_shape) || CHECK_NULL_INPUT(m_shape) || CHECK_NULL_INPUT(v_shape) ||
-                     CHECK_NULL_INPUT(gradient_shape);
+    is_null_input_ = CHECK_SHAPE_NULL(variable_shape, kernel_name_, "var") ||
+                     CHECK_SHAPE_NULL(m_shape, kernel_name_, "m") || CHECK_SHAPE_NULL(v_shape, kernel_name_, "v") ||
+                     CHECK_SHAPE_NULL(gradient_shape, kernel_name_, "gradient");
     if (is_null_input_) {
-      MS_LOG(WARNING) << "For 'AdamGpuKernel', input is null.";
       InitSizeLists();
       return true;
     }

@@ -144,6 +147,7 @@ class AdamGpuKernel : public GpuKernel {
   size_t epsilon_size_;
   size_t gradient_size_;
   bool is_null_input_;
+  std::string kernel_name_;

   std::vector<size_t> input_size_list_;
   std::vector<size_t> output_size_list_;
@@ -18,6 +18,7 @@
 #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_NN_ADAM_WEIGHT_DECAY_GPU_KERNEL_H_

 #include <vector>
+#include <string>
 #include "backend/kernel_compiler/gpu/gpu_kernel.h"
 #include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
 #include "backend/kernel_compiler/gpu/cuda_impl/adam_impl.cuh"

@@ -37,7 +38,8 @@ class AdamWeightDecayGpuKernel : public GpuKernel {
         epsilon_size_(0),
         decay_size_(0),
         gradient_size_(0),
-        is_null_input_(false) {}
+        is_null_input_(false),
+        kernel_name_("AdamWeightDecay") {}

   ~AdamWeightDecayGpuKernel() override = default;

@@ -65,10 +67,11 @@ class AdamWeightDecayGpuKernel : public GpuKernel {
   }

   bool Init(const CNodePtr &kernel_node) override {
+    kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
     size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
     if (input_num != INPUT_NUM) {
-      MS_LOG(ERROR) << "Input number is " << input_num << ", but adam needs " << INPUT_NUM << " inputs.";
-      return false;
+      MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be " << INPUT_NUM << ", but got "
+                        << input_num;
     }

     variable_size_ = sizeof(T);

@@ -85,10 +88,10 @@ class AdamWeightDecayGpuKernel : public GpuKernel {
     auto m_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
     auto v_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
     auto gradient_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 8);
-    is_null_input_ = CHECK_NULL_INPUT(variable_shape) || CHECK_NULL_INPUT(m_shape) || CHECK_NULL_INPUT(v_shape) ||
-                     CHECK_NULL_INPUT(gradient_shape);
+    is_null_input_ = CHECK_SHAPE_NULL(variable_shape, kernel_name_, "var") ||
+                     CHECK_SHAPE_NULL(m_shape, kernel_name_, "m") || CHECK_SHAPE_NULL(v_shape, kernel_name_, "v") ||
+                     CHECK_SHAPE_NULL(gradient_shape, kernel_name_, "gradient");
     if (is_null_input_) {
-      MS_LOG(WARNING) << "For 'AdamWeightDecayGpuKernel', input is null";
       InitSizeLists();
       return true;
     }

@@ -139,6 +142,7 @@ class AdamWeightDecayGpuKernel : public GpuKernel {
   size_t decay_size_;
   size_t gradient_size_;
   bool is_null_input_;
+  std::string kernel_name_;

   std::vector<size_t> input_size_list_;
   std::vector<size_t> output_size_list_;
@@ -38,7 +38,8 @@ class AdaptiveAvgPool2DKernel : public GpuKernel {
         output_height(0),
         output_width(0),
         size(0),
-        is_null_input_(false) {}
+        is_null_input_(false),
+        kernel_name_("AdaptiveAvgPool2D") {}
   ~AdaptiveAvgPool2DKernel() override = default;

   const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }

@@ -60,6 +61,7 @@ class AdaptiveAvgPool2DKernel : public GpuKernel {
   }

   bool Init(const CNodePtr &kernel_node) override {
+    kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
     auto shape_addr = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, "output_size");
     if (shape_addr.size() == 1) {
       output_height = shape_addr[0];

@@ -68,14 +70,13 @@ class AdaptiveAvgPool2DKernel : public GpuKernel {
       output_height = static_cast<uint>(shape_addr[0]);
       output_width = static_cast<uint>(shape_addr[1]);
     } else {
-      MS_LOG(ERROR) << "Input Error.";
-      return false;
+      MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'output_size' should be 1 or 2, but got "
+                        << shape_addr.size();
     }

     size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
     if (input_num != 1) {
-      MS_LOG(ERROR) << "Input number is " << input_num << ", but adaptive_avg_pool2d needs 1 inputs.";
-      return false;
+      MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be 1, but got " << input_num;
     }

     input_size_ = sizeof(T);

@@ -83,17 +84,17 @@ class AdaptiveAvgPool2DKernel : public GpuKernel {

     auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
     auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
-    is_null_input_ = CHECK_NULL_INPUT(input_shape) || CHECK_NULL_INPUT(output_shape);
+    is_null_input_ =
+      CHECK_SHAPE_NULL(input_shape, kernel_name_, "input") || CHECK_SHAPE_NULL(output_shape, kernel_name_, "output");
     if (is_null_input_) {
-      MS_LOG(WARNING) << "For 'AdaptiveAvgPool2dGpuKernel', input or output is null";
       InitSizeLists();
       return true;
     }
     len = static_cast<uint>(input_shape.size());

     if (len < 2) {
-      MS_LOG(ERROR) << "The input should have rank at least 2.";
-      return false;
+      MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input cannot be less than 2, but got "
+                        << len;
     }

     input_height = static_cast<uint>(input_shape[len - 2]);

@@ -127,6 +128,7 @@ class AdaptiveAvgPool2DKernel : public GpuKernel {
   uint output_width;
   uint size;
   bool is_null_input_;
+  std::string kernel_name_;

   std::vector<size_t> input_size_list_;
   std::vector<size_t> output_size_list_;
@@ -39,7 +39,9 @@ class AdaptiveAvgPool2DGradKernel : public GpuKernel {
         input_width_(0),
         output_height_(0),
         output_width_(0),
-        size_(0) {}
+        size_(0),
+        is_null_input_(false),
+        kernel_name_("AdaptiveAvgPool2DGrad") {}
   ~AdaptiveAvgPool2DGradKernel() override = default;

   const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }

@@ -48,6 +50,9 @@ class AdaptiveAvgPool2DGradKernel : public GpuKernel {

   bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
               const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
+    if (is_null_input_) {
+      return true;
+    }
     T *dy_addr = GetDeviceAddress<T>(inputs, 1);
     T *dx_addr = GetDeviceAddress<T>(outputs, 0);

@@ -58,11 +63,11 @@ class AdaptiveAvgPool2DGradKernel : public GpuKernel {
   }

   bool Init(const CNodePtr &kernel_node) override {
+    kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
     size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
     if (input_num != kAdaptiveAvgPool2dGradInputNum) {
-      MS_LOG(ERROR) << "Input number is " << input_num << ", but AdaptiveAvgPool2DGrad needs "
-                    << kAdaptiveAvgPool2dGradInputNum << " inputs.";
-      return false;
+      MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be "
+                        << kAdaptiveAvgPool2dGradInputNum << ", but got " << input_num;
     }

     input_size_ = sizeof(T);

@@ -70,12 +75,22 @@ class AdaptiveAvgPool2DGradKernel : public GpuKernel {

     auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);  // dy
     auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);         // dx
+    is_null_input_ =
+      CHECK_SHAPE_NULL(input_shape, kernel_name_, "input") || CHECK_SHAPE_NULL(output_shape, kernel_name_, "output");
+    if (is_null_input_) {
+      InitSizeLists();
+      return true;
+    }

     auto input_rank = input_shape.size();
     auto output_rank = output_shape.size();
-    if (input_rank < kAdaptiveAvgPool2dGradMinRank || output_rank < kAdaptiveAvgPool2dGradMinRank) {
-      MS_LOG(ERROR) << "The input or output should have rank at least 2.";
-      return false;
+    if (input_rank < kAdaptiveAvgPool2dGradMinRank) {
+      MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input cannot be less than "
+                        << kAdaptiveAvgPool2dGradMinRank << ", but got " << input_rank;
+    }
+    if (output_rank < kAdaptiveAvgPool2dGradMinRank) {
+      MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of output cannot be less than "
+                        << kAdaptiveAvgPool2dGradMinRank << ", but got " << output_rank;
+    }
     input_height_ = static_cast<uint>(input_shape[input_rank - 2]);
     input_width_ = static_cast<uint>(input_shape[input_rank - 1]);

@@ -109,6 +124,8 @@ class AdaptiveAvgPool2DGradKernel : public GpuKernel {
   uint output_height_;
   uint output_width_;
   uint size_;
+  bool is_null_input_;
+  std::string kernel_name_;

   std::vector<size_t> input_size_list_;
   std::vector<size_t> output_size_list_;
@@ -18,6 +18,7 @@
 #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_NN_APPLY_GRADIENT_DESCENT_GPU_KERNEL_H_

 #include <vector>
+#include <string>
 #include "backend/kernel_compiler/gpu/gpu_kernel.h"
 #include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
 #include "backend/kernel_compiler/gpu/cuda_impl/apply_gradient_descent_impl.cuh"

@@ -49,19 +50,19 @@ class ApplyGradientDescentKernel : public GpuKernel {
   }

   bool Init(const CNodePtr &kernel_node) override {
+    kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
     kernel_node_ = kernel_node;
     size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
     if (input_num != 3) {
-      MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but ApplyGradientDescent needs 3 inputs.";
+      MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be 3, but got " << input_num;
     }
     size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
     if (output_num != 1) {
-      MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but ApplyGradientDescent has 1 output.";
+      MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of output should be 1, but got " << output_num;
     }
     auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
-    is_null_input_ = CHECK_NULL_INPUT(input_shape);
+    is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name_, "var");
     if (is_null_input_) {
-      MS_LOG(WARNING) << "For 'ApplyGradientDescentGpuKernel', input is null.";
       InitSizeLists();
       return true;
     }

@@ -79,6 +80,7 @@ class ApplyGradientDescentKernel : public GpuKernel {
     input_size_list_.clear();
     output_size_list_.clear();
     workspace_size_list_.clear();
+    kernel_name_ = "ApplyGradientDescent";
   }

  protected:

@@ -92,6 +94,7 @@ class ApplyGradientDescentKernel : public GpuKernel {
  private:
   size_t input_size_;
   bool is_null_input_;
+  std::string kernel_name_;
   std::vector<size_t> input_size_list_;
   std::vector<size_t> output_size_list_;
   std::vector<size_t> workspace_size_list_;
@@ -92,7 +92,8 @@ class BatchNormGpuKernel : public GpuKernel {
     } else if (kernel_name == kBatchNormWithAddAndActivation) {
       bn_ops_ = CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION;
     } else {
-      MS_LOG(EXCEPTION) << "Invalid kernel name: " << kernel_name;
+      MS_LOG(EXCEPTION) << "Only support these kernel names: " << kBatchNorm << ", " << kBatchNormWithActivation << ", "
+                        << kBatchNormWithAddAndActivation << ", but got " << kernel_name;
     }

     InitResource();

@@ -104,23 +105,23 @@ class BatchNormGpuKernel : public GpuKernel {
     size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
     if (bn_ops_ == CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION) {
       if (input_num != CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION_INPUT_NUM) {
-        MS_LOG(EXCEPTION) << "input tensor size is " << input_num << ", " << kernel_name << " should be "
-                          << CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION_INPUT_NUM;
+        MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of input should be "
+                          << CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION_INPUT_NUM << ", but got " << input_num;
       }
     } else {
       if (input_num != NO_CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION_INPUT_NUM) {
-        MS_LOG(EXCEPTION) << "input tensor size is " << input_num << ", " << kernel_name << " should be "
-                          << NO_CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION_INPUT_NUM;
+        MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of input should be "
+                          << NO_CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION_INPUT_NUM << ", but got " << input_num;
       }
     }

     auto shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
     if (shape.size() != 4 && shape.size() != 2) {
-      MS_LOG(EXCEPTION) << "tensor shape is " << shape.size() << ", BatchNormGpuKernel should be 2D or 4D";
+      MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input should be 2 or 4, but got "
+                        << shape.size();
     }
-    is_null_input_ = CHECK_NULL_INPUT(shape);
+    is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name, "input");
     if (is_null_input_) {
-      MS_LOG(WARNING) << "For 'BatchNormGpuKernel', input is null";
       InitSizeLists();
       return true;
     }
@@ -124,7 +124,9 @@ class BatchNormGradGpuKernel : public GpuKernel {
     } else if (kernel_name == kBatchNormGradWithAddAndActivation) {
       bn_ops_ = CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION;
     } else {
-      MS_LOG(EXCEPTION) << "Invalid kernel name: " << kernel_name;
+      MS_LOG(EXCEPTION) << "Only support these kernel names: " << kBatchNormGradOpName << ", "
+                        << kBatchNormGradWithActivation << ", " << kBatchNormGradWithAddAndActivation << ", but got "
+                        << kernel_name;
     }

     InitResource();

@@ -134,23 +136,23 @@ class BatchNormGradGpuKernel : public GpuKernel {
     size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
     if (bn_ops_ == CUDNN_BATCHNORM_OPS_BN) {
       if (input_num != CUDNN_BATCHNORM_OPS_BN_INPUT_NUM) {
-        MS_LOG(EXCEPTION) << "input tensor size is " << input_num << ", " << kernel_name << " should be "
-                          << CUDNN_BATCHNORM_OPS_BN_INPUT_NUM;
+        MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of input should be "
+                          << CUDNN_BATCHNORM_OPS_BN_INPUT_NUM << ", but got " << input_num;
       }
     } else {
       if (input_num != NO_CUDNN_BATCHNORM_OPS_BN_INPUT_NUM) {
-        MS_LOG(EXCEPTION) << "input tensor size is " << input_num << ", " << kernel_name << " should be "
-                          << NO_CUDNN_BATCHNORM_OPS_BN_INPUT_NUM;
+        MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of input should be "
+                          << NO_CUDNN_BATCHNORM_OPS_BN_INPUT_NUM << ", but got " << input_num;
       }
     }

     auto shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
     if (shape.size() != 4 && shape.size() != 2) {
-      MS_LOG(EXCEPTION) << "tensor shape is " << shape.size() << ", BatchNormGradGpuKernel should be 2D or 4D";
+      MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input should be 2 or 4, but got "
+                        << shape.size();
     }
-    is_null_input_ = CHECK_NULL_INPUT(shape);
+    is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name, "input");
     if (is_null_input_) {
-      MS_LOG(WARNING) << "For 'BatchNormGradGpuKernel', input is null";
       InitSizeLists();
       return true;
     }
@@ -18,6 +18,7 @@
 #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_NN_BCE_WITH_LOGITS_LOSS_KERNEL_H_

 #include <vector>
+#include <string>
 #include "backend/kernel_compiler/gpu/gpu_kernel.h"
 #include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
 #include "backend/kernel_compiler/gpu/cuda_impl/bce_with_logits_loss_impl.cuh"

@@ -68,41 +69,42 @@ class BCEWithLogitsLossKernel : public GpuKernel {
   }

   bool Init(const CNodePtr &kernel_node) override {
+    kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
     kernel_node_ = kernel_node;
     size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
     if (input_num != 4) {
-      MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but BCEWithLogitsLoss needs 4 inputs.";
+      MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be 4, but got " << input_num;
     }
     size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
     if (output_num != 1) {
-      MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but BCEWithLogitsLoss has 1 output.";
+      MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of output should be 1, but got " << output_num;
     }
     input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
     weight_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
     pos_weight_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
-    is_null_input_ =
-      CHECK_NULL_INPUT(input_shape_) || CHECK_NULL_INPUT(weight_shape_) || CHECK_NULL_INPUT(pos_weight_shape_);
+    is_null_input_ = CHECK_SHAPE_NULL(input_shape_, kernel_name_, "logits") ||
+                     CHECK_SHAPE_NULL(weight_shape_, kernel_name_, "weight") ||
+                     CHECK_SHAPE_NULL(pos_weight_shape_, kernel_name_, "pos_weight");
     if (is_null_input_) {
-      MS_LOG(WARNING) << "For 'BCEWithLogitsLossGpuKernel', input is null.";
       InitSizeLists();
       return true;
     }
     if (input_shape_.size() < 1) {
-      MS_LOG(EXCEPTION) << "For 'BCEWithLogitsLossGpuKernel', the rank of input cannot be less than 1, but got "
+      MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of logits cannot be less than 1, but got "
                         << input_shape_.size();
     }
     if (weight_shape_.size() < 1) {
-      MS_LOG(EXCEPTION) << "For 'BCEWithLogitsLossGpuKernel', the rank of weight cannot be less than 1, but got "
+      MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of weight cannot be less than 1, but got "
                         << weight_shape_.size();
     }
     if (pos_weight_shape_.size() < 1) {
-      MS_LOG(EXCEPTION) << "For 'BCEWithLogitsLossGpuKernel', the rank of pos_weight cannot be less than 1, but got "
+      MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of pos_weight cannot be less than 1, but got "
                         << pos_weight_shape_.size();
     }
     input_size_ = 1;
     if (input_shape_.size() > MAX_LOGITS_DIMENSION) {
-      MS_LOG(EXCEPTION) << "Input dimension is " << input_shape_.size()
-                        << ", but BCEWithLogitsLoss can only support up to " << MAX_LOGITS_DIMENSION << "-D.";
+      MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of logits cannot be greater than "
+                        << MAX_LOGITS_DIMENSION << ", but got " << input_shape_.size();
     }
     for (size_t i = 0; i < input_shape_.size(); i++) {
       input_size_ *= input_shape_[i];

@@ -130,6 +132,7 @@ class BCEWithLogitsLossKernel : public GpuKernel {
     weight_need_broadcast_ = false;
     pos_weight_need_broadcast_ = false;
     is_null_input_ = false;
+    kernel_name_ = "BCEWithLogitsLoss";
     input_shape_.clear();
     weight_shape_.clear();
     pos_weight_shape_.clear();

@@ -176,6 +179,7 @@ class BCEWithLogitsLossKernel : public GpuKernel {
   bool weight_need_broadcast_;
   bool pos_weight_need_broadcast_;
   bool is_null_input_;
+  std::string kernel_name_;
   std::vector<size_t> input_shape_;
   std::vector<size_t> weight_shape_;
   std::vector<size_t> pos_weight_shape_;
@@ -61,26 +61,27 @@ class BiasAddGpuKernel : public GpuKernel {
     return true;
   }
   bool Init(const CNodePtr &kernel_node) override {
+    kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
     kernel_node_ = kernel_node;
     InitResource();
     cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0)));
     auto x_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
     auto num_dims = x_shape.size();
-    is_null_input_ = CHECK_NULL_INPUT(x_shape);
+    is_null_input_ = CHECK_SHAPE_NULL(x_shape, kernel_name_, "input_x");
     if (is_null_input_) {
-      MS_LOG(WARNING) << "For 'BiasAddGpuKernel', input is null";
       InitSizeLists();
       return true;
     }

     if (num_dims < 2) {
-      MS_LOG(EXCEPTION) << "input dims must be at least 2, but got " << num_dims;
+      MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input_x cannot be less than 2, but got "
+                        << num_dims;
     }

     std::string format = GetAttr<std::string>(kernel_node, "format");
     string::size_type pos = format.find("C");
     if (pos == std::string::npos || pos >= num_dims) {
-      MS_LOG(EXCEPTION) << "format '" << format << "' invalid";
+      MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', 'C' character should be in 'format', but got " << format;
     }

     // Expand to 4 dims for cudnnSetTensorNdDescriptorEx.

@@ -118,6 +119,7 @@ class BiasAddGpuKernel : public GpuKernel {
     b_desc_ = nullptr;
     op_desc_ = nullptr;
     is_null_input_ = false;
+    kernel_name_ = "BiasAdd";
     input_size_list_.clear();
     output_size_list_.clear();
     workspace_size_list_.clear();

@@ -161,6 +163,7 @@ class BiasAddGpuKernel : public GpuKernel {
   cudnnTensorDescriptor_t b_desc_;
   cudnnOpTensorDescriptor_t op_desc_;
   bool is_null_input_;
+  std::string kernel_name_;
   std::vector<size_t> input_size_list_;
   std::vector<size_t> output_size_list_;
   std::vector<size_t> workspace_size_list_;
@@ -33,6 +33,8 @@ class BiasAddGradGpuKernel : public GpuKernel {
  public:
   BiasAddGradGpuKernel()
       : same_dims_(true),
+        is_null_input_(false),
+        kernel_name_("BiasAddGrad"),
         use_cudnn_(false),
         dy_num_(1),
         db_num_(1),

@@ -83,11 +85,11 @@ class BiasAddGradGpuKernel : public GpuKernel {
     return true;
   }
   bool Init(const CNodePtr &kernel_node) override {
+    kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
     kernel_node_ = kernel_node;
     auto dy_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
-    is_null_input_ = CHECK_NULL_INPUT(dy_shape);
+    is_null_input_ = CHECK_SHAPE_NULL(dy_shape, kernel_name_, "input");
     if (is_null_input_) {
-      MS_LOG(WARNING) << "For 'BiasAddGradGpuKernel', input is null";
       InitSizeLists();
       return true;
     }

@@ -96,12 +98,13 @@ class BiasAddGradGpuKernel : public GpuKernel {
     cudnn_compute_format_ = (input_device_format == kOpFormat_NHWC) ? CUDNN_TENSOR_NHWC : CUDNN_TENSOR_NCHW;
     num_dims_ = dy_shape.size();
     if (num_dims_ < 2) {
-      MS_LOG(EXCEPTION) << "input dims must be at least 2, but got " << num_dims_;
+      MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input cannot be less than 2, but got "
+                        << num_dims_;
     }
     std::string format = GetAttr<std::string>(kernel_node, "format");
     string::size_type pos = format.find("C");
     if (pos == std::string::npos || pos >= num_dims_) {
-      MS_LOG(EXCEPTION) << "format '" << format << "' invalid";
+      MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', 'C' character should be in 'format', but got " << format;
     }
     bias_size_ = dy_shape[pos];
     auto num_dims_fix = std::max(num_dims_, 4UL);

@@ -215,6 +218,7 @@ class BiasAddGradGpuKernel : public GpuKernel {
  private:
   bool same_dims_;
   bool is_null_input_;
+  std::string kernel_name_;
   bool use_cudnn_;
   size_t dy_num_;  // for own implementation
   size_t db_num_;
@@ -29,7 +29,13 @@ namespace kernel {
 template <typename T>
 class BinaryCrossEntropyGpuKernel : public GpuKernel {
  public:
-  BinaryCrossEntropyGpuKernel() : weight_defined_(false), input_size_(1), reduction_(1), workspace_size_(1) {}
+  BinaryCrossEntropyGpuKernel()
+      : weight_defined_(false),
+        is_null_input_(false),
+        kernel_name_("BinaryCrossEntropy"),
+        input_size_(1),
+        reduction_(1),
+        workspace_size_(1) {}
   ~BinaryCrossEntropyGpuKernel() override = default;
   const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
   const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }

@@ -55,10 +61,10 @@ class BinaryCrossEntropyGpuKernel : public GpuKernel {
   }

   bool Init(const CNodePtr &kernel_node) override {
+    kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
     auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
-    is_null_input_ = CHECK_NULL_INPUT(input_shape);
+    is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name_, "logits");
     if (is_null_input_) {
-      MS_LOG(WARNING) << "For 'BinaryCrossEntropyGpuKernel', input is null";
       InitSizeLists();
       return true;
     }

@@ -95,6 +101,7 @@ class BinaryCrossEntropyGpuKernel : public GpuKernel {
  private:
   bool weight_defined_;  // true: there are 3 inputs, false: there are 2 inputs(no [weight])
   bool is_null_input_;
+  std::string kernel_name_;
   size_t input_size_;
   int reduction_;
   size_t workspace_size_;
@@ -29,7 +29,12 @@ namespace kernel {
 template <typename T>
 class BinaryCrossEntropyGradGpuKernel : public GpuKernel {
  public:
-  BinaryCrossEntropyGradGpuKernel() : input_size_(1), reduction_(1), weight_defined_(false), is_null_input_(false) {}
+  BinaryCrossEntropyGradGpuKernel()
+      : input_size_(1),
+        reduction_(1),
+        weight_defined_(false),
+        is_null_input_(false),
+        kernel_name_("BinaryCrossEntropyGrad") {}
   ~BinaryCrossEntropyGradGpuKernel() override = default;

   const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }

@@ -57,10 +62,10 @@ class BinaryCrossEntropyGradGpuKernel : public GpuKernel {
   }

   bool Init(const CNodePtr &kernel_node) override {
+    kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
     auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
-    is_null_input_ = CHECK_NULL_INPUT(input_shape);
+    is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name_, "input");
     if (is_null_input_) {
-      MS_LOG(WARNING) << "For 'BinaryCrossEntropyGradGpuKernel', input is null";
       InitSizeLists();
       return true;
     }

@@ -95,6 +100,7 @@ class BinaryCrossEntropyGradGpuKernel : public GpuKernel {
   int reduction_;
   bool weight_defined_;  // true: there are 4 inputs, false: there are 3 inputs(no [weight])
   bool is_null_input_;
+  std::string kernel_name_;
   std::vector<size_t> input_size_list_;
   std::vector<size_t> output_size_list_;
   std::vector<size_t> workspace_size_list_;
@@ -18,6 +18,7 @@
 #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_NN_FUSED_SCALE_MOMENTUM_GPU_KERNEL_H_

 #include <vector>
+#include <string>
 #include <memory>
 #include "backend/kernel_compiler/gpu/gpu_kernel.h"
 #include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"

@@ -27,7 +28,8 @@ namespace kernel {
 template <typename T, typename S>
 class CombineMomentumGpuKernel : public GpuKernel {
  public:
-  CombineMomentumGpuKernel() : element_num_(1), num_(0), input_num_(6), is_null_input_(false) {}
+  CombineMomentumGpuKernel()
+      : element_num_(1), num_(0), input_num_(6), is_null_input_(false), kernel_name_("CombineMomentum") {}
   ~CombineMomentumGpuKernel() override = default;
   const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
   const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }

@@ -62,6 +64,7 @@ class CombineMomentumGpuKernel : public GpuKernel {
     return true;
   }
   bool Init(const CNodePtr &kernel_node) override {
+    kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
     num_ = GetAttr<size_t>(kernel_node, "n");
     auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
     if (kernel_name == "CombineMomentum") {

@@ -72,9 +75,9 @@ class CombineMomentumGpuKernel : public GpuKernel {
     for (size_t i = 0; i < num_; i++) {
       element_num_ = 1;
       auto variable_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, i * input_num_ + input_num_ - 5);
-      is_null_input_ = CHECK_NULL_INPUT(variable_shape);
+      is_null_input_ = CHECK_SHAPE_NULL(variable_shape, kernel_name_,
+                                        "input[" + std::to_string(i * input_num_ + input_num_ - 5) + "]");
       if (is_null_input_) {
-        MS_LOG(WARNING) << "For 'CombineMomentumGpuKernel', input is null";
         InitSizeLists();
         return true;
       }

@@ -107,6 +110,7 @@ class CombineMomentumGpuKernel : public GpuKernel {
   size_t num_;
   int input_num_;
   bool is_null_input_;
+  std::string kernel_name_;
   std::vector<size_t> input_size_list_;
   std::vector<size_t> output_size_list_;
   std::vector<size_t> workspace_size_list_;
@@ -77,11 +77,10 @@ class Conv2dGpuFwdKernel : public GpuKernel {
     return true;
   }
   bool Init(const CNodePtr &kernel_node) override {
+    kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
     kernel_node_ = kernel_node;
     InitResource();
-    if (!CheckParam(kernel_node)) {
-      return false;
-    }
+    (void)CheckParam(kernel_node);
     cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0)));
     data_format_ = AnfAlgo::GetInputFormat(kernel_node, 0);
     auto format_attr = GetAttr<std::string>(kernel_node, "format");

@@ -91,9 +90,10 @@ class Conv2dGpuFwdKernel : public GpuKernel {
     auto in_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
     auto filter_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
     auto output_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
-    is_null_input_ = CHECK_NULL_INPUT(in_shape) || CHECK_NULL_INPUT(filter_shape) || CHECK_NULL_INPUT(output_shape);
+    is_null_input_ = CHECK_SHAPE_NULL(in_shape, kernel_name_, "x") ||
+                     CHECK_SHAPE_NULL(filter_shape, kernel_name_, "weight") ||
+                     CHECK_SHAPE_NULL(output_shape, kernel_name_, "output");
     if (is_null_input_) {
-      MS_LOG(WARNING) << "For 'Conv2dGpuFwdKernel', input or output is null.";
       InitSizeLists();
       return true;
     }

@@ -111,7 +111,7 @@ class Conv2dGpuFwdKernel : public GpuKernel {
     (void)std::transform(pad_list_me.begin(), pad_list_me.end(), std::back_inserter(pad_list),
                          [](const int64_t &value) { return static_cast<int>(value); });
     if (pad_list.size() != 4) {
-      MS_LOG(EXCEPTION) << "Conv2dGpuFwdKernel pad_list must have length 4.";
+      MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'pad' should be 4, but got " << pad_list.size();
     }

     pad_height_ = pad_list[0];

@@ -196,6 +196,7 @@ class Conv2dGpuFwdKernel : public GpuKernel {
     dilation_.clear();
     group_ = 1;
     is_null_input_ = false;
+    kernel_name_ = "Conv2d";
     input_size_ = 0;
     filter_size_ = 0;
     output_size_ = 0;

@@ -275,18 +276,15 @@ class Conv2dGpuFwdKernel : public GpuKernel {
   }

  private:
-  bool CheckParam(const CNodePtr &kernel_node) {
+  void CheckParam(const CNodePtr &kernel_node) {
     size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
     if (input_num != 2) {
-      MS_LOG(ERROR) << "Input number is " << input_num << ", but conv2d needs 2 inputs.";
-      return false;
+      MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be 2, but got " << input_num;
     }
     size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
     if (output_num != 1) {
-      MS_LOG(ERROR) << "Output number is " << output_num << ", but conv2d needs 1 output.";
-      return false;
+      MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of output should be 1, but got " << output_num;
     }
-    return true;
   }

   void Set4DDesc(const std::vector<size_t> &in_shape, const std::vector<size_t> &filter_shape,

@@ -345,16 +343,20 @@ class Conv2dGpuFwdKernel : public GpuKernel {
     (void)std::transform(dilation_me.begin(), dilation_me.end(), std::back_inserter(dilation_),
                          [](const int64_t &value) { return static_cast<int>(value); });
     if (stride_.size() != 4) {
-      MS_LOG(EXCEPTION) << "Conv2d's' stride must be 4d!";
+      MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'stride' should be 4, but got "
+                        << stride_.size();
     }
     if (stride_[0] != 1 || stride_[1] != 1) {
-      MS_LOG(EXCEPTION) << "Conv2d stride only support 1 in N axis and C axis!";
+      MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the value of 'stride' at 0 and 1 axis should be 1, but got "
+                        << "stride[0]: " << stride_[0] << ", stride[1]: " << stride_[1];
     }
     if (dilation_.size() != 4) {
-      MS_LOG(EXCEPTION) << "Conv2d's dilation must be 4d!";
+      MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'dilation' should be 4, but got "
+                        << dilation_.size();
     }
     if (dilation_[0] != 1 || dilation_[1] != 1) {
-      MS_LOG(EXCEPTION) << "Conv2d dilation only support 1 in N axis and C axis!";
+      MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the value of 'dilation' at 0 and 1 axis should be 1, but got "
+                        << "dilation[0]: " << dilation_[0] << ", dilation[1]: " << dilation_[1];
     }
   }
   cudnnHandle_t cudnn_handle_;

@@ -384,6 +386,7 @@ class Conv2dGpuFwdKernel : public GpuKernel {
   std::vector<int> dilation_;
   int group_;
   bool is_null_input_;
+  std::string kernel_name_;
   size_t input_size_;
   size_t filter_size_;
   size_t output_size_;
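From the convolution kernels onward, the private CheckParam helpers also change from bool-returning to void: Init replaces "if (!CheckParam(kernel_node)) { return false; }" with "(void)CheckParam(kernel_node);", and the helper throws via MS_LOG(EXCEPTION) on a bad input or output count. A condensed before/after sketch of that refactor, taken from the Conv2d hunks above:

    // Before: the caller had to propagate failure by hand.
    bool CheckParam(const CNodePtr &kernel_node) {
      size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
      if (input_num != 2) {
        MS_LOG(ERROR) << "Input number is " << input_num << ", but conv2d needs 2 inputs.";
        return false;
      }
      return true;
    }

    // After: the helper throws, so the caller simply invokes it.
    void CheckParam(const CNodePtr &kernel_node) {
      size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
      if (input_num != 2) {
        MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be 2, but got " << input_num;
      }
    }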
@@ -52,6 +52,7 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
         c_(0),
         group_(1),
         is_null_input_(false),
+        kernel_name_("Conv2dGradFilter"),
         input_size_(0),
         dy_size_(0),
         output_size_(0),

@@ -103,17 +104,15 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
     return true;
   }
   bool Init(const CNodePtr &kernel_node) override {
+    kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
     kernel_node_ = kernel_node;
     InitResource();
-    if (!CheckParam(kernel_node)) {
-      return false;
-    }
+    (void)CheckParam(kernel_node);
     cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0)));
     auto dy_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
     auto in_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
-    is_null_input_ = CHECK_NULL_INPUT(dy_shape) || CHECK_NULL_INPUT(in_shape);
+    is_null_input_ = CHECK_SHAPE_NULL(dy_shape, kernel_name_, "dy") || CHECK_SHAPE_NULL(in_shape, kernel_name_, "x");
     if (is_null_input_) {
-      MS_LOG(WARNING) << "For 'ConvGradFilterGpuBkwKernel', input is null.";
       InitSizeLists();
       return true;
     }

@@ -139,7 +138,7 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
     (void)std::transform(pad_list_me.begin(), pad_list_me.end(), std::back_inserter(pad_list),
                          [](const int64_t &value) { return static_cast<int>(value); });
     if (pad_list.size() != 4) {
-      MS_LOG(EXCEPTION) << "For 'Conv2dGradFilterGpuBkwKernel', the length of pad_list must be 4.";
+      MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'pad' should be 4, but got " << pad_list.size();
     }
     pad_height_ = pad_list[0];
     pad_width_ = pad_list[2];

@@ -270,18 +269,15 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
   }

  private:
-  bool CheckParam(const CNodePtr &kernel_node) {
+  void CheckParam(const CNodePtr &kernel_node) {
     size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
     if (input_num != 2) {
-      MS_LOG(ERROR) << "Input number is " << input_num << ", but ConvGradFilter needs 2 inputs.";
-      return false;
+      MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be 2, but got " << input_num;
     }
     size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
     if (output_num != 1) {
-      MS_LOG(ERROR) << "Output number is " << output_num << ", but ConvGradFilter needs 1 output.";
-      return false;
+      MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of output should be 1, but got " << output_num;
     }
-    return true;
   }
   void SelectAlgorithm(cudnnTensorDescriptor_t x_desc_real) {
     constexpr int requested_algo_count = 1;

@@ -345,13 +341,16 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
     (void)std::transform(dilation_me.begin(), dilation_me.end(), std::back_inserter(dilation_),
                          [](const int64_t &value) { return static_cast<int>(value); });
     if (stride_.size() != 2) {
-      MS_LOG(EXCEPTION) << "ConvGradFilterGpuBkwKernel's stride must be 2d!";
+      MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'stride' should be 2, but got "
+                        << stride_.size();
     }
     if (dilation_.size() != 4) {
-      MS_LOG(EXCEPTION) << "ConvGradFilterGpuBkwKernel's dilation must be 4d!";
+      MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'dilation' should be 4, but got "
+                        << dilation_.size();
     }
     if (dilation_[0] != 1 || dilation_[1] != 1) {
-      MS_LOG(EXCEPTION) << "ConvGradFilterGpuBkwKernel dilation only support 1 in N axis and C axis!";
+      MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the value of 'dilation' at 0 and 1 axis should be 1, but got "
+                        << "dilation[0]: " << dilation_[0] << ", dilation[1]: " << dilation_[1];
     }
   }
   cudnnHandle_t cudnn_handle_;

@@ -382,6 +381,7 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
   std::vector<int> dilation_;
   int group_;
   bool is_null_input_;
+  std::string kernel_name_;
   size_t input_size_;
   size_t dy_size_;
   size_t output_size_;
@@ -54,6 +54,7 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
         c_(0),
         group_(1),
         is_null_input_(false),
+        kernel_name_("Conv2dGradInput"),
         dy_size_(0),
         w_size_(0),
         output_size_(0),

@@ -104,9 +105,9 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
   }

   bool CheckNull(const std::vector<size_t> dy_shape, const std::vector<size_t> filter_shape) {
-    is_null_input_ = CHECK_NULL_INPUT(dy_shape) || CHECK_NULL_INPUT(filter_shape);
+    is_null_input_ =
+      CHECK_SHAPE_NULL(dy_shape, kernel_name_, "dy") || CHECK_SHAPE_NULL(filter_shape, kernel_name_, "weight");
     if (is_null_input_) {
-      MS_LOG(WARNING) << "For 'Conv2dGradInputGpuKernel', input is null.";
       InitSizeLists();
       return true;
     }

@@ -114,11 +115,10 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
   }

   bool Init(const CNodePtr &kernel_node) override {
+    kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
     kernel_node_ = kernel_node;
     InitResource();
-    if (!CheckParam(kernel_node)) {
-      return false;
-    }
+    (void)CheckParam(kernel_node);
     cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0)));
     data_format_ = AnfAlgo::GetInputFormat(kernel_node, 0);
     auto format_attr = GetAttr<std::string>(kernel_node, "format");

@@ -152,7 +152,7 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
     (void)std::transform(pad_list_me.begin(), pad_list_me.end(), std::back_inserter(pad_list),
                          [](const int64_t &value) { return static_cast<int>(value); });
     if (pad_list.size() != 4) {
-      MS_LOG(EXCEPTION) << "For 'Conv2dGradInputGpuKernel', the length of pad_list must be 4.";
+      MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'pad' should be 4, but got " << pad_list.size();
     }
     pad_height_ = pad_list[0];
     pad_width_ = pad_list[2];

@@ -279,18 +279,15 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
   }

  private:
-  bool CheckParam(const CNodePtr &kernel_node) {
+  void CheckParam(const CNodePtr &kernel_node) {
     size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
     if (input_num != 2) {
-      MS_LOG(ERROR) << "Input number is " << input_num << ", but ConvGradInput needs 2 inputs.";
-      return false;
+      MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be 2, but got " << input_num;
     }
     size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
     if (output_num != 1) {
-      MS_LOG(ERROR) << "Output number is " << output_num << ", but ConvGradInput needs 1 output.";
-      return false;
+      MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of output should be 1, but got " << output_num;
     }
-    return true;
   }
   void SetPad(const std::vector<int> &input_shape, const CNodePtr &kernel_node) {
     std::vector<int> pad_list;

@@ -369,13 +366,16 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
     (void)std::transform(dilation_me.begin(), dilation_me.end(), std::back_inserter(dilation_),
                          [](const int64_t &value) { return static_cast<int>(value); });
     if (stride_.size() != 2) {
-      MS_LOG(EXCEPTION) << "ConvGradInputGpuBkwKernel's stride must be 2d!";
+      MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'stride' should be 2, but got "
+                        << stride_.size();
     }
     if (dilation_.size() != 4) {
-      MS_LOG(EXCEPTION) << "ConvGradInputGpuBkwKernel's dilation must be 4d!";
+      MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'dilation' should be 4, but got "
+                        << dilation_.size();
     }
     if (dilation_[0] != 1 || dilation_[1] != 1) {
-      MS_LOG(EXCEPTION) << "ConvGradInputGpuBkwKernel dilation only support 1 in N axis and C axis!";
+      MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the value of 'dilation' at 0 and 1 axis should be 1, but got "
+                        << "dilation[0]: " << dilation_[0] << ", dilation[1]: " << dilation_[1];
     }
   }
   cudnnHandle_t cudnn_handle_;

@@ -404,6 +404,7 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
   std::vector<int> dilation_;
   int group_;
   bool is_null_input_;
+  std::string kernel_name_;
   size_t dy_size_;
   size_t w_size_;
   size_t output_size_;
@ -69,35 +69,33 @@ class Conv3dGpuKernel : public GpuKernel {
|
|||
|
||||
return true;
|
||||
}
|
||||
bool CheckNull(const std::vector<size_t> in_shape, const std::vector<size_t> filter_shape,
|
||||
const std::vector<size_t> output_shape) {
|
||||
is_null_input_ = CHECK_NULL_INPUT(in_shape) || CHECK_NULL_INPUT(filter_shape) || CHECK_NULL_INPUT(output_shape);
|
||||
if (is_null_input_) {
|
||||
MS_LOG(WARNING) << "For 'Conv3dGpuKernel', input or output is null.";
|
||||
InitSizeLists();
|
||||
return true;
|
||||
|
||||
void CheckSize(const size_t value, const size_t expect_value, const string arg_name) {
|
||||
if (value != expect_value) {
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of " << arg_name << " should be "
|
||||
<< expect_value << ", but got " << value;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool Init(const CNodePtr &kernel_node) override {
|
||||
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
|
||||
kernel_node_ = kernel_node;
|
||||
InitResource();
|
||||
if (!CheckParam(kernel_node)) {
|
||||
return false;
|
||||
}
|
||||
(void)CheckParam(kernel_node);
|
||||
cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0)));
|
||||
data_format_ = kOpFormat_NCDHW;
|
||||
auto in_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
|
||||
auto filter_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
|
||||
auto output_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
|
||||
if (CheckNull(in_shape, filter_shape, output_shape)) {
|
||||
is_null_input_ = CHECK_SHAPE_NULL(in_shape, kernel_name_, "x") ||
|
||||
CHECK_SHAPE_NULL(filter_shape, kernel_name_, "weight") ||
|
||||
CHECK_SHAPE_NULL(output_shape, kernel_name_, "output");
|
||||
if (is_null_input_) {
|
||||
InitSizeLists();
|
||||
return true;
|
||||
}
|
||||
CheckTensorSize({in_shape});
|
||||
if (in_shape.size() != 5) {
|
||||
MS_LOG(EXCEPTION) << "Conv3dGpuKernel input must have rank 5.";
|
||||
}
|
||||
(void)CheckSize(in_shape.size(), 5, "x");
|
||||
n_ = SizeToInt(in_shape[0]);
|
||||
c_ = SizeToInt(in_shape[1]);
|
||||
old_depth_ = SizeToInt(in_shape[2]);
|
||||
|
@ -113,7 +111,7 @@ class Conv3dGpuKernel : public GpuKernel {
|
|||
(void)std::transform(pad_list_me.begin(), pad_list_me.end(), std::back_inserter(pad_list),
|
||||
[](const int64_t &value) { return static_cast<int>(value); });
|
if (pad_list.size() != 6) {
MS_LOG(EXCEPTION) << "For 'Conv3dGpuBkwKernel', the length of pad_list must be 6, but got " << pad_list.size();
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'pad' should be 6, but got " << pad_list.size();
}
pad_depth_ = pad_list[0];
pad_height_ = pad_list[2];

@ -137,7 +135,8 @@ class Conv3dGpuKernel : public GpuKernel {
int dimA[kNumDims];
int strideApadded[kNumDims];
if (data_format_ != kOpFormat_NCDHW) {
MS_LOG(EXCEPTION) << "Conv3d only support NCDHW format right now.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the value of 'data_format' only support 'NCDHW' right now "
<< ", but got " << data_format_;
}
auto padded_shape = {IntToSize(n_), IntToSize(c_), IntToSize(old_depth_ + pad_depth_),
IntToSize(old_height_ + pad_height_), IntToSize(old_width_ + pad_width_)};

@ -203,6 +202,7 @@ class Conv3dGpuKernel : public GpuKernel {
dilation_.clear();
group_ = 1;
is_null_input_ = false;
kernel_name_ = "Conv3d";
input_size_ = 0;
filter_size_ = 0;
output_size_ = 0;

@ -282,18 +282,15 @@ class Conv3dGpuKernel : public GpuKernel {
}

private:
bool CheckParam(const CNodePtr &kernel_node) {
void CheckParam(const CNodePtr &kernel_node) {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but conv3d needs 2 inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be 2, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but conv3d needs 1 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of output should be 1, but got " << output_num;
}
return true;
}

void SetNDDesc(const std::vector<size_t> &in_shape, const std::vector<size_t> &filter_shape,

@ -341,16 +338,20 @@ class Conv3dGpuKernel : public GpuKernel {
(void)std::transform(dilation_me.begin(), dilation_me.end(), std::back_inserter(dilation_),
[](const int64_t &value) { return static_cast<int>(value); });
if (stride_.size() != 5) {
MS_LOG(EXCEPTION) << "Conv3d's' stride must be 5d, but got " << stride_.size();
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'stride' should be 5, but got "
<< stride_.size();
}
if (stride_[0] != 1 || stride_[1] != 1) {
MS_LOG(EXCEPTION) << "Conv3d stride only support 1 in N axis and C axis!";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the value of 'stride' at 0 and 1 axis should be 1, but got "
<< "stride[0]: " << stride_[0] << ", stride[1]: " << stride_[1];
}
if (dilation_.size() != 5) {
MS_LOG(EXCEPTION) << "Conv3d's dilation must be 5d!";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'dilation' should be 5, but got "
<< dilation_.size();
}
if (dilation_[0] != 1 || dilation_[1] != 1) {
MS_LOG(EXCEPTION) << "Conv3d dilation only support 1 in N axis and C axis!";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the value of 'dilation' at 0 and 1 axis should be 1, but got "
<< "dilation[0]: " << dilation_[0] << ", dilation[1]: " << dilation_[1];
}
}

@ -384,6 +385,7 @@ class Conv3dGpuKernel : public GpuKernel {
std::vector<int> dilation_;
int group_;
bool is_null_input_;
std::string kernel_name_;
size_t input_size_;
size_t filter_size_;
size_t output_size_;
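
Every hunk in this commit converges on one message template: "For '<kernel>', the <property> of '<arg>' should be <expected>, but got <actual>". A minimal standalone sketch of that convention, with a hypothetical FormatCheckMsg helper standing in for the inline MS_LOG(EXCEPTION) stream (the helper name and the plain std::runtime_error are illustrative, not MindSpore API):

#include <sstream>
#include <stdexcept>
#include <string>

// Hypothetical helper: builds the "For 'kernel', the length of 'arg' should be N, but got M"
// text that the new MS_LOG(EXCEPTION) streams above produce inline.
std::string FormatCheckMsg(const std::string &kernel_name, const std::string &arg_name,
                           size_t expected, size_t actual) {
  std::ostringstream oss;
  oss << "For '" << kernel_name << "', the length of '" << arg_name << "' should be " << expected
      << ", but got " << actual;
  return oss.str();
}

void CheckStrideRank(const std::string &kernel_name, size_t stride_rank) {
  if (stride_rank != 5) {  // Conv3d strides are 5-D: N, C, D, H, W
    throw std::runtime_error(FormatCheckMsg(kernel_name, "stride", 5, stride_rank));
  }
}

Calling CheckStrideRank("Conv3d", 4) would raise with the same wording as the replaced kernel check.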

@ -84,33 +84,24 @@ class Conv3dGradFilterGpuKernel : public GpuKernel {
return true;
}

bool CheckNull(const std::vector<size_t> dy_shape, const std::vector<size_t> in_shape) {
is_null_input_ = CHECK_NULL_INPUT(dy_shape) || CHECK_NULL_INPUT(in_shape);
if (is_null_input_) {
MS_LOG(WARNING) << "For 'Conv3dGradInputGpuKernel', input is null.";
InitSizeLists();
return true;
}
return false;
}

void CheckSize(const size_t value, const size_t expect_value, const string arg_name) {
if (value != expect_value) {
MS_LOG(EXCEPTION) << "For 'Conv3dGradFilterGpuKernel', the length of " << arg_name << " must be " << expect_value
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of " << arg_name << " should be " << expect_value
<< ", but got " << value;
}
}

bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
InitResource();
if (!CheckParam(kernel_node)) {
return false;
}
(void)CheckParam(kernel_node);
cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0)));
auto in_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
auto dy_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
if (CheckNull(dy_shape, in_shape)) {
is_null_input_ = CHECK_SHAPE_NULL(in_shape, kernel_name_, "x") || CHECK_SHAPE_NULL(dy_shape, kernel_name_, "dy");
if (is_null_input_) {
InitSizeLists();
return true;
}
CheckTensorSize({in_shape});

@ -124,7 +115,7 @@ class Conv3dGradFilterGpuKernel : public GpuKernel {
}

compute_format_ = CUDNN_TENSOR_NCHW;
(void)CheckSize(in_shape.size(), 5, "in_shape");
(void)CheckSize(in_shape.size(), 5, "input shape");
n_ = SizeToInt(in_shape[0]);
c_ = SizeToInt(in_shape[1]);
old_depth_ = SizeToInt(in_shape[2]);

@ -138,7 +129,7 @@ class Conv3dGradFilterGpuKernel : public GpuKernel {
std::vector<int64_t> pad_list_me = GetAttr<std::vector<int64_t>>(kernel_node, "pad_list");
(void)std::transform(pad_list_me.begin(), pad_list_me.end(), std::back_inserter(pad_list),
[](const int64_t &value) { return static_cast<int>(value); });
(void)CheckSize(pad_list.size(), 6, "pad_list");
(void)CheckSize(pad_list.size(), 6, "pad");
pad_depth_ = pad_list[0];
pad_height_ = pad_list[2];
pad_width_ = pad_list[4];

@ -161,7 +152,8 @@ class Conv3dGradFilterGpuKernel : public GpuKernel {
int dimA[kNumDims];
int strideApadded[kNumDims];
if (data_format_ != kOpFormat_NCDHW) {
MS_LOG(EXCEPTION) << "Conv3dGradFilterGpuKernel only support NCDHW format right now.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the value of 'data_format' only support 'NCDHW' right now "
<< ", but got " << data_format_;
}
auto padded_shape = {IntToSize(n_), IntToSize(c_), IntToSize(old_depth_ + pad_depth_),
IntToSize(old_height_ + pad_height_), IntToSize(old_width_ + pad_width_)};

@ -224,6 +216,7 @@ class Conv3dGradFilterGpuKernel : public GpuKernel {
c_ = 0;
group_ = 1;
is_null_input_ = false;
kernel_name_ = "Conv3dGradFilter";
input_size_ = 0;
dy_size_ = 0;
output_size_ = 0;

@ -306,18 +299,15 @@ class Conv3dGradFilterGpuKernel : public GpuKernel {
}

private:
bool CheckParam(const CNodePtr &kernel_node) {
void CheckParam(const CNodePtr &kernel_node) {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but Conv3dGradFilterGpuKernel needs 2 inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be 2, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but Conv3dGradFilterGpuKernel needs 1 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of output should be 1, but got " << output_num;
}
return true;
}

void SelectAlgorithm(cudnnTensorDescriptor_t x_desc_real) {

@ -375,16 +365,20 @@ class Conv3dGradFilterGpuKernel : public GpuKernel {
(void)std::transform(dilation_me.begin(), dilation_me.end(), std::back_inserter(dilation_),
[](const int64_t &value) { return static_cast<int>(value); });
if (stride_.size() != 5) {
MS_LOG(EXCEPTION) << "Conv3dGradFilterGpuKernel stride must be 5d, but got " << stride_.size();
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'stride' should be 5, but got "
<< stride_.size();
}
if (stride_[0] != 1 || stride_[1] != 1) {
MS_LOG(EXCEPTION) << "Conv3dGradFilterGpuKernel stride only support 1 in N axis and C axis!";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the value of 'stride' at 0 and 1 axis should be 1, but got "
<< "stride[0]: " << stride_[0] << ", stride[1]: " << stride_[1];
}
if (dilation_.size() != 5) {
MS_LOG(EXCEPTION) << "Conv3dGradFilterGpuKernel dilation must be 5d!";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'dilation' should be 5, but got "
<< dilation_.size();
}
if (dilation_[0] != 1 || dilation_[1] != 1) {
MS_LOG(EXCEPTION) << "Conv3dGradFilterGpuKernel dilation only support 1 in N axis and C axis!";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the value of 'dilation' at 0 and 1 axis should be 1, but got "
<< "dilation[0]: " << dilation_[0] << ", dilation[1]: " << dilation_[1];
}
}

@ -418,6 +412,7 @@ class Conv3dGradFilterGpuKernel : public GpuKernel {
std::vector<int> dilation_;
int group_;
bool is_null_input_;
std::string kernel_name_;
size_t input_size_;
size_t dy_size_;
size_t output_size_;
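
The CheckParam rewrite above repeats across every kernel in this commit: a bool-returning check (MS_LOG(ERROR) plus return false, which each caller had to remember to test) becomes a void fail-fast check that raises immediately. A reduced sketch of the two styles under an assumed KernelNode stand-in (CNodePtr introspection is elided):

#include <cstddef>
#include <stdexcept>
#include <string>

struct KernelNode {  // illustrative stand-in for the real CNodePtr queries
  std::size_t input_num;
  std::size_t output_num;
};

// Old style: the result must be checked at every call site.
bool CheckParamOld(const KernelNode &node) {
  if (node.input_num != 2) return false;
  if (node.output_num != 1) return false;
  return true;
}

// New style: a violation throws, so Init() cannot silently continue.
void CheckParamNew(const std::string &kernel_name, const KernelNode &node) {
  if (node.input_num != 2) {
    throw std::runtime_error("For '" + kernel_name + "', the number of input should be 2, but got " +
                             std::to_string(node.input_num));
  }
  if (node.output_num != 1) {
    throw std::runtime_error("For '" + kernel_name + "', the number of output should be 1, but got " +
                             std::to_string(node.output_num));
  }
}

This is why the call sites shrink to (void)CheckParam(kernel_node): the cast documents that there is no longer a result to test.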

@ -70,36 +70,33 @@ class Conv3dGradInputGpuKernel : public GpuKernel {
return true;
}

bool CheckNull(const std::vector<size_t> dy_shape, const std::vector<size_t> filter_shape) {
is_null_input_ = CHECK_NULL_INPUT(dy_shape) || CHECK_NULL_INPUT(filter_shape);
if (is_null_input_) {
MS_LOG(WARNING) << "For 'Conv3dGradInputGpuKernel', input is null.";
InitSizeLists();
return true;
void CheckSize(const size_t value, const size_t expect_value, const string arg_name) {
if (value != expect_value) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of " << arg_name << " should be "
<< expect_value << ", but got " << value;
}
return false;
}

bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
InitResource();
if (!CheckParam(kernel_node)) {
return false;
}
(void)CheckParam(kernel_node);
cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0)));
data_format_ = kOpFormat_NCDHW;
auto filter_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
auto dy_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
if (CheckNull(dy_shape, filter_shape)) {
is_null_input_ =
CHECK_SHAPE_NULL(filter_shape, kernel_name_, "weight") || CHECK_SHAPE_NULL(dy_shape, kernel_name_, "dy");
if (is_null_input_) {
InitSizeLists();
return true;
}
std::vector<size_t> input_shape;
GetInputShape(kernel_node, &input_shape);
compute_format_ = CUDNN_TENSOR_NCHW;
CheckTensorSize({input_shape});
if (input_shape.size() != 5) {
MS_LOG(EXCEPTION) << "For 'Conv3dGradInputGpuBkwKernel', the dimension of input must be 5.";
}
(void)CheckSize(input_shape.size(), 5, "input");
n_ = SizeToInt(input_shape[0]);
c_ = SizeToInt(input_shape[1]);
old_depth_ = SizeToInt(input_shape[2]);

@ -114,7 +111,7 @@ class Conv3dGradInputGpuKernel : public GpuKernel {
(void)std::transform(pad_list_me.begin(), pad_list_me.end(), std::back_inserter(pad_list),
[](const int64_t &value) { return static_cast<int>(value); });
if (pad_list.size() != 6) {
MS_LOG(EXCEPTION) << "For 'Conv3dGradInputGpuBkwKernel', the length of pad_list must be 6.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'pad' should be 6, but got " << pad_list.size();
}
pad_depth_ = pad_list[0];
pad_height_ = pad_list[2];

@ -138,7 +135,8 @@ class Conv3dGradInputGpuKernel : public GpuKernel {
int dimA[kNumDims];
int strideApadded[kNumDims];
if (data_format_ != kOpFormat_NCDHW) {
MS_LOG(EXCEPTION) << "Conv3dGradInputGpuKernel only support NCDHW format right now.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the value of 'data_format' only support 'NCDHW' right now "
<< ", but got " << data_format_;
}
auto padded_shape = {IntToSize(n_), IntToSize(c_), IntToSize(old_depth_ + pad_depth_),
IntToSize(old_height_ + pad_height_), IntToSize(old_width_ + pad_width_)};

@ -203,6 +201,7 @@ class Conv3dGradInputGpuKernel : public GpuKernel {
c_ = 0;
group_ = 1;
is_null_input_ = false;
kernel_name_ = "Conv3dGradInput";
dy_size_ = 0;
w_size_ = 0;
output_size_ = 0;

@ -276,18 +275,15 @@ class Conv3dGradInputGpuKernel : public GpuKernel {
}

private:
bool CheckParam(const CNodePtr &kernel_node) {
void CheckParam(const CNodePtr &kernel_node) {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but Conv3dGradInputGpuKernel needs 2 inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be 2, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but Conv3dGradInputGpuKernel needs 1 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of output should be 1, but got " << output_num;
}
return true;
}

void SetPad(const std::vector<int> &input_shape, const CNodePtr &kernel_node) {

@ -350,16 +346,20 @@ class Conv3dGradInputGpuKernel : public GpuKernel {
(void)std::transform(dilation_me.begin(), dilation_me.end(), std::back_inserter(dilation_),
[](const int64_t &value) { return static_cast<int>(value); });
if (stride_.size() != 5) {
MS_LOG(EXCEPTION) << "Conv3dGradInputGpuKernel stride must be 5d, but got " << stride_.size();
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'stride' should be 5, but got "
<< stride_.size();
}
if (stride_[0] != 1 || stride_[1] != 1) {
MS_LOG(EXCEPTION) << "Conv3dGradInputGpuKernel stride only support 1 in N axis and C axis!";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the value of 'stride' at 0 and 1 axis should be 1, but got "
<< "stride[0]: " << stride_[0] << ", stride[1]: " << stride_[1];
}
if (dilation_.size() != 5) {
MS_LOG(EXCEPTION) << "Conv3dGradInputGpuKernel dilation must be 5d!";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'dilation' should be 5, but got "
<< dilation_.size();
}
if (dilation_[0] != 1 || dilation_[1] != 1) {
MS_LOG(EXCEPTION) << "Conv3dGradInputGpuKernel dilation only support 1 in N axis and C axis!";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the value of 'dilation' at 0 and 1 axis should be 1, but got "
<< "dilation[0]: " << dilation_[0] << ", dilation[1]: " << dilation_[1];
}
}

@ -392,6 +392,7 @@ class Conv3dGradInputGpuKernel : public GpuKernel {
std::vector<int> dilation_;
int group_;
bool is_null_input_;
std::string kernel_name_;
size_t dy_size_;
size_t w_size_;
size_t output_size_;
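
CheckSize, introduced above and reused by Conv3dTranspose below, centralizes the length and dimension assertions that were previously inlined per call site. A self-contained approximation, with std::runtime_error substituting for MS_LOG(EXCEPTION) (that substitution is the only liberty taken):

#include <cstddef>
#include <stdexcept>
#include <string>

void CheckSize(std::size_t value, std::size_t expect_value, const std::string &arg_name,
               const std::string &kernel_name) {
  if (value != expect_value) {
    throw std::runtime_error("For '" + kernel_name + "', the dimension of " + arg_name +
                             " should be " + std::to_string(expect_value) + ", but got " +
                             std::to_string(value));
  }
}

// Usage mirroring the diff: (void)CheckSize(input_shape.size(), 5, "input", kernel_name_);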

@ -68,7 +68,8 @@ class Conv3dTransposeGpuFwdKernel : public GpuKernel {
old_width_ + (1 + stride_[4]) * pad_width_, pad_head_, pad_top_, pad_left_, output_addr,
reinterpret_cast<cudaStream_t>(stream_ptr));
} else {
MS_LOG(EXCEPTION) << "ConvTranspose3d only support NCDHW format right now.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the value of 'data_format' only support 'NCDHW' right now "
<< ", but got " << data_format_;
}
} else {
if (greater_stride_) {

@ -94,9 +95,9 @@ class Conv3dTransposeGpuFwdKernel : public GpuKernel {
}

bool CheckNull(const std::vector<size_t> filter_shape, const std::vector<size_t> input_shape) {
is_null_input_ = CHECK_NULL_INPUT(filter_shape) || CHECK_NULL_INPUT(input_shape);
is_null_input_ =
CHECK_SHAPE_NULL(filter_shape, kernel_name_, "weight") || CHECK_SHAPE_NULL(input_shape, kernel_name_, "dout");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'Conv3dTransposeGpuKernel', input is null.";
InitSizeLists();
return true;
}

@ -105,15 +106,16 @@ class Conv3dTransposeGpuFwdKernel : public GpuKernel {

void CheckSize(const size_t value, const size_t expect_value, const string arg_name) {
if (value != expect_value) {
MS_LOG(EXCEPTION) << "For 'Conv3dTransposeGpuKernel', the length of " << arg_name << " must be " << expect_value
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of " << arg_name << " must be " << expect_value
<< ", but got " << value;
}
}

bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
InitResource();
if (!CheckParam(kernel_node)) return false;
(void)CheckParam(kernel_node);
cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0)));
data_format_ = AnfAlgo::GetInputFormat(kernel_node, 0);
auto format_attr = GetAttr<std::string>(kernel_node, "format");

@ -147,8 +149,8 @@ class Conv3dTransposeGpuFwdKernel : public GpuKernel {
pad_mode_ = GetAttr<std::string>(kernel_node, "pad_mode");
SetStrideAndDilation(kernel_node);
std::vector<int> stride_pad_list(6, 0);
(void)CheckSize(filter_shape.size(), 5, "filter_shape");
(void)CheckSize(pad_list.size(), 6, "pad_list");
(void)CheckSize(filter_shape.size(), 5, "weight shape");
(void)CheckSize(pad_list.size(), 6, "pad");
if (pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase) {  // pad_mode_ = same
UpdatePaddingAndDilation(input_shape, filter_shape, pad_list.data(), stride_pad_list.data());
}

@ -255,6 +257,7 @@ class Conv3dTransposeGpuFwdKernel : public GpuKernel {
dilation_.clear();
group_ = 1;
is_null_input_ = false;
kernel_name_ = "Conv3dTranspose";
input_size_ = 0;
filter_size_ = 0;
output_size_ = 0;

@ -355,18 +358,15 @@ class Conv3dTransposeGpuFwdKernel : public GpuKernel {
}

private:
bool CheckParam(const CNodePtr &kernel_node) {
void CheckParam(const CNodePtr &kernel_node) {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but Conv3dTranspose needs 2 inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be 2, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but Conv3dTranspose needs 1 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of output should be 1, but got " << output_num;
}
return true;
}

void SetPad(const std::vector<int> &output_shape, const CNodePtr &kernel_node) {

@ -430,16 +430,20 @@ class Conv3dTransposeGpuFwdKernel : public GpuKernel {
(void)std::transform(dilation_me.begin(), dilation_me.end(), std::back_inserter(dilation_),
[](const int64_t &value) { return static_cast<int>(value); });
if (stride_.size() != 5) {
MS_LOG(EXCEPTION) << "Conv3dTransposeGpuFwdKernel's stride must be 5d!";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'stride' should be 5, but got "
<< stride_.size();
}
if (stride_[0] != 1 || stride_[1] != 1) {
MS_LOG(EXCEPTION) << "Conv3dTransposeGpuFwdKernel stride only support 1 in N axis and C axis!";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the value of 'stride' at 0 and 1 axis should be 1, but got "
<< "stride[0]: " << stride_[0] << ", stride[1]: " << stride_[1];
}
if (dilation_.size() != 5) {
MS_LOG(EXCEPTION) << "Conv3dTransposeGpuFwdKernel's dilation must be 5d!";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'dilation' should be 5, but got "
<< dilation_.size();
}
if (dilation_[0] != 1 || dilation_[1] != 1) {
MS_LOG(EXCEPTION) << "Conv3dTransposeGpuFwdKernel dilation only support 1 in N axis and C axis!";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the value of 'dilation' at 0 and 1 axis should be 1, but got "
<< "dilation[0]: " << dilation_[0] << ", dilation[1]: " << dilation_[1];
}
}
void UpdatePaddingAndDilation(const std::vector<size_t> &input_shape, const std::vector<size_t> &filter_shape,

@ -562,6 +566,7 @@ class Conv3dTransposeGpuFwdKernel : public GpuKernel {
std::vector<int> dilation_;
int group_;
bool is_null_input_;
std::string kernel_name_;
size_t input_size_;
size_t filter_size_;
size_t output_size_;
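
CHECK_SHAPE_NULL replaces CHECK_NULL_INPUT throughout this commit, folding the old per-kernel MS_LOG(WARNING) into the check itself. The macro body is not part of this diff, so the sketch below only guesses at its semantics from the call sites: report which parameter of which kernel has an empty or zero-sized shape, and return true so the caller can InitSizeLists() and bail out early.

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

// Assumed semantics only; the real macro lives elsewhere in the MindSpore tree.
inline bool CheckShapeNull(const std::vector<std::size_t> &shape, const std::string &kernel_name,
                           const std::string &param_name) {
  const bool is_null =
    shape.empty() || std::any_of(shape.begin(), shape.end(), [](std::size_t dim) { return dim == 0; });
  if (is_null) {
    // One warning, emitted where the check happens, instead of a bespoke message per kernel.
    std::cerr << "For '" << kernel_name << "', the shape of " << param_name
              << " should not be null (empty or zero-sized)." << std::endl;
  }
  return is_null;
}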

@ -19,6 +19,7 @@

#include <cuda_runtime_api.h>
#include <vector>
#include <string>
#include <limits>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"

@ -37,6 +38,7 @@ class CtcLossGpuKernel : public GpuKernel {
ctc_merge_repeated_(true),
ignore_longer_outputs_than_inputs_(false),
is_null_input_(false),
kernel_name_("CTCLoss"),
probs(nullptr),
label_indices(nullptr),
label_values(nullptr),

@ -77,31 +79,36 @@ class CtcLossGpuKernel : public GpuKernel {
return true;
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
InitResource();
auto probs_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto indice_dims = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
auto labels_dims = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
auto sequence_length_dims = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
is_null_input_ = CHECK_NULL_INPUT(probs_shape) || CHECK_NULL_INPUT(indice_dims) || CHECK_NULL_INPUT(labels_dims) ||
CHECK_NULL_INPUT(sequence_length_dims);
is_null_input_ = CHECK_SHAPE_NULL(probs_shape, kernel_name_, "x") ||
CHECK_SHAPE_NULL(indice_dims, kernel_name_, "labels_indices") ||
CHECK_SHAPE_NULL(labels_dims, kernel_name_, "labels_values") ||
CHECK_SHAPE_NULL(sequence_length_dims, kernel_name_, "sequence_length");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'CtclossGpuKernel', input is null.";
InitSizeLists();
return true;
}
if (probs_shape.size() != 3) {
MS_LOG(EXCEPTION) << "probs dims: " << probs_shape.size() << " not support.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of x should be 3, but got "
<< probs_shape.size();
}
probs_dims_[0] = probs_shape[0];
probs_dims_[1] = probs_shape[1];
probs_dims_[2] = probs_shape[2];

if (labels_dims.size() != 1) {
MS_LOG(EXCEPTION) << "labels dims: " << labels_dims.size() << " not support.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of labels_values should be 1, but got "
<< labels_dims.size();
}
if (indice_dims.size() != 2) {
MS_LOG(EXCEPTION) << "labels indice dims: " << indice_dims.size() << " not support.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of labels_indices should be 2, but got "
<< indice_dims.size();
}
label_size_ = sizeof(int);
for (auto i : labels_dims) {

@ -157,7 +164,8 @@ class CtcLossGpuKernel : public GpuKernel {
"cudaMemcpyAsync failed.");
CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_, cudaStreamSynchronize(stream), "cudaStreamSynchronize failed.");
if (max_time < max_sequence) {
MS_LOG(EXCEPTION) << "max_time should be greater than sequence length.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the x[0] should be equal to or greater than max_sequence, "
<< "but got x[0]: " << max_time << ", max_sequence: " << max_sequence;
}
InnerSoftMax(probs, softmax_probs, sequence_length, max_time, batch, numclass, stream);
MemsetForWS(label_value_pcr, cum_labels_length, label_squence_length, costs, grads, stream);

@ -169,7 +177,9 @@ class CtcLossGpuKernel : public GpuKernel {
"cudaMemcpyAsync failed.");
CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_, cudaStreamSynchronize(stream), "cudaStreamSynchronize failed.");
if (batch != batch_label + 1) {
MS_LOG(EXCEPTION) << "label batch should be equal to input batch.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_
<< "', the x[0] should be equal to batch_label + 1, but got x[0]: " << max_time
<< ", batch_label: " << batch_label;
}
GenLabelValue(label_value_sp, label_indices, label_values, label_squence_length, cum_labels_length,
max_labels_length, label_size_ / sizeof(int), numclass - 1, batch, stream);

@ -288,6 +298,7 @@ class CtcLossGpuKernel : public GpuKernel {
bool ctc_merge_repeated_;
bool ignore_longer_outputs_than_inputs_;
bool is_null_input_;
std::string kernel_name_;
T kLogZero_ = -std::numeric_limits<T>::infinity();

// Heap parameter

@ -18,6 +18,7 @@
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_NN_DROPOUT3D_GPU_KERNEL_H_

#include <vector>
#include <string>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "backend/kernel_compiler/gpu/cuda_impl/dropout3d_impl.cuh"

@ -69,17 +70,17 @@ class Dropout3DGpuFwdKernel : public GpuKernel {
}

bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
cudnn_handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCudnnHandle();

size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(EXCEPTION) << "Argument number is " << input_num << ", but Dropout3DGpuFwdKernel needs 1.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be 1, but got " << input_num;
}

std::vector<size_t> input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name_, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "Dropout3DGpuKernel input is null.";
InitSizeLists();
return true;
}

@ -88,7 +89,8 @@ class Dropout3DGpuFwdKernel : public GpuKernel {

size_t dims = input_shape.size();
if (dims != 5) {
MS_LOG(EXCEPTION) << "Input dims " << dims << "not supported. Must be in NCDHW format.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input should be 5, but got "
<< input_shape.size();
}

// get N and C values from 5 dim input tensor

@ -106,7 +108,8 @@ class Dropout3DGpuFwdKernel : public GpuKernel {

keep_prob_ = GetAttr<float>(kernel_node, "keep_prob");
if ((keep_prob_ < 0.0) || (keep_prob_ > 1.0)) {
MS_LOG(EXCEPTION) << "keep_prob is out of range [0.0, 1.0]";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the value of 'keep_prob' should be in range [0.0, 1.0], "
<< "but got " << keep_prob_;
}

InitSizeLists();

@ -116,6 +119,7 @@ class Dropout3DGpuFwdKernel : public GpuKernel {
void ResetResource() noexcept override {
cudnn_handle_ = nullptr;
is_null_input_ = false;
kernel_name_ = "Dropout3D";
num_count_ = 0;
keep_prob_ = 0.0;
states_init_ = false;

@ -145,6 +149,7 @@ class Dropout3DGpuFwdKernel : public GpuKernel {
cudnnHandle_t cudnn_handle_;
curandGenerator_t curand_generator_;
bool is_null_input_;
std::string kernel_name_;
bool states_init_;
size_t num_count_;
size_t n_;
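
The Dropout3D hunks also rework the keep_prob range message into the shared template. A compact, runnable rendering of that check (a plain exception again stands in for MS_LOG(EXCEPTION)):

#include <sstream>
#include <stdexcept>
#include <string>

void ValidateKeepProb(const std::string &kernel_name, float keep_prob) {
  if (keep_prob < 0.0f || keep_prob > 1.0f) {
    std::ostringstream oss;
    oss << "For '" << kernel_name << "', the value of 'keep_prob' should be in range [0.0, 1.0], "
        << "but got " << keep_prob;
    throw std::invalid_argument(oss.str());
  }
}

// e.g. ValidateKeepProb("Dropout3D", 1.5f) throws with exactly the message format used above.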

@ -18,6 +18,7 @@
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_NN_DROPOUT_GPU_KERNEL_H_

#include <vector>
#include <string>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "backend/kernel_compiler/gpu/cuda_impl/dropout_impl.cuh"

@ -65,17 +66,17 @@ class DropoutGpuFwdKernel : public GpuKernel {
}

bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
InitResource();

size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(EXCEPTION) << "Argument number is " << input_num << ", but DropoutGpuFwdKernel needs 1.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be 1, but got " << input_num;
}

auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name_, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'DropoutGpuKernel', input is null.";
InitSizeLists();
return true;
}

@ -101,6 +102,7 @@ class DropoutGpuFwdKernel : public GpuKernel {
void ResetResource() noexcept override {
cudnn_handle_ = nullptr;
is_null_input_ = false;
kernel_name_ = "Dropout";
num_count_ = 0;
keep_prob_ = 0.0;
seed_ = 0;

@ -125,6 +127,7 @@ class DropoutGpuFwdKernel : public GpuKernel {
private:
cudnnHandle_t cudnn_handle_;
bool is_null_input_;
std::string kernel_name_;
size_t num_count_;
float keep_prob_;
bool states_init_;

@ -18,6 +18,7 @@
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_NN_DROPOUT_GRAD_KERNEL_H_

#include <vector>
#include <string>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "backend/kernel_compiler/gpu/cuda_impl/dropout_impl.cuh"

@ -27,7 +28,8 @@ namespace kernel {
template <typename T>
class DropoutGradGpuBwdKernel : public GpuKernel {
public:
DropoutGradGpuBwdKernel() : cudnn_handle_(nullptr), is_null_input_(false), num_count_(0), keep_prob_(0.0) {}
DropoutGradGpuBwdKernel()
: cudnn_handle_(nullptr), is_null_input_(false), kernel_name_("DropoutGrad"), num_count_(0), keep_prob_(0.0) {}
~DropoutGradGpuBwdKernel() override = default;

const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }

@ -48,18 +50,17 @@ class DropoutGradGpuBwdKernel : public GpuKernel {
return true;
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
InitResource();

size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(ERROR) << "Argument number is " << input_num << ", but DropoutGradGpuBwdKernel needs 2.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be 2, but got " << input_num;
}

auto input_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name_, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'DropoutGradGpuKernel', input is null.";
InitSizeLists();
return true;
}

@ -89,6 +90,7 @@ class DropoutGradGpuBwdKernel : public GpuKernel {
private:
cudnnHandle_t cudnn_handle_;
bool is_null_input_;
std::string kernel_name_;
size_t num_count_;
float keep_prob_;
std::vector<size_t> input_size_list_;

@ -19,6 +19,7 @@

#include <cuda_runtime_api.h>
#include <vector>
#include <string>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"

@ -50,10 +51,10 @@ class FlattenGpuFwdKernel : public GpuKernel {
return true;
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
auto shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(shape);
is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name_, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'FlattenGpuKernel', input is null.";
InitSizeLists();
return true;
}

@ -67,6 +68,7 @@ class FlattenGpuFwdKernel : public GpuKernel {
void ResetResource() noexcept override {
input_size_ = 0;
is_null_input_ = false;
kernel_name_ = "Flatten";
input_size_list_.clear();
output_size_list_.clear();
workspace_size_list_.clear();

@ -85,6 +87,7 @@ class FlattenGpuFwdKernel : public GpuKernel {

size_t input_size_;
bool is_null_input_;
std::string kernel_name_;
};
} // namespace kernel
} // namespace mindspore

@ -19,6 +19,7 @@

#include <cuda_runtime_api.h>
#include <vector>
#include <string>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"

@ -51,16 +52,15 @@ class FlattenGardGpuBkwKernel : public GpuKernel {
return true;
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(ERROR) << "Argument number is " << input_num << ", but FlattenGardGpuFwdKernel needs 1.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be 1, but got " << input_num;
}

auto shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(shape);
is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name_, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'FlattenGradGpuKernel', input is null.";
InitSizeLists();
return true;
}

@ -79,6 +79,7 @@ class FlattenGardGpuBkwKernel : public GpuKernel {
void ResetResource() noexcept override {
input_size_ = 0;
is_null_input_ = false;
kernel_name_ = "FlattenGrad";
input_size_list_.clear();
output_size_list_.clear();
workspace_size_list_.clear();

@ -97,6 +98,7 @@ class FlattenGardGpuBkwKernel : public GpuKernel {

size_t input_size_;
bool is_null_input_;
std::string kernel_name_;
};
} // namespace kernel
} // namespace mindspore

@ -18,6 +18,7 @@
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_NN_FTRL_GPU_KERNEL_H_

#include <vector>
#include <string>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "backend/kernel_compiler/gpu/cuda_impl/ftrl_impl.cuh"

@ -36,7 +37,8 @@ class FtrlGpuKernel : public GpuKernel {
l1_regularization_size_(0),
l2_regularization_size_(0),
learning_rate_power_size_(0),
is_null_input_(false) {}
is_null_input_(false),
kernel_name_("ApplyFtrl") {}

~FtrlGpuKernel() override = default;

@ -63,10 +65,11 @@ class FtrlGpuKernel : public GpuKernel {
}

bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != INPUT_NUM) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but ftrl needs " << INPUT_NUM << " inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be " << INPUT_NUM << ", but got "
<< input_num;
}

variable_size_ = sizeof(T);

@ -82,10 +85,11 @@ class FtrlGpuKernel : public GpuKernel {
auto accumulation_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
auto linear_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
auto gradient_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
is_null_input_ = CHECK_NULL_INPUT(variable_shape) || CHECK_NULL_INPUT(accumulation_shape) ||
CHECK_NULL_INPUT(linear_shape) || CHECK_NULL_INPUT(gradient_shape);
is_null_input_ = CHECK_SHAPE_NULL(variable_shape, kernel_name_, "var") ||
CHECK_SHAPE_NULL(accumulation_shape, kernel_name_, "accum") ||
CHECK_SHAPE_NULL(linear_shape, kernel_name_, "linear") ||
CHECK_SHAPE_NULL(gradient_shape, kernel_name_, "grad");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'FtrlGpuKernel', input is null.";
InitSizeLists();
return true;
}

@ -132,6 +136,7 @@ class FtrlGpuKernel : public GpuKernel {
size_t l2_regularization_size_;
size_t learning_rate_power_size_;
bool is_null_input_;
std::string kernel_name_;

std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;

@ -38,9 +38,8 @@ class FusedAdamWeightDecayGpuKernel : public GpuKernel {
}

auto shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7);
is_null_input_ = CHECK_NULL_INPUT(shape);
is_null_input_ = CHECK_SHAPE_NULL(shape, node_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'FusedAdamWeightDecayGpuKernel', input is null.";
InitSizeLists();
return true;
}

@ -52,10 +52,10 @@ class FusedAddReluGradV2GpuKernel : public GpuKernel {

bool Init(const CNodePtr &kernel_node) override {
MS_EXCEPTION_IF_NULL(kernel_node);
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
auto shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(shape);
is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'FusedAddReluGradV2GpuKernel', input is null.";
InitSizeLists();
return true;
}

@ -52,10 +52,10 @@ class FusedAddReluV2GpuKernel : public GpuKernel {

bool Init(const CNodePtr &kernel_node) override {
MS_EXCEPTION_IF_NULL(kernel_node);
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
auto shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(shape);
is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'FusedAddReluV2GpuKernel', input is null.";
InitSizeLists();
return true;
}

@ -50,16 +50,16 @@ class FusedScaleMomentumGpuKernel : public GpuKernel {
return true;
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != INPUT_NUM) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but FusedMomentum needs " << INPUT_NUM << " inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of input should be " << INPUT_NUM << ", but got "
<< input_num;
}

auto variable_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
is_null_input_ = CHECK_NULL_INPUT(variable_shape);
is_null_input_ = CHECK_SHAPE_NULL(variable_shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'FusedScaleMomentumGpuKernel', input is null.";
InitSizeLists();
return true;
}

@ -50,16 +50,16 @@ class FusedWeightDecayMomentumGpuKernel : public GpuKernel {
return true;
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != INPUT_NUM) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but FusedMomentum needs " << INPUT_NUM << " inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of input should be " << INPUT_NUM << ", but got "
<< input_num;
}

auto variable_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
is_null_input_ = CHECK_NULL_INPUT(variable_shape);
is_null_input_ = CHECK_SHAPE_NULL(variable_shape, kernel_name, "variable");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'FusedWeightDecayMomentumGpuKernel', input is null.";
InitSizeLists();
return true;
}

@ -51,16 +51,16 @@ class FusedWeightDecayScaleMomentumGpuKernel : public GpuKernel {
return true;
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != INPUT_NUM) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but FusedMomentum needs " << INPUT_NUM << " inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of input should be " << INPUT_NUM << ", but got "
<< input_num;
}

auto variable_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
is_null_input_ = CHECK_NULL_INPUT(variable_shape);
is_null_input_ = CHECK_SHAPE_NULL(variable_shape, kernel_name, "variable");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'FusedWeightDecayScaleMomentumGpuKernel', input is null.";
InitSizeLists();
return true;
}

@ -48,12 +48,12 @@ class GeLUGpuGradKernel : public GpuKernel {
}

bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
InitResource();
input_size_ = sizeof(T);
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'GeluGradGpuKernel', input is null.";
InitSizeLists();
return true;
}

@ -47,12 +47,12 @@ class GeluGpuKernel : public GpuKernel {
}

bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
InitResource();
input_size_ = sizeof(T);
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'GeluGpuKernel', input is null.";
InitSizeLists();
return true;
}

@ -47,19 +47,19 @@ class HSigmoidKernel : public GpuKernel {
}

bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but HSigmoid needs 1 inputs.";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of input should be 1, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but HSigmoid has 1 output.";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of output should be 1, but got " << output_num;
}
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'HsigmoidGpuKernel', input is null.";
InitSizeLists();
return true;
}

@ -48,19 +48,19 @@ class HSigmoidGradKernel : public GpuKernel {
}

bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but HSigmoidGrad needs 2 inputs.";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of input should be 2, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but HSigmoidGrad has 1 output.";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of output should be 1, but got " << output_num;
}
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'HsigmoidGradGpuKernel', input is null.";
InitSizeLists();
return true;
}

@ -47,19 +47,19 @@ class HSwishKernel : public GpuKernel {
}

bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but HSwish needs 1 inputs.";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of input should be 1, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but HSwish needs 1 output.";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of output should be 1, but got " << output_num;
}
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'HswishGpuKernel', input is null.";
InitSizeLists();
return true;
}

@ -48,19 +48,19 @@ class HSwishGradKernel : public GpuKernel {
}

bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but HSwishGrad needs 2 inputs.";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of input should be 2, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but HSwishGrad has 1 output.";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of output should be 1, but got " << output_num;
}
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'HswishGradGpuKernel', input is null.";
InitSizeLists();
return true;
}

@ -48,6 +48,7 @@ class Im2ColGpuFwdKernel : public GpuKernel {
n_(0),
c_(0),
is_null_input_(false),
kernel_name_("Im2col"),
input_size_(0),
output_size_(0),
padded_size_(0),

@ -82,21 +83,21 @@ class Im2ColGpuFwdKernel : public GpuKernel {
return true;
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
InitResource();
if (!CheckParam(kernel_node)) {
return false;
}
(void)CheckParam(kernel_node);
auto in_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(in_shape) || CHECK_NULL_INPUT(output_shape);
is_null_input_ =
CHECK_SHAPE_NULL(in_shape, kernel_name_, "input") || CHECK_SHAPE_NULL(output_shape, kernel_name_, "output");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'Im2ColGpuKernel', input or output is null.";
InitSizeLists();
return true;
}
if (in_shape.size() != 4) {
MS_LOG(EXCEPTION) << "For 'Im2ColGpuKernel', the dimension of input must be 4, but got " << in_shape.size();
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input should be 4, but got "
<< in_shape.size();
}
std::vector<int> filter_shape;
std::vector<int64_t> filter_shape_me = GetAttr<std::vector<int64_t>>(kernel_node, "kernel_size");

@ -188,20 +189,17 @@ class Im2ColGpuFwdKernel : public GpuKernel {
}

private:
bool CheckParam(const CNodePtr &kernel_node) {
void CheckParam(const CNodePtr &kernel_node) {
cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0)));
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but Im2Col needs 1 inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be 1, but got " << input_num;
}

size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but Im2Col needs 1 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of output should be 1, but got " << output_num;
}
return true;
}
void SetPad(const std::vector<size_t> &in_shape, const CNodePtr &kernel_node) {
std::vector<int> pad_list;

@ -215,7 +213,7 @@ class Im2ColGpuFwdKernel : public GpuKernel {
old_width_ = SizeToInt(in_shape[3]);

if (pad_list.size() != 4) {
MS_LOG(EXCEPTION) << "For 'Im2ColGpuKernel', the length of pad_list must be 4, but got " << pad_list.size();
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'pad' should be 4, but got " << pad_list.size();
}
pad_height_ = pad_list[0] + pad_list[1];
pad_width_ = pad_list[2] + pad_list[3];

@ -267,16 +265,20 @@ class Im2ColGpuFwdKernel : public GpuKernel {
(void)std::transform(dilation_me.begin(), dilation_me.end(), std::back_inserter(dilation_),
[](const int64_t &value) { return static_cast<int>(value); });
if (stride_.size() != 4) {
MS_LOG(EXCEPTION) << "Im2Col's stride must be 4d!";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'stride' should be 4, but got "
<< stride_.size();
}
if (stride_[0] != 1 || stride_[1] != 1) {
MS_LOG(EXCEPTION) << "Im2Col's stride only support 1 in N axis and C axis!";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the value of 'stride' at 0 and 1 axis should be 1, but got "
<< "stride[0]: " << stride_[0] << ", stride[1]: " << stride_[1];
}
if (dilation_.size() != 4) {
MS_LOG(EXCEPTION) << "Im2Col's dilation must be 4d!";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'dilation' should be 4, but got "
<< dilation_.size();
}
if (dilation_[0] != 1 || dilation_[1] != 1) {
MS_LOG(EXCEPTION) << "Im2Col's dilation only support 1 in N axis and C axis!";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the value of 'dilation' at 0 and 1 axis should be 1, but got "
<< "dilation[0]: " << dilation_[0] << ", dilation[1]: " << dilation_[1];
}
}

@ -304,6 +306,7 @@ class Im2ColGpuFwdKernel : public GpuKernel {
std::vector<int> stride_;
std::vector<int> dilation_;
bool is_null_input_;
std::string kernel_name_;
size_t input_size_;
size_t output_size_;
size_t padded_size_;

@ -109,15 +109,15 @@ class InstanceNormGpuKernel : public GpuKernel {
cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0)));
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 5) {
MS_LOG(EXCEPTION) << "input tensor size is " << input_num << ", " << kernel_name << " should be 5";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of input should be 5, but got " << input_num;
}
input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
if (input_shape_.size() != 4) {
MS_LOG(EXCEPTION) << "tensor shape is " << input_shape_.size() << ", InstanceNormGpuKernel should be 4";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input_x should be 4, but got "
<< input_shape_.size();
}
is_null_input_ = CHECK_NULL_INPUT(input_shape_);
is_null_input_ = CHECK_SHAPE_NULL(input_shape_, kernel_name, "input_x");
if (is_null_input_) {
MS_LOG(WARNING) << "InstanceNormGpuKernel input is null";
InitSizeLists();
return true;
}

@ -113,16 +113,16 @@ class InstanceNormGradGpuKernel : public GpuKernel {
cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0)));
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 5) {
MS_LOG(EXCEPTION) << "input tensor size is " << input_num << ", " << kernel_name << " should be 5";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of input should be 5, but got " << input_num;
}

input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
if (input_shape_.size() != 4) {
MS_LOG(EXCEPTION) << "tensor shape is " << input_shape_.size() << ", InstanceNormGradGpuKernel should be 4";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input should be 4, but got "
<< input_shape_.size();
}
is_null_input_ = CHECK_NULL_INPUT(input_shape_);
is_null_input_ = CHECK_SHAPE_NULL(input_shape_, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "InstanceNormGradGpuKernel input is null";
InitSizeLists();
return true;
}

@ -50,10 +50,10 @@ class KLDivLossGpuKernel : public GpuKernel {
}

bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "logits");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'KlDivLossGpuKernel', input is null.";
InitSizeLists();
return true;
}

@ -51,10 +51,10 @@ class KLDivLossGradGpuKernel : public GpuKernel {
}

bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'KlDivLossGradGpuKernel', input is null.";
InitSizeLists();
return true;
}
@ -41,6 +41,7 @@ class L2NormalizeGpuKernel : public GpuKernel {
|
|||
outputC_descriptor_(nullptr),
|
||||
all_match_(false),
|
||||
is_null_input_(false),
|
||||
kernel_name_("L2Normalize"),
|
||||
input_size_(0),
|
||||
output_size_(0),
|
||||
workspace_size_(0),
|
||||
|
@ -86,12 +87,11 @@ class L2NormalizeGpuKernel : public GpuKernel {
|
|||
}
|
||||
|
||||
bool Init(const CNodePtr &kernel_node) override {
|
||||
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
|
||||
kernel_node_ = kernel_node;
|
||||
InitResource();
|
||||
data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0)));
|
||||
if (!CheckIONumber(kernel_node)) {
|
||||
return false;
|
||||
}
|
||||
(void)CheckIONumber(kernel_node);
|
||||
int input_dim_length = SizeToInt(AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0).size());
|
||||
|
||||
int axis = LongToInt(GetAttr<int64_t>(kernel_node, "axis"));
|
||||
|
@ -100,9 +100,9 @@ class L2NormalizeGpuKernel : public GpuKernel {
auto inputA_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(inputA_shape) || CHECK_NULL_INPUT(output_shape);
is_null_input_ =
CHECK_SHAPE_NULL(inputA_shape, kernel_name_, "input") || CHECK_SHAPE_NULL(output_shape, kernel_name_, "output");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'L2NormalizeGpuKernel', input or output is null.";
InitSizeLists();
return true;
}
@ -110,15 +110,10 @@ class L2NormalizeGpuKernel : public GpuKernel {
for (auto dim : output_shape) {
output_size_ *= dim;
}
is_null_input_ = CHECK_NULL_INPUT(inputA_shape);
if (is_null_input_) {
MS_LOG(WARNING) << "L2NormalizeGPUKernel input is null";
InitSizeLists();
return true;
}
CheckTensorSize({inputA_shape, output_shape});
if (inputA_shape.size() > MAX_DIMS) {
MS_LOG(EXCEPTION) << "Broadcast operation not support dim greater than " << MAX_DIMS;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input cannot be greater than " << MAX_DIMS
<< ", but got " << inputA_shape.size();
}

std::vector<size_t> outputC_shape = output_shape;
@ -129,8 +124,9 @@ class L2NormalizeGpuKernel : public GpuKernel {
outputC_shape[axis_] = 1;

if (inputA_shape.size() != output_shape.size() || inputA_shape.size() != outputC_shape.size()) {
MS_LOG(ERROR) << "Input shape size need equal to output shape size";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input and output should be the same, but "
<< "got the dimension of input: " << inputA_shape.size()
<< ", the dimension of output: " << output_shape.size();
}

lhs_shape_.resize(MAX_DIMS, 1);
@ -183,18 +179,15 @@ class L2NormalizeGpuKernel : public GpuKernel {
}

private:
bool CheckIONumber(const CNodePtr &kernel_node) {
void CheckIONumber(const CNodePtr &kernel_node) {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but l2normalize op needs 1 inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be 1, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but l2normalize op needs 1 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of output should be 1, but got " << output_num;
}
return true;
}
void DestroyResource() noexcept {
CHECK_CUDNN_RET_WITH_ERROR(kernel_node_, cudnnDestroyReduceTensorDescriptor(reduce_tensor_descriptor_),
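A note on the CheckIONumber change above: assuming MS_LOG(EXCEPTION) logs and then throws (so control never returns on a bad input count), the check no longer needs a bool result that every caller must test, the function becomes void, and the call site shrinks to (void)CheckIONumber(kernel_node). A hedged sketch of this validate-by-throwing pattern, with stand-in names rather than the MindSpore API:

#include <sstream>
#include <stdexcept>
#include <string>

// Stand-in for MS_LOG(EXCEPTION): format, then throw, so callers need no
// "if (!Check...) return false;" plumbing.
void CheckInputNumber(size_t input_num, size_t expected, const std::string &kernel_name) {
  if (input_num != expected) {
    std::ostringstream oss;
    oss << "For '" << kernel_name << "', the number of input should be " << expected << ", but got " << input_num;
    throw std::runtime_error(oss.str());
  }
}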
@ -255,6 +248,7 @@ class L2NormalizeGpuKernel : public GpuKernel {
bool all_match_;
bool is_null_input_;
std::string kernel_name_;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
std::vector<size_t> workspace_size_list_;
@ -43,6 +43,7 @@ class L2NormalizeGradGpuKernel : public GpuKernel {
outputC_descriptor_(nullptr),
all_match_(false),
is_null_input_(false),
kernel_name_("L2NormalizeGrad"),
output_size_(0),
workspace_size_(0),
epsilon_(0.0),
@ -112,28 +113,29 @@ class L2NormalizeGradGpuKernel : public GpuKernel {
bool CheckInputShape(const std::vector<size_t> &output_shape) {
for (auto &shape : input_shape_list_) {
if (output_shape != shape) {
MS_LOG(EXCEPTION) << "Input shape and output shape should be same!";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the shape of input and output should be the same, but "
<< "got the shape of input: " << CONVERT_VECTOR_TO_STRING(shape)
<< ", the shape of output: " << CONVERT_VECTOR_TO_STRING(output_shape);
}
}
is_null_input_ = CHECK_NULL_INPUT(input_shape_list_[0]);
is_null_input_ = CHECK_SHAPE_NULL(input_shape_list_[0], kernel_name_, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "L2NormalizeGPUKernel input is null";
InitSizeLists();
return false;
}
if (input_shape_list_[0].size() > MAX_DIMS) {
MS_LOG(EXCEPTION) << "Broadcast operation not support dim greater than " << MAX_DIMS;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input cannot be greater than " << MAX_DIMS
<< ", but got " << input_shape_list_[0].size();
}
return true;
}

bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
InitResource();
data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0)));
if (!CheckIONumber(kernel_node)) {
return false;
}
(void)CheckIONumber(kernel_node);
int input_dim_length = SizeToInt(AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0).size());
int axis = static_cast<int>(GetAttr<int64_t>(kernel_node, "axis"));
axis_ = axis < 0 ? (axis + input_dim_length) : axis;
@ -144,9 +146,8 @@ class L2NormalizeGradGpuKernel : public GpuKernel {
input_shape_list_.emplace_back(AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, i));
}
auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(output_shape);
is_null_input_ = CHECK_SHAPE_NULL(output_shape, kernel_name_, "output");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'L2NormalizeGradGpuKernel', output is null.";
InitSizeLists();
return true;
}
@ -187,18 +188,16 @@ class L2NormalizeGradGpuKernel : public GpuKernel {
}

protected:
bool CheckIONumber(const CNodePtr &kernel_node) {
void CheckIONumber(const CNodePtr &kernel_node) {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != INPUT_SIZE) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but l2normalize op needs " << INPUT_SIZE << " inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be " << INPUT_SIZE << ", but got "
<< input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but l2normalize op needs 1 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of output should be 1, but got " << output_num;
}
return true;
}
void InitResource() override {
cudnn_handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCudnnHandle();
@ -315,6 +314,7 @@ class L2NormalizeGradGpuKernel : public GpuKernel {
bool all_match_;
bool is_null_input_;
std::string kernel_name_;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
std::vector<size_t> workspace_size_list_;
@ -20,8 +20,10 @@
#include <cublas_v2.h>
#include <iostream>
#include <vector>
#include <string>
#include <algorithm>
#include <map>
#include <sstream>
#include "utils/log_adapter.h"
#include "utils/trace_base.h"
#include "include/curand.h"
@ -214,6 +216,34 @@ inline bool CheckNullInput(const std::vector<size_t> &input_shape) {
}
#define CHECK_NULL_INPUT(input_shape) mindspore::device::gpu::CheckNullInput(input_shape)

inline std::string ConvertVectorToString(const std::vector<size_t> &value) {
std::stringstream ss;
ss << "(";
for (auto it = value.begin(); it != value.end(); it++) {
if (it == value.begin()) {
ss << *it;
} else {
ss << ", " << *it;
}
}
ss << ")";
return ss.str();
}

#define CONVERT_VECTOR_TO_STRING(value) mindspore::device::gpu::ConvertVectorToString(value)

inline bool CheckShapeNull(const std::vector<size_t> &shape, std::string kernel_name, std::string param_name) {
if (CHECK_NULL_INPUT(shape)) {
MS_LOG(WARNING) << "For '" << kernel_name << "', the shape of " << param_name << " cannot contain zero, but got "
<< CONVERT_VECTOR_TO_STRING(shape);
return true;
}
return false;
}

#define CHECK_SHAPE_NULL(shape, kernel_name, param_name) \
mindspore::device::gpu::CheckShapeNull(shape, kernel_name, param_name)

inline const char *CurandGetErrorString(curandStatus_t status) {
switch (status) {
case CURAND_STATUS_VERSION_MISMATCH:
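To see what the two helpers added above report, here is a self-contained sketch that duplicates ConvertVectorToString so it compiles outside MindSpore (the authoritative definition is the one in the hunk above). A shape with a zero dimension, such as (2, 0, 3), is exactly what CHECK_SHAPE_NULL flags in its warning:

#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Mirror of ConvertVectorToString from the hunk above, duplicated so this
// sketch stands alone.
std::string ConvertVectorToString(const std::vector<size_t> &value) {
  std::stringstream ss;
  ss << "(";
  for (auto it = value.begin(); it != value.end(); it++) {
    ss << (it == value.begin() ? "" : ", ") << *it;
  }
  ss << ")";
  return ss.str();
}

int main() {
  std::vector<size_t> shape = {2, 0, 3};  // the zero dimension is what CHECK_SHAPE_NULL warns about
  std::cout << ConvertVectorToString(shape) << std::endl;  // prints: (2, 0, 3)
  return 0;
}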