forked from mindspore-Ecosystem/mindspore
Optimize the kernel error descriptions of Split, Meshgrid, Select, etc.
This commit is contained in:
parent
f85dea2959
commit
bb935faca9
|
@ -18,6 +18,7 @@
|
|||
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ARRAYS_ARGMAX_GPU_KERNEL_H_
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
|
||||
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
|
||||
#include "backend/kernel_compiler/gpu/cuda_impl/argmax_impl.cuh"
|
||||
|
@ -33,7 +34,8 @@ class ArgmaxGpuKernel : public GpuKernel {
|
|||
bound_(0),
|
||||
outer_size_(0),
|
||||
inner_size_(0),
|
||||
is_null_input_(false) {}
|
||||
is_null_input_(false),
|
||||
kernel_name_("Argmax") {}
|
||||
~ArgmaxGpuKernel() override = default;
|
||||
|
||||
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
|
||||
|
@ -54,18 +56,20 @@ class ArgmaxGpuKernel : public GpuKernel {
|
|||
}
|
||||
|
||||
bool Init(const CNodePtr &kernel_node) override {
|
||||
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
|
||||
auto shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
|
||||
auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
|
||||
is_null_input_ = CHECK_NULL_INPUT(shape) || CHECK_NULL_INPUT(output_shape);
|
||||
is_null_input_ =
|
||||
CHECK_SHAPE_NULL(shape, kernel_name_, "input") || CHECK_SHAPE_NULL(output_shape, kernel_name_, "output");
|
||||
if (is_null_input_) {
|
||||
MS_LOG(WARNING) << "For 'ArgmaxGpuKernel', input or output is null.";
|
||||
InitSizeLists();
|
||||
return true;
|
||||
}
|
||||
int64_t dims = shape.size();
|
||||
int64_t axis = GetAttr<int64_t>(kernel_node, "axis");
|
||||
if (axis < -dims || axis >= dims) {
|
||||
MS_LOG(EXCEPTION) << "axis must be in the range [-rank, rank)";
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the 'axis' should be in the range [-" << dims << "," << dims
|
||||
<< "), but got " << axis;
|
||||
}
|
||||
|
||||
if (axis < 0) {
|
||||
|
@ -81,7 +85,8 @@ class ArgmaxGpuKernel : public GpuKernel {
|
|||
}
|
||||
bound_ = static_cast<S>(shape[axis]);
|
||||
if (shape[axis] != static_cast<size_t>(bound_)) {
|
||||
MS_LOG(EXCEPTION) << "Bound's shape is larger than index type and overflows when casting.";
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the value of shape[axis] should be "
|
||||
<< static_cast<size_t>(bound_) << ", but got " << shape[axis];
|
||||
}
|
||||
outer_size_ = 1;
|
||||
for (int64_t i = axis - 1; i >= 0; i--) {
|
||||
|
@ -112,6 +117,7 @@ class ArgmaxGpuKernel : public GpuKernel {
|
|||
size_t outer_size_;
|
||||
size_t inner_size_;
|
||||
bool is_null_input_;
|
||||
std::string kernel_name_;
|
||||
};
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -53,17 +53,17 @@ class ArgMaxAndMinWithValueGpuKernel : public GpuKernel {
|
|||
small_ = (kernel_name == "ArgMinWithValue") ? true : false;
|
||||
std::vector<size_t> shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
|
||||
auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 1);
|
||||
is_null_input_ = CHECK_NULL_INPUT(shape) || CHECK_NULL_INPUT(output_shape);
|
||||
is_null_input_ =
|
||||
CHECK_SHAPE_NULL(shape, kernel_name, "input") || CHECK_SHAPE_NULL(output_shape, kernel_name, "output");
|
||||
if (is_null_input_) {
|
||||
MS_LOG(WARNING) << "For 'ArgmaxwithvalueGpuKernel', input or output is null.";
|
||||
InitSizeLists();
|
||||
return true;
|
||||
}
|
||||
int64_t dims = SizeToLong(shape.size());
|
||||
int64_t axis = GetAttr<int64_t>(kernel_node, "axis");
|
||||
if (axis < -dims || axis >= dims) {
|
||||
MS_LOG(ERROR) << "axis must be in the range [-rank, rank)";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the 'axis' should be in the range [-" << dims << "," << dims
|
||||
<< "), but got " << axis;
|
||||
}
|
||||
if (axis < 0) {
|
||||
axis += dims;
|
||||
|
@ -78,7 +78,8 @@ class ArgMaxAndMinWithValueGpuKernel : public GpuKernel {
|
|||
}
|
||||
bound_ = static_cast<S>(shape[axis]);
|
||||
if (shape[axis] != static_cast<size_t>(bound_)) {
|
||||
MS_LOG(EXCEPTION) << "bound's shape is larger than index type and overflows when casting.";
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the value of shape[axis] should be "
|
||||
<< static_cast<size_t>(bound_) << ", but got " << shape[axis];
|
||||
}
|
||||
outerSize_ = 1;
|
||||
for (int64_t i = axis - 1; i >= 0; i--) {
|
||||
|
|
|
@ -80,25 +80,23 @@ class ArrayReduceGpuKernel : public GpuKernel {
|
|||
return true;
|
||||
}
|
||||
bool Init(const CNodePtr &kernel_node) override {
|
||||
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
|
||||
kernel_node_ = kernel_node;
|
||||
InitResource();
|
||||
auto type_id = AnfAlgo::GetInputDeviceDataType(kernel_node, 0);
|
||||
auto type_name = TypeIdLabel(type_id);
|
||||
auto node_name = AnfAlgo::GetCNodeName(kernel_node);
|
||||
if ((node_name == kReduceAnyOpName || node_name == kReduceAllOpName) && type_id != kNumberTypeBool) {
|
||||
MS_LOG(ERROR) << "Input data type of ReduceAny or ReduceAll should be bool, but got " << type_name;
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the input data type should be bool, but got " << type_name;
|
||||
}
|
||||
data_type_ = GetCudnnDataType(type_name);
|
||||
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
|
||||
if (input_num != 1) {
|
||||
MS_LOG(ERROR) << "Input number is " << input_num << ", but reduce op needs 1 inputs.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 1, but got " << input_num;
|
||||
}
|
||||
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
|
||||
if (output_num != 1) {
|
||||
MS_LOG(ERROR) << "Output number is " << output_num << ", but reduce op needs 1 output.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be 1, but got " << output_num;
|
||||
}
|
||||
int input_dim_length = SizeToInt(AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0).size());
|
||||
|
||||
|
@ -123,15 +121,15 @@ class ArrayReduceGpuKernel : public GpuKernel {
|
|||
int axis = static_cast<int>(GetAttr<int64_t>(kernel_node, "axis"));
|
||||
axis < 0 ? axis_.push_back(axis + input_dim_length) : axis_.push_back(axis);
|
||||
} else {
|
||||
MS_LOG(EXCEPTION) << "Attribute axis type is invalid.";
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', attribute 'axis' type is invalid.";
|
||||
}
|
||||
keep_dims_ = GetAttr<bool>(kernel_node, "keep_dims");
|
||||
|
||||
auto inputA_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
|
||||
auto outputC_shape = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node, 0);
|
||||
is_null_input_ = CHECK_NULL_INPUT(inputA_shape) || CHECK_NULL_INPUT(outputC_shape);
|
||||
is_null_input_ =
|
||||
CHECK_SHAPE_NULL(inputA_shape, kernel_name_, "input") || CHECK_SHAPE_NULL(outputC_shape, kernel_name_, "output");
|
||||
if (is_null_input_) {
|
||||
MS_LOG(WARNING) << "For 'ArrayReduceGpuKernel', input or output is null";
|
||||
InitSizeLists();
|
||||
return true;
|
||||
}
|
||||
|
@ -157,6 +155,7 @@ class ArrayReduceGpuKernel : public GpuKernel {
|
|||
input_size_ = 0;
|
||||
output_size_ = 0;
|
||||
workspace_size_ = 0;
|
||||
kernel_name_ = "ArrayReduce";
|
||||
axis_.clear();
|
||||
input_size_list_.clear();
|
||||
output_size_list_.clear();
|
||||
|
@ -205,7 +204,9 @@ class ArrayReduceGpuKernel : public GpuKernel {
|
|||
std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node);
|
||||
auto iter = kReduceTypeMap.find(kernel_name);
|
||||
if (iter == kReduceTypeMap.end()) {
|
||||
MS_LOG(EXCEPTION) << "Array reduce kernel type " << kernel_name << " is not supported.";
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', only support these array reduce kernel types: "
|
||||
<< "ReduceMax, ReduceMean, ReduceSum, ReduceMin, ReduceAny, ReduceAll, ReduceProd currently"
|
||||
<< ", but got " << kernel_name;
|
||||
}
|
||||
reduce_tensor_op_ = iter->second;
|
||||
// add check for float64
|
||||
|
@ -301,6 +302,7 @@ class ArrayReduceGpuKernel : public GpuKernel {
|
|||
size_t input_size_;
|
||||
size_t output_size_;
|
||||
size_t workspace_size_;
|
||||
std::string kernel_name_;
|
||||
};
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_BATCHOSPACE_KERNEL_H_
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
|
||||
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
|
||||
#include "backend/kernel_compiler/gpu/cuda_impl/batchtospace_impl.cuh"
|
||||
|
@ -49,9 +50,8 @@ class BatchToSpaceGpuKernel : public GpuKernel {
|
|||
}
|
||||
|
||||
bool Init(const CNodePtr &kernel_node) override {
|
||||
if (!CheckParam(kernel_node)) {
|
||||
return false;
|
||||
}
|
||||
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
|
||||
(void)CheckParam(kernel_node);
|
||||
input_size_ = sizeof(T);
|
||||
for (size_t idx = 0; idx < input_shape_.size(); ++idx) {
|
||||
input_size_ *= input_shape_[idx];
|
||||
|
@ -79,6 +79,7 @@ class BatchToSpaceGpuKernel : public GpuKernel {
|
|||
oc_ = 0;
|
||||
oh_ = 0;
|
||||
ow_ = 0;
|
||||
kernel_name_ = "BatchToSpace";
|
||||
input_size_list_.clear();
|
||||
output_size_list_.clear();
|
||||
crops_.clear();
|
||||
|
@ -91,37 +92,36 @@ class BatchToSpaceGpuKernel : public GpuKernel {
|
|||
output_size_list_.push_back(output_size_);
|
||||
}
|
||||
|
||||
bool CheckParam(const CNodePtr &kernel_node) {
|
||||
void CheckParam(const CNodePtr &kernel_node) {
|
||||
block_size_ = GetAttr<int64_t>(kernel_node, "block_size");
|
||||
if (block_size_ < 1) {
|
||||
MS_LOG(ERROR) << "block_size can not be less than 1.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the 'block_size' cannot be less than 1, but got "
|
||||
<< block_size_;
|
||||
}
|
||||
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
|
||||
if (input_num != 1) {
|
||||
MS_LOG(ERROR) << "input_num is " << input_num << ", but BatchToSpace needs 1 input.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 1, but got " << input_num;
|
||||
}
|
||||
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
|
||||
if (output_num != 1) {
|
||||
MS_LOG(ERROR) << "output_num is " << output_num << ", but BatchToSpace needs 1 output.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be 1, but got " << output_num;
|
||||
}
|
||||
|
||||
// check input_shape
|
||||
auto input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
|
||||
if (input_shape.size() != SHAPE_SIZE) {
|
||||
MS_LOG(ERROR) << "Input is " << input_shape.size() << "-D, but BatchToSpace supports 4-D tensor.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input should be 4, but got "
|
||||
<< input_shape.size();
|
||||
}
|
||||
if ((input_shape[0] % (block_size_ * block_size_)) != 0) {
|
||||
MS_LOG(ERROR) << "input_shape[0] must be divisible by product of block_shape";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_
|
||||
<< "', input_shape[0] should be divisible by product of block_shape, but got input_shape[0]: "
|
||||
<< input_shape[0] << ", block_shape: " << block_size_;
|
||||
}
|
||||
for (size_t idx = 0; idx < SHAPE_SIZE; ++idx) {
|
||||
if (input_shape[idx] < 1) {
|
||||
MS_LOG(ERROR) << "input_shape[" << idx << "] can not less than 1";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the shape of input cannot be less than 1, but got "
|
||||
<< CONVERT_VECTOR_TO_STRING(input_shape);
|
||||
}
|
||||
}
|
||||
input_shape_.assign(input_shape.begin(), input_shape.end());
|
||||
|
@ -130,28 +130,29 @@ class BatchToSpaceGpuKernel : public GpuKernel {
|
|||
crops_ = (GetAttr<std::vector<std::vector<int64_t>>>(kernel_node, "crops"));
|
||||
|
||||
if (crops_.size() != CROPS_SHAPE_0) {
|
||||
MS_LOG(ERROR) << "crops.size() in BatchToSpace needs 2.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the size of 'crops' should be " << CROPS_SHAPE_0
|
||||
<< ", but got " << crops_.size();
|
||||
}
|
||||
if (crops_[0].size() != CROPS_SHAPE_1 || crops_[1].size() != CROPS_SHAPE_1) {
|
||||
MS_LOG(ERROR) << "crops[i].size() in BatchToSpace needs 2.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the size of element of 'crops' should be " << CROPS_SHAPE_1
|
||||
<< ", but got the size of crops[0]: " << crops_[0].size()
|
||||
<< ", the size of crops[1]: " << crops_[1].size();
|
||||
} else {
|
||||
for (size_t idx_i = 0; idx_i < CROPS_SHAPE_0; ++idx_i) {
|
||||
for (size_t idx_j = 0; idx_j < CROPS_SHAPE_1; ++idx_j) {
|
||||
if (crops_[idx_i][idx_j] < 0) {
|
||||
MS_LOG(ERROR) << "the number in crops can not be less than 0.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_
|
||||
<< "', the element of 'crops' should be greater than or equal to 0, but got crops["
|
||||
<< idx_i << "][" << idx_j << "]: " << crops_[idx_i][idx_j];
|
||||
}
|
||||
}
|
||||
auto tmp_shape = input_shape[idx_i + CROPS_SHAPE_1] * block_size_ - crops_[idx_i][0] - crops_[idx_i][1];
|
||||
if (tmp_shape <= 0) {
|
||||
MS_LOG(ERROR) << "out_shape can not be less 1.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the shape of output should be greater than 0, but got "
|
||||
<< tmp_shape;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -172,6 +173,7 @@ class BatchToSpaceGpuKernel : public GpuKernel {
|
|||
size_t oc_;
|
||||
size_t oh_;
|
||||
size_t ow_;
|
||||
std::string kernel_name_;
|
||||
};
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_BROADCAST_TO_GPU_KERNEL_H_
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
|
||||
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
|
||||
#include "backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cuh"
|
||||
|
@ -28,7 +29,7 @@ constexpr size_t SHAPE_SIZE = 4;
|
|||
template <typename T>
|
||||
class BroadcastToGpuKernel : public GpuKernel {
|
||||
public:
|
||||
BroadcastToGpuKernel() {}
|
||||
BroadcastToGpuKernel() : kernel_name_("BroadcastTo") {}
|
||||
~BroadcastToGpuKernel() = default;
|
||||
|
||||
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
|
||||
|
@ -49,20 +50,24 @@ class BroadcastToGpuKernel : public GpuKernel {
|
|||
return true;
|
||||
}
|
||||
bool Init(const CNodePtr &kernel_node) override {
|
||||
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
|
||||
auto input_shapes = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
|
||||
auto output_shapes = AnfAlgo::GetOutputInferShape(kernel_node, 0);
|
||||
is_null_input_ = CHECK_NULL_INPUT(input_shapes) || CHECK_NULL_INPUT(output_shapes);
|
||||
is_null_input_ =
|
||||
CHECK_SHAPE_NULL(input_shapes, kernel_name_, "input") || CHECK_SHAPE_NULL(output_shapes, kernel_name_, "output");
|
||||
if (is_null_input_) {
|
||||
MS_LOG(WARNING) << "For 'BroadcastToGpuKernel', input or output is null";
|
||||
InitSizeLists();
|
||||
return true;
|
||||
}
|
||||
if (input_shapes.size() > SHAPE_SIZE || output_shapes.size() > SHAPE_SIZE) {
|
||||
MS_LOG(EXCEPTION) << "BroadcastTo operation not support dim greater than " << SHAPE_SIZE;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input and output cannot be greater than "
|
||||
<< SHAPE_SIZE << ", but got the dimension of input: " << input_shapes.size()
|
||||
<< ", the dimension of output: " << output_shapes.size();
|
||||
}
|
||||
|
||||
if (output_shapes.size() < input_shapes.size()) {
|
||||
MS_LOG(EXCEPTION) << "The rank of BroadcastTo's output cannot be smaller than the rank of the input.";
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of output cannot be less than "
|
||||
<< input_shapes.size() << ", but got " << output_shapes.size();
|
||||
}
|
||||
|
||||
size_t offset = output_shapes.size() - input_shapes.size();
|
||||
|
@ -92,6 +97,7 @@ class BroadcastToGpuKernel : public GpuKernel {
|
|||
std::vector<size_t> input_size_list_;
|
||||
std::vector<size_t> output_size_list_;
|
||||
std::vector<size_t> workspace_size_list_;
|
||||
std::string kernel_name_;
|
||||
};
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CAST_GPU_KERNEL_H_
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
|
||||
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
|
||||
#include "backend/kernel_compiler/gpu/cuda_impl/cast_impl.cuh"
|
||||
|
@ -47,19 +48,20 @@ class CastGpuKernel : public GpuKernel {
|
|||
} else if (input_addr != nullptr && output_addr != nullptr) {
|
||||
Cast(input_size_, input_addr, output_addr, reinterpret_cast<cudaStream_t>(stream_ptr));
|
||||
} else {
|
||||
MS_LOG(EXCEPTION)
|
||||
<< "The input and output device addresses for CastGpuKernel should be both null or both not null.";
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_
|
||||
<< "', the input and output device addresses should be both null or both not null";
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Init(const CNodePtr &kernel_node) override {
|
||||
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
|
||||
auto input_shapes = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
|
||||
auto output_shapes = AnfAlgo::GetOutputInferShape(kernel_node, 0);
|
||||
is_null_input_ = CHECK_NULL_INPUT(input_shapes) || CHECK_NULL_INPUT(output_shapes);
|
||||
is_null_input_ =
|
||||
CHECK_SHAPE_NULL(input_shapes, kernel_name_, "input") || CHECK_SHAPE_NULL(output_shapes, kernel_name_, "output");
|
||||
if (is_null_input_) {
|
||||
MS_LOG(WARNING) << "For 'CastGpuKernel', input or output is null";
|
||||
InitSizeLists();
|
||||
return true;
|
||||
}
|
||||
|
@ -74,7 +76,9 @@ class CastGpuKernel : public GpuKernel {
|
|||
}
|
||||
|
||||
if (input_size_ != output_size_) {
|
||||
MS_LOG(EXCEPTION) << "Input size is not equal to output size.";
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_
|
||||
<< "', the size of input and output should be the same, but got the size of input: "
|
||||
<< input_size_ << ", the size of output: " << output_size_;
|
||||
}
|
||||
InitSizeLists();
|
||||
return true;
|
||||
|
@ -84,6 +88,7 @@ class CastGpuKernel : public GpuKernel {
|
|||
input_size_ = 1;
|
||||
output_size_ = 1;
|
||||
is_null_input_ = false;
|
||||
kernel_name_ = "Cast";
|
||||
input_size_list_.clear();
|
||||
output_size_list_.clear();
|
||||
workspace_size_list_.clear();
|
||||
|
@ -103,6 +108,7 @@ class CastGpuKernel : public GpuKernel {
|
|||
std::vector<size_t> input_size_list_;
|
||||
std::vector<size_t> output_size_list_;
|
||||
std::vector<size_t> workspace_size_list_;
|
||||
std::string kernel_name_;
|
||||
};
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ARRAYS_CONCATV2_GPU_KERNEL_H_
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
|
||||
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
|
||||
|
@ -34,6 +35,7 @@ class ConcatV2GpuFwdKernel : public GpuKernel {
|
|||
output_size_(0),
|
||||
all_size_before_axis_(1),
|
||||
all_size_axis_(1),
|
||||
kernel_name_("ConcatV2"),
|
||||
inputs_host_(nullptr),
|
||||
len_axis_(nullptr) {}
|
||||
~ConcatV2GpuFwdKernel() override = default;
|
||||
|
@ -71,6 +73,7 @@ class ConcatV2GpuFwdKernel : public GpuKernel {
|
|||
return true;
|
||||
}
|
||||
bool Init(const CNodePtr &kernel_node) override {
|
||||
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
|
||||
kernel_node_ = kernel_node;
|
||||
if (!CheckParam(kernel_node)) {
|
||||
return false;
|
||||
|
@ -79,8 +82,8 @@ class ConcatV2GpuFwdKernel : public GpuKernel {
|
|||
int dims = SizeToInt(input_shape.size());
|
||||
axis_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "axis"));
|
||||
if (axis_ < -dims || axis_ >= dims) {
|
||||
MS_LOG(ERROR) << "axis must be in the range [-rank, rank)";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the 'axis' should be in the range [-" << dims << "," << dims
|
||||
<< "), but got " << axis_;
|
||||
}
|
||||
if (axis_ < 0) {
|
||||
axis_ += dims;
|
||||
|
@ -135,8 +138,7 @@ class ConcatV2GpuFwdKernel : public GpuKernel {
|
|||
bool CheckParam(const CNodePtr &kernel_node) {
|
||||
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
|
||||
if (output_num != 1) {
|
||||
MS_LOG(ERROR) << "Output number is " << output_num << ", but ConcatV2GpuFwdKernel needs 1 output.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be 1, but got " << output_num;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
@ -145,6 +147,7 @@ class ConcatV2GpuFwdKernel : public GpuKernel {
|
|||
size_t output_size_;
|
||||
int all_size_before_axis_;
|
||||
int all_size_axis_;
|
||||
std::string kernel_name_;
|
||||
std::unique_ptr<T *[]> inputs_host_;
|
||||
std::unique_ptr<int[]> len_axis_;
|
||||
std::vector<size_t> input_size_list_;
|
||||
|
|
|
@ -64,15 +64,14 @@ class CropAndResizeGpuKernel : public GpuKernel {
|
|||
return true;
|
||||
}
|
||||
bool Init(const CNodePtr &kernel_node) override {
|
||||
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
|
||||
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
|
||||
if (input_num != 4) {
|
||||
MS_LOG(ERROR) << "Input number is " << input_num << ", but CropAndResize needs 4 inputs.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 4, but got " << input_num;
|
||||
}
|
||||
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
|
||||
if (output_num != 1) {
|
||||
MS_LOG(ERROR) << "Output number is " << output_num << ", but CropAndResize has 1 output.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num;
|
||||
}
|
||||
// input image
|
||||
auto input_image_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
|
||||
|
@ -80,19 +79,19 @@ class CropAndResizeGpuKernel : public GpuKernel {
|
|||
auto input_box_index_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
|
||||
auto input_crop_size_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
|
||||
auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
|
||||
is_null_input_ = CHECK_NULL_INPUT(input_image_shape) || CHECK_NULL_INPUT(input_boxes_shape) ||
|
||||
CHECK_NULL_INPUT(input_box_index_shape) || CHECK_NULL_INPUT(input_crop_size_shape) ||
|
||||
CHECK_NULL_INPUT(output_shape);
|
||||
is_null_input_ = CHECK_SHAPE_NULL(input_image_shape, kernel_name, "x") ||
|
||||
CHECK_SHAPE_NULL(input_boxes_shape, kernel_name, "boxes") ||
|
||||
CHECK_SHAPE_NULL(input_box_index_shape, kernel_name, "boxes_index") ||
|
||||
CHECK_SHAPE_NULL(input_crop_size_shape, kernel_name, "crop_size") ||
|
||||
CHECK_SHAPE_NULL(output_shape, kernel_name, "output");
|
||||
if (is_null_input_) {
|
||||
MS_LOG(WARNING) << "For 'CropAndResizeGpuKernel', input or output is null.";
|
||||
InitSizeLists();
|
||||
return true;
|
||||
}
|
||||
size_t input_image_shape_len = input_image_shape.size();
|
||||
if (input_image_shape_len != 4) {
|
||||
MS_LOG(ERROR) << " image tensor is " << input_image_shape_len << "-D, but CropAndResize supports only " << 4
|
||||
<< "-D image tensors.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of x should be 4, but got "
|
||||
<< input_image_shape_len;
|
||||
}
|
||||
input_image_size_ = 1;
|
||||
for (size_t i = 0; i < input_image_shape_len; i++) {
|
||||
|
@ -104,9 +103,8 @@ class CropAndResizeGpuKernel : public GpuKernel {
|
|||
// input boxes
|
||||
size_t input_boxes_shape_len = input_boxes_shape.size();
|
||||
if (input_boxes_shape_len != 2) {
|
||||
MS_LOG(ERROR) << "Boxes is rank" << input_boxes_shape_len << " but CropAndResize supports only rank " << 2
|
||||
<< " for boxes.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of boxes should be 2, but got "
|
||||
<< input_boxes_shape_len;
|
||||
}
|
||||
input_boxes_size_ = 1;
|
||||
for (size_t i = 0; i < input_boxes_shape_len; i++) {
|
||||
|
@ -116,9 +114,8 @@ class CropAndResizeGpuKernel : public GpuKernel {
|
|||
// input box_index
|
||||
size_t input_box_index_shape_len = input_box_index_shape.size();
|
||||
if (input_box_index_shape_len != 1) {
|
||||
MS_LOG(ERROR) << "Box_index is rank " << input_box_index_shape_len << " but CropAndResize supports only rank "
|
||||
<< 1 << " for box_index.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of box_index should be 1, but got "
|
||||
<< input_box_index_shape_len;
|
||||
}
|
||||
input_box_ind_size_ = 1;
|
||||
input_box_ind_size_ *= input_box_index_shape[0]; // single dim required
|
||||
|
@ -126,14 +123,12 @@ class CropAndResizeGpuKernel : public GpuKernel {
|
|||
// input crop_size
|
||||
size_t input_crop_size_shape_len = input_crop_size_shape.size();
|
||||
if (input_crop_size_shape_len != 1) {
|
||||
MS_LOG(ERROR) << "Crop_size is rank " << input_crop_size_shape_len << "-D, but CropAndResize supports only rank "
|
||||
<< 1 << " for Crop_size.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of crop_size should be 1, but got "
|
||||
<< input_crop_size_shape_len;
|
||||
}
|
||||
if (input_crop_size_shape[0] != 2) {
|
||||
MS_LOG(ERROR) << "Crop_size is size " << input_crop_size_shape[0] << "-D, but CropAndResize supports only size "
|
||||
<< 2 << " for Crop_size.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the length of crop_size should be 2, but got "
|
||||
<< input_crop_size_shape[0];
|
||||
}
|
||||
input_crop_size_ = 1;
|
||||
input_crop_size_ *= input_crop_size_shape[0];
|
||||
|
@ -141,8 +136,8 @@ class CropAndResizeGpuKernel : public GpuKernel {
|
|||
// output
|
||||
auto output_shape_len = output_shape.size();
|
||||
if (output_shape_len != 4) {
|
||||
MS_LOG(ERROR) << "For 'CropAndResize', the rank of output should be 4, but got " << output_shape_len;
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of output should be 4, but got "
|
||||
<< output_shape_len;
|
||||
}
|
||||
output_size_ = 1;
|
||||
for (size_t i = 0; i < output_shape_len; i++) {
|
||||
|
|
|
@ -53,35 +53,34 @@ class DepthToSpaceFwdKernel : public GpuKernel {
|
|||
}
|
||||
|
||||
bool Init(const CNodePtr &kernel_node) override {
|
||||
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
|
||||
kernel_node_ = kernel_node;
|
||||
block_size_ = static_cast<int64_t>(GetAttr<int64_t>(kernel_node, "block_size"));
|
||||
if (block_size_ == 0) {
|
||||
MS_LOG(ERROR) << "block_size_ can not be 0.";
|
||||
return false;
|
||||
if (block_size_ < 2) {
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the 'block_size' cannot be less than 2, but got "
|
||||
<< block_size_;
|
||||
}
|
||||
// check input num and output num
|
||||
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
|
||||
if (input_num != 1) {
|
||||
MS_LOG(ERROR) << "Input number is " << input_num << ", but DepthToSpace needs 1 input.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 1, but got " << input_num;
|
||||
}
|
||||
|
||||
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
|
||||
if (output_num != 1) {
|
||||
MS_LOG(ERROR) << "Output number is " << output_num << ", DepthToSpace needs 1 output.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num;
|
||||
}
|
||||
// check input_shape
|
||||
auto input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
|
||||
is_null_input_ = CHECK_NULL_INPUT(input_shape);
|
||||
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
|
||||
if (is_null_input_) {
|
||||
MS_LOG(WARNING) << "For 'DepthToSpaceGpuKernel', input is null.";
|
||||
InitSizeLists();
|
||||
return true;
|
||||
}
|
||||
shape_size_ = input_shape.size();
|
||||
if (shape_size_ != DEPTHTOSPACE_BUFFER_DIMENSION) {
|
||||
MS_LOG(EXCEPTION) << "Input is " << shape_size_ << "-D, but DepthToSpace supports 4-D tensor.";
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input should be "
|
||||
<< DEPTHTOSPACE_BUFFER_DIMENSION << ", but got " << shape_size_;
|
||||
}
|
||||
// get input and out put information
|
||||
input_size_ = 1;
|
||||
|
|
|
@ -116,10 +116,10 @@ class DynamicRangeGpuKernel : public GpuKernel {
|
|||
}
|
||||
|
||||
bool Init(const CNodePtr &kernel_node) override {
|
||||
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
|
||||
size_t input_count = AnfAlgo::GetInputTensorNum(kernel_node);
|
||||
if (input_count != 3) {
|
||||
MS_LOG(ERROR) << input_count << " inputs were provided, but DynamicRangeGpuKernel expects 3.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 3, but got " << input_count;
|
||||
}
|
||||
|
||||
max_output_length_ = GetAttr<int64_t>(kernel_node, "maxlen");
|
||||
|
|
|
@ -53,16 +53,16 @@ class DynamicShapeGpuKernel : public GpuKernel {
|
|||
}
|
||||
|
||||
bool Init(const CNodePtr &kernel_node) override {
|
||||
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
|
||||
kernel_node_ = kernel_node;
|
||||
size_t input_count = AnfAlgo::GetInputTensorNum(kernel_node);
|
||||
if (input_count != 1) {
|
||||
MS_LOG(EXCEPTION) << input_count << " arguments were provided, but DynamicShapeGpuKernel expects 1.";
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 1, but got " << input_count;
|
||||
}
|
||||
|
||||
std::vector<size_t> prev_node_output_shape_tmp = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
|
||||
is_null_input_ = CHECK_NULL_INPUT(prev_node_output_shape_tmp);
|
||||
is_null_input_ = CHECK_SHAPE_NULL(prev_node_output_shape_tmp, kernel_name, "input");
|
||||
if (is_null_input_) {
|
||||
MS_LOG(WARNING) << "For 'DynamicShapeGpuKernel', input is null";
|
||||
InitSizeLists();
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -59,6 +59,7 @@ class EmbeddingLookupKernel : public GpuKernel {
|
|||
return true;
|
||||
}
|
||||
bool Init(const CNodePtr &kernel_node) override {
|
||||
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
|
||||
kernel_node_ = kernel_node;
|
||||
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
|
||||
if (input_num == 3) {
|
||||
|
@ -67,20 +68,21 @@ class EmbeddingLookupKernel : public GpuKernel {
|
|||
} else if (input_num == 2) {
|
||||
MS_LOG(INFO) << " EmbeddingLookup running in Normal Mode.";
|
||||
} else {
|
||||
MS_LOG(EXCEPTION) << "Argument number is " << input_num << ", but EmbeddingLookup needs 2 or 3.";
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 2 or 3, but got " << input_num;
|
||||
}
|
||||
input_shapes_ = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
|
||||
indices_shapes_ = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 1);
|
||||
output_shapes_ = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node, 0);
|
||||
is_null_input_ =
|
||||
CHECK_NULL_INPUT(input_shapes_) || CHECK_NULL_INPUT(indices_shapes_) || CHECK_NULL_INPUT(output_shapes_);
|
||||
is_null_input_ = CHECK_SHAPE_NULL(input_shapes_, kernel_name, "input") ||
|
||||
CHECK_SHAPE_NULL(indices_shapes_, kernel_name, "input_indices") ||
|
||||
CHECK_SHAPE_NULL(output_shapes_, kernel_name, "output");
|
||||
if (is_null_input_) {
|
||||
MS_LOG(WARNING) << "For 'EmbeddingLookupGpuKernel', input or output is null.";
|
||||
InitSizeLists();
|
||||
return true;
|
||||
}
|
||||
if (input_shapes_.size() < 1) {
|
||||
MS_LOG(EXCEPTION) << "For 'EmbeddingLookupGpuKernel', the rank of input cannot be less than 1.";
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input cannot be less than 1, but got "
|
||||
<< input_shapes_.size();
|
||||
}
|
||||
if (!is_dynamic_shape_) {
|
||||
offset_ = GetAttr<int64_t>(kernel_node, "offset");
|
||||
|
|
|
@ -85,20 +85,21 @@ class ExtractImagePatchesKernel : public GpuKernel {
|
|||
}
|
||||
|
||||
bool Init(const CNodePtr &kernel_node) override {
|
||||
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
|
||||
kernel_node_ = kernel_node;
|
||||
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
|
||||
if (input_num != 1) {
|
||||
MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but ExtractImagePatches needs 1 inputs.";
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 1, but got " << input_num;
|
||||
}
|
||||
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
|
||||
if (output_num != 1) {
|
||||
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but ExtractImagePatches has 1 output.";
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num;
|
||||
}
|
||||
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
|
||||
auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
|
||||
is_null_input_ = CHECK_NULL_INPUT(input_shape) || CHECK_NULL_INPUT(output_shape);
|
||||
is_null_input_ =
|
||||
CHECK_SHAPE_NULL(input_shape, kernel_name, "input") || CHECK_SHAPE_NULL(output_shape, kernel_name, "output");
|
||||
if (is_null_input_) {
|
||||
MS_LOG(WARNING) << "For 'ExtractImagePatchesGpuKernel', input or output is null.";
|
||||
InitSizeLists();
|
||||
return true;
|
||||
}
|
||||
|
@ -113,9 +114,9 @@ class ExtractImagePatchesKernel : public GpuKernel {
|
|||
output_size_ *= output_shape[i];
|
||||
}
|
||||
if (input_shape.size() != 4 || output_shape.size() != 4) {
|
||||
MS_LOG(EXCEPTION) << "For 'ExtractImagePatchesGpuKernel', the rank of input and output should be 4, "
|
||||
<< "but got the rank of input: " << input_shape.size()
|
||||
<< ", the rank of output: " << output_shape.size();
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name
|
||||
<< "', the dimension of input and output should be 4, but got the dimension of input: "
|
||||
<< input_shape.size() << ", the dimension of output: " << output_shape.size();
|
||||
}
|
||||
// transposed NHWC shape
|
||||
t_output_shape_ = {output_shape[0], output_shape[2], output_shape[3], output_shape[1]};
|
||||
|
@ -125,9 +126,10 @@ class ExtractImagePatchesKernel : public GpuKernel {
|
|||
auto strides = GetAttr<std::vector<int64_t>>(kernel_node, "strides");
|
||||
auto rates = GetAttr<std::vector<int64_t>>(kernel_node, "rates");
|
||||
if (ksizes.size() != 4 || strides.size() != 4 || rates.size() != 4) {
|
||||
MS_LOG(EXCEPTION) << "For 'ExtractImagePatchesGpuKernel', the rank of ksizes, strides and rates should be 4, "
|
||||
<< "but got the rank of ksizes: " << ksizes.size()
|
||||
<< ", the rank of strides: " << strides.size() << ", the rank of rates: " << rates.size();
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name
|
||||
<< "', the size of 'ksizes', 'strides' and 'rates' should be 4, but got the size of 'ksizes': "
|
||||
<< ksizes.size() << ", the size of 'strides': " << strides.size()
|
||||
<< ", the size of 'rates': " << rates.size();
|
||||
}
|
||||
|
||||
ksize_row_ = ksizes[2];
|
||||
|
@ -161,7 +163,8 @@ class ExtractImagePatchesKernel : public GpuKernel {
|
|||
row_padding_top_ = ((output_rows_ - 1) * stride_row_ + patch_rows_eff - input_row_size_) / 2;
|
||||
col_padding_left_ = ((output_cols_ - 1) * stride_col_ + patch_cols_eff - input_col_size_) / 2;
|
||||
} else {
|
||||
MS_LOG(EXCEPTION) << "Invalid padding value: " << padding << ".";
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the 'padding' should be 'VALID' or 'SAME', but got "
|
||||
<< padding;
|
||||
}
|
||||
|
||||
row_stride_ = ksize_col_;
|
||||
|
|
|
@ -49,30 +49,32 @@ class GatherGpuFwdKernel : public GpuKernel {
|
|||
return true;
|
||||
}
|
||||
bool Init(const CNodePtr &kernel_node) override {
|
||||
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
|
||||
InitResource();
|
||||
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
|
||||
if (input_num != 2) {
|
||||
MS_LOG(EXCEPTION) << "Argument number is " << input_num << ", but GatherGpuFwdKernel needs 2.";
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 2, but got " << input_num;
|
||||
}
|
||||
input_shapes_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
|
||||
index_shapes_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
|
||||
output_shapes_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);
|
||||
is_null_input_ =
|
||||
CHECK_NULL_INPUT(input_shapes_) || CHECK_NULL_INPUT(index_shapes_) || CHECK_NULL_INPUT(output_shapes_);
|
||||
is_null_input_ = CHECK_SHAPE_NULL(input_shapes_, kernel_name, "input") ||
|
||||
CHECK_SHAPE_NULL(index_shapes_, kernel_name, "input_indices") ||
|
||||
CHECK_SHAPE_NULL(output_shapes_, kernel_name, "output");
|
||||
if (is_null_input_) {
|
||||
MS_LOG(WARNING) << "For 'GatherGpuKernel', input or output is null";
|
||||
InitSizeLists();
|
||||
return true;
|
||||
}
|
||||
if (input_shapes_.size() != index_shapes_.size() || input_shapes_.size() != output_shapes_.size()) {
|
||||
MS_LOG(ERROR) << "The shape of input, index and output should be same.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input and output should be the same "
|
||||
<< index_shapes_.size() << ", but got the dimension of input: " << input_shapes_.size()
|
||||
<< ", the dimension of output: " << output_shapes_.size();
|
||||
}
|
||||
int dims = SizeToInt(input_shapes_.size());
|
||||
axis_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "dim"));
|
||||
if (axis_ < -dims || axis_ >= dims) {
|
||||
MS_LOG(ERROR) << "axis must be in the range [-rank, rank)";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the 'axis' should be in the range [-" << dims << "," << dims
|
||||
<< "), but got " << axis_;
|
||||
}
|
||||
if (axis_ < 0) {
|
||||
axis_ += dims;
|
||||
|
|
|
@ -49,30 +49,33 @@ class GatherGradGpuKernel : public GpuKernel {
|
|||
return true;
|
||||
}
|
||||
bool Init(const CNodePtr &kernel_node) override {
|
||||
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
|
||||
InitResource();
|
||||
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
|
||||
if (input_num != 2) {
|
||||
MS_LOG(EXCEPTION) << "Argument number is " << input_num << ", but GatherGradGpuKernel needs 2.";
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 2, but got " << input_num;
|
||||
}
|
||||
index_shapes_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
|
||||
grad_shapes_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
|
||||
output_shapes_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);
|
||||
is_null_input_ =
|
||||
CHECK_NULL_INPUT(index_shapes_) || CHECK_NULL_INPUT(grad_shapes_) || CHECK_NULL_INPUT(output_shapes_);
|
||||
is_null_input_ = CHECK_SHAPE_NULL(index_shapes_, kernel_name, "index") ||
|
||||
CHECK_SHAPE_NULL(grad_shapes_, kernel_name, "grad") ||
|
||||
CHECK_SHAPE_NULL(output_shapes_, kernel_name, "output");
|
||||
if (is_null_input_) {
|
||||
MS_LOG(WARNING) << "For 'GatherGradGpuKernel', input or output is null";
|
||||
InitSizeLists();
|
||||
return true;
|
||||
}
|
||||
if (grad_shapes_.size() != index_shapes_.size() || grad_shapes_.size() != output_shapes_.size()) {
|
||||
MS_LOG(ERROR) << "The shape of grad, index and output should be same.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name
|
||||
<< "', the dimension of grad, index and output should be the same, but got the dimension of "
|
||||
<< "grad: " << grad_shapes_.size() << ", the dimension of index: " << index_shapes_.size()
|
||||
<< ", the dimension of output: " << output_shapes_.size();
|
||||
}
|
||||
int dims = SizeToInt(grad_shapes_.size());
|
||||
axis_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "dim"));
|
||||
if (axis_ < -dims || axis_ >= dims) {
|
||||
MS_LOG(ERROR) << "axis must be in the range [-rank, rank)";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the 'axis' should be in the range [-" << dims << "," << dims
|
||||
<< "), but got " << axis_;
|
||||
}
|
||||
if (axis_ < 0) {
|
||||
axis_ += dims;
|
||||
|
|
|
@ -71,20 +71,21 @@ class GatherNdGpuFwdKernel : public GpuKernel {
|
|||
return true;
|
||||
}
|
||||
bool Init(const CNodePtr &kernel_node) override {
|
||||
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
|
||||
kernel_node_ = kernel_node;
|
||||
InitResource();
|
||||
memcpy_flag_ = false;
|
||||
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
|
||||
if (input_num != 2) {
|
||||
MS_LOG(EXCEPTION) << "Argument number is " << input_num << ", but GatherNdGpuFwdKernel needs 2.";
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 2, but got " << input_num;
|
||||
}
|
||||
input_shapes_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
|
||||
indices_shapes_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
|
||||
output_shapes_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);
|
||||
is_null_input_ =
|
||||
CHECK_NULL_INPUT(input_shapes_) || CHECK_NULL_INPUT(indices_shapes_) || CHECK_NULL_INPUT(output_shapes_);
|
||||
is_null_input_ = CHECK_SHAPE_NULL(input_shapes_, kernel_name, "input_x") ||
|
||||
CHECK_SHAPE_NULL(indices_shapes_, kernel_name, "indices") ||
|
||||
CHECK_SHAPE_NULL(output_shapes_, kernel_name, "output");
|
||||
if (is_null_input_) {
|
||||
MS_LOG(WARNING) << "For 'GatherndGpuKernel', input or output is null";
|
||||
InitSizeLists();
|
||||
return true;
|
||||
}
|
||||
|
@ -106,14 +107,18 @@ class GatherNdGpuFwdKernel : public GpuKernel {
|
|||
const size_t strides_len = sizeof(S) * batch_strides_.size();
|
||||
void *dev_batch_strides_work = device::gpu::GPUMemoryAllocator::GetInstance().AllocTensorMem(strides_len);
|
||||
if (dev_batch_strides_work == nullptr) {
|
||||
MS_LOG(EXCEPTION) << "Failed to alloc dev_batch_strides_work, size: " << strides_len;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name
|
||||
<< "', the memory alloc of dev_batch_strides_work should be successful, but failed, got size: "
|
||||
<< strides_len;
|
||||
}
|
||||
dev_batch_strides_ = static_cast<S *>(dev_batch_strides_work);
|
||||
|
||||
const size_t indices_len = sizeof(S) * batch_indices_.size();
|
||||
void *dev_batch_indices_work = device::gpu::GPUMemoryAllocator::GetInstance().AllocTensorMem(indices_len);
|
||||
if (dev_batch_indices_work == nullptr) {
|
||||
MS_LOG(EXCEPTION) << "Failed to alloc dev_batch_indices_work, size: " << indices_len;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name
|
||||
<< "', the memory alloc of dev_batch_indices_work should be successful, but failed, got size: "
|
||||
<< indices_len;
|
||||
}
|
||||
dev_batch_indices_ = static_cast<S *>(dev_batch_indices_work);
|
||||
|
||||
|
|
|
@ -64,6 +64,7 @@ class GatherV2GpuFwdKernel : public GpuKernel {
|
|||
return true;
|
||||
}
|
||||
bool Init(const CNodePtr &kernel_node) override {
|
||||
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
|
||||
kernel_node_ = kernel_node;
|
||||
InitResource();
|
||||
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
|
||||
|
@ -73,15 +74,15 @@ class GatherV2GpuFwdKernel : public GpuKernel {
|
|||
} else if (input_num == 2) {
|
||||
MS_LOG(INFO) << " GatherGpuV2FwdKernel running in Normal Mode.";
|
||||
} else {
|
||||
MS_LOG(EXCEPTION) << "Argument number is " << input_num << ", but GatherGpuV2FwdKernel needs 2 or 3.";
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 2 or 3, but got " << input_num;
|
||||
}
|
||||
input_shapes_ = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
|
||||
indices_shapes_ = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 1);
|
||||
output_shapes_ = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node, 0);
|
||||
is_null_input_ =
|
||||
CHECK_NULL_INPUT(input_shapes_) || CHECK_NULL_INPUT(indices_shapes_) || CHECK_NULL_INPUT(output_shapes_);
|
||||
is_null_input_ = CHECK_SHAPE_NULL(input_shapes_, kernel_name, "input") ||
|
||||
CHECK_SHAPE_NULL(indices_shapes_, kernel_name, "indices") ||
|
||||
CHECK_SHAPE_NULL(output_shapes_, kernel_name, "output");
|
||||
if (is_null_input_) {
|
||||
MS_LOG(WARNING) << "For 'GatherV2GpuKernel', input or output is null";
|
||||
InitSizeLists();
|
||||
return true;
|
||||
}
|
||||
|
@ -89,8 +90,8 @@ class GatherV2GpuFwdKernel : public GpuKernel {
|
|||
int dims = SizeToInt(input_shapes_.size());
|
||||
axis_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "axis"));
|
||||
if (axis_ < -dims || axis_ >= dims) {
|
||||
MS_LOG(ERROR) << "axis must be in the range [-rank, rank)";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the 'axis' should be in the range [-" << dims << "," << dims
|
||||
<< "), but got " << axis_;
|
||||
}
|
||||
Reshape();
|
||||
}
|
||||
|
|
|
@ -95,27 +95,25 @@ class InTopKGpuKernel : public GpuKernel {
|
|||
}
|
||||
|
||||
bool Init(const CNodePtr &kernel_node) override {
|
||||
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
|
||||
kernel_node_ = kernel_node;
|
||||
size_t input_count = AnfAlgo::GetInputTensorNum(kernel_node);
|
||||
if (input_count != 2) {
|
||||
MS_LOG(ERROR) << input_count << " inputs were provided, but InTopKGpuKernel expects 2.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 2, but got " << input_count;
|
||||
}
|
||||
|
||||
size_t output_count = AnfAlgo::GetOutputTensorNum(kernel_node);
|
||||
if (output_count != 1) {
|
||||
MS_LOG(ERROR) << "Number of outputs is " << output_count << ", but should be 1 for InTopKGpuKernel.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of output should be 1, but got " << output_count;
|
||||
}
|
||||
|
||||
input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
|
||||
if (input_shape_.size() < 2) {
|
||||
MS_LOG(EXCEPTION) << "For 'InTopKGpuKernel', the rank of input cannot be less than 2, but got "
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input cannot be less than 2, but got "
|
||||
<< input_shape_.size();
|
||||
}
|
||||
is_null_input_ = CHECK_NULL_INPUT(input_shape_);
|
||||
is_null_input_ = CHECK_SHAPE_NULL(input_shape_, kernel_name, "input");
|
||||
if (is_null_input_) {
|
||||
MS_LOG(WARNING) << "For 'InTopKGpuKernel', input is null.";
|
||||
InitSizeLists();
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -69,14 +69,15 @@ class MeshgridGpuKernel : public GpuKernel {
|
|||
return true;
|
||||
}
|
||||
bool Init(const CNodePtr &kernel_node) override {
|
||||
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
|
||||
std::string indexing = GetAttr<std::string>(kernel_node, "indexing");
|
||||
if (indexing == "xy") {
|
||||
swap_indexing_ = true;
|
||||
} else if (indexing == "ij") {
|
||||
swap_indexing_ = false;
|
||||
} else {
|
||||
MS_LOG(ERROR) << "invalid string for argument \"indexing\", must be \"xy\" or \"ij\" but got " << indexing;
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the value of 'indexing' should be \"xy\" or \"ij\", but got "
|
||||
<< indexing;
|
||||
}
|
||||
|
||||
input_size_ = 1;
|
||||
|
@ -84,8 +85,8 @@ class MeshgridGpuKernel : public GpuKernel {
|
|||
for (size_t i = 0; i < input_count_; i++) {
|
||||
auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, i);
|
||||
if (input_shape.size() < 1) {
|
||||
MS_LOG(ERROR) << "For 'MeshGridGpuKernel', the rank of input" << i << " cannot be less than 1.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input[" << i << "] cannot be less than 1, "
|
||||
<< "but got " << input_shape.size();
|
||||
}
|
||||
size_t input_size = input_shape[0];
|
||||
input_shapes_.push_back(input_size);
|
||||
|
@ -97,17 +98,16 @@ class MeshgridGpuKernel : public GpuKernel {
|
|||
|
||||
// inferred shape swaps output shape for us if needed
|
||||
output_shape_ = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
|
||||
is_null_input_ = CHECK_NULL_INPUT(output_shape_);
|
||||
is_null_input_ = CHECK_SHAPE_NULL(output_shape_, kernel_name, "output");
|
||||
if (is_null_input_) {
|
||||
MS_LOG(WARNING) << "For 'MeshGridGpuKernel', output is null.";
|
||||
InitSizeLists();
|
||||
return true;
|
||||
}
|
||||
|
||||
if (output_count_ != input_count_) {
|
||||
MS_LOG(ERROR) << "output count is " << output_count_ << ", but MeshgridGpuKernel needs " << input_count_
|
||||
<< " output(s).";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name
|
||||
<< "', the number of inputs and outputs should be the same, but got the number of inputs: "
|
||||
<< input_count_ << ", the number of outputs: " << output_count_;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < output_shape_.size(); i++) {
|
||||
|
|
|
@ -49,21 +49,23 @@ class OneHotGpuFwdKernel : public GpuKernel {
|
|||
return true;
|
||||
}
|
||||
bool Init(const CNodePtr &kernel_node) override {
|
||||
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
|
||||
int64_t axis = GetAttr<int64_t>(kernel_node, "axis");
|
||||
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
|
||||
auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
|
||||
is_null_input_ = CHECK_NULL_INPUT(input_shape) || CHECK_NULL_INPUT(output_shape);
|
||||
is_null_input_ =
|
||||
CHECK_SHAPE_NULL(input_shape, kernel_name, "input") || CHECK_SHAPE_NULL(output_shape, kernel_name, "output");
|
||||
if (is_null_input_) {
|
||||
MS_LOG(WARNING) << "For 'OneHotGpuKernel', input or output is null";
|
||||
InitSizeLists();
|
||||
return true;
|
||||
}
|
||||
int64_t input_dims = static_cast<int64_t>(input_shape.size());
|
||||
int64_t output_dims = static_cast<int64_t>(output_shape.size());
|
||||
if (axis >= input_dims || axis >= output_dims) {
|
||||
MS_LOG(ERROR) << "invalid one hot axis value: " << axis << " for input dims size: " << input_shape.size()
|
||||
<< " or output dims size: " << output_dims;
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_
|
||||
<< "', the 'axis' should be less than the dimension of input and output"
|
||||
<< ", but got 'axis': " << axis << ", the dimension of input: " << input_dims
|
||||
<< ", the dimension of output: " << output_dims;
|
||||
}
|
||||
const int64_t default_axis = -1;
|
||||
|
||||
|
|
|
@ -46,20 +46,18 @@ class OnesLikeGpuKernel : public GpuKernel {
|
|||
}
|
||||
|
||||
bool Init(const CNodePtr &kernel_node) override {
|
||||
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
|
||||
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
|
||||
if (input_num != 1) {
|
||||
MS_LOG(ERROR) << "Input number is " << input_num << ", but oneslike needs 1 input.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 1, but got " << input_num;
|
||||
}
|
||||
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
|
||||
if (output_num != 1) {
|
||||
MS_LOG(ERROR) << "Output number is " << output_num << ", but oneslike needs 1 output.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num;
|
||||
}
|
||||
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
|
||||
is_null_input_ = CHECK_NULL_INPUT(input_shape);
|
||||
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
|
||||
if (is_null_input_) {
|
||||
MS_LOG(WARNING) << "For 'OneslikeGpuKernel', input is null";
|
||||
InitSizeLists();
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_PACK_GPU_KERNEL_H
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
|
||||
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
|
||||
|
@ -29,7 +30,13 @@ template <typename T>
|
|||
class PackGpuFwdKernel : public GpuKernel {
|
||||
public:
|
||||
PackGpuFwdKernel()
|
||||
: axis_(0), is_null_input_(false), input_num_(1), output_size_(0), dims_behind_axis_(1), inputs_host_(nullptr) {}
|
||||
: axis_(0),
|
||||
is_null_input_(false),
|
||||
input_num_(1),
|
||||
output_size_(0),
|
||||
dims_behind_axis_(1),
|
||||
inputs_host_(nullptr),
|
||||
kernel_name_("Pack") {}
|
||||
~PackGpuFwdKernel() override = default;
|
||||
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
|
||||
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
|
||||
|
@ -55,10 +62,9 @@ class PackGpuFwdKernel : public GpuKernel {
|
|||
return true;
|
||||
}
|
||||
bool Init(const CNodePtr &kernel_node) override {
|
||||
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
|
||||
kernel_node_ = kernel_node;
|
||||
if (!CheckParam(kernel_node)) {
|
||||
return false;
|
||||
}
|
||||
(void)CheckParam(kernel_node);
|
||||
axis_ = static_cast<int32_t>(GetAttr<int64_t>(kernel_node, "axis"));
|
||||
if (axis_ < 0) {
|
||||
auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
|
||||
|
@ -73,9 +79,8 @@ class PackGpuFwdKernel : public GpuKernel {
|
|||
for (size_t i = 0; i < input_num_; i++) {
|
||||
size_t input_size = 1;
|
||||
auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, i);
|
||||
is_null_input_ = CHECK_NULL_INPUT(input_shape);
|
||||
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name_, "input");
|
||||
if (is_null_input_) {
|
||||
MS_LOG(WARNING) << "For 'PackGpuKernel', input is null";
|
||||
InitSizeLists();
|
||||
return true;
|
||||
}
|
||||
|
@ -90,9 +95,8 @@ class PackGpuFwdKernel : public GpuKernel {
|
|||
workspace_size_list_.push_back(sizeof(T *) * input_num_);
|
||||
|
||||
auto output_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
|
||||
is_null_input_ = CHECK_NULL_INPUT(output_shape);
|
||||
is_null_input_ = CHECK_SHAPE_NULL(output_shape, kernel_name_, "output");
|
||||
if (is_null_input_) {
|
||||
MS_LOG(WARNING) << "For 'PackGpuKernel', output is null";
|
||||
InitSizeLists();
|
||||
return true;
|
||||
}
|
||||
|
@ -109,13 +113,11 @@ class PackGpuFwdKernel : public GpuKernel {
|
|||
void InitSizeLists() override {}
|
||||
|
||||
private:
|
||||
bool CheckParam(const CNodePtr &kernel_node) {
|
||||
void CheckParam(const CNodePtr &kernel_node) {
|
||||
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
|
||||
if (output_num != 1) {
|
||||
MS_LOG(ERROR) << "Output number is " << output_num << ", but PackGpuFwdKernel needs 1 output.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be 1, but got " << output_num;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
int axis_;
|
||||
bool is_null_input_;
|
||||
|
@ -126,6 +128,7 @@ class PackGpuFwdKernel : public GpuKernel {
|
|||
std::vector<size_t> input_size_list_;
|
||||
std::vector<size_t> output_size_list_;
|
||||
std::vector<size_t> workspace_size_list_;
|
||||
std::string kernel_name_;
|
||||
};
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -45,20 +45,18 @@ class RangeGPUKernel : public GpuKernel {
|
|||
}
|
||||
|
||||
bool Init(const CNodePtr &kernel_node) override {
|
||||
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
|
||||
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
|
||||
if (input_num != 1) {
|
||||
MS_LOG(ERROR) << "Input number is " << input_num << ", but Range needs 1 input.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 1, but got " << input_num;
|
||||
}
|
||||
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
|
||||
if (output_num != 1) {
|
||||
MS_LOG(ERROR) << "Output number is " << output_num << ", but Range needs 1 output.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num;
|
||||
}
|
||||
auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
|
||||
is_null_input_ = CHECK_NULL_INPUT(input_shape);
|
||||
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
|
||||
if (is_null_input_) {
|
||||
MS_LOG(WARNING) << "For 'RangeGpuKernel', input is null";
|
||||
InitSizeLists();
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -57,33 +57,32 @@ class ResizeNearestNeighborGpuKernel : public GpuKernel {
|
|||
}
|
||||
|
||||
bool Init(const CNodePtr &kernel_node) override {
|
||||
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
|
||||
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
|
||||
if (input_num != 1) {
|
||||
MS_LOG(ERROR) << "Input number is " << input_num << ", but ResizeNearestNeighbor needs 1 input.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 1, but got " << input_num;
|
||||
}
|
||||
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
|
||||
if (output_num != 1) {
|
||||
MS_LOG(ERROR) << "Output number is " << output_num << ", but ResizeNearestNeighbor has 1 output.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num;
|
||||
}
|
||||
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
|
||||
shape_size_ = input_shape.size();
|
||||
auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
|
||||
is_null_input_ = CHECK_NULL_INPUT(input_shape) || CHECK_NULL_INPUT(output_shape);
|
||||
is_null_input_ =
|
||||
CHECK_SHAPE_NULL(input_shape, kernel_name, "input") || CHECK_SHAPE_NULL(output_shape, kernel_name, "output");
|
||||
if (is_null_input_) {
|
||||
MS_LOG(WARNING) << "For 'ResizeNearestNeighborGpuKernel', input or output is null";
|
||||
InitSizeLists();
|
||||
return true;
|
||||
}
|
||||
if (shape_size_ != RESIZENEARESTNEIGHBOR_DIMENSION) {
|
||||
MS_LOG(ERROR) << "Input is " << shape_size_ << "-D, but ResizeNearestNeighbor supports only "
|
||||
<< RESIZENEARESTNEIGHBOR_DIMENSION << "-D inputs.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input should be "
|
||||
<< RESIZENEARESTNEIGHBOR_DIMENSION << ", but got " << shape_size_;
|
||||
}
|
||||
if (shape_size_ != output_shape.size()) {
|
||||
MS_LOG(ERROR) << "The dim of input and output must be same.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name
|
||||
<< "', the dimension of input and output should be the same, but got the dimension of input: "
|
||||
<< shape_size_ << ", the dimension of output: " << output_shape.size();
|
||||
}
|
||||
input_size_ = 1;
|
||||
for (size_t i = 0; i < shape_size_; i++) {
|
||||
|
|
|
@ -57,40 +57,39 @@ class ResizeNearestNeighborGradGpuKernel : public GpuKernel {
|
|||
}
|
||||
|
||||
bool Init(const CNodePtr &kernel_node) override {
|
||||
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
|
||||
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
|
||||
if (input_num != 1) {
|
||||
MS_LOG(ERROR) << "Input number is " << input_num << ", but ResizeNearestNeighbor needs 1 input.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 1, but got " << input_num;
|
||||
}
|
||||
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
|
||||
if (output_num != 1) {
|
||||
MS_LOG(ERROR) << "Output number is " << output_num << ", but ResizeNearestNeighbor has 1 output.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num;
|
||||
}
|
||||
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
|
||||
shape_size_ = input_shape.size();
|
||||
auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
|
||||
is_null_input_ = CHECK_NULL_INPUT(input_shape) || CHECK_NULL_INPUT(output_shape);
|
||||
is_null_input_ =
|
||||
CHECK_SHAPE_NULL(input_shape, kernel_name, "input") || CHECK_SHAPE_NULL(output_shape, kernel_name, "output");
|
||||
if (is_null_input_) {
|
||||
MS_LOG(WARNING) << "For 'ResizeNearestNeighborGradGpuKernel', input or output is null";
|
||||
InitSizeLists();
|
||||
return true;
|
||||
}
|
||||
if (shape_size_ != RESIZENEARESTNEIGHBORGRAD_DIMENSION) {
|
||||
MS_LOG(ERROR) << "Input is " << shape_size_ << "-D, but ResizeNearestNeighbor supports only "
|
||||
<< RESIZENEARESTNEIGHBORGRAD_DIMENSION << "-D inputs.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input should be "
|
||||
<< RESIZENEARESTNEIGHBORGRAD_DIMENSION << ", but got " << shape_size_;
|
||||
}
|
||||
if (shape_size_ != output_shape.size()) {
|
||||
MS_LOG(ERROR) << "The dim of input and output must be same.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name
|
||||
<< "', the dimension of input and output should be the same, but got the dimension of input: "
|
||||
<< shape_size_ << ", the dimension of output: " << output_shape.size();
|
||||
}
|
||||
input_size_ = 1;
|
||||
for (size_t i = 0; i < shape_size_; i++) {
|
||||
input_size_ *= input_shape[i];
|
||||
if (input_shape[i] == 0) {
|
||||
MS_LOG(ERROR) << "The shape of input has 0.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the shape of input at " << i << " index cannot be 0, "
|
||||
<< "but got " << input_shape[i];
|
||||
}
|
||||
input_shape_.push_back(input_shape[i]);
|
||||
}
|
||||
|
@ -99,8 +98,8 @@ class ResizeNearestNeighborGradGpuKernel : public GpuKernel {
|
|||
for (size_t i = 0; i < shape_size_; i++) {
|
||||
output_size_ *= output_shape[i];
|
||||
if (input_shape[i] == 0) {
|
||||
MS_LOG(ERROR) << "The shape of output has 0.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the shape of output at " << i << " index cannot be 0, "
|
||||
<< "but got " << input_shape[i];
|
||||
}
|
||||
output_shape_.push_back(output_shape[i]);
|
||||
}
|
||||
|
|
|
@ -66,28 +66,27 @@ class ReverseSequenceGpuFwdKernel : public GpuKernel {
|
|||
}
|
||||
|
||||
bool Init(const CNodePtr &kernel_node) override {
|
||||
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
|
||||
batch_dim_ = GetAttr<int64_t>(kernel_node, "batch_dim");
|
||||
seq_dim_ = GetAttr<int64_t>(kernel_node, "seq_dim");
|
||||
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
|
||||
if (input_num != 2) {
|
||||
MS_LOG(ERROR) << "Input number is " << input_num << ", but ReverseSequence needs 2 input.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 2, but got " << input_num;
|
||||
}
|
||||
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
|
||||
if (output_num != 1) {
|
||||
MS_LOG(ERROR) << "Output number is " << output_num << ", but ReverseSequence needs 1 output.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num;
|
||||
}
|
||||
input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
|
||||
auto seq_len_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
|
||||
is_null_input_ = CHECK_NULL_INPUT(input_shape_) || CHECK_NULL_INPUT(seq_len_shape);
|
||||
is_null_input_ =
|
||||
CHECK_SHAPE_NULL(input_shape_, kernel_name, "x") || CHECK_SHAPE_NULL(seq_len_shape, kernel_name, "seq_lengths");
|
||||
if (is_null_input_) {
|
||||
MS_LOG(WARNING) << "For 'ReverseSequenceGpuKernel', input is null.";
|
||||
InitSizeLists();
|
||||
return true;
|
||||
}
|
||||
if (input_shape_.size() < 1) {
|
||||
MS_LOG(EXCEPTION) << "For 'ReverseSequenceGpuKernel', the rank of input cannot be less than 1, but got "
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input cannot be less than 1, but got "
|
||||
<< input_shape_.size();
|
||||
}
|
||||
input_size_ = 1;
|
||||
|
|
|
@ -69,28 +69,27 @@ class ReverseV2GpuKernel : public GpuKernel {
|
|||
}
|
||||
|
||||
bool Init(const CNodePtr &kernel_node) override {
|
||||
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
|
||||
size_t input_count = AnfAlgo::GetInputTensorNum(kernel_node);
|
||||
if (input_count != 1) {
|
||||
MS_LOG(ERROR) << input_count << " inputs were provided, but ReverseV2GpuKernel expects 1.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 1, but got " << input_count;
|
||||
}
|
||||
|
||||
size_t output_count = AnfAlgo::GetOutputTensorNum(kernel_node);
|
||||
if (output_count != 1) {
|
||||
MS_LOG(ERROR) << "Number of outputs is " << output_count << ", but should be 1 for ReverseV2GpuKernel.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 2, but got " << output_count;
|
||||
}
|
||||
|
||||
input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
|
||||
is_null_input_ = CHECK_NULL_INPUT(input_shape_);
|
||||
is_null_input_ = CHECK_SHAPE_NULL(input_shape_, kernel_name, "input");
|
||||
if (is_null_input_) {
|
||||
MS_LOG(WARNING) << "For 'ReverseV2GpuKernel', input is null.";
|
||||
InitSizeLists();
|
||||
return true;
|
||||
}
|
||||
input_rank_ = input_shape_.size();
|
||||
if (input_rank_ < 1) {
|
||||
MS_LOG(EXCEPTION) << "For 'ReverseV2GpuKernel', the rank of input cannot be less than 1, bot got " << input_rank_;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input cannot be less than 1, but got "
|
||||
<< input_rank_;
|
||||
}
|
||||
input_size_ = 1;
|
||||
for (size_t i = 0; i < input_rank_; i++) {
|
||||
|
@ -105,7 +104,8 @@ class ReverseV2GpuKernel : public GpuKernel {
|
|||
|
||||
axis_ = GetAttr<std::vector<int64_t>>(kernel_node, "axis");
|
||||
if (axis_.size() < 1) {
|
||||
MS_LOG(EXCEPTION) << "For 'ReverseV2GpuKernel', the rank of axis cannot be less than 1, bot got " << axis_.size();
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the size of 'axis' cannot be less than 1, but got "
|
||||
<< axis_.size();
|
||||
}
|
||||
for (int64_t &dimension : axis_) {
|
||||
if (dimension < 0) {
|
||||
|
|
|
@ -63,20 +63,21 @@ class ScatterFunctorKernel : public GpuKernel {
|
|||
std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node);
|
||||
auto iter = kScatterFunctorTypeMap.find(kernel_name);
|
||||
if (iter == kScatterFunctorTypeMap.end()) {
|
||||
MS_LOG(EXCEPTION) << "Scatter functor " << kernel_name << " is not supported.";
|
||||
MS_LOG(EXCEPTION)
|
||||
<< "For '" << kernel_name
|
||||
<< "Only support these scatter functors: ScatterUpdate, ScatterAdd or ScatterSub currently, but got "
|
||||
<< kernel_name;
|
||||
} else {
|
||||
scatter_functor_type_ = iter->second;
|
||||
}
|
||||
kernel_node_ = kernel_node;
|
||||
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
|
||||
if (input_num != 3) {
|
||||
MS_LOG(ERROR) << "Input number is " << input_num << ", but " << kernel_name << " needs 3 inputs.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 3, but got " << input_num;
|
||||
}
|
||||
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
|
||||
if (output_num != 1) {
|
||||
MS_LOG(ERROR) << "Output number is " << output_num << ", but " << kernel_name << " has 1 output.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num;
|
||||
}
|
||||
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
|
||||
input_size_ = 1;
|
||||
|
|
|
@ -69,20 +69,20 @@ class ScatterNdFunctorKernel : public GpuKernel {
|
|||
std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node);
|
||||
auto iter = kScatterNdFunctorTypeMap.find(kernel_name);
|
||||
if (iter == kScatterNdFunctorTypeMap.end()) {
|
||||
MS_LOG(EXCEPTION) << "ScatterNd functor " << kernel_name << " is not supported.";
|
||||
MS_LOG(EXCEPTION)
|
||||
<< "Only support these scatter functors: ScatterNdUpdate, ScatterNdAdd or ScatterNdSub currently, but got "
|
||||
<< kernel_name;
|
||||
} else {
|
||||
scatter_nd_functor_type_ = iter->second;
|
||||
}
|
||||
kernel_node_ = kernel_node;
|
||||
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
|
||||
if (input_num != 3) {
|
||||
MS_LOG(ERROR) << "Input number is " << input_num << ", but " << kernel_name << " needs 3 inputs.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 3, but got " << input_num;
|
||||
}
|
||||
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
|
||||
if (output_num != 1) {
|
||||
MS_LOG(ERROR) << "Output number is " << output_num << ", but " << kernel_name << " has 1 output.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num;
|
||||
}
|
||||
|
||||
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
|
||||
|
@ -90,17 +90,21 @@ class ScatterNdFunctorKernel : public GpuKernel {
|
|||
auto updates_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
|
||||
auto index_depth = indices_shape.back();
|
||||
if (index_depth > input_shape.size()) {
|
||||
MS_LOG(EXCEPTION) << "Value of last dimension of indices is greater than shape rank";
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the last dimension value of indices should be greater than "
|
||||
<< input_shape.size() << ", but got " << index_depth;
|
||||
}
|
||||
if (indices_shape.size() < 2) {
|
||||
MS_LOG(EXCEPTION) << "Indices dimension less than 2";
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of indices cannot be greater than 2, but got "
|
||||
<< indices_shape.size();
|
||||
}
|
||||
if (updates_shape.size() != indices_shape.size() - 1 + input_shape.size() - index_depth) {
|
||||
MS_LOG(EXCEPTION) << "Update, shape rank and indices rank inconsistent";
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name
|
||||
<< "', the dimension of updates, indices, shape should be consistent.";
|
||||
}
|
||||
for (size_t i = 0; i < indices_shape.size() - 1; ++i) {
|
||||
if (updates_shape[i] != indices_shape[i]) {
|
||||
MS_LOG(EXCEPTION) << "Value of " << i << "th dimension of indices is not equal to that update";
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << ", value of " << i
|
||||
<< "th dimension of indices is not equal to that update";
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -90,26 +90,27 @@ class ScatterNdGpuFwdKernel : public GpuKernel {
|
|||
}
|
||||
|
||||
bool Init(const CNodePtr &kernel_node) override {
|
||||
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
|
||||
kernel_node_ = kernel_node;
|
||||
memcpy_flag_ = false;
|
||||
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
|
||||
if (input_num != 2) {
|
||||
MS_LOG(ERROR) << "Input number is " << input_num << ", but transpose needs 2 input.";
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 2, but got " << input_num;
|
||||
return false;
|
||||
}
|
||||
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
|
||||
if (output_num != 1) {
|
||||
MS_LOG(ERROR) << "Output number is " << output_num << ", but transpose needs 1 output.";
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num;
|
||||
return false;
|
||||
}
|
||||
|
||||
input_shapes_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
|
||||
indices_shapes_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
|
||||
output_shapes_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);
|
||||
is_null_input_ =
|
||||
CHECK_NULL_INPUT(input_shapes_) || CHECK_NULL_INPUT(indices_shapes_) || CHECK_NULL_INPUT(output_shapes_);
|
||||
is_null_input_ = CHECK_SHAPE_NULL(input_shapes_, kernel_name, "input") ||
|
||||
CHECK_SHAPE_NULL(indices_shapes_, kernel_name, "indices") ||
|
||||
CHECK_SHAPE_NULL(output_shapes_, kernel_name, "output");
|
||||
if (is_null_input_) {
|
||||
MS_LOG(WARNING) << "For 'ScatterNdGpuKernel', input or output is null";
|
||||
InitSizeLists();
|
||||
return true;
|
||||
}
|
||||
|
@ -122,14 +123,18 @@ class ScatterNdGpuFwdKernel : public GpuKernel {
|
|||
const size_t indices_len = sizeof(S) * vec_indices_stride_.size();
|
||||
void *indices_stride_work = device::gpu::GPUMemoryAllocator::GetInstance().AllocTensorMem(indices_len);
|
||||
if (indices_stride_work == nullptr) {
|
||||
MS_LOG(EXCEPTION) << "Failed to alloc indices_stride_work, size: " << indices_len;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name
|
||||
<< "', the memory alloc of indices_stride_work should be successful, but failed, got size: "
|
||||
<< indices_len;
|
||||
}
|
||||
indices_stride_ = static_cast<S *>(indices_stride_work);
|
||||
|
||||
const size_t vec_work_len = sizeof(S) * vec_work_shape_.size();
|
||||
void *work_shape_work = device::gpu::GPUMemoryAllocator::GetInstance().AllocTensorMem(vec_work_len);
|
||||
if (work_shape_work == nullptr) {
|
||||
MS_LOG(EXCEPTION) << "Failed to alloc work_shape_work, size: " << vec_work_len;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name
|
||||
<< "', the memory alloc of indices_stride_work should be successful, but failed, got size: "
|
||||
<< vec_work_len;
|
||||
}
|
||||
work_shape_ = static_cast<S *>(work_shape_work);
|
||||
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SELECT_GPU_KERNEL_H
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
|
||||
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
|
||||
#include "backend/kernel_compiler/gpu/cuda_impl/select_impl.cuh"
|
||||
|
@ -27,7 +28,7 @@ namespace kernel {
|
|||
template <typename T>
|
||||
class SelectGpuKernel : public GpuKernel {
|
||||
public:
|
||||
SelectGpuKernel() : input_size_(0), output_size_(0), is_null_input_(false) {}
|
||||
SelectGpuKernel() : input_size_(0), output_size_(0), is_null_input_(false), kernel_name_("Select") {}
|
||||
~SelectGpuKernel() override = default;
|
||||
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
|
||||
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
|
||||
|
@ -48,13 +49,11 @@ class SelectGpuKernel : public GpuKernel {
|
|||
}
|
||||
|
||||
bool Init(const CNodePtr &kernel_node) override {
|
||||
if (!CheckParam(kernel_node)) {
|
||||
return false;
|
||||
}
|
||||
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
|
||||
(void)CheckParam(kernel_node);
|
||||
auto shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
|
||||
is_null_input_ = CHECK_NULL_INPUT(shape);
|
||||
is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name_, "input");
|
||||
if (is_null_input_) {
|
||||
MS_LOG(WARNING) << "For 'SelectGpuKernel', input is null";
|
||||
InitSizeLists();
|
||||
return true;
|
||||
}
|
||||
|
@ -77,18 +76,15 @@ class SelectGpuKernel : public GpuKernel {
|
|||
}
|
||||
|
||||
private:
|
||||
bool CheckParam(const CNodePtr &kernel_node) {
|
||||
void CheckParam(const CNodePtr &kernel_node) {
|
||||
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
|
||||
if (input_num != 3) {
|
||||
MS_LOG(ERROR) << "Input number is " << input_num << ", but SelectGpuKernel needs 3 output.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 3, but got " << input_num;
|
||||
}
|
||||
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
|
||||
if (output_num != 1) {
|
||||
MS_LOG(ERROR) << "Output number is " << output_num << ", but SelectGpuKernel needs 1 output.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be 1, but got " << output_num;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
std::vector<size_t> input_size_list_;
|
||||
|
@ -98,6 +94,7 @@ class SelectGpuKernel : public GpuKernel {
|
|||
size_t input_size_;
|
||||
size_t output_size_;
|
||||
bool is_null_input_;
|
||||
std::string kernel_name_;
|
||||
};
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ARRAYS_SLICE_GPU_KERNEL_H_
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <algorithm>
|
||||
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
|
||||
|
@ -29,7 +30,8 @@ namespace kernel {
|
|||
template <typename T>
|
||||
class SliceGpuFwdKernel : public GpuKernel {
|
||||
public:
|
||||
SliceGpuFwdKernel() : is_null_input_(false), input_size_(0), output_size_(0), workspace_size_(0) {}
|
||||
SliceGpuFwdKernel()
|
||||
: is_null_input_(false), input_size_(0), output_size_(0), workspace_size_(0), kernel_name_("Slice") {}
|
||||
~SliceGpuFwdKernel() override = default;
|
||||
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
|
||||
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
|
||||
|
@ -86,15 +88,14 @@ class SliceGpuFwdKernel : public GpuKernel {
|
|||
}
|
||||
|
||||
bool Init(const CNodePtr &kernel_node) override {
|
||||
if (!CheckParam(kernel_node)) {
|
||||
return false;
|
||||
}
|
||||
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
|
||||
(void)CheckParam(kernel_node);
|
||||
|
||||
auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
|
||||
auto out_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
|
||||
is_null_input_ = CHECK_NULL_INPUT(input_shape) || CHECK_NULL_INPUT(out_shape);
|
||||
is_null_input_ =
|
||||
CHECK_SHAPE_NULL(input_shape, kernel_name_, "input") || CHECK_SHAPE_NULL(out_shape, kernel_name_, "output");
|
||||
if (is_null_input_) {
|
||||
MS_LOG(WARNING) << "For 'SliceGpuKernel', input or output is null";
|
||||
InitSizeLists();
|
||||
return true;
|
||||
}
|
||||
|
@ -139,34 +140,32 @@ class SliceGpuFwdKernel : public GpuKernel {
|
|||
}
|
||||
|
||||
private:
|
||||
bool CheckParam(const CNodePtr &kernel_node) {
|
||||
void CheckParam(const CNodePtr &kernel_node) {
|
||||
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
|
||||
if (input_num != 1) {
|
||||
MS_LOG(ERROR) << "Input number is " << input_num << ", but SliceGpuFwdKernel needs 1 inputs.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 1, but got " << input_num;
|
||||
}
|
||||
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
|
||||
if (output_num != 1) {
|
||||
MS_LOG(ERROR) << "Output number is " << output_num << ", but SliceGpuFwdKernel needs 1 output.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be 1, but got " << output_num;
|
||||
}
|
||||
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
|
||||
if (input_shape.size() > 7) {
|
||||
MS_LOG(ERROR) << "Input dims is " << input_shape.size() << ", but SliceGpuFwdKernel olny support 7d or lower.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input cannot be greater than 7, but got "
|
||||
<< input_shape.size();
|
||||
}
|
||||
if (input_shape.size() == 0) {
|
||||
MS_LOG(ERROR) << "Input dims is " << input_shape.size() << ", scalar is not supported.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input cannot be equal to 0, but got "
|
||||
<< input_shape.size();
|
||||
}
|
||||
auto size = GetAttr<std::vector<int64_t>>(kernel_node, "size");
|
||||
auto begin = GetAttr<std::vector<int64_t>>(kernel_node, "begin");
|
||||
|
||||
if (size.size() != input_shape.size() || begin.size() != input_shape.size()) {
|
||||
MS_LOG(ERROR) << "For 'SliceGpuFwdKernel', the dims of size and begin should be equal to the dims of input, "
|
||||
<< "but got dims of input: " << input_shape.size() << ", dims of size: " << size.size()
|
||||
<< ", dims of begin: " << begin.size();
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_
|
||||
<< "', the dimension of size, begin and input_x should be the same, but got the dimension "
|
||||
<< "of size: " << size.size() << ", the dimension of begin: " << begin.size()
|
||||
<< ", the dimension of input_x: " << input_shape.size();
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < input_shape.size(); i++) {
|
||||
|
@ -174,7 +173,9 @@ class SliceGpuFwdKernel : public GpuKernel {
|
|||
size[i] = input_shape[i] - begin[i];
|
||||
}
|
||||
if (input_shape[i] <= 0 || size[i] <= 0) {
|
||||
MS_LOG(WARNING) << "Slice output is null.";
|
||||
MS_LOG(WARNING) << "For '" << kernel_name_
|
||||
<< "', the element of 'size' and the shape of input_x should be greater than 0, but got "
|
||||
<< "size[" << i << "]: " << size[i] << ", input_x.shape[" << i << "] " << input_shape[i];
|
||||
is_null_input_ = true;
|
||||
}
|
||||
}
|
||||
|
@ -183,8 +184,6 @@ class SliceGpuFwdKernel : public GpuKernel {
|
|||
[](const int64_t &e) { return static_cast<int32_t>(e); });
|
||||
(void)std::transform(begin.begin(), begin.end(), std::back_inserter(begin_),
|
||||
[](const int64_t &e) { return static_cast<int32_t>(e); });
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// use int32_t, a smaller type than the typical size_t, so that we can add higher
|
||||
|
@ -202,6 +201,7 @@ class SliceGpuFwdKernel : public GpuKernel {
|
|||
size_t input_size_;
|
||||
size_t output_size_;
|
||||
size_t workspace_size_;
|
||||
std::string kernel_name_;
|
||||
};
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -31,7 +31,12 @@ template <typename T>
|
|||
class SliceGradGpuKernel : public GpuKernel {
|
||||
public:
|
||||
SliceGradGpuKernel()
|
||||
: is_strided_slice_(false), is_null_input_(false), input_size_(0), output_size_(0), workspace_size_(0) {}
|
||||
: is_strided_slice_(false),
|
||||
is_null_input_(false),
|
||||
input_size_(0),
|
||||
output_size_(0),
|
||||
workspace_size_(0),
|
||||
kernel_name_("SliceGrad") {}
|
||||
~SliceGradGpuKernel() override = default;
|
||||
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
|
||||
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
|
||||
|
@ -52,9 +57,8 @@ class SliceGradGpuKernel : public GpuKernel {
|
|||
}
|
||||
|
||||
bool Init(const CNodePtr &kernel_node) override {
|
||||
if (!CheckParam(kernel_node)) {
|
||||
return false;
|
||||
}
|
||||
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
|
||||
(void)CheckParam(kernel_node);
|
||||
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
|
||||
auto data_format = AnfAlgo::GetInputFormat(kernel_node, 0);
|
||||
if (kernel_name == "StridedSliceGrad") {
|
||||
|
@ -73,9 +77,8 @@ class SliceGradGpuKernel : public GpuKernel {
|
|||
size_ = GetAttr<std::vector<int64_t>>(kernel_node, "end");
|
||||
} else {
|
||||
auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
|
||||
is_null_input_ = CHECK_NULL_INPUT(input_shape);
|
||||
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name_, "input");
|
||||
if (is_null_input_) {
|
||||
MS_LOG(WARNING) << "For 'SliceGradGpuKernel', input is null";
|
||||
InitSizeLists();
|
||||
return true;
|
||||
}
|
||||
|
@ -83,9 +86,8 @@ class SliceGradGpuKernel : public GpuKernel {
|
|||
size_ = GetAttr<std::vector<int64_t>>(kernel_node, "size");
|
||||
}
|
||||
auto dy_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
|
||||
is_null_input_ = CHECK_NULL_INPUT(dy_shape);
|
||||
is_null_input_ = CHECK_SHAPE_NULL(dy_shape, kernel_name_, "input");
|
||||
if (is_null_input_) {
|
||||
MS_LOG(WARNING) << "For 'SliceGradGpuKernel', input is null";
|
||||
InitSizeLists();
|
||||
return true;
|
||||
}
|
||||
|
@ -133,18 +135,16 @@ class SliceGradGpuKernel : public GpuKernel {
|
|||
}
|
||||
|
||||
private:
|
||||
bool CheckParam(const CNodePtr &kernel_node) {
|
||||
void CheckParam(const CNodePtr &kernel_node) {
|
||||
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
|
||||
if (output_num != 1) {
|
||||
MS_LOG(ERROR) << "Output number is " << output_num << ", but SliceGradGpuKernel needs 1 output.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be 1, but got " << output_num;
|
||||
}
|
||||
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
|
||||
if (input_shape.size() > 4) {
|
||||
MS_LOG(ERROR) << "Input dims is " << input_shape.size() << ", but SliceGradGpuKernel only support 4d or lower.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input cannot be greater than 4, but got "
|
||||
<< input_shape.size();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
std::vector<int64_t> begin_;
|
||||
|
@ -161,6 +161,7 @@ class SliceGradGpuKernel : public GpuKernel {
|
|||
size_t input_size_;
|
||||
size_t output_size_;
|
||||
size_t workspace_size_;
|
||||
std::string kernel_name_;
|
||||
}; // namespace kernel
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -117,31 +117,29 @@ class SortGpuKernel : public GpuKernel {
|
|||
}
|
||||
|
||||
bool Init(const CNodePtr &kernel_node) override {
|
||||
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
|
||||
size_t input_count = AnfAlgo::GetInputTensorNum(kernel_node);
|
||||
if (input_count != 1) {
|
||||
MS_LOG(ERROR) << input_count << " inputs were provided, but SortGpuKernel expects 1.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 1, but got " << input_count;
|
||||
}
|
||||
|
||||
size_t output_count = AnfAlgo::GetOutputTensorNum(kernel_node);
|
||||
if (output_count != 2) {
|
||||
MS_LOG(ERROR) << "Number of outputs is " << output_count << ", but should be 2 for SortGpuKernel.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 2, but got " << output_count;
|
||||
}
|
||||
|
||||
input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
|
||||
is_null_input_ = CHECK_NULL_INPUT(input_shape_);
|
||||
is_null_input_ = CHECK_SHAPE_NULL(input_shape_, kernel_name, "input");
|
||||
if (is_null_input_) {
|
||||
MS_LOG(WARNING) << "For 'SortGpuKernel', input is null.";
|
||||
InitSizeLists();
|
||||
return true;
|
||||
}
|
||||
|
||||
input_rank_ = input_shape_.size();
|
||||
if (input_rank_ > TRANSPOSE_MAX_DIMENSION || input_rank_ < 1) {
|
||||
MS_LOG(ERROR) << "For 'SortGpuKernel', the rank of input cannot be more than " << TRANSPOSE_MAX_DIMENSION
|
||||
<< " dimensions or less than 1 dimension.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input cannot be greater than "
|
||||
<< TRANSPOSE_MAX_DIMENSION << ", or less than 1"
|
||||
<< ", but got " << input_rank_;
|
||||
}
|
||||
|
||||
input_size_ = 1;
|
||||
|
@ -156,9 +154,8 @@ class SortGpuKernel : public GpuKernel {
|
|||
axis_ += input_rank_;
|
||||
}
|
||||
if ((size_t)axis_ >= input_rank_) {
|
||||
MS_LOG(ERROR) << "For 'SortGpuKernel', axis should be less than the rank of input, bot got axis: " << axis_
|
||||
<< " the rank of input: " << input_rank_;
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the value of 'axis' should be less than " << input_rank_
|
||||
<< ", but got " << (size_t)axis_;
|
||||
}
|
||||
|
||||
perm_.resize(input_rank_);
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPACETOBATCH_KERNEL_H_
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
|
||||
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
|
||||
#include "backend/kernel_compiler/gpu/cuda_impl/spacetobatch_impl.cuh"
|
||||
|
@ -51,9 +52,8 @@ class SpaceToBatchGpuKernel : public GpuKernel {
|
|||
}
|
||||
|
||||
bool Init(const CNodePtr &kernel_node) override {
|
||||
if (!CheckParam(kernel_node)) {
|
||||
return false;
|
||||
}
|
||||
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
|
||||
(void)CheckParam(kernel_node);
|
||||
input_size_ = sizeof(T);
|
||||
for (size_t idx = 0; idx < input_shape_.size(); ++idx) {
|
||||
input_size_ *= input_shape_[idx];
|
||||
|
@ -80,6 +80,7 @@ class SpaceToBatchGpuKernel : public GpuKernel {
|
|||
oc_ = 0;
|
||||
oh_ = 0;
|
||||
ow_ = 0;
|
||||
kernel_name_ = "SpaceToBatch";
|
||||
input_size_list_.clear();
|
||||
output_size_list_.clear();
|
||||
paddings_.clear();
|
||||
|
@ -96,58 +97,58 @@ class SpaceToBatchGpuKernel : public GpuKernel {
|
|||
bool CheckParam(const CNodePtr &kernel_node) {
|
||||
block_size_ = static_cast<int64_t>(GetAttr<int64_t>(kernel_node, "block_size"));
|
||||
if (block_size_ < 1) {
|
||||
MS_LOG(ERROR) << "block_size can not be less than 1.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the 'block_size' cannot be less than 1, but got "
|
||||
<< block_size_;
|
||||
}
|
||||
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
|
||||
if (input_num != 1) {
|
||||
MS_LOG(ERROR) << "input_num is " << input_num << ", but BatchToSpace needs 1 input.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 1, but got " << input_num;
|
||||
}
|
||||
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
|
||||
if (output_num != 1) {
|
||||
MS_LOG(ERROR) << "output_num is " << output_num << ", but BatchToSpace needs 1 output.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be 1, but got " << output_num;
|
||||
}
|
||||
|
||||
// check input_shape
|
||||
auto input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
|
||||
if (input_shape.size() != SHAPE_SIZE) {
|
||||
MS_LOG(ERROR) << "Input is " << input_shape.size() << "-D, but BatchToSpace supports 4-D tensor.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input cannot be equal to " << SHAPE_SIZE
|
||||
<< ", but got " << input_shape.size();
|
||||
}
|
||||
input_shape_.assign(input_shape.begin(), input_shape.end());
|
||||
// check paddings_
|
||||
paddings_ = GetAttr<std::vector<std::vector<int64_t>>>(kernel_node, "paddings");
|
||||
if (paddings_.size() != PADDING_SHAPE_0) {
|
||||
MS_LOG(ERROR) << "paddings.size() in BatchToSpace needs 2.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the size of 'paddings' cannot be equal to " << PADDING_SHAPE_0
|
||||
<< ", but got " << paddings_.size();
|
||||
}
|
||||
if (paddings_[0].size() != PADDING_SHAPE_1 || paddings_[1].size() != PADDING_SHAPE_1) {
|
||||
MS_LOG(ERROR) << "paddings[i].size() in BatchToSpace needs 2.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the size of 'paddings' cannot be equal to " << PADDING_SHAPE_0
|
||||
<< ", but got " << paddings_.size();
|
||||
} else {
|
||||
for (size_t idx_i = 0; idx_i < PADDING_SHAPE_0; ++idx_i) {
|
||||
for (size_t idx_j = 0; idx_j < PADDING_SHAPE_1; ++idx_j) {
|
||||
if (paddings_[idx_i][idx_j] < 0) {
|
||||
MS_LOG(ERROR) << "the number in paddings can not be less than 0.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the element of 'paddings' cannot be less than 0, "
|
||||
<< "but got paddings[" << idx_i << "][ " << idx_j << "]: " << paddings_[idx_i][idx_j];
|
||||
}
|
||||
}
|
||||
auto tmp_shape = input_shape[idx_i + PADDING_SHAPE_1] + paddings_[idx_i][0] + paddings_[idx_i][1];
|
||||
if ((tmp_shape % block_size_) != 0) {
|
||||
MS_LOG(ERROR) << "padded shape must be divisible by block_size";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_
|
||||
<< "', padded shape should be divisible by block_size, , but got padded shape: "
|
||||
<< tmp_shape << ", block_size: " << block_size_;
|
||||
}
|
||||
if ((tmp_shape / block_size_) == 0) {
|
||||
MS_LOG(ERROR) << "padded shape can not be less than block_size";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', padded shape cannot be less than block_size"
|
||||
<< ", but got padded shape: " << tmp_shape << ", block_size: " << block_size_;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
std::string kernel_name_;
|
||||
std::vector<size_t> input_size_list_;
|
||||
std::vector<size_t> output_size_list_;
|
||||
std::vector<size_t> workspace_size_list_;
|
||||
|
|
|
@ -54,35 +54,34 @@ class SpaceToDepthFwdKernel : public GpuKernel {
|
|||
}
|
||||
|
||||
bool Init(const CNodePtr &kernel_node) override {
|
||||
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
|
||||
kernel_node_ = kernel_node;
|
||||
block_size_ = static_cast<int64_t>(GetAttr<int64_t>(kernel_node, "block_size"));
|
||||
if (block_size_ == 0) {
|
||||
MS_LOG(ERROR) << "block_size_ can not be 0.";
|
||||
return false;
|
||||
if (block_size_ < 2) {
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the 'block_size' cannot be less than 2, but got "
|
||||
<< block_size_;
|
||||
}
|
||||
// check input num and output num
|
||||
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
|
||||
if (input_num != 1) {
|
||||
MS_LOG(ERROR) << "Input number is " << input_num << ", but SpaceToDepth needs 1 input.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 1, but got " << input_num;
|
||||
}
|
||||
|
||||
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
|
||||
if (output_num != 1) {
|
||||
MS_LOG(ERROR) << "Output number is " << output_num << ", SpaceToDepth needs 1 output.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 2, but got " << output_num;
|
||||
}
|
||||
// check input_shape
|
||||
auto input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
|
||||
is_null_input_ = CHECK_NULL_INPUT(input_shape);
|
||||
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
|
||||
if (is_null_input_) {
|
||||
MS_LOG(WARNING) << "For 'SpaceToDepthGpuKernel', input is null.";
|
||||
InitSizeLists();
|
||||
return true;
|
||||
}
|
||||
shape_size_ = input_shape.size();
|
||||
if (shape_size_ != SPACETODEPTH_BUFFER_DIMENSION) {
|
||||
MS_LOG(EXCEPTION) << "Input is " << shape_size_ << "-D, but SpaceToDepth supports 4-D tensor.";
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input should be equal to "
|
||||
<< SPACETODEPTH_BUFFER_DIMENSION << ", but got " << shape_size_;
|
||||
}
|
||||
// get input and output information
|
||||
input_size_ = 1;
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPLIT_GPU_KERNEL_H
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
|
||||
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
|
||||
|
@ -54,18 +55,19 @@ class SplitGpuFwdKernel : public GpuKernel {
|
|||
}
|
||||
|
||||
bool Init(const CNodePtr &kernel_node) override {
|
||||
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
|
||||
kernel_node_ = kernel_node;
|
||||
auto input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
|
||||
is_null_input_ = CHECK_NULL_INPUT(input_shape);
|
||||
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name_, "input");
|
||||
if (is_null_input_) {
|
||||
MS_LOG(WARNING) << "For 'SplitGpuKernel', input is null";
|
||||
InitSizeLists();
|
||||
return true;
|
||||
}
|
||||
int dims = SizeToInt(input_shape.size());
|
||||
axis_ = static_cast<int64_t>(GetAttr<int64_t>(kernel_node, "axis"));
|
||||
if (axis_ < -dims || axis_ >= dims) {
|
||||
MS_LOG(EXCEPTION) << "axis must be in the range [-rank, rank)";
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the 'axis' should be in the range [-" << dims << "," << dims
|
||||
<< "), but got " << axis_;
|
||||
}
|
||||
if (axis_ < 0) {
|
||||
axis_ += dims;
|
||||
|
@ -77,9 +79,7 @@ class SplitGpuFwdKernel : public GpuKernel {
|
|||
|
||||
output_num_ = static_cast<int64_t>(GetAttr<int64_t>(kernel_node, "output_num"));
|
||||
|
||||
if (!CheckParam(kernel_node)) {
|
||||
return false;
|
||||
}
|
||||
(void)CheckParam(kernel_node);
|
||||
input_size_ = 1;
|
||||
all_size_before_axis_ = 1;
|
||||
all_size_axis_ = 1;
|
||||
|
@ -100,9 +100,8 @@ class SplitGpuFwdKernel : public GpuKernel {
|
|||
for (int i = 0; i < output_num_; i++) {
|
||||
size_t output_size = 1;
|
||||
auto output_shape = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node, i);
|
||||
is_null_input_ = CHECK_NULL_INPUT(output_shape);
|
||||
is_null_input_ = CHECK_SHAPE_NULL(output_shape, kernel_name_, "output");
|
||||
if (is_null_input_) {
|
||||
MS_LOG(WARNING) << "SplitGpuKernel output is null";
|
||||
InitSizeLists();
|
||||
return true;
|
||||
}
|
||||
|
@ -125,6 +124,7 @@ class SplitGpuFwdKernel : public GpuKernel {
|
|||
all_size_before_axis_ = 1;
|
||||
all_size_axis_ = 1;
|
||||
is_null_input_ = false;
|
||||
kernel_name_ = "Split";
|
||||
outputs_host_ = nullptr;
|
||||
input_size_list_.clear();
|
||||
output_size_list_.clear();
|
||||
|
@ -135,36 +135,33 @@ class SplitGpuFwdKernel : public GpuKernel {
|
|||
void InitSizeLists() override {}
|
||||
|
||||
private:
|
||||
bool CheckParam(const CNodePtr &kernel_node) {
|
||||
void CheckParam(const CNodePtr &kernel_node) {
|
||||
auto input_num = AnfAlgo::GetInputTensorNum(kernel_node);
|
||||
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
|
||||
int dims = SizeToInt(input_shape.size());
|
||||
int output_num = SizeToInt(AnfAlgo::GetOutputTensorNum(kernel_node));
|
||||
if (output_num <= 0) {
|
||||
MS_LOG(ERROR) << "Output number is " << output_num << ", must > 0.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be greater than 0, but got "
|
||||
<< output_num;
|
||||
}
|
||||
if (input_num != 1) {
|
||||
MS_LOG(ERROR) << "Input number is " << input_num << ", but Split needs 1 input.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 1, but got " << input_num;
|
||||
}
|
||||
if (dims == 0) {
|
||||
MS_LOG(ERROR) << "Input dims is " << dims << ", scalar is not supported.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input cannot be 0, but got " << dims;
|
||||
}
|
||||
if (axis_ < -dims || axis_ >= dims) {
|
||||
MS_LOG(ERROR) << "Attr axis " << axis_ << " must be in " << -dims << "~" << dims;
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the 'axis' should be in the range [-" << dims << "," << dims
|
||||
<< "), but got " << axis_;
|
||||
}
|
||||
if (output_num_ > SizeToInt(input_shape[axis_])) {
|
||||
MS_LOG(ERROR) << "Attr output_num " << output_num_ << "must be less than" << input_shape[axis_];
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs cannot be greater than "
|
||||
<< SizeToInt(input_shape[axis_]) << ", but got " << output_num_;
|
||||
}
|
||||
if (output_num_ != output_num) {
|
||||
MS_LOG(ERROR) << "Output num is " << output_num << ", but need " << output_num_;
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be " << output_num_
|
||||
<< ", but got " << output_num;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
int axis_;
|
||||
int output_num_;
|
||||
|
@ -177,6 +174,7 @@ class SplitGpuFwdKernel : public GpuKernel {
|
|||
std::vector<size_t> input_size_list_;
|
||||
std::vector<size_t> output_size_list_;
|
||||
std::vector<size_t> workspace_size_list_;
|
||||
std::string kernel_name_;
|
||||
};
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -52,24 +52,23 @@ class SqueezeGpuKernel : public GpuKernel {
|
|||
}
|
||||
|
||||
bool Init(const CNodePtr &kernel_node) override {
|
||||
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
|
||||
kernel_node_ = kernel_node;
|
||||
auto axis = GetAttr<std::vector<int64_t>>(kernel_node, "axis");
|
||||
auto input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
|
||||
is_null_input_ = CHECK_NULL_INPUT(input_shape);
|
||||
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
|
||||
if (is_null_input_) {
|
||||
MS_LOG(WARNING) << "For 'SqueezeGpuKernel', input is null";
|
||||
InitSizeLists();
|
||||
return true;
|
||||
}
|
||||
int64_t dims = SizeToLong(input_shape.size());
|
||||
if (dims == 0) {
|
||||
MS_LOG(ERROR) << "Squeeze requires input tensor's dimension can't be 0, but got 0.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input cannot be 0, but got " << dims;
|
||||
}
|
||||
for (const auto i : axis) {
|
||||
if (i < -dims || i >= dims) {
|
||||
MS_LOG(ERROR) << "Squeeze requires axis should be in [" << -dims << ", " << dims << "), but got " << i << ".";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the 'axis' should be in the range [-" << dims << "," << dims
|
||||
<< "), but got " << i;
|
||||
}
|
||||
}
|
||||
input_size_ = std::accumulate(input_shape.begin(), input_shape.end(), sizeof(T), std::multiplies<size_t>());
|
||||
|
|
|
@ -50,17 +50,16 @@ class StridedSliceGpuKernel : public GpuKernel, public StridedSliceGpuCommon {
|
|||
}
|
||||
|
||||
bool Init(const CNodePtr &kernel_node) override {
|
||||
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
|
||||
input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
|
||||
null_output_ = CHECK_NULL_INPUT(input_shape_);
|
||||
null_output_ = CHECK_SHAPE_NULL(input_shape_, kernel_name, "input");
|
||||
if (null_output_) {
|
||||
MS_LOG(WARNING) << "For 'StridedSliceGpuKernel', input is null";
|
||||
InitSizeLists();
|
||||
return true;
|
||||
}
|
||||
if (input_shape_.size() > MAX_DIMS) {
|
||||
MS_LOG(ERROR) << "StridedSlice support dims no more than " << MAX_DIMS << ", but the input shape is "
|
||||
<< input_shape_.size();
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input cannot be greater than " << MAX_DIMS
|
||||
<< ", but got " << input_shape_.size();
|
||||
}
|
||||
|
||||
CollectInfo(kernel_node);
|
||||
|
|
|
@ -50,13 +50,14 @@ class StridedSliceGradGpuKernel : public GpuKernel, public StridedSliceGpuCommon
|
|||
return true;
|
||||
}
|
||||
bool Init(const CNodePtr &kernel_node) override {
|
||||
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
|
||||
std::vector<int64_t> shapex = GetAttr<std::vector<int64_t>>(kernel_node, "shapex");
|
||||
for (auto x : shapex) {
|
||||
input_shape_.push_back(static_cast<size_t>(x));
|
||||
}
|
||||
if (input_shape_.size() > MAX_DIMS) {
|
||||
MS_LOG(ERROR) << "StridedSliceGrad support support dims less than " << input_shape_.size();
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input cannot be greater than " << MAX_DIMS
|
||||
<< ", but got " << input_shape_.size();
|
||||
}
|
||||
|
||||
CollectInfo(kernel_node);
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#define MINDSPORE_MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ARRAYS_TENSOR_STRIDE_UPDATE_GPU_KERNEL_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <numeric>
|
||||
#include <functional>
|
||||
|
@ -31,7 +32,8 @@ namespace kernel {
|
|||
template <typename T>
|
||||
class TensorCopySlicesGpuKernel : public GpuKernel {
|
||||
public:
|
||||
TensorCopySlicesGpuKernel() : input_size_(0), update_size_(0), output_size_(0), is_null_input_(false) {}
|
||||
TensorCopySlicesGpuKernel()
|
||||
: input_size_(0), update_size_(0), output_size_(0), is_null_input_(false), kernel_name_("TensorCopySlices") {}
|
||||
~TensorCopySlicesGpuKernel() {}
|
||||
|
||||
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
|
||||
|
@ -57,32 +59,30 @@ class TensorCopySlicesGpuKernel : public GpuKernel {
|
|||
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
|
||||
|
||||
bool Init(const CNodePtr &kernel_node) override {
|
||||
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
|
||||
kernel_node_ = kernel_node;
|
||||
|
||||
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
|
||||
if (input_num != 2) {
|
||||
MS_LOG(ERROR) << "Input number is " << input_num << ", but TensorCopySlices needs 2 inputs.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 2, but got " << input_num;
|
||||
}
|
||||
|
||||
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
|
||||
if (output_num != 1) {
|
||||
MS_LOG(ERROR) << "Output number is " << output_num << ", but TensorCopySlices has 1 output.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be 1, but got " << output_num;
|
||||
}
|
||||
|
||||
input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
|
||||
auto update_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
|
||||
is_null_input_ = CHECK_NULL_INPUT(input_shape_) || CHECK_NULL_INPUT(update_shape);
|
||||
is_null_input_ =
|
||||
CHECK_SHAPE_NULL(input_shape_, kernel_name_, "input") || CHECK_SHAPE_NULL(update_shape, kernel_name_, "update");
|
||||
if (is_null_input_) {
|
||||
MS_LOG(WARNING) << "For 'TensorCopySlicesGpuKernel', input or output is null.";
|
||||
InitSizeLists();
|
||||
return true;
|
||||
}
|
||||
if (input_shape_.size() > kMaxDims) {
|
||||
MS_LOG(ERROR) << "StridedSlice support dims no more than " << kMaxDims << ", but the input shape is "
|
||||
<< input_shape_.size();
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input cannot be greater than " << kMaxDims
|
||||
<< ", but got " << input_shape_.size();
|
||||
}
|
||||
|
||||
begin_ = GetAttr<std::vector<int64_t>>(kernel_node, kAttrBegin);
|
||||
|
@ -90,10 +90,9 @@ class TensorCopySlicesGpuKernel : public GpuKernel {
|
|||
strides_ = GetAttr<std::vector<int64_t>>(kernel_node, kAttrStrides);
|
||||
|
||||
if (begin_.size() > input_shape_.size()) {
|
||||
MS_LOG(ERROR) << "For 'TensorCopySlicesGpuKernel', the rank of begin attr cannot be more than the rank of input, "
|
||||
<< "but got the rank of begin attr: " << begin_.size()
|
||||
<< ", the rank of input: " << input_shape_.size();
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_
|
||||
<< "', the size of 'begin' cannot be greater than the dimension of input, but got the "
|
||||
<< "size of 'begin': " << begin_.size() << ", the dimension of input: " << input_shape_.size();
|
||||
}
|
||||
|
||||
FillEmptyDims(kernel_node);
|
||||
|
@ -111,7 +110,9 @@ class TensorCopySlicesGpuKernel : public GpuKernel {
|
|||
auto update_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
|
||||
size_t total_update_num = std::accumulate(update_shape.begin(), update_shape.end(), 1, std::multiplies<size_t>());
|
||||
if (begin_.size() != end_.size() || end_.size() != strides_.size()) {
|
||||
MS_LOG(EXCEPTION) << "Invalid attr begin:" << begin_ << " end:" << end_ << " strides:" << strides_;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the size of 'begin', 'strides' and 'end' should be the same, "
|
||||
<< "but got the size of 'begin': " << begin_.size()
|
||||
<< ", the size of 'strides': " << strides_.size() << ", the size of 'end': " << end_.size();
|
||||
}
|
||||
auto len = begin_.size();
|
||||
size_t total_input_num = 1;
|
||||
|
@ -120,7 +121,8 @@ class TensorCopySlicesGpuKernel : public GpuKernel {
|
|||
total_input_num *= ((end_[i] - begin_[i]) / strides_[i]);
|
||||
}
|
||||
if (total_input_num != total_update_num) {
|
||||
MS_LOG(EXCEPTION) << "Invalid update_shape:" << update_shape << ". Maybe you need to broadcast it.";
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', invalid 'update_shape':" << update_shape
|
||||
<< ". Maybe you need to broadcast it.";
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -204,6 +206,7 @@ class TensorCopySlicesGpuKernel : public GpuKernel {
|
|||
size_t output_size_;
|
||||
inline static size_t kMaxDims = 8;
|
||||
bool is_null_input_;
|
||||
std::string kernel_name_;
|
||||
};
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ARRAYS_TENSOR_SCATTER_ADD_GPU_KERNEL_H
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <algorithm>
|
||||
#include "backend/kernel_compiler/gpu/cuda_impl/tensor_scatter_add.cuh"
|
||||
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
|
||||
|
@ -39,7 +40,8 @@ class TensorScatterAddGpuFwdKernel : public GpuKernel {
|
|||
indices_dim_0_(0),
|
||||
indices_dim_1_(0),
|
||||
memcpy_flag_(false),
|
||||
is_null_input_(false) {}
|
||||
is_null_input_(false),
|
||||
kernel_name_("TensorScatterAdd") {}
|
||||
~TensorScatterAddGpuFwdKernel() {
|
||||
if (indices_stride_ != nullptr) {
|
||||
device::gpu::GPUMemoryAllocator::GetInstance().FreeTensorMem(static_cast<void *>(indices_stride_));
|
||||
|
@ -93,17 +95,16 @@ class TensorScatterAddGpuFwdKernel : public GpuKernel {
|
|||
}
|
||||
|
||||
bool Init(const CNodePtr &kernel_node) override {
|
||||
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
|
||||
kernel_node_ = kernel_node;
|
||||
memcpy_flag_ = false;
|
||||
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
|
||||
if (input_num != 3) {
|
||||
MS_LOG(ERROR) << "Input number is " << input_num << ", but TensorScatterAdd needs 3 inputs.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 3, but got " << input_num;
|
||||
}
|
||||
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
|
||||
if (output_num != 1) {
|
||||
MS_LOG(ERROR) << "Output number is " << output_num << ", but TensorScatterAdd has 1 output.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be 1, but got " << output_num;
|
||||
}
|
||||
|
||||
update_shapes_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
|
||||
|
@ -112,8 +113,11 @@ class TensorScatterAddGpuFwdKernel : public GpuKernel {
|
|||
output_shapes_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);
|
||||
is_null_input_ = CHECK_NULL_INPUT(update_shapes_) || CHECK_NULL_INPUT(indices_shapes_) ||
|
||||
CHECK_NULL_INPUT(input_shapes_) || CHECK_NULL_INPUT(output_shapes_);
|
||||
is_null_input_ = CHECK_SHAPE_NULL(update_shapes_, kernel_name_, "updates") ||
|
||||
CHECK_SHAPE_NULL(indices_shapes_, kernel_name_, "indices") ||
|
||||
CHECK_SHAPE_NULL(input_shapes_, kernel_name_, "input_x") ||
|
||||
CHECK_SHAPE_NULL(output_shapes_, kernel_name_, "output");
|
||||
if (is_null_input_) {
|
||||
MS_LOG(WARNING) << "For 'TensorScatterAddGpuKernel', input or output is null.";
|
||||
InitSizeLists();
|
||||
return true;
|
||||
}
|
||||
|
@ -127,18 +131,22 @@ class TensorScatterAddGpuFwdKernel : public GpuKernel {
|
|||
const size_t indices_len = sizeof(S) * vec_indices_stride_.size();
|
||||
void *indices_stride_work = device::gpu::GPUMemoryAllocator::GetInstance().AllocTensorMem(indices_len);
|
||||
if (indices_stride_work == nullptr) {
|
||||
MS_LOG(EXCEPTION) << "Failed to alloc indices_stride_work, size: " << indices_len;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_
|
||||
<< "', the memory alloc of indices_stride_work should be successful, but failed, got size: "
|
||||
<< indices_len;
|
||||
}
|
||||
indices_stride_ = static_cast<S *>(indices_stride_work);
|
||||
|
||||
const size_t vec_work_len = sizeof(S) * vec_work_shape_.size();
|
||||
void *work_shape_work = device::gpu::GPUMemoryAllocator::GetInstance().AllocTensorMem(vec_work_len);
|
||||
if (work_shape_work == nullptr) {
|
||||
MS_LOG(EXCEPTION) << "Failed to alloc work_shape_work, size: " << vec_work_len;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_
|
||||
<< "', the memory alloc of work_shape_work should be successful, but failed, got size: "
|
||||
<< vec_work_len;
|
||||
}
|
||||
work_shape_ = static_cast<S *>(work_shape_work);
|
||||
if (vec_work_shape_.size() < 1) {
|
||||
MS_LOG(EXCEPTION) << "For 'TensorScatterAddGpuKernel', the rank of vec work cannot be less than 1, but got "
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of vec work cannot be less than 1, but got "
|
||||
<< vec_work_shape_.size();
|
||||
}
|
||||
|
||||
|
@ -176,7 +184,7 @@ class TensorScatterAddGpuFwdKernel : public GpuKernel {
|
|||
}
|
||||
|
||||
if (indices_shapes_.size() < 1) {
|
||||
MS_LOG(EXCEPTION) << "For 'TensorScatterAddGpuKernel', the rank of indices cannot be less than 1, but got "
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of indices cannot be less than 1, but got "
|
||||
<< indices_shapes_.size();
|
||||
}
|
||||
// calculate indices dim 0/1
|
||||
|
@ -189,9 +197,9 @@ class TensorScatterAddGpuFwdKernel : public GpuKernel {
|
|||
}
|
||||
|
||||
if (indices_dim_1_ < 1 || indices_dim_1_ > output_shapes_.size()) {
|
||||
MS_LOG(EXCEPTION) << "For 'TensorScatterAddGpuKernel', indices_shapes[-1] cannot be less than 1 and greater than "
|
||||
<< "the rank of output_shapes, but got indices_shapes[-1]: " << indices_dim_1_
|
||||
<< ", rank of output_shapes: " << output_shapes_.size();
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', indices_shapes[-1] cannot be less than 1 or greater than "
|
||||
<< "the dimension of output_shapes, but got indices_shapes[-1]: " << indices_dim_1_
|
||||
<< ", dimension of output_shapes: " << output_shapes_.size();
|
||||
}
|
||||
// calculate indices_stride
|
||||
vec_indices_stride_.resize(indices_dim_1_, 0);
|
||||
|
@ -226,6 +234,7 @@ class TensorScatterAddGpuFwdKernel : public GpuKernel {
|
|||
size_t indices_dim_1_;
|
||||
bool memcpy_flag_;
|
||||
bool is_null_input_;
|
||||
std::string kernel_name_;
|
||||
};
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -90,17 +90,16 @@ class TensorScatterMaxGpuKernel : public GpuKernel {
|
|||
}
|
||||
|
||||
bool Init(const CNodePtr &kernel_node) override {
|
||||
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
|
||||
kernel_node_ = kernel_node;
|
||||
memcpy_flag_ = false;
|
||||
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
|
||||
if (input_num != 3) {
|
||||
MS_LOG(ERROR) << "Input number is " << input_num << ", but TensorScatterMax needs 3 inputs.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 3, but got " << input_num;
|
||||
}
|
||||
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
|
||||
if (output_num != 1) {
|
||||
MS_LOG(ERROR) << "Output number is " << output_num << ", but TensorScatterMax has 1 output.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num;
|
||||
}
|
||||
|
||||
update_shapes_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
|
||||
|
@ -117,14 +116,18 @@ class TensorScatterMaxGpuKernel : public GpuKernel {
|
|||
const size_t indices_len = sizeof(S) * vec_indices_stride_.size();
|
||||
void *indices_stride_work = device::gpu::GPUMemoryAllocator::GetInstance().AllocTensorMem(indices_len);
|
||||
if (indices_stride_work == nullptr) {
|
||||
MS_LOG(EXCEPTION) << "Failed to alloc indices_stride_work, size: " << indices_len;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name
|
||||
<< "', the memory alloc of indices_stride_work should be successful, but failed, got size: "
|
||||
<< indices_len;
|
||||
}
|
||||
indices_stride_ = static_cast<S *>(indices_stride_work);
|
||||
|
||||
const size_t vec_work_len = sizeof(S) * vec_work_shape_.size();
|
||||
void *work_shape_work = device::gpu::GPUMemoryAllocator::GetInstance().AllocTensorMem(vec_work_len);
|
||||
if (work_shape_work == nullptr) {
|
||||
MS_LOG(EXCEPTION) << "Failed to alloc work_shape_work, size: " << vec_work_len;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name
|
||||
<< "', the memory alloc of work_shape_work should be successful, but failed, got size: "
|
||||
<< vec_work_len;
|
||||
}
|
||||
work_shape_ = static_cast<S *>(work_shape_work);
|
||||
|
||||
|
|
|
@ -89,17 +89,16 @@ class TensorScatterMinGpuKernel : public GpuKernel {
|
|||
}
|
||||
|
||||
bool Init(const CNodePtr &kernel_node) override {
|
||||
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
|
||||
kernel_node_ = kernel_node;
|
||||
memcpy_flag_ = false;
|
||||
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
|
||||
if (input_num != 3) {
|
||||
MS_LOG(ERROR) << "Input number is " << input_num << ", but TensorScatterMin needs 3 inputs.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 3, but got " << input_num;
|
||||
}
|
||||
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
|
||||
if (output_num != 1) {
|
||||
MS_LOG(ERROR) << "Output number is " << output_num << ", but TensorScatterMin has 1 output.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num;
|
||||
}
|
||||
|
||||
update_shapes_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
|
||||
|
@ -116,14 +115,18 @@ class TensorScatterMinGpuKernel : public GpuKernel {
|
|||
const size_t indices_len = sizeof(S) * vec_indices_stride_.size();
|
||||
void *indices_stride_work = device::gpu::GPUMemoryAllocator::GetInstance().AllocTensorMem(indices_len);
|
||||
if (indices_stride_work == nullptr) {
|
||||
MS_LOG(EXCEPTION) << "Failed to alloc indices_stride_work, size: " << indices_len;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name
|
||||
<< "', the memory alloc of indices_stride_work should be successful, but failed, got size: "
|
||||
<< indices_len;
|
||||
}
|
||||
indices_stride_ = static_cast<S *>(indices_stride_work);
|
||||
|
||||
const size_t vec_work_len = sizeof(S) * vec_work_shape_.size();
|
||||
void *work_shape_work = device::gpu::GPUMemoryAllocator::GetInstance().AllocTensorMem(vec_work_len);
|
||||
if (work_shape_work == nullptr) {
|
||||
MS_LOG(EXCEPTION) << "Failed to alloc work_shape_work, size: " << vec_work_len;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name
|
||||
<< "', the memory alloc of work_shape_work should be successful, but failed, got size: "
|
||||
<< vec_work_len;
|
||||
}
|
||||
work_shape_ = static_cast<S *>(work_shape_work);
|
||||
|
||||
|
|
|
@ -90,17 +90,16 @@ class TensorScatterSubGpuKernel : public GpuKernel {
|
|||
}
|
||||
|
||||
bool Init(const CNodePtr &kernel_node) override {
|
||||
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
|
||||
kernel_node_ = kernel_node;
|
||||
memcpy_flag_ = false;
|
||||
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
|
||||
if (input_num != 3) {
|
||||
MS_LOG(ERROR) << "Input number is " << input_num << ", but TensorScatterSub needs 3 inputs.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 3, but got " << input_num;
|
||||
}
|
||||
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
|
||||
if (output_num != 1) {
|
||||
MS_LOG(ERROR) << "Output number is " << output_num << ", but TensorScatterSub has 1 output.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num;
|
||||
}
|
||||
|
||||
update_shapes_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
|
||||
|
@ -117,14 +116,18 @@ class TensorScatterSubGpuKernel : public GpuKernel {
|
|||
const size_t indices_len = sizeof(S) * vec_indices_stride_.size();
|
||||
void *indices_stride_work = device::gpu::GPUMemoryAllocator::GetInstance().AllocTensorMem(indices_len);
|
||||
if (indices_stride_work == nullptr) {
|
||||
MS_LOG(EXCEPTION) << "Failed to alloc indices_stride_work, size: " << indices_len;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name
|
||||
<< "', the memory alloc of indices_stride_work should be successful, but failed, got size: "
|
||||
<< indices_len;
|
||||
}
|
||||
indices_stride_ = static_cast<S *>(indices_stride_work);
|
||||
|
||||
const size_t vec_work_len = sizeof(S) * vec_work_shape_.size();
|
||||
void *work_shape_work = device::gpu::GPUMemoryAllocator::GetInstance().AllocTensorMem(vec_work_len);
|
||||
if (work_shape_work == nullptr) {
|
||||
MS_LOG(EXCEPTION) << "Failed to alloc work_shape_work, size: " << vec_work_len;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name
|
||||
<< "', the memory alloc of work_shape_work should be successful, but failed, got size: "
|
||||
<< vec_work_len;
|
||||
}
|
||||
work_shape_ = static_cast<S *>(work_shape_work);
|
||||
|
||||
|
|
|
@ -93,27 +93,27 @@ class TensorScatterUpdateGpuFwdKernel : public GpuKernel {
|
|||
}
|
||||
|
||||
bool Init(const CNodePtr &kernel_node) override {
|
||||
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
|
||||
kernel_node_ = kernel_node;
|
||||
memcpy_flag_ = false;
|
||||
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
|
||||
if (input_num != 3) {
|
||||
MS_LOG(ERROR) << "Input number is " << input_num << ", but TensorScatterUpdate needs 3 inputs.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 3, but got " << input_num;
|
||||
}
|
||||
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
|
||||
if (output_num != 1) {
|
||||
MS_LOG(ERROR) << "Output number is " << output_num << ", but TensorScatterUpdate has 1 output.";
|
||||
return false;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num;
|
||||
}
|
||||
|
||||
update_shapes_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
|
||||
indices_shapes_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
|
||||
input_shapes_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
|
||||
output_shapes_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);
|
||||
is_null_input_ = CHECK_NULL_INPUT(update_shapes_) || CHECK_NULL_INPUT(indices_shapes_) ||
|
||||
CHECK_NULL_INPUT(input_shapes_) || CHECK_NULL_INPUT(output_shapes_);
|
||||
is_null_input_ = CHECK_SHAPE_NULL(update_shapes_, kernel_name, "update") ||
|
||||
CHECK_SHAPE_NULL(indices_shapes_, kernel_name, "indices") ||
|
||||
CHECK_SHAPE_NULL(input_shapes_, kernel_name, "input_x") ||
|
||||
CHECK_SHAPE_NULL(output_shapes_, kernel_name, "output");
|
||||
if (is_null_input_) {
|
||||
MS_LOG(WARNING) << "For 'TensorScatterUpdateGpuKernel', input or output is null";
|
||||
InitSizeLists();
|
||||
return true;
|
||||
}
|
||||
|
@ -126,14 +126,18 @@ class TensorScatterUpdateGpuFwdKernel : public GpuKernel {
|
|||
const size_t indices_len = sizeof(S) * vec_indices_stride_.size();
|
||||
void *indices_stride_work = device::gpu::GPUMemoryAllocator::GetInstance().AllocTensorMem(indices_len);
|
||||
if (indices_stride_work == nullptr) {
|
||||
MS_LOG(EXCEPTION) << "Failed to alloc indices_stride_work, size: " << indices_len;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name
|
||||
<< "', the memory alloc of indices_stride_work should be successful, but failed, got size: "
|
||||
<< indices_len;
|
||||
}
|
||||
indices_stride_ = static_cast<S *>(indices_stride_work);
|
||||
|
||||
const size_t vec_work_len = sizeof(S) * vec_work_shape_.size();
|
||||
void *work_shape_work = device::gpu::GPUMemoryAllocator::GetInstance().AllocTensorMem(vec_work_len);
|
||||
if (work_shape_work == nullptr) {
|
||||
MS_LOG(EXCEPTION) << "Failed to alloc work_shape_work, size: " << vec_work_len;
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name
|
||||
<< "', the memory alloc of work_shape_work should be successful, but failed, got size: "
|
||||
<< vec_work_len;
|
||||
}
|
||||
work_shape_ = static_cast<S *>(work_shape_work);
|
||||
|
||||
|
|
Loading…
Reference in New Issue