Optimize the GPU kernel error descriptions for Tile, TopK, Transpose, etc.

tacyi139 2021-12-28 11:48:43 +08:00
parent 348f8d8fd7
commit 0609a235fd
68 changed files with 500 additions and 462 deletions
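The commit converges every GPU kernel check on one message shape: it opens with "For '<kernel_name>', ...", states the expected value, then the actual one, and throws via MS_LOG(EXCEPTION) instead of logging an error and returning false. A minimal standalone sketch of the convention (not MindSpore code; std::runtime_error stands in for MS_LOG(EXCEPTION), which both logs and throws):

#include <sstream>
#include <stdexcept>
#include <string>

// Hedged sketch of the unified check-and-throw pattern used throughout this commit.
void CheckInputNum(const std::string &kernel_name, size_t input_num, size_t expected) {
  if (input_num != expected) {
    std::ostringstream oss;
    oss << "For '" << kernel_name << "', the number of inputs should be " << expected
        << ", but got " << input_num;
    throw std::runtime_error(oss.str());  // MS_LOG(EXCEPTION) analog
  }
}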

View File

@ -96,12 +96,10 @@ class ScatterNdGpuFwdKernel : public GpuKernel {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 2, but got " << input_num;
return false;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num;
return false;
}
input_shapes_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
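Most hunks below also swap CHECK_NULL_INPUT plus a hand-written MS_LOG(WARNING) for CHECK_SHAPE_NULL, which takes the kernel and parameter names. Its definition is not part of this diff; judging by the call sites, it behaves roughly like this standalone sketch (an assumption, not the real macro):

#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

// Assumed behavior of CHECK_SHAPE_NULL: flag an empty or zero-sized shape and
// emit the warning that each kernel's Init() used to write by hand.
bool CheckShapeNull(const std::vector<size_t> &shape, const std::string &kernel_name,
                    const std::string &param_name) {
  const bool is_null =
      shape.empty() || std::any_of(shape.begin(), shape.end(), [](size_t d) { return d == 0; });
  if (is_null) {
    std::cerr << "For '" << kernel_name << "', the shape of " << param_name << " is null\n";
  }
  return is_null;
}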

View File

@ -59,25 +59,26 @@ class TileGpuKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but Tile needs 1 input.";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 1, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but Tile has 1 output.";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num;
}
input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape_) || CHECK_NULL_INPUT(output_shape_);
is_null_input_ =
CHECK_SHAPE_NULL(input_shape_, kernel_name, "input") || CHECK_SHAPE_NULL(output_shape_, kernel_name, "output");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'TileGpuKernel', input or output is null";
InitSizeLists();
return true;
}
if (output_shape_.size() < 1) {
MS_LOG(EXCEPTION) << "For 'TileGpuKernel', the rank of output cannot be less than 1, but got "
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of output cannot be less than 1, but got "
<< output_shape_.size();
}
input_size_ = 1;
@ -87,8 +88,8 @@ class TileGpuKernel : public GpuKernel {
output_size_ = 1;
if (output_shape_.size() > TILE_MAX_DIMENSION) {
MS_LOG(EXCEPTION) << "Output is " << output_shape_.size() << "-D, but Tile supports up to " << TILE_MAX_DIMENSION
<< "-D.";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of output cannot be greater than "
<< TILE_MAX_DIMENSION << ", but got " << output_shape_.size();
}
shape_size_ = output_shape_.size();
for (size_t i = 0; i < output_shape_.size(); i++) {

View File

@ -83,12 +83,13 @@ class TopKGpuKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
auto input_shapes = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto output_shapes = AnfAlgo::GetOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shapes) || CHECK_NULL_INPUT(output_shapes);
is_null_input_ =
CHECK_SHAPE_NULL(input_shapes, kernel_name, "input") || CHECK_SHAPE_NULL(output_shapes, kernel_name, "output");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'TopkGpuKernel', input or output is null";
InitSizeLists();
return true;
}

View File

@ -74,28 +74,26 @@ class TransposeGpuFwdKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but transpose needs 1 input.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 1, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but transpose needs 1 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num;
}
auto input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'TransposeGpuKernel', input is null";
InitSizeLists();
return true;
}
shape_size_ = input_shape.size();
if (shape_size_ > TRANSPOSE_MAX_DIMENSION) {
MS_LOG(EXCEPTION) << "Input is " << shape_size_ << "-D, but transpose supports max " << TRANSPOSE_MAX_DIMENSION
<< "-D inputs.";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of output cannot be greater than "
<< TRANSPOSE_MAX_DIMENSION << ", but got " << shape_size_;
}
input_size_ = 1;

View File

@ -50,11 +50,11 @@ class UniqueGpuKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
std::vector<size_t> shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(shape);
is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'UniqueGpuKernel', input is null";
InitSizeLists();
return true;
}

View File

@ -18,6 +18,7 @@
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_UNPACK_GPU_KERNEL_H
#include <vector>
#include <string>
#include <memory>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
@ -55,10 +56,9 @@ class UnpackGpuFwdKernel : public GpuKernel {
return true;
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
if (!CheckParam(kernel_node)) {
return false;
}
(void)CheckParam(kernel_node);
axis_ = static_cast<int32_t>(GetAttr<int64_t>(kernel_node, "axis"));
if (axis_ < 0) {
auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
@ -73,9 +73,8 @@ class UnpackGpuFwdKernel : public GpuKernel {
for (size_t i = 0; i < output_num_; i++) {
size_t _size = 1;
auto _shape = AnfAlgo::GetOutputDeviceShape(kernel_node, i);
is_null_input_ = CHECK_NULL_INPUT(_shape);
is_null_input_ = CHECK_SHAPE_NULL(_shape, kernel_name_, "output");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'UnpackGpuKernel', output is null";
InitSizeLists();
return true;
}
@ -87,9 +86,8 @@ class UnpackGpuFwdKernel : public GpuKernel {
workspace_size_list_.push_back(sizeof(T *) * output_num_);
auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name_, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'UnpackGpuKernel', input is null";
InitSizeLists();
return true;
}
@ -108,13 +106,11 @@ class UnpackGpuFwdKernel : public GpuKernel {
void InitSizeLists() override {}
private:
bool CheckParam(const CNodePtr &kernel_node) {
void CheckParam(const CNodePtr &kernel_node) {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(ERROR) << "input number is " << input_num << ", but UnpackGpuFwdKernel needs 1 input.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 1, but got " << input_num;
}
return true;
}
int axis_;
bool is_null_input_;
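The CheckParam refactor above (and the FloatStatusGpuKernel one below) drops the bool return because a throwing check needs no failure plumbing; the caller's "if (!CheckParam(...)) return false;" guard becomes "(void)CheckParam(...)". A tiny standalone sketch of the after-state:

#include <stdexcept>

// After the refactor: the check throws, so callers no longer branch on a return value.
void CheckParam(size_t input_num) {  // stands in for CheckParam(kernel_node)
  if (input_num != 1) {
    throw std::runtime_error("the number of inputs should be 1");  // MS_LOG(EXCEPTION) analog
  }
}
// before: bool CheckParam(...);  if (!CheckParam(kernel_node)) { return false; }
// after:  void CheckParam(...);  (void)CheckParam(kernel_node);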

View File

@ -54,14 +54,15 @@ class UnsortedSegmentMaxGpuKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
auto input_shapes = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
auto segment_ids_shapes = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 1);
auto output_shapes = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node, 0);
is_null_input_ =
CHECK_NULL_INPUT(input_shapes) || CHECK_NULL_INPUT(segment_ids_shapes) || CHECK_NULL_INPUT(output_shapes);
is_null_input_ = CHECK_SHAPE_NULL(input_shapes, kernel_name, "input") ||
CHECK_SHAPE_NULL(segment_ids_shapes, kernel_name, "segment_ids") ||
CHECK_SHAPE_NULL(output_shapes, kernel_name, "output");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'UnsortedSegmentMaxGpuKernel', input or output is null";
InitSizeLists();
return true;
}
@ -73,9 +74,8 @@ class UnsortedSegmentMaxGpuKernel : public GpuKernel {
MS_LOG(INFO) << "UnsortedSegmentMax Kernel Input count is 2";
}
if (output_shapes.size() < 1) {
MS_LOG(EXCEPTION)
<< "For UnsortedSegmentMax, output shape incorrect rank. Expect Rank at least rank 1, got Rank: "
<< output_shapes.size() << ".";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of output cannot be less than 1, but got "
<< output_shapes.size();
}
num_segments_ = output_shapes[0];
input_size_ = 1;

View File

@ -49,13 +49,14 @@ class UnsortedSegmentMinGpuKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
auto input_shapes = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
auto segment_ids_shapes = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 1);
auto output_shapes = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node, 0);
is_null_input_ =
CHECK_NULL_INPUT(input_shapes) || CHECK_NULL_INPUT(segment_ids_shapes) || CHECK_NULL_INPUT(output_shapes);
is_null_input_ = CHECK_SHAPE_NULL(input_shapes, kernel_name, "input") ||
CHECK_SHAPE_NULL(segment_ids_shapes, kernel_name, "segment_ids") ||
CHECK_SHAPE_NULL(output_shapes, kernel_name, "output");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'UnsortedSegmentMinGpuKernel', input or output is null";
InitSizeLists();
return true;
}
@ -67,9 +68,8 @@ class UnsortedSegmentMinGpuKernel : public GpuKernel {
MS_LOG(INFO) << "UnsortedSegmentMin Kernel Input count is 2";
}
if (output_shapes.size() < 1) {
MS_LOG(EXCEPTION)
<< "For UnsortedSegmentMin, output shape incorrect rank. Expect Rank at least rank 1, got Rank: "
<< output_shapes.size() << ".";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of output cannot be less than 1, but got "
<< output_shapes.size();
}
num_segments_ = output_shapes[0];
input_size_ = 1;
@ -96,7 +96,6 @@ class UnsortedSegmentMinGpuKernel : public GpuKernel {
InitSizeLists();
return true;
}
void ResetResource() noexcept override {
num_segments_ = 1;
inner_size_ = 1;

View File

@ -52,13 +52,15 @@ class UnsortedSegmentSumGpuKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
auto input_shapes = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
auto ids_shapes = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 1);
auto output_shapes = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shapes) || CHECK_NULL_INPUT(ids_shapes) || CHECK_NULL_INPUT(output_shapes);
is_null_input_ = CHECK_SHAPE_NULL(input_shapes, kernel_name, "input") ||
CHECK_SHAPE_NULL(ids_shapes, kernel_name, "segment_ids") ||
CHECK_SHAPE_NULL(output_shapes, kernel_name, "output");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'UnsortedSegmentSumGpuKernel', input or output is null";
InitSizeLists();
return true;
}

View File

@ -51,12 +51,12 @@ class ZerosLikeGpuKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
std::vector<size_t> input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'ZeroslikeGpuKernel', input is null";
InitSizeLists();
return true;
}

View File

@ -55,7 +55,7 @@ class CustomAOTGpuKernel : public GpuKernel {
if (!handle_) {
handle_ = dlopen(file_path_.c_str(), RTLD_LAZY | RTLD_LOCAL);
if (!handle_) {
MS_LOG(ERROR) << "Open Error: " << dlerror();
MS_LOG(ERROR) << "For '" << kernel_name_ << "', open should be successful, but error, " << dlerror();
return false;
}
}
@ -65,7 +65,7 @@ class CustomAOTGpuKernel : public GpuKernel {
reinterpret_cast<std::add_pointer<int(int, void **, int *, int64_t **, const char **, void *, void *)>::type>(
dlsym(handle_, func_name_.c_str()));
if (auto error_info = dlerror(); error_info != nullptr) {
MS_LOG(ERROR) << error_info;
MS_LOG(ERROR) << "For '" << kernel_name_ << "', error info: " << error_info;
return false;
}
}
@ -79,7 +79,8 @@ class CustomAOTGpuKernel : public GpuKernel {
ret = aot_func_(nparam, &params[0], &ndims_[0], &shapes_[0], &type_pointer_list_[0], stream_ptr, nullptr);
}
} catch (const std::exception &e) {
MS_LOG(ERROR) << "CustomAOT operator failed when running user defined file " << file_path_ << "! "
MS_LOG(ERROR) << "For '" << kernel_name_ << "', operator failed when running user defined file " << file_path_
<< "! "
<< "Error message is " << e.what();
return false;
}
@ -88,14 +89,15 @@ class CustomAOTGpuKernel : public GpuKernel {
case 0:
break;
case 1:
MS_LOG(ERROR) << "Number of parameters passed to AOT kernel is " << nparam
MS_LOG(ERROR) << "For '" << kernel_name_ << "', the number of parameters passed to AOT kernel is " << nparam
<< ", inconsistent with what the user wants";
return false;
case 2:
MS_LOG(ERROR) << "Type of parameters passed to AOT kernel is inconsistent with what the user wants";
MS_LOG(ERROR) << "For '" << kernel_name_
<< "', type of parameters passed to AOT kernel is inconsistent with what the user wants";
return false;
default:
MS_LOG(ERROR) << "Error occurred when running AOT kernel, "
MS_LOG(ERROR) << "For '" << kernel_name_ << "', error occurred when running AOT kernel, "
<< "error id is " << ret;
return false;
}
@ -104,27 +106,25 @@ class CustomAOTGpuKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
const auto &exec_info = AnfAlgo::GetNodeAttr<std::string>(kernel_node, "func_name");
if (auto pos = exec_info.find(":"); pos != std::string::npos) {
auto path = exec_info.substr(0, pos);
auto real_path = FileUtils::GetRealPath(path.c_str());
if (!real_path.has_value()) {
MS_LOG(ERROR) << "Invalid file path, " << path << " does not exist.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the file path should be exist, but got " << path;
}
file_path_ = real_path.value();
func_name_ = exec_info.substr(pos + 1);
} else {
MS_LOG(ERROR) << "Wrong execute info:" << exec_info;
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', Wrong execute info:" << exec_info;
}
num_input_ = AnfAlgo::GetInputTensorNum(kernel_node);
auto input_type_list = AnfAlgo::GetAllInputDeviceTypes(kernel_node);
if (num_input_ != input_type_list.size()) {
MS_LOG(ERROR) << "Input shapes'size is " << num_input_ << ", while input types' size is "
<< input_type_list.size();
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be " << input_type_list.size()
<< ", but got " << num_input_;
}
for (size_t i = 0; i < num_input_; i++) {
@ -141,9 +141,8 @@ class CustomAOTGpuKernel : public GpuKernel {
auto output_type_list = AnfAlgo::GetAllOutputDeviceTypes(kernel_node);
if (num_output_ != output_type_list.size()) {
MS_LOG(ERROR) << "Output shapes'size is " << num_output_ << ", while output types' size is "
<< output_type_list.size();
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be " << output_type_list.size()
<< ", but got " << num_output_;
}
for (size_t i = 0; i < num_output_; i++) {
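For example (hypothetical attr value): a func_name attribute of "/tmp/libcustom.so:CustomAdd" makes Init() set file_path_ to "/tmp/libcustom.so" and func_name_ to "CustomAdd"; an attribute without the ':' separator, or a path that does not resolve, now raises MS_LOG(EXCEPTION) instead of returning false.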

View File

@ -61,12 +61,13 @@ bool DatasetInitKernel::Launch(const std::vector<AddressPtr> &, const std::vecto
size_t len = total_bytes_ * buffer_q_capacity_;
if (!device::gpu::GPUMemoryAllocator::GetInstance().AllocBufferQueueMem(len, &addr)) {
MS_LOG(EXCEPTION) << "Memory not enough: failed to allocate GPU buffer queue memory[" << len << "].";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', memory not enough: failed to allocate GPU buffer queue memory["
<< len << "].";
}
auto status = GpuBufferMgr::GetInstance().Create(0, queue_name_, addr, shapes_, buffer_q_capacity_);
if (status) {
MS_LOG(EXCEPTION) << "Init Dataset Failed. len: " << len << ", status:" << status;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', init Dataset Failed. len: " << len << ", status:" << status;
}
return true;

View File

@ -121,7 +121,7 @@ bool DatasetIteratorKernel::ReadDevice(void **addr, size_t *len) {
#ifdef ENABLE_DUMP_IR
mindspore::RDR::TriggerAll();
#endif
MS_LOG(EXCEPTION) << "Get data timeout";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', get data timeout";
}
}
#ifndef ENABLE_SECURITY
@ -130,7 +130,7 @@ bool DatasetIteratorKernel::ReadDevice(void **addr, size_t *len) {
profiling_op_->RecordData(queue_size, start_time_stamp, end_time_stamp);
}
#endif
MS_LOG(ERROR) << "Get data failed, errcode " << ret;
MS_LOG(ERROR) << "For '" << kernel_name_ << "', get data failed, errcode " << ret;
return false;
}
return true;
@ -141,7 +141,7 @@ bool DatasetIteratorKernel::Launch(const std::vector<AddressPtr> &, const std::v
if (handle_ == HandleMgr::INVALID_HANDLE) {
handle_ = GpuBufferMgr::GetInstance().Open(0, queue_name_, output_size_list_);
if (handle_ == HandleMgr::INVALID_HANDLE) {
MS_LOG(EXCEPTION) << "Gpu Queue(" << queue_name_ << ") Open Failed";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', gpu Queue(" << queue_name_ << ") Open Failed";
}
}
@ -151,7 +151,7 @@ bool DatasetIteratorKernel::Launch(const std::vector<AddressPtr> &, const std::v
return false;
}
if (total_bytes_ != len) {
MS_LOG(ERROR) << "Dataset front error. read: " << len << ", expect: " << total_bytes_ << ", ";
MS_LOG(ERROR) << "For '" << kernel_name_ << "', dataset front error, read: " << len << ", expect: " << total_bytes_;
return false;
}

View File

@ -26,7 +26,7 @@
namespace mindspore {
namespace kernel {
GetNextProfiling::GetNextProfiling(const std::string &path) : profiling_path_(path) {}
GetNextProfiling::GetNextProfiling(const std::string &path) : profiling_path_(path), kernel_name_("GetNextProfiling") {}
void GetNextProfiling::GetDeviceId() {
auto context_ptr = MsContext::GetInstance();
@ -44,13 +44,13 @@ void GetNextProfiling::Init() {
void GetNextProfiling::SaveProfilingData() {
std::ofstream handle(file_name_, std::ios::trunc);
if (!handle.is_open()) {
MS_LOG(ERROR) << "Open get-next profiling file failed.";
MS_LOG(ERROR) << "For '" << kernel_name_ << "', open get-next profiling file failed.";
return;
}
for (uint32_t index = 0; index < queue_size_.size(); index++) {
if (index > time_stamp_.size() - 1) {
handle.close();
MS_LOG(EXCEPTION) << "index exceeds time_stamp_ size.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', index exceeds time_stamp_ size.";
}
handle << Name() << " " << time_stamp_[index].first << " " << time_stamp_[index].second << " " << queue_size_[index]
<< std::endl;
@ -62,7 +62,7 @@ void GetNextProfiling::SaveProfilingData() {
void GetNextProfiling::ChangeFileMode() {
if (chmod(common::SafeCStr(file_name_), S_IRUSR | S_IWUSR) == -1) {
MS_LOG(ERROR) << "Modify file:" << file_name_ << " to rw fail.";
MS_LOG(ERROR) << "For '" << kernel_name_ << "', modify file:" << file_name_ << " to rw fail.";
return;
}
}

View File

@ -44,6 +44,7 @@ class GetNextProfiling : public ProfilingOp {
std::vector<std::pair<uint64_t, uint64_t>> time_stamp_; // First value of std::pair is the start time stamp,
// Second value of std::pair is the stop time stamp
std::string device_id_;
std::string kernel_name_;
};
} // namespace kernel
} // namespace mindspore

View File

@ -90,6 +90,7 @@ class PrintGpuKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_node_ = kernel_node;
if (AnfAlgo::HasNodeAttr("string_pos", kernel_node)) {
@ -105,9 +106,8 @@ class PrintGpuKernel : public GpuKernel {
input_flag_ = SetInputFlag(&string_pos_, input_tensor_num);
for (size_t i = 0; i < input_tensor_num; i++) {
auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, i);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name_, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'PrintGpuKernel', input is null";
InitSizeLists();
return true;
}
@ -183,7 +183,7 @@ class PrintGpuKernel : public GpuKernel {
input_device_data->push_back(GetDeviceAddress<double>(inputs, i));
break;
default:
MS_LOG(EXCEPTION) << "TypeId: " << type_id << " is not supported in Print.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the typeid cannot be " << type_id;
}
}
}
@ -200,7 +200,7 @@ class PrintGpuKernel : public GpuKernel {
}
for (size_t i = 0; i < string_pos->size(); i++) {
if ((*string_pos)[i] < 0) {
MS_LOG(EXCEPTION) << "string_pos cannot be a negative value";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', string_pos cannot be a negative value";
}
auto index = IntToSize((*string_pos)[i]);
res[index] = -1;

View File

@ -65,22 +65,21 @@ class AddNGpuFwdKernel : public GpuKernel {
return true;
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
num_input_ = GetAttr<int64_t>(kernel_node, "n");
if (num_input_ != input_num) {
MS_LOG(ERROR) << "Input number is " << num_input_ << " in attr, but got " << input_num << "input.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be " << num_input_ << ", but got "
<< input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but cudnnAddTensor needs 1 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num;
}
auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "AddNGpuFwdKernel input is null";
InitSizeLists();
return true;
}

View File

@ -49,20 +49,18 @@ class AssignAddGpuFwdKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but cudnnAddTensor needs 2 inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 2, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but cudnnAddTensor needs 1 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num;
}
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "AssignAddGpuFwdKernel input is null";
InitSizeLists();
return true;
}

View File

@ -57,13 +57,15 @@ class BroadcastComplexOpGpuKernel : public GpuKernel {
return true;
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
GetOpType(kernel_node);
auto shape1 = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
auto shape2 = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 1);
auto shape3 = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node, 0);
need_broadcast_ = AnfAlgo::IsTensorBroadcast(shape1, shape2);
if (need_broadcast_ && shape1.size() > MAX_DIMS) {
MS_LOG(EXCEPTION) << "Broadcast operation not support dim greater than " << MAX_DIMS;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input cannot be greater than " << MAX_DIMS
<< ", but got " << shape1.size();
}
lhs_shape_.resize(MAX_DIMS, 1);
@ -128,7 +130,9 @@ class BroadcastComplexOpGpuKernel : public GpuKernel {
return;
}
MS_LOG(EXCEPTION) << "operation " << kernel_name << " is not supported.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_
<< ", only support these types: RealDiv, Mul, Sub, Add, Div or Complex currently, but got "
<< kernel_name;
}
BroadcastOpType op_type_;

View File

@ -70,20 +70,22 @@ class BroadcastOpGpuKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
GetOpType(kernel_node);
auto shape1 = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
auto shape2 = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 1);
auto shape3 = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(shape1) || CHECK_NULL_INPUT(shape2) || CHECK_NULL_INPUT(shape3);
is_null_input_ = CHECK_SHAPE_NULL(shape1, kernel_name_, "input") ||
CHECK_SHAPE_NULL(shape2, kernel_name_, "input") ||
CHECK_SHAPE_NULL(shape3, kernel_name_, "output");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'BroadcastGpuKernel', input or output is null";
InitSizeLists();
return true;
}
need_broadcast_ = AnfAlgo::IsTensorBroadcast(shape1, shape2);
if (need_broadcast_ && shape1.size() > MAX_DIMS) {
MS_LOG(EXCEPTION) << "Broadcast operation not support dim greater than: " << MAX_DIMS << ", actual size is "
<< shape1.size();
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input cannot be greater than " << MAX_DIMS
<< ", but got " << shape1.size();
}
lhs_shape_.resize(MAX_DIMS, 1);
@ -94,7 +96,8 @@ class BroadcastOpGpuKernel : public GpuKernel {
if (i < MAX_DIMS) {
output_shape_[i] = shape3[i];
} else {
MS_LOG(EXCEPTION) << "Output index: " << i << " should be less than " << MAX_DIMS;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the index of output should be less than " << MAX_DIMS
<< ", but got " << i;
}
}
output_num_ *= shape3[i];
@ -106,7 +109,8 @@ class BroadcastOpGpuKernel : public GpuKernel {
lhs_shape_[j + lhs_offset] = shape1[j];
} else {
auto index = j + lhs_offset;
MS_LOG(EXCEPTION) << "Invalid input1 index: " << index;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the index of input cannot be " << index << ", but got "
<< index;
}
}
input1_num_ *= shape1[j];
@ -118,7 +122,8 @@ class BroadcastOpGpuKernel : public GpuKernel {
rhs_shape_[k + rhs_offset] = shape2[k];
} else {
auto index = k + rhs_offset;
MS_LOG(EXCEPTION) << "Invalid input2 index: " << index;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the index of input cannot be " << index << ", but got "
<< index;
}
}
input2_num_ *= shape2[k];
@ -201,7 +206,10 @@ class BroadcastOpGpuKernel : public GpuKernel {
return;
}
MS_LOG(EXCEPTION) << "operation " << kernel_name << " is not supported.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_
<< ", only support these types: Maximum, Minimum, Pow, RealDiv, Mul, Sub, Add, Div, DivNoNan, "
"Mod, FloorDiv, AbsGrad, FloorMod, Atan2, TruncateDiv or TruncateMod currently, but got "
<< kernel_name;
}
BroadcastOpType op_type_;

View File

@ -69,20 +69,23 @@ class BroadcastOpGradGpuKernel : public GpuKernel {
return true;
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
GetOpType(kernel_node);
auto shape1 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto shape2 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
auto shape3 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
is_null_input_ = CHECK_NULL_INPUT(shape1) || CHECK_NULL_INPUT(shape2) || CHECK_NULL_INPUT(shape3);
is_null_input_ = CHECK_SHAPE_NULL(shape1, kernel_name_, "input_1") ||
CHECK_SHAPE_NULL(shape2, kernel_name_, "input_2") ||
CHECK_SHAPE_NULL(shape3, kernel_name_, "input_3");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'BroadcastGradGpuKernel', input or output is null";
InitSizeLists();
return true;
}
need_broadcast_ = AnfAlgo::IsTensorBroadcast(shape1, shape2);
if (need_broadcast_ && shape1.size() > kMaxShapeSize) {
MS_LOG(EXCEPTION) << "Broadcast operation not support dim greater than " << kMaxShapeSize;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input cannot be greater than "
<< kMaxShapeSize << ", but got " << shape1.size();
}
for (size_t i = 0; i < shape3.size(); i++) {
@ -98,8 +101,8 @@ class BroadcastOpGradGpuKernel : public GpuKernel {
x1_shape_[i + x1_offset] = shape1[i];
} else {
auto index = i + x1_offset;
MS_LOG(EXCEPTION) << "For 'BroadcastOpGrad', the dimension of input cannot be greater than " << kMaxShapeSize
<< ", but got " << (index + 1);
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input cannot be greater than "
<< kMaxShapeSize << ", but got " << (index + 1);
}
}
input1_num_ *= shape1[i];
@ -111,7 +114,8 @@ class BroadcastOpGradGpuKernel : public GpuKernel {
x2_shape_[i + x2_offset] = shape2[i];
} else {
auto index = i + x2_offset;
MS_LOG(EXCEPTION) << "Invalid input2 index: " << index;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input cannot be greater than "
<< kMaxShapeSize << ", but got " << (index + 1);
}
}
input2_num_ *= shape2[i];
@ -162,7 +166,8 @@ class BroadcastOpGradGpuKernel : public GpuKernel {
auto iter = kBroadcastTypeMap.find(kernel_name);
if (iter == kBroadcastTypeMap.end()) {
MS_LOG(EXCEPTION) << "operation " << kernel_name << " is not supported.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_
<< ", only support these types: MaximumGrad or MinimumGrad currently, but got " << kernel_name;
} else {
op_type_ = iter->second;
}

View File

@ -66,14 +66,14 @@ class CastAllGpuFwdKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
num_input_ = GetAttr<size_t>(kernel_node, "n");
size_ = std::make_unique<size_t[]>(num_input_);
for (size_t i = 0; i < num_input_; i++) {
auto shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, i);
is_null_input_ = CHECK_NULL_INPUT(shape);
is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'CastAllGpuKernel', input is null";
InitSizeLists();
return true;
}

View File

@ -19,6 +19,7 @@
#include <cublas_v2.h>
#include <cuda_runtime_api.h>
#include <vector>
#include <string>
#include <algorithm>
#include "backend/kernel_compiler/gpu/cuda_impl/eye_impl.cuh"
#include "backend/kernel_compiler/gpu/cuda_impl/matrix_split_impl.cuh"
@ -62,6 +63,7 @@ class CholeskyGpuKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
lower_ = static_cast<bool>(GetAttr<bool>(kernel_node, kLower));
split_dim_ = static_cast<int>(GetAttr<int64_t>(kernel_node, kSplitDim));
@ -77,11 +79,11 @@ class CholeskyGpuKernel : public GpuKernel {
auto in_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kInputIndex);
is_null_input_ = CHECK_NULL_INPUT(in_shape);
is_null_input_ = CHECK_SHAPE_NULL(in_shape, kernel_name_, "input");
if (is_null_input_) {
MS_LOG(EXCEPTION) << "For 'CholeskyGpuKernel', input is null";
InitSizeLists();
return true;
}
if (split_dim_ == 0) {
return InitNoSplitDim(in_shape);
}
@ -103,11 +105,11 @@ class CholeskyGpuKernel : public GpuKernel {
cho_row_ = in_shape.at(kDim1);
cho_col_ = in_shape.at(kDim2);
} else {
MS_LOG(ERROR) << "Input Only support Rank 2 OR 3";
MS_LOG(ERROR) << "For '" << kernel_name_ << "', the dimension of input only should be 2 or 3";
return false;
}
if (cho_row_ != cho_col_) {
MS_LOG(ERROR) << "Cholesky need square matrix as input.";
MS_LOG(ERROR) << "For '" << kernel_name_ << "', the shape of input should be square matrix";
return false;
}
// set matrix row or col to be lead dimension
@ -121,13 +123,14 @@ class CholeskyGpuKernel : public GpuKernel {
bool InitSplitDim(const std::vector<size_t> &in_shape) {
if (in_shape.size() != kCholeskyNormalShape) {
MS_LOG(ERROR) << "Cholesky Split Matrix Need Input Rank as 2.";
MS_LOG(ERROR) << "For '" << kernel_name_ << "', the dimension of input should be " << kCholeskyNormalShape
<< ", but got " << in_shape.size();
return false;
}
cho_row_ = in_shape.at(kDim0);
cho_col_ = in_shape.at(kDim1);
if (cho_row_ != cho_col_) {
MS_LOG(ERROR) << "Cholesky Split Matrix Need Square Matrix as Input.";
MS_LOG(ERROR) << "For '" << kernel_name_ << "', the shape of input should be square matrix";
return false;
}
@ -206,7 +209,7 @@ class CholeskyGpuKernel : public GpuKernel {
kernel_node_, cusolverDnDpotrfBatched(handle_, uplo_, m_, d_array_addr, lda_, d_info_array_addr, batch_),
"cusolver cholesky batched Fail");
} else {
MS_LOG(EXCEPTION) << "cholesky factorization do not support other data type but only float or double, right now.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the data type only should be float or double, right now.";
}
size_t output_elements = outputs.at(kDim0)->size / unit_size_;
// copy results from written input's matrix to output's matrix by up or lower flag.
@ -243,7 +246,7 @@ class CholeskyGpuKernel : public GpuKernel {
kernel_node_, cusolverDnDpotrfBatched(handle_, uplo_, m_, d_array_addr, lda_, d_info_array_addr, batch_),
"cusolver cholesky batched Fail");
} else {
MS_LOG(EXCEPTION) << "cholesky factorization do not support other data type but only float or double, right now.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the data type only should be float or double, right now.";
}
TriangleMatrixCopy(d_batch_input_addr, output_addr, uplo_, outputs[0]->size / sizeof(T), ldb_, m_,

View File

@ -19,6 +19,7 @@
#include <cublas_v2.h>
#include <cuda_runtime_api.h>
#include <vector>
#include <string>
#include <algorithm>
#include "backend/kernel_compiler/gpu/cuda_impl/triangle_matrix_copy_impl.cuh"
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
@ -41,7 +42,7 @@ class CholeskySolveGpuKernel : public GpuKernel {
public:
using pointer = T *;
CholeskySolveGpuKernel() = default;
CholeskySolveGpuKernel() : is_null_input_(false) {}
~CholeskySolveGpuKernel() = default;
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
@ -49,6 +50,9 @@ class CholeskySolveGpuKernel : public GpuKernel {
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (is_null_input_) {
return true;
}
CHECK_CUSOLVER_RET_WITH_ERROR(cusolverDnSetStream(handle_, reinterpret_cast<cudaStream_t>(stream_ptr)),
"cusolverDnSetStream failed");
auto input_a_addr = GetDeviceAddress<T>(inputs, kDim0);
@ -81,7 +85,7 @@ class CholeskySolveGpuKernel : public GpuKernel {
d_b_array_addr, ldb_, d_info_array_addr, batch_),
"cusolver cholesky solve batched Fail");
} else {
MS_LOG(EXCEPTION) << "cholesky solve do not support other data type but only float or double, right now.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the data type only should be float or double, right now.";
}
size_t output_elements = outputs.at(kDim0)->size / unit_size_;
// copy results from written input's matrix to output's matrix.
@ -90,6 +94,7 @@ class CholeskySolveGpuKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
lower_ = static_cast<bool>(GetAttr<bool>(kernel_node, kLower));
// gpu input is col major default, so need to change row major.
@ -103,10 +108,14 @@ class CholeskySolveGpuKernel : public GpuKernel {
handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCusolverDnHandle();
auto in_a_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kDim0);
auto in_b_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kDim1);
if (CHECK_NULL_INPUT(in_a_shape) || CHECK_NULL_INPUT(in_b_shape)) {
MS_LOG(EXCEPTION) << "For 'CholeskySolveGpuKernel', input is null";
is_null_input_ =
CHECK_SHAPE_NULL(in_a_shape, kernel_name_, "input_a") || CHECK_SHAPE_NULL(in_b_shape, kernel_name_, "input_b");
if (is_null_input_) {
InitSizeLists();
return true;
}
return InitDim(in_a_shape, in_b_shape);
(void)InitDim(in_a_shape, in_b_shape);
return true;
}
protected:
@ -127,7 +136,7 @@ class CholeskySolveGpuKernel : public GpuKernel {
}
private:
bool InitDim(const std::vector<size_t> &in_a_shape, const std::vector<size_t> &in_b_shape) {
void InitDim(const std::vector<size_t> &in_a_shape, const std::vector<size_t> &in_b_shape) {
if (in_a_shape.size() == kCholeskyDefaultShape) {
batch_ = 1;
cho_row_ = in_a_shape.at(kDim0);
@ -141,17 +150,14 @@ class CholeskySolveGpuKernel : public GpuKernel {
cho_row_ = in_a_shape.at(kDim1);
cho_col_ = in_a_shape.at(kDim2);
} else {
MS_LOG(ERROR) << "Input Only support Rank 2 OR 3";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input only should be 2 or 3";
}
if (cho_row_ != cho_col_) {
MS_LOG(ERROR) << "Cholesky need square matrix as input.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the shape of input should be square matrix";
}
size_t b_row = in_b_shape.size() == kCholeskyBatchedShape ? in_b_shape.at(kDim1) : in_b_shape.at(kDim0);
if (cho_row_ != b_row) {
MS_LOG(ERROR) << "Cholesky right hand matrix is not equal to left matrix.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', right hand matrix should be equal to left matrix";
}
m_ = SizeToInt(in_a_shape.at(kDim1));
lda_ = m_;
@ -159,7 +165,6 @@ class CholeskySolveGpuKernel : public GpuKernel {
h_a_array_.resize(batch_);
h_b_array_.resize(batch_);
InitSizeLists();
return true;
}
size_t cho_row_{0};
size_t cho_col_{0};
@ -177,6 +182,7 @@ class CholeskySolveGpuKernel : public GpuKernel {
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
std::vector<size_t> workspace_size_list_;
bool is_null_input_;
};
} // namespace kernel
} // namespace mindspore
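CholeskySolveGpuKernel above, and EighcGpuKernel and EighGpuKernel below, all gain the same null-input handling in this commit. Excerpted shape of the pattern as the hunks apply it (comments only, not compilable in isolation):

// ctor:   CholeskySolveGpuKernel() : is_null_input_(false) {}
// Init:   is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name_, "input");
//         if (is_null_input_) { InitSizeLists(); return true; }
// Launch: if (is_null_input_) { return true; }  // nothing to compute for a null shape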

View File

@ -19,6 +19,7 @@
#include <cublas_v2.h>
#include <cuda_runtime_api.h>
#include <vector>
#include <string>
#include <algorithm>
#include "backend/kernel_compiler/gpu/cuda_impl/eye_impl.cuh"
#include "backend/kernel_compiler/gpu/cuda_impl/matrix_split_impl.cuh"
@ -67,13 +68,13 @@ class CholeskyTrsmGpuKernel : public GpuKernel {
return true;
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCusolverDnHandle();
blas_handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCublasHandle();
auto in_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(in_shape);
is_null_input_ = CHECK_SHAPE_NULL(in_shape, kernel_name_, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'CholeskyTrsmSolveGpuKernel', input is null";
InitSizeLists();
return true;
}
@ -84,12 +85,10 @@ class CholeskyTrsmGpuKernel : public GpuKernel {
}
} else {
if (in_shape.size() != 2) {
MS_LOG(ERROR) << "CholeskyTrsm Split Matrix Need Input Rank as 2.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input only should be 2";
}
if (in_shape[0] != in_shape[1]) {
MS_LOG(ERROR) << "CholeskyTrsm Split Matrix Need Square Matrix as Input.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the shape of input should be square matrix";
}
InitDimOthers(kernel_node, in_shape);
}
@ -201,17 +200,19 @@ class CholeskyTrsmGpuKernel : public GpuKernel {
if (in_shape.size() == 2) {
batch_ = 1;
if (in_shape[0] != in_shape[1]) {
MS_LOG(ERROR) << "CholeskyTrsm shape0: " << in_shape[0] << ", is not equal to shape1: " << in_shape[1];
MS_LOG(ERROR) << "For '" << kernel_name_ << "', shape0 should be equal to " << in_shape[1] << ", but got "
<< in_shape[0];
return false;
}
} else if (in_shape.size() == 3) {
batch_ = SizeToInt(in_shape[0]);
if (in_shape[1] != in_shape[2]) {
MS_LOG(ERROR) << "CholeskyTrsm shape1: " << in_shape[1] << ", is not equal to shape2: " << in_shape[2];
MS_LOG(ERROR) << "For '" << kernel_name_ << "', shape1 should be equal to " << in_shape[2] << ", but got "
<< in_shape[1];
return false;
}
} else {
MS_LOG(ERROR) << "Input Only support Rank 2 OR 3";
MS_LOG(ERROR) << "For '" << kernel_name_ << "', the dimension of input only should be 2 or 3";
return false;
}

View File

@ -55,15 +55,15 @@ class CumProdGpuKernel : public GpuKernel {
return true;
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(EXCEPTION) << "Argument number is " << input_num << ", but CumProdGpuKernel needs 1.";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 1, but got " << input_num;
}
input_size_0_ = sizeof(T);
shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(shape_);
is_null_input_ = CHECK_SHAPE_NULL(shape_, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'CumProdGpuKernel', input is null.";
InitSizeLists();
return true;
}
@ -72,7 +72,8 @@ class CumProdGpuKernel : public GpuKernel {
reverse_ = GetAttr<bool>(kernel_node, "reverse");
int input_dim_length = SizeToInt(shape_.size());
if (axis_ >= input_dim_length) {
MS_LOG(EXCEPTION) << "Axis is: " << axis_ << " out of bounds.";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the value of 'axis' should be less than " << input_dim_length
<< ", but got " << axis_;
}
while (axis_ < 0) {
axis_ += input_dim_length;

View File

@ -55,15 +55,15 @@ class CumSumGpuKernel : public GpuKernel {
return true;
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(EXCEPTION) << "Argument number is " << input_num << ", but CumSumGpuKernel needs 1.";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 1, but got " << input_num;
}
input_size_0_ = sizeof(T);
shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(shape_);
is_null_input_ = CHECK_SHAPE_NULL(shape_, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'CumSumGpuKernel', input is null";
InitSizeLists();
return true;
}
@ -72,7 +72,8 @@ class CumSumGpuKernel : public GpuKernel {
reverse_ = GetAttr<bool>(kernel_node, "reverse");
int input_dim_length = SizeToInt(shape_.size());
if (axis_ >= input_dim_length) {
MS_LOG(EXCEPTION) << "Axis is: " << axis_ << " out of bounds.";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the value of 'axis' should be less than " << input_dim_length
<< ", but got " << axis_;
}
while (axis_ < 0) {
axis_ += input_dim_length;

View File

@ -47,7 +47,8 @@ class DetTriangleGpuKernel : public GpuKernel {
if (!CheckTriangle(input_addr, fill_mode_, matrix_n_, outputs[0]->size / sizeof(T),
reinterpret_cast<cudaStream_t>(stream_ptr))) {
MS_LOG(ERROR) << "The elements in the upper half of the matrix should be all 0, fill mode is: " << fill_mode_;
MS_LOG(ERROR) << "For '" << kernel_name_
<< "', the elements in the upper half of the matrix should be all 0, fill mode is: " << fill_mode_;
return false;
}
DetTriangle(input_addr, output_addr, matrix_n_, outputs[0]->size / sizeof(T),
@ -56,21 +57,20 @@ class DetTriangleGpuKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but DetTriangle needs 1 inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 1, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but DetTriangle needs 1 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be 1, but got " << output_num;
}
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape) || CHECK_NULL_INPUT(output_shape);
is_null_input_ =
CHECK_SHAPE_NULL(input_shape, kernel_name_, "input") || CHECK_SHAPE_NULL(output_shape, kernel_name_, "output");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'DeterminantTriangleGpuKernel', input or output is null";
InitSizeLists();
return true;
}
@ -79,8 +79,8 @@ class DetTriangleGpuKernel : public GpuKernel {
}
if (input_shape.size() < 2) {
MS_LOG(ERROR) << "The input should have rank at least 2.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input cannot be less than 2, but got "
<< input_shape.size();
}
matrix_n_ = input_shape[input_shape.size() - 1];
@ -89,12 +89,11 @@ class DetTriangleGpuKernel : public GpuKernel {
output_size_ *= output_shape[i];
}
if (matrix_n_ == 0 || output_size_ != input_size_ / matrix_n_ / matrix_n_) {
MS_LOG(ERROR) << "The output shape is wrong.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the shape of output should be "
<< (input_size_ / matrix_n_ / matrix_n_) << ", but got " << output_size_;
}
if (input_shape[input_shape.size() - 2] != input_shape[input_shape.size() - 1]) {
MS_LOG(ERROR) << "The matrix should be in shape of square.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the shape of input should be square matrix";
}
auto prim = AnfAlgo::GetCNodePrimitive(kernel_node);
MS_EXCEPTION_IF_NULL(prim);

View File

@ -21,6 +21,7 @@
#include <cusolverDn.h>
#include <cuda_runtime.h>
#include <vector>
#include <string>
#include <complex>
#include <algorithm>
#include <type_traits>
@ -52,13 +53,14 @@ struct Complex_traits<Complex<T>> {
template <typename T>
class EighcGpuKernel : public GpuKernel {
public:
EighcGpuKernel() = default;
EighcGpuKernel() : is_null_input_(false) {}
~EighcGpuKernel() = default;
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
blas_handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCublasHandle();
dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0);
auto A_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
@ -70,13 +72,14 @@ class EighcGpuKernel : public GpuKernel {
jobz_ = CUSOLVER_EIG_MODE_NOVECTOR;
}
cusolver_handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCusolverDnHandle();
bool is_null_input = CHECK_NULL_INPUT(A_shape);
if (is_null_input) {
MS_LOG(EXCEPTION) << "For 'EighValue GpuKernel', input is null";
is_null_input_ = CHECK_SHAPE_NULL(A_shape, kernel_name_, "input");
if (is_null_input_) {
InitSizeLists();
return true;
}
if (A_shape.size() != kShape2dDims || A_shape[0] != A_shape[1]) {
MS_LOG(EXCEPTION) << "wrong array shape, A should be a square matrix, but got [" << A_shape[0] << " X "
<< A_shape[1] << "]";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the shape of input should be square matrix, but got ["
<< A_shape[0] << " X " << A_shape[1] << "]";
}
m_ = A_shape[0];
InitSizeLists();
@ -85,6 +88,9 @@ class EighcGpuKernel : public GpuKernel {
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (is_null_input_) {
return true;
}
CHECK_CUBLAS_RET_WITH_ERROR(cublasSetStream(blas_handle_, reinterpret_cast<cudaStream_t>(stream_ptr)),
"cublasSetStream failed");
CHECK_CUSOLVER_RET_WITH_ERROR(cusolverDnSetStream(cusolver_handle_, reinterpret_cast<cudaStream_t>(stream_ptr)),
@ -136,7 +142,7 @@ class EighcGpuKernel : public GpuKernel {
}
d_work = device::gpu::GPUMemoryAllocator::GetInstance().AllocTensorMem(sizeof(T) * lwork);
if (!d_work) {
MS_LOG(EXCEPTION) << "GPU memory alloca failed.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', GPU memory alloca failed.";
}
if constexpr (std::is_same_v<T, Complex<float>>) {
cusolverDnCheevd(cusolver_handle_, jobz_, uplo_, m_, reinterpret_cast<cuComplex *>(w_v_addr), lda_, w_w_addr,
@ -198,6 +204,7 @@ class EighcGpuKernel : public GpuKernel {
std::vector<size_t> output_size_list_{};
std::vector<size_t> workspace_size_list_{};
using D = typename Complex_traits<T>::value_type;
bool is_null_input_;
};
} // namespace kernel
} // namespace mindspore

View File

@ -39,13 +39,14 @@ constexpr char LOWER[] = "lower";
template <typename T>
class EighGpuKernel : public GpuKernel {
public:
EighGpuKernel() = default;
EighGpuKernel() : is_null_input_(false) {}
~EighGpuKernel() = default;
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0);
auto A_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
compute_eigen_vectors_ = static_cast<bool>(GetAttr<bool>(kernel_node, C_EIEH_VECTOR));
@ -56,13 +57,14 @@ class EighGpuKernel : public GpuKernel {
jobz_ = CUSOLVER_EIG_MODE_NOVECTOR;
}
cusolver_handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCusolverDnHandle();
bool is_null_input = CHECK_NULL_INPUT(A_shape);
if (is_null_input) {
MS_LOG(EXCEPTION) << "For 'EighValue GpuKernel', input is null";
is_null_input_ = CHECK_SHAPE_NULL(A_shape, kernel_name, "input");
if (is_null_input_) {
InitSizeLists();
return true;
}
if (A_shape.size() != kShape2dDims || A_shape[0] != A_shape[1]) {
MS_LOG(EXCEPTION) << "wrong array shape, A should be a square matrix, but got [" << A_shape[0] << " X "
<< A_shape[1] << "]";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the shape of input should be square matrix, but got ["
<< A_shape[0] << " X " << A_shape[1] << "]";
}
m_ = A_shape[0];
InitSizeLists();
@ -71,6 +73,9 @@ class EighGpuKernel : public GpuKernel {
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (is_null_input_) {
return true;
}
CHECK_CUSOLVER_RET_WITH_ERROR(cusolverDnSetStream(cusolver_handle_, reinterpret_cast<cudaStream_t>(stream_ptr)),
"cusolverDnSetStream failed");
// matrix A, input or output(eigenvector)
@ -152,6 +157,7 @@ class EighGpuKernel : public GpuKernel {
cusolverEigMode_t jobz_ = CUSOLVER_EIG_MODE_NOVECTOR;
bool compute_eigen_vectors_{false};
bool lower_{true};
bool is_null_input_;
std::vector<T *> h_array_{};
std::vector<size_t> input_size_list_{};
std::vector<size_t> output_size_list_{};

View File

@ -49,23 +49,21 @@ class EqualCountGpuKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but equalcount needs 2 inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 2, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but equalcount needs 1 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num;
}
output_size_ = sizeof(T);
input_size_ = sizeof(T);
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'EqualcountGpuKernel', input is null";
InitSizeLists();
return true;
}

View File

@ -77,13 +77,11 @@ class FloatStatusGpuKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
if (!CheckParam(kernel_node)) {
return false;
}
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
(void)CheckParam(kernel_node);
auto shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(shape);
is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'FloatStatusGpuKernel', input is null";
InitSizeLists();
return true;
}
@ -91,10 +89,10 @@ class FloatStatusGpuKernel : public GpuKernel {
for (size_t x : shape) {
input_size_ = input_size_ * x;
}
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
auto iter = kOpTypeMap.find(kernel_name);
if (iter == kOpTypeMap.end()) {
MS_LOG(EXCEPTION) << "FloatStatus kernel " << kernel_name << " is not supported.";
MS_LOG(EXCEPTION) << "For '" << kernel_name << ", only support these types: FloatStatus, IsInf, IsNan, IsFinite "
<< "currently, but got " << kernel_name;
}
kernel_name_ = iter->second;
@ -114,18 +112,16 @@ class FloatStatusGpuKernel : public GpuKernel {
}
private:
bool CheckParam(const CNodePtr &kernel_node) {
void CheckParam(const CNodePtr &kernel_node) {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but FloatStatusGpuKernel needs 1 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 1, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but FloatStatusGpuKernel needs 1 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num;
}
return true;
}
std::vector<size_t> input_size_list_;

View File

@ -52,20 +52,18 @@ class IdentityGpuKernel : public GpuKernel {
return true;
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but identity needs 1 inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 1, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but identity needs 1 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num;
}
auto input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "IdentityGpuKernel input is null";
InitSizeLists();
return true;
}

View File

@ -62,18 +62,20 @@ class IndexAddGpuKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 3) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but index add needs 3 inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 3, but got " << input_num;
}
std::vector<size_t> dst_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
std::vector<size_t> index_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
std::vector<size_t> src_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
is_null_input_ = CHECK_NULL_INPUT(dst_shape) || CHECK_NULL_INPUT(index_shape) || CHECK_NULL_INPUT(src_shape);
is_null_input_ = CHECK_SHAPE_NULL(dst_shape, kernel_name, "x") ||
CHECK_SHAPE_NULL(index_shape, kernel_name, "indices") ||
CHECK_SHAPE_NULL(src_shape, kernel_name, "y");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'IndexAddGpuKernel', input is null";
InitSizeLists();
return true;
}
@ -91,8 +93,8 @@ class IndexAddGpuKernel : public GpuKernel {
inner_size_ *= src_shape[i];
}
if (axis < 0 || axis >= SizeToInt(src_shape.size()) || axis >= SizeToInt(dst_shape.size())) {
MS_LOG(EXCEPTION) << "Init axis size failed, actual src axis size is " << src_axis_size_
<< ", actual dst axis size is " << dst_axis_size_;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the size of 'axis' cannot be greater than or equal to "
<< SizeToInt(src_shape.size()) << " or " << SizeToInt(dst_shape.size()) << ", but got " << axis;
}
src_axis_size_ = src_shape[axis];
dst_axis_size_ = dst_shape[axis];

View File

@ -49,37 +49,34 @@ class LinSpaceGpuKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 3) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but DynamicLinSpace needs 3 inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 3, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but DynamicLinSpace needs 1 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num;
}
auto input_1 = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
auto input_2 = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 1);
auto value_count = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_1) || CHECK_NULL_INPUT(input_2) || CHECK_NULL_INPUT(value_count);
is_null_input_ = CHECK_SHAPE_NULL(input_1, kernel_name, "start") ||
CHECK_SHAPE_NULL(input_2, kernel_name, "stop") ||
CHECK_SHAPE_NULL(value_count, kernel_name, "output");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'LinspaceGpuKernel', input is null";
InitSizeLists();
return true;
}
// error checking input data
if ((input_1.size() != 0) || (input_2.size() != 0)) {
MS_LOG(ERROR) << "For LinShape "
<< "both start and end must be 0-D Tensors. Got " << input_1.size() << " and " << input_2.size()
<< ".";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', both start and end should be 0-D Tensors, but got dimension "
<< "of start: " << input_1.size() << " and dimension of end: " << input_2.size();
}
if (value_count.size() != 1) {
MS_LOG(ERROR) << "For LinShape, output shape incorrect rank. Expect Rank: 1, got Rank: " << value_count.size()
<< ".";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of output should be 1, but got "
<< value_count.size();
}
value_count_ = value_count[0];
InitSizeLists();

View File

@ -49,11 +49,11 @@ class LogicalNotGpuKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
auto input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'LogicalGpuKernel', input is null";
InitSizeLists();
return true;
}

View File

@ -19,6 +19,7 @@
#include <cublas_v2.h>
#include <cuda_runtime_api.h>
#include <vector>
#include <string>
#include <algorithm>
#include <type_traits>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
@ -39,7 +40,7 @@ constexpr size_t kLuNormalShape = 2;
template <typename T>
class LUGpuKernel : public GpuKernel {
public:
LUGpuKernel() = default;
LUGpuKernel() : is_null_input_(false) {}
~LUGpuKernel() = default;
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
@ -47,6 +48,9 @@ class LUGpuKernel : public GpuKernel {
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (is_null_input_) {
return true;
}
CHECK_CUSOLVER_RET_WITH_ERROR(cusolverDnSetStream(handle_, reinterpret_cast<cudaStream_t>(stream_ptr)),
"cusolverDnSetStream failed");
auto input_addr = GetDeviceAddress<T>(inputs, kDim0);
@ -65,7 +69,7 @@ class LUGpuKernel : public GpuKernel {
"cusolver query lu work size fail");
if (cudaMalloc(reinterpret_cast<void **>(&d_work_), unit_size_ * lwork_) != cudaSuccess) {
MS_LOG(EXCEPTION) << "cusolver malloc work size fail";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', cusolver malloc work size fail";
}
CHECK_CUSOLVER_RET_WITH_EXCEPT(
@ -79,7 +83,7 @@ class LUGpuKernel : public GpuKernel {
// 5. malloc device working space of getrf
if (cudaMalloc(reinterpret_cast<void **>(&d_work_), unit_size_ * lwork_) != cudaSuccess) {
MS_LOG(EXCEPTION) << "cusolver malloc work size fail";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', cusolver malloc work size fail";
}
// 6. solve to lu factorization according to cuSolver api, outputs have been written to input's matrix.
@ -87,7 +91,7 @@ class LUGpuKernel : public GpuKernel {
kernel_node_, cusolverDnDgetrf(handle_, m_, m_, input_addr, lda_, d_work_, piv_output_addr, info_output_addr),
"cusolver lu fail");
} else {
MS_LOG(EXCEPTION) << "cholesky factorization do not support other data type but only float or double, right now.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the data type only should be float or double, right now.";
}
// 7. copy results from written input's matrix to output's matrix.
// if (cudaMemcpy(output_addr, input_addr, lda_ * m_ * unit_size_, cudaMemcpyDeviceToDevice) != cudaSuccess) {
@ -101,14 +105,16 @@ class LUGpuKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
// 1. get CuSolver Dense matrix handler
handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCusolverDnHandle();
auto in_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
// 2. check input shape not null
bool is_null_input = CHECK_NULL_INPUT(in_shape);
if (is_null_input) {
MS_LOG(EXCEPTION) << "For 'PureCholeskyGpuKernel', input is null";
is_null_input_ = CHECK_SHAPE_NULL(in_shape, kernel_name_, "input");
if (is_null_input_) {
InitSizeLists();
return true;
}
// 3. calculate input size
if (!InitInputSize(in_shape)) {
@ -126,11 +132,11 @@ class LUGpuKernel : public GpuKernel {
lu_row_ = in_shape.at(kDim0);
lu_col_ = in_shape.at(kDim1);
} else {
MS_LOG(ERROR) << "Input Only support Rank 1 OR 2";
MS_LOG(ERROR) << "For '" << kernel_name_ << "', the dimension of input only should be 1 or 2";
return false;
}
if (lu_row_ != lu_col_) {
MS_LOG(ERROR) << "Cholesky need square matrix as input.";
MS_LOG(ERROR) << "For '" << kernel_name_ << "', the shape of input should be square matrix";
return false;
}
// set matrix row or col to be lead dimension
@ -170,6 +176,7 @@ class LUGpuKernel : public GpuKernel {
std::vector<size_t> input_size_list_{};
std::vector<size_t> output_size_list_{};
std::vector<size_t> workspace_size_list_{};
bool is_null_input_;
};
} // namespace kernel
} // namespace mindspore

View File
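
The numbered steps in the LU kernel above follow cuSOLVER's two-phase getrf protocol: query the workspace size, allocate it, then factor in place. A condensed single-precision sketch of that call sequence for a device-resident m x m matrix, with the exception-based error handling above reduced to early returns:

#include <cuda_runtime_api.h>
#include <cusolverDn.h>

// LU factorization with partial pivoting: d_A (column-major, leading dim lda)
// is overwritten by L and U; d_ipiv gets the pivots, d_info the status code.
inline bool LuFactorize(cusolverDnHandle_t handle, float *d_A, int m, int lda,
                        int *d_ipiv, int *d_info) {
  int lwork = 0;
  // 1. ask getrf how much device workspace it needs for this problem size
  if (cusolverDnSgetrf_bufferSize(handle, m, m, d_A, lda, &lwork) != CUSOLVER_STATUS_SUCCESS) return false;
  float *d_work = nullptr;
  // 2. allocate the workspace (the kernel above throws an exception on failure)
  if (cudaMalloc(reinterpret_cast<void **>(&d_work), sizeof(float) * lwork) != cudaSuccess) return false;
  // 3. factor in place: results land in d_A, so no extra output copy is needed
  cusolverStatus_t status = cusolverDnSgetrf(handle, m, m, d_A, lda, d_work, d_ipiv, d_info);
  cudaFree(d_work);
  return status == CUSOLVER_STATUS_SUCCESS;
}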

@ -91,13 +91,14 @@ class MatMulGpuKernel : public GpuKernel {
"cublasGemmStridedBatchedEx failed. Possible reasons: the GPU is occupied by other processes.");
}
} catch (const std::exception &e) {
MS_LOG(EXCEPTION) << "Encountered an exception: " << e.what() << " when invoke cublas "
<< (batch_ == 1 ? "cublasGemmEx" : "cublasGemmStridedBatchedEx");
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', encountered an exception: " << e.what() << " when invoke "
<< "cublas " << (batch_ == 1 ? "cublasGemmEx" : "cublasGemmStridedBatchedEx");
}
return true;
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCublasHandle();
dtype_a_ = GetCudaDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0)));
@ -105,7 +106,7 @@ class MatMulGpuKernel : public GpuKernel {
dtype_c_ = GetCudaDataType(TypeIdLabel(AnfAlgo::GetOutputDeviceDataType(kernel_node, 0)));
auto node_name = AnfAlgo::GetCNodeName(kernel_node);
if (dtype_a_ != dtype_b_ || dtype_a_ != dtype_c_) {
MS_LOG(EXCEPTION) << "input and output types are not the same in " << node_name;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', input and output types are not the same in " << node_name;
}
if (dtype_a_ == CUDA_R_16F && dtype_b_ == CUDA_R_16F && dtype_c_ == CUDA_R_16F) {
MS_LOG(INFO) << "input and output type is float16, allow to use Tensor Core operations if possible";
@ -113,15 +114,16 @@ class MatMulGpuKernel : public GpuKernel {
}
auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
auto input1_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(output_shape) || CHECK_NULL_INPUT(input1_shape);
is_null_input_ =
CHECK_SHAPE_NULL(input1_shape, kernel_name_, "input") || CHECK_SHAPE_NULL(output_shape, kernel_name_, "output");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'MatmulGpuKernel', input or output is null";
InitSizeLists();
return true;
}
auto dims = output_shape.size();
if (dims < 2) {
MS_LOG(EXCEPTION) << "Output dims " << dims << " not support.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of output cannot be less than 2, but got "
<< dims;
}
m_ = output_shape[dims - 2];
@ -139,7 +141,7 @@ class MatMulGpuKernel : public GpuKernel {
} else if (!transpose && input1_shape.size() > (dims - 1)) {
k_ = input1_shape[dims - 1];
} else {
MS_LOG(EXCEPTION) << "Init k_ via input1_shape failed.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', init k_ via input1_shape failed.";
}
transpose = GetAttr<bool>(kernel_node, "transpose_x2");

View File
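
The m_/k_ bookkeeping above is the standard GEMM dimension contract: for C = op(A) * op(B) with C of shape [..., m, n], op(A) must be [m, k], so A stores [m, k] untransposed and [k, m] transposed. A hypothetical helper mirroring the k_ branch in the hunk:

#include <cstddef>
#include <stdexcept>
#include <vector>

// Mirrors the k_ selection above: `dims` is the output rank (the caller has
// already checked dims >= 2), so dims - 1 is A's last axis and dims - 2 its
// second-to-last in the aligned batched layout.
inline size_t DeriveK(const std::vector<size_t> &a_shape, size_t dims, bool transpose_a) {
  if (transpose_a && a_shape.size() > (dims - 2)) {
    return a_shape[dims - 2];  // A stored as [k, m]
  }
  if (!transpose_a && a_shape.size() > (dims - 1)) {
    return a_shape[dims - 1];  // A stored as [m, k]
  }
  throw std::invalid_argument("init k_ via input1_shape failed");
}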

@ -19,6 +19,7 @@
#include <cublas_v2.h>
#include <cuda_runtime_api.h>
#include <vector>
#include <string>
#include <type_traits>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
@ -90,28 +91,30 @@ class MatrixInverseGpuKernel : public GpuKernel {
reinterpret_cast<double **>(inv_batch_addr), len, info_addr, batchsize),
"cublas trsm batched Fail");
} else {
MS_LOG(EXCEPTION) << "The data type entered must be float or double.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the data type entered must be float or double.";
}
return true;
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCublasHandle();
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name_, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'MatrixInverseGpuKernel', input is null";
InitSizeLists();
return true;
}
if (input_shape.size() < 2) {
MS_LOG(EXCEPTION) << "The dim entered needs to be greater than 2, but " << input_shape.size() << " was taken";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input cannot be less than 2, but got "
<< input_shape.size();
}
size_t last_index = input_shape.size() - 1;
if (input_shape[last_index] != input_shape[last_index - 1]) {
MS_LOG(EXCEPTION) << "The last two dimensions of the input matrix should be equal!";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the last two dimensions of the input matrix should be equal, "
<< "but got one: " << input_shape[last_index] << ", another: " << input_shape[last_index - 1];
}
size_ = input_shape[last_index];
for (size_t i = 0; i < last_index - 1; i++) {

View File

@ -59,7 +59,7 @@ class MultinomialGpuKernel : public GpuKernel {
T *probs_addr = GetDeviceAddress<T>(inputs, 0);
int64_t *num_sample_addr = GetDeviceAddress<int64_t>(inputs, 1);
if (distributions_ == 0) {
MS_LOG(ERROR) << "Divide by zero. the distributions_ is 0.";
MS_LOG(ERROR) << "For '" << kernel_name_ << "', divide by zero. the distributions_ is 0.";
return false;
}
@ -83,22 +83,20 @@ class MultinomialGpuKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but multinomial needs 2 input.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 2, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but multinomial needs 1 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be 1, but got " << output_num;
}
auto input_shape_0 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape_0) || CHECK_NULL_INPUT(output_shape);
is_null_input_ =
CHECK_SHAPE_NULL(input_shape_0, kernel_name_, "input") || CHECK_SHAPE_NULL(output_shape, kernel_name_, "output");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'MultinomialGpuKernel', input or output is null";
InitSizeLists();
return true;
}

View File

@ -65,24 +65,22 @@ class NMSWithMaskGpuFwdKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
iou_value_ = GetAttr<float>(kernel_node, "iou_threshold");
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but NMSWithMask needs 1 input.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 1, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 3) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but NMSWithMask needs 3 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 3, but got " << output_num;
}
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'NMSWithMaskGpuKernel', input is null";
InitSizeLists();
return true;
}

View File

@ -120,7 +120,7 @@ class RandomOpGpuKernel : public GpuKernel {
inputs[2]->size / sizeof(T), output_addr, outputs[0]->size / sizeof(T),
reinterpret_cast<cudaStream_t>(stream_ptr));
if (!ret) {
MS_LOG(ERROR) << "For UniformInt op, `minval` should be strictly less than `maxval`";
MS_LOG(ERROR) << "For '" << kernel_name_ << "', `minval` should be strictly less than `maxval`";
return false;
}
break;
@ -148,7 +148,8 @@ class RandomOpGpuKernel : public GpuKernel {
break;
}
default: {
MS_LOG(EXCEPTION) << "Random operation " << random_op_type_ << " is not supported.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << ", only support these types: StandardNormal, CudnnUniformReal, "
<< "UniformInt, UniformReal currently, but got " << random_op_type_;
}
}
return true;
@ -158,29 +159,27 @@ class RandomOpGpuKernel : public GpuKernel {
std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node);
auto iter = kRandomOpTypeMap.find(kernel_name);
if (iter == kRandomOpTypeMap.end()) {
MS_LOG(EXCEPTION) << "Random operation " << kernel_name << " is not supported.";
MS_LOG(EXCEPTION) << "For '" << kernel_name << ", only support these types: StandardNormal, CudnnUniformReal, "
<< "UniformInt, UniformReal currently, but got " << kernel_name;
} else {
random_op_type_ = iter->second;
}
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if ((random_op_type_ == RANDOM_OP_NORMAL || random_op_type_ == RANDOM_OP_UNIFORM_REAL) && input_num != 1) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but random op needs 1 input.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 1, but got " << input_num;
}
if (random_op_type_ == RANDOM_OP_UNIFORM_INT && input_num != 3) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but random op needs 3 inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 3, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but random op needs 1 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num;
}
auto input_shape_0 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape_0) || CHECK_NULL_INPUT(output_shape);
is_null_input_ =
CHECK_SHAPE_NULL(input_shape_0, kernel_name, "input") || CHECK_SHAPE_NULL(output_shape, kernel_name, "output");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'RandomOpGpuKernel', input or output is null";
InitSizeLists();
return true;
}

View File

@ -51,11 +51,12 @@ class SquareSumAllGpuFwdKernel : public GpuKernel {
return true;
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(ERROR) << "SquareSumAllGpuFwdKernel input is null";
return false;
InitSizeLists();
return true;
}
for (size_t i = 0; i < input_shape.size(); i++) {
input_size_ *= input_shape[i];

View File

@ -56,18 +56,21 @@ class SquaredDifferenceOpGpuKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
auto input_shape1 = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
auto input_shape2 = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 1);
auto output_shape = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape1) || CHECK_NULL_INPUT(input_shape2) || CHECK_NULL_INPUT(output_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape1, kernel_name, "input") ||
CHECK_SHAPE_NULL(input_shape2, kernel_name, "input") ||
CHECK_SHAPE_NULL(output_shape, kernel_name, "output");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'SquaredDifferenceGpuKernel', input or output is null";
InitSizeLists();
return true;
}
need_broadcast_ = IsBroadcast(input_shape1, input_shape2);
if (need_broadcast_ && output_shape.size() > MAX_DIMS) {
MS_LOG(EXCEPTION) << "Broadcast operation not support dim greater than " << MAX_DIMS;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of output cannot be greater than " << MAX_DIMS
<< ", but got " << output_shape.size();
}
lhs_shape_.resize(MAX_DIMS, 1);
@ -86,7 +89,8 @@ class SquaredDifferenceOpGpuKernel : public GpuKernel {
lhs_shape_[j + lhs_offset] = input_shape1[j];
} else {
auto index = j + lhs_offset;
MS_LOG(EXCEPTION) << "Invalid input1 index: " << index;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the index of input cannot be " << index << ", but got "
<< index;
}
}
input1_num_ *= input_shape1[j];
@ -98,7 +102,8 @@ class SquaredDifferenceOpGpuKernel : public GpuKernel {
rhs_shape_[k + rhs_offset] = input_shape2[k];
} else {
auto index = k + rhs_offset;
MS_LOG(EXCEPTION) << "Invalid input2 index: " << index;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the index of input cannot be " << index << ", but got "
<< index;
}
}
input2_num_ *= input_shape2[k];

View File
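
The lhs_offset/rhs_offset arithmetic above right-aligns each input shape inside a MAX_DIMS-long frame padded with 1s, which is what makes the index bound check meaningful. A self-contained sketch of that padding step, assuming MAX_DIMS is 7 as is typical for these broadcast kernels:

#include <cstddef>
#include <stdexcept>
#include <vector>

constexpr size_t kMaxDims = 7;  // assumed value of MAX_DIMS

// Right-align `shape` into a kMaxDims-long vector of 1s, e.g. [3,4] -> [1,1,1,1,1,3,4],
// so two operands of different rank can be broadcast index-by-index.
inline std::vector<size_t> PadShapeForBroadcast(const std::vector<size_t> &shape) {
  if (shape.size() > kMaxDims) {
    throw std::invalid_argument("shape rank exceeds kMaxDims");
  }
  std::vector<size_t> padded(kMaxDims, 1);
  const size_t offset = kMaxDims - shape.size();
  for (size_t j = 0; j < shape.size(); ++j) {
    padded[j + offset] = shape[j];  // j + offset < kMaxDims by construction
  }
  return padded;
}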

@ -118,28 +118,30 @@ class TrsmGpuKernel : public GpuKernel {
return true;
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
blas_handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCublasHandle();
auto A_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto b_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
is_null_input_ = CHECK_NULL_INPUT(A_shape) || CHECK_NULL_INPUT(b_shape);
is_null_input_ =
CHECK_SHAPE_NULL(A_shape, kernel_name, "input_A") || CHECK_SHAPE_NULL(b_shape, kernel_name, "input_b");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'TrsmGpuKernel', input is null";
InitSizeLists();
return true;
}
if (A_shape[kDim0] != A_shape[kDim1]) {
MS_LOG(EXCEPTION) << "wrong array shape, A should be a squre matrix, but got [" << A_shape[kDim0] << " X "
<< A_shape[kDim1] << "]";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the shape of input should be square matrix, but got ["
<< A_shape[kDim0] << " X " << A_shape[kDim1] << "]";
}
m_ = A_shape[kDim0];
if (b_shape.size() != kAVectorxDimNum && b_shape.size() != kAMatrixDimNum) {
MS_LOG(EXCEPTION) << "wrong array shape, b should be 1D or 2D, but got [" << b_shape.size() << "] dimensions";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input should be 1 or 2, but got "
<< b_shape.size();
}
if (b_shape[kDim0] != m_) {
MS_LOG(EXCEPTION) << "wrong array shape, b should match the shape of A, excepted [" << m_ << "] but got ["
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the shape of input should be [" << m_ << "], but got ["
<< b_shape[kDim0] << "]";
}
if (b_shape.size() == kAVectorxDimNum || (b_shape.size() == kAMatrixDimNum && b_shape[kDim1] == 1)) {
@ -158,7 +160,7 @@ class TrsmGpuKernel : public GpuKernel {
} else if (trans == "T") {
trans_ = CUBLAS_OP_N;
} else {
MS_LOG(EXCEPTION) << "trans should be in [N, T], but got [" << trans << "]";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', trans should be in [N, T], but got [" << trans << "]";
}
bool lower = AnfAlgo::GetNodeAttr<bool>(kernel_node, "lower");

View File
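
The mapping above, where attribute trans == "T" selects CUBLAS_OP_N, looks inverted but follows from cuBLAS being column-major: row-major data handed to cuBLAS is implicitly already transposed. A sketch of that mapping, assuming the "N" branch outside the hunk selects CUBLAS_OP_T:

#include <cublas_v2.h>
#include <stdexcept>
#include <string>

// Row-major data seen through column-major cuBLAS is its own transpose, so the
// user-facing "N"/"T" attribute maps to the opposite cublasOperation_t.
inline cublasOperation_t MapTransAttr(const std::string &trans) {
  if (trans == "N") return CUBLAS_OP_T;
  if (trans == "T") return CUBLAS_OP_N;
  throw std::invalid_argument("trans should be in [N, T], but got [" + trans + "]");
}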

@ -64,29 +64,28 @@ class UnaryOpComplexGpuKernel : public GpuKernel {
break;
}
default: {
MS_LOG(EXCEPTION) << "Unary operation " << unary_op_type_ << " is not supported.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << ", only support these types: Real, Imag, Conj currently, "
<< "but got " << unary_op_type_;
}
}
return true;
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
GetOpType(kernel_node);
std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node);
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but unary op needs 1 inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 1, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but unary op needs 1 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be 3, but got " << output_num;
}
auto input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name_, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "UnaryOpGpuKernel input is null";
InitSizeLists();
return true;
}
@ -123,8 +122,8 @@ class UnaryOpComplexGpuKernel : public GpuKernel {
unary_op_type_ = iter->second;
return;
}
MS_LOG(EXCEPTION) << "operation " << kernel_name << " is not supported.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << ", only support these types: Real, Imag, Conj currently, but got "
<< kernel_name;
}
private:

View File

@ -184,7 +184,9 @@ class UnaryOpGpuKernel : public GpuKernel {
break;
}
default: {
MS_LOG(EXCEPTION) << "Unary operation " << unary_op_type_ << " is not supported.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << ", only support these types: Exp, Expm1, Log, Log1p, Erf, Erfc,"
<< " Neg, Reciprocal, Square, Sqrt, Rsqrt, Sin, Cos, Asin, ACos, Atan, Asinh, Acosh, Abs, "
<< "Floor, Rint, Round, Real, Imag, Sign, Conj currently, but got " << unary_op_type_;
}
}
return true;
@ -193,23 +195,22 @@ class UnaryOpGpuKernel : public GpuKernel {
std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node);
auto iter = kUnaryOpTypeMap.find(kernel_name);
if (iter == kUnaryOpTypeMap.end()) {
MS_LOG(EXCEPTION) << "Unary operation " << kernel_name << " is not supported.";
MS_LOG(EXCEPTION) << "For '" << kernel_name << ", only support these types: Exp, Expm1, Log, Log1p, Erf, Erfc,"
<< " Neg, Reciprocal, Square, Sqrt, Rsqrt, Sin, Cos, Asin, ACos, Atan, Asinh, Acosh, Abs, "
<< "Floor, Rint, Round, Real, Imag, Sign, Conj currently, but got " << kernel_name;
}
unary_op_type_ = iter->second;
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but unary op needs 1 inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 1, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but unary op needs 1 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num;
}
auto input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'UnaryOpGpuKernel', input is null";
InitSizeLists();
return true;
}

View File

@ -112,7 +112,9 @@ class UnaryGradOpGpuKernel : public GpuKernel {
break;
}
default: {
MS_LOG(EXCEPTION) << "Unary grad operation " << unary_grad_op_type_ << " is not supported.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << ", only support these types: SqrtGrad, RsqrtGrad, AsinGrad, "
<< "ACosGrad, AtanGrad, AsinhGrad, AcoshGrad, ReciprocalGrad currently, but got "
<< unary_grad_op_type_;
}
}
return true;
@ -121,23 +123,22 @@ class UnaryGradOpGpuKernel : public GpuKernel {
std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node);
auto iter = kUnaryGradOpTypeMap.find(kernel_name);
if (iter == kUnaryGradOpTypeMap.end()) {
MS_LOG(EXCEPTION) << "Unary grad operation " << kernel_name << " is not supported.";
MS_LOG(EXCEPTION) << "For '" << kernel_name << ", only support these types: SqrtGrad, RsqrtGrad, AsinGrad, "
<< "ACosGrad, AtanGrad, AsinhGrad, AcoshGrad, ReciprocalGrad currently, but got "
<< kernel_name;
}
unary_grad_op_type_ = iter->second;
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but unary grad op needs 2 inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 2, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but unary grad op needs 1 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num;
}
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'UnaryOpGradGpuKernel', input is null";
InitSizeLists();
return true;
}
@ -145,9 +146,8 @@ class UnaryGradOpGpuKernel : public GpuKernel {
input_size_ *= input_shape[i];
}
auto dx_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
is_null_input_ = CHECK_NULL_INPUT(dx_shape);
is_null_input_ = CHECK_SHAPE_NULL(dx_shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "UnaryGradOpGpuKernel input 1 is null";
InitSizeLists();
return true;
}
@ -155,7 +155,8 @@ class UnaryGradOpGpuKernel : public GpuKernel {
dx_size_ *= dx_shape[i];
}
if (input_size_ != dx_size_) {
MS_LOG(WARNING) << "UnaryGradOpGpuKernel inputs should be same, but got " << input_size_ << " and " << dx_size_;
MS_LOG(WARNING) << "For '" << kernel_name << "', both inputs should be equal, but got " << input_size_ << " and "
<< dx_size_;
InitSizeLists();
return true;
}

View File

@ -19,6 +19,7 @@
#include <cublas_v2.h>
#include <cuda_runtime_api.h>
#include <vector>
#include <string>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "backend/kernel_compiler/gpu/kernel_constants.h"
@ -98,7 +99,8 @@ class UpdateThorGradientGpuKernel : public GpuKernel {
CUDA_R_32F, algo_),
"cublasSgemm Call Fail");
} catch (const std::exception &e) {
MS_LOG(EXCEPTION) << "Encountered an exception: " << e.what() << "when invoke cubals cublasGemmStridedBatchedEx";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << ", encountered an exception: " << e.what()
<< " when invoke cubals cublasGemmStridedBatchedEx";
}
auto r_input_addr = workspace1_addr;
@ -147,9 +149,7 @@ class UpdateThorGradientGpuKernel : public GpuKernel {
bool Init(const CNodePtr &kernel_node) override {
kernel_node_ = kernel_node;
handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCublasHandle();
if (!SetProperty(kernel_node)) {
return false;
}
(void)SetProperty(kernel_node);
InitSizeLists();
return true;
}
@ -188,22 +188,20 @@ class UpdateThorGradientGpuKernel : public GpuKernel {
}
private:
bool SetProperty(const CNodePtr &kernel_node) {
void SetProperty(const CNodePtr &kernel_node) {
auto matrix_a_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto gradient_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
auto matrix_g_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
is_null_input_ =
CHECK_NULL_INPUT(matrix_a_shape) || CHECK_NULL_INPUT(gradient_shape) || CHECK_NULL_INPUT(matrix_g_shape);
is_null_input_ = CHECK_SHAPE_NULL(matrix_a_shape, kernel_name_, "matrix_a") ||
CHECK_SHAPE_NULL(gradient_shape, kernel_name_, "gradient") ||
CHECK_SHAPE_NULL(matrix_g_shape, kernel_name_, "matrix_g");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'UpdateThorGradientGpuKernel', input is null";
InitSizeLists();
return true;
return;
}
split_dim = LongToSize(GetAttr<int64_t>(kernel_node, "split_dim"));
if (split_dim == 0) {
MS_LOG(ERROR) << "Divide by zero, split_dim can not be zero.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << ", divide by zero, split_dim cannot be 0, but got " << split_dim;
}
gradient_size.batch_h = gradient_shape[0] / split_dim;
gradient_size.batch_w = gradient_shape[1] / split_dim;
@ -244,7 +242,6 @@ class UpdateThorGradientGpuKernel : public GpuKernel {
gradient_size.ori_w = gradient_shape[1];
gradient_size.ori_h = gradient_shape[0];
gradient_size.dtype = GetCudaDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 1)));
return true;
}
size_t split_dim;

View File

@ -71,13 +71,15 @@ class NcclCollectiveGpuKernel : public NcclGpuKernel {
break;
}
default: {
MS_LOG(EXCEPTION) << "Kernel type " << nccl_kernel_type_ << " is not supported.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << ", only support these types: AllReduce, AllGather, Broadcast, "
<< "ReduceScatter currently, but got " << nccl_kernel_type_;
}
}
return true;
}
bool Init(const CNodePtr &kernel_node) override {
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
nccl_data_type_ = nccl_dtype(AnfAlgo::GetInputDeviceDataType(kernel_node, 0));
@ -87,9 +89,8 @@ class NcclCollectiveGpuKernel : public NcclGpuKernel {
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
for (size_t i = 0; i < input_num; ++i) {
auto shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, i);
is_null_input_ = CHECK_NULL_INPUT(shape);
is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name_, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'NcclCollectiveGpuKernel', input is null";
InitSizeLists();
return true;
}
@ -103,9 +104,8 @@ class NcclCollectiveGpuKernel : public NcclGpuKernel {
}
for (size_t i = 0; i < output_num; ++i) {
auto shape = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node, i);
is_null_input_ = CHECK_NULL_INPUT(shape);
is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name_, "output");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'NcclCollectiveGpuKernel', output is null";
InitSizeLists();
return true;
}
@ -201,7 +201,8 @@ class NcclCollectiveGpuKernel : public NcclGpuKernel {
std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node);
auto iter = kNcclTypeMap.find(kernel_name);
if (iter == kNcclTypeMap.end()) {
MS_LOG(EXCEPTION) << "Kernel " << kernel_name << " is not supported.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << ", only support these types: AllReduce, AllGather, Broadcast, "
<< "ReduceScatter currently, but got " << kernel_name;
} else {
nccl_kernel_type_ = iter->second;
}
@ -220,7 +221,8 @@ class NcclCollectiveGpuKernel : public NcclGpuKernel {
} else if (type == "prod") {
nccl_reduce_type_ = ncclProd;
} else {
MS_LOG(EXCEPTION) << "Nccl reduce type " << type << " is not supported.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << ", only support these types: sum, max, min, prod currently, "
<< "but got " << type;
}
}

View File

@ -52,13 +52,15 @@ class NcclP2PGpuKernel : public NcclGpuKernel {
break;
}
default: {
MS_LOG(EXCEPTION) << "Kernel type " << nccl_kernel_type_ << " is not supported.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << ", only support these types: AllToAllv, NeighborExchange "
<< "currently, but got " << nccl_kernel_type_;
}
}
return true;
}
bool Init(const CNodePtr &kernel_node) override {
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
InferCommType(kernel_node);
@ -73,9 +75,8 @@ class NcclP2PGpuKernel : public NcclGpuKernel {
}
for (size_t i = 0; i < input_num; ++i) {
auto shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, i);
is_null_input_ = CHECK_NULL_INPUT(shape);
is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name_, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'NcclP2PGpuKernel', input shape is null ";
InitSizeLists();
return true;
}
@ -88,9 +89,8 @@ class NcclP2PGpuKernel : public NcclGpuKernel {
}
for (size_t i = 0; i < output_num; ++i) {
auto shape = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node, i);
is_null_input_ = CHECK_NULL_INPUT(shape);
is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name_, "output");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'NcclP2PGpuKernel', output shape is null";
InitSizeLists();
return true;
}
@ -157,10 +157,12 @@ class NcclP2PGpuKernel : public NcclGpuKernel {
// send_rank_id and recv rank_id size needs to be equal to input_list size
if (send_rank_ids.size() != input_size_list_.size()) {
MS_LOG(ERROR) << "Trying to use AlltoAllv, but send_rank_ids vector size not equals to input_list size.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << ", trying to use AlltoAllv, the size of send_rank_ids vector "
<< "should be " << input_size_list_.size() << ", but got " << send_rank_ids.size();
}
if (recv_rank_ids.size() != output_size_list_.size()) {
MS_LOG(ERROR) << "Trying to use AlltoAllv, but recv_rank_ids vector size not equals to output_list size.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << ", trying to use AlltoAllv, the size of recv_rank_ids vector "
<< "should be " << output_size_list_.size() << ", but got " << recv_rank_ids.size();
}
// This implementation refers to NVIDIA NCCL 2.11 doc.
@ -182,7 +184,8 @@ class NcclP2PGpuKernel : public NcclGpuKernel {
std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node);
auto iter = kNcclTypeMap.find(kernel_name);
if (iter == kNcclTypeMap.end()) {
MS_LOG(EXCEPTION) << "Kernel " << kernel_name << " is not supported.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << ", only support these types: AllToAllv, NeighborExchange "
<< "currently, but got " << kernel_name;
} else {
nccl_kernel_type_ = iter->second;
}

View File
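
The AlltoAllv path above first pins the send_rank_ids/recv_rank_ids sizes to the input/output lists, then exchanges buffers peer by peer. Following the NCCL 2.11 doc the comment cites, a hedged sketch of that exchange using paired ncclSend/ncclRecv calls inside a group (names and bookkeeping here are illustrative, not the kernel's actual members):

#include <cstddef>
#include <vector>
#include <nccl.h>

// All-to-all-v through grouped point-to-point calls, as the NCCL docs suggest:
// inside a group, every send is matched with its peer's recv before any blocks.
inline ncclResult_t AllToAllV(const std::vector<void *> &send_bufs, const std::vector<size_t> &send_counts,
                              const std::vector<int> &send_rank_ids, const std::vector<void *> &recv_bufs,
                              const std::vector<size_t> &recv_counts, const std::vector<int> &recv_rank_ids,
                              ncclDataType_t dtype, ncclComm_t comm, cudaStream_t stream) {
  (void)ncclGroupStart();
  for (size_t i = 0; i < send_rank_ids.size(); ++i) {
    (void)ncclSend(send_bufs[i], send_counts[i], dtype, send_rank_ids[i], comm, stream);
  }
  for (size_t i = 0; i < recv_rank_ids.size(); ++i) {
    (void)ncclRecv(recv_bufs[i], recv_counts[i], dtype, recv_rank_ids[i], comm, stream);
  }
  return ncclGroupEnd();  // reports whether the whole exchange was enqueued
}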

@ -46,21 +46,20 @@ class NcclRecvGpuKernel : public NcclGpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
MS_EXCEPTION_IF_NULL(kernel_node);
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but NCCL receive needs 1 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num;
}
src_rank_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "src_rank"));
group_name_ = GetAttr<std::string>(kernel_node, kAttrGroup);
nccl_data_type_ = nccl_dtype(AnfAlgo::GetOutputDeviceDataType(kernel_node, 0));
auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(output_shape);
is_null_input_ = CHECK_SHAPE_NULL(output_shape, kernel_name, "output");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'NcclRecvGpuKernel', output is null";
InitSizeLists();
return true;
}

View File

@ -46,12 +46,12 @@ class NcclSendGpuKernel : public NcclGpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
MS_EXCEPTION_IF_NULL(kernel_node);
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but NCCL send needs 1 input.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 1, but got " << input_num;
}
dest_rank_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "dest_rank"));
@ -60,9 +60,8 @@ class NcclSendGpuKernel : public NcclGpuKernel {
MS_LOG(INFO) << "NcclSend dest rank is " << dest_rank_ << ", group name is " << group_name_;
auto input_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'NcclSendGpuKernel', input is null";
InitSizeLists();
return true;
}

View File

@ -41,6 +41,9 @@ class SyncBatchNormGpuKernel : public NcclGpuKernel {
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (is_null_input_) {
return true;
}
T *x = GetDeviceAddress<T>(inputs, 0);
S *scale = GetDeviceAddress<S>(inputs, 1);
S *bias = GetDeviceAddress<S>(inputs, 2);
@ -78,6 +81,7 @@ class SyncBatchNormGpuKernel : public NcclGpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
auto root_rank = AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr(kAttrRootRank);
if (root_rank) {
root_ = static_cast<int>(GetValue<int64_t>(root_rank));
@ -86,24 +90,22 @@ class SyncBatchNormGpuKernel : public NcclGpuKernel {
group_name_ = GetAttr<std::string>(kernel_node, kAttrGroup);
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 5) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but SyncBatchNorm needs 5 inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 5, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 5) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but SyncBatchNorm needs 5 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 5, but got " << output_num;
}
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
if (CHECK_NULL_INPUT(input_shape)) {
MS_LOG(WARNING) << "SyncBatchNorm input is null";
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
InitSizeLists();
return true;
}
auto input_shape_dims = input_shape.size();
if (input_shape_dims != 4 && input_shape_dims != 2) {
MS_LOG(EXCEPTION) << "Tensor shape is " << input_shape.size()
<< ", SyncBatchNormGpuKernel input should be 2D or 4D";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input only should be 2 or 4, but got "
<< input_shape_dims;
}
input_size_ = 1;
for (auto dim : input_shape) {
@ -175,6 +177,7 @@ class SyncBatchNormGpuKernel : public NcclGpuKernel {
input_size_list_.clear();
output_size_list_.clear();
workspace_size_list_.clear();
is_null_input_ = false;
}
protected:
@ -233,6 +236,7 @@ class SyncBatchNormGpuKernel : public NcclGpuKernel {
string group_name_;
int root_;
cudaStream_t comm_stream_;
bool is_null_input_;
};
} // namespace kernel
} // namespace mindspore

View File

@ -41,6 +41,9 @@ class SyncBatchNormGradGpuKernel : public NcclGpuKernel {
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (is_null_input_) {
return true;
}
T *dy = GetDeviceAddress<T>(inputs, 0);
T *x_input = GetDeviceAddress<T>(inputs, 1);
S *scale = GetDeviceAddress<S>(inputs, 2);
@ -65,6 +68,7 @@ class SyncBatchNormGradGpuKernel : public NcclGpuKernel {
return true;
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
auto root_rank = AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr(kAttrRootRank);
if (root_rank) {
root_ = static_cast<int>(GetValue<int64_t>(root_rank));
@ -73,24 +77,22 @@ class SyncBatchNormGradGpuKernel : public NcclGpuKernel {
group_name_ = GetAttr<std::string>(kernel_node, kAttrGroup);
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 5) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but SyncBatchNormGrad needs 5 inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 5, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 3) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but SyncBatchNormGrad needs 5 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 3, but got " << output_num;
}
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
if (CHECK_NULL_INPUT(input_shape)) {
MS_LOG(WARNING) << "SyncBatchNormGrad input is null";
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
InitSizeLists();
return true;
}
auto input_shape_dims = input_shape.size();
if (input_shape_dims != 4 && input_shape_dims != 2) {
MS_LOG(EXCEPTION) << "Tensor shape is " << input_shape.size()
<< ", SyncBatchNormGpuGrad input should be 2D or 4D";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input only should be 2 or 4, but got "
<< input_shape_dims;
}
input_size_ = 1;
for (auto dim : input_shape) {
@ -155,6 +157,7 @@ class SyncBatchNormGradGpuKernel : public NcclGpuKernel {
input_size_list_.clear();
output_size_list_.clear();
workspace_size_list_.clear();
is_null_input_ = false;
}
protected:
@ -201,6 +204,7 @@ class SyncBatchNormGradGpuKernel : public NcclGpuKernel {
string group_name_;
int root_;
cudaStream_t comm_stream_;
bool is_null_input_;
};
} // namespace kernel
} // namespace mindspore

View File

@ -18,6 +18,7 @@
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_OTHER_ASSIGN_GPU_KERNEL_H_
#include <vector>
#include <string>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
@ -52,15 +53,13 @@ class AssignGpuKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
if (!CheckParam(kernel_node)) {
return false;
}
(void)CheckParam(kernel_node);
auto shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(shape);
is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name_, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'AssignGpuKernel', input is null";
InitSizeLists();
return true;
}
@ -80,19 +79,16 @@ class AssignGpuKernel : public GpuKernel {
}
private:
bool CheckParam(const CNodePtr &kernel_node) {
void CheckParam(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but AssignGpuKernel needs 2 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 2, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but AssignGpuKernel needs 1 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be 1, but got " << output_num;
}
return true;
}
std::vector<size_t> input_size_list_;

View File

@ -18,6 +18,7 @@
#define MINDSPORE_CCSRC_KERNEL_GPU_OTHER_BOUNDINGBOX_DECODE_GPU_KERNEL_H
#include <vector>
#include <string>
#include <algorithm>
#include "backend/kernel_compiler/gpu/cuda_impl/boundingbox_decode_impl.cuh"
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
@ -46,7 +47,8 @@ class BoundingBoxDecodeGpuKernel : public GpuKernel {
T *bboxes_addr = GetDeviceAddress<T>(outputs, 0);
if (inputs[0]->size != inputs[1]->size) {
MS_LOG(ERROR) << "Rois box size must equal with deltas box size -" << inputs[1]->size << ", but got"
MS_LOG(ERROR) << "For '" << kernel_name_
<< "', rois box size must equal with deltas box size: " << inputs[1]->size << ", but got "
<< inputs[0]->size;
return false;
}
@ -54,7 +56,7 @@ class BoundingBoxDecodeGpuKernel : public GpuKernel {
const size_t coordinate = 4;
const size_t block_size = inputs[0]->size / sizeof(T);
if ((block_size % coordinate) != 0) {
MS_LOG(ERROR) << "The size of the box must be a multiple of 4.";
MS_LOG(ERROR) << "For '" << kernel_name_ << ", the size of the box should be a multiple of 4.";
return false;
}
@ -65,11 +67,11 @@ class BoundingBoxDecodeGpuKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but BoundingBoxDecode needs 2 inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 2, but got " << input_num;
}
rois_size_ = sizeof(T);
deltas_size_ = sizeof(T);
@ -78,9 +80,10 @@ class BoundingBoxDecodeGpuKernel : public GpuKernel {
auto logits_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto labels_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(logits_shape) || CHECK_NULL_INPUT(labels_shape) || CHECK_NULL_INPUT(output_shape);
is_null_input_ = CHECK_SHAPE_NULL(logits_shape, kernel_name_, "anchor_box") ||
CHECK_SHAPE_NULL(labels_shape, kernel_name_, "deltas") ||
CHECK_SHAPE_NULL(output_shape, kernel_name_, "output");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'BoundingBoxDecodeGpuKernel', input or output is null";
InitSizeLists();
return true;
}
@ -111,7 +114,7 @@ class BoundingBoxDecodeGpuKernel : public GpuKernel {
means_.emplace_back(mean);
}
} else {
MS_LOG(EXCEPTION) << "Attribute means type is invalid.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', attribute means type is invalid.";
}
auto stds = prim->GetAttr("stds");
@ -124,7 +127,7 @@ class BoundingBoxDecodeGpuKernel : public GpuKernel {
stds_.emplace_back(std);
}
} else {
MS_LOG(EXCEPTION) << "Attribute stds type is invalid.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', attribute stds type is invalid.";
}
std::vector<int64_t> max_shape_me = GetAttr<std::vector<int64_t>>(kernel_node, "max_shape");
@ -133,11 +136,13 @@ class BoundingBoxDecodeGpuKernel : public GpuKernel {
wh_ratio_clip_ = GetAttr<float>(kernel_node, "wh_ratio_clip");
if (means_.size() < coordinate_size || stds_.size() < coordinate_size) {
MS_LOG(EXCEPTION) << "The size of means or stds is less than 4.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the both size of means or stds cannot be less than 4, but got"
<< " the size of means: " << means_.size() << ", the size of stds: " << stds_.size();
}
if (max_shape_.size() < 2) {
MS_LOG(EXCEPTION) << "The size of max_shape is less than 2.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the size of max_shape cannot be less than 2, but got "
<< max_shape_.size();
}
return true;

View File

@ -18,6 +18,7 @@
#define MINDSPORE_CCSRC_KERNEL_GPU_OTHER_BOUNDINGBOX_ENCODE_GPU_KERNEL_H
#include <vector>
#include <string>
#include "backend/kernel_compiler/gpu/cuda_impl/boundingbox_encode_impl.cuh"
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
@ -28,7 +29,6 @@ template <typename T>
class BoundingBoxEncodeGpuKernel : public GpuKernel {
public:
BoundingBoxEncodeGpuKernel() : anchor_size_(0), groundtruth_size_(0), deltas_size_(0), is_null_input_(false) {}
~BoundingBoxEncodeGpuKernel() override = default;
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
@ -45,7 +45,8 @@ class BoundingBoxEncodeGpuKernel : public GpuKernel {
T *deltas_addr = GetDeviceAddress<T>(outputs, 0);
if (inputs[0]->size != inputs[1]->size) {
MS_LOG(ERROR) << "Anchor box size must equal with groundtruth box size -" << inputs[1]->size << ", but got"
MS_LOG(ERROR) << "For '" << kernel_name_
<< "', anchor box size must equal with groundtruth box size: " << inputs[1]->size << ", but got "
<< inputs[0]->size;
return false;
}
@ -53,7 +54,7 @@ class BoundingBoxEncodeGpuKernel : public GpuKernel {
const size_t coordinate = 4;
const size_t block_size = inputs[0]->size / sizeof(T);
if ((block_size % coordinate) != 0) {
MS_LOG(ERROR) << "The size of the box must be a multiple of 4.";
MS_LOG(ERROR) << "For '" << kernel_name_ << ", the size of the box should be a multiple of 4.";
return false;
}
@ -64,11 +65,11 @@ class BoundingBoxEncodeGpuKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but BoundingBoxEncode needs 2 inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 2, but got " << input_num;
}
anchor_size_ = sizeof(T);
groundtruth_size_ = sizeof(T);
@ -77,9 +78,10 @@ class BoundingBoxEncodeGpuKernel : public GpuKernel {
auto logits_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto labels_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(logits_shape) || CHECK_NULL_INPUT(labels_shape) || CHECK_NULL_INPUT(output_shape);
is_null_input_ = CHECK_SHAPE_NULL(logits_shape, kernel_name_, "anchor_box") ||
CHECK_SHAPE_NULL(labels_shape, kernel_name_, "groundtruth_box") ||
CHECK_SHAPE_NULL(output_shape, kernel_name_, "output");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'BoundingBoxEncodeGpuKernel', input or output is null";
InitSizeLists();
return true;
}
@ -110,7 +112,7 @@ class BoundingBoxEncodeGpuKernel : public GpuKernel {
means_.emplace_back(mean);
}
} else {
MS_LOG(EXCEPTION) << "Attribute means type is invalid.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', attribute means type is invalid.";
}
auto stds = prim->GetAttr("stds");
MS_EXCEPTION_IF_NULL(stds);
@ -122,11 +124,12 @@ class BoundingBoxEncodeGpuKernel : public GpuKernel {
stds_.emplace_back(std);
}
} else {
MS_LOG(EXCEPTION) << "Attribute stds type is invalid.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', attribute stds type is invalid.";
}
if (means_.size() < coordinate_size || stds_.size() < coordinate_size) {
MS_LOG(EXCEPTION) << "The size of means or stds is less than 4.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the both size of means or stds cannot be less than 4, but got"
<< " the size of means: " << means_.size() << ", the size of stds: " << stds_.size();
}
return true;

View File

@ -18,6 +18,7 @@
#define MINDSPORE_CCSRC_KERNEL_GPU_OTHER_CHECK_VALID_GPU_KERNEL_H
#include <vector>
#include <string>
#include "backend/kernel_compiler/gpu/cuda_impl/check_valid_impl.cuh"
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
@ -28,7 +29,6 @@ template <typename T, typename S>
class CheckValidGpuKernel : public GpuKernel {
public:
CheckValidGpuKernel() : anchor_boxes_size_(0), img_metas_size_(0), valid_size_(0), is_null_input_(false) {}
~CheckValidGpuKernel() override = default;
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
@ -48,7 +48,7 @@ class CheckValidGpuKernel : public GpuKernel {
const size_t coordinate = 4;
const size_t block_size = inputs[0]->size / sizeof(T);
if ((block_size % coordinate) != 0) {
MS_LOG(ERROR) << "The size of the box must be a multiple of 4.";
MS_LOG(ERROR) << "For '" << kernel_name_ << ", the size of the box should be a multiple of 4.";
return false;
}
@ -58,11 +58,11 @@ class CheckValidGpuKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but CheckValid needs 2 inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 2, but got " << input_num;
}
anchor_boxes_size_ = sizeof(T);
img_metas_size_ = sizeof(T);
@ -71,10 +71,10 @@ class CheckValidGpuKernel : public GpuKernel {
auto anchor_boxes_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto img_metas_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
auto valid_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
is_null_input_ =
CHECK_NULL_INPUT(anchor_boxes_shape) || CHECK_NULL_INPUT(img_metas_shape) || CHECK_NULL_INPUT(valid_shape);
is_null_input_ = CHECK_SHAPE_NULL(anchor_boxes_shape, kernel_name_, "bboxes") ||
CHECK_SHAPE_NULL(img_metas_shape, kernel_name_, "img_metas") ||
CHECK_SHAPE_NULL(valid_shape, kernel_name_, "output");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'CheckValidGpuKernel', input or output is null";
InitSizeLists();
return true;
}

View File

@ -62,18 +62,17 @@ class GpuConvertToDynamicShapeGpuKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
MS_EXCEPTION_IF_NULL(kernel_node);
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
size_t input_count = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_count != 1) {
MS_LOG(ERROR) << input_count << "inputs were provided, but GpuConvertToDynamicShapeGpuKernel expects 1.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 1, but got " << input_count;
}
input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape_);
is_null_input_ = CHECK_SHAPE_NULL(input_shape_, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'GpuConvertToDynamicShapeGpuKernel', input is null";
InitSizeLists();
return true;
}

View File

@ -29,7 +29,6 @@ template <typename T>
class IOUGpuKernel : public GpuKernel {
public:
IOUGpuKernel() : gt_boxes_size_(0), anchor_boxes_size_(0), iou_size_(0), mode_(0), is_null_input_(false) {}
~IOUGpuKernel() override = default;
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
@ -50,7 +49,7 @@ class IOUGpuKernel : public GpuKernel {
const size_t block_size_0 = inputs[0]->size / sizeof(T);
const size_t block_size_1 = inputs[1]->size / sizeof(T);
if ((block_size_0 % coordinate) != 0 || (block_size_1 % coordinate) != 0) {
MS_LOG(ERROR) << "The size of the box must be a multiple of 4.";
MS_LOG(ERROR) << "For '" << kernel_name_ << ", the size of the box should be a multiple of 4.";
return false;
}
@ -65,8 +64,7 @@ class IOUGpuKernel : public GpuKernel {
MS_EXCEPTION_IF_NULL(kernel_node);
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but IOU needs 2 inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 2, but got " << input_num;
}
gt_boxes_size_ = sizeof(T);
anchor_boxes_size_ = sizeof(T);
@ -75,10 +73,10 @@ class IOUGpuKernel : public GpuKernel {
auto gt_boxes_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto anchor_boxes_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
auto iou_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
is_null_input_ =
CHECK_NULL_INPUT(gt_boxes_shape) || CHECK_NULL_INPUT(anchor_boxes_shape) || CHECK_NULL_INPUT(iou_shape);
is_null_input_ = CHECK_SHAPE_NULL(gt_boxes_shape, kernel_name_, "anchor_boxes") ||
CHECK_SHAPE_NULL(anchor_boxes_shape, kernel_name_, "gt_boxes") ||
CHECK_SHAPE_NULL(iou_shape, kernel_name_, "output");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'IOUGpuKernel', input or output is null";
InitSizeLists();
return true;
}
@ -103,8 +101,7 @@ class IOUGpuKernel : public GpuKernel {
} else if (mode == "iof") {
mode_ = 1;
} else {
MS_LOG(ERROR) << "Mode only support 'iou' or 'iof'.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', mode only support 'iou' or 'iof'.";
}
return true;
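
The mode attribute selects the denominator of the overlap ratio: 'iou' (mode_ == 0) divides the intersection by the union of both boxes, while 'iof' (mode_ == 1) divides by the area of the first box only. A host-side sketch for a single box pair (boxes as [x1, y1, x2, y2]; which box counts as foreground for 'iof' is an assumption here):

#include <algorithm>

// Overlap ratio for one box pair; the GPU kernel evaluates this per pair.
float BoxOverlap(const float a[4], const float b[4], int mode) {
  const float iw = std::max(0.0f, std::min(a[2], b[2]) - std::max(a[0], b[0]));
  const float ih = std::max(0.0f, std::min(a[3], b[3]) - std::max(a[1], b[1]));
  const float inter = iw * ih;
  const float area_a = (a[2] - a[0]) * (a[3] - a[1]);
  const float area_b = (b[2] - b[0]) * (b[3] - b[1]);
  const float denom = (mode == 0) ? area_a + area_b - inter  // "iou": union
                                  : area_a;                  // "iof": first box
  return denom > 0.0f ? inter / denom : 0.0f;
}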

View File

@ -66,26 +66,25 @@ class BatchNormFold2GpuKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
InitResource();
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != INPUT_NUM) {
MS_LOG(ERROR) << "Argument number is " << input_num << ", but BatchNormFold2GpuKernel needs " << INPUT_NUM
<< " inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be " << INPUT_NUM << ", but got "
<< input_num;
}
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "BatchNormFold2GpuKernel input is null";
InitSizeLists();
return true;
}
if (input_shape.size() != 4) {
MS_LOG(ERROR) << "BatchNormFold2GpuKernel input shape needs (N,C,H,W).";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input should be 4, but got "
<< input_shape.size();
}
batch_size_ = input_shape[0];
channel_ = input_shape[1];
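
This 4-D (N, C, H, W) rank check recurs in every fold/correction kernel below with an identical message; a shared helper would keep the wording in one place. A sketch of such a helper (hypothetical, using a plain exception where the kernels use MS_LOG(EXCEPTION)):

#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

// Throws with the commit's message format when the input is not NCHW.
void CheckRank4(const std::vector<size_t> &shape, const std::string &kernel_name) {
  if (shape.size() != 4) {
    std::ostringstream oss;
    oss << "For '" << kernel_name << "', the dimension of input should be 4, but got "
        << shape.size();
    throw std::runtime_error(oss.str());
  }
}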

View File

@ -94,26 +94,25 @@ class BatchNormFold2GradGpuKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
InitResource();
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != INPUT_NUM) {
MS_LOG(ERROR) << "Argument number is " << input_num << ", but BatchNormFold2GradGpuKernel needs " << INPUT_NUM
<< " inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be " << INPUT_NUM << ", but got "
<< input_num;
}
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "BatchNormFold2GradGpuKernel input is null";
InitSizeLists();
return true;
}
if (input_shape.size() != 4) {
MS_LOG(ERROR) << "BatchNormFold2GradGpuKernel input shape needs (N,C,H,W).";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input should be 4, but got "
<< input_shape.size();
}
batch_size_ = input_shape[0];
channel_ = input_shape[1];

View File

@ -99,18 +99,17 @@ class BatchNormFoldGpuKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
InitResource();
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 4) {
MS_LOG(ERROR) << "Input number is " << input_num << " but BatchNormFold GpuKernel OP needs 4 input.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 4, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 4) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but BatchNormFold GpuKernel OP needs 4 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 4, but got " << output_num;
}
auto prim = AnfAlgo::GetCNodePrimitive(kernel_node);
@ -122,16 +121,14 @@ class BatchNormFoldGpuKernel : public GpuKernel {
freeze_bn_ = static_cast<int>(GetValue<int64_t>(prim->GetAttr("freeze_bn")));
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'BatchNormFoldGpuKernel', input is null";
InitSizeLists();
return true;
}
if (input_shape.size() != 4) {
MS_LOG(ERROR) << "Input shape is " << input_shape.size()
<< ", but BatchNormFold GpuKernel OP needs 4DTensor input.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input should be 4, but got "
<< input_shape.size();
}
CheckTensorSize({input_shape});
batch_ = input_shape[0];
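
CheckTensorSize is defined elsewhere in the codebase; presumably it guards against element counts that overflow the 32-bit indexing many of these CUDA kernels use. A hypothetical sketch of that guard, under that assumption:

#include <cstdint>
#include <limits>
#include <stdexcept>
#include <vector>

// Assumed intent: the total element count must fit in int32 for kernel indexing.
void CheckTensorSizeSketch(const std::vector<size_t> &shape) {
  uint64_t elements = 1;
  for (size_t d : shape) elements *= d;
  if (elements > static_cast<uint64_t>(std::numeric_limits<int32_t>::max())) {
    throw std::runtime_error("tensor too large for 32-bit kernel indexing");
  }
}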

View File

@ -78,18 +78,17 @@ class BatchNormFoldGradGpuKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != INPUT_NUM) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but BatchNormFoldGrad GpuKernel OP needs " << INPUT_NUM
<< " inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be " << INPUT_NUM << ", but got "
<< input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but BatchNormFoldGrad GpuKernel OP needs 4 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num;
}
auto prim = AnfAlgo::GetCNodePrimitive(kernel_node);
@ -99,16 +98,14 @@ class BatchNormFoldGradGpuKernel : public GpuKernel {
freeze_bn_ = static_cast<int>(GetValue<int64_t>(prim->GetAttr("freeze_bn")));
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'BatchNormFoldGradGpuKernel', input is null";
InitSizeLists();
return true;
}
if (input_shape.size() != 4) {
MS_LOG(ERROR) << "Input shape is " << input_shape.size()
<< ", but BatchNormFoldGrad GpuKernel OP needs 4DTensor input.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input should be 4, but got "
<< input_shape.size();
}
batch_ = input_shape[0];
channel_ = input_shape[1];
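
Attributes such as freeze_bn arrive as int64_t and are narrowed with static_cast<int>; the cast is silent, so a range check like the one below (hypothetical, not in the commit) would make the narrowing explicit:

#include <cstdint>
#include <limits>
#include <stdexcept>
#include <string>

// Explicit-narrowing version of static_cast<int>(GetValue<int64_t>(attr)).
int NarrowAttr(int64_t v, const std::string &attr_name) {
  if (v < std::numeric_limits<int>::min() || v > std::numeric_limits<int>::max()) {
    throw std::out_of_range("attribute '" + attr_name + "' does not fit in int");
  }
  return static_cast<int>(v);
}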

View File

@ -49,24 +49,23 @@ class CorrectionMulGpuKernel : public GpuKernel {
return true;
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
InitResource();
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 3) {
MS_LOG(ERROR) << "Argument number is " << input_num << ", but CorrectionMulGpuKernel needs 3.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 3, but got " << input_num;
}
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'CorrectionMulGpuKernel', input is null";
InitSizeLists();
return true;
}
if (input_shape.size() != 4) {
MS_LOG(ERROR) << "CorrectionMulGpuKernel input shape needs (N,C,H,W).";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input should be 4, but got "
<< input_shape.size();
}
batch_size_ = input_shape[0];
channel_ = input_shape[1];

View File

@ -55,24 +55,23 @@ class CorrectionMulGradGpuKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
InitResource();
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 4) {
MS_LOG(ERROR) << "Argument number is " << input_num << ", but CorrectionMulGradGpuKernel needs 4.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 4, but got " << input_num;
}
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'CorrectionMulGradGpuKernel', input is null";
InitSizeLists();
return true;
}
if (input_shape.size() != 4) {
MS_LOG(ERROR) << "CorrectionMulGradGpuKernel input shape needs (N,C,H,W).";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input should be 4, but got "
<< input_shape.size();
}
batch_size_ = input_shape[0];
channel_ = input_shape[1];

View File

@ -45,17 +45,16 @@ const std::vector<size_t> &FakeLearnedScaleQuantPerChannelGpuKernel::GetWorkspac
}
bool FakeLearnedScaleQuantPerChannelGpuKernel::Init(const CNodePtr &kernel_node) {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 3) {
MS_LOG(EXCEPTION) << "Input number is " << input_num
<< ", but FakeLearnedScaleQuantPerChannel GpuKernel OP needs 3 Input.";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 3, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "Output number is " << output_num
<< ", but FakeLearnedScaleQuantPerChannel GpuKernel OP needs 1 output.";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num;
}
quant_delay_ = static_cast<int>(GetValue<int64_t>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("quant_delay")));