support int64 shape

This commit is contained in:
Yi Huaijie 2020-11-04 10:24:11 +08:00
parent 0c588571b4
commit d7faa77b5e
534 changed files with 3827 additions and 3525 deletions

View File

@ -116,7 +116,7 @@ void ParseAttrValue(const std::string &type, const std::string &attr_name, const
MS_EXCEPTION_IF_NULL(node_attr);
MS_EXCEPTION_IF_NULL(value);
if (type == "int") {
auto attr_value = GetValue<int>(value);
auto attr_value = static_cast<int>(GetValue<int64_t>(value));
(*node_attr)[attr_name].set_i(attr_value);
} else if (type == "str") {
auto attr_value = GetValue<std::string>(value);
@ -128,15 +128,15 @@ void ParseAttrValue(const std::string &type, const std::string &attr_name, const
auto attr_value = GetValue<float>(value);
(*node_attr)[attr_name].set_f(attr_value);
} else if (type == "listInt") {
std::vector<int> attr_value;
std::vector<int64_t> attr_value;
auto value_type = value->type();
MS_EXCEPTION_IF_NULL(value_type);
auto value_type_str = value_type->ToString();
if (value_type_str == "Int32") {
int data = GetValue<int>(value);
if (value_type_str == "Int64") {
int64_t data = GetValue<int64_t>(value);
attr_value.push_back(data);
} else {
attr_value = GetValue<std::vector<int>>(value);
attr_value = GetValue<std::vector<int64_t>>(value);
}
mindspore::AttrValue input_shape_attr;
mindspore::AttrValue_ArrayValue *input_shape_attr_list = input_shape_attr.mutable_array();

View File

@ -168,7 +168,7 @@ class CNodeDecoder {
output_formats_.push_back(output_desc[kJsonKeyFormat]);
output_types_.push_back(DtypeToTypeId(output_desc[kJsonKeyDataType]));
auto get_item =
func_graph->NewCNode({NewValueNode(prim::kPrimTupleGetItem), cnode_, NewValueNode(SizeToInt(j))});
func_graph->NewCNode({NewValueNode(prim::kPrimTupleGetItem), cnode_, NewValueNode(SizeToLong(j))});
func_graph->AddNode(get_item);
nodes_map_[output_desc[kJsonKeyTensorName]] = get_item;
}

View File

@ -35,7 +35,10 @@ std::vector<int> GetDynInputSize(const AnfNodePtr &anf_node) {
auto primitive = AnfAlgo::GetCNodePrimitive(anf_node);
MS_EXCEPTION_IF_NULL(primitive);
if (primitive->HasAttr(kAttrDynInputSizes)) {
dyn_input_sizes = GetValue<const std::vector<int>>(primitive->GetAttr(kAttrDynInputSizes));
std::vector<int64_t> dyn_input_sizes_me =
GetValue<const std::vector<int64_t>>(primitive->GetAttr(kAttrDynInputSizes));
(void)std::transform(dyn_input_sizes_me.begin(), dyn_input_sizes_me.end(), std::back_inserter(dyn_input_sizes),
[](const int64_t &value) { return static_cast<int>(value); });
}
return dyn_input_sizes;
}
@ -256,7 +259,7 @@ void AkgKernelJsonGenerator::GetAttrJson(const AnfNodePtr &anf_node, const std::
std::string type = op_attr->type();
(*attr_json)[kJsonKeyDataType] = type;
if (type == "int") {
(*attr_json)[kJsonKeyValue] = GetValue<int>(attr_value);
(*attr_json)[kJsonKeyValue] = static_cast<int>(GetValue<int64_t>(attr_value));
} else if (type == "str") {
(*attr_json)[kJsonKeyValue] = GetValue<std::string>(attr_value);
} else if (type == "bool") {
@ -264,7 +267,11 @@ void AkgKernelJsonGenerator::GetAttrJson(const AnfNodePtr &anf_node, const std::
} else if (type == "float") {
(*attr_json)[kJsonKeyValue] = GetValue<float>(attr_value);
} else if (type == "listInt") {
(*attr_json)[kJsonKeyValue] = GetValue<std::vector<int>>(attr_value);
std::vector<int> list_int;
std::vector<int64_t> list_int_me = GetValue<std::vector<int64_t>>(attr_value);
(void)std::transform(list_int_me.begin(), list_int_me.end(), std::back_inserter(list_int),
[](const int64_t &value) { return static_cast<int>(value); });
(*attr_json)[kJsonKeyValue] = list_int;
} else if (type == "listStr") {
std::vector<std::string> data_format;
if (op_attr->name() == kArgDataformat) {

View File

@ -235,7 +235,7 @@ size_t GetDtypeNbyte(const std::string &dtypes) {
}
bool SetInputKernelBuilderInfo(const std::vector<std::shared_ptr<OpIOInfo>> &inputs, size_t real_input_num,
size_t builder_idex, const std::vector<int> &dyn_input_sizes,
size_t builder_idex, const std::vector<int64_t> &dyn_input_sizes,
const std::shared_ptr<KernelBuildInfo::KernelBuildInfoBuilder> &builder) {
MS_EXCEPTION_IF_NULL(builder);
@ -262,7 +262,7 @@ bool SetInputKernelBuilderInfo(const std::vector<std::shared_ptr<OpIOInfo>> &inp
return false;
}
for (int t = 0; t < dyn_input_sizes[dyn_input_idx]; t++) {
for (int64_t t = 0; t < dyn_input_sizes[dyn_input_idx]; t++) {
kernel_info_index++;
auto type_id = DtypeToTypeId(dtypes[builder_idex]);
inputs_device_type.push_back(type_id);
@ -376,11 +376,11 @@ bool ParseMetadata(const CNodePtr &kernel_node, const std::shared_ptr<const OpIn
size_t real_output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
std::vector<std::shared_ptr<OpIOInfo>> inputs = op_info_ptr->inputs_ptr();
std::vector<std::shared_ptr<OpIOInfo>> outputs = op_info_ptr->outputs_ptr();
std::vector<int> dyn_input_sizes;
std::vector<int64_t> dyn_input_sizes;
auto primitive = AnfAlgo::GetCNodePrimitive(kernel_node);
MS_EXCEPTION_IF_NULL(primitive);
if (primitive->GetAttr("dyn_input_sizes") != nullptr) {
dyn_input_sizes = GetValue<std::vector<int>>(primitive->GetAttr("dyn_input_sizes"));
dyn_input_sizes = GetValue<std::vector<int64_t>>(primitive->GetAttr("dyn_input_sizes"));
}
if (inputs.size() > 0) {
MS_EXCEPTION_IF_NULL(inputs[0]);
@ -552,11 +552,11 @@ std::vector<std::pair<AnfNodePtr, std::pair<size_t, size_t>>> GetInputIndex(cons
continue;
}
std::vector<int> dyn_input_sizes;
std::vector<int64_t> dyn_input_sizes;
auto prim = AnfAlgo::GetCNodePrimitive(anf_node);
MS_EXCEPTION_IF_NULL(prim);
if (prim->GetAttr(kAttrDynInputSizes) != nullptr) {
dyn_input_sizes = GetValue<const std::vector<int>>(prim->GetAttr(kAttrDynInputSizes));
dyn_input_sizes = GetValue<const std::vector<int64_t>>(prim->GetAttr(kAttrDynInputSizes));
}
if (dyn_input_sizes.empty()) {
@ -764,28 +764,26 @@ bool IsWeightBoundary(const AnfNodePtr &node) {
return false;
}
std::vector<int> GetReduceAttrAxis(const CNodePtr &cnode) {
std::vector<int64_t> GetReduceAttrAxis(const CNodePtr &cnode) {
if (AnfAlgo::GetInputTensorNum(cnode) != AnfAlgo::GetOutputTensorNum(cnode) &&
AnfAlgo::GetInputTensorNum(cnode) != 1) {
MS_LOG(EXCEPTION) << "the kind of reduce node [" << cnode->DebugString()
<< "] is not single input or single output ";
}
std::vector<int> axis;
std::vector<int64_t> axis;
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(cnode, 0);
auto primitive = AnfAlgo::GetCNodePrimitive(cnode);
MS_EXCEPTION_IF_NULL(primitive);
auto axis_attr = primitive->GetAttr(kAxis);
if (axis_attr == nullptr) {
MS_LOG(ERROR) << "This node does't have axie attr.";
return std::vector<int>();
return std::vector<int64_t>();
}
auto type = axis_attr->type();
MS_EXCEPTION_IF_NULL(type);
std::vector<int> axis_list;
if (type->ToString() == kTypeInt32) {
axis_list.emplace_back(GetValue<int>(axis_attr));
std::vector<int64_t> axis_list;
if (axis_attr->isa<Int64Imm>()) {
axis_list.emplace_back(GetValue<int64_t>(axis_attr));
} else {
axis_list = GetValue<std::vector<int>>(axis_attr);
axis_list = GetValue<std::vector<int64_t>>(axis_attr);
}
for (const auto &elem : axis_list) {
if (elem < 0) {

View File

@ -100,7 +100,7 @@ void GetFuncGraphOutputNodes(const FuncGraphPtr &func_graph, std::vector<AnfNode
bool GetInputTensorValue(const AnfNodePtr &anf_node, size_t input_idx, nlohmann::json *const node_json);
void GetGraphRealOutput(const FuncGraphPtr &func_graph, std::vector<std::pair<AnfNodePtr, size_t>> *node_list);
bool IsWeightBoundary(const AnfNodePtr &node);
std::vector<int> GetReduceAttrAxis(const CNodePtr &cnode);
std::vector<int64_t> GetReduceAttrAxis(const CNodePtr &cnode);
std::string GetProcessorStr(const AnfNodePtr &anf_node);
template <typename T>

View File

@ -27,7 +27,7 @@ void ArgmaxCPUKernel::InitKernel(const CNodePtr &kernel_node) {
batch_size_ = shape[0];
class_num_ = shape[1];
int axis = AnfAlgo::GetNodeAttr<int>(kernel_node, AXIS);
int64_t axis = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, AXIS);
if (axis != -1 && axis != 1) {
MS_LOG(EXCEPTION) << "argmax kernel not support axis " << axis;
}

View File

@ -22,12 +22,12 @@ namespace kernel {
void ConcatCPUKernel::InitKernel(const CNodePtr &kernel_node) {
CheckParam(kernel_node);
axis_ = AnfAlgo::GetNodeAttr<int>(kernel_node, AXIS);
axis_ = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, AXIS);
auto input_1_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
if (axis_ < 0) {
axis_ = axis_ + SizeToInt(input_1_shape.size());
axis_ = axis_ + SizeToLong(input_1_shape.size());
}
axis_ += 4 - input_1_shape.size();
axis_ += 4 - SizeToLong(input_1_shape.size());
auto input_num = AnfAlgo::GetInputTensorNum(kernel_node);
for (size_t i = 0; i < input_num; i++) {

View File

@ -36,7 +36,7 @@ class ConcatCPUKernel : public CPUKernel {
void CheckParam(const CNodePtr &kernel_node);
void CopyDataToOutput(const std::vector<kernel::AddressPtr> &inputs, size_t dim0, size_t dim1, size_t dim2,
float **output_addr, size_t *buff_size);
int axis_;
int64_t axis_;
std::vector<std::vector<size_t>> input_shape_list_;
std::vector<size_t> output_shape_;
};

View File

@ -22,7 +22,7 @@ namespace mindspore {
namespace kernel {
void EmbeddingLookUpCommGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
CheckParam(kernel_node);
split_num_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "split_num");
split_num_ = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "split_num");
MS_LOG(INFO) << "split_num: " << split_num_;
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
if (input_shape[0] % split_num_ != 0) {
@ -47,9 +47,9 @@ bool EmbeddingLookUpCommGradCPUKernel::Launch(const std::vector<kernel::AddressP
MS_LOG(DEBUG) << "output addr: " << output_addr << "output size: " << output_size;
memset_s(output_addr, output_size, 0, output_size);
const std::vector<int> &rank_group = {0, 1, 2, 3, 4, 5, 6, 7};
size_t input_split_lens = input_size / split_num_ / sizeof(float_t);
size_t output_split_lens = output_size / split_num_ / sizeof(float_t);
for (int i = 0; i < split_num_; i++) {
size_t input_split_lens = input_size / LongToSize(split_num_) / sizeof(float_t);
size_t output_split_lens = output_size / LongToSize(split_num_) / sizeof(float_t);
for (int64_t i = 0; i < split_num_; i++) {
MPIAllGather(input_addr + i * input_split_lens, output_addr + i * output_split_lens, rank_group, input_split_lens);
}
#if defined(_WIN32) || defined(_WIN64)

View File

@ -34,7 +34,7 @@ class EmbeddingLookUpCommGradCPUKernel : public CPUKernel {
private:
void CheckParam(const CNodePtr &kernel_node);
int split_num_;
int64_t split_num_;
};
MS_REG_CPU_KERNEL(EmbeddingLookupCommGrad,

View File

@ -61,7 +61,7 @@ void EmbeddingLookUpCPUKernel::InitKernel(const CNodePtr &kernel_node) {
indices_lens_ *= shape;
}
if (AnfAlgo::HasNodeAttr(kAttrOffset, kernel_node)) {
offset_ = AnfAlgo::GetNodeAttr<int>(kernel_node, kAttrOffset);
offset_ = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, kAttrOffset);
}
indices_data_type_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 1);
}

View File

@ -37,7 +37,7 @@ class EmbeddingLookUpCPUKernel : public CPUKernel {
protected:
void CheckParam(const CNodePtr &kernel_node);
int offset_{0};
int64_t offset_{0};
size_t indices_lens_{1};
size_t first_dim_size_{1};
size_t outer_dim_size_{1};

View File

@ -23,9 +23,9 @@ void GatherV2CPUKernel::InitKernel(const CNodePtr &kernel_node) {
input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
indices_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);
axis_ = AnfAlgo::GetNodeAttr<int>(kernel_node, AXIS);
axis_ = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, AXIS);
if (axis_ < 0) {
axis_ = axis_ + SizeToInt(input_shape_.size());
axis_ = axis_ + SizeToLong(input_shape_.size());
}
axis_ += 4 - input_shape_.size();
CPUKernelUtils::ExpandDimsTo4(&input_shape_);
@ -75,7 +75,7 @@ void GatherV2CPUKernel::CopyDataToOutput(const std::vector<kernel::AddressPtr> &
MS_LOG(EXCEPTION) << "The indices value is less than 0.";
}
size_t index = IntToSize(indices_addr[i]);
if (index >= input_shape_[IntToSize(axis_)]) {
if (index >= input_shape_[LongToSize(axis_)]) {
auto ret = memset_s(*output_addr, *buff_size, 0., num * sizeof(float));
if (ret != EOK) {
MS_LOG(EXCEPTION) << "memset failed.";

View File

@ -39,7 +39,7 @@ class GatherV2CPUKernel : public CPUKernel {
std::vector<size_t> input_shape_;
std::vector<size_t> indices_shape_;
std::vector<size_t> output_shape_;
int axis_;
int64_t axis_;
};
MS_REG_CPU_KERNEL(

View File

@ -15,6 +15,7 @@
*/
#include "backend/kernel_compiler/cpu/mkldnn/conv2d_cpu_kernel.h"
#include <string>
#include <algorithm>
#include "utils/ms_utils.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
#include "runtime/device/cpu/cpu_device_address.h"
@ -30,7 +31,7 @@ void Conv2dCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_LOG(EXCEPTION) << "conv2d only support nchw input!";
}
std::vector<size_t> kernel_size({weight_shape[2], weight_shape[3]});
size_t group = IntToSize(AnfAlgo::GetNodeAttr<int>(kernel_node, GROUP));
size_t group = LongToSize(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, GROUP));
if (group != 1) {
if (src_shape[1] % group != 0) {
MS_LOG(EXCEPTION) << "conv2d channels should be divided by group!";
@ -41,8 +42,14 @@ void Conv2dCPUKernel::InitKernel(const CNodePtr &kernel_node) {
dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape);
dnnl::memory::desc weights_desc = GetDefaultMemDesc(weight_shape);
dnnl::memory::desc dst_desc = GetDefaultMemDesc(dst_shape);
auto stride_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, STRIDE);
auto dilation_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, DILATION);
std::vector<int> stride_ori;
std::vector<int> dilation_ori;
auto stride_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, STRIDE);
auto dilation_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, DILATION);
(void)std::transform(stride_me.begin(), stride_me.end(), std::back_inserter(stride_ori),
[](const int64_t &value) { return static_cast<int>(value); });
(void)std::transform(dilation_me.begin(), dilation_me.end(), std::back_inserter(dilation_ori),
[](const int64_t &value) { return static_cast<int>(value); });
if (stride_ori.size() != 4 || stride_ori[2] != stride_ori[3]) {
MS_LOG(EXCEPTION) << "conv2d only support equal stride, and stride must be 4d!";
}

View File

@ -15,6 +15,7 @@
*/
#include "backend/kernel_compiler/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.h"
#include <string>
#include <algorithm>
#include "utils/ms_utils.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
#include "runtime/device/cpu/cpu_device_address.h"
@ -30,7 +31,7 @@ void Conv2dGradFilterCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_LOG(EXCEPTION) << ("conv2d grad filter only support nchw input!");
}
std::vector<size_t> kernel_size({weight_shape[2], weight_shape[3]});
size_t group = IntToSize(AnfAlgo::GetNodeAttr<int>(kernel_node, GROUP));
size_t group = LongToSize(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, GROUP));
if (group != 1) {
if (src_shape[1] % group != 0) {
MS_LOG(EXCEPTION) << "conv2d channels should be divided by group!";
@ -41,8 +42,14 @@ void Conv2dGradFilterCPUKernel::InitKernel(const CNodePtr &kernel_node) {
dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape);
dnnl::memory::desc weights_desc = GetDefaultMemDesc(weight_shape);
dnnl::memory::desc dst_desc = GetDefaultMemDesc(dst_shape);
auto stride_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, STRIDE);
auto dilation_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, DILATION);
std::vector<int> stride_ori;
std::vector<int> dilation_ori;
auto stride_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, STRIDE);
auto dilation_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, DILATION);
(void)std::transform(stride_me.begin(), stride_me.end(), std::back_inserter(stride_ori),
[](const int64_t &value) { return static_cast<int>(value); });
(void)std::transform(dilation_me.begin(), dilation_me.end(), std::back_inserter(dilation_ori),
[](const int64_t &value) { return static_cast<int>(value); });
if (stride_ori.size() != 2 || stride_ori[0] != stride_ori[1]) {
MS_LOG(EXCEPTION) << "Conv2dGradFilterCPUKernel only support equal stride, and stride must be 2d!";
}

View File

@ -15,6 +15,7 @@
*/
#include "backend/kernel_compiler/cpu/mkldnn/conv2d_grad_input_cpu_kernel.h"
#include <string>
#include <algorithm>
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
#include "runtime/device/cpu/cpu_device_address.h"
#include "utils/ms_utils.h"
@ -30,7 +31,7 @@ void Conv2dGradInputCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_LOG(EXCEPTION) << "conv2d grad filter only support nchw input!";
}
std::vector<size_t> kernel_size({weight_shape[2], weight_shape[3]});
size_t group = IntToSize(AnfAlgo::GetNodeAttr<int>(kernel_node, GROUP));
size_t group = LongToSize(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, GROUP));
if (group != 1) {
if (src_shape[1] % group != 0) {
MS_LOG(EXCEPTION) << "conv2d channels should be divided by group!";
@ -42,8 +43,14 @@ void Conv2dGradInputCPUKernel::InitKernel(const CNodePtr &kernel_node) {
dnnl::memory::desc weights_desc = GetDefaultMemDesc(weight_shape);
dnnl::memory::desc dst_desc = GetDefaultMemDesc(dst_shape);
auto stride_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, STRIDE);
auto dilation_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, DILATION);
std::vector<int> stride_ori;
std::vector<int> dilation_ori;
auto stride_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, STRIDE);
auto dilation_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, DILATION);
(void)std::transform(stride_me.begin(), stride_me.end(), std::back_inserter(stride_ori),
[](const int64_t &value) { return static_cast<int>(value); });
(void)std::transform(dilation_me.begin(), dilation_me.end(), std::back_inserter(dilation_ori),
[](const int64_t &value) { return static_cast<int>(value); });
if (stride_ori.size() != 2 || stride_ori[0] != stride_ori[1]) {
MS_LOG(EXCEPTION) << "Conv2dGradInputCPUKernel only support equal stride, and stride must be 2d!";
}

View File

@ -76,9 +76,9 @@ void LstmCPUKernel::CheckParam(const CNodePtr &kernel_node) {
std::vector<size_t> src_h_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
std::vector<size_t> src_c_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 2);
bidirectional_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "bidirectional");
input_size_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "input_size");
hidden_size_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "hidden_size");
num_layers_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "num_layers");
input_size_ = static_cast<int>(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "input_size"));
hidden_size_ = static_cast<int>(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "hidden_size"));
num_layers_ = static_cast<int>(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "num_layers"));
has_bias_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "has_bias");
batch_size_ = SizeToInt(src_shape[1]);
seq_len_ = SizeToInt(src_shape[0]);

View File

@ -94,9 +94,9 @@ void LSTMGradCPUKernel::CheckParam(const CNodePtr &kernel_node) {
std::vector<size_t> src_h_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
std::vector<size_t> src_c_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 2);
bidirectional_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "bidirectional");
input_size_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "input_size");
hidden_size_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "hidden_size");
num_layers_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "num_layers");
input_size_ = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "input_size");
hidden_size_ = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "hidden_size");
num_layers_ = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "num_layers");
has_bias_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "has_bias");
batch_size_ = SizeToInt(src_shape[1]);
seq_len_ = SizeToInt(src_shape[0]);
@ -104,20 +104,20 @@ void LSTMGradCPUKernel::CheckParam(const CNodePtr &kernel_node) {
if (bidirectional_) {
num_directions_ = 2;
}
const int gate_size = 4 * hidden_size_;
const int64_t gate_size = 4 * hidden_size_;
if (num_layers_ <= 0) {
MS_LOG(EXCEPTION) << "layers must be greater than zero!";
}
if (num_layers_ > kMaxLSTMLayer) {
MS_LOG(EXCEPTION) << "layers must be lower than 100!";
}
for (int i = 0; i < num_layers_; ++i) {
for (int64_t i = 0; i < num_layers_; ++i) {
weight_size_ += gate_size * (i == 0 ? input_size_ : hidden_size_ * num_directions_);
weight_h_size_ += gate_size * hidden_size_;
}
weight_size_ = weight_size_ * num_directions_;
weight_h_size_ = weight_h_size_ * num_directions_;
if (num_directions_ * num_layers_ != SizeToInt(src_h_shape[0])) {
if (num_directions_ * num_layers_ != SizeToLong(src_h_shape[0])) {
MS_LOG(EXCEPTION) << "error iteration shape!";
}
if (src_shape.size() != 3 || src_h_shape.size() != 3 || src_c_shape.size() != 3) {

View File

@ -44,13 +44,13 @@ class LSTMGradCPUKernel : public MKLCPUKernel {
const dnnl::memory &diff_bias_memory);
void ResetMemory(const dnnl::memory &mem, string name);
void CheckParam(const CNodePtr &kernel_node);
int weight_size_ = 0;
int weight_h_size_ = 0;
int input_size_;
int hidden_size_;
int num_layers_;
int batch_size_;
int seq_len_;
int64_t weight_size_ = 0;
int64_t weight_h_size_ = 0;
int64_t input_size_;
int64_t hidden_size_;
int64_t num_layers_;
int64_t batch_size_;
int64_t seq_len_;
int num_directions_;
bool bidirectional_;
bool has_bias_;

View File

@ -56,7 +56,10 @@ void MKLCPUKernel::GetPadding(const CNodePtr &kernel_node, const std::string &pa
padding_r->emplace_back(0);
padding_r->emplace_back(0);
} else {
std::vector<int> pad = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, PAD_LIST);
std::vector<int> pad;
std::vector<int64_t> pad_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, PAD_LIST);
(void)std::transform(pad_me.begin(), pad_me.end(), std::back_inserter(pad),
[](const int64_t &value) { return static_cast<int>(value); });
padding_l->emplace_back(pad[0]);
padding_l->emplace_back(pad[1]);
padding_r->emplace_back(pad[2]);

View File

@ -28,8 +28,14 @@ void PoolingCPUKernel::InitKernel(const CNodePtr &kernel_node) {
std::vector<size_t> dst_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape);
dnnl::memory::desc dst_desc = GetDefaultMemDesc(dst_shape);
std::vector<int> origin_kernel_sizes = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, KSIZE);
std::vector<int> strides = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, STRIDES);
std::vector<int> origin_kernel_sizes;
std::vector<int> strides;
std::vector<int64_t> kernel_sizes_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, KSIZE);
std::vector<int64_t> strides_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, STRIDES);
(void)std::transform(kernel_sizes_me.begin(), kernel_sizes_me.end(), std::back_inserter(origin_kernel_sizes),
[](const int64_t &value) { return static_cast<int>(value); });
(void)std::transform(strides_me.begin(), strides_me.end(), std::back_inserter(strides),
[](const int64_t &value) { return static_cast<int>(value); });
if (origin_kernel_sizes.size() != 4 || strides.size() != 4) {
MS_LOG(EXCEPTION) << "invalid kernel size " << origin_kernel_sizes.size() << " or stride size " << strides.size();
}

View File

@ -27,8 +27,14 @@ void PoolingGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
src_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
dst_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
std::vector<int> kernel_sizes = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, KSIZE);
std::vector<int> strides = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, STRIDES);
std::vector<int> kernel_sizes;
std::vector<int> strides;
auto kernel_sizes_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, KSIZE);
auto strides_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, STRIDES);
(void)std::transform(kernel_sizes_me.begin(), kernel_sizes_me.end(), std::back_inserter(kernel_sizes),
[](const int64_t &value) { return static_cast<int>(value); });
(void)std::transform(strides_me.begin(), strides_me.end(), std::back_inserter(strides),
[](const int64_t &value) { return static_cast<int>(value); });
if (kernel_sizes.size() != 4 || strides.size() != 4 || src_shape_.size() != 4 || dst_shape_.size() != 4) {
MS_LOG(EXCEPTION) << "pooling grad invalid input size";
}

View File

@ -14,6 +14,7 @@
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/mkldnn/softmax_cpu_kernel.h"
#include <algorithm>
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
#include "runtime/device/cpu/cpu_device_address.h"
#include "utils/ms_utils.h"
@ -23,7 +24,10 @@ namespace kernel {
void SoftmaxCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
std::vector<int> axis_list = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, AXIS);
std::vector<int> axis_list;
std::vector<int64_t> axis_list_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, AXIS);
(void)std::transform(axis_list_me.begin(), axis_list_me.end(), std::back_inserter(axis_list),
[](const int64_t &value) { return static_cast<int>(value); });
if (axis_list.size() != 1) {
MS_LOG(EXCEPTION) << "cpu softmax only support input axis size 1";
}

View File

@ -24,14 +24,14 @@ void OneHotCPUKernel::InitKernel(const CNodePtr &kernel_node) {
if (output_shape.size() < 2) {
MS_LOG(EXCEPTION) << "invalid output shape size: " << output_shape.size();
}
int axis = AnfAlgo::GetNodeAttr<int>(kernel_node, AXIS);
if (axis != -1 && IntToSize(axis) >= output_shape.size()) {
int64_t axis = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, AXIS);
if (axis != -1 && LongToSize(axis) >= output_shape.size()) {
MS_LOG(EXCEPTION) << "invalid axis: " << axis;
}
if (axis == -1) {
axis_ = output_shape.size() - 1;
} else {
axis_ = IntToSize(axis);
axis_ = LongToSize(axis);
}
depth_ = output_shape[axis_];
stride_ = 1;

View File

@ -44,7 +44,8 @@ void EmbeddingLookUpProxyKernel::InitKernel(const CNodePtr &kernel_node) {
values.insert(values.end(), output_shape.begin(), output_shape.end());
MS_LOG(INFO) << "Init embedding lookup proxy kernel, input shape:" << input_shape
<< ", indices_shape:" << indices_shape << ", output_shape:" << output_shape;
std::vector<int> lens{SizeToInt(input_shape.size()), SizeToInt(indices_shape.size()), SizeToInt(output_shape.size())};
std::vector<int64_t> lens{SizeToLong(input_shape.size()), SizeToLong(indices_shape.size()),
SizeToLong(output_shape.size())};
if (mindspore::ps::Util::IsRoleOfWorker()) {
mindspore::ps::worker.AddEmbeddingTable(key_, input_shape[axis]);
mindspore::ps::worker.InitPSEmbeddingTable(keys, values, lens);

View File

@ -39,11 +39,11 @@ class PushKernel : public CPUKernel {
}
std::vector<size_t> keys;
std::vector<uintptr_t> addrs;
std::vector<int> sizes;
std::vector<int64_t> sizes;
for (auto input : inputs) {
keys.push_back(key_);
addrs.push_back(reinterpret_cast<uintptr_t>(input->addr));
sizes.push_back(SizeToInt(input->size) / sizeof(T));
sizes.push_back(SizeToLong(input->size) / sizeof(T));
}
mindspore::ps::worker.Push(keys, addrs, sizes);
auto ret = memcpy_s(outputs[0]->addr, outputs[0]->size, &key_, sizeof(size_t));
@ -56,8 +56,9 @@ class PushKernel : public CPUKernel {
void Init(const CNodePtr &kernel_node) {
key_ = AnfAlgo::GetNodeAttr<size_t>(kernel_node, kAttrPsKey);
auto optim_input_shapes = AnfAlgo::GetNodeAttr<std::vector<std::vector<int>>>(kernel_node, "optim_input_shapes");
std::vector<int> only_shape_indices = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "only_shape_indices");
auto optim_input_shapes =
AnfAlgo::GetNodeAttr<std::vector<std::vector<int64_t>>>(kernel_node, "optim_input_shapes");
auto only_shape_indices = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, "only_shape_indices");
MS_LOG(INFO) << "Key " << key_ << " optimizer input shapes are:" << optim_input_shapes;
MS_LOG(INFO) << "Only init shape indices are " << only_shape_indices;
for (size_t i = 0; i < optim_input_shapes.size(); i++) {
@ -66,7 +67,7 @@ class PushKernel : public CPUKernel {
if (std::count(only_shape_indices.begin(), only_shape_indices.end(), i) == 0) {
size_t size = sizeof(T);
for (size_t j = 0; j < shape.size(); j++) {
size *= shape[j];
size *= LongToSize(shape[j]);
}
input_size_list_.push_back(size);
}

View File

@ -16,6 +16,7 @@
#include <string>
#include <vector>
#include <algorithm>
#include "backend/kernel_compiler/cpu/reduce_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"
@ -87,7 +88,10 @@ bool ReduceCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
void ReduceCPUKernel::CheckAxis(const CNodePtr &kernel_node) {
auto axis_addr = AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr(AXIS);
if (axis_addr->isa<ValueTuple>()) {
auto attr_axis = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, AXIS);
std::vector<int> attr_axis;
std::vector<int64_t> attr_axis_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, AXIS);
(void)std::transform(attr_axis_me.begin(), attr_axis_me.end(), std::back_inserter(attr_axis),
[](const int64_t &value) { return static_cast<int>(value); });
if (attr_axis.size() > shape_.size()) {
MS_LOG(EXCEPTION) << "invalid axis size: " << axis_.size();
} else if (attr_axis.empty()) {
@ -105,8 +109,8 @@ void ReduceCPUKernel::CheckAxis(const CNodePtr &kernel_node) {
axis_.push_back(IntToSize(axis));
}
}
} else if (axis_addr->isa<Int32Imm>()) {
int axis = AnfAlgo::GetNodeAttr<int>(kernel_node, AXIS);
} else if (axis_addr->isa<Int64Imm>()) {
int axis = static_cast<int64_t>(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, AXIS));
while (axis < 0) {
axis += SizeToInt(shape_.size());
}

View File

@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <algorithm>
#include "backend/kernel_compiler/cpu/slice_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"
@ -21,17 +22,26 @@ namespace kernel {
void SliceCPUKernel::InitKernel(const CNodePtr &kernel_node) {
CheckParam(kernel_node);
input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
begin_ = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, BEGIN);
std::vector<int64_t> begin_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, BEGIN);
(void)std::transform(begin_me.begin(), begin_me.end(), std::back_inserter(begin_),
[](const int64_t &value) { return static_cast<int>(value); });
auto prim = AnfAlgo::GetCNodePrimitive(kernel_node);
MS_EXCEPTION_IF_NULL(prim);
auto strides = prim->GetAttr(STRIDES);
if (strides != nullptr) {
strides_ = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, STRIDES);
end_ = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, END);
std::vector<int64_t> strides_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, STRIDES);
std::vector<int64_t> end_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, END);
(void)std::transform(strides_me.begin(), strides_me.end(), std::back_inserter(strides_),
[](const int64_t &value) { return static_cast<int>(value); });
(void)std::transform(end_me.begin(), end_me.end(), std::back_inserter(end_),
[](const int64_t &value) { return static_cast<int>(value); });
TransArg();
ClipBegin();
} else {
auto sizes = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, SIZE);
std::vector<int> sizes;
std::vector<int64_t> sizes_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, SIZE);
(void)std::transform(sizes_me.begin(), sizes_me.end(), std::back_inserter(sizes),
[](const int64_t &value) { return static_cast<int>(value); });
if (sizes.size() != input_shape_.size() || begin_.size() != input_shape_.size()) {
MS_LOG(EXCEPTION) << "begin|size|input size must be equal";
}

View File

@ -14,6 +14,7 @@
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/slice_grad_cpu_kernel.h"
#include <algorithm>
#include "runtime/device/cpu/cpu_device_address.h"
#include "ir/primitive.h"
@ -22,19 +23,27 @@ namespace kernel {
void SliceGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
CheckParam(kernel_node);
output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);
begin_ = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, BEGIN);
std::vector<int64_t> begin_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, BEGIN);
(void)std::transform(begin_me.begin(), begin_me.end(), std::back_inserter(begin_),
[](const int64_t &value) { return static_cast<int>(value); });
auto prim = AnfAlgo::GetCNodePrimitive(kernel_node);
MS_EXCEPTION_IF_NULL(prim);
auto strides = prim->GetAttr(STRIDES);
if (strides != nullptr) {
strides_ = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, STRIDES);
end_ = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, END);
std::vector<int64_t> strides_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, STRIDES);
std::vector<int64_t> end_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, END);
(void)std::transform(strides_me.begin(), strides_me.end(), std::back_inserter(strides_),
[](const int64_t &value) { return static_cast<int>(value); });
(void)std::transform(end_me.begin(), end_me.end(), std::back_inserter(end_),
[](const int64_t &value) { return static_cast<int>(value); });
if (strides_.size() != end_.size() || strides_.size() != output_shape_.size()) {
MS_LOG(EXCEPTION) << "stride|end|input size must be equal";
}
FormatArgs(true);
} else {
size_ = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, SIZE);
std::vector<int64_t> size_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, SIZE);
(void)std::transform(size_me.begin(), size_me.end(), std::back_inserter(size_),
[](const int64_t &value) { return static_cast<int>(value); });
if (size_.size() != output_shape_.size() || begin_.size() != output_shape_.size()) {
MS_LOG(EXCEPTION) << "begin|size|input size must be equal";
}

View File

@ -15,6 +15,7 @@
*/
#include "backend/kernel_compiler/cpu/tile_cpu_kernel.h"
#include <algorithm>
#include "runtime/device/cpu/cpu_device_address.h"
namespace mindspore {
@ -23,7 +24,9 @@ void TileCPUKernel::InitKernel(const CNodePtr &kernel_node) {
CheckParam(kernel_node);
x_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
y_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);
multiples_ = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "multiples");
std::vector<int64_t> multiples_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, "multiples");
(void)std::transform(multiples_me.begin(), multiples_me.end(), std::back_inserter(multiples_),
[](const int64_t &value) { return static_cast<int>(value); });
dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0);
}

View File

@ -15,6 +15,7 @@
*/
#include "backend/kernel_compiler/cpu/transpose_cpu_kernel.h"
#include <algorithm>
#include "runtime/device/cpu/cpu_device_address.h"
namespace mindspore {
namespace kernel {
@ -22,7 +23,9 @@ const size_t kMaxDim = 100;
void TransposeCPUFwdKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
axis_ = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "perm");
std::vector<int64_t> axis_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, "perm");
(void)std::transform(axis_me.begin(), axis_me.end(), std::back_inserter(axis_),
[](const int64_t &value) { return static_cast<int>(value); });
if (shape_.size() != axis_.size()) {
MS_LOG(EXCEPTION) << "The size of input shape and transpose axis shape must be equal.";
}

View File

@ -43,9 +43,9 @@ template <typename T>
void UniqueCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) {
auto x_addr = reinterpret_cast<T *>(inputs[0]->addr);
auto y_addr = reinterpret_cast<T *>(outputs[0]->addr);
auto idx_addr = reinterpret_cast<int *>(outputs[1]->addr);
auto idx_addr = reinterpret_cast<int64_t *>(outputs[1]->addr);
std::unordered_map<T, int> uniq;
std::unordered_map<T, int64_t> uniq;
int n = SizeToInt(n_);
uniq.reserve(n * 2);
for (int i = 0, j = 0; i < n; ++i) {

View File

@ -64,7 +64,7 @@ class ArgmaxGpuKernel : public GpuKernel {
<< "-D inputs.";
}
axis_ = GetAttr<int>(kernel_node, "axis");
axis_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "axis"));
if (axis_ < 0) {
axis_ += SizeToInt(input_shape.size());
}

View File

@ -47,7 +47,7 @@ class ArgmaxWithValueGpuKernel : public GpuKernel {
std::vector<size_t> shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 1);
int dims = shape.size();
int axis = GetAttr<int>(kernel_node, "axis");
int axis = static_cast<int>(GetAttr<int64_t>(kernel_node, "axis"));
if (axis < 0) {
axis += dims;
}

View File

@ -20,6 +20,7 @@
#include <map>
#include <string>
#include <vector>
#include <algorithm>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "backend/kernel_compiler/gpu/kernel_constants.h"
@ -97,7 +98,10 @@ class ArrayReduceGpuKernel : public GpuKernel {
if (AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("axis")->isa<ValueTuple>() ||
AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("axis")->isa<ValueList>()) {
auto attr_axis = GetAttr<std::vector<int>>(kernel_node, "axis");
std::vector<int> attr_axis;
std::vector<int64_t> attr_axis_me = GetAttr<std::vector<int64_t>>(kernel_node, "axis");
(void)std::transform(attr_axis_me.begin(), attr_axis_me.end(), std::back_inserter(attr_axis),
[](const int64_t &value) { return static_cast<int>(value); });
if (attr_axis.empty()) {
axis_.push_back(-1);
} else {
@ -105,8 +109,8 @@ class ArrayReduceGpuKernel : public GpuKernel {
axis < 0 ? axis_.push_back(axis + input_dim_length) : axis_.push_back(axis);
}
}
} else if (AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("axis")->isa<Int32Imm>()) {
int axis = GetAttr<int>(kernel_node, "axis");
} else if (AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("axis")->isa<Int64Imm>()) {
int axis = static_cast<int>(GetAttr<int64_t>(kernel_node, "axis"));
axis < 0 ? axis_.push_back(axis + input_dim_length) : axis_.push_back(axis);
} else {
MS_LOG(EXCEPTION) << "Attribute axis type is invalid.";

View File

@ -63,7 +63,7 @@ class ConcatV2GpuFwdKernel : public GpuKernel {
if (!CheckParam(kernel_node)) {
return false;
}
axis_ = GetAttr<int>(kernel_node, "axis");
axis_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "axis"));
if (axis_ < 0) {
auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
axis_ += SizeToInt(input_shape.size());

View File

@ -55,7 +55,7 @@ class GatherGpuFwdKernel : public GpuKernel {
index_shapes_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
output_shapes_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);
axis_ = GetAttr<int>(kernel_node, "dim");
axis_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "dim"));
if (axis_ < 0) {
axis_ = axis_ + SizeToInt(input_shapes_.size());
}

View File

@ -55,7 +55,7 @@ class GatherGradGpuKernel : public GpuKernel {
grad_shapes_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
output_shapes_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);
axis_ = GetAttr<int>(kernel_node, "dim");
axis_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "dim"));
if (axis_ < 0) {
axis_ = axis_ + SizeToInt(index_shapes_.size());
}

View File

@ -56,7 +56,7 @@ class GatherV2GpuFwdKernel : public GpuKernel {
indices_shapes_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
output_shapes_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);
axis_ = GetAttr<int>(kernel_node, "axis");
axis_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "axis"));
if (axis_ < 0) {
axis_ = axis_ + SizeToInt(input_shapes_.size());
}

View File

@ -45,7 +45,7 @@ class OneHotGpuFwdKernel : public GpuKernel {
return true;
}
bool Init(const CNodePtr &kernel_node) override {
int axis = GetAttr<int>(kernel_node, "axis");
int axis = static_cast<int>(GetAttr<int64_t>(kernel_node, "axis"));
auto input = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto output = AnfAlgo::GetOutputInferShape(kernel_node, 0);
int input_size = SizeToInt(input.size());

View File

@ -114,12 +114,12 @@ class RepeatElementsGpuKernel : public GpuKernel {
}
std::reverse(input_shape_cumulative_product_.begin(), input_shape_cumulative_product_.end());
axis_ = GetAttr<int>(kernel_node, "axis");
axis_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "axis"));
if (axis_ < 0) {
axis_ += input_dim_;
}
rep_ = GetAttr<int>(kernel_node, "rep");
rep_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "rep"));
output_size_ = input_size_ * rep_;
output_shape_ = input_shape_;
output_shape_[axis_] *= rep_;

View File

@ -60,11 +60,11 @@ class RepeatElementsGradGpuKernel : public GpuKernel {
std::vector<size_t> dy_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
int dy_dim = dy_shape.size();
axis_ = GetAttr<int>(kernel_node, "axis");
axis_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "axis"));
if (axis_ < 0) {
axis_ += dy_dim;
}
rep_ = GetAttr<int>(kernel_node, "rep");
rep_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "rep"));
if (axis_ >= dy_dim) {
axis_ = dy_dim - 1;
rep_ = 1;

View File

@ -18,6 +18,7 @@
#define MINDSPORE_CCSRC_KERNEL_GPU_SCATTER_ND_GPU_KERNEL_H
#include <vector>
#include <algorithm>
#include "backend/kernel_compiler/gpu/cuda_impl/scatter_nd.cuh"
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
@ -98,7 +99,9 @@ class ScatterNdGpuFwdKernel : public GpuKernel {
indices_shapes_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
output_shapes_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);
vec_work_shape_ = GetAttr<std::vector<S>>(kernel_node, "shape");
std::vector<int64_t> shape_me = GetAttr<std::vector<int64_t>>(kernel_node, "shape");
(void)std::transform(shape_me.begin(), shape_me.end(), std::back_inserter(vec_work_shape_),
[](const int64_t &value) { return static_cast<S>(value); });
GetSize();

View File

@ -19,6 +19,7 @@
#include <vector>
#include <utility>
#include <algorithm>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "backend/kernel_compiler/gpu/cuda_impl/slice_impl.cuh"
@ -106,8 +107,12 @@ class SliceGpuFwdKernel : public GpuKernel {
MS_LOG(ERROR) << "Input dims is " << input_shape.size() << ", scalar is not supported.";
return false;
}
size_ = GetAttr<std::vector<int>>(kernel_node, "size");
begin_ = GetAttr<std::vector<int>>(kernel_node, "begin");
std::vector<int64_t> size_me = GetAttr<std::vector<int64_t>>(kernel_node, "size");
std::vector<int64_t> begin_me = GetAttr<std::vector<int64_t>>(kernel_node, "begin");
(void)std::transform(size_me.begin(), size_me.end(), std::back_inserter(size_),
[](const int64_t &value) { return static_cast<int>(value); });
(void)std::transform(begin_me.begin(), begin_me.end(), std::back_inserter(begin_),
[](const int64_t &value) { return static_cast<int>(value); });
for (size_t i = 0; i < input_shape.size(); i++) {
if (input_shape[i] <= 0 || size_[i] <= 0) {
MS_LOG(WARNING) << "Slice output is null.";

View File

@ -18,6 +18,7 @@
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SLICE_GRAD_GPU_KERNEL_H
#include <vector>
#include <algorithm>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "backend/kernel_compiler/gpu/cuda_impl/slice_impl.cuh"
@ -50,27 +51,38 @@ class SliceGradGpuKernel : public GpuKernel {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
if (kernel_name == "StridedSliceGrad") {
is_strided_slice_ = true;
auto shapex = GetAttr<std::vector<int>>(kernel_node, "shapex");
std::vector<int> shapex;
std::vector<int64_t> shapex_me = GetAttr<std::vector<int64_t>>(kernel_node, "shapex");
(void)std::transform(shapex_me.begin(), shapex_me.end(), std::back_inserter(shapex),
[](const int64_t &value) { return static_cast<int>(value); });
for (auto x : shapex) {
input_shape_.push_back(IntToSize(x));
}
for (auto i = input_shape_.size(); i < 4; i++) {
(void)input_shape_.insert(input_shape_.begin(), 1);
}
strides_ = GetAttr<std::vector<int>>(kernel_node, "strides");
std::vector<int64_t> strides_me = GetAttr<std::vector<int64_t>>(kernel_node, "strides");
(void)std::transform(strides_me.begin(), strides_me.end(), std::back_inserter(strides_),
[](const int64_t &value) { return static_cast<int>(value); });
for (auto i = strides_.size(); i < 4; i++) {
(void)strides_.insert(strides_.begin(), 1);
}
size_ = GetAttr<std::vector<int>>(kernel_node, "end");
std::vector<int64_t> size_me = GetAttr<std::vector<int64_t>>(kernel_node, "end");
(void)std::transform(size_me.begin(), size_me.end(), std::back_inserter(size_),
[](const int64_t &value) { return static_cast<int>(value); });
} else {
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
ShapeNdTo4d(input_shape, &input_shape_);
size_ = GetAttr<std::vector<int>>(kernel_node, "size");
std::vector<int64_t> size_me = GetAttr<std::vector<int64_t>>(kernel_node, "size");
(void)std::transform(size_me.begin(), size_me.end(), std::back_inserter(size_),
[](const int64_t &value) { return static_cast<int>(value); });
}
auto dy_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
ShapeNdTo4d(dy_shape, &dy_shape_);
begin_ = GetAttr<std::vector<int>>(kernel_node, "begin");
std::vector<int64_t> begin_me = GetAttr<std::vector<int64_t>>(kernel_node, "begin");
(void)std::transform(begin_me.begin(), begin_me.end(), std::back_inserter(begin_),
[](const int64_t &value) { return static_cast<int>(value); });
DealParam();
input_size_ = input_shape_[0] * input_shape_[1] * input_shape_[2] * input_shape_[3] * sizeof(T);

View File

@ -57,12 +57,12 @@ class SplitGpuFwdKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
axis_ = GetAttr<int>(kernel_node, "axis");
axis_ = static_cast<int64_t>(GetAttr<int64_t>(kernel_node, "axis"));
if (axis_ < 0) {
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
axis_ += SizeToInt(input_shape.size());
}
output_num_ = GetAttr<int>(kernel_node, "output_num");
output_num_ = static_cast<int64_t>(GetAttr<int64_t>(kernel_node, "output_num"));
if (!CheckParam(kernel_node)) {
return false;

View File

@ -81,9 +81,15 @@ class StridedSliceGpuKernel : public GpuKernel {
private:
void FillEmptyDims(const CNodePtr &kernel_node) {
begin_ = GetAttr<std::vector<int>>(kernel_node, "begin");
end_ = GetAttr<std::vector<int>>(kernel_node, "end");
strides_ = GetAttr<std::vector<int>>(kernel_node, "strides");
std::vector<int64_t> begin_me = GetAttr<std::vector<int64_t>>(kernel_node, "begin");
std::vector<int64_t> end_me = GetAttr<std::vector<int64_t>>(kernel_node, "end");
std::vector<int64_t> strides_me = GetAttr<std::vector<int64_t>>(kernel_node, "strides");
(void)std::transform(begin_me.begin(), begin_me.end(), std::back_inserter(begin_),
[](const int64_t &value) { return static_cast<int>(value); });
(void)std::transform(end_me.begin(), end_me.end(), std::back_inserter(end_),
[](const int64_t &value) { return static_cast<int>(value); });
(void)std::transform(strides_me.begin(), strides_me.end(), std::back_inserter(strides_),
[](const int64_t &value) { return static_cast<int>(value); });
for (size_t i = 0; i < MAX_DIMS; i++) {
if (i < begin_.size()) {
@ -111,7 +117,7 @@ class StridedSliceGpuKernel : public GpuKernel {
}
void ParseMasks(const CNodePtr &kernel_node) {
auto begin_mask_int = GetAttr<int>(kernel_node, "begin_mask");
auto begin_mask_int = static_cast<int64_t>(GetAttr<int64_t>(kernel_node, "begin_mask"));
auto begin_mask = Dec2Bin(begin_mask_int);
for (size_t i = 0; i < begin_mask.size(); i++) {
if (begin_mask[i]) {
@ -119,7 +125,7 @@ class StridedSliceGpuKernel : public GpuKernel {
}
}
auto end_mask_int = GetAttr<int>(kernel_node, "end_mask");
auto end_mask_int = static_cast<int64_t>(GetAttr<int64_t>(kernel_node, "end_mask"));
auto end_mask = Dec2Bin(end_mask_int);
for (size_t j = 0; j < end_mask.size(); j++) {
if (end_mask[j]) {
@ -127,7 +133,7 @@ class StridedSliceGpuKernel : public GpuKernel {
}
}
auto ellipsis_mask_int = GetAttr<int>(kernel_node, "ellipsis_mask");
auto ellipsis_mask_int = static_cast<int64_t>(GetAttr<int64_t>(kernel_node, "ellipsis_mask"));
auto ellipsis_mask = Dec2Bin(ellipsis_mask_int);
for (size_t k = 0; k < ellipsis_mask.size(); k++) {
if (ellipsis_mask[k]) {
@ -137,7 +143,7 @@ class StridedSliceGpuKernel : public GpuKernel {
}
}
auto shrink_axis_mask_str = GetAttr<int>(kernel_node, "shrink_axis_mask");
auto shrink_axis_mask_str = static_cast<int64_t>(GetAttr<int64_t>(kernel_node, "shrink_axis_mask"));
auto shrink_axis_mask = Dec2Bin(shrink_axis_mask_str);
for (size_t l = 0; l < shrink_axis_mask.size(); l++) {
if (shrink_axis_mask[l]) {

View File

@ -50,7 +50,10 @@ class StridedSliceGradGpuKernel : public GpuKernel {
return true;
}
bool Init(const CNodePtr &kernel_node) override {
auto shapex = GetAttr<std::vector<int>>(kernel_node, "shapex");
std::vector<int> shapex;
std::vector<int64_t> shapex_me = GetAttr<std::vector<int64_t>>(kernel_node, "shapex");
(void)std::transform(shapex_me.begin(), shapex_me.end(), std::back_inserter(shapex),
[](const int64_t &value) { return static_cast<int>(value); });
for (auto x : shapex) {
input_shape_.push_back(IntToSize(x));
}
@ -84,9 +87,15 @@ class StridedSliceGradGpuKernel : public GpuKernel {
private:
void FillEmptyDims(const CNodePtr &kernel_node) {
begin_ = GetAttr<std::vector<int>>(kernel_node, "begin");
end_ = GetAttr<std::vector<int>>(kernel_node, "end");
strides_ = GetAttr<std::vector<int>>(kernel_node, "strides");
std::vector<int64_t> begin_me = GetAttr<std::vector<int64_t>>(kernel_node, "begin");
std::vector<int64_t> end_me = GetAttr<std::vector<int64_t>>(kernel_node, "end");
std::vector<int64_t> strides_me = GetAttr<std::vector<int64_t>>(kernel_node, "strides");
(void)std::transform(begin_me.begin(), begin_me.end(), std::back_inserter(begin_),
[](const int64_t &value) { return static_cast<int>(value); });
(void)std::transform(end_me.begin(), end_me.end(), std::back_inserter(end_),
[](const int64_t &value) { return static_cast<int>(value); });
(void)std::transform(strides_me.begin(), strides_me.end(), std::back_inserter(strides_),
[](const int64_t &value) { return static_cast<int>(value); });
for (size_t i = 0; i < MAX_DIMS; i++) {
if (i < begin_.size()) {
@ -114,7 +123,7 @@ class StridedSliceGradGpuKernel : public GpuKernel {
}
void ParseMasks(const CNodePtr &kernel_node) {
auto begin_mask_int = GetAttr<int>(kernel_node, "begin_mask");
auto begin_mask_int = static_cast<int64_t>(GetAttr<int64_t>(kernel_node, "begin_mask"));
auto begin_mask = Dec2Bin(begin_mask_int);
for (size_t i = 0; i < begin_mask.size(); i++) {
if (begin_mask[i]) {
@ -122,7 +131,7 @@ class StridedSliceGradGpuKernel : public GpuKernel {
}
}
auto end_mask_int = GetAttr<int>(kernel_node, "end_mask");
auto end_mask_int = static_cast<int64_t>(GetAttr<int64_t>(kernel_node, "end_mask"));
auto end_mask = Dec2Bin(end_mask_int);
for (size_t j = 0; j < end_mask.size(); j++) {
if (end_mask[j]) {
@ -130,7 +139,7 @@ class StridedSliceGradGpuKernel : public GpuKernel {
}
}
auto ellipsis_mask_int = GetAttr<int>(kernel_node, "ellipsis_mask");
auto ellipsis_mask_int = static_cast<int64_t>(GetAttr<int64_t>(kernel_node, "ellipsis_mask"));
auto ellipsis_mask = Dec2Bin(ellipsis_mask_int);
for (size_t k = 0; k < ellipsis_mask.size(); k++) {
if (ellipsis_mask[k]) {
@ -140,7 +149,7 @@ class StridedSliceGradGpuKernel : public GpuKernel {
}
}
auto shrink_axis_mask_str = GetAttr<int>(kernel_node, "shrink_axis_mask");
auto shrink_axis_mask_str = static_cast<int64_t>(GetAttr<int64_t>(kernel_node, "shrink_axis_mask"));
auto shrink_axis_mask = Dec2Bin(shrink_axis_mask_str);
for (size_t l = 0; l < shrink_axis_mask.size(); l++) {
if (shrink_axis_mask[l]) {

View File

@ -18,6 +18,7 @@
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_TRANSPOSE_H_
#include <vector>
#include <algorithm>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "backend/kernel_compiler/gpu/cuda_impl/transpose_impl.cuh"
@ -75,7 +76,10 @@ class TransposeGpuFwdKernel : public GpuKernel {
}
input_size_ *= sizeof(T);
output_size_ = input_size_;
auto perm = GetAttr<std::vector<int>>(kernel_node, "perm");
std::vector<int> perm;
std::vector<int64_t> perm_me = GetAttr<std::vector<int64_t>>(kernel_node, "perm");
(void)std::transform(perm_me.begin(), perm_me.end(), std::back_inserter(perm),
[](const int64_t &value) { return static_cast<int>(value); });
for (size_t j = 0; j < perm.size(); j++) {
input_axis_.push_back(perm[j]);
}

View File

@ -15,6 +15,7 @@
*/
#include "backend/kernel_compiler/gpu/data/dataset_init_kernel.h"
#include <algorithm>
#include "backend/kernel_compiler/gpu/data/dataset_utils.h"
#include "runtime/device/gpu/gpu_buffer_mgr.h"
#include "runtime/device/gpu/gpu_memory_allocator.h"
@ -34,7 +35,15 @@ const std::vector<size_t> &DatasetInitKernel::GetWorkspaceSizeList() const { ret
bool DatasetInitKernel::Init(const CNodePtr &kernel_node) {
queue_name_ = GetAttr<std::string>(kernel_node, "queue_name");
auto shapes = GetAttr<const std::vector<std::vector<int>>>(kernel_node, "shapes");
std::vector<std::vector<int>> shapes;
std::vector<std::vector<int64_t>> shapes_me = GetAttr<const std::vector<std::vector<int64_t>>>(kernel_node, "shapes");
(void)std::transform(shapes_me.begin(), shapes_me.end(), std::back_inserter(shapes),
[](const std::vector<int64_t> &values) {
std::vector<int> shape;
(void)std::transform(values.begin(), values.end(), std::back_inserter(shape),
[](const int64_t &value) { return static_cast<int>(value); });
return shape;
});
auto types = GetAttr<const std::vector<TypePtr>>(kernel_node, "types");
if (shapes.size() != types.size()) {
MS_LOG(EXCEPTION) << "Invalid shapes: " << shapes << ", types: " << types;

View File

@ -19,6 +19,7 @@
#include <memory>
#include <string>
#include <vector>
#include <algorithm>
#include "backend/kernel_compiler/gpu/data/dataset_utils.h"
#include "profiler/device/gpu/gpu_profiling.h"
#include "runtime/device/gpu/gpu_buffer_mgr.h"
@ -44,7 +45,15 @@ const std::vector<size_t> &DatasetIteratorKernel::GetWorkspaceSizeList() const {
bool DatasetIteratorKernel::Init(const CNodePtr &kernel_node) {
queue_name_ = GetAttr<std::string>(kernel_node, "shared_name");
auto shapes = GetAttr<const std::vector<std::vector<int>>>(kernel_node, "shapes");
std::vector<std::vector<int>> shapes;
std::vector<std::vector<int64_t>> shapes_me = GetAttr<const std::vector<std::vector<int64_t>>>(kernel_node, "shapes");
(void)std::transform(shapes_me.begin(), shapes_me.end(), std::back_inserter(shapes),
[](const std::vector<int64_t> &values) {
std::vector<int> shape;
(void)std::transform(values.begin(), values.end(), std::back_inserter(shape),
[](const int64_t &value) { return static_cast<int>(value); });
return shape;
});
auto types = GetAttr<const std::vector<TypePtr>>(kernel_node, "types");
if (shapes.size() != types.size()) {
MS_LOG(EXCEPTION) << "Invalid shapes: " << shapes << ", types: " << types;

View File

@ -85,7 +85,7 @@ class AddNGpuFwdKernel : public GpuKernel {
InitResource();
cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0)));
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
num_input_ = GetAttr<int>(kernel_node, "n");
num_input_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "n"));
if (IntToSize(num_input_) != input_num) {
MS_LOG(ERROR) << "Input number is " << num_input_ << " in attr, but got " << input_num << "input.";
return false;

View File

@ -19,6 +19,7 @@
#include <cublas_v2.h>
#include <cuda_runtime_api.h>
#include <vector>
#include <algorithm>
#include "backend/kernel_compiler/gpu/cuda_impl/identity_impl.cuh"
#include "backend/kernel_compiler/gpu/cuda_impl/matrix_split_impl.cuh"
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
@ -105,7 +106,7 @@ class CholeskyTrsmGpuKernel : public GpuKernel {
handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCusolverDnHandle();
blas_handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCublasHandle();
auto in_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
split_dim = GetAttr<int>(kernel_node, "split_dim");
split_dim = static_cast<int>(GetAttr<int64_t>(kernel_node, "split_dim"));
if (split_dim == 0) {
use_split_matrix = false;
if (in_shape.size() == 2) {

View File

@ -51,7 +51,7 @@ class CumSumGpuKernel : public GpuKernel {
}
input_size_0_ = sizeof(T);
shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
axis_ = GetAttr<int>(kernel_node, "axis");
axis_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "axis"));
exclusive_ = GetAttr<bool>(kernel_node, "exclusive");
reverse_ = GetAttr<bool>(kernel_node, "reverse");
int input_dim_length = SizeToInt(shape_.size());

View File

@ -86,7 +86,7 @@ class DetTriangleGpuKernel : public GpuKernel {
MS_LOG(ERROR) << "The maxtices should be in shape of square.";
return false;
}
fill_mode_ = GetValue<int>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("fill_mode"));
fill_mode_ = static_cast<int>(GetValue<int64_t>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("fill_mode")));
InitSizeLists();
return true;
}

View File

@ -92,8 +92,8 @@ class MultinomialGpuKernel : public GpuKernel {
output_size_ *= output_shape[i];
}
workspace_size_ = output_size_;
seed_ = GetValue<int>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("seed"));
seed2_ = GetValue<int>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("seed2"));
seed_ = static_cast<int>(GetValue<int64_t>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("seed")));
seed2_ = static_cast<int>(GetValue<int64_t>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("seed2")));
InitSizeLists();
return true;
}

View File

@ -119,8 +119,8 @@ class RandomOpGpuKernel : public GpuKernel {
output_size_ *= output_shape[i];
workspace_size_ *= output_shape[i];
}
seed_ = GetValue<int>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("seed"));
seed2_ = GetValue<int>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("seed2"));
seed_ = static_cast<int>(GetValue<int64_t>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("seed")));
seed2_ = static_cast<int>(GetValue<int64_t>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("seed2")));
InitSizeLists();
return true;
}

View File

@ -20,6 +20,7 @@
#include <cublas_v2.h>
#include <cuda_runtime_api.h>
#include <vector>
#include <algorithm>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "backend/kernel_compiler/gpu/kernel_constants.h"
@ -135,9 +136,14 @@ class TensorDotGpuKernel : public GpuKernel {
}
// holding in temp values to convert to size_t vectors
auto x1_transpose_fwd_temp = GetAttr<std::vector<int>>(kernel_node, "x1_transpose_fwd");
auto x2_transpose_fwd_temp = GetAttr<std::vector<int>>(kernel_node, "x2_transpose_fwd");
std::vector<int> x1_transpose_fwd_temp;
std::vector<int64_t> x1_transpose_me = GetAttr<std::vector<int64_t>>(kernel_node, "x1_transpose_fwd");
(void)std::transform(x1_transpose_me.begin(), x1_transpose_me.end(), std::back_inserter(x1_transpose_fwd_temp),
[](const int64_t &value) { return static_cast<int>(value); });
std::vector<int> x2_transpose_fwd_temp;
std::vector<int64_t> x2_transpose_me = GetAttr<std::vector<int64_t>>(kernel_node, "x2_transpose_fwd");
(void)std::transform(x2_transpose_me.begin(), x2_transpose_me.end(), std::back_inserter(x2_transpose_fwd_temp),
[](const int64_t &value) { return static_cast<int>(value); });
for (size_t i = 0; i < x1_transpose_fwd_temp.size(); i++) {
x1_transpose_fwd_.push_back(x1_transpose_fwd_temp[i]);
}
@ -147,8 +153,12 @@ class TensorDotGpuKernel : public GpuKernel {
}
// values to decide multiplication call specifics
x1_reshape_fwd_ = GetAttr<std::vector<int>>(kernel_node, "x1_reshape_fwd");
x2_reshape_fwd_ = GetAttr<std::vector<int>>(kernel_node, "x2_reshape_fwd");
std::vector<int64_t> x1_reshape_me = GetAttr<std::vector<int64_t>>(kernel_node, "x1_reshape_fwd");
(void)std::transform(x1_reshape_me.begin(), x1_reshape_me.end(), std::back_inserter(x1_reshape_fwd_),
[](const int64_t &value) { return static_cast<int>(value); });
std::vector<int64_t> x2_reshape_me = GetAttr<std::vector<int64_t>>(kernel_node, "x2_reshape_fwd");
(void)std::transform(x2_reshape_me.begin(), x2_reshape_me.end(), std::back_inserter(x2_reshape_fwd_),
[](const int64_t &value) { return static_cast<int>(value); });
auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
output_size_ = sizeof(T);
for (size_t i = 0; i < output_shape.size(); i++) {

View File

@ -183,7 +183,7 @@ class UpdateThorGradientGpuKernel : public GpuKernel {
auto gradient_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
auto matrix_g_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
split_dim = size_t(GetAttr<int>(kernel_node, "split_dim"));
split_dim = LongToSize(GetAttr<int64_t>(kernel_node, "split_dim"));
gradient_size.batch_h = gradient_shape[0] / split_dim;
gradient_size.batch_w = gradient_shape[1] / split_dim;

View File

@ -195,7 +195,7 @@ class NcclGpuKernel : public GpuKernel {
auto root_rank = AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr(kAttrRootRank);
if (root_rank) {
root_ = GetValue<int>(root_rank);
root_ = static_cast<int>(GetValue<int64_t>(root_rank));
}
return;
}

View File

@ -125,9 +125,12 @@ class Conv2dGpuFwdKernel : public GpuKernel {
compute_format_ = CUDNN_TENSOR_NHWC;
}
Set4DDesc(in_shape, filter_shape, output_shape);
group_ = GetAttr<int>(kernel_node, "group");
group_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "group"));
CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetConvolutionGroupCount(conv_desc_, group_), "cudnnSetConvGroupCount failed");
auto pad_list = GetAttr<std::vector<int>>(kernel_node, "pad_list");
std::vector<int> pad_list;
std::vector<int64_t> pad_list_me = GetAttr<std::vector<int64_t>>(kernel_node, "pad_list");
(void)std::transform(pad_list_me.begin(), pad_list_me.end(), std::back_inserter(pad_list),
[](const int64_t &value) { return static_cast<int>(value); });
pad_height_ = pad_list[0];
pad_width_ = pad_list[2];
auto symmetry_pad = (pad_height_ == pad_list[1]) && (pad_width_ == pad_list[3]);
@ -306,8 +309,12 @@ class Conv2dGpuFwdKernel : public GpuKernel {
}
}
void SetStrideAndDilation(const CNodePtr &kernel_node) {
stride_ = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "stride");
dilation_ = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "dilation");
std::vector<int64_t> stride_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, "stride");
std::vector<int64_t> dilation_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, "dilation");
(void)std::transform(stride_me.begin(), stride_me.end(), std::back_inserter(stride_),
[](const int64_t &value) { return static_cast<int>(value); });
(void)std::transform(dilation_me.begin(), dilation_me.end(), std::back_inserter(dilation_),
[](const int64_t &value) { return static_cast<int>(value); });
if (stride_.size() != 4) {
MS_LOG(EXCEPTION) << "Conv2d's' stride must be 4d!";
}

View File

@ -127,10 +127,13 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
}
SetNCHW(in_shape, &n_, &c_, &old_height_, &old_width_, data_format_);
Set4DDesc(dy_shape, filter_shape, in_shape);
group_ = GetAttr<int>(kernel_node, "group");
group_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "group"));
CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetConvolutionGroupCount(conv_desc_, group_), "cudnnSetConvGroupCount failed");
auto pad_list = GetAttr<std::vector<int>>(kernel_node, "pad_list");
std::vector<int> pad_list;
std::vector<int64_t> pad_list_me = GetAttr<std::vector<int64_t>>(kernel_node, "pad_list");
(void)std::transform(pad_list_me.begin(), pad_list_me.end(), std::back_inserter(pad_list),
[](const int64_t &value) { return static_cast<int>(value); });
pad_height_ = pad_list[0];
pad_width_ = pad_list[2];
auto symmetry_pad = (pad_height_ == pad_list[1]) && (pad_width_ == pad_list[3]);
@ -284,7 +287,7 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
void GetFilterShape(const CNodePtr &kernel_node, std::vector<size_t> *filter_shape) {
auto shp_tuple_x = AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("filter_sizes")->cast<ValueTuplePtr>()->value();
(void)std::transform(std::begin(shp_tuple_x), std::end(shp_tuple_x), std::back_inserter(*filter_shape),
[](const ValuePtr &e) -> size_t { return e->cast<Int32ImmPtr>()->value(); });
[](const ValuePtr &e) -> size_t { return static_cast<int>(e->cast<Int64ImmPtr>()->value()); });
}
void Set4DDesc(const std::vector<size_t> &dy_shape, const std::vector<size_t> &filter_shape,
const std::vector<size_t> &in_shape) {
@ -309,8 +312,12 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
"cudnnSetTensorNdDescriptor failed");
}
void SetStrideAndDilation(const CNodePtr &kernel_node) {
stride_ = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "stride");
dilation_ = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "dilation");
std::vector<int64_t> stride_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, "stride");
std::vector<int64_t> dilation_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, "dilation");
(void)std::transform(stride_me.begin(), stride_me.end(), std::back_inserter(stride_),
[](const int64_t &value) { return static_cast<int>(value); });
(void)std::transform(dilation_me.begin(), dilation_me.end(), std::back_inserter(dilation_),
[](const int64_t &value) { return static_cast<int>(value); });
if (stride_.size() != 2) {
MS_LOG(EXCEPTION) << "ConvGradFilterGpuBkwKernel's stride must be 2d!";
}

View File

@ -130,10 +130,13 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
SetNCHW(input_shape, &n_, &c_, &old_height_, &old_width_, data_format_);
Set4DDesc(dy_shape, input_shape, filter_shape);
group_ = GetAttr<int>(kernel_node, "group");
group_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "group"));
CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetConvolutionGroupCount(conv_desc_, group_), "cudnnSetConvGroupCount failed");
auto pad_list = GetAttr<std::vector<int>>(kernel_node, "pad_list");
std::vector<int> pad_list;
std::vector<int64_t> pad_list_me = GetAttr<std::vector<int64_t>>(kernel_node, "pad_list");
(void)std::transform(pad_list_me.begin(), pad_list_me.end(), std::back_inserter(pad_list),
[](const int64_t &value) { return static_cast<int>(value); });
pad_height_ = pad_list[0];
pad_width_ = pad_list[2];
auto symmetry_pad = (pad_height_ == pad_list[1]) && (pad_width_ == pad_list[3]);
@ -263,7 +266,10 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
return true;
}
void SetPad(const std::vector<int> &input_shape, const CNodePtr &kernel_node) {
auto pad_list = GetAttr<std::vector<int>>(kernel_node, "pad_list");
std::vector<int> pad_list;
std::vector<int64_t> pad_list_me = GetAttr<std::vector<int64_t>>(kernel_node, "pad_list");
(void)std::transform(pad_list_me.begin(), pad_list_me.end(), std::back_inserter(pad_list),
[](const int64_t &value) { return static_cast<int>(value); });
}
void SelectAlgorithm(cudnnTensorDescriptor_t dx_desc_real) {
if (group_ > 1 || CUDNN_MAJOR < 7) {
@ -288,7 +294,7 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
void GetInputShape(const CNodePtr &kernel_node, std::vector<size_t> *input_shape) {
auto shp_tuple_x = AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("input_sizes")->cast<ValueTuplePtr>()->value();
(void)std::transform(std::begin(shp_tuple_x), std::end(shp_tuple_x), std::back_inserter(*input_shape),
[](const ValuePtr &e) -> size_t { return e->cast<Int32ImmPtr>()->value(); });
[](const ValuePtr &e) -> size_t { return static_cast<int>(e->cast<Int64ImmPtr>()->value()); });
}
void Set4DDesc(const std::vector<size_t> &dy_shape, const std::vector<size_t> &input_shape,
const std::vector<size_t> &filter_shape) {
@ -313,8 +319,12 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
"cudnnSetTensorNdDescriptor failed");
}
void SetStrideAndDilation(const CNodePtr &kernel_node) {
stride_ = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "stride");
dilation_ = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "dilation");
std::vector<int64_t> stride_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, "stride");
std::vector<int64_t> dilation_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, "dilation");
(void)std::transform(stride_me.begin(), stride_me.end(), std::back_inserter(stride_),
[](const int64_t &value) { return static_cast<int>(value); });
(void)std::transform(dilation_me.begin(), dilation_me.end(), std::back_inserter(dilation_),
[](const int64_t &value) { return static_cast<int>(value); });
if (stride_.size() != 2) {
MS_LOG(EXCEPTION) << "ConvGradInputGpuBkwKernel's stride must be 2d!";
}

View File

@ -86,7 +86,10 @@ class Im2ColGpuFwdKernel : public GpuKernel {
return false;
}
auto in_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto filter_shape = GetAttr<std::vector<int>>(kernel_node, "kernel_size");
std::vector<int> filter_shape;
std::vector<int64_t> filter_shape_me = GetAttr<std::vector<int64_t>>(kernel_node, "kernel_size");
(void)std::transform(filter_shape_me.begin(), filter_shape_me.end(), std::back_inserter(filter_shape),
[](const int64_t &value) { return static_cast<int>(value); });
auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(in_shape);
if (is_null_input_) {
@ -96,7 +99,7 @@ class Im2ColGpuFwdKernel : public GpuKernel {
}
Set4DDesc(in_shape, filter_shape, output_shape);
CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetConvolutionGroupCount(conv_desc_, 1), "cudnnSetConvGroupCount failed");
pad_height_ = GetAttr<int>(kernel_node, "pad");
pad_height_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "pad"));
pad_width_ = pad_height_;
pad_mode_ = GetAttr<std::string>(kernel_node, "pad_mode");
SetStrideAndDilation(kernel_node);
@ -173,7 +176,10 @@ class Im2ColGpuFwdKernel : public GpuKernel {
return true;
}
void SetPad(const std::vector<size_t> &in_shape, const CNodePtr &kernel_node) {
auto pad_list = GetAttr<std::vector<int>>(kernel_node, "pad_list");
std::vector<int> pad_list;
std::vector<int64_t> pad_list_me = GetAttr<std::vector<int64_t>>(kernel_node, "pad_list");
(void)std::transform(pad_list_me.begin(), pad_list_me.end(), std::back_inserter(pad_list),
[](const int64_t &value) { return static_cast<int>(value); });
n_ = SizeToInt(in_shape[0]);
c_ = SizeToInt(in_shape[1]);
@ -217,8 +223,12 @@ class Im2ColGpuFwdKernel : public GpuKernel {
}
void SetStrideAndDilation(const CNodePtr &kernel_node) {
stride_ = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "stride");
dilation_ = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "dilation");
std::vector<int64_t> stride_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, "stride");
std::vector<int64_t> dilation_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, "dilation");
(void)std::transform(stride_me.begin(), stride_me.end(), std::back_inserter(stride_),
[](const int64_t &value) { return static_cast<int>(value); });
(void)std::transform(dilation_me.begin(), dilation_me.end(), std::back_inserter(dilation_),
[](const int64_t &value) { return static_cast<int>(value); });
if (stride_.size() != 4) {
MS_LOG(EXCEPTION) << "Im2Col's stride must be 4d!";
}

View File

@ -97,7 +97,7 @@ class L2NormalizeGpuKernel : public GpuKernel {
}
int input_dim_length = SizeToInt(AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0).size());
int axis = GetAttr<int>(kernel_node, "axis");
int axis = static_cast<int>(GetAttr<int64_t>(kernel_node, "axis"));
axis_ = axis < 0 ? (axis + input_dim_length) : axis;
epsilon_ = GetAttr<float>(kernel_node, "epsilon");

View File

@ -119,7 +119,7 @@ class L2NormalizeGradGpuKernel : public GpuKernel {
}
int input_dim_length = SizeToInt(AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0).size());
int axis = GetAttr<int>(kernel_node, "axis");
int axis = static_cast<int>(GetAttr<int64_t>(kernel_node, "axis"));
axis_ = axis < 0 ? (axis + input_dim_length) : axis;
epsilon_ = GetAttr<float>(kernel_node, "epsilon");

View File

@ -49,8 +49,8 @@ class LayerNormGpuKernel : public GpuKernel {
return true;
}
bool Init(const CNodePtr &kernel_node) override {
int begin_norm_axis = GetAttr<int>(kernel_node, "begin_norm_axis");
int begin_params_axis = GetAttr<int>(kernel_node, "begin_params_axis");
int begin_norm_axis = static_cast<int>(GetAttr<int64_t>(kernel_node, "begin_norm_axis"));
int begin_params_axis = static_cast<int>(GetAttr<int64_t>(kernel_node, "begin_params_axis"));
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
if (begin_norm_axis < 0) {

View File

@ -51,8 +51,8 @@ class LayerNormGradGpuKernel : public GpuKernel {
return true;
}
bool Init(const CNodePtr &kernel_node) override {
int begin_norm_axis = GetAttr<int>(kernel_node, "begin_norm_axis");
int begin_params_axis = GetAttr<int>(kernel_node, "begin_params_axis");
int begin_norm_axis = static_cast<int>(GetAttr<int64_t>(kernel_node, "begin_norm_axis"));
int begin_params_axis = static_cast<int>(GetAttr<int64_t>(kernel_node, "begin_params_axis"));
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
if (begin_norm_axis < 0) {

View File

@ -95,9 +95,9 @@ class LstmGpuKernel : public GpuKernel {
batch_size_ = SizeToInt(input_shape[1]);
input_size_ = SizeToInt(input_shape[2]);
input_size_ = GetAttr<int>(kernel_node, "input_size");
hidden_size_ = GetAttr<int>(kernel_node, "hidden_size");
num_layers_ = GetAttr<int>(kernel_node, "num_layers");
input_size_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "input_size"));
hidden_size_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "hidden_size"));
num_layers_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "num_layers"));
has_bias_ = GetAttr<bool>(kernel_node, "has_bias");
bidirectional_ = GetAttr<bool>(kernel_node, "bidirectional");
dropout_ = GetAttr<float>(kernel_node, "dropout");

View File

@ -96,9 +96,9 @@ class LstmGradDataGpuKernel : public GpuKernel {
return true;
}
void GetAttrs(const CNodePtr &kernel_node) {
input_size_ = GetAttr<int>(kernel_node, "input_size");
hidden_size_ = GetAttr<int>(kernel_node, "hidden_size");
num_layers_ = GetAttr<int>(kernel_node, "num_layers");
input_size_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "input_size"));
hidden_size_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "hidden_size"));
num_layers_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "num_layers"));
has_bias_ = GetAttr<bool>(kernel_node, "has_bias");
bidirectional_ = GetAttr<bool>(kernel_node, "bidirectional");
dropout_ = GetAttr<float>(kernel_node, "dropout");

View File

@ -89,9 +89,9 @@ class LstmGradWeightGpuKernel : public GpuKernel {
seq_len_ = SizeToInt(input_shape[0]);
batch_size_ = SizeToInt(input_shape[1]);
input_size_ = GetAttr<int>(kernel_node, "input_size");
hidden_size_ = GetAttr<int>(kernel_node, "hidden_size");
num_layers_ = GetAttr<int>(kernel_node, "num_layers");
input_size_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "input_size"));
hidden_size_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "hidden_size"));
num_layers_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "num_layers"));
has_bias_ = GetAttr<bool>(kernel_node, "has_bias");
bidirectional_ = GetAttr<bool>(kernel_node, "bidirectional");
dropout_ = GetAttr<float>(kernel_node, "dropout");

View File

@ -90,10 +90,18 @@ class MaxPoolWithArgmaxGpuFwdKernel : public GpuKernel {
input_width_ = SizeToInt(input_shape[3]);
output_height_ = SizeToInt(output_shape[2]);
output_width_ = SizeToInt(output_shape[3]);
auto window = GetValue<std::vector<int>>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("ksize"));
std::vector<int> window;
std::vector<int64_t> window_me =
GetValue<std::vector<int64_t>>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("ksize"));
(void)std::transform(window_me.begin(), window_me.end(), std::back_inserter(window),
[](const int64_t &value) { return static_cast<int>(value); });
window_height_ = window[1];
window_width_ = window[2];
auto stride = GetValue<std::vector<int>>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("strides"));
std::vector<int> stride;
std::vector<int64_t> stride_me =
GetValue<std::vector<int64_t>>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("strides"));
(void)std::transform(stride_me.begin(), stride_me.end(), std::back_inserter(stride),
[](const int64_t &value) { return static_cast<int>(value); });
stride_height_ = stride[1];
stride_width_ = stride[2];
pad_mode_ = GetValue<std::string>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("padding"));

View File

@ -95,10 +95,18 @@ class MaxPoolWithArgmaxGradGpuKernel : public GpuKernel {
x_width_ = SizeToInt(x_shape[3]);
dy_height_ = SizeToInt(dy_shape[2]);
dy_width_ = SizeToInt(dy_shape[3]);
auto window = GetValue<std::vector<int>>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("ksize"));
std::vector<int> window;
std::vector<int64_t> window_me =
GetValue<std::vector<int64_t>>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("ksize"));
(void)std::transform(window_me.begin(), window_me.end(), std::back_inserter(window),
[](const int64_t &value) { return static_cast<int>(value); });
window_height_ = window[1];
window_width_ = window[2];
auto stride = GetValue<std::vector<int>>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("strides"));
std::vector<int> stride;
std::vector<int64_t> stride_me =
GetValue<std::vector<int64_t>>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("strides"));
(void)std::transform(stride_me.begin(), stride_me.end(), std::back_inserter(stride),
[](const int64_t &value) { return static_cast<int>(value); });
stride_height_ = stride[1];
stride_width_ = stride[2];
pad_mode_ = GetValue<std::string>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("padding"));

View File

@ -19,6 +19,7 @@
#include <iostream>
#include <vector>
#include <algorithm>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "backend/kernel_compiler/gpu/cuda_impl/pad_impl.cuh"
@ -77,7 +78,15 @@ class PadGpuFwdKernel : public GpuKernel {
input_shape.insert(it, 2, 1); // channel padding
shape_size_ = 4;
}
paddings = GetValue<std::vector<std::vector<int>>>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("paddings"));
std::vector<std::vector<int64_t>> paddings_me =
GetValue<std::vector<std::vector<int64_t>>>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("paddings"));
(void)std::transform(paddings_me.begin(), paddings_me.end(), std::back_inserter(paddings),
[](const std::vector<int64_t> &values) {
std::vector<int> shape;
(void)std::transform(values.begin(), values.end(), std::back_inserter(shape),
[](const int64_t &value) { return static_cast<int>(value); });
return shape;
});
// shape adjustement -> from 2d/3d to 4d to standardize
if (paddings.size() == 4) {
} else if (paddings.size() == 3) {

View File

@ -157,10 +157,17 @@ class PoolingGpuFwdKernel : public GpuKernel {
}
void SetPad(const CNodePtr &kernel_node) {
pad_mode_ = GetValue<std::string>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("padding"));
auto window = GetValue<std::vector<int>>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("ksize"));
std::vector<int> window;
std::vector<int64_t> window_me =
GetValue<std::vector<int64_t>>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("ksize"));
(void)std::transform(window_me.begin(), window_me.end(), std::back_inserter(window),
[](const int64_t &value) { return static_cast<int>(value); });
int window_height = window[2];
int window_width = window[3];
stride_ = GetValue<std::vector<int>>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("strides"));
std::vector<int64_t> stride_me =
GetValue<std::vector<int64_t>>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("strides"));
(void)std::transform(stride_me.begin(), stride_me.end(), std::back_inserter(stride_),
[](const int64_t &value) { return static_cast<int>(value); });
int windowDimA[2] = {window_height, window_width};
int paddingA[2] = {0, 0};
int strideA[2] = {stride_[2], stride_[3]};

View File

@ -174,8 +174,13 @@ class PoolingGradGpuKernel : public GpuKernel {
}
void SetPad(const CNodePtr &kernel_node) {
pad_mode_ = GetAttr<std::string>(kernel_node, "padding");
stride_ = GetAttr<std::vector<int>>(kernel_node, "strides");
auto window = GetAttr<std::vector<int>>(kernel_node, "ksize");
std::vector<int64_t> stride_me = GetAttr<std::vector<int64_t>>(kernel_node, "strides");
std::vector<int> window;
std::vector<int64_t> window_me = GetAttr<std::vector<int64_t>>(kernel_node, "ksize");
(void)std::transform(stride_me.begin(), stride_me.end(), std::back_inserter(stride_),
[](const int64_t &value) { return static_cast<int>(value); });
(void)std::transform(window_me.begin(), window_me.end(), std::back_inserter(window),
[](const int64_t &value) { return static_cast<int>(value); });
int window_height = window[2];
int window_width = window[3];
int stride_h = stride_[2];

View File

@ -85,11 +85,11 @@ class ROIAlignGpuFwdKernel : public GpuKernel {
rois_shape_ = {roi_rows_, roi_cols_};
// Get primitive args
pooled_height_ = GetAttr<int>(kernel_node, "pooled_height");
pooled_width_ = GetAttr<int>(kernel_node, "pooled_width");
pooled_height_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "pooled_height"));
pooled_width_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "pooled_width"));
spatial_scale_ = static_cast<T>(GetAttr<float>(kernel_node, "spatial_scale"));
sample_num_ = GetAttr<int>(kernel_node, "sample_num");
roi_end_mode_ = GetAttr<int>(kernel_node, "roi_end_mode");
sample_num_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "sample_num"));
roi_end_mode_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "roi_end_mode"));
// Get output_shape
output_shape_ = {roi_rows_, channels_, pooled_height_, pooled_width_};

View File

@ -18,6 +18,7 @@
#define MINDSPORE_CCSRC_KERNEL_GPU_ROI_ALIGN_GRAD_GPU_KERNEL_H
#include <vector>
#include <algorithm>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "backend/kernel_compiler/gpu/cuda_impl/roi_align_impl.cuh"
@ -83,11 +84,13 @@ class ROIAlignGradGpuFwdKernel : public GpuKernel {
rois_size_ = roi_rows_ * roi_cols_ * sizeof(T);
// Get primitive args
xdiff_shape_ = GetAttr<std::vector<int>>(kernel_node, "xdiff_shape");
pooled_height_ = GetAttr<int>(kernel_node, "pooled_height");
pooled_width_ = GetAttr<int>(kernel_node, "pooled_width");
std::vector<int64_t> xdiff_shape_me = GetAttr<std::vector<int64_t>>(kernel_node, "xdiff_shape");
(void)std::transform(xdiff_shape_me.begin(), xdiff_shape_me.end(), std::back_inserter(xdiff_shape_),
[](const int64_t &value) { return static_cast<int>(value); });
pooled_height_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "pooled_height"));
pooled_width_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "pooled_width"));
spatial_scale_ = static_cast<T>(GetAttr<float>(kernel_node, "spatial_scale"));
sample_num_ = GetAttr<int>(kernel_node, "sample_num");
sample_num_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "sample_num"));
roi_end_mode_ = 1;
// Get channels, height & width

View File

@ -18,6 +18,7 @@
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_NN_SOFTMAX_GPU_KERNEL_H_
#include <vector>
#include <algorithm>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "backend/kernel_compiler/gpu/kernel_constants.h"
@ -117,11 +118,14 @@ class SoftmaxGpuKernel : public GpuKernel {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
if (kernel_name == "LogSoftmax") {
algo_ = CUDNN_SOFTMAX_LOG;
auto axis = GetAttr<int>(kernel_node, "axis");
auto axis = static_cast<int>(GetAttr<int64_t>(kernel_node, "axis"));
InitSizeByAxis(input_shape, axis);
} else {
algo_ = CUDNN_SOFTMAX_ACCURATE;
auto axis = GetAttr<std::vector<int>>(kernel_node, "axis");
std::vector<int> axis;
std::vector<int64_t> axis_me = GetAttr<std::vector<int64_t>>(kernel_node, "axis");
(void)std::transform(axis_me.begin(), axis_me.end(), std::back_inserter(axis),
[](const int64_t &value) { return static_cast<int>(value); });
InitSizeByAxis(input_shape, axis[0]);
}
CHECK_CUDNN_RET_WITH_EXCEPT(

View File

@ -18,6 +18,7 @@
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_NN_SOFTMAX_GRAD_GPU_KERNEL_H_
#include <vector>
#include <algorithm>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "backend/kernel_compiler/gpu/kernel_constants.h"
@ -123,11 +124,14 @@ class SoftmaxGradGpuKernel : public GpuKernel {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
if (kernel_name == "LogSoftmaxGrad") {
algo_ = CUDNN_SOFTMAX_LOG;
auto axis = GetAttr<int>(kernel_node, "axis");
auto axis = static_cast<int>(GetAttr<int64_t>(kernel_node, "axis"));
InitSizeByAxis(input_shape, axis);
} else {
algo_ = CUDNN_SOFTMAX_ACCURATE;
auto axis = GetAttr<std::vector<int>>(kernel_node, "axis");
std::vector<int> axis;
std::vector<int64_t> axis_me = GetAttr<std::vector<int64_t>>(kernel_node, "axis");
(void)std::transform(axis_me.begin(), axis_me.end(), std::back_inserter(axis),
[](const int64_t &value) { return static_cast<int>(value); });
InitSizeByAxis(input_shape, axis[0]);
}
CHECK_CUDNN_RET_WITH_EXCEPT(

View File

@ -78,11 +78,11 @@ class UniformCandidateSamplerGpuKernel : public GpuKernel {
return false;
}
// getting attrs
num_true_ = GetAttr<int>(kernel_node, "num_true");
num_sampled_ = GetAttr<int>(kernel_node, "num_sampled");
num_true_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "num_true"));
num_sampled_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "num_sampled"));
unique_ = GetAttr<bool>(kernel_node, "unique");
range_max_ = GetAttr<int>(kernel_node, "range_max");
int seed = GetAttr<int>(kernel_node, "seed");
range_max_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "range_max"));
int seed = static_cast<int>(GetAttr<int64_t>(kernel_node, "seed"));
remove_accidental_hits_ = GetAttr<bool>(kernel_node, "remove_accidental_hits");
if (seed == 0) seed = time(NULL);
generator_.seed(seed);

View File

@ -18,6 +18,7 @@
#define MINDSPORE_CCSRC_KERNEL_GPU_OTHER_BOUNDINGBOX_DECODE_GPU_KERNEL_H
#include <vector>
#include <algorithm>
#include "backend/kernel_compiler/gpu/cuda_impl/boundingbox_decode_impl.cuh"
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
@ -92,7 +93,7 @@ class BoundingBoxDecodeGpuKernel : public GpuKernel {
AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("means")->isa<ValueList>()) {
means_ = GetAttr<std::vector<float>>(kernel_node, "means");
} else if (AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("means")->isa<FloatImm>()) {
float mean = GetAttr<int>(kernel_node, "means");
float mean = GetAttr<float>(kernel_node, "means");
for (size_t i = 0; i < coordinate_size; i++) {
means_.emplace_back(mean);
}
@ -104,7 +105,7 @@ class BoundingBoxDecodeGpuKernel : public GpuKernel {
AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("stds")->isa<ValueList>()) {
stds_ = GetAttr<std::vector<float>>(kernel_node, "stds");
} else if (AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("stds")->isa<FloatImm>()) {
float std = GetAttr<int>(kernel_node, "stds");
float std = GetAttr<float>(kernel_node, "stds");
for (size_t i = 0; i < coordinate_size; i++) {
stds_.emplace_back(std);
}
@ -112,7 +113,9 @@ class BoundingBoxDecodeGpuKernel : public GpuKernel {
MS_LOG(EXCEPTION) << "Attribute stds type is invalid.";
}
max_shape_ = GetAttr<std::vector<int>>(kernel_node, "max_shape");
std::vector<int64_t> max_shape_me = GetAttr<std::vector<int64_t>>(kernel_node, "max_shape");
(void)std::transform(max_shape_me.begin(), max_shape_me.end(), std::back_inserter(max_shape_),
[](const int64_t &value) { return static_cast<int>(value); });
wh_ratio_clip_ = GetAttr<float>(kernel_node, "wh_ratio_clip");
if (means_.size() < coordinate_size || stds_.size() < coordinate_size) {

View File

@ -92,7 +92,7 @@ class BoundingBoxEncodeGpuKernel : public GpuKernel {
AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("means")->isa<ValueList>()) {
means_ = GetAttr<std::vector<float>>(kernel_node, "means");
} else if (AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("means")->isa<FloatImm>()) {
float mean = GetAttr<int>(kernel_node, "means");
float mean = GetAttr<float>(kernel_node, "means");
for (size_t i = 0; i < coordinate_size; i++) {
means_.emplace_back(mean);
}
@ -104,7 +104,7 @@ class BoundingBoxEncodeGpuKernel : public GpuKernel {
AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("stds")->isa<ValueList>()) {
stds_ = GetAttr<std::vector<float>>(kernel_node, "stds");
} else if (AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("stds")->isa<FloatImm>()) {
float std = GetAttr<int>(kernel_node, "stds");
float std = GetAttr<float>(kernel_node, "stds");
for (size_t i = 0; i < coordinate_size; i++) {
stds_.emplace_back(std);
}

View File

@ -89,7 +89,7 @@ class BatchNormFold2GpuKernel : public GpuKernel {
channel_ = input_shape[1];
height_ = input_shape[2];
width_ = input_shape[3];
freeze_bn_ = GetValue<int32_t>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("freeze_bn"));
freeze_bn_ = static_cast<int>(GetValue<int64_t>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("freeze_bn")));
InitSizeLists();
return true;

View File

@ -123,7 +123,7 @@ class BatchNormFoldGpuKernel : public GpuKernel {
exp_avg_factor_ = 1.0 - momentum;
epsilon_ = GetValue<T>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("epsilon"));
is_training_ = GetValue<bool>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("is_training"));
freeze_bn_ = GetValue<int>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("freeze_bn"));
freeze_bn_ = static_cast<int>(GetValue<int64_t>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("freeze_bn")));
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
if (input_shape.size() != 4) {

View File

@ -109,7 +109,7 @@ class BatchNormFoldGradGpuKernel : public GpuKernel {
epsilon_ = GetValue<T>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("epsilon"));
is_training_ = GetValue<bool>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("is_training"));
freeze_bn_ = GetValue<int>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("freeze_bn"));
freeze_bn_ = static_cast<int>(GetValue<int64_t>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("freeze_bn")));
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
if (input_shape.size() != 4) {

View File

@ -55,11 +55,11 @@ bool FakeQuantPerChannelGpuKernel::Init(const CNodePtr &kernel_node) {
}
// get attribute
num_bits_ = GetValue<int>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("num_bits"));
num_bits_ = static_cast<int>(GetValue<int64_t>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("num_bits")));
training_ = GetValue<bool>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("training"));
symmetric_ = GetValue<bool>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("symmetric"));
narrow_range_ = GetValue<bool>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("narrow_range"));
quant_delay_ = GetValue<int>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("quant_delay"));
quant_delay_ = static_cast<int>(GetValue<int64_t>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("quant_delay")));
if (num_bits_ <= 2 || num_bits_ >= 16) {
MS_LOG(EXCEPTION) << "Attr \'num_bits\' " << num_bits_ << "is out of range, expected between 2 and 16.";

View File

@ -49,12 +49,12 @@ bool FakeQuantPerChannelGradGpuKernel::Init(const CNodePtr &kernel_node) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but FakeQuantGrad GpuKernel OP needs 1 output.";
}
num_bits_ = GetValue<int>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("num_bits"));
num_bits_ = static_cast<int>(GetValue<int64_t>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("num_bits")));
if (num_bits_ <= 2 || num_bits_ >= 16) {
MS_LOG(EXCEPTION) << "Attr \'num_bits\' " << num_bits_ << " is out of range, expected between 2 and 16.";
}
quant_delay_ = GetValue<int>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("quant_delay"));
quant_delay_ = static_cast<int>(GetValue<int64_t>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("quant_delay")));
if (quant_delay_ < 0) {
MS_LOG(EXCEPTION) << "Attr \'quant_delay_\' " << quant_delay_ << " is less then 0, require larger than 0.";
}

View File

@ -52,8 +52,8 @@ bool FakeQuantPerLayerGpuKernel::Init(const CNodePtr &kernel_node) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but FakeQuant GpuKernel OP needs 1 output.";
}
num_bits_ = GetValue<int>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("num_bits"));
quant_delay_ = GetValue<int>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("quant_delay"));
num_bits_ = static_cast<int>(GetValue<int64_t>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("num_bits")));
quant_delay_ = static_cast<int>(GetValue<int64_t>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("quant_delay")));
training_ = GetValue<bool>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("training"));
symmetric_ = GetValue<bool>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("symmetric"));
narrow_range_ = GetValue<bool>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("narrow_range"));

View File

@ -48,12 +48,12 @@ bool FakeQuantPerLayerGradGpuKernel::Init(const CNodePtr &kernel_node) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but FakeQuantGrad GpuKernel OP needs 1 output.";
}
num_bits_ = GetValue<int>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("num_bits"));
num_bits_ = static_cast<int>(GetValue<int64_t>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("num_bits")));
if (num_bits_ <= 2 || num_bits_ >= 16) {
MS_LOG(EXCEPTION) << "Attr \'num_bits\' " << num_bits_ << " is out of range, expected between 2 and 16.";
}
quant_delay_ = GetValue<int>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("quant_delay"));
quant_delay_ = static_cast<int>(GetValue<int64_t>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("quant_delay")));
if (quant_delay_ < 0) {
MS_LOG(EXCEPTION) << "Attr \'quant_delay_\' " << quant_delay_ << " is less then 0, require larger than 0.";
}

View File

@ -80,8 +80,8 @@ class RandomChoiceWithMaskGpuKernel : public GpuKernel {
input_shape_5D_.insert(input_shape_5D_.begin(), 1);
}
// init seedc_
int seed = GetAttr<int>(kernel_node, "seed");
int seed2 = GetAttr<int>(kernel_node, "seed2");
int seed = static_cast<int>(GetAttr<int64_t>(kernel_node, "seed"));
int seed2 = static_cast<int>(GetAttr<int64_t>(kernel_node, "seed2"));
if (seed2 != 0)
seedc_ = seed2;
else if (seed != 0)
@ -92,7 +92,7 @@ class RandomChoiceWithMaskGpuKernel : public GpuKernel {
for (size_t i = 0; i < input_shape.size(); i++) {
input_size_ *= input_shape[i];
}
count_ = GetAttr<int>(kernel_node, "count");
count_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "count"));
// upper ceiling for input for ceil_power2
ceil_power2_ = RcwmRoundUpPower2(input_size_);
InitSizeLists();

View File

@ -114,16 +114,16 @@ bool HcomUtil::GetHcomCount(const AnfNodePtr &anf_node, const vector<HcclDataTyp
}
if (AnfAlgo::GetCNodeName(anf_node) == kReduceScatterOpName) {
int32_t rank_size;
int64_t rank_size;
auto primitive = AnfAlgo::GetCNodePrimitive(anf_node);
MS_EXCEPTION_IF_NULL(primitive);
if (primitive->GetAttr("rank_size") != nullptr) {
rank_size = GetValue<int32_t>(primitive->GetAttr("rank_size"));
rank_size = GetValue<int64_t>(primitive->GetAttr("rank_size"));
} else {
MS_LOG(ERROR) << "Get rank size failed";
return false;
}
block_size = input_size / IntToSize(rank_size);
block_size = input_size / LongToSize(rank_size);
total_size = total_size + block_size;
} else {
if (AnfAlgo::GetCNodeName(anf_node) == kAllGatherOpName) {
@ -175,7 +175,7 @@ bool HcomUtil::GetHcomRootId(const AnfNodePtr &anf_node, uint32_t *root_id) {
auto primitive = AnfAlgo::GetCNodePrimitive(anf_node);
MS_EXCEPTION_IF_NULL(primitive);
if (primitive->GetAttr("root_rank") != nullptr) {
*root_id = (uint32_t)GetValue<int>(primitive->GetAttr("root_rank"));
*root_id = (uint32_t)GetValue<int64_t>(primitive->GetAttr("root_rank"));
} else {
MS_LOG(ERROR) << "HcomUtil::Get HCOM_ATTR_ROOT_INDEX fail, not support!";
return false;

View File

@ -27,12 +27,12 @@ void DynamicShapeKernel::Execute() {
}
auto prev_output_shape = AnfAlgo::GetPrevNodeOutputInferShape(cnode_ptr_, 0);
auto output_shape = std::vector<int>(SizeToInt(prev_output_shape.size()));
auto output_shape = std::vector<int64_t>(SizeToLong(prev_output_shape.size()));
auto output_type = TypeId::kNumberTypeInt32;
auto output_type = TypeId::kNumberTypeInt64;
auto output_tensor_for_sync = std::make_shared<tensor::Tensor>(output_type, output_shape);
auto data_ptr = static_cast<int32_t *>(output_tensor_for_sync->data_c());
auto data_ptr = static_cast<int64_t *>(output_tensor_for_sync->data_c());
for (size_t i = 0; i < prev_output_shape.size(); ++i) {
MS_LOG(INFO) << "DEBUG prev_output_shape[" << i << "]:" << prev_output_shape[i];
*(data_ptr + i) = prev_output_shape[i];

View File

@ -28,9 +28,9 @@
namespace mindspore {
namespace kernel {
using mindspore::kernel::tbe::TbeUtils;
std::map<int32_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo> &fusion_scopes) {
std::map<int64_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo> &fusion_scopes) {
MS_LOG(INFO) << "kernel fusion build start, scope size:" << fusion_scopes.size();
std::map<int32_t, KernelModPtr> kernel_mod_ret;
std::map<int64_t, KernelModPtr> kernel_mod_ret;
auto build_manger = std::make_shared<ParallelBuildManager>();
MS_EXCEPTION_IF_NULL(build_manger);
for (const auto &fusion_scope_iter : fusion_scopes) {

View File

@ -26,15 +26,15 @@ namespace kernel {
* @brief fuse op and return a callable mod
*/
struct FusionScopeInfo {
FusionScopeInfo(int32_t id, std::vector<AnfNodePtr> in, std::vector<AnfNodePtr> comp, std::vector<AnfNodePtr> out)
FusionScopeInfo(int64_t id, std::vector<AnfNodePtr> in, std::vector<AnfNodePtr> comp, std::vector<AnfNodePtr> out)
: scope_id(id), input_nodes(std::move(in)), compute_nodes(std::move(comp)), output_nodes(std::move(out)) {}
int32_t scope_id{};
int64_t scope_id{};
std::vector<AnfNodePtr> input_nodes;
std::vector<AnfNodePtr> compute_nodes;
std::vector<AnfNodePtr> output_nodes;
};
std::map<int32_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo> &fusion_scopes);
std::map<int64_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo> &fusion_scopes);
} // namespace kernel
} // namespace mindspore

View File

@ -69,6 +69,7 @@ constexpr auto kVTypeBool = "bool";
constexpr auto kVTypeFloat = "float";
constexpr auto kVTypeListInt = "listInt";
constexpr auto kVTypeInt32 = "Int32";
constexpr auto kVTypeInt64 = "Int64";
constexpr auto kVTypeListUInt64 = "listUInt64";
constexpr auto kVTypeListFloat = "listFloat";
constexpr auto kVTypeListListInt = "listListInt";
@ -241,9 +242,9 @@ bool GetInputNameAndRealNum(const std::shared_ptr<AnfNode> &anf_node, const std:
MS_EXCEPTION_IF_NULL(op_input_name);
auto primitive = AnfAlgo::GetCNodePrimitive(anf_node);
// for dynamic input number, dyn_input_sizes has the info of dynamic input num for each input.
std::vector<int> dyn_input_sizes;
std::vector<int64_t> dyn_input_sizes;
if (primitive->GetAttr(kAttrDynInputSizes) != nullptr) {
dyn_input_sizes = GetValue<const std::vector<int>>(primitive->GetAttr(kAttrDynInputSizes));
dyn_input_sizes = GetValue<const std::vector<int64_t>>(primitive->GetAttr(kAttrDynInputSizes));
}
if (input_ptr->param_type() == kParamDynamic) {
@ -251,7 +252,7 @@ bool GetInputNameAndRealNum(const std::shared_ptr<AnfNode> &anf_node, const std:
MS_LOG(ERROR) << "Dyn input index" << *dyn_input_index << "is over dyn input num" << dyn_input_sizes.size();
return false;
}
*input_num = IntToSize(dyn_input_sizes[*dyn_input_index]);
*input_num = LongToSize(dyn_input_sizes[*dyn_input_index]);
*op_input_name = input_ptr->name() + "_dynamic_";
(*dyn_input_index)++;
// if optional input is exist
@ -454,7 +455,15 @@ void TbeKernelJsonCreator::ParseAttrValue(const std::string &type, const mindspo
MS_EXCEPTION_IF_NULL(value);
MS_EXCEPTION_IF_NULL(attr_obj);
if (type == kVTypeInt) {
auto attr_value = GetValue<int>(value);
if (value->isa<Int32Imm>()) {
auto attr_value = GetValue<int>(value);
(*attr_obj)[kJValue] = attr_value;
} else {
auto attr_value = GetValue<int64_t>(value);
(*attr_obj)[kJValue] = attr_value;
}
} else if (type == kVTypeInt64) {
auto attr_value = GetValue<int64_t>(value);
(*attr_obj)[kJValue] = attr_value;
} else if (type == kVTypeStr) {
auto attr_value = GetValue<std::string>(value);
@ -469,15 +478,25 @@ void TbeKernelJsonCreator::ParseAttrValue(const std::string &type, const mindspo
auto attr_value = GetValue<float>(value);
(*attr_obj)[kJValue] = attr_value;
} else if (type == kVTypeListInt) {
std::vector<int> attr_value;
std::vector<int64_t> attr_value;
auto value_type = value->type();
MS_EXCEPTION_IF_NULL(value_type);
auto value_type_str = value_type->ToString();
if (value_type_str == kVTypeInt32) {
int data = GetValue<int>(value);
if (value_type_str == kVTypeInt64) {
int64_t data = GetValue<int64_t>(value);
attr_value.push_back(data);
} else {
attr_value = GetValue<std::vector<int>>(value);
auto vec =
value->isa<ValueTuple>() ? value->cast<ValueTuplePtr>()->value() : value->cast<ValueListPtr>()->value();
if (!vec.empty()) {
if (vec[0]->isa<Int32Imm>()) {
std::vector<int32_t> attr_value_me = GetValue<std::vector<int32_t>>(value);
(void)std::transform(attr_value_me.begin(), attr_value_me.end(), std::back_inserter(attr_value),
[](const int &value) { return static_cast<int64_t>(value); });
} else {
attr_value = GetValue<std::vector<int64_t>>(value);
}
}
}
(*attr_obj)[kJValue] = attr_value;
} else if (type == kVTypeListFloat) {
@ -496,7 +515,7 @@ void TbeKernelJsonCreator::ParseAttrValue(const std::string &type, const mindspo
auto attr_value = GetValue<std::vector<size_t>>(value);
(*attr_obj)[kJValue] = attr_value;
} else if (type == kVTypeListListInt) {
auto attr_value = GetValue<std::vector<std::vector<int>>>(value);
auto attr_value = GetValue<std::vector<std::vector<int64_t>>>(value);
(*attr_obj)[kJValue] = attr_value;
} else {
MS_LOG(EXCEPTION) << "Type: " << type << "not support";
@ -959,17 +978,17 @@ bool TbeKernelBuild::IsDynamicInput(const mindspore::CNodePtr &cnode) {
MS_EXCEPTION_IF_NULL(primitive);
// for dynamic input number, dyn_input_sizes has the info of dynamic input num for each input.
bool ret = false;
std::vector<int> dyn_input_sizes;
std::vector<int64_t> dyn_input_sizes;
auto dynamic_input_attr = primitive->GetAttr(kAttrDynInputSizes);
if (dynamic_input_attr != nullptr) {
dyn_input_sizes = GetValue<const std::vector<int>>(dynamic_input_attr);
dyn_input_sizes = GetValue<const std::vector<int64_t>>(dynamic_input_attr);
auto real_input_size = cnode->inputs().size() - 1;
auto dyn_input_size = dyn_input_sizes.size();
if (dyn_input_size != 1) {
MS_LOG(INFO) << "Fusion error: fusion build not support dyn_input_sizes > 1";
return ret;
}
if (IntToSize(dyn_input_sizes[0]) != real_input_size) {
if (LongToSize(dyn_input_sizes[0]) != real_input_size) {
MS_LOG(INFO) << "Fusion error: dyn_input_size" << dyn_input_sizes[0] << "not equal real_input_size"
<< real_input_size;
return ret;
@ -1069,7 +1088,7 @@ bool TbeKernelBuild::GenFusionComputeInputJson(const mindspore::CNodePtr &cnode,
return true;
}
std::vector<size_t> TbeKernelBuild::GetDescOutputIndex(const std::vector<int> &output_used_nums) {
std::vector<size_t> TbeKernelBuild::GetDescOutputIndex(const std::vector<int64_t> &output_used_nums) {
std::vector<size_t> desc_output_index = {};
for (size_t idx = 0; idx < output_used_nums.size(); ++idx) {
auto output_use_num_item = output_used_nums[idx];
@ -1087,7 +1106,7 @@ bool TbeKernelBuild::GenFusionComputeOutputJson(const mindspore::CNodePtr &cnode
MS_EXCEPTION_IF_NULL(output_desc_list);
auto output_size = AnfAlgo::GetOutputTensorNum(cnode);
if (AnfAlgo::HasNodeAttr(kAttrOutputUsedNum, cnode)) {
auto output_used_nums = AnfAlgo::GetNodeAttr<std::vector<int>>(cnode, kAttrOutputUsedNum);
auto output_used_nums = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(cnode, kAttrOutputUsedNum);
MS_LOG(INFO) << "Fusion info: this node's output has been reused, node name: " << cnode->fullname_with_scope();
if (output_used_nums.size() != output_size) {
MS_LOG(INFO) << "Fusion error: output tenor num(" << output_size << ")"

View File

@ -58,7 +58,7 @@ class TbeKernelBuild {
static bool GenFusionComputeInputJson(const mindspore::CNodePtr &cnode,
std::vector<std::vector<mindspore::AnfNodePtr>>::iterator *layer_iter,
std::vector<nlohmann::json> *input_desc_list, size_t *index);
static std::vector<size_t> GetDescOutputIndex(const std::vector<int> &output_used_nums);
static std::vector<size_t> GetDescOutputIndex(const std::vector<int64_t> &output_used_nums);
static bool GenFusionComputeOutputJson(const mindspore::CNodePtr &cnode,
std::vector<nlohmann::json> *output_desc_list);
static void GenPreDescJson(nlohmann::json *output_desc);

View File

@ -35,7 +35,7 @@ bool TbeKernelBroadCastSelecter::GetShapeInfo(SupportFormat *support_format) {
output_shapes_.clear();
if (AnfAlgo::HasNodeAttr(kAttrDynInputSizes, cnode_ptr_)) {
MS_LOG(INFO) << "This broadcast node has dynamic input.";
auto dynamic_size_vec = AnfAlgo::GetNodeAttr<std::vector<int>>(cnode_ptr_, kAttrDynInputSizes);
auto dynamic_size_vec = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(cnode_ptr_, kAttrDynInputSizes);
if (dynamic_size_vec.empty() || dynamic_size_vec[0] < 2) {
MS_LOG(EXCEPTION) << "dynamic attr set error, please check.";
}

View File

@ -43,7 +43,7 @@ class TbeKernelReduceSelecter {
CNodePtr cnode_ptr_;
std::vector<size_t> input_shape_{};
std::vector<size_t> output_shape_{};
std::vector<int> axis_{};
std::vector<int64_t> axis_{};
bool keep_dims_ = false;
};
} // namespace kernel

Some files were not shown because too many files have changed in this diff Show More