forked from mindspore-Ecosystem/mindspore
support int64 shape
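
This change migrates shape-related kernel attributes from 32-bit int to int64_t across the CPU and GPU kernel compilers: scalar attributes such as axis, group, offset and split_num are now read as int64_t; list attributes such as strides, dilations, pad lists, begin/end/size and perm are read as std::vector<int64_t>; and the kernel members that store them are widened to match. Where surrounding code still expects 32-bit values, the int64 attribute is narrowed explicitly with static_cast<int>, element-wise via std::transform for vectors. The two recurring idioms are sketched below, after the commit metadata.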
parent 0c588571b4
commit d7faa77b5e
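
A minimal, self-contained sketch of the two conversion idioms (illustrative only: GetScalarAttr and GetVectorAttr are hypothetical stand-ins for the AnfAlgo::GetNodeAttr / GetValue calls in the hunks below):

// Illustrative sketch of the int64 attribute migration pattern in this commit.
#include <algorithm>
#include <cstdint>
#include <iterator>
#include <vector>

// Hypothetical stand-in for reading a scalar int64 attribute such as "axis".
int64_t GetScalarAttr() { return 2; }
// Hypothetical stand-in for reading a list attribute such as "strides".
std::vector<int64_t> GetVectorAttr() { return {1, 1, 2, 2}; }

int main() {
  // Scalar idiom: read the attribute as int64_t, then narrow explicitly
  // where the surrounding kernel code still works with 32-bit ints.
  int axis = static_cast<int>(GetScalarAttr());

  // Vector idiom: read std::vector<int64_t>, then narrow element-wise with
  // std::transform, as the hunks below do for strides, pad lists and perm.
  std::vector<int64_t> stride_me = GetVectorAttr();
  std::vector<int> stride_ori;
  (void)std::transform(stride_me.begin(), stride_me.end(), std::back_inserter(stride_ori),
                       [](const int64_t &value) { return static_cast<int>(value); });
  return (axis == 2 && stride_ori.size() == 4) ? 0 : 1;
}
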
@@ -116,7 +116,7 @@ void ParseAttrValue(const std::string &type, const std::string &attr_name, const
   MS_EXCEPTION_IF_NULL(node_attr);
   MS_EXCEPTION_IF_NULL(value);
   if (type == "int") {
-    auto attr_value = GetValue<int>(value);
+    auto attr_value = static_cast<int>(GetValue<int64_t>(value));
     (*node_attr)[attr_name].set_i(attr_value);
   } else if (type == "str") {
     auto attr_value = GetValue<std::string>(value);
@@ -128,15 +128,15 @@ void ParseAttrValue(const std::string &type, const std::string &attr_name, const
     auto attr_value = GetValue<float>(value);
     (*node_attr)[attr_name].set_f(attr_value);
   } else if (type == "listInt") {
-    std::vector<int> attr_value;
+    std::vector<int64_t> attr_value;
     auto value_type = value->type();
     MS_EXCEPTION_IF_NULL(value_type);
     auto value_type_str = value_type->ToString();
-    if (value_type_str == "Int32") {
-      int data = GetValue<int>(value);
+    if (value_type_str == "Int64") {
+      int64_t data = GetValue<int64_t>(value);
       attr_value.push_back(data);
     } else {
-      attr_value = GetValue<std::vector<int>>(value);
+      attr_value = GetValue<std::vector<int64_t>>(value);
     }
     mindspore::AttrValue input_shape_attr;
     mindspore::AttrValue_ArrayValue *input_shape_attr_list = input_shape_attr.mutable_array();

@@ -168,7 +168,7 @@ class CNodeDecoder {
       output_formats_.push_back(output_desc[kJsonKeyFormat]);
       output_types_.push_back(DtypeToTypeId(output_desc[kJsonKeyDataType]));
       auto get_item =
-        func_graph->NewCNode({NewValueNode(prim::kPrimTupleGetItem), cnode_, NewValueNode(SizeToInt(j))});
+        func_graph->NewCNode({NewValueNode(prim::kPrimTupleGetItem), cnode_, NewValueNode(SizeToLong(j))});
       func_graph->AddNode(get_item);
       nodes_map_[output_desc[kJsonKeyTensorName]] = get_item;
     }

@@ -35,7 +35,10 @@ std::vector<int> GetDynInputSize(const AnfNodePtr &anf_node) {
   auto primitive = AnfAlgo::GetCNodePrimitive(anf_node);
   MS_EXCEPTION_IF_NULL(primitive);
   if (primitive->HasAttr(kAttrDynInputSizes)) {
-    dyn_input_sizes = GetValue<const std::vector<int>>(primitive->GetAttr(kAttrDynInputSizes));
+    std::vector<int64_t> dyn_input_sizes_me =
+      GetValue<const std::vector<int64_t>>(primitive->GetAttr(kAttrDynInputSizes));
+    (void)std::transform(dyn_input_sizes_me.begin(), dyn_input_sizes_me.end(), std::back_inserter(dyn_input_sizes),
+                         [](const int64_t &value) { return static_cast<int>(value); });
   }
   return dyn_input_sizes;
 }
@@ -256,7 +259,7 @@ void AkgKernelJsonGenerator::GetAttrJson(const AnfNodePtr &anf_node, const std::
   std::string type = op_attr->type();
   (*attr_json)[kJsonKeyDataType] = type;
   if (type == "int") {
-    (*attr_json)[kJsonKeyValue] = GetValue<int>(attr_value);
+    (*attr_json)[kJsonKeyValue] = static_cast<int>(GetValue<int64_t>(attr_value));
   } else if (type == "str") {
     (*attr_json)[kJsonKeyValue] = GetValue<std::string>(attr_value);
   } else if (type == "bool") {
@@ -264,7 +267,11 @@ void AkgKernelJsonGenerator::GetAttrJson(const AnfNodePtr &anf_node, const std::
   } else if (type == "float") {
     (*attr_json)[kJsonKeyValue] = GetValue<float>(attr_value);
   } else if (type == "listInt") {
-    (*attr_json)[kJsonKeyValue] = GetValue<std::vector<int>>(attr_value);
+    std::vector<int> list_int;
+    std::vector<int64_t> list_int_me = GetValue<std::vector<int64_t>>(attr_value);
+    (void)std::transform(list_int_me.begin(), list_int_me.end(), std::back_inserter(list_int),
+                         [](const int64_t &value) { return static_cast<int>(value); });
+    (*attr_json)[kJsonKeyValue] = list_int;
   } else if (type == "listStr") {
     std::vector<std::string> data_format;
     if (op_attr->name() == kArgDataformat) {

@@ -235,7 +235,7 @@ size_t GetDtypeNbyte(const std::string &dtypes) {
 }

 bool SetInputKernelBuilderInfo(const std::vector<std::shared_ptr<OpIOInfo>> &inputs, size_t real_input_num,
-                               size_t builder_idex, const std::vector<int> &dyn_input_sizes,
+                               size_t builder_idex, const std::vector<int64_t> &dyn_input_sizes,
                                const std::shared_ptr<KernelBuildInfo::KernelBuildInfoBuilder> &builder) {
   MS_EXCEPTION_IF_NULL(builder);

@@ -262,7 +262,7 @@ bool SetInputKernelBuilderInfo(const std::vector<std::shared_ptr<OpIOInfo>> &inp
         return false;
       }

-      for (int t = 0; t < dyn_input_sizes[dyn_input_idx]; t++) {
+      for (int64_t t = 0; t < dyn_input_sizes[dyn_input_idx]; t++) {
         kernel_info_index++;
         auto type_id = DtypeToTypeId(dtypes[builder_idex]);
         inputs_device_type.push_back(type_id);
@@ -376,11 +376,11 @@ bool ParseMetadata(const CNodePtr &kernel_node, const std::shared_ptr<const OpIn
   size_t real_output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
   std::vector<std::shared_ptr<OpIOInfo>> inputs = op_info_ptr->inputs_ptr();
   std::vector<std::shared_ptr<OpIOInfo>> outputs = op_info_ptr->outputs_ptr();
-  std::vector<int> dyn_input_sizes;
+  std::vector<int64_t> dyn_input_sizes;
   auto primitive = AnfAlgo::GetCNodePrimitive(kernel_node);
   MS_EXCEPTION_IF_NULL(primitive);
   if (primitive->GetAttr("dyn_input_sizes") != nullptr) {
-    dyn_input_sizes = GetValue<std::vector<int>>(primitive->GetAttr("dyn_input_sizes"));
+    dyn_input_sizes = GetValue<std::vector<int64_t>>(primitive->GetAttr("dyn_input_sizes"));
   }
   if (inputs.size() > 0) {
     MS_EXCEPTION_IF_NULL(inputs[0]);
@@ -552,11 +552,11 @@ std::vector<std::pair<AnfNodePtr, std::pair<size_t, size_t>>> GetInputIndex(cons
       continue;
     }

-    std::vector<int> dyn_input_sizes;
+    std::vector<int64_t> dyn_input_sizes;
     auto prim = AnfAlgo::GetCNodePrimitive(anf_node);
     MS_EXCEPTION_IF_NULL(prim);
     if (prim->GetAttr(kAttrDynInputSizes) != nullptr) {
-      dyn_input_sizes = GetValue<const std::vector<int>>(prim->GetAttr(kAttrDynInputSizes));
+      dyn_input_sizes = GetValue<const std::vector<int64_t>>(prim->GetAttr(kAttrDynInputSizes));
     }

     if (dyn_input_sizes.empty()) {
@@ -764,28 +764,26 @@ bool IsWeightBoundary(const AnfNodePtr &node) {
   return false;
 }

-std::vector<int> GetReduceAttrAxis(const CNodePtr &cnode) {
+std::vector<int64_t> GetReduceAttrAxis(const CNodePtr &cnode) {
   if (AnfAlgo::GetInputTensorNum(cnode) != AnfAlgo::GetOutputTensorNum(cnode) &&
       AnfAlgo::GetInputTensorNum(cnode) != 1) {
     MS_LOG(EXCEPTION) << "the kind of reduce node [" << cnode->DebugString()
                       << "] is not single input or single output ";
   }
-  std::vector<int> axis;
+  std::vector<int64_t> axis;
   auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(cnode, 0);
   auto primitive = AnfAlgo::GetCNodePrimitive(cnode);
   MS_EXCEPTION_IF_NULL(primitive);
   auto axis_attr = primitive->GetAttr(kAxis);
   if (axis_attr == nullptr) {
     MS_LOG(ERROR) << "This node does't have axie attr.";
-    return std::vector<int>();
+    return std::vector<int64_t>();
   }
   auto type = axis_attr->type();
   MS_EXCEPTION_IF_NULL(type);
-  std::vector<int> axis_list;
-  if (type->ToString() == kTypeInt32) {
-    axis_list.emplace_back(GetValue<int>(axis_attr));
+  std::vector<int64_t> axis_list;
+  if (axis_attr->isa<Int64Imm>()) {
+    axis_list.emplace_back(GetValue<int64_t>(axis_attr));
   } else {
-    axis_list = GetValue<std::vector<int>>(axis_attr);
+    axis_list = GetValue<std::vector<int64_t>>(axis_attr);
   }
   for (const auto &elem : axis_list) {
     if (elem < 0) {

@@ -100,7 +100,7 @@ void GetFuncGraphOutputNodes(const FuncGraphPtr &func_graph, std::vector<AnfNode
 bool GetInputTensorValue(const AnfNodePtr &anf_node, size_t input_idx, nlohmann::json *const node_json);
 void GetGraphRealOutput(const FuncGraphPtr &func_graph, std::vector<std::pair<AnfNodePtr, size_t>> *node_list);
 bool IsWeightBoundary(const AnfNodePtr &node);
-std::vector<int> GetReduceAttrAxis(const CNodePtr &cnode);
+std::vector<int64_t> GetReduceAttrAxis(const CNodePtr &cnode);
 std::string GetProcessorStr(const AnfNodePtr &anf_node);

 template <typename T>

@@ -27,7 +27,7 @@ void ArgmaxCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   batch_size_ = shape[0];
   class_num_ = shape[1];

-  int axis = AnfAlgo::GetNodeAttr<int>(kernel_node, AXIS);
+  int64_t axis = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, AXIS);
   if (axis != -1 && axis != 1) {
     MS_LOG(EXCEPTION) << "argmax kernel not support axis " << axis;
   }

@@ -22,12 +22,12 @@ namespace kernel {
 void ConcatCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   CheckParam(kernel_node);

-  axis_ = AnfAlgo::GetNodeAttr<int>(kernel_node, AXIS);
+  axis_ = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, AXIS);
   auto input_1_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
   if (axis_ < 0) {
-    axis_ = axis_ + SizeToInt(input_1_shape.size());
+    axis_ = axis_ + SizeToLong(input_1_shape.size());
   }
-  axis_ += 4 - input_1_shape.size();
+  axis_ += 4 - SizeToLong(input_1_shape.size());

   auto input_num = AnfAlgo::GetInputTensorNum(kernel_node);
   for (size_t i = 0; i < input_num; i++) {

@@ -36,7 +36,7 @@ class ConcatCPUKernel : public CPUKernel {
   void CheckParam(const CNodePtr &kernel_node);
   void CopyDataToOutput(const std::vector<kernel::AddressPtr> &inputs, size_t dim0, size_t dim1, size_t dim2,
                         float **output_addr, size_t *buff_size);
-  int axis_;
+  int64_t axis_;
   std::vector<std::vector<size_t>> input_shape_list_;
   std::vector<size_t> output_shape_;
 };

@@ -22,7 +22,7 @@ namespace mindspore {
 namespace kernel {
 void EmbeddingLookUpCommGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   CheckParam(kernel_node);
-  split_num_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "split_num");
+  split_num_ = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "split_num");
   MS_LOG(INFO) << "split_num: " << split_num_;
   auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
   if (input_shape[0] % split_num_ != 0) {
@@ -47,9 +47,9 @@ bool EmbeddingLookUpCommGradCPUKernel::Launch(const std::vector<kernel::AddressP
   MS_LOG(DEBUG) << "output addr: " << output_addr << "output size: " << output_size;
   memset_s(output_addr, output_size, 0, output_size);
   const std::vector<int> &rank_group = {0, 1, 2, 3, 4, 5, 6, 7};
-  size_t input_split_lens = input_size / split_num_ / sizeof(float_t);
-  size_t output_split_lens = output_size / split_num_ / sizeof(float_t);
-  for (int i = 0; i < split_num_; i++) {
+  size_t input_split_lens = input_size / LongToSize(split_num_) / sizeof(float_t);
+  size_t output_split_lens = output_size / LongToSize(split_num_) / sizeof(float_t);
+  for (int64_t i = 0; i < split_num_; i++) {
     MPIAllGather(input_addr + i * input_split_lens, output_addr + i * output_split_lens, rank_group, input_split_lens);
   }
 #if defined(_WIN32) || defined(_WIN64)

@@ -34,7 +34,7 @@ class EmbeddingLookUpCommGradCPUKernel : public CPUKernel {

  private:
   void CheckParam(const CNodePtr &kernel_node);
-  int split_num_;
+  int64_t split_num_;
 };

 MS_REG_CPU_KERNEL(EmbeddingLookupCommGrad,

@@ -61,7 +61,7 @@ void EmbeddingLookUpCPUKernel::InitKernel(const CNodePtr &kernel_node) {
     indices_lens_ *= shape;
   }
   if (AnfAlgo::HasNodeAttr(kAttrOffset, kernel_node)) {
-    offset_ = AnfAlgo::GetNodeAttr<int>(kernel_node, kAttrOffset);
+    offset_ = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, kAttrOffset);
   }
   indices_data_type_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 1);
 }

@@ -37,7 +37,7 @@ class EmbeddingLookUpCPUKernel : public CPUKernel {

  protected:
   void CheckParam(const CNodePtr &kernel_node);
-  int offset_{0};
+  int64_t offset_{0};
   size_t indices_lens_{1};
   size_t first_dim_size_{1};
   size_t outer_dim_size_{1};

@@ -23,9 +23,9 @@ void GatherV2CPUKernel::InitKernel(const CNodePtr &kernel_node) {
   input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
   indices_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
   output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);
-  axis_ = AnfAlgo::GetNodeAttr<int>(kernel_node, AXIS);
+  axis_ = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, AXIS);
   if (axis_ < 0) {
-    axis_ = axis_ + SizeToInt(input_shape_.size());
+    axis_ = axis_ + SizeToLong(input_shape_.size());
   }
   axis_ += 4 - input_shape_.size();
   CPUKernelUtils::ExpandDimsTo4(&input_shape_);
@@ -75,7 +75,7 @@ void GatherV2CPUKernel::CopyDataToOutput(const std::vector<kernel::AddressPtr> &
     MS_LOG(EXCEPTION) << "The indices value is less than 0.";
   }
   size_t index = IntToSize(indices_addr[i]);
-  if (index >= input_shape_[IntToSize(axis_)]) {
+  if (index >= input_shape_[LongToSize(axis_)]) {
     auto ret = memset_s(*output_addr, *buff_size, 0., num * sizeof(float));
     if (ret != EOK) {
       MS_LOG(EXCEPTION) << "memset failed.";

@@ -39,7 +39,7 @@ class GatherV2CPUKernel : public CPUKernel {
   std::vector<size_t> input_shape_;
   std::vector<size_t> indices_shape_;
   std::vector<size_t> output_shape_;
-  int axis_;
+  int64_t axis_;
 };

 MS_REG_CPU_KERNEL(

@@ -15,6 +15,7 @@
  */
 #include "backend/kernel_compiler/cpu/mkldnn/conv2d_cpu_kernel.h"
 #include <string>
+#include <algorithm>
 #include "utils/ms_utils.h"
 #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
 #include "runtime/device/cpu/cpu_device_address.h"
@@ -30,7 +31,7 @@ void Conv2dCPUKernel::InitKernel(const CNodePtr &kernel_node) {
     MS_LOG(EXCEPTION) << "conv2d only support nchw input!";
   }
   std::vector<size_t> kernel_size({weight_shape[2], weight_shape[3]});
-  size_t group = IntToSize(AnfAlgo::GetNodeAttr<int>(kernel_node, GROUP));
+  size_t group = LongToSize(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, GROUP));
   if (group != 1) {
     if (src_shape[1] % group != 0) {
       MS_LOG(EXCEPTION) << "conv2d channels should be divided by group!";
@@ -41,8 +42,14 @@ void Conv2dCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape);
   dnnl::memory::desc weights_desc = GetDefaultMemDesc(weight_shape);
   dnnl::memory::desc dst_desc = GetDefaultMemDesc(dst_shape);
-  auto stride_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, STRIDE);
-  auto dilation_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, DILATION);
+  std::vector<int> stride_ori;
+  std::vector<int> dilation_ori;
+  auto stride_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, STRIDE);
+  auto dilation_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, DILATION);
+  (void)std::transform(stride_me.begin(), stride_me.end(), std::back_inserter(stride_ori),
+                       [](const int64_t &value) { return static_cast<int>(value); });
+  (void)std::transform(dilation_me.begin(), dilation_me.end(), std::back_inserter(dilation_ori),
+                       [](const int64_t &value) { return static_cast<int>(value); });
   if (stride_ori.size() != 4 || stride_ori[2] != stride_ori[3]) {
     MS_LOG(EXCEPTION) << "conv2d only support equal stride, and stride must be 4d!";
   }

@@ -15,6 +15,7 @@
  */
 #include "backend/kernel_compiler/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.h"
 #include <string>
+#include <algorithm>
 #include "utils/ms_utils.h"
 #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
 #include "runtime/device/cpu/cpu_device_address.h"
@@ -30,7 +31,7 @@ void Conv2dGradFilterCPUKernel::InitKernel(const CNodePtr &kernel_node) {
     MS_LOG(EXCEPTION) << ("conv2d grad filter only support nchw input!");
   }
   std::vector<size_t> kernel_size({weight_shape[2], weight_shape[3]});
-  size_t group = IntToSize(AnfAlgo::GetNodeAttr<int>(kernel_node, GROUP));
+  size_t group = LongToSize(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, GROUP));
   if (group != 1) {
     if (src_shape[1] % group != 0) {
       MS_LOG(EXCEPTION) << "conv2d channels should be divided by group!";
@@ -41,8 +42,14 @@ void Conv2dGradFilterCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape);
   dnnl::memory::desc weights_desc = GetDefaultMemDesc(weight_shape);
   dnnl::memory::desc dst_desc = GetDefaultMemDesc(dst_shape);
-  auto stride_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, STRIDE);
-  auto dilation_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, DILATION);
+  std::vector<int> stride_ori;
+  std::vector<int> dilation_ori;
+  auto stride_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, STRIDE);
+  auto dilation_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, DILATION);
+  (void)std::transform(stride_me.begin(), stride_me.end(), std::back_inserter(stride_ori),
+                       [](const int64_t &value) { return static_cast<int>(value); });
+  (void)std::transform(dilation_me.begin(), dilation_me.end(), std::back_inserter(dilation_ori),
+                       [](const int64_t &value) { return static_cast<int>(value); });
   if (stride_ori.size() != 2 || stride_ori[0] != stride_ori[1]) {
     MS_LOG(EXCEPTION) << "Conv2dGradFilterCPUKernel only support equal stride, and stride must be 2d!";
   }

@@ -15,6 +15,7 @@
  */
 #include "backend/kernel_compiler/cpu/mkldnn/conv2d_grad_input_cpu_kernel.h"
 #include <string>
+#include <algorithm>
 #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
 #include "runtime/device/cpu/cpu_device_address.h"
 #include "utils/ms_utils.h"
@@ -30,7 +31,7 @@ void Conv2dGradInputCPUKernel::InitKernel(const CNodePtr &kernel_node) {
     MS_LOG(EXCEPTION) << "conv2d grad filter only support nchw input!";
   }
   std::vector<size_t> kernel_size({weight_shape[2], weight_shape[3]});
-  size_t group = IntToSize(AnfAlgo::GetNodeAttr<int>(kernel_node, GROUP));
+  size_t group = LongToSize(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, GROUP));
   if (group != 1) {
     if (src_shape[1] % group != 0) {
       MS_LOG(EXCEPTION) << "conv2d channels should be divided by group!";
@@ -42,8 +43,14 @@ void Conv2dGradInputCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   dnnl::memory::desc weights_desc = GetDefaultMemDesc(weight_shape);
   dnnl::memory::desc dst_desc = GetDefaultMemDesc(dst_shape);

-  auto stride_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, STRIDE);
-  auto dilation_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, DILATION);
+  std::vector<int> stride_ori;
+  std::vector<int> dilation_ori;
+  auto stride_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, STRIDE);
+  auto dilation_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, DILATION);
+  (void)std::transform(stride_me.begin(), stride_me.end(), std::back_inserter(stride_ori),
+                       [](const int64_t &value) { return static_cast<int>(value); });
+  (void)std::transform(dilation_me.begin(), dilation_me.end(), std::back_inserter(dilation_ori),
+                       [](const int64_t &value) { return static_cast<int>(value); });
   if (stride_ori.size() != 2 || stride_ori[0] != stride_ori[1]) {
     MS_LOG(EXCEPTION) << "Conv2dGradInputCPUKernel only support equal stride, and stride must be 2d!";
   }

@@ -76,9 +76,9 @@ void LstmCPUKernel::CheckParam(const CNodePtr &kernel_node) {
   std::vector<size_t> src_h_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
   std::vector<size_t> src_c_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 2);
   bidirectional_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "bidirectional");
-  input_size_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "input_size");
-  hidden_size_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "hidden_size");
-  num_layers_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "num_layers");
+  input_size_ = static_cast<int>(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "input_size"));
+  hidden_size_ = static_cast<int>(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "hidden_size"));
+  num_layers_ = static_cast<int>(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "num_layers"));
   has_bias_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "has_bias");
   batch_size_ = SizeToInt(src_shape[1]);
   seq_len_ = SizeToInt(src_shape[0]);

@@ -94,9 +94,9 @@ void LSTMGradCPUKernel::CheckParam(const CNodePtr &kernel_node) {
   std::vector<size_t> src_h_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
   std::vector<size_t> src_c_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 2);
   bidirectional_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "bidirectional");
-  input_size_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "input_size");
-  hidden_size_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "hidden_size");
-  num_layers_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "num_layers");
+  input_size_ = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "input_size");
+  hidden_size_ = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "hidden_size");
+  num_layers_ = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "num_layers");
   has_bias_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "has_bias");
   batch_size_ = SizeToInt(src_shape[1]);
   seq_len_ = SizeToInt(src_shape[0]);
@@ -104,20 +104,20 @@ void LSTMGradCPUKernel::CheckParam(const CNodePtr &kernel_node) {
   if (bidirectional_) {
     num_directions_ = 2;
   }
-  const int gate_size = 4 * hidden_size_;
+  const int64_t gate_size = 4 * hidden_size_;
   if (num_layers_ <= 0) {
     MS_LOG(EXCEPTION) << "layers must be greater than zero!";
   }
   if (num_layers_ > kMaxLSTMLayer) {
     MS_LOG(EXCEPTION) << "layers must be lower than 100!";
   }
-  for (int i = 0; i < num_layers_; ++i) {
+  for (int64_t i = 0; i < num_layers_; ++i) {
     weight_size_ += gate_size * (i == 0 ? input_size_ : hidden_size_ * num_directions_);
     weight_h_size_ += gate_size * hidden_size_;
   }
   weight_size_ = weight_size_ * num_directions_;
   weight_h_size_ = weight_h_size_ * num_directions_;
-  if (num_directions_ * num_layers_ != SizeToInt(src_h_shape[0])) {
+  if (num_directions_ * num_layers_ != SizeToLong(src_h_shape[0])) {
     MS_LOG(EXCEPTION) << "error iteration shape!";
   }
   if (src_shape.size() != 3 || src_h_shape.size() != 3 || src_c_shape.size() != 3) {

@@ -44,13 +44,13 @@ class LSTMGradCPUKernel : public MKLCPUKernel {
                       const dnnl::memory &diff_bias_memory);
   void ResetMemory(const dnnl::memory &mem, string name);
   void CheckParam(const CNodePtr &kernel_node);
-  int weight_size_ = 0;
-  int weight_h_size_ = 0;
-  int input_size_;
-  int hidden_size_;
-  int num_layers_;
-  int batch_size_;
-  int seq_len_;
+  int64_t weight_size_ = 0;
+  int64_t weight_h_size_ = 0;
+  int64_t input_size_;
+  int64_t hidden_size_;
+  int64_t num_layers_;
+  int64_t batch_size_;
+  int64_t seq_len_;
   int num_directions_;
   bool bidirectional_;
   bool has_bias_;

@@ -56,7 +56,10 @@ void MKLCPUKernel::GetPadding(const CNodePtr &kernel_node, const std::string &pa
     padding_r->emplace_back(0);
     padding_r->emplace_back(0);
   } else {
-    std::vector<int> pad = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, PAD_LIST);
+    std::vector<int> pad;
+    std::vector<int64_t> pad_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, PAD_LIST);
+    (void)std::transform(pad_me.begin(), pad_me.end(), std::back_inserter(pad),
+                         [](const int64_t &value) { return static_cast<int>(value); });
     padding_l->emplace_back(pad[0]);
     padding_l->emplace_back(pad[1]);
     padding_r->emplace_back(pad[2]);

@@ -28,8 +28,14 @@ void PoolingCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   std::vector<size_t> dst_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
   dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape);
   dnnl::memory::desc dst_desc = GetDefaultMemDesc(dst_shape);
-  std::vector<int> origin_kernel_sizes = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, KSIZE);
-  std::vector<int> strides = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, STRIDES);
+  std::vector<int> origin_kernel_sizes;
+  std::vector<int> strides;
+  std::vector<int64_t> kernel_sizes_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, KSIZE);
+  std::vector<int64_t> strides_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, STRIDES);
+  (void)std::transform(kernel_sizes_me.begin(), kernel_sizes_me.end(), std::back_inserter(origin_kernel_sizes),
+                       [](const int64_t &value) { return static_cast<int>(value); });
+  (void)std::transform(strides_me.begin(), strides_me.end(), std::back_inserter(strides),
+                       [](const int64_t &value) { return static_cast<int>(value); });
   if (origin_kernel_sizes.size() != 4 || strides.size() != 4) {
     MS_LOG(EXCEPTION) << "invalid kernel size " << origin_kernel_sizes.size() << " or stride size " << strides.size();
   }

@@ -27,8 +27,14 @@ void PoolingGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   MS_EXCEPTION_IF_NULL(kernel_node);
   src_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
   dst_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
-  std::vector<int> kernel_sizes = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, KSIZE);
-  std::vector<int> strides = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, STRIDES);
+  std::vector<int> kernel_sizes;
+  std::vector<int> strides;
+  auto kernel_sizes_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, KSIZE);
+  auto strides_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, STRIDES);
+  (void)std::transform(kernel_sizes_me.begin(), kernel_sizes_me.end(), std::back_inserter(kernel_sizes),
+                       [](const int64_t &value) { return static_cast<int>(value); });
+  (void)std::transform(strides_me.begin(), strides_me.end(), std::back_inserter(strides),
+                       [](const int64_t &value) { return static_cast<int>(value); });
   if (kernel_sizes.size() != 4 || strides.size() != 4 || src_shape_.size() != 4 || dst_shape_.size() != 4) {
     MS_LOG(EXCEPTION) << "pooling grad invalid input size";
   }

@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 #include "backend/kernel_compiler/cpu/mkldnn/softmax_cpu_kernel.h"
+#include <algorithm>
 #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
 #include "runtime/device/cpu/cpu_device_address.h"
 #include "utils/ms_utils.h"
@@ -23,7 +24,10 @@ namespace kernel {
 void SoftmaxCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   MS_EXCEPTION_IF_NULL(kernel_node);
   std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
-  std::vector<int> axis_list = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, AXIS);
+  std::vector<int> axis_list;
+  std::vector<int64_t> axis_list_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, AXIS);
+  (void)std::transform(axis_list_me.begin(), axis_list_me.end(), std::back_inserter(axis_list),
+                       [](const int64_t &value) { return static_cast<int>(value); });
   if (axis_list.size() != 1) {
     MS_LOG(EXCEPTION) << "cpu softmax only support input axis size 1";
   }

@@ -24,14 +24,14 @@ void OneHotCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   if (output_shape.size() < 2) {
     MS_LOG(EXCEPTION) << "invalid output shape size: " << output_shape.size();
   }
-  int axis = AnfAlgo::GetNodeAttr<int>(kernel_node, AXIS);
-  if (axis != -1 && IntToSize(axis) >= output_shape.size()) {
+  int64_t axis = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, AXIS);
+  if (axis != -1 && LongToSize(axis) >= output_shape.size()) {
     MS_LOG(EXCEPTION) << "invalid axis: " << axis;
   }
   if (axis == -1) {
     axis_ = output_shape.size() - 1;
   } else {
-    axis_ = IntToSize(axis);
+    axis_ = LongToSize(axis);
   }
   depth_ = output_shape[axis_];
   stride_ = 1;

@@ -44,7 +44,8 @@ void EmbeddingLookUpProxyKernel::InitKernel(const CNodePtr &kernel_node) {
   values.insert(values.end(), output_shape.begin(), output_shape.end());
   MS_LOG(INFO) << "Init embedding lookup proxy kernel, input shape:" << input_shape
                << ", indices_shape:" << indices_shape << ", output_shape:" << output_shape;
-  std::vector<int> lens{SizeToInt(input_shape.size()), SizeToInt(indices_shape.size()), SizeToInt(output_shape.size())};
+  std::vector<int64_t> lens{SizeToLong(input_shape.size()), SizeToLong(indices_shape.size()),
+                            SizeToLong(output_shape.size())};
   if (mindspore::ps::Util::IsRoleOfWorker()) {
     mindspore::ps::worker.AddEmbeddingTable(key_, input_shape[axis]);
     mindspore::ps::worker.InitPSEmbeddingTable(keys, values, lens);

@@ -39,11 +39,11 @@ class PushKernel : public CPUKernel {
     }
     std::vector<size_t> keys;
     std::vector<uintptr_t> addrs;
-    std::vector<int> sizes;
+    std::vector<int64_t> sizes;
     for (auto input : inputs) {
       keys.push_back(key_);
       addrs.push_back(reinterpret_cast<uintptr_t>(input->addr));
-      sizes.push_back(SizeToInt(input->size) / sizeof(T));
+      sizes.push_back(SizeToLong(input->size) / sizeof(T));
     }
     mindspore::ps::worker.Push(keys, addrs, sizes);
     auto ret = memcpy_s(outputs[0]->addr, outputs[0]->size, &key_, sizeof(size_t));

@@ -56,8 +56,9 @@ class PushKernel : public CPUKernel {

   void Init(const CNodePtr &kernel_node) {
     key_ = AnfAlgo::GetNodeAttr<size_t>(kernel_node, kAttrPsKey);
-    auto optim_input_shapes = AnfAlgo::GetNodeAttr<std::vector<std::vector<int>>>(kernel_node, "optim_input_shapes");
-    std::vector<int> only_shape_indices = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "only_shape_indices");
+    auto optim_input_shapes =
+      AnfAlgo::GetNodeAttr<std::vector<std::vector<int64_t>>>(kernel_node, "optim_input_shapes");
+    auto only_shape_indices = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, "only_shape_indices");
     MS_LOG(INFO) << "Key " << key_ << " optimizer input shapes are:" << optim_input_shapes;
     MS_LOG(INFO) << "Only init shape indices are " << only_shape_indices;
     for (size_t i = 0; i < optim_input_shapes.size(); i++) {
@@ -66,7 +67,7 @@ class PushKernel : public CPUKernel {
       if (std::count(only_shape_indices.begin(), only_shape_indices.end(), i) == 0) {
         size_t size = sizeof(T);
         for (size_t j = 0; j < shape.size(); j++) {
-          size *= shape[j];
+          size *= LongToSize(shape[j]);
         }
         input_size_list_.push_back(size);
       }

@@ -16,6 +16,7 @@

 #include <string>
 #include <vector>
+#include <algorithm>
 #include "backend/kernel_compiler/cpu/reduce_cpu_kernel.h"
 #include "runtime/device/cpu/cpu_device_address.h"

@@ -87,7 +88,10 @@ bool ReduceCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
 void ReduceCPUKernel::CheckAxis(const CNodePtr &kernel_node) {
   auto axis_addr = AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr(AXIS);
   if (axis_addr->isa<ValueTuple>()) {
-    auto attr_axis = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, AXIS);
+    std::vector<int> attr_axis;
+    std::vector<int64_t> attr_axis_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, AXIS);
+    (void)std::transform(attr_axis_me.begin(), attr_axis_me.end(), std::back_inserter(attr_axis),
+                         [](const int64_t &value) { return static_cast<int>(value); });
     if (attr_axis.size() > shape_.size()) {
       MS_LOG(EXCEPTION) << "invalid axis size: " << axis_.size();
     } else if (attr_axis.empty()) {
@@ -105,8 +109,8 @@ void ReduceCPUKernel::CheckAxis(const CNodePtr &kernel_node) {
       axis_.push_back(IntToSize(axis));
     }
   }
-  } else if (axis_addr->isa<Int32Imm>()) {
-    int axis = AnfAlgo::GetNodeAttr<int>(kernel_node, AXIS);
+  } else if (axis_addr->isa<Int64Imm>()) {
+    int axis = static_cast<int64_t>(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, AXIS));
     while (axis < 0) {
       axis += SizeToInt(shape_.size());
     }

@@ -13,6 +13,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+#include <algorithm>
 #include "backend/kernel_compiler/cpu/slice_cpu_kernel.h"
 #include "runtime/device/cpu/cpu_device_address.h"

@@ -21,17 +22,26 @@ namespace kernel {
 void SliceCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   CheckParam(kernel_node);
   input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
-  begin_ = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, BEGIN);
+  std::vector<int64_t> begin_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, BEGIN);
+  (void)std::transform(begin_me.begin(), begin_me.end(), std::back_inserter(begin_),
+                       [](const int64_t &value) { return static_cast<int>(value); });
   auto prim = AnfAlgo::GetCNodePrimitive(kernel_node);
   MS_EXCEPTION_IF_NULL(prim);
   auto strides = prim->GetAttr(STRIDES);
   if (strides != nullptr) {
-    strides_ = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, STRIDES);
-    end_ = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, END);
+    std::vector<int64_t> strides_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, STRIDES);
+    std::vector<int64_t> end_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, END);
+    (void)std::transform(strides_me.begin(), strides_me.end(), std::back_inserter(strides_),
+                         [](const int64_t &value) { return static_cast<int>(value); });
+    (void)std::transform(end_me.begin(), end_me.end(), std::back_inserter(end_),
+                         [](const int64_t &value) { return static_cast<int>(value); });
     TransArg();
     ClipBegin();
   } else {
-    auto sizes = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, SIZE);
+    std::vector<int> sizes;
+    std::vector<int64_t> sizes_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, SIZE);
+    (void)std::transform(sizes_me.begin(), sizes_me.end(), std::back_inserter(sizes),
+                         [](const int64_t &value) { return static_cast<int>(value); });
     if (sizes.size() != input_shape_.size() || begin_.size() != input_shape_.size()) {
       MS_LOG(EXCEPTION) << "begin|size|input size must be equal";
     }

@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 #include "backend/kernel_compiler/cpu/slice_grad_cpu_kernel.h"
+#include <algorithm>
 #include "runtime/device/cpu/cpu_device_address.h"
 #include "ir/primitive.h"

@@ -22,19 +23,27 @@ namespace kernel {
 void SliceGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   CheckParam(kernel_node);
   output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);
-  begin_ = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, BEGIN);
+  std::vector<int64_t> begin_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, BEGIN);
+  (void)std::transform(begin_me.begin(), begin_me.end(), std::back_inserter(begin_),
+                       [](const int64_t &value) { return static_cast<int>(value); });
   auto prim = AnfAlgo::GetCNodePrimitive(kernel_node);
   MS_EXCEPTION_IF_NULL(prim);
   auto strides = prim->GetAttr(STRIDES);
   if (strides != nullptr) {
-    strides_ = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, STRIDES);
-    end_ = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, END);
+    std::vector<int64_t> strides_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, STRIDES);
+    std::vector<int64_t> end_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, END);
+    (void)std::transform(strides_me.begin(), strides_me.end(), std::back_inserter(strides_),
+                         [](const int64_t &value) { return static_cast<int>(value); });
+    (void)std::transform(end_me.begin(), end_me.end(), std::back_inserter(end_),
+                         [](const int64_t &value) { return static_cast<int>(value); });
     if (strides_.size() != end_.size() || strides_.size() != output_shape_.size()) {
       MS_LOG(EXCEPTION) << "stride|end|input size must be equal";
     }
     FormatArgs(true);
   } else {
-    size_ = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, SIZE);
+    std::vector<int64_t> size_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, SIZE);
+    (void)std::transform(size_me.begin(), size_me.end(), std::back_inserter(size_),
+                         [](const int64_t &value) { return static_cast<int>(value); });
     if (size_.size() != output_shape_.size() || begin_.size() != output_shape_.size()) {
       MS_LOG(EXCEPTION) << "begin|size|input size must be equal";
     }

@@ -15,6 +15,7 @@
  */

 #include "backend/kernel_compiler/cpu/tile_cpu_kernel.h"
+#include <algorithm>
 #include "runtime/device/cpu/cpu_device_address.h"

 namespace mindspore {
@@ -23,7 +24,9 @@ void TileCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   CheckParam(kernel_node);
   x_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
   y_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);
-  multiples_ = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "multiples");
+  std::vector<int64_t> multiples_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, "multiples");
+  (void)std::transform(multiples_me.begin(), multiples_me.end(), std::back_inserter(multiples_),
+                       [](const int64_t &value) { return static_cast<int>(value); });
   dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0);
 }

@@ -15,6 +15,7 @@
  */

 #include "backend/kernel_compiler/cpu/transpose_cpu_kernel.h"
+#include <algorithm>
 #include "runtime/device/cpu/cpu_device_address.h"
 namespace mindspore {
 namespace kernel {
@@ -22,7 +23,9 @@ const size_t kMaxDim = 100;
 void TransposeCPUFwdKernel::InitKernel(const CNodePtr &kernel_node) {
   MS_EXCEPTION_IF_NULL(kernel_node);
   shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
-  axis_ = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "perm");
+  std::vector<int64_t> axis_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, "perm");
+  (void)std::transform(axis_me.begin(), axis_me.end(), std::back_inserter(axis_),
+                       [](const int64_t &value) { return static_cast<int>(value); });
   if (shape_.size() != axis_.size()) {
     MS_LOG(EXCEPTION) << "The size of input shape and transpose axis shape must be equal.";
   }

@@ -43,9 +43,9 @@ template <typename T>
 void UniqueCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) {
   auto x_addr = reinterpret_cast<T *>(inputs[0]->addr);
   auto y_addr = reinterpret_cast<T *>(outputs[0]->addr);
-  auto idx_addr = reinterpret_cast<int *>(outputs[1]->addr);
+  auto idx_addr = reinterpret_cast<int64_t *>(outputs[1]->addr);

-  std::unordered_map<T, int> uniq;
+  std::unordered_map<T, int64_t> uniq;
   int n = SizeToInt(n_);
   uniq.reserve(n * 2);
   for (int i = 0, j = 0; i < n; ++i) {

@@ -64,7 +64,7 @@ class ArgmaxGpuKernel : public GpuKernel {
                     << "-D inputs.";
     }

-    axis_ = GetAttr<int>(kernel_node, "axis");
+    axis_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "axis"));
     if (axis_ < 0) {
       axis_ += SizeToInt(input_shape.size());
     }

@@ -47,7 +47,7 @@ class ArgmaxWithValueGpuKernel : public GpuKernel {
     std::vector<size_t> shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
     auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 1);
     int dims = shape.size();
-    int axis = GetAttr<int>(kernel_node, "axis");
+    int axis = static_cast<int>(GetAttr<int64_t>(kernel_node, "axis"));
     if (axis < 0) {
       axis += dims;
     }

@@ -20,6 +20,7 @@
 #include <map>
 #include <string>
 #include <vector>
+#include <algorithm>
 #include "backend/kernel_compiler/gpu/gpu_kernel.h"
 #include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
 #include "backend/kernel_compiler/gpu/kernel_constants.h"
@@ -97,7 +98,10 @@ class ArrayReduceGpuKernel : public GpuKernel {

     if (AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("axis")->isa<ValueTuple>() ||
         AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("axis")->isa<ValueList>()) {
-      auto attr_axis = GetAttr<std::vector<int>>(kernel_node, "axis");
+      std::vector<int> attr_axis;
+      std::vector<int64_t> attr_axis_me = GetAttr<std::vector<int64_t>>(kernel_node, "axis");
+      (void)std::transform(attr_axis_me.begin(), attr_axis_me.end(), std::back_inserter(attr_axis),
+                           [](const int64_t &value) { return static_cast<int>(value); });
       if (attr_axis.empty()) {
         axis_.push_back(-1);
       } else {
@@ -105,8 +109,8 @@ class ArrayReduceGpuKernel : public GpuKernel {
         axis < 0 ? axis_.push_back(axis + input_dim_length) : axis_.push_back(axis);
       }
     }
-    } else if (AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("axis")->isa<Int32Imm>()) {
-      int axis = GetAttr<int>(kernel_node, "axis");
+    } else if (AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("axis")->isa<Int64Imm>()) {
+      int axis = static_cast<int>(GetAttr<int64_t>(kernel_node, "axis"));
       axis < 0 ? axis_.push_back(axis + input_dim_length) : axis_.push_back(axis);
     } else {
       MS_LOG(EXCEPTION) << "Attribute axis type is invalid.";

@@ -63,7 +63,7 @@ class ConcatV2GpuFwdKernel : public GpuKernel {
     if (!CheckParam(kernel_node)) {
       return false;
     }
-    axis_ = GetAttr<int>(kernel_node, "axis");
+    axis_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "axis"));
     if (axis_ < 0) {
       auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
       axis_ += SizeToInt(input_shape.size());

@@ -55,7 +55,7 @@ class GatherGpuFwdKernel : public GpuKernel {
     index_shapes_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
     output_shapes_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);

-    axis_ = GetAttr<int>(kernel_node, "dim");
+    axis_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "dim"));
     if (axis_ < 0) {
       axis_ = axis_ + SizeToInt(input_shapes_.size());
     }

@@ -55,7 +55,7 @@ class GatherGradGpuKernel : public GpuKernel {
     grad_shapes_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
     output_shapes_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);

-    axis_ = GetAttr<int>(kernel_node, "dim");
+    axis_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "dim"));
     if (axis_ < 0) {
       axis_ = axis_ + SizeToInt(index_shapes_.size());
     }

@@ -56,7 +56,7 @@ class GatherV2GpuFwdKernel : public GpuKernel {
     indices_shapes_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
     output_shapes_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);

-    axis_ = GetAttr<int>(kernel_node, "axis");
+    axis_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "axis"));
     if (axis_ < 0) {
       axis_ = axis_ + SizeToInt(input_shapes_.size());
     }

@@ -45,7 +45,7 @@ class OneHotGpuFwdKernel : public GpuKernel {
     return true;
   }
   bool Init(const CNodePtr &kernel_node) override {
-    int axis = GetAttr<int>(kernel_node, "axis");
+    int axis = static_cast<int>(GetAttr<int64_t>(kernel_node, "axis"));
     auto input = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
     auto output = AnfAlgo::GetOutputInferShape(kernel_node, 0);
     int input_size = SizeToInt(input.size());

@@ -114,12 +114,12 @@ class RepeatElementsGpuKernel : public GpuKernel {
     }
     std::reverse(input_shape_cumulative_product_.begin(), input_shape_cumulative_product_.end());

-    axis_ = GetAttr<int>(kernel_node, "axis");
+    axis_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "axis"));
     if (axis_ < 0) {
       axis_ += input_dim_;
     }

-    rep_ = GetAttr<int>(kernel_node, "rep");
+    rep_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "rep"));
     output_size_ = input_size_ * rep_;
     output_shape_ = input_shape_;
     output_shape_[axis_] *= rep_;

@@ -60,11 +60,11 @@ class RepeatElementsGradGpuKernel : public GpuKernel {
     std::vector<size_t> dy_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
     int dy_dim = dy_shape.size();

-    axis_ = GetAttr<int>(kernel_node, "axis");
+    axis_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "axis"));
     if (axis_ < 0) {
       axis_ += dy_dim;
     }
-    rep_ = GetAttr<int>(kernel_node, "rep");
+    rep_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "rep"));
     if (axis_ >= dy_dim) {
       axis_ = dy_dim - 1;
       rep_ = 1;

@@ -18,6 +18,7 @@
 #define MINDSPORE_CCSRC_KERNEL_GPU_SCATTER_ND_GPU_KERNEL_H

 #include <vector>
+#include <algorithm>
 #include "backend/kernel_compiler/gpu/cuda_impl/scatter_nd.cuh"
 #include "backend/kernel_compiler/gpu/gpu_kernel.h"
 #include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
@@ -98,7 +99,9 @@ class ScatterNdGpuFwdKernel : public GpuKernel {
     indices_shapes_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
     output_shapes_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);

-    vec_work_shape_ = GetAttr<std::vector<S>>(kernel_node, "shape");
+    std::vector<int64_t> shape_me = GetAttr<std::vector<int64_t>>(kernel_node, "shape");
+    (void)std::transform(shape_me.begin(), shape_me.end(), std::back_inserter(vec_work_shape_),
+                         [](const int64_t &value) { return static_cast<S>(value); });

     GetSize();

@@ -19,6 +19,7 @@

 #include <vector>
 #include <utility>
+#include <algorithm>
 #include "backend/kernel_compiler/gpu/gpu_kernel.h"
 #include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
 #include "backend/kernel_compiler/gpu/cuda_impl/slice_impl.cuh"
@@ -106,8 +107,12 @@ class SliceGpuFwdKernel : public GpuKernel {
       MS_LOG(ERROR) << "Input dims is " << input_shape.size() << ", scalar is not supported.";
       return false;
     }
-    size_ = GetAttr<std::vector<int>>(kernel_node, "size");
-    begin_ = GetAttr<std::vector<int>>(kernel_node, "begin");
+    std::vector<int64_t> size_me = GetAttr<std::vector<int64_t>>(kernel_node, "size");
+    std::vector<int64_t> begin_me = GetAttr<std::vector<int64_t>>(kernel_node, "begin");
+    (void)std::transform(size_me.begin(), size_me.end(), std::back_inserter(size_),
+                         [](const int64_t &value) { return static_cast<int>(value); });
+    (void)std::transform(begin_me.begin(), begin_me.end(), std::back_inserter(begin_),
+                         [](const int64_t &value) { return static_cast<int>(value); });
     for (size_t i = 0; i < input_shape.size(); i++) {
       if (input_shape[i] <= 0 || size_[i] <= 0) {
         MS_LOG(WARNING) << "Slice output is null.";

@@ -18,6 +18,7 @@
 #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SLICE_GRAD_GPU_KERNEL_H

 #include <vector>
+#include <algorithm>
 #include "backend/kernel_compiler/gpu/gpu_kernel.h"
 #include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
 #include "backend/kernel_compiler/gpu/cuda_impl/slice_impl.cuh"
@@ -50,27 +51,38 @@ class SliceGradGpuKernel : public GpuKernel {
     auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
     if (kernel_name == "StridedSliceGrad") {
       is_strided_slice_ = true;
-      auto shapex = GetAttr<std::vector<int>>(kernel_node, "shapex");
+      std::vector<int> shapex;
+      std::vector<int64_t> shapex_me = GetAttr<std::vector<int64_t>>(kernel_node, "shapex");
+      (void)std::transform(shapex_me.begin(), shapex_me.end(), std::back_inserter(shapex),
+                           [](const int64_t &value) { return static_cast<int>(value); });
       for (auto x : shapex) {
         input_shape_.push_back(IntToSize(x));
       }
       for (auto i = input_shape_.size(); i < 4; i++) {
         (void)input_shape_.insert(input_shape_.begin(), 1);
       }
-      strides_ = GetAttr<std::vector<int>>(kernel_node, "strides");
+      std::vector<int64_t> strides_me = GetAttr<std::vector<int64_t>>(kernel_node, "strides");
+      (void)std::transform(strides_me.begin(), strides_me.end(), std::back_inserter(strides_),
+                           [](const int64_t &value) { return static_cast<int>(value); });
       for (auto i = strides_.size(); i < 4; i++) {
         (void)strides_.insert(strides_.begin(), 1);
       }
-      size_ = GetAttr<std::vector<int>>(kernel_node, "end");
+      std::vector<int64_t> size_me = GetAttr<std::vector<int64_t>>(kernel_node, "end");
+      (void)std::transform(size_me.begin(), size_me.end(), std::back_inserter(size_),
+                           [](const int64_t &value) { return static_cast<int>(value); });
     } else {
       auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
       ShapeNdTo4d(input_shape, &input_shape_);
-      size_ = GetAttr<std::vector<int>>(kernel_node, "size");
+      std::vector<int64_t> size_me = GetAttr<std::vector<int64_t>>(kernel_node, "size");
+      (void)std::transform(size_me.begin(), size_me.end(), std::back_inserter(size_),
+                           [](const int64_t &value) { return static_cast<int>(value); });
     }

     auto dy_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
     ShapeNdTo4d(dy_shape, &dy_shape_);
-    begin_ = GetAttr<std::vector<int>>(kernel_node, "begin");
+    std::vector<int64_t> begin_me = GetAttr<std::vector<int64_t>>(kernel_node, "begin");
+    (void)std::transform(begin_me.begin(), begin_me.end(), std::back_inserter(begin_),
+                         [](const int64_t &value) { return static_cast<int>(value); });
     DealParam();
     input_size_ = input_shape_[0] * input_shape_[1] * input_shape_[2] * input_shape_[3] * sizeof(T);

@@ -57,12 +57,12 @@ class SplitGpuFwdKernel : public GpuKernel {
   }

   bool Init(const CNodePtr &kernel_node) override {
-    axis_ = GetAttr<int>(kernel_node, "axis");
+    axis_ = static_cast<int64_t>(GetAttr<int64_t>(kernel_node, "axis"));
     if (axis_ < 0) {
       auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
       axis_ += SizeToInt(input_shape.size());
     }
-    output_num_ = GetAttr<int>(kernel_node, "output_num");
+    output_num_ = static_cast<int64_t>(GetAttr<int64_t>(kernel_node, "output_num"));

     if (!CheckParam(kernel_node)) {
       return false;

@@ -81,9 +81,15 @@ class StridedSliceGpuKernel : public GpuKernel {

  private:
   void FillEmptyDims(const CNodePtr &kernel_node) {
-    begin_ = GetAttr<std::vector<int>>(kernel_node, "begin");
-    end_ = GetAttr<std::vector<int>>(kernel_node, "end");
-    strides_ = GetAttr<std::vector<int>>(kernel_node, "strides");
+    std::vector<int64_t> begin_me = GetAttr<std::vector<int64_t>>(kernel_node, "begin");
+    std::vector<int64_t> end_me = GetAttr<std::vector<int64_t>>(kernel_node, "end");
+    std::vector<int64_t> strides_me = GetAttr<std::vector<int64_t>>(kernel_node, "strides");
+    (void)std::transform(begin_me.begin(), begin_me.end(), std::back_inserter(begin_),
+                         [](const int64_t &value) { return static_cast<int>(value); });
+    (void)std::transform(end_me.begin(), end_me.end(), std::back_inserter(end_),
+                         [](const int64_t &value) { return static_cast<int>(value); });
+    (void)std::transform(strides_me.begin(), strides_me.end(), std::back_inserter(strides_),
+                         [](const int64_t &value) { return static_cast<int>(value); });

     for (size_t i = 0; i < MAX_DIMS; i++) {
       if (i < begin_.size()) {
@@ -111,7 +117,7 @@ class StridedSliceGpuKernel : public GpuKernel {
   }

   void ParseMasks(const CNodePtr &kernel_node) {
-    auto begin_mask_int = GetAttr<int>(kernel_node, "begin_mask");
+    auto begin_mask_int = static_cast<int64_t>(GetAttr<int64_t>(kernel_node, "begin_mask"));
     auto begin_mask = Dec2Bin(begin_mask_int);
     for (size_t i = 0; i < begin_mask.size(); i++) {
       if (begin_mask[i]) {
@@ -119,7 +125,7 @@ class StridedSliceGpuKernel : public GpuKernel {
       }
     }

-    auto end_mask_int = GetAttr<int>(kernel_node, "end_mask");
+    auto end_mask_int = static_cast<int64_t>(GetAttr<int64_t>(kernel_node, "end_mask"));
     auto end_mask = Dec2Bin(end_mask_int);
     for (size_t j = 0; j < end_mask.size(); j++) {
       if (end_mask[j]) {
@@ -127,7 +133,7 @@ class StridedSliceGpuKernel : public GpuKernel {
       }
     }

-    auto ellipsis_mask_int = GetAttr<int>(kernel_node, "ellipsis_mask");
+    auto ellipsis_mask_int = static_cast<int64_t>(GetAttr<int64_t>(kernel_node, "ellipsis_mask"));
     auto ellipsis_mask = Dec2Bin(ellipsis_mask_int);
     for (size_t k = 0; k < ellipsis_mask.size(); k++) {
       if (ellipsis_mask[k]) {
@@ -137,7 +143,7 @@ class StridedSliceGpuKernel : public GpuKernel {
       }
     }

-    auto shrink_axis_mask_str = GetAttr<int>(kernel_node, "shrink_axis_mask");
+    auto shrink_axis_mask_str = static_cast<int64_t>(GetAttr<int64_t>(kernel_node, "shrink_axis_mask"));
     auto shrink_axis_mask = Dec2Bin(shrink_axis_mask_str);
     for (size_t l = 0; l < shrink_axis_mask.size(); l++) {
       if (shrink_axis_mask[l]) {

@@ -50,7 +50,10 @@ class StridedSliceGradGpuKernel : public GpuKernel {
     return true;
   }
   bool Init(const CNodePtr &kernel_node) override {
-    auto shapex = GetAttr<std::vector<int>>(kernel_node, "shapex");
+    std::vector<int> shapex;
+    std::vector<int64_t> shapex_me = GetAttr<std::vector<int64_t>>(kernel_node, "shapex");
+    (void)std::transform(shapex_me.begin(), shapex_me.end(), std::back_inserter(shapex),
+                         [](const int64_t &value) { return static_cast<int>(value); });
     for (auto x : shapex) {
       input_shape_.push_back(IntToSize(x));
     }

@@ -84,9 +87,15 @@ class StridedSliceGradGpuKernel : public GpuKernel {

  private:
   void FillEmptyDims(const CNodePtr &kernel_node) {
-    begin_ = GetAttr<std::vector<int>>(kernel_node, "begin");
-    end_ = GetAttr<std::vector<int>>(kernel_node, "end");
-    strides_ = GetAttr<std::vector<int>>(kernel_node, "strides");
+    std::vector<int64_t> begin_me = GetAttr<std::vector<int64_t>>(kernel_node, "begin");
+    std::vector<int64_t> end_me = GetAttr<std::vector<int64_t>>(kernel_node, "end");
+    std::vector<int64_t> strides_me = GetAttr<std::vector<int64_t>>(kernel_node, "strides");
+    (void)std::transform(begin_me.begin(), begin_me.end(), std::back_inserter(begin_),
+                         [](const int64_t &value) { return static_cast<int>(value); });
+    (void)std::transform(end_me.begin(), end_me.end(), std::back_inserter(end_),
+                         [](const int64_t &value) { return static_cast<int>(value); });
+    (void)std::transform(strides_me.begin(), strides_me.end(), std::back_inserter(strides_),
+                         [](const int64_t &value) { return static_cast<int>(value); });

     for (size_t i = 0; i < MAX_DIMS; i++) {
       if (i < begin_.size()) {

@ -114,7 +123,7 @@ class StridedSliceGradGpuKernel : public GpuKernel {
|
|||
}
|
||||
|
||||
void ParseMasks(const CNodePtr &kernel_node) {
|
||||
auto begin_mask_int = GetAttr<int>(kernel_node, "begin_mask");
|
||||
auto begin_mask_int = static_cast<int64_t>(GetAttr<int64_t>(kernel_node, "begin_mask"));
|
||||
auto begin_mask = Dec2Bin(begin_mask_int);
|
||||
for (size_t i = 0; i < begin_mask.size(); i++) {
|
||||
if (begin_mask[i]) {
|
||||
|
@ -122,7 +131,7 @@ class StridedSliceGradGpuKernel : public GpuKernel {
|
|||
}
|
||||
}
|
||||
|
||||
auto end_mask_int = GetAttr<int>(kernel_node, "end_mask");
|
||||
auto end_mask_int = static_cast<int64_t>(GetAttr<int64_t>(kernel_node, "end_mask"));
|
||||
auto end_mask = Dec2Bin(end_mask_int);
|
||||
for (size_t j = 0; j < end_mask.size(); j++) {
|
||||
if (end_mask[j]) {
|
||||
|
@ -130,7 +139,7 @@ class StridedSliceGradGpuKernel : public GpuKernel {
|
|||
}
|
||||
}
|
||||
|
||||
auto ellipsis_mask_int = GetAttr<int>(kernel_node, "ellipsis_mask");
|
||||
auto ellipsis_mask_int = static_cast<int64_t>(GetAttr<int64_t>(kernel_node, "ellipsis_mask"));
|
||||
auto ellipsis_mask = Dec2Bin(ellipsis_mask_int);
|
||||
for (size_t k = 0; k < ellipsis_mask.size(); k++) {
|
||||
if (ellipsis_mask[k]) {
|
||||
|
@ -140,7 +149,7 @@ class StridedSliceGradGpuKernel : public GpuKernel {
|
|||
}
|
||||
}
|
||||
|
||||
auto shrink_axis_mask_str = GetAttr<int>(kernel_node, "shrink_axis_mask");
|
||||
auto shrink_axis_mask_str = static_cast<int64_t>(GetAttr<int64_t>(kernel_node, "shrink_axis_mask"));
|
||||
auto shrink_axis_mask = Dec2Bin(shrink_axis_mask_str);
|
||||
for (size_t l = 0; l < shrink_axis_mask.size(); l++) {
|
||||
if (shrink_axis_mask[l]) {
|
||||
|
|
|
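Each slice mask is now fetched as int64 before Dec2Bin expands it bit by bit. Dec2Bin's body sits outside this diff; a minimal sketch of what such an expansion could look like, given the int64_t argument the new call sites imply (hypothetical, for illustration only):

// Sketch, not the actual Dec2Bin from the patch; assumes non-negative masks
// and an 8-dimension cap standing in for MAX_DIMS.
#include <cstdint>
#include <vector>

std::vector<bool> Dec2BinSketch(int64_t mask) {
  constexpr size_t kMaxDims = 8;
  std::vector<bool> flags(kMaxDims, false);
  for (size_t i = 0; i < kMaxDims; ++i) {
    flags[i] = ((mask >> i) & 1) != 0;  // bit i of the mask marks dimension i
  }
  return flags;
}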
@@ -18,6 +18,7 @@
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_TRANSPOSE_H_

#include <vector>
#include <algorithm>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "backend/kernel_compiler/gpu/cuda_impl/transpose_impl.cuh"
@@ -75,7 +76,10 @@ class TransposeGpuFwdKernel : public GpuKernel {
}
input_size_ *= sizeof(T);
output_size_ = input_size_;
auto perm = GetAttr<std::vector<int>>(kernel_node, "perm");
std::vector<int> perm;
std::vector<int64_t> perm_me = GetAttr<std::vector<int64_t>>(kernel_node, "perm");
(void)std::transform(perm_me.begin(), perm_me.end(), std::back_inserter(perm),
[](const int64_t &value) { return static_cast<int>(value); });
for (size_t j = 0; j < perm.size(); j++) {
input_axis_.push_back(perm[j]);
}

@@ -15,6 +15,7 @@
*/

#include "backend/kernel_compiler/gpu/data/dataset_init_kernel.h"
#include <algorithm>
#include "backend/kernel_compiler/gpu/data/dataset_utils.h"
#include "runtime/device/gpu/gpu_buffer_mgr.h"
#include "runtime/device/gpu/gpu_memory_allocator.h"
@@ -34,7 +35,15 @@ const std::vector<size_t> &DatasetInitKernel::GetWorkspaceSizeList() const { ret

bool DatasetInitKernel::Init(const CNodePtr &kernel_node) {
queue_name_ = GetAttr<std::string>(kernel_node, "queue_name");
auto shapes = GetAttr<const std::vector<std::vector<int>>>(kernel_node, "shapes");
std::vector<std::vector<int>> shapes;
std::vector<std::vector<int64_t>> shapes_me = GetAttr<const std::vector<std::vector<int64_t>>>(kernel_node, "shapes");
(void)std::transform(shapes_me.begin(), shapes_me.end(), std::back_inserter(shapes),
[](const std::vector<int64_t> &values) {
std::vector<int> shape;
(void)std::transform(values.begin(), values.end(), std::back_inserter(shape),
[](const int64_t &value) { return static_cast<int>(value); });
return shape;
});
auto types = GetAttr<const std::vector<TypePtr>>(kernel_node, "types");
if (shapes.size() != types.size()) {
MS_LOG(EXCEPTION) << "Invalid shapes: " << shapes << ", types: " << types;

@@ -19,6 +19,7 @@
#include <memory>
#include <string>
#include <vector>
#include <algorithm>
#include "backend/kernel_compiler/gpu/data/dataset_utils.h"
#include "profiler/device/gpu/gpu_profiling.h"
#include "runtime/device/gpu/gpu_buffer_mgr.h"
@@ -44,7 +45,15 @@ const std::vector<size_t> &DatasetIteratorKernel::GetWorkspaceSizeList() const {

bool DatasetIteratorKernel::Init(const CNodePtr &kernel_node) {
queue_name_ = GetAttr<std::string>(kernel_node, "shared_name");
auto shapes = GetAttr<const std::vector<std::vector<int>>>(kernel_node, "shapes");
std::vector<std::vector<int>> shapes;
std::vector<std::vector<int64_t>> shapes_me = GetAttr<const std::vector<std::vector<int64_t>>>(kernel_node, "shapes");
(void)std::transform(shapes_me.begin(), shapes_me.end(), std::back_inserter(shapes),
[](const std::vector<int64_t> &values) {
std::vector<int> shape;
(void)std::transform(values.begin(), values.end(), std::back_inserter(shape),
[](const int64_t &value) { return static_cast<int>(value); });
return shape;
});
auto types = GetAttr<const std::vector<TypePtr>>(kernel_node, "types");
if (shapes.size() != types.size()) {
MS_LOG(EXCEPTION) << "Invalid shapes: " << shapes << ", types: " << types;
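DatasetInitKernel and DatasetIteratorKernel repeat the same nested conversion for their "shapes" attribute. Pulled out of context (the ConvertShapes name is hypothetical; the nested std::transform itself is taken verbatim from the patch), the pattern is:

// Sketch of the nested int64 -> int narrowing used for "shapes";
// assumes every dimension fits in 32 bits.
#include <algorithm>
#include <cstdint>
#include <iterator>
#include <vector>

std::vector<std::vector<int>> ConvertShapes(const std::vector<std::vector<int64_t>> &shapes_me) {
  std::vector<std::vector<int>> shapes;
  (void)std::transform(shapes_me.begin(), shapes_me.end(), std::back_inserter(shapes),
                       [](const std::vector<int64_t> &values) {
                         std::vector<int> shape;
                         (void)std::transform(values.begin(), values.end(), std::back_inserter(shape),
                                              [](const int64_t &value) { return static_cast<int>(value); });
                         return shape;
                       });
  return shapes;
}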
@@ -85,7 +85,7 @@ class AddNGpuFwdKernel : public GpuKernel {
InitResource();
cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0)));
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
num_input_ = GetAttr<int>(kernel_node, "n");
num_input_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "n"));
if (IntToSize(num_input_) != input_num) {
MS_LOG(ERROR) << "Input number is " << num_input_ << " in attr, but got " << input_num << " input.";
return false;

@@ -19,6 +19,7 @@
#include <cublas_v2.h>
#include <cuda_runtime_api.h>
#include <vector>
#include <algorithm>
#include "backend/kernel_compiler/gpu/cuda_impl/identity_impl.cuh"
#include "backend/kernel_compiler/gpu/cuda_impl/matrix_split_impl.cuh"
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
@@ -105,7 +106,7 @@ class CholeskyTrsmGpuKernel : public GpuKernel {
handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCusolverDnHandle();
blas_handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCublasHandle();
auto in_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
split_dim = GetAttr<int>(kernel_node, "split_dim");
split_dim = static_cast<int>(GetAttr<int64_t>(kernel_node, "split_dim"));
if (split_dim == 0) {
use_split_matrix = false;
if (in_shape.size() == 2) {

@@ -51,7 +51,7 @@ class CumSumGpuKernel : public GpuKernel {
}
input_size_0_ = sizeof(T);
shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
axis_ = GetAttr<int>(kernel_node, "axis");
axis_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "axis"));
exclusive_ = GetAttr<bool>(kernel_node, "exclusive");
reverse_ = GetAttr<bool>(kernel_node, "reverse");
int input_dim_length = SizeToInt(shape_.size());

@@ -86,7 +86,7 @@ class DetTriangleGpuKernel : public GpuKernel {
MS_LOG(ERROR) << "The matrices should be square.";
return false;
}
fill_mode_ = GetValue<int>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("fill_mode"));
fill_mode_ = static_cast<int>(GetValue<int64_t>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("fill_mode")));
InitSizeLists();
return true;
}

@@ -92,8 +92,8 @@ class MultinomialGpuKernel : public GpuKernel {
output_size_ *= output_shape[i];
}
workspace_size_ = output_size_;
seed_ = GetValue<int>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("seed"));
seed2_ = GetValue<int>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("seed2"));
seed_ = static_cast<int>(GetValue<int64_t>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("seed")));
seed2_ = static_cast<int>(GetValue<int64_t>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("seed2")));
InitSizeLists();
return true;
}

@@ -119,8 +119,8 @@ class RandomOpGpuKernel : public GpuKernel {
output_size_ *= output_shape[i];
workspace_size_ *= output_shape[i];
}
seed_ = GetValue<int>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("seed"));
seed2_ = GetValue<int>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("seed2"));
seed_ = static_cast<int>(GetValue<int64_t>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("seed")));
seed2_ = static_cast<int>(GetValue<int64_t>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("seed2")));
InitSizeLists();
return true;
}

@@ -20,6 +20,7 @@
#include <cublas_v2.h>
#include <cuda_runtime_api.h>
#include <vector>
#include <algorithm>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "backend/kernel_compiler/gpu/kernel_constants.h"
@@ -135,9 +136,14 @@ class TensorDotGpuKernel : public GpuKernel {
}

// holding in temp values to convert to size_t vectors
auto x1_transpose_fwd_temp = GetAttr<std::vector<int>>(kernel_node, "x1_transpose_fwd");
auto x2_transpose_fwd_temp = GetAttr<std::vector<int>>(kernel_node, "x2_transpose_fwd");

std::vector<int> x1_transpose_fwd_temp;
std::vector<int64_t> x1_transpose_me = GetAttr<std::vector<int64_t>>(kernel_node, "x1_transpose_fwd");
(void)std::transform(x1_transpose_me.begin(), x1_transpose_me.end(), std::back_inserter(x1_transpose_fwd_temp),
[](const int64_t &value) { return static_cast<int>(value); });
std::vector<int> x2_transpose_fwd_temp;
std::vector<int64_t> x2_transpose_me = GetAttr<std::vector<int64_t>>(kernel_node, "x2_transpose_fwd");
(void)std::transform(x2_transpose_me.begin(), x2_transpose_me.end(), std::back_inserter(x2_transpose_fwd_temp),
[](const int64_t &value) { return static_cast<int>(value); });
for (size_t i = 0; i < x1_transpose_fwd_temp.size(); i++) {
x1_transpose_fwd_.push_back(x1_transpose_fwd_temp[i]);
}
@@ -147,8 +153,12 @@ class TensorDotGpuKernel : public GpuKernel {
}

// values to decide multiplication call specifics
x1_reshape_fwd_ = GetAttr<std::vector<int>>(kernel_node, "x1_reshape_fwd");
x2_reshape_fwd_ = GetAttr<std::vector<int>>(kernel_node, "x2_reshape_fwd");
std::vector<int64_t> x1_reshape_me = GetAttr<std::vector<int64_t>>(kernel_node, "x1_reshape_fwd");
(void)std::transform(x1_reshape_me.begin(), x1_reshape_me.end(), std::back_inserter(x1_reshape_fwd_),
[](const int64_t &value) { return static_cast<int>(value); });
std::vector<int64_t> x2_reshape_me = GetAttr<std::vector<int64_t>>(kernel_node, "x2_reshape_fwd");
(void)std::transform(x2_reshape_me.begin(), x2_reshape_me.end(), std::back_inserter(x2_reshape_fwd_),
[](const int64_t &value) { return static_cast<int>(value); });
auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
output_size_ = sizeof(T);
for (size_t i = 0; i < output_shape.size(); i++) {

@@ -183,7 +183,7 @@ class UpdateThorGradientGpuKernel : public GpuKernel {
auto gradient_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
auto matrix_g_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);

split_dim = size_t(GetAttr<int>(kernel_node, "split_dim"));
split_dim = LongToSize(GetAttr<int64_t>(kernel_node, "split_dim"));

gradient_size.batch_h = gradient_shape[0] / split_dim;
gradient_size.batch_w = gradient_shape[1] / split_dim;

@@ -195,7 +195,7 @@ class NcclGpuKernel : public GpuKernel {

auto root_rank = AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr(kAttrRootRank);
if (root_rank) {
root_ = GetValue<int>(root_rank);
root_ = static_cast<int>(GetValue<int64_t>(root_rank));
}
return;
}

@@ -125,9 +125,12 @@ class Conv2dGpuFwdKernel : public GpuKernel {
compute_format_ = CUDNN_TENSOR_NHWC;
}
Set4DDesc(in_shape, filter_shape, output_shape);
group_ = GetAttr<int>(kernel_node, "group");
group_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "group"));
CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetConvolutionGroupCount(conv_desc_, group_), "cudnnSetConvGroupCount failed");
auto pad_list = GetAttr<std::vector<int>>(kernel_node, "pad_list");
std::vector<int> pad_list;
std::vector<int64_t> pad_list_me = GetAttr<std::vector<int64_t>>(kernel_node, "pad_list");
(void)std::transform(pad_list_me.begin(), pad_list_me.end(), std::back_inserter(pad_list),
[](const int64_t &value) { return static_cast<int>(value); });
pad_height_ = pad_list[0];
pad_width_ = pad_list[2];
auto symmetry_pad = (pad_height_ == pad_list[1]) && (pad_width_ == pad_list[3]);
@@ -306,8 +309,12 @@ class Conv2dGpuFwdKernel : public GpuKernel {
}
}
void SetStrideAndDilation(const CNodePtr &kernel_node) {
stride_ = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "stride");
dilation_ = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "dilation");
std::vector<int64_t> stride_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, "stride");
std::vector<int64_t> dilation_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, "dilation");
(void)std::transform(stride_me.begin(), stride_me.end(), std::back_inserter(stride_),
[](const int64_t &value) { return static_cast<int>(value); });
(void)std::transform(dilation_me.begin(), dilation_me.end(), std::back_inserter(dilation_),
[](const int64_t &value) { return static_cast<int>(value); });
if (stride_.size() != 4) {
MS_LOG(EXCEPTION) << "Conv2d's stride must be 4d!";
}

@@ -127,10 +127,13 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
}
SetNCHW(in_shape, &n_, &c_, &old_height_, &old_width_, data_format_);
Set4DDesc(dy_shape, filter_shape, in_shape);
group_ = GetAttr<int>(kernel_node, "group");
group_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "group"));
CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetConvolutionGroupCount(conv_desc_, group_), "cudnnSetConvGroupCount failed");

auto pad_list = GetAttr<std::vector<int>>(kernel_node, "pad_list");
std::vector<int> pad_list;
std::vector<int64_t> pad_list_me = GetAttr<std::vector<int64_t>>(kernel_node, "pad_list");
(void)std::transform(pad_list_me.begin(), pad_list_me.end(), std::back_inserter(pad_list),
[](const int64_t &value) { return static_cast<int>(value); });
pad_height_ = pad_list[0];
pad_width_ = pad_list[2];
auto symmetry_pad = (pad_height_ == pad_list[1]) && (pad_width_ == pad_list[3]);
@@ -284,7 +287,7 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
void GetFilterShape(const CNodePtr &kernel_node, std::vector<size_t> *filter_shape) {
auto shp_tuple_x = AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("filter_sizes")->cast<ValueTuplePtr>()->value();
(void)std::transform(std::begin(shp_tuple_x), std::end(shp_tuple_x), std::back_inserter(*filter_shape),
[](const ValuePtr &e) -> size_t { return e->cast<Int32ImmPtr>()->value(); });
[](const ValuePtr &e) -> size_t { return static_cast<int>(e->cast<Int64ImmPtr>()->value()); });
}
void Set4DDesc(const std::vector<size_t> &dy_shape, const std::vector<size_t> &filter_shape,
const std::vector<size_t> &in_shape) {
@@ -309,8 +312,12 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
"cudnnSetTensorNdDescriptor failed");
}
void SetStrideAndDilation(const CNodePtr &kernel_node) {
stride_ = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "stride");
dilation_ = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "dilation");
std::vector<int64_t> stride_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, "stride");
std::vector<int64_t> dilation_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, "dilation");
(void)std::transform(stride_me.begin(), stride_me.end(), std::back_inserter(stride_),
[](const int64_t &value) { return static_cast<int>(value); });
(void)std::transform(dilation_me.begin(), dilation_me.end(), std::back_inserter(dilation_),
[](const int64_t &value) { return static_cast<int>(value); });
if (stride_.size() != 2) {
MS_LOG(EXCEPTION) << "ConvGradFilterGpuBkwKernel's stride must be 2d!";
}
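GetFilterShape above (and GetInputShape below) now unpack Int64Imm tuple elements, but the lambda still narrows through int on its way to size_t. A self-contained sketch of why that intermediate cast matters for values that do not fit in 32 bits (plain C++, independent of the MindSpore value types; behavior shown for mainstream two's-complement targets):

// Sketch: the lambda computes static_cast<int>(v) and only then widens to
// size_t, so a 33-bit value is truncated along the way.
#include <cstdint>
#include <cstdio>

int main() {
  const int64_t v = (1LL << 32) + 5;         // does not fit in 32 bits
  size_t through_int = static_cast<int>(v);  // truncates to 5
  size_t direct = static_cast<size_t>(v);    // keeps 4294967301
  std::printf("%zu vs %zu\n", through_int, direct);
  return 0;
}

In practice filter and input sizes stay far below 2^31, so the truncation is harmless here; the sketch only makes the narrowing visible.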
@@ -130,10 +130,13 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
SetNCHW(input_shape, &n_, &c_, &old_height_, &old_width_, data_format_);
Set4DDesc(dy_shape, input_shape, filter_shape);

group_ = GetAttr<int>(kernel_node, "group");
group_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "group"));
CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetConvolutionGroupCount(conv_desc_, group_), "cudnnSetConvGroupCount failed");

auto pad_list = GetAttr<std::vector<int>>(kernel_node, "pad_list");
std::vector<int> pad_list;
std::vector<int64_t> pad_list_me = GetAttr<std::vector<int64_t>>(kernel_node, "pad_list");
(void)std::transform(pad_list_me.begin(), pad_list_me.end(), std::back_inserter(pad_list),
[](const int64_t &value) { return static_cast<int>(value); });
pad_height_ = pad_list[0];
pad_width_ = pad_list[2];
auto symmetry_pad = (pad_height_ == pad_list[1]) && (pad_width_ == pad_list[3]);
@@ -263,7 +266,10 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
return true;
}
void SetPad(const std::vector<int> &input_shape, const CNodePtr &kernel_node) {
auto pad_list = GetAttr<std::vector<int>>(kernel_node, "pad_list");
std::vector<int> pad_list;
std::vector<int64_t> pad_list_me = GetAttr<std::vector<int64_t>>(kernel_node, "pad_list");
(void)std::transform(pad_list_me.begin(), pad_list_me.end(), std::back_inserter(pad_list),
[](const int64_t &value) { return static_cast<int>(value); });
}
void SelectAlgorithm(cudnnTensorDescriptor_t dx_desc_real) {
if (group_ > 1 || CUDNN_MAJOR < 7) {
@@ -288,7 +294,7 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
void GetInputShape(const CNodePtr &kernel_node, std::vector<size_t> *input_shape) {
auto shp_tuple_x = AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("input_sizes")->cast<ValueTuplePtr>()->value();
(void)std::transform(std::begin(shp_tuple_x), std::end(shp_tuple_x), std::back_inserter(*input_shape),
[](const ValuePtr &e) -> size_t { return e->cast<Int32ImmPtr>()->value(); });
[](const ValuePtr &e) -> size_t { return static_cast<int>(e->cast<Int64ImmPtr>()->value()); });
}
void Set4DDesc(const std::vector<size_t> &dy_shape, const std::vector<size_t> &input_shape,
const std::vector<size_t> &filter_shape) {
@@ -313,8 +319,12 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
"cudnnSetTensorNdDescriptor failed");
}
void SetStrideAndDilation(const CNodePtr &kernel_node) {
stride_ = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "stride");
dilation_ = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "dilation");
std::vector<int64_t> stride_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, "stride");
std::vector<int64_t> dilation_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, "dilation");
(void)std::transform(stride_me.begin(), stride_me.end(), std::back_inserter(stride_),
[](const int64_t &value) { return static_cast<int>(value); });
(void)std::transform(dilation_me.begin(), dilation_me.end(), std::back_inserter(dilation_),
[](const int64_t &value) { return static_cast<int>(value); });
if (stride_.size() != 2) {
MS_LOG(EXCEPTION) << "ConvGradInputGpuBkwKernel's stride must be 2d!";
}

@@ -86,7 +86,10 @@ class Im2ColGpuFwdKernel : public GpuKernel {
return false;
}
auto in_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto filter_shape = GetAttr<std::vector<int>>(kernel_node, "kernel_size");
std::vector<int> filter_shape;
std::vector<int64_t> filter_shape_me = GetAttr<std::vector<int64_t>>(kernel_node, "kernel_size");
(void)std::transform(filter_shape_me.begin(), filter_shape_me.end(), std::back_inserter(filter_shape),
[](const int64_t &value) { return static_cast<int>(value); });
auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(in_shape);
if (is_null_input_) {
@@ -96,7 +99,7 @@
}
Set4DDesc(in_shape, filter_shape, output_shape);
CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetConvolutionGroupCount(conv_desc_, 1), "cudnnSetConvGroupCount failed");
pad_height_ = GetAttr<int>(kernel_node, "pad");
pad_height_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "pad"));
pad_width_ = pad_height_;
pad_mode_ = GetAttr<std::string>(kernel_node, "pad_mode");
SetStrideAndDilation(kernel_node);
@@ -173,7 +176,10 @@
return true;
}
void SetPad(const std::vector<size_t> &in_shape, const CNodePtr &kernel_node) {
auto pad_list = GetAttr<std::vector<int>>(kernel_node, "pad_list");
std::vector<int> pad_list;
std::vector<int64_t> pad_list_me = GetAttr<std::vector<int64_t>>(kernel_node, "pad_list");
(void)std::transform(pad_list_me.begin(), pad_list_me.end(), std::back_inserter(pad_list),
[](const int64_t &value) { return static_cast<int>(value); });

n_ = SizeToInt(in_shape[0]);
c_ = SizeToInt(in_shape[1]);
@@ -217,8 +223,12 @@
}

void SetStrideAndDilation(const CNodePtr &kernel_node) {
stride_ = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "stride");
dilation_ = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "dilation");
std::vector<int64_t> stride_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, "stride");
std::vector<int64_t> dilation_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, "dilation");
(void)std::transform(stride_me.begin(), stride_me.end(), std::back_inserter(stride_),
[](const int64_t &value) { return static_cast<int>(value); });
(void)std::transform(dilation_me.begin(), dilation_me.end(), std::back_inserter(dilation_),
[](const int64_t &value) { return static_cast<int>(value); });
if (stride_.size() != 4) {
MS_LOG(EXCEPTION) << "Im2Col's stride must be 4d!";
}

@@ -97,7 +97,7 @@ class L2NormalizeGpuKernel : public GpuKernel {
}
int input_dim_length = SizeToInt(AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0).size());

int axis = GetAttr<int>(kernel_node, "axis");
int axis = static_cast<int>(GetAttr<int64_t>(kernel_node, "axis"));
axis_ = axis < 0 ? (axis + input_dim_length) : axis;
epsilon_ = GetAttr<float>(kernel_node, "epsilon");

@@ -119,7 +119,7 @@ class L2NormalizeGradGpuKernel : public GpuKernel {
}

int input_dim_length = SizeToInt(AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0).size());
int axis = GetAttr<int>(kernel_node, "axis");
int axis = static_cast<int>(GetAttr<int64_t>(kernel_node, "axis"));
axis_ = axis < 0 ? (axis + input_dim_length) : axis;

epsilon_ = GetAttr<float>(kernel_node, "epsilon");

@@ -49,8 +49,8 @@ class LayerNormGpuKernel : public GpuKernel {
return true;
}
bool Init(const CNodePtr &kernel_node) override {
int begin_norm_axis = GetAttr<int>(kernel_node, "begin_norm_axis");
int begin_params_axis = GetAttr<int>(kernel_node, "begin_params_axis");
int begin_norm_axis = static_cast<int>(GetAttr<int64_t>(kernel_node, "begin_norm_axis"));
int begin_params_axis = static_cast<int>(GetAttr<int64_t>(kernel_node, "begin_params_axis"));

auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
if (begin_norm_axis < 0) {

@@ -51,8 +51,8 @@ class LayerNormGradGpuKernel : public GpuKernel {
return true;
}
bool Init(const CNodePtr &kernel_node) override {
int begin_norm_axis = GetAttr<int>(kernel_node, "begin_norm_axis");
int begin_params_axis = GetAttr<int>(kernel_node, "begin_params_axis");
int begin_norm_axis = static_cast<int>(GetAttr<int64_t>(kernel_node, "begin_norm_axis"));
int begin_params_axis = static_cast<int>(GetAttr<int64_t>(kernel_node, "begin_params_axis"));

auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
if (begin_norm_axis < 0) {

@@ -95,9 +95,9 @@ class LstmGpuKernel : public GpuKernel {
batch_size_ = SizeToInt(input_shape[1]);
input_size_ = SizeToInt(input_shape[2]);

input_size_ = GetAttr<int>(kernel_node, "input_size");
hidden_size_ = GetAttr<int>(kernel_node, "hidden_size");
num_layers_ = GetAttr<int>(kernel_node, "num_layers");
input_size_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "input_size"));
hidden_size_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "hidden_size"));
num_layers_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "num_layers"));
has_bias_ = GetAttr<bool>(kernel_node, "has_bias");
bidirectional_ = GetAttr<bool>(kernel_node, "bidirectional");
dropout_ = GetAttr<float>(kernel_node, "dropout");

@@ -96,9 +96,9 @@ class LstmGradDataGpuKernel : public GpuKernel {
return true;
}
void GetAttrs(const CNodePtr &kernel_node) {
input_size_ = GetAttr<int>(kernel_node, "input_size");
hidden_size_ = GetAttr<int>(kernel_node, "hidden_size");
num_layers_ = GetAttr<int>(kernel_node, "num_layers");
input_size_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "input_size"));
hidden_size_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "hidden_size"));
num_layers_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "num_layers"));
has_bias_ = GetAttr<bool>(kernel_node, "has_bias");
bidirectional_ = GetAttr<bool>(kernel_node, "bidirectional");
dropout_ = GetAttr<float>(kernel_node, "dropout");

@@ -89,9 +89,9 @@ class LstmGradWeightGpuKernel : public GpuKernel {
seq_len_ = SizeToInt(input_shape[0]);
batch_size_ = SizeToInt(input_shape[1]);

input_size_ = GetAttr<int>(kernel_node, "input_size");
hidden_size_ = GetAttr<int>(kernel_node, "hidden_size");
num_layers_ = GetAttr<int>(kernel_node, "num_layers");
input_size_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "input_size"));
hidden_size_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "hidden_size"));
num_layers_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "num_layers"));
has_bias_ = GetAttr<bool>(kernel_node, "has_bias");
bidirectional_ = GetAttr<bool>(kernel_node, "bidirectional");
dropout_ = GetAttr<float>(kernel_node, "dropout");

@@ -90,10 +90,18 @@ class MaxPoolWithArgmaxGpuFwdKernel : public GpuKernel {
input_width_ = SizeToInt(input_shape[3]);
output_height_ = SizeToInt(output_shape[2]);
output_width_ = SizeToInt(output_shape[3]);
auto window = GetValue<std::vector<int>>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("ksize"));
std::vector<int> window;
std::vector<int64_t> window_me =
GetValue<std::vector<int64_t>>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("ksize"));
(void)std::transform(window_me.begin(), window_me.end(), std::back_inserter(window),
[](const int64_t &value) { return static_cast<int>(value); });
window_height_ = window[1];
window_width_ = window[2];
auto stride = GetValue<std::vector<int>>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("strides"));
std::vector<int> stride;
std::vector<int64_t> stride_me =
GetValue<std::vector<int64_t>>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("strides"));
(void)std::transform(stride_me.begin(), stride_me.end(), std::back_inserter(stride),
[](const int64_t &value) { return static_cast<int>(value); });
stride_height_ = stride[1];
stride_width_ = stride[2];
pad_mode_ = GetValue<std::string>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("padding"));
@@ -95,10 +95,18 @@ class MaxPoolWithArgmaxGradGpuKernel : public GpuKernel {
x_width_ = SizeToInt(x_shape[3]);
dy_height_ = SizeToInt(dy_shape[2]);
dy_width_ = SizeToInt(dy_shape[3]);
auto window = GetValue<std::vector<int>>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("ksize"));
std::vector<int> window;
std::vector<int64_t> window_me =
GetValue<std::vector<int64_t>>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("ksize"));
(void)std::transform(window_me.begin(), window_me.end(), std::back_inserter(window),
[](const int64_t &value) { return static_cast<int>(value); });
window_height_ = window[1];
window_width_ = window[2];
auto stride = GetValue<std::vector<int>>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("strides"));
std::vector<int> stride;
std::vector<int64_t> stride_me =
GetValue<std::vector<int64_t>>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("strides"));
(void)std::transform(stride_me.begin(), stride_me.end(), std::back_inserter(stride),
[](const int64_t &value) { return static_cast<int>(value); });
stride_height_ = stride[1];
stride_width_ = stride[2];
pad_mode_ = GetValue<std::string>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("padding"));

@@ -19,6 +19,7 @@

#include <iostream>
#include <vector>
#include <algorithm>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "backend/kernel_compiler/gpu/cuda_impl/pad_impl.cuh"
@@ -77,7 +78,15 @@ class PadGpuFwdKernel : public GpuKernel {
input_shape.insert(it, 2, 1); // channel padding
shape_size_ = 4;
}
paddings = GetValue<std::vector<std::vector<int>>>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("paddings"));
std::vector<std::vector<int64_t>> paddings_me =
GetValue<std::vector<std::vector<int64_t>>>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("paddings"));
(void)std::transform(paddings_me.begin(), paddings_me.end(), std::back_inserter(paddings),
[](const std::vector<int64_t> &values) {
std::vector<int> shape;
(void)std::transform(values.begin(), values.end(), std::back_inserter(shape),
[](const int64_t &value) { return static_cast<int>(value); });
return shape;
});
// shape adjustment -> from 2d/3d to 4d to standardize
if (paddings.size() == 4) {
} else if (paddings.size() == 3) {

@@ -157,10 +157,17 @@ class PoolingGpuFwdKernel : public GpuKernel {
}
void SetPad(const CNodePtr &kernel_node) {
pad_mode_ = GetValue<std::string>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("padding"));
auto window = GetValue<std::vector<int>>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("ksize"));
std::vector<int> window;
std::vector<int64_t> window_me =
GetValue<std::vector<int64_t>>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("ksize"));
(void)std::transform(window_me.begin(), window_me.end(), std::back_inserter(window),
[](const int64_t &value) { return static_cast<int>(value); });
int window_height = window[2];
int window_width = window[3];
stride_ = GetValue<std::vector<int>>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("strides"));
std::vector<int64_t> stride_me =
GetValue<std::vector<int64_t>>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("strides"));
(void)std::transform(stride_me.begin(), stride_me.end(), std::back_inserter(stride_),
[](const int64_t &value) { return static_cast<int>(value); });
int windowDimA[2] = {window_height, window_width};
int paddingA[2] = {0, 0};
int strideA[2] = {stride_[2], stride_[3]};

@@ -174,8 +174,13 @@ class PoolingGradGpuKernel : public GpuKernel {
}
void SetPad(const CNodePtr &kernel_node) {
pad_mode_ = GetAttr<std::string>(kernel_node, "padding");
stride_ = GetAttr<std::vector<int>>(kernel_node, "strides");
auto window = GetAttr<std::vector<int>>(kernel_node, "ksize");
std::vector<int64_t> stride_me = GetAttr<std::vector<int64_t>>(kernel_node, "strides");
std::vector<int> window;
std::vector<int64_t> window_me = GetAttr<std::vector<int64_t>>(kernel_node, "ksize");
(void)std::transform(stride_me.begin(), stride_me.end(), std::back_inserter(stride_),
[](const int64_t &value) { return static_cast<int>(value); });
(void)std::transform(window_me.begin(), window_me.end(), std::back_inserter(window),
[](const int64_t &value) { return static_cast<int>(value); });
int window_height = window[2];
int window_width = window[3];
int stride_h = stride_[2];

@@ -85,11 +85,11 @@ class ROIAlignGpuFwdKernel : public GpuKernel {
rois_shape_ = {roi_rows_, roi_cols_};

// Get primitive args
pooled_height_ = GetAttr<int>(kernel_node, "pooled_height");
pooled_width_ = GetAttr<int>(kernel_node, "pooled_width");
pooled_height_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "pooled_height"));
pooled_width_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "pooled_width"));
spatial_scale_ = static_cast<T>(GetAttr<float>(kernel_node, "spatial_scale"));
sample_num_ = GetAttr<int>(kernel_node, "sample_num");
roi_end_mode_ = GetAttr<int>(kernel_node, "roi_end_mode");
sample_num_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "sample_num"));
roi_end_mode_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "roi_end_mode"));

// Get output_shape
output_shape_ = {roi_rows_, channels_, pooled_height_, pooled_width_};

@@ -18,6 +18,7 @@
#define MINDSPORE_CCSRC_KERNEL_GPU_ROI_ALIGN_GRAD_GPU_KERNEL_H

#include <vector>
#include <algorithm>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "backend/kernel_compiler/gpu/cuda_impl/roi_align_impl.cuh"
@@ -83,11 +84,13 @@ class ROIAlignGradGpuFwdKernel : public GpuKernel {
rois_size_ = roi_rows_ * roi_cols_ * sizeof(T);

// Get primitive args
xdiff_shape_ = GetAttr<std::vector<int>>(kernel_node, "xdiff_shape");
pooled_height_ = GetAttr<int>(kernel_node, "pooled_height");
pooled_width_ = GetAttr<int>(kernel_node, "pooled_width");
std::vector<int64_t> xdiff_shape_me = GetAttr<std::vector<int64_t>>(kernel_node, "xdiff_shape");
(void)std::transform(xdiff_shape_me.begin(), xdiff_shape_me.end(), std::back_inserter(xdiff_shape_),
[](const int64_t &value) { return static_cast<int>(value); });
pooled_height_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "pooled_height"));
pooled_width_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "pooled_width"));
spatial_scale_ = static_cast<T>(GetAttr<float>(kernel_node, "spatial_scale"));
sample_num_ = GetAttr<int>(kernel_node, "sample_num");
sample_num_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "sample_num"));
roi_end_mode_ = 1;

// Get channels, height & width

@@ -18,6 +18,7 @@
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_NN_SOFTMAX_GPU_KERNEL_H_

#include <vector>
#include <algorithm>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "backend/kernel_compiler/gpu/kernel_constants.h"
@@ -117,11 +118,14 @@ class SoftmaxGpuKernel : public GpuKernel {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
if (kernel_name == "LogSoftmax") {
algo_ = CUDNN_SOFTMAX_LOG;
auto axis = GetAttr<int>(kernel_node, "axis");
auto axis = static_cast<int>(GetAttr<int64_t>(kernel_node, "axis"));
InitSizeByAxis(input_shape, axis);
} else {
algo_ = CUDNN_SOFTMAX_ACCURATE;
auto axis = GetAttr<std::vector<int>>(kernel_node, "axis");
std::vector<int> axis;
std::vector<int64_t> axis_me = GetAttr<std::vector<int64_t>>(kernel_node, "axis");
(void)std::transform(axis_me.begin(), axis_me.end(), std::back_inserter(axis),
[](const int64_t &value) { return static_cast<int>(value); });
InitSizeByAxis(input_shape, axis[0]);
}
CHECK_CUDNN_RET_WITH_EXCEPT(

@@ -18,6 +18,7 @@
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_NN_SOFTMAX_GRAD_GPU_KERNEL_H_

#include <vector>
#include <algorithm>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "backend/kernel_compiler/gpu/kernel_constants.h"
@@ -123,11 +124,14 @@ class SoftmaxGradGpuKernel : public GpuKernel {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
if (kernel_name == "LogSoftmaxGrad") {
algo_ = CUDNN_SOFTMAX_LOG;
auto axis = GetAttr<int>(kernel_node, "axis");
auto axis = static_cast<int>(GetAttr<int64_t>(kernel_node, "axis"));
InitSizeByAxis(input_shape, axis);
} else {
algo_ = CUDNN_SOFTMAX_ACCURATE;
auto axis = GetAttr<std::vector<int>>(kernel_node, "axis");
std::vector<int> axis;
std::vector<int64_t> axis_me = GetAttr<std::vector<int64_t>>(kernel_node, "axis");
(void)std::transform(axis_me.begin(), axis_me.end(), std::back_inserter(axis),
[](const int64_t &value) { return static_cast<int>(value); });
InitSizeByAxis(input_shape, axis[0]);
}
CHECK_CUDNN_RET_WITH_EXCEPT(

@@ -78,11 +78,11 @@ class UniformCandidateSamplerGpuKernel : public GpuKernel {
return false;
}
// getting attrs
num_true_ = GetAttr<int>(kernel_node, "num_true");
num_sampled_ = GetAttr<int>(kernel_node, "num_sampled");
num_true_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "num_true"));
num_sampled_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "num_sampled"));
unique_ = GetAttr<bool>(kernel_node, "unique");
range_max_ = GetAttr<int>(kernel_node, "range_max");
int seed = GetAttr<int>(kernel_node, "seed");
range_max_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "range_max"));
int seed = static_cast<int>(GetAttr<int64_t>(kernel_node, "seed"));
remove_accidental_hits_ = GetAttr<bool>(kernel_node, "remove_accidental_hits");
if (seed == 0) seed = time(NULL);
generator_.seed(seed);
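The sampler narrows its int64 seed attribute and falls back to wall-clock time when it is zero. The same logic in isolation, assuming a std::mt19937-style engine like the kernel's generator_ (an assumption; the member's exact type is outside this diff):

// Sketch of the seed-selection logic above, under the std::mt19937 assumption.
#include <ctime>
#include <random>

std::mt19937 MakeGenerator(int64_t seed_attr) {
  int seed = static_cast<int>(seed_attr);  // the patch narrows int64 -> int
  if (seed == 0) seed = static_cast<int>(time(nullptr));
  return std::mt19937(seed);
}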
@@ -18,6 +18,7 @@
#define MINDSPORE_CCSRC_KERNEL_GPU_OTHER_BOUNDINGBOX_DECODE_GPU_KERNEL_H

#include <vector>
#include <algorithm>
#include "backend/kernel_compiler/gpu/cuda_impl/boundingbox_decode_impl.cuh"
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
@@ -92,7 +93,7 @@
AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("means")->isa<ValueList>()) {
means_ = GetAttr<std::vector<float>>(kernel_node, "means");
} else if (AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("means")->isa<FloatImm>()) {
float mean = GetAttr<int>(kernel_node, "means");
float mean = GetAttr<float>(kernel_node, "means");
for (size_t i = 0; i < coordinate_size; i++) {
means_.emplace_back(mean);
}
@@ -104,7 +105,7 @@
AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("stds")->isa<ValueList>()) {
stds_ = GetAttr<std::vector<float>>(kernel_node, "stds");
} else if (AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("stds")->isa<FloatImm>()) {
float std = GetAttr<int>(kernel_node, "stds");
float std = GetAttr<float>(kernel_node, "stds");
for (size_t i = 0; i < coordinate_size; i++) {
stds_.emplace_back(std);
}
@@ -112,7 +113,9 @@
MS_LOG(EXCEPTION) << "Attribute stds type is invalid.";
}

max_shape_ = GetAttr<std::vector<int>>(kernel_node, "max_shape");
std::vector<int64_t> max_shape_me = GetAttr<std::vector<int64_t>>(kernel_node, "max_shape");
(void)std::transform(max_shape_me.begin(), max_shape_me.end(), std::back_inserter(max_shape_),
[](const int64_t &value) { return static_cast<int>(value); });
wh_ratio_clip_ = GetAttr<float>(kernel_node, "wh_ratio_clip");

if (means_.size() < coordinate_size || stds_.size() < coordinate_size) {

@@ -92,7 +92,7 @@ class BoundingBoxEncodeGpuKernel : public GpuKernel {
AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("means")->isa<ValueList>()) {
means_ = GetAttr<std::vector<float>>(kernel_node, "means");
} else if (AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("means")->isa<FloatImm>()) {
float mean = GetAttr<int>(kernel_node, "means");
float mean = GetAttr<float>(kernel_node, "means");
for (size_t i = 0; i < coordinate_size; i++) {
means_.emplace_back(mean);
}
@@ -104,7 +104,7 @@
AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("stds")->isa<ValueList>()) {
stds_ = GetAttr<std::vector<float>>(kernel_node, "stds");
} else if (AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("stds")->isa<FloatImm>()) {
float std = GetAttr<int>(kernel_node, "stds");
float std = GetAttr<float>(kernel_node, "stds");
for (size_t i = 0; i < coordinate_size; i++) {
stds_.emplace_back(std);
}

@@ -89,7 +89,7 @@ class BatchNormFold2GpuKernel : public GpuKernel {
channel_ = input_shape[1];
height_ = input_shape[2];
width_ = input_shape[3];
freeze_bn_ = GetValue<int32_t>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("freeze_bn"));
freeze_bn_ = static_cast<int>(GetValue<int64_t>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("freeze_bn")));

InitSizeLists();
return true;

@@ -123,7 +123,7 @@ class BatchNormFoldGpuKernel : public GpuKernel {
exp_avg_factor_ = 1.0 - momentum;
epsilon_ = GetValue<T>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("epsilon"));
is_training_ = GetValue<bool>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("is_training"));
freeze_bn_ = GetValue<int>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("freeze_bn"));
freeze_bn_ = static_cast<int>(GetValue<int64_t>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("freeze_bn")));

auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
if (input_shape.size() != 4) {

@@ -109,7 +109,7 @@ class BatchNormFoldGradGpuKernel : public GpuKernel {

epsilon_ = GetValue<T>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("epsilon"));
is_training_ = GetValue<bool>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("is_training"));
freeze_bn_ = GetValue<int>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("freeze_bn"));
freeze_bn_ = static_cast<int>(GetValue<int64_t>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("freeze_bn")));

auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
if (input_shape.size() != 4) {

@@ -55,11 +55,11 @@ bool FakeQuantPerChannelGpuKernel::Init(const CNodePtr &kernel_node) {
}

// get attribute
num_bits_ = GetValue<int>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("num_bits"));
num_bits_ = static_cast<int>(GetValue<int64_t>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("num_bits")));
training_ = GetValue<bool>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("training"));
symmetric_ = GetValue<bool>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("symmetric"));
narrow_range_ = GetValue<bool>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("narrow_range"));
quant_delay_ = GetValue<int>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("quant_delay"));
quant_delay_ = static_cast<int>(GetValue<int64_t>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("quant_delay")));

if (num_bits_ <= 2 || num_bits_ >= 16) {
MS_LOG(EXCEPTION) << "Attr \'num_bits\' " << num_bits_ << " is out of range, expected between 2 and 16.";

@@ -49,12 +49,12 @@ bool FakeQuantPerChannelGradGpuKernel::Init(const CNodePtr &kernel_node) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but FakeQuantGrad GpuKernel OP needs 1 output.";
}

num_bits_ = GetValue<int>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("num_bits"));
num_bits_ = static_cast<int>(GetValue<int64_t>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("num_bits")));
if (num_bits_ <= 2 || num_bits_ >= 16) {
MS_LOG(EXCEPTION) << "Attr \'num_bits\' " << num_bits_ << " is out of range, expected between 2 and 16.";
}

quant_delay_ = GetValue<int>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("quant_delay"));
quant_delay_ = static_cast<int>(GetValue<int64_t>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("quant_delay")));
if (quant_delay_ < 0) {
MS_LOG(EXCEPTION) << "Attr \'quant_delay_\' " << quant_delay_ << " is less than 0, but it is required to be non-negative.";
}

@@ -52,8 +52,8 @@ bool FakeQuantPerLayerGpuKernel::Init(const CNodePtr &kernel_node) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but FakeQuant GpuKernel OP needs 1 output.";
}

num_bits_ = GetValue<int>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("num_bits"));
quant_delay_ = GetValue<int>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("quant_delay"));
num_bits_ = static_cast<int>(GetValue<int64_t>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("num_bits")));
quant_delay_ = static_cast<int>(GetValue<int64_t>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("quant_delay")));
training_ = GetValue<bool>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("training"));
symmetric_ = GetValue<bool>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("symmetric"));
narrow_range_ = GetValue<bool>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("narrow_range"));

@@ -48,12 +48,12 @@ bool FakeQuantPerLayerGradGpuKernel::Init(const CNodePtr &kernel_node) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but FakeQuantGrad GpuKernel OP needs 1 output.";
}

num_bits_ = GetValue<int>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("num_bits"));
num_bits_ = static_cast<int>(GetValue<int64_t>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("num_bits")));
if (num_bits_ <= 2 || num_bits_ >= 16) {
MS_LOG(EXCEPTION) << "Attr \'num_bits\' " << num_bits_ << " is out of range, expected between 2 and 16.";
}

quant_delay_ = GetValue<int>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("quant_delay"));
quant_delay_ = static_cast<int>(GetValue<int64_t>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("quant_delay")));
if (quant_delay_ < 0) {
MS_LOG(EXCEPTION) << "Attr \'quant_delay_\' " << quant_delay_ << " is less than 0, but it is required to be non-negative.";
}

@@ -80,8 +80,8 @@ class RandomChoiceWithMaskGpuKernel : public GpuKernel {
input_shape_5D_.insert(input_shape_5D_.begin(), 1);
}
// init seedc_
int seed = GetAttr<int>(kernel_node, "seed");
int seed2 = GetAttr<int>(kernel_node, "seed2");
int seed = static_cast<int>(GetAttr<int64_t>(kernel_node, "seed"));
int seed2 = static_cast<int>(GetAttr<int64_t>(kernel_node, "seed2"));
if (seed2 != 0)
seedc_ = seed2;
else if (seed != 0)
@@ -92,7 +92,7 @@
for (size_t i = 0; i < input_shape.size(); i++) {
input_size_ *= input_shape[i];
}
count_ = GetAttr<int>(kernel_node, "count");
count_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "count"));
// upper ceiling for input for ceil_power2
ceil_power2_ = RcwmRoundUpPower2(input_size_);
InitSizeLists();

@@ -114,16 +114,16 @@ bool HcomUtil::GetHcomCount(const AnfNodePtr &anf_node, const vector<HcclDataTyp
}

if (AnfAlgo::GetCNodeName(anf_node) == kReduceScatterOpName) {
int32_t rank_size;
int64_t rank_size;
auto primitive = AnfAlgo::GetCNodePrimitive(anf_node);
MS_EXCEPTION_IF_NULL(primitive);
if (primitive->GetAttr("rank_size") != nullptr) {
rank_size = GetValue<int32_t>(primitive->GetAttr("rank_size"));
rank_size = GetValue<int64_t>(primitive->GetAttr("rank_size"));
} else {
MS_LOG(ERROR) << "Get rank size failed";
return false;
}
block_size = input_size / IntToSize(rank_size);
block_size = input_size / LongToSize(rank_size);
total_size = total_size + block_size;
} else {
if (AnfAlgo::GetCNodeName(anf_node) == kAllGatherOpName) {
@@ -175,7 +175,7 @@ bool HcomUtil::GetHcomRootId(const AnfNodePtr &anf_node, uint32_t *root_id) {
auto primitive = AnfAlgo::GetCNodePrimitive(anf_node);
MS_EXCEPTION_IF_NULL(primitive);
if (primitive->GetAttr("root_rank") != nullptr) {
*root_id = (uint32_t)GetValue<int>(primitive->GetAttr("root_rank"));
*root_id = (uint32_t)GetValue<int64_t>(primitive->GetAttr("root_rank"));
} else {
MS_LOG(ERROR) << "HcomUtil::Get HCOM_ATTR_ROOT_INDEX failed, not supported!";
return false;

@@ -27,12 +27,12 @@ void DynamicShapeKernel::Execute() {
}

auto prev_output_shape = AnfAlgo::GetPrevNodeOutputInferShape(cnode_ptr_, 0);
auto output_shape = std::vector<int>(SizeToInt(prev_output_shape.size()));
auto output_shape = std::vector<int64_t>(SizeToLong(prev_output_shape.size()));

auto output_type = TypeId::kNumberTypeInt32;
auto output_type = TypeId::kNumberTypeInt64;

auto output_tensor_for_sync = std::make_shared<tensor::Tensor>(output_type, output_shape);
auto data_ptr = static_cast<int32_t *>(output_tensor_for_sync->data_c());
auto data_ptr = static_cast<int64_t *>(output_tensor_for_sync->data_c());
for (size_t i = 0; i < prev_output_shape.size(); ++i) {
MS_LOG(INFO) << "DEBUG prev_output_shape[" << i << "]:" << prev_output_shape[i];
*(data_ptr + i) = prev_output_shape[i];
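DynamicShapeKernel now materializes the shape as an int64 tensor, so the dtype tag and the pointer cast must move together: writing int64 values through an int32_t* would land on the wrong stride. The same fill pattern over a raw buffer (plain C++ sketch with the tensor machinery mocked away):

// Sketch: the declared element type (int64) and the cast pointer must agree.
#include <cstdint>
#include <vector>

int main() {
  std::vector<size_t> prev_output_shape = {2, 3, 4};
  std::vector<int64_t> buffer(prev_output_shape.size());  // int64 shape-tensor storage
  int64_t *data_ptr = buffer.data();                      // matches kNumberTypeInt64
  for (size_t i = 0; i < prev_output_shape.size(); ++i) {
    *(data_ptr + i) = static_cast<int64_t>(prev_output_shape[i]);
  }
  return 0;
}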
@@ -28,9 +28,9 @@
namespace mindspore {
namespace kernel {
using mindspore::kernel::tbe::TbeUtils;
std::map<int32_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo> &fusion_scopes) {
std::map<int64_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo> &fusion_scopes) {
MS_LOG(INFO) << "kernel fusion build start, scope size:" << fusion_scopes.size();
std::map<int32_t, KernelModPtr> kernel_mod_ret;
std::map<int64_t, KernelModPtr> kernel_mod_ret;
auto build_manger = std::make_shared<ParallelBuildManager>();
MS_EXCEPTION_IF_NULL(build_manger);
for (const auto &fusion_scope_iter : fusion_scopes) {

@@ -26,15 +26,15 @@ namespace kernel {
* @brief fuse op and return a callable mod
*/
struct FusionScopeInfo {
FusionScopeInfo(int32_t id, std::vector<AnfNodePtr> in, std::vector<AnfNodePtr> comp, std::vector<AnfNodePtr> out)
FusionScopeInfo(int64_t id, std::vector<AnfNodePtr> in, std::vector<AnfNodePtr> comp, std::vector<AnfNodePtr> out)
: scope_id(id), input_nodes(std::move(in)), compute_nodes(std::move(comp)), output_nodes(std::move(out)) {}
int32_t scope_id{};
int64_t scope_id{};
std::vector<AnfNodePtr> input_nodes;
std::vector<AnfNodePtr> compute_nodes;
std::vector<AnfNodePtr> output_nodes;
};

std::map<int32_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo> &fusion_scopes);
std::map<int64_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo> &fusion_scopes);
} // namespace kernel
} // namespace mindspore

@@ -69,6 +69,7 @@ constexpr auto kVTypeBool = "bool";
constexpr auto kVTypeFloat = "float";
constexpr auto kVTypeListInt = "listInt";
constexpr auto kVTypeInt32 = "Int32";
constexpr auto kVTypeInt64 = "Int64";
constexpr auto kVTypeListUInt64 = "listUInt64";
constexpr auto kVTypeListFloat = "listFloat";
constexpr auto kVTypeListListInt = "listListInt";
@@ -241,9 +242,9 @@ bool GetInputNameAndRealNum(const std::shared_ptr<AnfNode> &anf_node, const std:
MS_EXCEPTION_IF_NULL(op_input_name);
auto primitive = AnfAlgo::GetCNodePrimitive(anf_node);
// for dynamic input number, dyn_input_sizes has the info of dynamic input num for each input.
std::vector<int> dyn_input_sizes;
std::vector<int64_t> dyn_input_sizes;
if (primitive->GetAttr(kAttrDynInputSizes) != nullptr) {
dyn_input_sizes = GetValue<const std::vector<int>>(primitive->GetAttr(kAttrDynInputSizes));
dyn_input_sizes = GetValue<const std::vector<int64_t>>(primitive->GetAttr(kAttrDynInputSizes));
}

if (input_ptr->param_type() == kParamDynamic) {
@@ -251,7 +252,7 @@ bool GetInputNameAndRealNum(const std::shared_ptr<AnfNode> &anf_node, const std:
MS_LOG(ERROR) << "Dyn input index " << *dyn_input_index << " is over dyn input num " << dyn_input_sizes.size();
return false;
}
*input_num = IntToSize(dyn_input_sizes[*dyn_input_index]);
*input_num = LongToSize(dyn_input_sizes[*dyn_input_index]);
*op_input_name = input_ptr->name() + "_dynamic_";
(*dyn_input_index)++;
// if optional input exists
@@ -454,7 +455,15 @@ void TbeKernelJsonCreator::ParseAttrValue(const std::string &type, const mindspo
   MS_EXCEPTION_IF_NULL(value);
   MS_EXCEPTION_IF_NULL(attr_obj);
   if (type == kVTypeInt) {
-    auto attr_value = GetValue<int>(value);
+    if (value->isa<Int32Imm>()) {
+      auto attr_value = GetValue<int>(value);
+      (*attr_obj)[kJValue] = attr_value;
+    } else {
+      auto attr_value = GetValue<int64_t>(value);
+      (*attr_obj)[kJValue] = attr_value;
+    }
+  } else if (type == kVTypeInt64) {
+    auto attr_value = GetValue<int64_t>(value);
     (*attr_obj)[kJValue] = attr_value;
   } else if (type == kVTypeStr) {
     auto attr_value = GetValue<std::string>(value);

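The kVTypeInt branch above now dispatches on the runtime immediate type, so graphs still carrying Int32Imm attributes keep working through the migration while Int64Imm becomes the default. A standalone analogue of that dispatch, using std::variant as a stand-in for the Int32Imm/Int64Imm value hierarchy (all names illustrative):

    // std::variant plays the role of the Int32Imm/Int64Imm class hierarchy.
    #include <cstdint>
    #include <variant>

    using IntImm = std::variant<int32_t, int64_t>;

    // Analogue of the kVTypeInt branch: accept either width, emit int64_t.
    int64_t ReadIntAttr(const IntImm &value) {
      if (std::holds_alternative<int32_t>(value)) {  // value->isa<Int32Imm>() analogue
        return static_cast<int64_t>(std::get<int32_t>(value));
      }
      return std::get<int64_t>(value);
    }
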
@@ -469,15 +478,25 @@ void TbeKernelJsonCreator::ParseAttrValue(const std::string &type, const mindspo
     auto attr_value = GetValue<float>(value);
     (*attr_obj)[kJValue] = attr_value;
   } else if (type == kVTypeListInt) {
-    std::vector<int> attr_value;
+    std::vector<int64_t> attr_value;
     auto value_type = value->type();
     MS_EXCEPTION_IF_NULL(value_type);
     auto value_type_str = value_type->ToString();
-    if (value_type_str == kVTypeInt32) {
-      int data = GetValue<int>(value);
+    if (value_type_str == kVTypeInt64) {
+      int64_t data = GetValue<int64_t>(value);
       attr_value.push_back(data);
     } else {
-      attr_value = GetValue<std::vector<int>>(value);
+      auto vec =
+        value->isa<ValueTuple>() ? value->cast<ValueTuplePtr>()->value() : value->cast<ValueListPtr>()->value();
+      if (!vec.empty()) {
+        if (vec[0]->isa<Int32Imm>()) {
+          std::vector<int32_t> attr_value_me = GetValue<std::vector<int32_t>>(value);
+          (void)std::transform(attr_value_me.begin(), attr_value_me.end(), std::back_inserter(attr_value),
+                               [](const int &value) { return static_cast<int64_t>(value); });
+        } else {
+          attr_value = GetValue<std::vector<int64_t>>(value);
+        }
+      }
     }
     (*attr_obj)[kJValue] = attr_value;
   } else if (type == kVTypeListFloat) {

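The listInt branch now peeks at the first element to decide between reading the list directly as std::vector<int64_t> and widening a legacy int32 list element by element. The widening step in isolation, as a self-contained sketch (the function name is illustrative):

    // Standalone sketch of the element-wise int32 -> int64 widening used above.
    #include <algorithm>
    #include <cstdint>
    #include <iterator>
    #include <vector>

    std::vector<int64_t> WidenToInt64(const std::vector<int32_t> &src) {
      std::vector<int64_t> dst;
      dst.reserve(src.size());
      (void)std::transform(src.begin(), src.end(), std::back_inserter(dst),
                           [](int32_t v) { return static_cast<int64_t>(v); });
      return dst;
    }
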
@@ -496,7 +515,7 @@ void TbeKernelJsonCreator::ParseAttrValue(const std::string &type, const mindspo
     auto attr_value = GetValue<std::vector<size_t>>(value);
     (*attr_obj)[kJValue] = attr_value;
   } else if (type == kVTypeListListInt) {
-    auto attr_value = GetValue<std::vector<std::vector<int>>>(value);
+    auto attr_value = GetValue<std::vector<std::vector<int64_t>>>(value);
     (*attr_obj)[kJValue] = attr_value;
   } else {
     MS_LOG(EXCEPTION) << "Type: " << type << "not support";

@@ -959,17 +978,17 @@ bool TbeKernelBuild::IsDynamicInput(const mindspore::CNodePtr &cnode) {
   MS_EXCEPTION_IF_NULL(primitive);
   // for dynamic input number, dyn_input_sizes has the info of dynamic input num for each input.
   bool ret = false;
-  std::vector<int> dyn_input_sizes;
+  std::vector<int64_t> dyn_input_sizes;
   auto dynamic_input_attr = primitive->GetAttr(kAttrDynInputSizes);
   if (dynamic_input_attr != nullptr) {
-    dyn_input_sizes = GetValue<const std::vector<int>>(dynamic_input_attr);
+    dyn_input_sizes = GetValue<const std::vector<int64_t>>(dynamic_input_attr);
     auto real_input_size = cnode->inputs().size() - 1;
     auto dyn_input_size = dyn_input_sizes.size();
     if (dyn_input_size != 1) {
       MS_LOG(INFO) << "Fusion error: fusion build not support dyn_input_sizes > 1";
       return ret;
     }
-    if (IntToSize(dyn_input_sizes[0]) != real_input_size) {
+    if (LongToSize(dyn_input_sizes[0]) != real_input_size) {
       MS_LOG(INFO) << "Fusion error: dyn_input_size" << dyn_input_sizes[0] << "not equal real_input_size"
                    << real_input_size;
       return ret;

@@ -1069,7 +1088,7 @@ bool TbeKernelBuild::GenFusionComputeInputJson(const mindspore::CNodePtr &cnode,
   return true;
 }

-std::vector<size_t> TbeKernelBuild::GetDescOutputIndex(const std::vector<int> &output_used_nums) {
+std::vector<size_t> TbeKernelBuild::GetDescOutputIndex(const std::vector<int64_t> &output_used_nums) {
   std::vector<size_t> desc_output_index = {};
   for (size_t idx = 0; idx < output_used_nums.size(); ++idx) {
     auto output_use_num_item = output_used_nums[idx];

@@ -1087,7 +1106,7 @@ bool TbeKernelBuild::GenFusionComputeOutputJson(const mindspore::CNodePtr &cnode
   MS_EXCEPTION_IF_NULL(output_desc_list);
   auto output_size = AnfAlgo::GetOutputTensorNum(cnode);
   if (AnfAlgo::HasNodeAttr(kAttrOutputUsedNum, cnode)) {
-    auto output_used_nums = AnfAlgo::GetNodeAttr<std::vector<int>>(cnode, kAttrOutputUsedNum);
+    auto output_used_nums = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(cnode, kAttrOutputUsedNum);
     MS_LOG(INFO) << "Fusion info: this node's output has been reused, node name: " << cnode->fullname_with_scope();
     if (output_used_nums.size() != output_size) {
       MS_LOG(INFO) << "Fusion error: output tenor num(" << output_size << ")"

@@ -58,7 +58,7 @@ class TbeKernelBuild {
   static bool GenFusionComputeInputJson(const mindspore::CNodePtr &cnode,
                                         std::vector<std::vector<mindspore::AnfNodePtr>>::iterator *layer_iter,
                                         std::vector<nlohmann::json> *input_desc_list, size_t *index);
-  static std::vector<size_t> GetDescOutputIndex(const std::vector<int> &output_used_nums);
+  static std::vector<size_t> GetDescOutputIndex(const std::vector<int64_t> &output_used_nums);
   static bool GenFusionComputeOutputJson(const mindspore::CNodePtr &cnode,
                                          std::vector<nlohmann::json> *output_desc_list);
   static void GenPreDescJson(nlohmann::json *output_desc);

@@ -35,7 +35,7 @@ bool TbeKernelBroadCastSelecter::GetShapeInfo(SupportFormat *support_format) {
   output_shapes_.clear();
   if (AnfAlgo::HasNodeAttr(kAttrDynInputSizes, cnode_ptr_)) {
     MS_LOG(INFO) << "This broadcast node has dynamic input.";
-    auto dynamic_size_vec = AnfAlgo::GetNodeAttr<std::vector<int>>(cnode_ptr_, kAttrDynInputSizes);
+    auto dynamic_size_vec = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(cnode_ptr_, kAttrDynInputSizes);
     if (dynamic_size_vec.empty() || dynamic_size_vec[0] < 2) {
       MS_LOG(EXCEPTION) << "dynamic attr set error, please check.";
     }

@@ -43,7 +43,7 @@ class TbeKernelReduceSelecter {
   CNodePtr cnode_ptr_;
   std::vector<size_t> input_shape_{};
   std::vector<size_t> output_shape_{};
-  std::vector<int> axis_{};
+  std::vector<int64_t> axis_{};
   bool keep_dims_ = false;
 };
 }  // namespace kernel