cpu conv2d: support different window sizes
parent 0e3dfdd01c
commit 634035827e
@@ -32,8 +32,6 @@ void Conv2dCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape);
   dnnl::memory::desc weights_desc = GetDefaultMemDesc(weight_shape);
   dnnl::memory::desc dst_desc = GetDefaultMemDesc(dst_shape);
-
-  int kernel_size = SizeToInt(weight_shape[3]);
   auto stride_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, STRIDE);
   auto dilation_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, DILATION);
   if (stride_ori.size() != 4 || stride_ori[2] != stride_ori[3]) {
@@ -57,6 +55,7 @@ void Conv2dCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   std::vector<int> int_padding_r;

   const std::string pad_mode = AnfAlgo::GetNodeAttr<std::string>(kernel_node, PAD_MODE);
+  std::vector<size_t> kernel_size({weight_shape[2], weight_shape[3]});
   GetPadding(kernel_node, pad_mode, src_shape, kernel_size, stride, &int_padding_l, &int_padding_r);
   if (int_padding_l.size() != 2 || int_padding_r.size() != 2) {
     MS_LOG(EXCEPTION) << "get padding failed";
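The added kernel_size vector carries the window's {height, width} rather than a single edge length, which is what lets non-square windows through this path. A minimal sketch of that extraction, assuming the last two weight dimensions are spatial (OIHW layout); SpatialWindow is a hypothetical helper, not part of the kernel class:

    #include <cstddef>
    #include <vector>

    // Sketch: spatial {height, width} window from a 4-D weight shape whose
    // trailing two dimensions are spatial (OIHW layout assumed).
    std::vector<size_t> SpatialWindow(const std::vector<size_t> &weight_shape) {
      // e.g. weight_shape {64, 3, 5, 3} -> window {5, 3}, a 5x3 non-square kernel.
      return {weight_shape[2], weight_shape[3]};
    }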
@@ -32,8 +32,6 @@ void Conv2dGradFilterCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape);
   dnnl::memory::desc weights_desc = GetDefaultMemDesc(weight_shape);
   dnnl::memory::desc dst_desc = GetDefaultMemDesc(dst_shape);
-
-  int kernel_size = SizeToInt(weight_shape[3]);
   auto stride_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, STRIDE);
   auto dilation_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, DILATION);
   if (stride_ori.size() != 2 || stride_ori[0] != stride_ori[1]) {
@@ -53,6 +51,7 @@ void Conv2dGradFilterCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   const std::string pad_mode = AnfAlgo::GetNodeAttr<std::string>(kernel_node, PAD_MODE);
   std::vector<int> int_padding_l;
   std::vector<int> int_padding_r;
+  std::vector<size_t> kernel_size({weight_shape[2], weight_shape[3]});
   GetPadding(kernel_node, pad_mode, src_shape, kernel_size, stride, &int_padding_l, &int_padding_r);
   if (int_padding_l.size() != 2 || int_padding_r.size() != 2) {
     MS_LOG(EXCEPTION) << "get padding failed";
@@ -33,7 +33,6 @@ void Conv2dGradInputCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   dnnl::memory::desc weights_desc = GetDefaultMemDesc(weight_shape);
   dnnl::memory::desc dst_desc = GetDefaultMemDesc(dst_shape);

-  int kernel_size = SizeToInt(weight_shape[3]);
   auto stride_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, STRIDE);
   auto dilation_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, DILATION);
   if (stride_ori.size() != 2 || stride_ori[0] != stride_ori[1]) {
@@ -52,6 +51,7 @@ void Conv2dGradInputCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   std::vector<int> int_padding_l;
   std::vector<int> int_padding_r;
   const std::string pad_mode = AnfAlgo::GetNodeAttr<std::string>(kernel_node, PAD_MODE);
+  std::vector<size_t> kernel_size({weight_shape[2], weight_shape[3]});
   GetPadding(kernel_node, pad_mode, src_shape, kernel_size, stride, &int_padding_l, &int_padding_r);
   if (int_padding_l.size() != 2 || int_padding_r.size() != 2) {
     MS_LOG(EXCEPTION) << "conv2d grad get padding failed";
@@ -23,7 +23,7 @@
 namespace mindspore {
 namespace kernel {
 void MKLCPUKernel::GetPadding(const CNodePtr &kernel_node, const std::string &pad_mode,
-                              const std::vector<size_t> &src_shape, int kernel_size, int stride,
+                              const std::vector<size_t> &src_shape, const std::vector<size_t> &kernel_size, int stride,
                               std::vector<int> *padding_l, std::vector<int> *padding_r) {
   MS_EXCEPTION_IF_NULL(kernel_node);
   if (src_shape.size() < 2) {
@@ -32,11 +32,13 @@ void MKLCPUKernel::GetPadding(const CNodePtr &kernel_node, const std::string &pad_mode,
   std::vector<int> weight_height;
   weight_height.emplace_back(src_shape[src_shape.size() - 2]);
   weight_height.emplace_back(src_shape[src_shape.size() - 1]);
-  int rad = kernel_size / 2;
-  int need_pad = kernel_size - 1;
   MS_LOG(INFO) << "pad mode " << pad_mode;
   if (pad_mode == PAD_MODE_LOWER_SAME || pad_mode == PAD_MODE_UPPER_SAME) {
-    for (auto wh : weight_height) {
+    for (size_t i = 0; i < weight_height.size(); ++i) {
+      auto wh = weight_height[i];
+      int rad = kernel_size[i] / 2;
+      int need_pad = kernel_size[i] - 1;
       int re = (wh - 1) % stride;
       int pad = std::max(rad - (re / 2), 0);
       padding_r->emplace_back(pad);
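The rewritten loop computes SAME padding per spatial dimension now that kernel_size is a {height, width} vector. A standalone sketch of the same arithmetic follows; the hunk cuts off before padding_l is filled, so the left-side line below is an assumption (the remainder of need_pad), and SamePadding is a hypothetical name:

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    // Sketch: per-dimension SAME padding, mirroring the loop above.
    // dims holds the input's {height, width}; stride is shared by both axes.
    void SamePadding(const std::vector<int> &dims, const std::vector<size_t> &kernel_size, int stride,
                     std::vector<int> *padding_l, std::vector<int> *padding_r) {
      for (size_t i = 0; i < dims.size(); ++i) {
        int rad = static_cast<int>(kernel_size[i]) / 2;       // half window
        int need_pad = static_cast<int>(kernel_size[i]) - 1;  // total pad when stride == 1
        int re = (dims[i] - 1) % stride;
        int pad = std::max(rad - (re / 2), 0);
        padding_r->emplace_back(pad);
        padding_l->emplace_back(need_pad - pad);  // assumption: remainder goes left
      }
    }

For a 3x2 window at stride 1 this yields right padding {1, 1} and left padding {1, 0}, so height and width are padded independently.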
@@ -33,7 +33,8 @@ class MKLCPUKernel : public CPUKernel {

 protected:
   void GetPadding(const CNodePtr &kernel_node, const std::string &pad_mode, const std::vector<size_t> &src_shape,
-                  int kernel_size, int stride, std::vector<int> *padding_l, std::vector<int> *padding_r);
+                  const std::vector<size_t> &kernel_size, int stride, std::vector<int> *padding_l,
+                  std::vector<int> *padding_r);
   void AddArgument(int arg_key, const dnnl::memory::desc &mem_desc, bool alloc = false);
   void SetArgumentHandle(int arg_key, void *ptr);
   dnnl::memory::format_tag GetDefaultFormatTag(const dnnl::memory::dims &dims) const;
@@ -28,17 +28,18 @@ void PoolingCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   std::vector<size_t> dst_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
   dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape);
   dnnl::memory::desc dst_desc = GetDefaultMemDesc(dst_shape);
-  std::vector<int> kernel_sizes = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, KSIZE);
+  std::vector<int> origin_kernel_sizes = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, KSIZE);
   std::vector<int> strides = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, STRIDES);
-  if (kernel_sizes.size() != 4 || strides.size() != 4) {
-    MS_LOG(EXCEPTION) << "invalid kernel size " << kernel_sizes.size() << " or stride size " << strides.size();
+  if (origin_kernel_sizes.size() != 4 || strides.size() != 4) {
+    MS_LOG(EXCEPTION) << "invalid kernel size " << origin_kernel_sizes.size() << " or stride size " << strides.size();
   }
   dnnl::memory::dims strides_dims{strides[2], strides[3]};
-  dnnl::memory::dims kernels_dims{kernel_sizes[2], kernel_sizes[3]};
+  dnnl::memory::dims kernels_dims{origin_kernel_sizes[2], origin_kernel_sizes[3]};
   const std::string pad_mode = AnfAlgo::GetNodeAttr<std::string>(kernel_node, PADDING);
   std::vector<int> int_padding_l;
   std::vector<int> int_padding_r;
-  GetPadding(kernel_node, pad_mode, src_shape, kernel_sizes[3], strides[3], &int_padding_l, &int_padding_r);
+  std::vector<size_t> kernel_size({IntToSize(origin_kernel_sizes[2]), IntToSize(origin_kernel_sizes[3])});
+  GetPadding(kernel_node, pad_mode, src_shape, kernel_size, strides[3], &int_padding_l, &int_padding_r);
   if (int_padding_l.size() != 2 || int_padding_r.size() != 2) {
     MS_LOG(EXCEPTION) << "pooling get padding failed";
   }
@@ -34,7 +34,7 @@ void PoolingGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   }
   std::vector<int> padding_r;
   const std::string pad_mode = AnfAlgo::GetNodeAttr<std::string>(kernel_node, PADDING);
-  kernel_size_ = kernel_sizes[3];
+  kernel_size_ = {IntToSize(kernel_sizes[2]), IntToSize(kernel_sizes[3])};
   stride_ = strides[3];
   GetPadding(kernel_node, pad_mode, src_shape_, kernel_size_, stride_, &padding_l_, &padding_r);
 }
@@ -77,7 +77,7 @@ void PoolingGradCPUKernel::ChannelPoolingGrad(const float *input, const float *diff, float *output) {
   size_t diff_index = 0;
   for (size_t h = 0; h < dst_shape_[2]; ++h) {
     box[0].first = IntToSize(std::max(h_start, 0));
-    box[0].second = IntToSize(std::min(h_start + kernel_size_, src_height));
+    box[0].second = IntToSize(std::min(h_start + SizeToInt(kernel_size_[1]), src_height));
     for (size_t w = 0; w < src_shape_[3]; ++w) {
       row_max_pair[w].first = 0;
       row_max_pair[w].second = 0;
@@ -85,7 +85,7 @@ void PoolingGradCPUKernel::ChannelPoolingGrad(const float *input, const float *diff, float *output) {
     int w_start = -padding_l_[1];
     for (size_t w = 0; w < dst_shape_[3]; ++w) {
       box[1].first = IntToSize(std::max(w_start, 0));
-      box[1].second = IntToSize(std::min(w_start + kernel_size_, src_width));
+      box[1].second = IntToSize(std::min(w_start + SizeToInt(kernel_size_[0]), src_width));
       RowPoolingGrad(input, output, diff[diff_index], box, &row_max_pair);
       diff_index += 1;
       w_start += stride_;
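Both box computations clamp a pooling window [start, start + k) to the source extent. A minimal sketch of that clamping under the same int/size_t mix, with ClampWindow as a hypothetical helper:

    #include <algorithm>
    #include <cstddef>
    #include <utility>

    // Sketch: clamp a pooling window [start, start + k) to [0, limit),
    // mirroring the box[0]/box[1] bounds above.
    std::pair<size_t, size_t> ClampWindow(int start, size_t k, int limit) {
      size_t first = static_cast<size_t>(std::max(start, 0));
      size_t second = static_cast<size_t>(std::min(start + static_cast<int>(k), limit));
      return {first, second};
    }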
@@ -37,7 +37,8 @@ class PoolingGradCPUKernel : public MKLCPUKernel {
   void RowPoolingGrad(const float *input, float *output, float diff, const std::vector<std::pair<size_t, size_t>> &box,
                       std::vector<std::pair<size_t, float>> *row_max_pair);
   void ChannelPoolingGrad(const float *input, const float *diff, float *output);
-  int stride_{0}, kernel_size_{0};
+  int stride_{0};
+  std::vector<size_t> kernel_size_;
   std::vector<int> padding_l_;
   std::vector<size_t> src_shape_;
   std::vector<size_t> dst_shape_;
@@ -36,23 +36,6 @@ namespace mindspore {
 namespace device {
 namespace cpu {
 const size_t INIT_NODE_REF = 1;
-namespace {
-TypeId GetCPUSupportOutputTypeId(const TypeId type_id) {
-  TypeId support_type_id = type_id;
-  if (type_id == kNumberTypeUInt32) {
-    support_type_id = kNumberTypeInt32;
-  }
-  if (type_id == kNumberTypeFloat || type_id == kNumberTypeFloat16 || type_id == kNumberTypeFloat32 ||
-      type_id == kNumberTypeFloat64) {
-    support_type_id = kNumberTypeFloat32;
-  }
-  if (support_type_id != kNumberTypeInt32 && support_type_id != kNumberTypeFloat32) {
-    MS_LOG(EXCEPTION) << "Check output type failed.";
-  }
-  return support_type_id;
-}
-}  // namespace
-
 void CPUKernelRuntime::AssignKernelAddress(session::KernelGraph *kernel_graph) {
   AssignValueNodeAddress(kernel_graph);
   AssignInputNodeAddress(kernel_graph);
@@ -157,15 +140,25 @@ tensor::TensorPtr CPUKernelRuntime::CreatTensorForOutput(const CNodePtr &node, size_t index,
   auto shape = AnfAlgo::GetOutputInferShape(node, index);
   std::vector<int> temp_shape;
   (void)temp_shape.insert(temp_shape.end(), shape.begin(), shape.end());
-  TypeId type_id = AnfAlgo::GetOutputInferDataType(node, index);
-  type_id = GetCPUSupportOutputTypeId(type_id);
-  tensor::TensorPtr tensor = std::make_shared<tensor::Tensor>(type_id, temp_shape);
+  TypeId infer_type_id = AnfAlgo::GetOutputInferDataType(node, index);
+  TypeId device_type_id = AnfAlgo::GetOutputDeviceDataType(node, index);
+  tensor::TensorPtr tensor = std::make_shared<tensor::Tensor>(infer_type_id, temp_shape);
   MS_EXCEPTION_IF_NULL(tensor);
   if (bound_addresses->find(address) != bound_addresses->end()) {
     tensor->set_device_address(address);
     need_sync_outputs->emplace_back(tensor);
+  } else {
+    if (infer_type_id != device_type_id) {
+      size_t type_size = GetTypeByte(TypeIdToType(device_type_id));
+      std::vector<int> data_shape = tensor->shape();
+      size_t tensor_size = std::accumulate(data_shape.begin(), data_shape.end(), type_size, std::multiplies<size_t>());
+      address->ptr_ = resource_manager_.MemMalloc(tensor_size);
+      need_sync_outputs->emplace_back(tensor);
+      tensor->set_device_address(address);
+      need_sync_outputs->emplace_back(tensor);
     } else {
       address->ptr_ = tensor->data_c();
+    }
     address->ref_count_ = INIT_NODE_REF;
     (void)bound_addresses->insert(address);
   }
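When the inferred output type differs from the device type, the runtime now allocates a separate device buffer and marks the tensor for a later sync back to host. The buffer size is the product of the dimensions seeded with the element size; a minimal sketch of that computation, with TensorByteSize as a hypothetical name:

    #include <cstddef>
    #include <functional>
    #include <numeric>
    #include <vector>

    // Sketch: byte size = (product of dims) * type_size, exactly the
    // std::accumulate call used in the added branch above.
    size_t TensorByteSize(const std::vector<int> &shape, size_t type_size) {
      return std::accumulate(shape.begin(), shape.end(), type_size, std::multiplies<size_t>());
    }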
@@ -226,12 +219,13 @@ void CPUKernelRuntime::BindInputOutput(const session::KernelGraph *kernel_graph,
   if (tensor_address != nullptr && tensor_address != address) {
     (void)tensor->data_sync();
   }
-  std::vector<int> data_shape = tensor->shape();
-  size_t tensor_size =
-    std::accumulate(data_shape.begin(), data_shape.end(), sizeof(float), std::multiplies<size_t>());
   if (tensor->data_type() == kNumberTypeFloat32 || tensor->data_type() == kNumberTypeInt32) {
     address->ptr_ = tensor->data_c();
   } else {
+    std::vector<int> data_shape = tensor->shape();
+    size_t tensor_size =
+      std::accumulate(data_shape.begin(), data_shape.end(), sizeof(float), std::multiplies<size_t>());
     address->ptr_ = resource_manager_.MemMalloc(tensor_size);
     if (!address->SyncHostToDevice(data_shape, LongToSize(tensor->data().nbytes()), tensor->data_type(),
                                    tensor->data_c())) {
@@ -141,7 +141,11 @@ void SetKernelInfo(const CNodePtr &kernel_node) {
   if (kernel_attr.GetAllSame()) {
     ExpandKernelAttr(kernel_node, &kernel_attr);
   }
-  if (IsInputFormatDtypeMatched(kernel_attr, input_formats, input_types, input_not_cnode_indexes)) {
+  bool ignore_check = false;
+  if (index == kernel_attrs.size() - 1 && input_types.size() == input_not_cnode_indexes.size()) {
+    ignore_check = true;
+  }
+  if (ignore_check || IsInputFormatDtypeMatched(kernel_attr, input_formats, input_types, input_not_cnode_indexes)) {
     size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
     if (kernel_attr.GetOutputSize() != output_num) {
       MS_LOG(DEBUG) << "Output num is not equal!";
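The added ignore_check forces a match on the last candidate kernel attr when every input comes from a non-CNode, so selection cannot dead-end on constant-only inputs. A hedged sketch of that control flow; matches stands in for IsInputFormatDtypeMatched and the function name is hypothetical:

    #include <cstddef>
    #include <functional>

    // Sketch: try each candidate attr; on the last one, when all inputs are
    // non-CNode, accept it even if the format/dtype check fails.
    int SelectKernelAttr(size_t num_candidates, size_t num_inputs, size_t num_not_cnode_inputs,
                         const std::function<bool(size_t)> &matches) {
      for (size_t index = 0; index < num_candidates; ++index) {
        bool ignore_check = (index == num_candidates - 1) && (num_inputs == num_not_cnode_inputs);
        if (ignore_check || matches(index)) {
          return static_cast<int>(index);
        }
      }
      return -1;  // no candidate matched
    }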
@@ -223,7 +223,7 @@ std::string GetCNodeTarget(const AnfNodePtr &node) {
   }
   auto target = GetValue<std::string>(att_target);
   if (kTargetSet.find(target) == kTargetSet.end()) {
-    MS_LOG(EXCEPTION) << "Only support string CPU|GPU|Ascend for primitive_target";
+    MS_LOG(EXCEPTION) << "Only support string CPU|GPU|Ascend for primitive_target, but get " << target;
   }
   return target;
 }