forked from mindspore-Ecosystem/mindspore
cpu conv2d support diff winsize
parent 0e3dfdd01c
commit 634035827e
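In short, this change lets the CPU conv2d and pooling kernels handle windows whose height and width differ: MKLCPUKernel::GetPadding now takes the kernel size per spatial dimension instead of a single scalar, and each call site builds a two-element {kernel_h, kernel_w} vector. The signature change, quoted from the mkl_cpu_kernel.h hunk further down:

// Before: a single scalar window, so only square kernels worked.
void GetPadding(const CNodePtr &kernel_node, const std::string &pad_mode, const std::vector<size_t> &src_shape,
                int kernel_size, int stride, std::vector<int> *padding_l, std::vector<int> *padding_r);

// After: one window size per spatial dimension.
void GetPadding(const CNodePtr &kernel_node, const std::string &pad_mode, const std::vector<size_t> &src_shape,
                const std::vector<size_t> &kernel_size, int stride, std::vector<int> *padding_l,
                std::vector<int> *padding_r);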
@@ -32,8 +32,6 @@ void Conv2dCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape);
   dnnl::memory::desc weights_desc = GetDefaultMemDesc(weight_shape);
   dnnl::memory::desc dst_desc = GetDefaultMemDesc(dst_shape);
-
-  int kernel_size = SizeToInt(weight_shape[3]);
   auto stride_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, STRIDE);
   auto dilation_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, DILATION);
   if (stride_ori.size() != 4 || stride_ori[2] != stride_ori[3]) {
@@ -57,6 +55,7 @@ void Conv2dCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   std::vector<int> int_padding_r;

   const std::string pad_mode = AnfAlgo::GetNodeAttr<std::string>(kernel_node, PAD_MODE);
+  std::vector<size_t> kernel_size({weight_shape[2], weight_shape[3]});
   GetPadding(kernel_node, pad_mode, src_shape, kernel_size, stride, &int_padding_l, &int_padding_r);
   if (int_padding_l.size() != 2 || int_padding_r.size() != 2) {
     MS_LOG(EXCEPTION) << "get padding failed";
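A minimal sketch (illustration only; OIHW weight layout assumed, as the [2]/[3] indices above suggest): the window is now taken from both spatial dims of the weight shape, so a rectangular kernel such as 1x7 is representable.

#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  std::vector<size_t> weight_shape{64, 32, 1, 7};  // {out_channels, in_channels, kernel_h, kernel_w}
  std::vector<size_t> kernel_size({weight_shape[2], weight_shape[3]});
  std::cout << "window: " << kernel_size[0] << "x" << kernel_size[1] << std::endl;  // window: 1x7
  return 0;
}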
@@ -32,8 +32,6 @@ void Conv2dGradFilterCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape);
   dnnl::memory::desc weights_desc = GetDefaultMemDesc(weight_shape);
   dnnl::memory::desc dst_desc = GetDefaultMemDesc(dst_shape);
-
-  int kernel_size = SizeToInt(weight_shape[3]);
   auto stride_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, STRIDE);
   auto dilation_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, DILATION);
   if (stride_ori.size() != 2 || stride_ori[0] != stride_ori[1]) {
@@ -53,6 +51,7 @@ void Conv2dGradFilterCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   const std::string pad_mode = AnfAlgo::GetNodeAttr<std::string>(kernel_node, PAD_MODE);
   std::vector<int> int_padding_l;
   std::vector<int> int_padding_r;
+  std::vector<size_t> kernel_size({weight_shape[2], weight_shape[3]});
   GetPadding(kernel_node, pad_mode, src_shape, kernel_size, stride, &int_padding_l, &int_padding_r);
   if (int_padding_l.size() != 2 || int_padding_r.size() != 2) {
     MS_LOG(EXCEPTION) << "get padding failed";
@@ -33,7 +33,6 @@ void Conv2dGradInputCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   dnnl::memory::desc weights_desc = GetDefaultMemDesc(weight_shape);
   dnnl::memory::desc dst_desc = GetDefaultMemDesc(dst_shape);

-  int kernel_size = SizeToInt(weight_shape[3]);
   auto stride_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, STRIDE);
   auto dilation_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, DILATION);
   if (stride_ori.size() != 2 || stride_ori[0] != stride_ori[1]) {
@@ -52,6 +51,7 @@ void Conv2dGradInputCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   std::vector<int> int_padding_l;
   std::vector<int> int_padding_r;
   const std::string pad_mode = AnfAlgo::GetNodeAttr<std::string>(kernel_node, PAD_MODE);
+  std::vector<size_t> kernel_size({weight_shape[2], weight_shape[3]});
   GetPadding(kernel_node, pad_mode, src_shape, kernel_size, stride, &int_padding_l, &int_padding_r);
   if (int_padding_l.size() != 2 || int_padding_r.size() != 2) {
     MS_LOG(EXCEPTION) << "conv2d grad get padding failed";
@@ -23,7 +23,7 @@
 namespace mindspore {
 namespace kernel {
 void MKLCPUKernel::GetPadding(const CNodePtr &kernel_node, const std::string &pad_mode,
-                              const std::vector<size_t> &src_shape, int kernel_size, int stride,
+                              const std::vector<size_t> &src_shape, const std::vector<size_t> &kernel_size, int stride,
                               std::vector<int> *padding_l, std::vector<int> *padding_r) {
   MS_EXCEPTION_IF_NULL(kernel_node);
   if (src_shape.size() < 2) {
@@ -32,11 +32,13 @@ void MKLCPUKernel::GetPadding(const CNodePtr &kernel_node, const std::string &pa
   std::vector<int> weight_height;
   weight_height.emplace_back(src_shape[src_shape.size() - 2]);
   weight_height.emplace_back(src_shape[src_shape.size() - 1]);
-  int rad = kernel_size / 2;
-  int need_pad = kernel_size - 1;
+
   MS_LOG(INFO) << "pad mode " << pad_mode;
   if (pad_mode == PAD_MODE_LOWER_SAME || pad_mode == PAD_MODE_UPPER_SAME) {
-    for (auto wh : weight_height) {
+    for (size_t i = 0; i < weight_height.size(); ++i) {
+      auto wh = weight_height[i];
+      int rad = kernel_size[i] / 2;
+      int need_pad = kernel_size[i] - 1;
       int re = (wh - 1) % stride;
       int pad = std::max(rad - (re / 2), 0);
       padding_r->emplace_back(pad);
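For intuition, here is a minimal standalone sketch of per-dimension SAME padding in the common TensorFlow-style convention. It illustrates why the loop above now indexes kernel_size[i]; it is not a copy of the kernel's exact formula, since the diff only shows part of the padding computation.

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <utility>
#include <vector>

// Per-dimension SAME padding: each spatial dim gets its own total padding,
// derived from its own window size.
std::vector<std::pair<int, int>> SamePadding(const std::vector<int> &in_hw,
                                             const std::vector<int> &kernel_hw, int stride) {
  std::vector<std::pair<int, int>> pads;
  for (std::size_t i = 0; i < in_hw.size(); ++i) {
    int out = (in_hw[i] + stride - 1) / stride;                             // ceil(in / stride)
    int total = std::max((out - 1) * stride + kernel_hw[i] - in_hw[i], 0);  // total padding needed
    pads.emplace_back(total / 2, total - total / 2);                        // {pad_left, pad_right}
  }
  return pads;
}

int main() {
  // A 3x5 window over a 28x28 input, stride 1: height needs (1,1), width needs (2,2).
  for (const auto &p : SamePadding({28, 28}, {3, 5}, 1)) {
    std::cout << p.first << "," << p.second << std::endl;
  }
  return 0;
}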
@@ -33,7 +33,8 @@ class MKLCPUKernel : public CPUKernel {

  protected:
   void GetPadding(const CNodePtr &kernel_node, const std::string &pad_mode, const std::vector<size_t> &src_shape,
-                  int kernel_size, int stride, std::vector<int> *padding_l, std::vector<int> *padding_r);
+                  const std::vector<size_t> &kernel_size, int stride, std::vector<int> *padding_l,
+                  std::vector<int> *padding_r);
   void AddArgument(int arg_key, const dnnl::memory::desc &mem_desc, bool alloc = false);
   void SetArgumentHandle(int arg_key, void *ptr);
   dnnl::memory::format_tag GetDefaultFormatTag(const dnnl::memory::dims &dims) const;
@@ -28,17 +28,18 @@ void PoolingCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   std::vector<size_t> dst_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
   dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape);
   dnnl::memory::desc dst_desc = GetDefaultMemDesc(dst_shape);
-  std::vector<int> kernel_sizes = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, KSIZE);
+  std::vector<int> origin_kernel_sizes = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, KSIZE);
   std::vector<int> strides = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, STRIDES);
-  if (kernel_sizes.size() != 4 || strides.size() != 4) {
-    MS_LOG(EXCEPTION) << "invalid kernel size " << kernel_sizes.size() << " or stride size " << strides.size();
+  if (origin_kernel_sizes.size() != 4 || strides.size() != 4) {
+    MS_LOG(EXCEPTION) << "invalid kernel size " << origin_kernel_sizes.size() << " or stride size " << strides.size();
   }
   dnnl::memory::dims strides_dims{strides[2], strides[3]};
-  dnnl::memory::dims kernels_dims{kernel_sizes[2], kernel_sizes[3]};
+  dnnl::memory::dims kernels_dims{origin_kernel_sizes[2], origin_kernel_sizes[3]};
   const std::string pad_mode = AnfAlgo::GetNodeAttr<std::string>(kernel_node, PADDING);
   std::vector<int> int_padding_l;
   std::vector<int> int_padding_r;
-  GetPadding(kernel_node, pad_mode, src_shape, kernel_sizes[3], strides[3], &int_padding_l, &int_padding_r);
+  std::vector<size_t> kernel_size({IntToSize(origin_kernel_sizes[2]), IntToSize(origin_kernel_sizes[3])});
+  GetPadding(kernel_node, pad_mode, src_shape, kernel_size, strides[3], &int_padding_l, &int_padding_r);
   if (int_padding_l.size() != 2 || int_padding_r.size() != 2) {
     MS_LOG(EXCEPTION) << "pooling get padding failed";
   }
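A hypothetical helper (illustration only, not part of the patch) capturing what the pooling kernel now does with the 4-element NCHW KSIZE attribute, keeping height and width separate:

#include <cstddef>
#include <stdexcept>
#include <vector>

// Extract the spatial window from a 4-D NCHW ksize attribute; height and width
// stay separate so non-square pooling windows are supported.
std::vector<std::size_t> SpatialWindow(const std::vector<int> &ksize_nchw) {
  if (ksize_nchw.size() != 4) {
    throw std::invalid_argument("ksize must have 4 elements (NCHW)");
  }
  return {static_cast<std::size_t>(ksize_nchw[2]), static_cast<std::size_t>(ksize_nchw[3])};
}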
@@ -34,7 +34,7 @@ void PoolingGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   }
   std::vector<int> padding_r;
   const std::string pad_mode = AnfAlgo::GetNodeAttr<std::string>(kernel_node, PADDING);
-  kernel_size_ = kernel_sizes[3];
+  kernel_size_ = {IntToSize(kernel_sizes[2]), IntToSize(kernel_sizes[3])};
   stride_ = strides[3];
   GetPadding(kernel_node, pad_mode, src_shape_, kernel_size_, stride_, &padding_l_, &padding_r);
 }
@@ -77,7 +77,7 @@ void PoolingGradCPUKernel::ChannelPoolingGrad(const float *input, const float *d
   size_t diff_index = 0;
   for (size_t h = 0; h < dst_shape_[2]; ++h) {
     box[0].first = IntToSize(std::max(h_start, 0));
-    box[0].second = IntToSize(std::min(h_start + kernel_size_, src_height));
+    box[0].second = IntToSize(std::min(h_start + SizeToInt(kernel_size_[1]), src_height));
     for (size_t w = 0; w < src_shape_[3]; ++w) {
       row_max_pair[w].first = 0;
       row_max_pair[w].second = 0;
@@ -85,7 +85,7 @@ void PoolingGradCPUKernel::ChannelPoolingGrad(const float *input, const float *d
     int w_start = -padding_l_[1];
     for (size_t w = 0; w < dst_shape_[3]; ++w) {
       box[1].first = IntToSize(std::max(w_start, 0));
-      box[1].second = IntToSize(std::min(w_start + kernel_size_, src_width));
+      box[1].second = IntToSize(std::min(w_start + SizeToInt(kernel_size_[0]), src_width));
       RowPoolingGrad(input, output, diff[diff_index], box, &row_max_pair);
       diff_index += 1;
       w_start += stride_;
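A small sketch (names hypothetical, not MindSpore source) of the window-clamping idea behind the box[0]/box[1] updates above: each dimension's window start and end are clamped to the input extent using that dimension's own kernel size.

#include <algorithm>
#include <iostream>
#include <utility>

// Clamp a pooling window [start, start + window) to the valid range [0, input_size).
std::pair<int, int> ClampWindow(int start, int window, int input_size) {
  return {std::max(start, 0), std::min(start + window, input_size)};
}

int main() {
  // A window of 3 starting at -1 (i.e. with one row of padding) over a size-28 input.
  auto box = ClampWindow(-1, 3, 28);
  std::cout << box.first << ".." << box.second << std::endl;  // 0..2
  return 0;
}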
@@ -37,7 +37,8 @@ class PoolingGradCPUKernel : public MKLCPUKernel {
   void RowPoolingGrad(const float *input, float *output, float diff, const std::vector<std::pair<size_t, size_t>> &box,
                       std::vector<std::pair<size_t, float>> *row_max_pair);
   void ChannelPoolingGrad(const float *input, const float *diff, float *output);
-  int stride_{0}, kernel_size_{0};
+  int stride_{0};
+  std::vector<size_t> kernel_size_;
   std::vector<int> padding_l_;
   std::vector<size_t> src_shape_;
   std::vector<size_t> dst_shape_;
@@ -36,23 +36,6 @@ namespace mindspore {
 namespace device {
 namespace cpu {
 const size_t INIT_NODE_REF = 1;
-namespace {
-TypeId GetCPUSupportOutputTypeId(const TypeId type_id) {
-  TypeId support_type_id = type_id;
-  if (type_id == kNumberTypeUInt32) {
-    support_type_id = kNumberTypeInt32;
-  }
-  if (type_id == kNumberTypeFloat || type_id == kNumberTypeFloat16 || type_id == kNumberTypeFloat32 ||
-      type_id == kNumberTypeFloat64) {
-    support_type_id = kNumberTypeFloat32;
-  }
-  if (support_type_id != kNumberTypeInt32 && support_type_id != kNumberTypeFloat32) {
-    MS_LOG(EXCEPTION) << "Check output type failed.";
-  }
-  return support_type_id;
-}
-}  // namespace
-
 void CPUKernelRuntime::AssignKernelAddress(session::KernelGraph *kernel_graph) {
   AssignValueNodeAddress(kernel_graph);
   AssignInputNodeAddress(kernel_graph);
@@ -157,15 +140,25 @@ tensor::TensorPtr CPUKernelRuntime::CreatTensorForOutput(const CNodePtr &node, s
   auto shape = AnfAlgo::GetOutputInferShape(node, index);
   std::vector<int> temp_shape;
   (void)temp_shape.insert(temp_shape.end(), shape.begin(), shape.end());
-  TypeId type_id = AnfAlgo::GetOutputInferDataType(node, index);
-  type_id = GetCPUSupportOutputTypeId(type_id);
-  tensor::TensorPtr tensor = std::make_shared<tensor::Tensor>(type_id, temp_shape);
+  TypeId infer_type_id = AnfAlgo::GetOutputInferDataType(node, index);
+  TypeId device_type_id = AnfAlgo::GetOutputDeviceDataType(node, index);
+  tensor::TensorPtr tensor = std::make_shared<tensor::Tensor>(infer_type_id, temp_shape);
   MS_EXCEPTION_IF_NULL(tensor);
   if (bound_addresses->find(address) != bound_addresses->end()) {
     tensor->set_device_address(address);
     need_sync_outputs->emplace_back(tensor);
   } else {
+    if (infer_type_id != device_type_id) {
+      size_t type_size = GetTypeByte(TypeIdToType(device_type_id));
+      std::vector<int> data_shape = tensor->shape();
+      size_t tensor_size = std::accumulate(data_shape.begin(), data_shape.end(), type_size, std::multiplies<size_t>());
+      address->ptr_ = resource_manager_.MemMalloc(tensor_size);
+      need_sync_outputs->emplace_back(tensor);
+      tensor->set_device_address(address);
+      need_sync_outputs->emplace_back(tensor);
+    } else {
       address->ptr_ = tensor->data_c();
+    }
     address->ref_count_ = INIT_NODE_REF;
     (void)bound_addresses->insert(address);
   }
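As a side note, the std::accumulate call added above computes the buffer size in bytes by seeding the product of the dimensions with the element size; a tiny standalone example of the idiom:

#include <cstddef>
#include <functional>
#include <iostream>
#include <numeric>
#include <vector>

int main() {
  std::vector<int> data_shape{2, 3, 4};
  std::size_t type_size = sizeof(float);
  // bytes = type_size * 2 * 3 * 4
  std::size_t tensor_size =
      std::accumulate(data_shape.begin(), data_shape.end(), type_size, std::multiplies<std::size_t>());
  std::cout << tensor_size << std::endl;  // 96
  return 0;
}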
@@ -226,12 +219,13 @@ void CPUKernelRuntime::BindInputOutput(const session::KernelGraph *kernel_graph,
     if (tensor_address != nullptr && tensor_address != address) {
       (void)tensor->data_sync();
     }
-    std::vector<int> data_shape = tensor->shape();
-    size_t tensor_size =
-      std::accumulate(data_shape.begin(), data_shape.end(), sizeof(float), std::multiplies<size_t>());
+
     if (tensor->data_type() == kNumberTypeFloat32 || tensor->data_type() == kNumberTypeInt32) {
       address->ptr_ = tensor->data_c();
     } else {
+      std::vector<int> data_shape = tensor->shape();
+      size_t tensor_size =
+        std::accumulate(data_shape.begin(), data_shape.end(), sizeof(float), std::multiplies<size_t>());
       address->ptr_ = resource_manager_.MemMalloc(tensor_size);
       if (!address->SyncHostToDevice(data_shape, LongToSize(tensor->data().nbytes()), tensor->data_type(),
                                      tensor->data_c())) {
@@ -141,7 +141,11 @@ void SetKernelInfo(const CNodePtr &kernel_node) {
     if (kernel_attr.GetAllSame()) {
       ExpandKernelAttr(kernel_node, &kernel_attr);
     }
-    if (IsInputFormatDtypeMatched(kernel_attr, input_formats, input_types, input_not_cnode_indexes)) {
+    bool ignore_check = false;
+    if (index == kernel_attrs.size() - 1 && input_types.size() == input_not_cnode_indexes.size()) {
+      ignore_check = true;
+    }
+    if (ignore_check || IsInputFormatDtypeMatched(kernel_attr, input_formats, input_types, input_not_cnode_indexes)) {
       size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
       if (kernel_attr.GetOutputSize() != output_num) {
         MS_LOG(DEBUG) << "Output num is not equal!";
@@ -223,7 +223,7 @@ std::string GetCNodeTarget(const AnfNodePtr &node) {
   }
   auto target = GetValue<std::string>(att_target);
   if (kTargetSet.find(target) == kTargetSet.end()) {
-    MS_LOG(EXCEPTION) << "Only support string CPU|GPU|Ascend for primitive_target";
+    MS_LOG(EXCEPTION) << "Only support string CPU|GPU|Ascend for primitive_target, but get " << target;
   }
   return target;
 }