cpu conv2d support diff winsize

kswang 2020-07-18 16:49:14 +08:00
parent 0e3dfdd01c
commit 634035827e
11 changed files with 46 additions and 45 deletions

View File

@@ -32,8 +32,6 @@ void Conv2dCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape);
   dnnl::memory::desc weights_desc = GetDefaultMemDesc(weight_shape);
   dnnl::memory::desc dst_desc = GetDefaultMemDesc(dst_shape);
-  int kernel_size = SizeToInt(weight_shape[3]);
   auto stride_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, STRIDE);
   auto dilation_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, DILATION);
   if (stride_ori.size() != 4 || stride_ori[2] != stride_ori[3]) {
@@ -57,6 +55,7 @@ void Conv2dCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   std::vector<int> int_padding_r;
   const std::string pad_mode = AnfAlgo::GetNodeAttr<std::string>(kernel_node, PAD_MODE);
+  std::vector<size_t> kernel_size({weight_shape[2], weight_shape[3]});
   GetPadding(kernel_node, pad_mode, src_shape, kernel_size, stride, &int_padding_l, &int_padding_r);
   if (int_padding_l.size() != 2 || int_padding_r.size() != 2) {
     MS_LOG(EXCEPTION) << "get padding failed";

View File

@@ -32,8 +32,6 @@ void Conv2dGradFilterCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape);
   dnnl::memory::desc weights_desc = GetDefaultMemDesc(weight_shape);
   dnnl::memory::desc dst_desc = GetDefaultMemDesc(dst_shape);
-  int kernel_size = SizeToInt(weight_shape[3]);
   auto stride_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, STRIDE);
   auto dilation_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, DILATION);
   if (stride_ori.size() != 2 || stride_ori[0] != stride_ori[1]) {
@@ -53,6 +51,7 @@ void Conv2dGradFilterCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   const std::string pad_mode = AnfAlgo::GetNodeAttr<std::string>(kernel_node, PAD_MODE);
   std::vector<int> int_padding_l;
   std::vector<int> int_padding_r;
+  std::vector<size_t> kernel_size({weight_shape[2], weight_shape[3]});
   GetPadding(kernel_node, pad_mode, src_shape, kernel_size, stride, &int_padding_l, &int_padding_r);
   if (int_padding_l.size() != 2 || int_padding_r.size() != 2) {
     MS_LOG(EXCEPTION) << "get padding failed";

View File

@@ -33,7 +33,6 @@ void Conv2dGradInputCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   dnnl::memory::desc weights_desc = GetDefaultMemDesc(weight_shape);
   dnnl::memory::desc dst_desc = GetDefaultMemDesc(dst_shape);
-  int kernel_size = SizeToInt(weight_shape[3]);
   auto stride_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, STRIDE);
   auto dilation_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, DILATION);
   if (stride_ori.size() != 2 || stride_ori[0] != stride_ori[1]) {
@@ -52,6 +51,7 @@ void Conv2dGradInputCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   std::vector<int> int_padding_l;
   std::vector<int> int_padding_r;
   const std::string pad_mode = AnfAlgo::GetNodeAttr<std::string>(kernel_node, PAD_MODE);
+  std::vector<size_t> kernel_size({weight_shape[2], weight_shape[3]});
   GetPadding(kernel_node, pad_mode, src_shape, kernel_size, stride, &int_padding_l, &int_padding_r);
   if (int_padding_l.size() != 2 || int_padding_r.size() != 2) {
     MS_LOG(EXCEPTION) << "conv2d grad get padding failed";

View File

@@ -23,7 +23,7 @@
 namespace mindspore {
 namespace kernel {
 void MKLCPUKernel::GetPadding(const CNodePtr &kernel_node, const std::string &pad_mode,
-                              const std::vector<size_t> &src_shape, int kernel_size, int stride,
+                              const std::vector<size_t> &src_shape, const std::vector<size_t> &kernel_size, int stride,
                               std::vector<int> *padding_l, std::vector<int> *padding_r) {
   MS_EXCEPTION_IF_NULL(kernel_node);
   if (src_shape.size() < 2) {
@@ -32,11 +32,13 @@ void MKLCPUKernel::GetPadding(const CNodePtr &kernel_node, const std::string &pa
   std::vector<int> weight_height;
   weight_height.emplace_back(src_shape[src_shape.size() - 2]);
   weight_height.emplace_back(src_shape[src_shape.size() - 1]);
-  int rad = kernel_size / 2;
-  int need_pad = kernel_size - 1;
   MS_LOG(INFO) << "pad mode " << pad_mode;
   if (pad_mode == PAD_MODE_LOWER_SAME || pad_mode == PAD_MODE_UPPER_SAME) {
-    for (auto wh : weight_height) {
+    for (size_t i = 0; i < weight_height.size(); ++i) {
+      auto wh = weight_height[i];
+      int rad = kernel_size[i] / 2;
+      int need_pad = kernel_size[i] - 1;
       int re = (wh - 1) % stride;
       int pad = std::max(rad - (re / 2), 0);
       padding_r->emplace_back(pad);
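
By moving rad and need_pad inside the loop and indexing kernel_size[i], SAME padding is now computed independently for height and width. A self-contained sketch of the per-dimension arithmetic; the left-side formula is an assumption (the hunk only shows the right side), so treat it as illustrative:

  #include <algorithm>
  #include <cstdio>
  #include <vector>

  // Sketch: SAME padding for a rectangular window. Deriving pad_l as the
  // remainder of need_pad is an assumption, not taken from the diff.
  void SamePadding(const std::vector<int> &sizes, const std::vector<int> &kernel, int stride,
                   std::vector<int> *pad_l, std::vector<int> *pad_r) {
    for (size_t i = 0; i < sizes.size(); ++i) {
      int rad = kernel[i] / 2;       // half window, rounded down
      int need_pad = kernel[i] - 1;  // total padding needed at stride 1
      int re = (sizes[i] - 1) % stride;
      int pr = std::max(rad - re / 2, 0);
      pad_r->emplace_back(pr);
      pad_l->emplace_back(std::max(need_pad - pr, 0));
    }
  }

  int main() {
    std::vector<int> pl, pr;
    SamePadding({224, 224}, {3, 5}, 1, &pl, &pr);  // 3x5 window, stride 1
    printf("l={%d,%d} r={%d,%d}\n", pl[0], pl[1], pr[0], pr[1]);  // l={1,2} r={1,2}
  }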

View File

@@ -33,7 +33,8 @@ class MKLCPUKernel : public CPUKernel {
  protected:
   void GetPadding(const CNodePtr &kernel_node, const std::string &pad_mode, const std::vector<size_t> &src_shape,
-                  int kernel_size, int stride, std::vector<int> *padding_l, std::vector<int> *padding_r);
+                  const std::vector<size_t> &kernel_size, int stride, std::vector<int> *padding_l,
+                  std::vector<int> *padding_r);
   void AddArgument(int arg_key, const dnnl::memory::desc &mem_desc, bool alloc = false);
   void SetArgumentHandle(int arg_key, void *ptr);
   dnnl::memory::format_tag GetDefaultFormatTag(const dnnl::memory::dims &dims) const;

View File

@@ -28,17 +28,18 @@ void PoolingCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   std::vector<size_t> dst_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
   dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape);
   dnnl::memory::desc dst_desc = GetDefaultMemDesc(dst_shape);
-  std::vector<int> kernel_sizes = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, KSIZE);
+  std::vector<int> origin_kernel_sizes = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, KSIZE);
   std::vector<int> strides = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, STRIDES);
-  if (kernel_sizes.size() != 4 || strides.size() != 4) {
-    MS_LOG(EXCEPTION) << "invalid kernel size " << kernel_sizes.size() << " or stride size " << strides.size();
+  if (origin_kernel_sizes.size() != 4 || strides.size() != 4) {
+    MS_LOG(EXCEPTION) << "invalid kernel size " << origin_kernel_sizes.size() << " or stride size " << strides.size();
   }
   dnnl::memory::dims strides_dims{strides[2], strides[3]};
-  dnnl::memory::dims kernels_dims{kernel_sizes[2], kernel_sizes[3]};
+  dnnl::memory::dims kernels_dims{origin_kernel_sizes[2], origin_kernel_sizes[3]};
   const std::string pad_mode = AnfAlgo::GetNodeAttr<std::string>(kernel_node, PADDING);
   std::vector<int> int_padding_l;
   std::vector<int> int_padding_r;
-  GetPadding(kernel_node, pad_mode, src_shape, kernel_sizes[3], strides[3], &int_padding_l, &int_padding_r);
+  std::vector<size_t> kernel_size({IntToSize(origin_kernel_sizes[2]), IntToSize(origin_kernel_sizes[3])});
+  GetPadding(kernel_node, pad_mode, src_shape, kernel_size, strides[3], &int_padding_l, &int_padding_r);
   if (int_padding_l.size() != 2 || int_padding_r.size() != 2) {
     MS_LOG(EXCEPTION) << "pooling get padding failed";
   }
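
Keeping all four NCHW values of KSIZE means a rectangular pooling window now flows through to both the dnnl descriptor and GetPadding, instead of collapsing to the single width value kernel_sizes[3]. A worked example with hypothetical values:

  // A 2x3 pooling window arrives as KSIZE = {1, 1, 2, 3} (NCHW).
  std::vector<int> origin_kernel_sizes = {1, 1, 2, 3};
  dnnl::memory::dims kernels_dims{origin_kernel_sizes[2], origin_kernel_sizes[3]};  // {2, 3}
  std::vector<size_t> kernel_size({IntToSize(origin_kernel_sizes[2]),
                                   IntToSize(origin_kernel_sizes[3])});  // {2, 3}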

View File

@@ -34,7 +34,7 @@ void PoolingGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   }
   std::vector<int> padding_r;
   const std::string pad_mode = AnfAlgo::GetNodeAttr<std::string>(kernel_node, PADDING);
-  kernel_size_ = kernel_sizes[3];
+  kernel_size_ = {IntToSize(kernel_sizes[2]), IntToSize(kernel_sizes[3])};
   stride_ = strides[3];
   GetPadding(kernel_node, pad_mode, src_shape_, kernel_size_, stride_, &padding_l_, &padding_r);
 }
@@ -77,7 +77,7 @@ void PoolingGradCPUKernel::ChannelPoolingGrad(const float *input, const float *d
   size_t diff_index = 0;
   for (size_t h = 0; h < dst_shape_[2]; ++h) {
     box[0].first = IntToSize(std::max(h_start, 0));
-    box[0].second = IntToSize(std::min(h_start + kernel_size_, src_height));
+    box[0].second = IntToSize(std::min(h_start + SizeToInt(kernel_size_[1]), src_height));
     for (size_t w = 0; w < src_shape_[3]; ++w) {
       row_max_pair[w].first = 0;
       row_max_pair[w].second = 0;
@@ -85,7 +85,7 @@ void PoolingGradCPUKernel::ChannelPoolingGrad(const float *input, const float *d
     int w_start = -padding_l_[1];
     for (size_t w = 0; w < dst_shape_[3]; ++w) {
       box[1].first = IntToSize(std::max(w_start, 0));
-      box[1].second = IntToSize(std::min(w_start + kernel_size_, src_width));
+      box[1].second = IntToSize(std::min(w_start + SizeToInt(kernel_size_[0]), src_width));
       RowPoolingGrad(input, output, diff[diff_index], box, &row_max_pair);
       diff_index += 1;
       w_start += stride_;
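
With kernel_size_ a per-dimension vector, each axis now clips its pooling window against the input separately. The general pattern behind the box updates, as a minimal sketch (IntToSize is the MindSpore helper used in the hunks):

  // Clip a window of extent k starting at `start` (possibly negative due to
  // left padding) against an axis of length `size`; returns the half-open
  // range [lo, hi).
  std::pair<size_t, size_t> ClipWindow(int start, int k, int size) {
    size_t lo = IntToSize(std::max(start, 0));
    size_t hi = IntToSize(std::min(start + k, size));
    return {lo, hi};
  }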

View File

@@ -37,7 +37,8 @@ class PoolingGradCPUKernel : public MKLCPUKernel {
   void RowPoolingGrad(const float *input, float *output, float diff, const std::vector<std::pair<size_t, size_t>> &box,
                       std::vector<std::pair<size_t, float>> *row_max_pair);
   void ChannelPoolingGrad(const float *input, const float *diff, float *output);
-  int stride_{0}, kernel_size_{0};
+  int stride_{0};
+  std::vector<size_t> kernel_size_;
   std::vector<int> padding_l_;
   std::vector<size_t> src_shape_;
   std::vector<size_t> dst_shape_;

View File

@@ -36,23 +36,6 @@ namespace mindspore {
 namespace device {
 namespace cpu {
 const size_t INIT_NODE_REF = 1;
-namespace {
-TypeId GetCPUSupportOutputTypeId(const TypeId type_id) {
-  TypeId support_type_id = type_id;
-  if (type_id == kNumberTypeUInt32) {
-    support_type_id = kNumberTypeInt32;
-  }
-  if (type_id == kNumberTypeFloat || type_id == kNumberTypeFloat16 || type_id == kNumberTypeFloat32 ||
-      type_id == kNumberTypeFloat64) {
-    support_type_id = kNumberTypeFloat32;
-  }
-  if (support_type_id != kNumberTypeInt32 && support_type_id != kNumberTypeFloat32) {
-    MS_LOG(EXCEPTION) << "Check output type failed.";
-  }
-  return support_type_id;
-}
-}  // namespace
 void CPUKernelRuntime::AssignKernelAddress(session::KernelGraph *kernel_graph) {
   AssignValueNodeAddress(kernel_graph);
   AssignInputNodeAddress(kernel_graph);
@@ -157,15 +140,25 @@ tensor::TensorPtr CPUKernelRuntime::CreatTensorForOutput(const CNodePtr &node, s
   auto shape = AnfAlgo::GetOutputInferShape(node, index);
   std::vector<int> temp_shape;
   (void)temp_shape.insert(temp_shape.end(), shape.begin(), shape.end());
-  TypeId type_id = AnfAlgo::GetOutputInferDataType(node, index);
-  type_id = GetCPUSupportOutputTypeId(type_id);
-  tensor::TensorPtr tensor = std::make_shared<tensor::Tensor>(type_id, temp_shape);
+  TypeId infer_type_id = AnfAlgo::GetOutputInferDataType(node, index);
+  TypeId device_type_id = AnfAlgo::GetOutputDeviceDataType(node, index);
+  tensor::TensorPtr tensor = std::make_shared<tensor::Tensor>(infer_type_id, temp_shape);
   MS_EXCEPTION_IF_NULL(tensor);
   if (bound_addresses->find(address) != bound_addresses->end()) {
     tensor->set_device_address(address);
     need_sync_outputs->emplace_back(tensor);
   } else {
-    address->ptr_ = tensor->data_c();
+    if (infer_type_id != device_type_id) {
+      size_t type_size = GetTypeByte(TypeIdToType(device_type_id));
+      std::vector<int> data_shape = tensor->shape();
+      size_t tensor_size = std::accumulate(data_shape.begin(), data_shape.end(), type_size, std::multiplies<size_t>());
+      address->ptr_ = resource_manager_.MemMalloc(tensor_size);
+      need_sync_outputs->emplace_back(tensor);
+      tensor->set_device_address(address);
+      need_sync_outputs->emplace_back(tensor);
+    } else {
+      address->ptr_ = tensor->data_c();
+    }
     address->ref_count_ = INIT_NODE_REF;
     (void)bound_addresses->insert(address);
   }
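
Together with the deletion of GetCPUSupportOutputTypeId above, output tensors now keep their inferred dtype. When that differs from the kernel's device dtype, the output gets its own device buffer, sized by the device element type, and the tensor is queued in need_sync_outputs for a post-run sync. The size arithmetic, with a hypothetical shape:

  // float32 device buffer for an output of shape {2, 3, 4, 5}:
  size_t type_size = 4;  // GetTypeByte(TypeIdToType(kNumberTypeFloat32))
  std::vector<int> data_shape = {2, 3, 4, 5};
  size_t tensor_size = std::accumulate(data_shape.begin(), data_shape.end(),
                                       type_size, std::multiplies<size_t>());  // 480 bytes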
@@ -226,12 +219,13 @@ void CPUKernelRuntime::BindInputOutput(const session::KernelGraph *kernel_graph,
     if (tensor_address != nullptr && tensor_address != address) {
       (void)tensor->data_sync();
     }
-    std::vector<int> data_shape = tensor->shape();
-    size_t tensor_size =
-      std::accumulate(data_shape.begin(), data_shape.end(), sizeof(float), std::multiplies<size_t>());
     if (tensor->data_type() == kNumberTypeFloat32 || tensor->data_type() == kNumberTypeInt32) {
       address->ptr_ = tensor->data_c();
     } else {
+      std::vector<int> data_shape = tensor->shape();
+      size_t tensor_size =
+        std::accumulate(data_shape.begin(), data_shape.end(), sizeof(float), std::multiplies<size_t>());
       address->ptr_ = resource_manager_.MemMalloc(tensor_size);
       if (!address->SyncHostToDevice(data_shape, LongToSize(tensor->data().nbytes()), tensor->data_type(),
                                      tensor->data_c())) {
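
Moving the size computation into the else branch makes the fast path explicit: float32 and int32 inputs bind the host buffer directly with no copy, while every other dtype pays for a MemMalloc plus a SyncHostToDevice that converts the data. Note the accumulate seed of sizeof(float): the device buffer is sized for float32 regardless of the host dtype. For example (hypothetical), a float16 tensor of shape {8, 16} holds 256 host bytes but gets an 8 * 16 * 4 = 512-byte device buffer for the widened copy.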

View File

@@ -141,7 +141,11 @@ void SetKernelInfo(const CNodePtr &kernel_node) {
     if (kernel_attr.GetAllSame()) {
       ExpandKernelAttr(kernel_node, &kernel_attr);
     }
-    if (IsInputFormatDtypeMatched(kernel_attr, input_formats, input_types, input_not_cnode_indexes)) {
+    bool ignore_check = false;
+    if (index == kernel_attrs.size() - 1 && input_types.size() == input_not_cnode_indexes.size()) {
+      ignore_check = true;
+    }
+    if (ignore_check || IsInputFormatDtypeMatched(kernel_attr, input_formats, input_types, input_not_cnode_indexes)) {
       size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
       if (kernel_attr.GetOutputSize() != output_num) {
         MS_LOG(DEBUG) << "Output num is not equal!";
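
The new ignore_check path turns the last registered kernel attr into a fallback: when every input is produced by a non-CNode (parameters or constants, presumably because their data can still be converted when bound), the format/dtype match is skipped instead of rejecting the node. The condition, restated as a sketch:

  // Fallback: last candidate attr, and no input comes from another CNode.
  bool ignore_check = (index == kernel_attrs.size() - 1) &&
                      (input_types.size() == input_not_cnode_indexes.size());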

View File

@@ -223,7 +223,7 @@ std::string GetCNodeTarget(const AnfNodePtr &node) {
   }
   auto target = GetValue<std::string>(att_target);
   if (kTargetSet.find(target) == kTargetSet.end()) {
-    MS_LOG(EXCEPTION) << "Only support string CPU|GPU|Ascend for primitive_target";
+    MS_LOG(EXCEPTION) << "Only support string CPU|GPU|Ascend for primitive_target, but get " << target;
   }
   return target;
 }