cpu_pad_op

2021-02-05 19:13:09 +08:00 · 2021-02-05 19:13:09 +08:00 · dbc0ad13db
parent 66e5e1cfc3
commit dbc0ad13db
9 changed files with 1270 additions and 0 deletions
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/mirror_pad_cpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mirror_pad_cpu_kernel.cc
@ -0,0 +1,192 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/kernel_compiler/cpu/mirror_pad_cpu_kernel.h"
+#include "runtime/device/cpu/cpu_device_address.h"
+
+namespace mindspore {
+namespace kernel {
+// Caches everything Launch needs from `kernel_node`: the element type of the first input, the mirror mode
+// (0 = REFLECT, 1 = SYMMETRIC), the input shape promoted to 4-D by prepending 1s, the number of rows of the
+// paddings input and the output shape.
+// Raises an exception for a wrong input/output count, an unknown mode, an input rank outside [2, 4], a
+// paddings input without a first dimension or with more than MAX_PADDINGS rows, an output rank below 2, or a
+// padded height/width larger than mirroring the input can supply.
+void MirrorPadCPUKernel::InitKernel(const CNodePtr &kernel_node) {
+  CheckParam(kernel_node);
+  std::string mode = AnfAlgo::GetNodeAttr<std::string>(kernel_node, "mode");
+  dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0);
+  if (mode == "REFLECT") {
+    mode_ = 0;
+  } else if (mode == "SYMMETRIC") {
+    mode_ = 1;
+  } else {
+    MS_LOG(EXCEPTION) << "For mirror pad, only REFLECT and SYMMETRIC are supported.";
+  }
+
+  std::vector<size_t> input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
+  shape_size_ = input_shape.size();
+  if (shape_size_ < 2 || shape_size_ > 4) {
+    // Launch indexes input_shape_[0..3]; any other rank would read outside the shape vector.
+    MS_LOG(EXCEPTION) << "For mirror pad, the input rank must be 2, 3 or 4, but got " << shape_size_ << ".";
+  }
+  input_shape.insert(input_shape.begin(), 4 - shape_size_, 1);  // promote 2-D/3-D shapes to 4-D
+  shape_size_ = 4;
+
+  for (size_t i = 0; i < shape_size_; ++i) {
+    tensor_size_ *= input_shape[i];
+    input_shape_.push_back(input_shape[i]);
+  }
+
+  std::vector<size_t> padding_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
+  if (padding_shape.empty() || padding_shape[0] > MAX_PADDINGS) {
+    // More rows than MAX_PADDINGS cannot be placed in the fixed-size paddings table used by Launch.
+    MS_LOG(EXCEPTION) << "For mirror pad, paddings must be a tensor with at most " << MAX_PADDINGS << " rows.";
+  }
+  num_paddings_ = static_cast<int>(padding_shape[0]);
+
+  auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
+  if (output_shape.size() < 2) {
+    MS_LOG(EXCEPTION) << "For mirror pad, the output rank must be at least 2, but got " << output_shape.size() << ".";
+  }
+  for (auto x : output_shape) {
+    output_size_ *= x;
+    output_shape_.push_back(x);
+  }
+
+  // Largest extent mirroring can produce from `dim` elements: SYMMETRIC may repeat all of them on each side,
+  // REFLECT all but the border element. The guard keeps the subtraction from wrapping when dim is 0.
+  const size_t border = (mode_ == 1) ? 0 : 1;
+  auto max_padded = [border](size_t dim) -> size_t { return (dim < border) ? 0 : dim + 2 * (dim - border); };
+  const size_t out_rank = output_shape_.size();
+  if (output_shape_[out_rank - 2] > max_padded(input_shape_[2]) ||
+      output_shape_[out_rank - 1] > max_padded(input_shape_[3])) {
+    // Launch would otherwise mirror from positions outside the input buffer.
+    MS_LOG(EXCEPTION) << "ERROR: Padding value too high for input Tensor on 1 or more dims";
+  }
+}
+
+// Copies the `padd_dim` (before, after) rows of `paddings_arg` into the trailing rows of the
+// MAX_PADDINGS x PADDING_SIZE table `extracted_paddings`, so the last supplied row always lands in the last
+// (WIDTH) row of the table. Table rows that are not written keep whatever the caller stored in them.
+// When more than MAX_PADDINGS rows are supplied only the trailing MAX_PADDINGS rows are used, which keeps the
+// destination index from going negative.
+void extract_paddings(const int64_t *paddings_arg, int padd_dim, int64_t *extracted_paddings) {
+  const int paddings_offset = MAX_PADDINGS - padd_dim;
+  const int first_row = (paddings_offset < 0) ? -paddings_offset : 0;
+  for (int i = first_row; i < padd_dim; i++) {
+    const int dst = (paddings_offset + i) * PADDING_SIZE;
+    const int src = i * PADDING_SIZE;
+    extracted_paddings[dst] = paddings_arg[src];
+    extracted_paddings[dst + 1] = paddings_arg[src + 1];
+  }
+}
+
+// Dispatches to the LaunchKernel instantiation that matches the element type cached by InitKernel.
+// `inputs` and `outputs` are forwarded unchanged; the workspace argument is not used.
+// Returns true after the kernel ran; raises an exception for any element type other than float16, float32
+// or int32.
+bool MirrorPadCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
+                                const std::vector<kernel::AddressPtr> & /*workspace*/,
+                                const std::vector<kernel::AddressPtr> &outputs) {
+  switch (dtype_) {
+    case kNumberTypeFloat16:
+      LaunchKernel<float16>(inputs, outputs);
+      break;
+    case kNumberTypeFloat32:
+      LaunchKernel<float>(inputs, outputs);
+      break;
+    case kNumberTypeInt32:
+      LaunchKernel<int>(inputs, outputs);
+      break;
+    default:
+      MS_LOG(EXCEPTION) << "Data type " << TypeIdLabel(dtype_) << " is not supported.";
+  }
+  return true;
+}
+
+// Fills the padded output by mirroring the input along batch, channel, height and width.
+// inputs[0] holds the source elements, inputs[1] the int64 (before, after) padding pairs and outputs[0] the
+// destination. Every output position is mapped back to the input element it mirrors; mode_ (0 for REFLECT,
+// 1 for SYMMETRIC) shifts the mirror image by one so that SYMMETRIC repeats the border element.
+template <typename T>
+void MirrorPadCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) {
+  auto src = reinterpret_cast<T *>(inputs[0]->addr);
+  int64_t *pad_pairs = reinterpret_cast<int64_t *>(inputs[1]->addr);
+  auto dst = reinterpret_cast<T *>(outputs[0]->addr);
+
+  const int in_batch = input_shape_[0];
+  const int in_channel = input_shape_[1];
+  const int in_height = input_shape_[2];
+  const int in_width = input_shape_[3];
+
+  // Height and width are always the last two output dimensions, whatever the output rank is.
+  int dim_offset = output_shape_.size() - 2;
+  const int out_height = output_shape_[dim_offset + 0];
+  const int out_width = output_shape_[dim_offset + 1];
+  const int mode = mode_;
+
+  // Fixed-size table of (before, after) pads per dimension; rows without supplied paddings stay 0.
+  int64_t paddings[MAX_PADDINGS * PADDING_SIZE] = {0};
+  extract_paddings(pad_pairs, num_paddings_, paddings);
+
+  // First and last index of the un-mirrored data along each dimension of the padded tensor.
+  const int first_x = paddings[WIDTH + LEFT];
+  const int last_x = paddings[WIDTH + LEFT] + in_width - 1;
+  const int first_y = paddings[HEIGHT + TOP];
+  const int last_y = paddings[HEIGHT + TOP] + in_height - 1;
+  const int first_channel = paddings[CHANNEL + LEFT];
+  const int last_channel = paddings[CHANNEL + LEFT] + in_channel - 1;
+  const int first_batch = paddings[BATCH + LEFT];
+  const int last_batch = paddings[BATCH + LEFT] + in_batch - 1;
+  const int out_channels = in_channel + paddings[CHANNEL + LEFT] + paddings[CHANNEL + RIGHT];
+
+  // Maps a coordinate of the padded tensor to the coordinate (still in padded space) whose value it takes;
+  // coordinates inside [first, last] map to themselves.
+  auto mirror = [mode](int coord, int first, int last) {
+    if (coord < first) {
+      return first + (first - coord) - mode;
+    }
+    if (coord > last) {
+      return last - (coord - last) + mode;
+    }
+    return coord;
+  };
+
+  for (size_t pos = 0; pos < output_size_; ++pos) {
+    // Decompose the flat output position into (batch, channel, y, x).
+    const int plane = (pos / out_width) / out_height;
+    const int out_x = pos % out_width;
+    const int out_y = (pos / out_width) % out_height;
+    const int out_channel = plane % out_channels;
+    const int out_batch = plane / out_channels;
+
+    const int match_x = mirror(out_x, first_x, last_x);
+    const int match_y = mirror(out_y, first_y, last_y);
+    const int match_channel = mirror(out_channel, first_channel, last_channel);
+    const int match_batch = mirror(out_batch, first_batch, last_batch);
+
+    // Height x width plane of the input that holds the matching element.
+    const int src_plane =
+      ((match_batch - paddings[BATCH + LEFT]) * in_channel) + (match_channel - paddings[CHANNEL + LEFT]);
+
+    // Remove the leading pads to turn padded coordinates into input coordinates, then copy.
+    dst[pos] = src[(src_plane * in_height + match_y - paddings[HEIGHT + TOP]) * in_width + match_x -
+                   paddings[WIDTH + LEFT]];
+  }
+}
+
+// Verifies that `kernel_node` has exactly two input tensors and one output tensor; raises an exception
+// naming the actual count otherwise.
+void MirrorPadCPUKernel::CheckParam(const CNodePtr &kernel_node) {
+  const size_t actual_inputs = AnfAlgo::GetInputTensorNum(kernel_node);
+  if (actual_inputs != 2) {
+    MS_LOG(EXCEPTION) << "Input number is " << actual_inputs << ", but MirrorPadCPUKernel needs 2 inputs.";
+  }
+
+  const size_t actual_outputs = AnfAlgo::GetOutputTensorNum(kernel_node);
+  if (actual_outputs != 1) {
+    MS_LOG(EXCEPTION) << "Output number is " << actual_outputs << ", but MirrorPadCPUKernel needs 1 output.";
+  }
+}
+
+}  // namespace kernel
+}  // namespace mindspore
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/mirror_pad_cpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mirror_pad_cpu_kernel.h
@ -0,0 +1,82 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MIRROR_PAD_CPU_KERNEL_H_
+#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MIRROR_PAD_CPU_KERNEL_H_
+#include <memory>
+#include <unordered_map>
+#include <vector>
+#include <string>
+#include "backend/kernel_compiler/cpu/cpu_kernel.h"
+#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
+
+// Shape of the flattened paddings table used by the kernel: MAX_PADDINGS rows of PADDING_SIZE entries each
+#define MAX_PADDINGS 4
+#define PADDING_SIZE 2
+
+// Row offsets into that table, used as paddings[WIDTH + LEFT]. NOTE(review): bodies are not parenthesised
+#define BATCH 0 * PADDING_SIZE
+#define CHANNEL 1 * PADDING_SIZE
+#define HEIGHT 2 * PADDING_SIZE
+#define WIDTH 3 * PADDING_SIZE
+#define TOP 0     // first entry of a HEIGHT row
+#define BOTTOM 1  // second entry of a HEIGHT row
+#define LEFT 0    // first entry of a row
+#define RIGHT 1   // second entry of a row
+
+namespace mindspore {
+namespace kernel {
+// CPU kernel for the MirrorPad operator: pads a tensor with mirrored copies of its own content, in REFLECT
+// or SYMMETRIC mode, using int64 paddings supplied as the second input.
+class MirrorPadCPUKernel : public CPUKernel {
+ public:
+  MirrorPadCPUKernel() = default;
+  ~MirrorPadCPUKernel() override = default;
+
+  // Reads mode, element type and shapes from `kernel_node` and caches them for Launch.
+  void InitKernel(const CNodePtr &kernel_node) override;
+
+  // Runs the kernel for the cached element type; `workspace` is not used.
+  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
+              const std::vector<AddressPtr> &outputs) override;
+
+  // Typed implementation behind Launch; T is the element type of the data input and of the output.
+  template <typename T>
+  void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);
+
+ private:
+  // Checks the number of input and output tensors of `kernel_node`.
+  void CheckParam(const CNodePtr &kernel_node);
+  TypeId dtype_{kTypeUnknown};        // element type of the first input
+  uint64_t tensor_size_ = 1;          // product of the input dimensions
+  size_t shape_size_ = 0;             // input rank, 4 once 2-D/3-D shapes have been promoted
+  uint64_t output_size_ = 1;          // product of the output dimensions
+  std::vector<size_t> input_shape_;   // input shape promoted to 4-D
+  std::vector<size_t> output_shape_;  // output shape as inferred for the node
+  int mode_ = 0;                      // 0 = REFLECT, 1 = SYMMETRIC
+  int num_paddings_ = 0;              // first dimension of the paddings input
+};
+
+MS_REG_CPU_KERNEL(  // MirrorPad: float16 data input, int64 paddings input, float16 output
+  MirrorPad,
+  KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeFloat16),
+  MirrorPadCPUKernel);
+
+MS_REG_CPU_KERNEL(  // MirrorPad: float32 data input, int64 paddings input, float32 output
+  MirrorPad,
+  KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeFloat32),
+  MirrorPadCPUKernel);
+
+MS_REG_CPU_KERNEL(  // MirrorPad: int32 data input, int64 paddings input, int32 output
+  MirrorPad, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt32),
+  MirrorPadCPUKernel);
+}  // namespace kernel
+}  // namespace mindspore
+#endif  // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MIRROR_PAD_CPU_KERNEL_H_
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/mirror_pad_grad_cpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mirror_pad_grad_cpu_kernel.cc
@ -0,0 +1,288 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/kernel_compiler/cpu/mirror_pad_grad_cpu_kernel.h"
+#include "runtime/device/cpu/cpu_device_address.h"
+
+namespace mindspore {
+namespace kernel {
+// Caches everything the gradient launch needs from `kernel_node`: element type, mirror mode (0 = REFLECT,
+// 1 = SYMMETRIC), the shape of the first input (the padded gradient dy) and of the output (the unpadded
+// gradient dx), both promoted to 4-D by prepending 1s, the number of rows of the paddings input, and the
+// element count of the interim workspace (dx batch x dx channel x dy height x dy width).
+// Raises an exception for a wrong input/output count, an unknown mode, an input or output rank outside
+// [2, 4], or a paddings input without a first dimension or with more than MAX_PADDINGS rows.
+void MirrorPadGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
+  CheckParam(kernel_node);
+  std::string mode = AnfAlgo::GetNodeAttr<std::string>(kernel_node, "mode");
+  dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0);
+  if (mode == "REFLECT") {
+    mode_ = 0;
+  } else if (mode == "SYMMETRIC") {
+    mode_ = 1;
+  } else {
+    MS_LOG(EXCEPTION) << "For mirror pad, only REFLECT and SYMMETRIC are supported.";
+  }
+
+  std::vector<size_t> input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
+  shape_size_ = input_shape.size();
+  if (shape_size_ < 2 || shape_size_ > 4) {
+    // The code below and LaunchKernel index dimensions 0..3 of the promoted shape.
+    MS_LOG(EXCEPTION) << "For mirror pad grad, the input rank must be 2, 3 or 4, but got " << shape_size_ << ".";
+  }
+  input_shape.insert(input_shape.begin(), 4 - shape_size_, 1);  // promote 2-D/3-D shapes to 4-D
+  shape_size_ = 4;
+
+  for (size_t i = 0; i < shape_size_; ++i) {
+    tensor_size_ *= input_shape[i];
+    input_shape_.push_back(input_shape[i]);
+  }
+
+  std::vector<size_t> padding_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
+  if (padding_shape.empty() || padding_shape[0] > MAX_PADDINGS) {
+    // More rows than MAX_PADDINGS cannot be placed in the fixed-size paddings table used by the launch.
+    MS_LOG(EXCEPTION) << "For mirror pad grad, paddings must be a tensor with at most " << MAX_PADDINGS
+                      << " rows.";
+  }
+  num_paddings_ = static_cast<int>(padding_shape[0]);
+
+  std::vector<size_t> output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
+  if (output_shape.size() < 2 || output_shape.size() > 4) {
+    MS_LOG(EXCEPTION) << "For mirror pad grad, the output rank must be 2, 3 or 4, but got " << output_shape.size()
+                      << ".";
+  }
+  output_shape.insert(output_shape.begin(), 4 - output_shape.size(), 1);  // promote 2-D/3-D shapes to 4-D
+  for (auto x : output_shape) {
+    output_size_ *= x;
+    output_shape_.push_back(x);
+  }
+
+  // The interim buffer holds one dy-sized height x width plane per dx batch/channel pair.
+  for (size_t i = 0; i < 2; i++) {
+    workspace_size_ *= output_shape[i];
+    workspace_size_ *= input_shape[i + 2];
+  }
+
+  // Basic sanity check of the padding amount. Here the input is the padded tensor and the output the
+  // unpadded one, so the padded height/width must not exceed what mirroring the unpadded extent can give:
+  // SYMMETRIC may repeat every element on each side, REFLECT all but the border element. The guard keeps
+  // the subtraction from wrapping when a dimension is 0.
+  const size_t border = (mode_ == 1) ? 0 : 1;
+  auto max_padded = [border](size_t dim) -> size_t { return (dim < border) ? 0 : dim + 2 * (dim - border); };
+  if (input_shape_[2] > max_padded(output_shape_[2]) || input_shape_[3] > max_padded(output_shape_[3])) {
+    MS_LOG(ERROR) << "ERROR: Padding value too high for input Tensor on 1 or more DIMS";
+  }
+}
+
+// Copies the `padd_dim` (before, after) rows of `paddings_arg` into the trailing rows of the
+// MAX_PADDINGS x PADDING_SIZE table `extracted_paddings`, so the last supplied row always lands in the last
+// (WIDTH) row of the table. Table rows that are not written keep whatever the caller stored in them.
+// When more than MAX_PADDINGS rows are supplied only the trailing MAX_PADDINGS rows are used, which keeps the
+// destination index from going negative.
+void extract_paddings_(const int64_t *paddings_arg, int padd_dim, int64_t *extracted_paddings) {
+  const int paddings_offset = MAX_PADDINGS - padd_dim;
+  const int first_row = (paddings_offset < 0) ? -paddings_offset : 0;
+  for (int i = first_row; i < padd_dim; i++) {
+    const int dst = (paddings_offset + i) * PADDING_SIZE;
+    const int src = i * PADDING_SIZE;
+    extracted_paddings[dst] = paddings_arg[src];
+    extracted_paddings[dst + 1] = paddings_arg[src + 1];
+  }
+}
+
+// Returns true when (x, y) lies inside a plane of `padded_width` columns and `padded_height` rows.
+bool range_check(int x, int y, int padded_width, int padded_height) {
+  const bool column_inside = (x >= 0) && (x <= padded_width - 1);
+  const bool row_inside = (y >= 0) && (y <= padded_height - 1);
+  return column_inside && row_inside;
+}
+
+// Dispatches to the LaunchKernel instantiation that matches the element type cached by InitKernel,
+// forwarding `inputs`, `workspace` and `outputs` unchanged.
+// Returns true after the kernel ran; raises an exception for any element type other than float16, float32
+// or int32.
+bool MirrorPadGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
+                                    const std::vector<kernel::AddressPtr> &workspace,
+                                    const std::vector<kernel::AddressPtr> &outputs) {
+  switch (dtype_) {
+    case kNumberTypeFloat16:
+      LaunchKernel<float16>(inputs, workspace, outputs);
+      break;
+    case kNumberTypeFloat32:
+      LaunchKernel<float>(inputs, workspace, outputs);
+      break;
+    case kNumberTypeInt32:
+      LaunchKernel<int>(inputs, workspace, outputs);
+      break;
+    default:
+      MS_LOG(EXCEPTION) << "Data type " << TypeIdLabel(dtype_) << " is not supported.";
+  }
+
+  return true;
+}
+
+// Appends the byte size of the interim buffer (workspace_size_ elements of type T) to workspace_size_list_.
+template <typename T>
+void MirrorPadGradCPUKernel::InitWorkspaceSize() {
+  const auto interim_bytes = workspace_size_ * sizeof(T);
+  workspace_size_list_.emplace_back(interim_bytes);
+}
+
+// Lets the base class fill in its size lists, then registers one workspace buffer sized for the cached
+// element type. No workspace entry is added for element types other than float16, float32 and int32.
+void MirrorPadGradCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) {
+  CPUKernel::InitInputOutputSize(kernel_node);
+  switch (dtype_) {
+    case kNumberTypeFloat16:
+      InitWorkspaceSize<float16>();
+      break;
+    case kNumberTypeFloat32:
+      InitWorkspaceSize<float>();
+      break;
+    case kNumberTypeInt32:
+      InitWorkspaceSize<int>();
+      break;
+    default:
+      break;
+  }
+}
+
+template <typename T>  // Second gradient pass: folds the mirrored height/width entries of interim_dy into dx
+void MirrorPadGradCPUKernel::MirrorPadGrad_Width_Height(const size_t size, const T *dy, T *interim_dy,
+                                                        const int dx_batches, const int dx_channels,
+                                                        const int dx_height, const int dx_width, const int dy_height,
+                                                        const int dy_width, const int padd_dim,
+                                                        const int64_t *paddings_arg, int mode, T *dx) {
+  int64_t paddings[MAX_PADDINGS * PADDING_SIZE];  // local fixed-size table of (before, after) pads per dimension
+  for (int i = 0; i < MAX_PADDINGS * PADDING_SIZE; i++) {
+    paddings[i] = 0;  // rows that paddings_arg does not supply stay 0
+  }
+  extract_paddings_(paddings_arg, padd_dim, paddings);
+  // Anchor points: first (ap1) and last (ap2) column/row of the unpadded data inside a dy_height x dy_width plane
+  int ap1_x = paddings[WIDTH + LEFT];
+  int ap2_x = paddings[WIDTH + LEFT] + dx_width - 1;
+  int ap1_y = paddings[HEIGHT + TOP];
+  int ap2_y = paddings[HEIGHT + TOP] + dx_height - 1;
+
+  for (size_t pos = 0; pos < size; ++pos) {  // pos walks the dx elements; dy, dx_batches, dx_channels are not read
+    int dx_block_num = (pos / dx_width) / dx_height;  // index of the height x width plane that holds pos
+    const int grad_x = (pos % dx_width) + paddings[WIDTH + LEFT];  // column of pos inside the padded plane
+    const int grad_y = ((pos / dx_width) % dx_height) + paddings[HEIGHT + TOP];  // row of pos inside the padded plane
+    // start from the value stored at the element's own position in the padded plane
+    dx[pos] = interim_dy[(dx_block_num * dy_height + grad_y) * dy_width + grad_x];
+
+    int x_dist_1 = (ap1_x - grad_x - mode);  // anchor + dist is the mirror image of the element across that anchor
+    int y_dist_1 = (ap1_y - grad_y - mode);
+    int x_dist_2 = (ap2_x - grad_x + mode);
+    int y_dist_2 = (ap2_y - grad_y + mode);
+    int axis_dist[] = {x_dist_1, x_dist_2, y_dist_1, y_dist_2};
+    int anch_point[] = {ap1_x, ap2_x, ap1_y, ap2_y};
+    bool x_axis_check[] = {true, true, false, false};  // true - update X , false - update Y
+
+    int temp_x = 0;
+    int temp_y = 0;
+    // mirror across each of the four edges; zero offsets are skipped and range_check drops images outside the plane
+    for (int x = 0; x < 4; x++) {
+      if (axis_dist[x] != 0) {
+        if (x_axis_check[x]) {
+          temp_y = grad_y;
+          temp_x = anch_point[x] + axis_dist[x];
+        } else {
+          temp_x = grad_x;
+          temp_y = anch_point[x] + axis_dist[x];
+        }
+        if (range_check(temp_x, temp_y, dy_width, dy_height)) {
+          dx[pos] = dx[pos] + interim_dy[(dx_block_num * dy_height + temp_y) * dy_width + temp_x];
+        }
+      }
+    }
+    // mirror across one x anchor and one y anchor at the same time (the four corner combinations)
+    for (int x = 0; x < 2; x++) {
+      for (int y = 2; y < 4; y++) {
+        if ((axis_dist[x] != 0) && (axis_dist[y] != 0)) {
+          temp_x = anch_point[x] + axis_dist[x];
+          temp_y = anch_point[y] + axis_dist[y];
+          if (range_check(temp_x, temp_y, dy_width, dy_height)) {
+            dx[pos] = dx[pos] + interim_dy[(dx_block_num * dy_height + temp_y) * dy_width + temp_x];
+          }
+        }
+      }
+    }
+  }
+  return;
+}
+
+template <typename T>  // First gradient pass: folds the mirrored batch/channel planes of dy into interim_dy
+void MirrorPadGradCPUKernel::MirrorPadGradBatchChannel(const size_t size, T *dy, T *interim_dy, const int dx_batches,
+                                                       const int dx_channels, const int dx_height, const int dx_width,
+                                                       const int dy_height, const int dy_width, const int padd_dim,
+                                                       const int64_t *paddings_arg, int mode, T *dx) {
+  int64_t paddings[MAX_PADDINGS * PADDING_SIZE];  // local fixed-size table of (before, after) pads per dimension
+  for (int i = 0; i < MAX_PADDINGS * PADDING_SIZE; i++) {
+    paddings[i] = 0;  // rows that paddings_arg does not supply stay 0
+  }
+  extract_paddings_(paddings_arg, padd_dim, paddings);
+  // Anchor points: first (ap1) and last (ap2) unpadded channel/batch index inside dy
+  int ap1_channel = paddings[CHANNEL + LEFT];
+  int ap2_channel = paddings[CHANNEL + LEFT] + dx_channels - 1;
+  int ap1_batch = paddings[BATCH + LEFT];
+  int ap2_batch = paddings[BATCH + LEFT] + dx_batches - 1;
+  int dy_channels = dx_channels + paddings[CHANNEL + LEFT] + paddings[CHANNEL + RIGHT];
+  int dy_batches = dx_batches + paddings[BATCH + LEFT] + paddings[BATCH + RIGHT];
+
+  for (size_t pos = 0; pos < size; ++pos) {  // pos walks interim_dy; dx, dx_height and dx_width are not read
+    int block_num = (pos / dy_width) / dy_height;
+    // Decompose pos into (batch, channel, y, x) of interim_dy: dx batches/channels, dy height/width
+    const int interim_x = pos % dy_width;
+    const int interim_y = (pos / dy_width) % dy_height;
+    const int interim_channel = block_num % dx_channels;
+    const int interim_batch = block_num / dx_channels;
+    interim_dy[pos] = T(0);  // accumulator starts at zero
+    // map cur interim channel and batch to equivalent in padded dy array
+    const int equiv_dy_channel = interim_channel + paddings[CHANNEL + LEFT];
+    const int equiv_dy_batch = interim_batch + paddings[BATCH + LEFT];
+    int target_batch = 0;
+    int target_channel = 0;
+    int equiv_block_num = 0;
+    equiv_block_num = ((equiv_dy_batch * dy_channels) + equiv_dy_channel);  // overwritten below before it is read
+    // offsets to the element itself (0) and to its mirror images across the first and last anchor
+    auto batch_offsets = {2 * (ap1_batch - equiv_dy_batch) - mode, 0, 2 * (ap2_batch - equiv_dy_batch) + mode};
+    auto channel_offsets = {2 * (ap1_channel - equiv_dy_channel) - mode, 0,
+                            2 * (ap2_channel - equiv_dy_channel) + mode};
+    for (auto b_adjust : batch_offsets) {
+      for (auto c_adjust : channel_offsets) {
+        target_batch = equiv_dy_batch + b_adjust;
+        target_channel = equiv_dy_channel + c_adjust;
+        // bounds check - if within bounds, mirrored value exists - copy dy
+        if ((target_batch < 0) || (target_batch > (dy_batches - 1)) || (target_channel < 0) ||
+            (target_channel > (dy_channels - 1))) {
+          continue;  // no mirrored value with these target values
+        }
+        equiv_block_num = ((target_batch * dy_channels) + target_channel);
+        // Accumulate, then zero the dy element to avoid duplicates in reflect mode (dy is modified in place)
+        interim_dy[pos] = T(interim_dy[pos] + dy[(equiv_block_num * dy_height + interim_y) * dy_width + interim_x]);
+        dy[(equiv_block_num * dy_height + interim_y) * dy_width + interim_x] = T(0);
+      }
+    }
+  }
+  return;
+}
+
+// Typed gradient launch. inputs[0] is the padded gradient dy, inputs[1] the int64 paddings, workspace[0]
+// the interim buffer and outputs[0] the unpadded gradient dx. The batch/channel pass runs first and writes
+// the interim buffer; the width/height pass then reduces that buffer into dx.
+template <typename T>
+void MirrorPadGradCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs,
+                                          const std::vector<AddressPtr> &workspace,
+                                          const std::vector<AddressPtr> &outputs) {
+  T *dy = reinterpret_cast<T *>(inputs[0]->addr);
+  int64_t *pad_pairs = reinterpret_cast<int64_t *>(inputs[1]->addr);
+  T *interim_dy = reinterpret_cast<T *>(workspace[0]->addr);
+  T *dx = reinterpret_cast<T *>(outputs[0]->addr);
+
+  const int dx_batches = output_shape_[0];
+  const int dx_channels = output_shape_[1];
+  const int dx_height = output_shape_[2];
+  const int dx_width = output_shape_[3];
+  const int dy_height = input_shape_[2];
+  const int dy_width = input_shape_[3];
+
+  MirrorPadGradBatchChannel(workspace_size_, dy, interim_dy, dx_batches, dx_channels, dx_height, dx_width, dy_height,
+                            dy_width, num_paddings_, pad_pairs, mode_, dx);
+
+  MirrorPadGrad_Width_Height(output_size_, dy, interim_dy, dx_batches, dx_channels, dx_height, dx_width, dy_height,
+                             dy_width, num_paddings_, pad_pairs, mode_, dx);
+}
+
+// Verifies that `kernel_node` has exactly two input tensors and one output tensor; raises an exception
+// naming the actual count otherwise.
+void MirrorPadGradCPUKernel::CheckParam(const CNodePtr &kernel_node) {
+  const size_t actual_inputs = AnfAlgo::GetInputTensorNum(kernel_node);
+  if (actual_inputs != 2) {
+    MS_LOG(EXCEPTION) << "Input number is " << actual_inputs << ", but MirrorPadGradCPUKernel needs 2 inputs.";
+  }
+
+  const size_t actual_outputs = AnfAlgo::GetOutputTensorNum(kernel_node);
+  if (actual_outputs != 1) {
+    MS_LOG(EXCEPTION) << "Output number is " << actual_outputs << ", but MirrorPadGradCPUKernel needs 1 output.";
+  }
+}
+}  // namespace kernel
+}  // namespace mindspore
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/mirror_pad_grad_cpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mirror_pad_grad_cpu_kernel.h
@ -0,0 +1,100 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MIRROR_PAD_GRAD_CPU_KERNEL_H_
+#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MIRROR_PAD_GRAD_CPU_KERNEL_H_
+#include <memory>
+#include <unordered_map>
+#include <vector>
+#include <string>
+#include "backend/kernel_compiler/cpu/cpu_kernel.h"
+#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
+
+// Shape of the flattened paddings table used by the kernel: MAX_PADDINGS rows of PADDING_SIZE entries each
+#define MAX_PADDINGS 4
+#define PADDING_SIZE 2
+
+// Row offsets into that table, used as paddings[WIDTH + LEFT]. NOTE(review): bodies are not parenthesised
+#define BATCH 0 * PADDING_SIZE
+#define CHANNEL 1 * PADDING_SIZE
+#define HEIGHT 2 * PADDING_SIZE
+#define WIDTH 3 * PADDING_SIZE
+#define TOP 0     // first entry of a HEIGHT row
+#define BOTTOM 1  // second entry of a HEIGHT row
+#define LEFT 0    // first entry of a row
+#define RIGHT 1   // second entry of a row
+
+namespace mindspore {
+namespace kernel {
+// CPU kernel for the MirrorPadGrad operator: reduces the gradient of a mirror-padded tensor (first input)
+// back to the shape of the unpadded tensor (output), using int64 paddings supplied as the second input and
+// one interim workspace buffer.
+class MirrorPadGradCPUKernel : public CPUKernel {
+ public:
+  MirrorPadGradCPUKernel() = default;
+  ~MirrorPadGradCPUKernel() override = default;
+
+  // Reads mode, element type and shapes from `kernel_node` and caches them for Launch.
+  void InitKernel(const CNodePtr &kernel_node) override;
+  // Calls the base implementation, then registers the interim workspace buffer.
+  void InitInputOutputSize(const CNodePtr &kernel_node) override;
+
+  // Runs the kernel for the cached element type.
+  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
+              const std::vector<AddressPtr> &outputs) override;
+
+  // Appends workspace_size_ * sizeof(T) to the workspace size list.
+  template <typename T>
+  void InitWorkspaceSize();
+
+  // Typed implementation behind Launch; runs the batch/channel pass, then the width/height pass.
+  template <typename T>
+  void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
+                    const std::vector<AddressPtr> &outputs);
+
+  // Second pass: accumulates the mirrored height/width entries of `interim_dy` into `dx`.
+  template <typename T>
+  void MirrorPadGrad_Width_Height(const size_t size, const T *dy, T *interim_dy, const int dx_batches,
+                                  const int dx_channels, const int dx_height, const int dx_width, const int dy_height,
+                                  const int dy_width, const int padd_dim, const int64_t *paddings_arg, int mode, T *dx);
+
+  // First pass: accumulates the mirrored batch/channel planes of `dy` into `interim_dy`; zeroes the `dy`
+  // elements it consumes.
+  template <typename T>
+  void MirrorPadGradBatchChannel(const size_t size, T *dy, T *interim_dy, const int dx_batches, const int dx_channels,
+                                 const int dx_height, const int dx_width, const int dy_height, const int dy_width,
+                                 const int padd_dim, const int64_t *paddings_arg, int mode, T *dx);
+
+ private:
+  // Checks the number of input and output tensors of `kernel_node`.
+  void CheckParam(const CNodePtr &kernel_node);
+  TypeId dtype_{kTypeUnknown};        // element type of the first input
+  size_t tensor_size_ = 1;            // product of the input dimensions
+  size_t shape_size_ = 0;             // input rank, 4 once 2-D/3-D shapes have been promoted
+  size_t output_size_ = 1;            // product of the output dimensions
+  size_t workspace_size_ = 1;         // element count of the interim buffer
+  std::vector<size_t> input_shape_;   // input shape promoted to 4-D
+  std::vector<size_t> output_shape_;  // output shape promoted to 4-D
+  int mode_ = 0;                      // 0 = REFLECT, 1 = SYMMETRIC
+  int num_paddings_ = 0;              // first dimension of the paddings input
+};
+
+MS_REG_CPU_KERNEL(  // MirrorPadGrad: float16 data input, int64 paddings input, float16 output
+  MirrorPadGrad,
+  KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeFloat16),
+  MirrorPadGradCPUKernel);
+
+MS_REG_CPU_KERNEL(  // MirrorPadGrad: float32 data input, int64 paddings input, float32 output
+  MirrorPadGrad,
+  KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeFloat32),
+  MirrorPadGradCPUKernel);
+
+MS_REG_CPU_KERNEL(  // MirrorPadGrad: int32 data input, int64 paddings input, int32 output
+  MirrorPadGrad,
+  KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt32),
+  MirrorPadGradCPUKernel);
+
+}  // namespace kernel
+}  // namespace mindspore
+#endif  // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MIRROR_PAD_GRAD_CPU_KERNEL_H_
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/pad_cpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/pad_cpu_kernel.cc
@ -0,0 +1,127 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/kernel_compiler/cpu/pad_cpu_kernel.h"
+#include "runtime/device/cpu/cpu_device_address.h"
+
+namespace mindspore {
+namespace kernel {
+// Caches everything Launch needs from `kernel_node`: the "paddings" attribute (one (before, after) pair per
+// dimension), the element type of the input, and the input and output shapes. Shapes and paddings given for
+// 2 or 3 dimensions are promoted to 4 by prepending size-1 dimensions and {0, 0} pairs.
+// Raises an exception for a wrong input/output count, an input rank outside [2, 4], a paddings list whose
+// length is outside [2, 4], or a paddings row with fewer than two entries.
+void PadCPUKernel::InitKernel(const CNodePtr &kernel_node) {
+  CheckParam(kernel_node);
+  paddings_ = AnfAlgo::GetNodeAttr<std::vector<std::vector<int64_t>>>(kernel_node, "paddings");
+  dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0);
+  std::vector<size_t> input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
+
+  shape_size_ = input_shape.size();
+  if (shape_size_ < 2 || shape_size_ > 4) {
+    // LaunchKernel indexes dimensions 0..3 of the promoted shape.
+    MS_LOG(EXCEPTION) << "For Pad, the input rank must be 2, 3 or 4, but got " << shape_size_ << ".";
+  }
+  input_shape.insert(input_shape.begin(), 4 - shape_size_, 1);  // promote 2-D/3-D shapes to 4-D
+  shape_size_ = 4;
+
+  for (size_t i = 0; i < shape_size_; ++i) {
+    tensor_size_ *= input_shape[i];
+    input_shape_.push_back(input_shape[i]);
+  }
+
+  if (paddings_.size() < 2 || paddings_.size() > 4) {
+    // The loop below and LaunchKernel read paddings_[0..3].
+    MS_LOG(EXCEPTION) << "For Pad, paddings must have 2, 3 or 4 rows, but got " << paddings_.size() << ".";
+  }
+  paddings_.insert(paddings_.begin(), 4 - paddings_.size(), std::vector<int64_t>{0, 0});  // unpadded leading dims
+  for (const auto &row : paddings_) {
+    if (row.size() < 2) {
+      MS_LOG(EXCEPTION) << "For Pad, every paddings row needs a before and an after value.";
+    }
+  }
+
+  for (size_t i = 0; i < shape_size_; i++) {
+    size_t temp = input_shape[i] + (paddings_[i][0] + paddings_[i][1]);  // compute new dim size
+    output_size_ *= temp;
+    output_shape_.push_back(temp);  // correct new dimension size
+  }
+}
+
+// Dispatches to the LaunchKernel instantiation that matches the element type cached by InitKernel.
+// `inputs` and `outputs` are forwarded unchanged; the workspace argument is not used.
+// Returns true after the kernel ran; raises an exception for any element type other than float16, float32
+// or int32.
+bool PadCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
+                          const std::vector<kernel::AddressPtr> & /*workspace*/,
+                          const std::vector<kernel::AddressPtr> &outputs) {
+  switch (dtype_) {
+    case kNumberTypeFloat16:
+      LaunchKernel<float16>(inputs, outputs);
+      break;
+    case kNumberTypeFloat32:
+      LaunchKernel<float>(inputs, outputs);
+      break;
+    case kNumberTypeInt32:
+      LaunchKernel<int>(inputs, outputs);
+      break;
+    default:
+      MS_LOG(EXCEPTION) << "Data type " << TypeIdLabel(dtype_) << " is not supported.";
+  }
+  return true;
+}
+
+// Writes the zero-padded output. inputs[0] holds the source elements and outputs[0] the destination.
+// Every output position is decomposed into (batch, channel, row, column); positions that fall into the
+// padding of any of the four dimensions receive T(0), all others copy the corresponding input element.
+// Batch padding is handled like the other dimensions, so padded batch items are zero-filled instead of
+// being read from (or past) the input buffer.
+template <typename T>
+void PadCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) {
+  auto inputs_addr = reinterpret_cast<T *>(inputs[0]->addr);
+  auto outputs_addr = reinterpret_cast<T *>(outputs[0]->addr);
+
+  const int pad_left = paddings_[3][0];
+  const int pad_top = paddings_[2][0];
+  const int pad_channel_before = paddings_[1][0];
+  const int pad_channel_after = paddings_[1][1];
+  const int pad_batch_before = paddings_[0][0];
+  const T pad_value = T(0);
+
+  const int batch_orig = input_shape_[0];
+  const int channels_orig = input_shape_[1];
+  const int old_height = input_shape_[2];
+  const int old_width = input_shape_[3];
+  const int padded_height = output_shape_[2];
+  const int padded_width = output_shape_[3];
+  const int channels_new = channels_orig + pad_channel_after + pad_channel_before;  // channels after padding
+
+  for (size_t pos = 0; pos < output_size_; ++pos) {
+    const int block_num = (pos / padded_width) / padded_height;  // height x width plane that holds pos
+    const int padded_w = pos % padded_width;                     // x coordinate referred to by cur 'pos'
+    const int padded_h = (pos / padded_width) % padded_height;   // y coordinate referred to by cur 'pos'
+    const int channel_num = block_num % channels_new;            // current channel
+    const int batch_item = block_num / channels_new;             // current item in batch
+
+    // Coordinates of the same element in the input; out of range means the position is padding.
+    const int src_w = padded_w - pad_left;
+    const int src_h = padded_h - pad_top;
+    const int src_channel = channel_num - pad_channel_before;
+    const int src_batch = batch_item - pad_batch_before;
+
+    const bool is_padding = src_h < 0 || src_w < 0 || src_h >= old_height || src_w >= old_width ||
+                            src_channel < 0 || src_channel >= channels_orig || src_batch < 0 ||
+                            src_batch >= batch_orig;
+    if (is_padding) {
+      outputs_addr[pos] = pad_value;
+      continue;
+    }
+
+    // All source coordinates are non-negative here, so the flat index can be formed in size_t.
+    const size_t equiv_block_num = static_cast<size_t>(src_batch) * channels_orig + src_channel;
+    outputs_addr[pos] = inputs_addr[(equiv_block_num * old_height + src_h) * old_width + src_w];
+  }
+}
+
+// Verifies that `kernel_node` has exactly one input tensor and one output tensor; raises an exception
+// naming the actual count otherwise.
+void PadCPUKernel::CheckParam(const CNodePtr &kernel_node) {
+  const size_t actual_inputs = AnfAlgo::GetInputTensorNum(kernel_node);
+  if (actual_inputs != 1) {
+    MS_LOG(EXCEPTION) << "Input number is " << actual_inputs << ", but PadCPUKernel needs 1 input.";
+  }
+
+  const size_t actual_outputs = AnfAlgo::GetOutputTensorNum(kernel_node);
+  if (actual_outputs != 1) {
+    MS_LOG(EXCEPTION) << "Output number is " << actual_outputs << ", but PadCPUKernel needs 1 output.";
+  }
+}
+}  // namespace kernel
+}  // namespace mindspore
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/pad_cpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/pad_cpu_kernel.h
@ -0,0 +1,58 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_PAD_CPU_KERNEL_H_
+#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_PAD_CPU_KERNEL_H_
+#include <memory>
+#include <unordered_map>
+#include <vector>
+#include "backend/kernel_compiler/cpu/cpu_kernel.h"
+#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
+
+namespace mindspore {
+namespace kernel {
+// CPU kernel class for the Pad operator. It overrides the CPUKernel hooks InitKernel() and Launch();
+// LaunchKernel<T>() is the element-type-specific routine (presumably selected by Launch() from dtype_ --
+// confirm in pad_cpu_kernel.cc).
+class PadCPUKernel : public CPUKernel {
+ public:
+  PadCPUKernel() = default;
+  ~PadCPUKernel() override = default;
+
+  // Initialization hook inherited from CPUKernel; receives the graph node this kernel is built for.
+  void InitKernel(const CNodePtr &kernel_node) override;
+
+  // Execution hook inherited from CPUKernel; receives the input, workspace and output buffers.
+  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
+              const std::vector<AddressPtr> &outputs) override;
+
+  // Typed implementation; T is presumably the element type of the input/output buffers.
+  template <typename T>
+  void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);
+
+ private:
+  // Rejects nodes that do not have exactly one input tensor and one output tensor.
+  void CheckParam(const CNodePtr &kernel_node);
+
+  std::vector<std::vector<int64_t>> paddings_;  // presumably one (before, after) pair per dimension -- confirm
+  TypeId dtype_{kTypeUnknown};                  // stays kTypeUnknown until assigned
+  uint64_t tensor_size_{1};
+  size_t shape_size_{1};
+  uint64_t output_size_{1};
+  std::vector<size_t> input_shape_;
+  std::vector<size_t> output_shape_;
+};
+
+// Register PadCPUKernel for the Pad op; each registration uses the same element type for input and output.
+MS_REG_CPU_KERNEL(Pad, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
+                  PadCPUKernel);
+MS_REG_CPU_KERNEL(Pad, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
+                  PadCPUKernel);
+MS_REG_CPU_KERNEL(Pad, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
+                  PadCPUKernel);
+}  // namespace kernel
+}  // namespace mindspore
+#endif  // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_PAD_CPU_KERNEL_H_
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/slice_cpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/slice_cpu_kernel.h
@ -53,8 +53,11 @@ class SliceCPUKernel : public CPUKernel {

 MS_REG_CPU_KERNEL(Slice, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
                   SliceCPUKernel);
+MS_REG_CPU_KERNEL(Slice, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32), SliceCPUKernel);
 MS_REG_CPU_KERNEL(StridedSlice, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
                   SliceCPUKernel);
+MS_REG_CPU_KERNEL(StridedSlice, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
+                  SliceCPUKernel);  // Slice and StridedSlice are now registered for float32 and int32
 }  // namespace kernel
 }  // namespace mindspore

--- a/tests/st/ops/cpu/test_mirror_pad.py
+++ b/tests/st/ops/cpu/test_mirror_pad.py
@ -0,0 +1,169 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+import pytest
+import numpy as np
+
+import mindspore
+import mindspore.nn as nn
+import mindspore.context as context
+
+from mindspore import Tensor
+from mindspore.ops.composite import GradOperation
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_cpu_training
+@pytest.mark.env_onecard
+def test_mirror_pad():
+    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
+
+    test1_arr_in = [[[[1, 2, 3], [4, 5, 6], [7, 8, 9]]]]
+    test_1_paddings = ((0, 0), (0, 0), (1, 1), (2, 2))
+    test1_arr_exp = [[[[6, 5, 4, 5, 6, 5, 4], [3, 2, 1, 2, 3, 2, 1], [6, 5, 4, 5, 6, 5, 4],
+                       [9, 8, 7, 8, 9, 8, 7], [6, 5, 4, 5, 6, 5, 4]]]]
+
+    test2_arr_in = [[[[1, 2, 3], [4, 5, 6], [7, 8, 9]]]]
+    test_2_paddings = ((0, 0), (0, 0), (1, 1), (2, 2))
+    test2_arr_exp = [[[[2, 1, 1, 2, 3, 3, 2], [2, 1, 1, 2, 3, 3, 2], [5, 4, 4, 5, 6, 6, 5],
+                       [8, 7, 7, 8, 9, 9, 8], [8, 7, 7, 8, 9, 9, 8]]]]
+
+    reflectOp = nn.Pad(mode='REFLECT', paddings=test_1_paddings)
+    symmOp = nn.Pad(mode='SYMMETRIC', paddings=test_2_paddings)
+
+    x_test_1 = Tensor(np.array(test1_arr_in), dtype=mindspore.float32)
+    x_test_2 = Tensor(np.array(test2_arr_in), dtype=mindspore.float32)
+
+    y_test_1 = reflectOp(x_test_1).asnumpy()
+    y_test_2 = symmOp(x_test_2).asnumpy()
+
+    print(np.array(test1_arr_in))
+    print(y_test_1)
+
+    np.testing.assert_equal(np.array(test1_arr_exp), y_test_1)
+    np.testing.assert_equal(np.array(test2_arr_exp), y_test_2)
+
+
+class Grad(nn.Cell):
+    def __init__(self, network):
+        super(Grad, self).__init__()
+        self.grad = GradOperation(get_all=True, sens_param=True)
+        self.network = network
+    def construct(self, input_, output_grad):
+        return self.grad(self.network)(input_, output_grad)
+
+class Net(nn.Cell):
+    def __init__(self, pads, mode_):
+        super(Net, self).__init__()
+        self.pad = nn.Pad(mode=mode_, paddings=pads)
+    def construct(self, x):
+        return self.pad(x)
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_cpu_training
+@pytest.mark.env_onecard
+def test_mirror_pad_backprop():
+    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
+    test_arr_in = [[[[1, 2, 3], [4, 5, 6], [7, 8, 9]]]] # size -> 3*3
+    test_arr_in = Tensor(test_arr_in, dtype=mindspore.float32)
+    dy = (np.ones((1, 1, 4, 5)) * 0.1).astype(np.float32)
+    expected_dx = np.array([[[[0.2, 0.2, 0.1],
+                              [0.4, 0.4, 0.2],
+                              [0.2, 0.2, 0.1]]]])
+    net = Grad(Net(((0, 0), (0, 0), (1, 0), (0, 2)), "REFLECT"))
+    dx = net(test_arr_in, Tensor(dy))
+    dx = dx[0].asnumpy()
+    np.testing.assert_array_almost_equal(dx, expected_dx)
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_mirror_pad_fwd_back_4d_int32_reflect():
+    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
+    # set constants
+    shape = (2, 3, 3, 5)
+    pads = ((1, 0), (2, 0), (1, 2), (3, 4))
+    total_val = np.prod(shape)
+    test_arr_np = np.arange(total_val).reshape(shape) + 1
+    test_arr_ms = Tensor(test_arr_np, dtype=mindspore.int32)
+    # fwd_pass_check
+    op = nn.Pad(mode="REFLECT", paddings=pads)
+    expected_np_result = np.pad(test_arr_np, pads, 'reflect')
+    obtained_ms_res = op(test_arr_ms).asnumpy()
+    np.testing.assert_array_equal(expected_np_result, obtained_ms_res)
+    # backwards pass check
+    GradNet = Grad(Net(pads, "REFLECT"))
+    dy_value = Tensor(np.ones(obtained_ms_res.shape), dtype=mindspore.int32)
+    dx_value_obtained = GradNet(test_arr_ms, dy_value)[0].asnumpy()
+    dx_value_expected = np.array([[[[4, 6, 6, 6, 2],
+                                    [6, 9, 9, 9, 3],
+                                    [2, 3, 3, 3, 1]],
+                                   [[8, 12, 12, 12, 4],
+                                    [12, 18, 18, 18, 6],
+                                    [4, 6, 6, 6, 2]],
+                                   [[8, 12, 12, 12, 4],
+                                    [12, 18, 18, 18, 6],
+                                    [4, 6, 6, 6, 2]]],
+                                  [[[8, 12, 12, 12, 4],
+                                    [12, 18, 18, 18, 6],
+                                    [4, 6, 6, 6, 2]],
+                                   [[16, 24, 24, 24, 8],
+                                    [24, 36, 36, 36, 12],
+                                    [8, 12, 12, 12, 4]],
+                                   [[16, 24, 24, 24, 8],
+                                    [24, 36, 36, 36, 12],
+                                    [8, 12, 12, 12, 4]]]], dtype=np.int32)
+    np.testing.assert_array_equal(dx_value_expected, dx_value_obtained)
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_cpu_training
+@pytest.mark.env_onecard
+def test_mirror_pad_fwd_back_4d_int32_symm():
+    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
+    # set constants
+    shape = (2, 3, 3, 5)
+    pads = ((1, 0), (2, 0), (1, 2), (3, 4))
+    total_val = np.prod(shape)
+    test_arr_np = np.arange(total_val).reshape(shape) + 1
+    test_arr_ms = Tensor(test_arr_np, dtype=mindspore.int32)
+    # fwd_pass_check
+    op = nn.Pad(mode="SYMMETRIC", paddings=pads)
+    expected_np_result = np.pad(test_arr_np, pads, 'symmetric')
+    obtained_ms_res = op(test_arr_ms).asnumpy()
+    np.testing.assert_array_equal(expected_np_result, obtained_ms_res)
+    # backwards pass check
+    GradNet = Grad(Net(pads, "SYMMETRIC"))
+    dy_value = Tensor(np.ones(obtained_ms_res.shape), dtype=mindspore.int32)
+    dx_value_obtained = GradNet(test_arr_ms, dy_value)[0].asnumpy()
+    dx_value_expected = np.array([[[[16, 24, 24, 16, 16],
+                                    [16, 24, 24, 16, 16],
+                                    [16, 24, 24, 16, 16]],
+                                   [[16, 24, 24, 16, 16],
+                                    [16, 24, 24, 16, 16],
+                                    [16, 24, 24, 16, 16]],
+                                   [[8, 12, 12, 8, 8],
+                                    [8, 12, 12, 8, 8],
+                                    [8, 12, 12, 8, 8]]],
+                                  [[[8, 12, 12, 8, 8],
+                                    [8, 12, 12, 8, 8],
+                                    [8, 12, 12, 8, 8]],
+                                   [[8, 12, 12, 8, 8],
+                                    [8, 12, 12, 8, 8],
+                                    [8, 12, 12, 8, 8]],
+                                   [[4, 6, 6, 4, 4],
+                                    [4, 6, 6, 4, 4],
+                                    [4, 6, 6, 4, 4]]]], dtype=np.int32)
+    np.testing.assert_array_equal(dx_value_expected, dx_value_obtained)
--- a/tests/st/ops/cpu/test_pad.py
+++ b/tests/st/ops/cpu/test_pad.py
@ -0,0 +1,251 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+import pytest
+import numpy as np
+
+import mindspore
+import mindspore.nn as nn
+import mindspore.context as context
+
+from mindspore import Tensor
+from mindspore.ops.composite import GradOperation
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_cpu_training
+@pytest.mark.env_onecard
+def test_pad_basic():
+    """
+    Test array is being padded with 0's
+    """
+    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
+
+    # float32
+    test_arr = np.array([[1, 2], [3, 4]]).astype(np.float32)
+    test_arr_expected = np.array(
+        [[0, 0, 0, 0], [0, 1, 2, 0], [0, 3, 4, 0], [0, 0, 0, 0]]).astype(np.float32)
+    x_test = Tensor(test_arr, dtype=mindspore.float32)
+    pad_op = nn.Pad(mode='CONSTANT', paddings=((1, 1), (1, 1)))
+    y_test = pad_op(x_test).asnumpy()
+    np.testing.assert_array_equal(y_test, test_arr_expected)
+
+    # float16
+    test_arr = np.array([[1, 2], [3, 4]]).astype(np.float16)
+    test_arr_expected = np.array(
+        [[0, 0, 0, 0], [0, 1, 2, 0], [0, 3, 4, 0], [0, 0, 0, 0]]).astype(np.float16)
+    x_test = Tensor(test_arr, dtype=mindspore.float16)
+    pad_op = nn.Pad(mode='CONSTANT', paddings=((1, 1), (1, 1)))
+    y_test = pad_op(x_test).asnumpy()
+    np.testing.assert_array_equal(y_test, test_arr_expected)
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_cpu_training
+@pytest.mark.env_onecard
+def test_pad_row():
+    """
+    Test correct row padding
+    """
+    context.set_context(mode=context.PYNATIVE_MODE, device_target="CPU")
+
+    test_arr_1 = np.random.rand(40, 40).astype(np.float32)
+    test_paddings_1 = ((2, 3), (0, 0))
+    test_arr_2 = np.random.randn(3, 10, 30, 30).astype(np.float32)
+    test_paddings_2 = ((0, 0), (0, 0), (3, 0), (0, 0))
+
+    pad_op_row_1 = nn.Pad(mode='CONSTANT', paddings=test_paddings_1)
+    pad_op_row_2 = nn.Pad(mode='CONSTANT', paddings=test_paddings_2)
+
+    x_test_1 = Tensor(np.array(test_arr_1), dtype=mindspore.float32)
+    x_test_2 = Tensor(np.array(test_arr_2), dtype=mindspore.float32)
+    y_test_1 = pad_op_row_1(x_test_1).asnumpy()
+    y_test_2 = pad_op_row_2(x_test_2).asnumpy()
+
+    # check size
+    assert y_test_1.shape == (45, 40)
+    assert y_test_2.shape == (3, 10, 33, 30)
+
+    # check values - select correct sections
+    np.testing.assert_equal(y_test_1[2:-3, :], test_arr_1)
+    np.testing.assert_equal(y_test_2[:, :, 3:, :], test_arr_2)
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_cpu_training
+@pytest.mark.env_onecard
+def test_pad_column():
+    """
+    Test correct column padding
+    """
+    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
+
+    test_arr_1 = np.random.randn(40, 40).astype(np.float32)
+    test_paddings_1 = ((0, 0), (3, 3))
+    test_arr_2 = np.random.randn(3, 10, 30, 30).astype(np.float32)
+    test_paddings_2 = ((0, 0), (0, 0), (0, 0), (6, 1))
+
+    pad_op_col_1 = nn.Pad(mode='CONSTANT', paddings=test_paddings_1)
+    pad_op_col_2 = nn.Pad(mode='CONSTANT', paddings=test_paddings_2)
+
+    x_test_1 = Tensor(np.array(test_arr_1), dtype=mindspore.float32)
+    x_test_2 = Tensor(np.array(test_arr_2), dtype=mindspore.float32)
+    y_test_1 = pad_op_col_1(x_test_1).asnumpy()
+    y_test_2 = pad_op_col_2(x_test_2).asnumpy()
+
+    # check size
+    assert y_test_1.shape == (40, 46)
+    assert y_test_2.shape == (3, 10, 30, 37)
+
+    # check values - select correct sections - should match
+    np.testing.assert_equal(y_test_1[:, 3:-3], test_arr_1)
+    np.testing.assert_equal(y_test_2[:, :, :, 6:-1], test_arr_2)
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_cpu_training
+@pytest.mark.env_onecard
+def test_pad_3d_pad():
+    """
+    Test full 3d padding, with all 3 input types
+    """
+    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
+
+    # float32
+    test_arr = np.random.randn(5, 3, 30, 30).astype(np.float32)
+    test_paddings = ((0, 0), (2, 1), (0, 1), (0, 2))  # padding 3 dims now
+    pad_op_3d = nn.Pad(mode='CONSTANT', paddings=test_paddings)
+    x_test = Tensor(np.array(test_arr), dtype=mindspore.float32)
+    y_test = pad_op_3d(x_test).asnumpy()
+    assert y_test.shape == (5, 6, 31, 32)
+    np.testing.assert_equal(test_arr, y_test[:, 2:-1, :-1, :-2])
+
+    # float16
+    test_arr = np.random.randn(5, 3, 30, 30).astype(np.float16)
+    test_paddings = ((0, 0), (2, 1), (0, 1), (0, 2))
+    pad_op_3d = nn.Pad(mode='CONSTANT', paddings=test_paddings)
+    x_test = Tensor(np.array(test_arr), dtype=mindspore.float16)
+    y_test = pad_op_3d(x_test).asnumpy()
+    assert y_test.shape == (5, 6, 31, 32)
+    np.testing.assert_equal(test_arr, y_test[:, 2:-1, :-1, :-2])
+
+    # int32
+    test_arr = np.random.randint(1, 3000, (5, 3, 30, 30)).astype(np.int32)
+    test_paddings = ((0, 0), (2, 1), (0, 1), (0, 2))
+    pad_op_3d = nn.Pad(mode='CONSTANT', paddings=test_paddings)
+    x_test = Tensor(np.array(test_arr), dtype=mindspore.int32)
+    y_test = pad_op_3d(x_test).asnumpy()
+    assert y_test.shape == (5, 6, 31, 32)
+    np.testing.assert_equal(test_arr, y_test[:, 2:-1, :-1, :-2])
+
+
+# For testing backprop
+class Grad(nn.Cell):
+    def __init__(self, network):
+        super(Grad, self).__init__()
+        self.grad = GradOperation(get_all=True, sens_param=True)
+        self.network = network
+
+    def construct(self, input_, output_grad):
+        return self.grad(self.network)(input_, output_grad)
+
+
+class Net(nn.Cell):
+    def __init__(self):
+        super(Net, self).__init__()
+        self.pad = nn.Pad(mode="CONSTANT", paddings=(
+            (0, 0), (4, 3), (1, 1), (0, 2)))
+
+    def construct(self, x):
+        return self.pad(x)
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_cpu_training
+@pytest.mark.env_onecard
+def test_pad_3d_backprop():
+    """
+    Confirm correct 3d padding backprop
+    """
+    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
+    net = Grad(Net())
+    padded_shape = (5, 10, 32, 32)
+
+    # float32
+    test_arr = np.random.randn(5, 3, 30, 30).astype(np.float32)
+    x_test = Tensor(test_arr, dtype=mindspore.float32)
+    dy = np.random.randn(*padded_shape).astype(np.float32)
+    expected_dx = dy[:, 4:-3, 1:-1, :-2]
+    dx = net(x_test, Tensor(dy))
+    dx = dx[0].asnumpy()
+    np.testing.assert_array_equal(dx, expected_dx)
+
+    # float16
+    test_arr = np.random.randn(5, 3, 30, 30).astype(np.float16)
+    x_test = Tensor(test_arr, dtype=mindspore.float16)
+    dy = np.random.randn(*padded_shape).astype(np.float16)
+    expected_dx = dy[:, 4:-3, 1:-1, :-2]
+    dx = net(x_test, Tensor(dy))
+    dx = dx[0].asnumpy()
+    np.testing.assert_array_equal(dx, expected_dx)
+
+    # int32
+    test_arr = np.random.randint(1, 3000, (5, 3, 30, 30)).astype(np.int32)
+    x_test = Tensor(test_arr, dtype=mindspore.int32)
+    dy = np.random.randn(*padded_shape).astype(np.int32)
+    expected_dx = dy[:, 4:-3, 1:-1, :-2]
+    dx = net(x_test, Tensor(dy))
+    dx = dx[0].asnumpy()
+    np.testing.assert_array_equal(dx, expected_dx)
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_cpu_training
+@pytest.mark.env_onecard
+def test_pad_error_cases():
+    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
+
+    # TEST 1 - Neg padding values
+    test_op = nn.Pad(paddings=((0, 0), (-1, -1)), mode="CONSTANT")
+    test_arr = np.random.randn(3, 3)
+    test_arr_ms = Tensor(test_arr, dtype=mindspore.float32)
+
+    with pytest.raises(ValueError):
+        test_op(test_arr_ms)
+
+    # TEST 2 - Mismatched input size and paddings - 1D tensor
+    test_op = nn.Pad(paddings=((0, 0), (1, 0)), mode="CONSTANT")
+    test_arr = np.random.randn(3)  # 1D Tensor
+    test_arr_ms = Tensor(test_arr, dtype=mindspore.float32)
+
+    with pytest.raises(ValueError):
+        test_op(test_arr_ms)
+
+    # TEST 3 - Mismatched input size and paddings - 2D tensor, 3D padding
+    test_op = nn.Pad(paddings=((0, 0), (1, 0)), mode="CONSTANT")  # 2D Padding
+    test_arr = np.random.randn(1, 3, 3)  # 3D Tensor
+    test_arr_ms = Tensor(test_arr, dtype=mindspore.float32)
+
+    with pytest.raises(ValueError):
+        test_op(test_arr_ms)
+
+    # TEST 4 - 1D Paddings should not work
+    with pytest.raises(TypeError):
+        test_op = nn.Pad(paddings=((0, 2)), mode="CONSTANT")
+
+    # TEST 5 - Padding beyond 4d - (added check in nn file in PR)
+    with pytest.raises(ValueError):
+        _ = nn.Pad(paddings=((0, 0), (0, 0,), (0, 0), (0, 0),
+                             (1, 0)), mode="CONSTANT")  # 2D Padding