pad fp32 support less than 4d input

parent fafae4c6ee
commit 8e3eaae2fc
@@ -127,6 +127,10 @@ Primitive *Primitive::CreatePrimitive(schema::Primitive *primitive) {
       return new lite::Flatten(const_cast<schema::Primitive *>(primitive));
     case schema::PrimitiveType_StridedSlice:
       return new lite::StridedSlice(const_cast<schema::Primitive *>(primitive));
+    case schema::PrimitiveType_Resize:
+      return new lite::Resize(const_cast<schema::Primitive *>(primitive));
+    case schema::PrimitiveType_OneHot:
+      return new lite::OneHot(const_cast<schema::Primitive *>(primitive));
     default:
       break;
   }
@@ -37,14 +37,12 @@ int Pad::InferShape(std::vector<tensor::Tensor *> inputs, std::vector<tensor::Te
   if (paddings == nullptr) {
     return RET_NULL_PTR;
   }
-  MS_ASSERT(paddings->size() == kPaddingsSize);
 
   auto input = inputs.front();
   if (input == nullptr) {
     return RET_NULL_PTR;
   }
   auto input_shape = input->shape();
-  MS_ASSERT(input_shape.size() == kInputRank);
   std::vector<int> output_shape;
   for (size_t i = 0; i < input_shape.size(); i++) {
     auto shape = input_shape[i] + (*paddings)[2 * i] + (*paddings)[2 * i + 1];
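With both MS_ASSERTs gone, Pad::InferShape no longer pins the input to rank 4: each output dimension is simply the input dimension plus the pads on each side of it. A minimal standalone sketch of the loop's arithmetic (example values only, not MindSpore API calls):

#include <cstdio>
#include <vector>

int main() {
  // Assumed example: rank-2 input of shape [2, 3], paddings laid out as
  // {before_0, after_0, before_1, after_1} = {1, 1, 2, 2}.
  std::vector<int> input_shape = {2, 3};
  std::vector<int> paddings = {1, 1, 2, 2};
  std::vector<int> output_shape;
  for (size_t i = 0; i < input_shape.size(); i++) {
    // Same formula as the InferShape loop above.
    output_shape.push_back(input_shape[i] + paddings[2 * i] + paddings[2 * i + 1]);
  }
  for (int d : output_shape) printf("%d ", d);  // prints: 4 7
  return 0;
}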
@@ -383,12 +383,14 @@ PadParameter *PopulatePadParameter(const lite::Primitive *primitive) {
     pad_param->constant_value_ = pad_node->constantValue();
   } else {
     MS_LOG(ERROR) << "Invalid padding mode: " << pad_param->pad_mode_;
     delete (pad_param);
     return nullptr;
   }
 
+  auto size = pad_node->paddings()->size();
+  if (size > MAX_PAD_SIZE) {
     MS_LOG(ERROR) << "Invalid padding size: " << size;
     delete (pad_param);
     return nullptr;
   }
 
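The populate check now only rejects paddings vectors longer than MAX_PAD_SIZE, so a node for a sub-4D input may carry fewer entries. The hunk does not show how the shorter vector is expanded into the kernel's fixed-size array; a plausible completion, assumed here to mirror the right-aligned in_/out_ shape arrays that PadCPUKernel::Init() builds further down, is to left-fill with zeros:

#include <cstdio>
#include <vector>

int main() {
  const int kMaxPadSize = 8;                      // assumed value of MAX_PAD_SIZE: 2 pads x 4 dims
  std::vector<int> node_paddings = {1, 1, 2, 2};  // assumed rank-2 source op
  int paddings[kMaxPadSize] = {0};                // leading (fake) dims keep zero padding
  size_t size = node_paddings.size();
  for (size_t i = 0; i < size; i++) {
    paddings[kMaxPadSize - size + i] = node_paddings[i];
  }
  for (int p : paddings) printf("%d ", p);        // prints: 0 0 0 0 1 1 2 2
  return 0;
}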
@@ -33,8 +33,6 @@ namespace mindspore::kernel {
 namespace {
 constexpr int kInputNum = 1;
 constexpr int kOutputNum = 1;
-constexpr int kInputRank = 4;
-constexpr int kPaddingsSize = 8;
 }  // namespace
 
 int PadCPUKernel::Init() {
@@ -52,21 +50,14 @@ int PadCPUKernel::Init() {
   }
 
   auto rank = input->shape().size();
-  if (rank != kInputRank) {
-    MS_LOG(ERROR) << "Pad input rank should be " << kInputRank << ", got " << rank;
+  if (rank > DEFAULT_PAD_NDIMS) {
+    MS_LOG(ERROR) << "Pad input rank should <= " << DEFAULT_PAD_NDIMS << ", got " << rank;
     return RET_ERROR;
   }
 
-  if (paddings_size_ != kPaddingsSize) {
-    MS_LOG(ERROR) << "Pad op paddings size should be 2*input_rank: " << 2 * rank << " but got " << paddings_size_;
-    return RET_ERROR;
-  }
-
-  for (auto pad : paddings_) {
-    if (pad < 0) {
-      MS_LOG(ERROR) << "Pad op paddings should be >= 0, but got " << pad;
-      return RET_ERROR;
-    }
-  }
+  for (int i = 0; i < rank; i++) {
+    in_[DEFAULT_PAD_NDIMS - rank + i] = input->shape()[i];
+    out_[DEFAULT_PAD_NDIMS - rank + i] = output->shape()[i];
+  }
   return RET_OK;
 }
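This is the heart of the sub-4D support: rather than rejecting rank != 4, Init() right-aligns the real shape into the fixed in_/out_ arrays declared in the kernel header, leaving the leading dimensions at their default of 1 (a size-1 dimension with zero padding is a no-op for the 4D kernel). A standalone sketch of the alignment (example shape assumed):

#include <cstdio>

int main() {
  const int kDefaultPadNdims = 4;  // assumed value of DEFAULT_PAD_NDIMS
  int shape[] = {5, 7};            // assumed rank-2 input
  int rank = 2;
  int in[4] = {1, 1, 1, 1};        // same defaults as in_ in the kernel header
  for (int i = 0; i < rank; i++) {
    in[kDefaultPadNdims - rank + i] = shape[i];
  }
  printf("%d %d %d %d\n", in[0], in[1], in[2], in[3]);  // prints: 1 1 5 7
  return 0;
}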
@@ -87,10 +78,8 @@ int PadCPUKernel::RunImpl(int task_id) {
 
   auto input_data = reinterpret_cast<float *>(input->Data());
   auto output_data = reinterpret_cast<float *>(output->Data());
-  auto input_shape = input->shape().data();
-  auto output_shape = output->shape().data();
 
-  Pad(input_data, output_data, input_shape, output_shape, paddings_.data(), task_id, context_->threadNum);
+  Pad(input_data, output_data, in_, out_, pad_param_->paddings_, task_id, context_->threadNum);
 
   return RET_OK;
 }
@@ -27,7 +27,9 @@ class PadCPUKernel : public LiteKernel {
  public:
   PadCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
                const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx)
-      : LiteKernel(parameter, inputs, outputs), context_(ctx) {}
+      : LiteKernel(parameter, inputs, outputs), context_(ctx) {
+    pad_param_ = reinterpret_cast<PadParameter *>(parameter);
+  }
 
   ~PadCPUKernel() {}
 
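Caching the parameter here lets RunImpl hand pad_param_->paddings_ directly to the opclib Pad function. The reinterpret_cast relies on the usual opclib convention in which each concrete parameter struct begins with its OpParameter member, so both pointers refer to the same object; a minimal sketch of that layout (struct fields are assumptions for illustration):

#include <cstdio>

struct OpParameter { int type_; };
struct PadParameter {
  OpParameter op_parameter_;  // must stay the first member for the cast to be valid
  int paddings_[8];
};

int main() {
  PadParameter pad_param = {{42}, {0, 0, 0, 0, 1, 1, 2, 2}};
  OpParameter *base = &pad_param.op_parameter_;          // what the kernel receives
  auto *param = reinterpret_cast<PadParameter *>(base);  // same address, wider view
  printf("type=%d, last pad=%d\n", param->op_parameter_.type_, param->paddings_[7]);
  return 0;
}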
@@ -37,9 +39,10 @@ class PadCPUKernel : public LiteKernel {
   int RunImpl(int task_id);
 
  private:
-  std::vector<int> paddings_;
-  size_t paddings_size_;
   const lite::Context *context_;
+  const PadParameter *pad_param_;
+  int in_[4] = {1, 1, 1, 1};
+  int out_[4] = {1, 1, 1, 1};
 };
 }  // namespace mindspore::kernel
 
@@ -41,6 +41,16 @@ void IndirectGemmFp32_Comm(float *output, const float *input, const float *weigh
 void IndirectGemmFp32(float *output, const float *input, const float *weight, const float *bias, size_t step, int ic4,
                       int output_channel, size_t offset, size_t relu, size_t relu6);
 
+inline int offset(const int *shape, const int dim0, const int dim1, const int dim2, const int dim3) {
+  return ((dim0 * shape[1] + dim1) * shape[2] + dim2) * shape[3] + dim3;
+}
+
+inline int offsetComm(const int *shape, const int dim0, const int dim1, const int dim2) {
+  return ((dim0 * shape[1] + dim1) * shape[2] + dim2) * shape[3];
+}
+
+inline int offset4d(const int *shape, const int *dims) { return offset(shape, dims[0], dims[1], dims[2], dims[3]); }
+
 #ifdef ENABLE_ARM64
 void BiasAdd(const float *bias, float *data, size_t oc4, size_t plan_size);
 void BiasAddRelu6(const float *bias, float *data, size_t oc4, size_t plan_size);
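These three helpers, relocated from the deleted offset_utils.h (removed at the bottom of this commit), compute row-major flat indices by Horner's rule. For shape = {2, 3, 4, 5}, offset(shape, 1, 2, 3, 4) = ((1 * 3 + 2) * 4 + 3) * 5 + 4 = 119, the final element of the 2 * 3 * 4 * 5 = 120-element buffer. offsetComm returns the start of a dim3 row (equivalent to offset with dim3 = 0), and offset4d is offset applied to a 4-entry index array.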
@@ -54,4 +64,3 @@ void Relu(float *data, size_t element4);
 #endif
 
-
 #endif /* MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_COMMON_FUNC_H_ */
@@ -15,6 +15,7 @@
  */
 
 #include "src/runtime/kernel/arm/opclib/fp32/pad.h"
+#include "src/runtime/kernel/arm/opclib/common_func.h"
 
 void Pad(const float *input_data, float *output_data, const int *input_shape, const int *output_shape,
          const int *paddings, const int tid, const int thread_num) {
@@ -25,10 +26,9 @@ void Pad(const float *input_data, float *output_data, const int *input_shape, co
     out[1] = in[1] + paddings[2];
     for (in[2] = 0; in[2] < input_shape[2]; in[2]++) {
       out[2] = in[2] + paddings[4];
-      for (in[3] = 0; in[3] < input_shape[3]; in[3]++) {
-        out[3] = in[3] + paddings[6];
-        output_data[offset4d(output_shape, out)] = input_data[offset4d(input_shape, in)];
-      }
+      float *dst = output_data + offset(output_shape, out[0], out[1], out[2], paddings[6]);
+      const float *src = input_data + offset(input_shape, in[0], in[1], in[2], 0);
+      memcpy(dst, src, input_shape[3] * sizeof(float));
     }
   }
 }
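Since dimension 3 is contiguous in this row-major layout, the per-element inner loop over in[3] collapses into one memcpy: a whole input row of input_shape[3] floats lands just after the row's left padding in the output. A toy standalone version of the same row copy (values assumed):

#include <cstdio>
#include <cstring>

int main() {
  // Assumed toy case: one row of 3 floats, padded by 1 on each side.
  const float src[3] = {1.0f, 2.0f, 3.0f};
  float dst[5] = {0};                              // output row, pre-filled with the pad value
  const int left_pad = 1;                          // plays the role of paddings[6]
  memcpy(dst + left_pad, src, 3 * sizeof(float));  // one contiguous copy per row
  for (float v : dst) printf("%g ", v);            // prints: 0 1 2 3 0
  return 0;
}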
@@ -21,7 +21,6 @@
 #endif
 #include <memory.h>
 #include <float.h>
-#include "src/runtime/kernel/arm/opclib/offset_utils.h"
 #include "src/runtime/kernel/arm/opclib/op_base.h"
 #include "src/runtime/kernel/arm/opclib/pad_parameter.h"
 
@@ -15,6 +15,7 @@
  */
 
 #include "src/runtime/kernel/arm/opclib/int8/pad.h"
+#include "src/runtime/kernel/arm/opclib/common_func.h"
 
 void PadConstant4D(const int8_t *in_data, int8_t *out_data, const int32_t *in_dims, const int32_t *out_dims,
                    const int32_t *paddings) {
@@ -19,7 +19,6 @@
 
 #include <string.h>
 #include "src/runtime/kernel/arm/opclib/op_base.h"
-#include "src/runtime/kernel/arm/opclib/offset_utils.h"
 #include "src/runtime/kernel/arm/opclib/pad_parameter.h"
 
 void PadConstant4D(const int8_t *in_data, int8_t *out_data, const int32_t *in_dims, const int32_t *out_dims,
@@ -1,34 +0,0 @@
-/**
- * Copyright 2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_OFFSET_UTILS_H_
-#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_OFFSET_UTILS_H_
-
-#ifdef ENABLE_NEON
-#include <arm_neon.h>
-#endif
-
-inline int offset(const int *shape, const int dim0, const int dim1, const int dim2, const int dim3) {
-  return ((dim0 * shape[1] + dim1) * shape[2] + dim2) * shape[3] + dim3;
-}
-
-inline int offsetComm(const int *shape, const int dim0, const int dim1, const int dim2) {
-  return ((dim0 * shape[1] + dim1) * shape[2] + dim2) * shape[3];
-}
-
-inline int offset4d(const int *shape, const int *dims) { return offset(shape, dims[0], dims[1], dims[2], dims[3]); }
-
-#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_OFFSET_UTILS_H_
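The file deleted above is opclib/offset_utils.h (its include guard names it); its three inline helpers moved verbatim into common_func.h earlier in this commit, which is why the .cc hunks add common_func.h while the pad headers drop offset_utils.h.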
@@ -15,8 +15,7 @@
  */
 #include <math.h>
 #include "src/runtime/kernel/arm/opclib/resize.h"
-#include "src/runtime/kernel/arm/opclib/offset_utils.h"
-#include "src/runtime/kernel/arm/opclib/op_base.h"
+#include "src/runtime/kernel/arm/opclib/common_func.h"
 
 int ResizeBilinear(const float *input_data, float *output_data, const int *input_shape, const int *output_shape,
                    bool align_corners, int tid, int thread_num) {