pad fp32: support inputs with fewer than 4 dimensions

This commit is contained in:
zhaozhenlong 2020-07-31 11:59:18 +08:00
parent fafae4c6ee
commit 8e3eaae2fc
12 changed files with 34 additions and 65 deletions

View File

@ -127,6 +127,10 @@ Primitive *Primitive::CreatePrimitive(schema::Primitive *primitive) {
return new lite::Flatten(const_cast<schema::Primitive *>(primitive));
case schema::PrimitiveType_StridedSlice:
return new lite::StridedSlice(const_cast<schema::Primitive *>(primitive));
case schema::PrimitiveType_Resize:
return new lite::Resize(const_cast<schema::Primitive *>(primitive));
case schema::PrimitiveType_OneHot:
return new lite::OneHot(const_cast<schema::Primitive *>(primitive));
default:
break;
}

View File

@ -37,14 +37,12 @@ int Pad::InferShape(std::vector<tensor::Tensor *> inputs, std::vector<tensor::Te
if (paddings == nullptr) {
return RET_NULL_PTR;
}
MS_ASSERT(paddings->size() == kPaddingsSize);
auto input = inputs.front();
if (input == nullptr) {
return RET_NULL_PTR;
}
auto input_shape = input->shape();
MS_ASSERT(input_shape.size() == kInputRank);
std::vector<int> output_shape;
for (size_t i = 0; i < input_shape.size(); i++) {
auto shape = input_shape[i] + (*paddings)[2 * i] + (*paddings)[2 * i + 1];

View File

@ -383,12 +383,14 @@ PadParameter *PopulatePadParameter(const lite::Primitive *primitive) {
pad_param->constant_value_ = pad_node->constantValue();
} else {
MS_LOG(ERROR) << "Invalid padding mode: " << pad_param->pad_mode_;
delete (pad_param);
return nullptr;
}
auto size = pad_node->paddings()->size();
if (size > MAX_PAD_SIZE) {
MS_LOG(ERROR) << "Invalid padding size: " << size;
delete (pad_param);
return nullptr;
}

View File

@ -33,8 +33,6 @@ namespace mindspore::kernel {
namespace {
constexpr int kInputNum = 1;
constexpr int kOutputNum = 1;
constexpr int kInputRank = 4;
constexpr int kPaddingsSize = 8;
} // namespace
int PadCPUKernel::Init() {
@ -52,21 +50,14 @@ int PadCPUKernel::Init() {
}
auto rank = input->shape().size();
if (rank != kInputRank) {
MS_LOG(ERROR) << "Pad input rank should be " << kInputRank << ", got " << rank;
if (rank > DEFAULT_PAD_NDIMS) {
MS_LOG(ERROR) << "Pad input rank should <= " << DEFAULT_PAD_NDIMS << ", got " << rank;
return RET_ERROR;
}
if (paddings_size_ != kPaddingsSize) {
MS_LOG(ERROR) << "Pad op paddings size should be 2*input_rank: " << 2 * rank << " but got " << paddings_size_;
return RET_ERROR;
}
for (auto pad : paddings_) {
if (pad < 0) {
MS_LOG(ERROR) << "Pad op paddings should be >= 0, but got " << pad;
return RET_ERROR;
}
for (int i = 0; i < rank; i++) {
in_[DEFAULT_PAD_NDIMS - rank + i] = input->shape()[i];
out_[DEFAULT_PAD_NDIMS - rank + i] = output->shape()[i];
}
return RET_OK;
}
@ -87,10 +78,8 @@ int PadCPUKernel::RunImpl(int task_id) {
auto input_data = reinterpret_cast<float *>(input->Data());
auto output_data = reinterpret_cast<float *>(output->Data());
auto input_shape = input->shape().data();
auto output_shape = output->shape().data();
Pad(input_data, output_data, input_shape, output_shape, paddings_.data(), task_id, context_->threadNum);
Pad(input_data, output_data, in_, out_, pad_param_->paddings_, task_id, context_->threadNum);
return RET_OK;
}

View File

@ -27,7 +27,9 @@ class PadCPUKernel : public LiteKernel {
public:
PadCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx)
: LiteKernel(parameter, inputs, outputs), context_(ctx) {}
: LiteKernel(parameter, inputs, outputs), context_(ctx) {
pad_param_ = reinterpret_cast<PadParameter *>(parameter);
}
~PadCPUKernel() {}
@ -37,9 +39,10 @@ class PadCPUKernel : public LiteKernel {
int RunImpl(int task_id);
private:
std::vector<int> paddings_;
size_t paddings_size_;
const lite::Context *context_;
const PadParameter *pad_param_;
int in_[4] = {1, 1, 1, 1};
int out_[4] = {1, 1, 1, 1};
};
} // namespace mindspore::kernel

View File

@ -41,6 +41,16 @@ void IndirectGemmFp32_Comm(float *output, const float *input, const float *weigh
void IndirectGemmFp32(float *output, const float *input, const float *weight, const float *bias, size_t step, int ic4,
int output_channel, size_t offset, size_t relu, size_t relu6);
// Row-major (C-order) flattened index of coordinate (dim0, dim1, dim2, dim3)
// in a 4-D tensor whose dimension sizes are shape[0..3].
inline int offset(const int *shape, const int dim0, const int dim1, const int dim2, const int dim3) {
  int index = dim0;
  index = index * shape[1] + dim1;
  index = index * shape[2] + dim2;
  index = index * shape[3] + dim3;
  return index;
}
// Row-major flattened index of (dim0, dim1, dim2, 0): the start of the
// innermost (shape[3]-long) row at that coordinate.
inline int offsetComm(const int *shape, const int dim0, const int dim1, const int dim2) {
  const int row = (dim0 * shape[1] + dim1) * shape[2] + dim2;
  return row * shape[3];
}
// Same as offset(), but takes the four coordinates packed in an array.
inline int offset4d(const int *shape, const int *dims) {
  return ((dims[0] * shape[1] + dims[1]) * shape[2] + dims[2]) * shape[3] + dims[3];
}
#ifdef ENABLE_ARM64
void BiasAdd(const float *bias, float *data, size_t oc4, size_t plan_size);
void BiasAddRelu6(const float *bias, float *data, size_t oc4, size_t plan_size);
@ -54,4 +64,3 @@ void Relu(float *data, size_t element4);
#endif
#endif /* MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_COMMON_FUNC_H_ */

View File

@ -15,6 +15,7 @@
*/
#include "src/runtime/kernel/arm/opclib/fp32/pad.h"
#include "src/runtime/kernel/arm/opclib/common_func.h"
void Pad(const float *input_data, float *output_data, const int *input_shape, const int *output_shape,
const int *paddings, const int tid, const int thread_num) {
@ -25,10 +26,9 @@ void Pad(const float *input_data, float *output_data, const int *input_shape, co
out[1] = in[1] + paddings[2];
for (in[2] = 0; in[2] < input_shape[2]; in[2]++) {
out[2] = in[2] + paddings[4];
for (in[3] = 0; in[3] < input_shape[3]; in[3]++) {
out[3] = in[3] + paddings[6];
output_data[offset4d(output_shape, out)] = input_data[offset4d(input_shape, in)];
}
float *dst = output_data + offset(output_shape, out[0], out[1], out[2], paddings[6]);
const float *src = input_data + offset(input_shape, in[0], in[1], in[2], 0);
memcpy(dst, src, input_shape[3] * sizeof(float));
}
}
}

View File

@ -21,7 +21,6 @@
#endif
#include <memory.h>
#include <float.h>
#include "src/runtime/kernel/arm/opclib/offset_utils.h"
#include "src/runtime/kernel/arm/opclib/op_base.h"
#include "src/runtime/kernel/arm/opclib/pad_parameter.h"

View File

@ -15,6 +15,7 @@
*/
#include "src/runtime/kernel/arm/opclib/int8/pad.h"
#include "src/runtime/kernel/arm/opclib/common_func.h"
void PadConstant4D(const int8_t *in_data, int8_t *out_data, const int32_t *in_dims, const int32_t *out_dims,
const int32_t *paddings) {

View File

@ -19,7 +19,6 @@
#include <string.h>
#include "src/runtime/kernel/arm/opclib/op_base.h"
#include "src/runtime/kernel/arm/opclib/offset_utils.h"
#include "src/runtime/kernel/arm/opclib/pad_parameter.h"
void PadConstant4D(const int8_t *in_data, int8_t *out_data, const int32_t *in_dims, const int32_t *out_dims,

View File

@ -1,34 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_OFFSET_UTILS_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_OFFSET_UTILS_H_
#ifdef ENABLE_NEON
#include <arm_neon.h>
#endif
// Linear index of (dim0, dim1, dim2, dim3) in a row-major 4-D tensor,
// written as a nested Horner form from the innermost dimension outward.
inline int offset(const int *shape, const int dim0, const int dim1, const int dim2, const int dim3) {
  return dim3 + shape[3] * (dim2 + shape[2] * (dim1 + shape[1] * dim0));
}
// Linear index of the first element of the innermost row at (dim0, dim1, dim2),
// i.e. offset(shape, dim0, dim1, dim2, 0).
inline int offsetComm(const int *shape, const int dim0, const int dim1, const int dim2) {
  return shape[3] * (dim2 + shape[2] * (dim1 + shape[1] * dim0));
}
// Array-coordinate variant of offset(): dims holds the four indices.
inline int offset4d(const int *shape, const int *dims) {
  return dims[3] + shape[3] * (dims[2] + shape[2] * (dims[1] + shape[1] * dims[0]));
}
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_OFFSET_UTILS_H_

View File

@ -15,8 +15,7 @@
*/
#include <math.h>
#include "src/runtime/kernel/arm/opclib/resize.h"
#include "src/runtime/kernel/arm/opclib/offset_utils.h"
#include "src/runtime/kernel/arm/opclib/op_base.h"
#include "src/runtime/kernel/arm/opclib/common_func.h"
int ResizeBilinear(const float *input_data, float *output_data, const int *input_shape, const int *output_shape,
bool align_corners, int tid, int thread_num) {