pad fp32 support less than 4d input

parent fafae4c6ee
commit 8e3eaae2fc
@@ -127,6 +127,10 @@ Primitive *Primitive::CreatePrimitive(schema::Primitive *primitive) {
       return new lite::Flatten(const_cast<schema::Primitive *>(primitive));
     case schema::PrimitiveType_StridedSlice:
       return new lite::StridedSlice(const_cast<schema::Primitive *>(primitive));
+    case schema::PrimitiveType_Resize:
+      return new lite::Resize(const_cast<schema::Primitive *>(primitive));
+    case schema::PrimitiveType_OneHot:
+      return new lite::OneHot(const_cast<schema::Primitive *>(primitive));
     default:
       break;
   }
@@ -37,14 +37,12 @@ int Pad::InferShape(std::vector<tensor::Tensor *> inputs, std::vector<tensor::Te
   if (paddings == nullptr) {
     return RET_NULL_PTR;
   }
-  MS_ASSERT(paddings->size() == kPaddingsSize);
 
   auto input = inputs.front();
   if (input == nullptr) {
     return RET_NULL_PTR;
   }
   auto input_shape = input->shape();
-  MS_ASSERT(input_shape.size() == kInputRank);
   std::vector<int> output_shape;
   for (size_t i = 0; i < input_shape.size(); i++) {
     auto shape = input_shape[i] + (*paddings)[2 * i] + (*paddings)[2 * i + 1];
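With both MS_ASSERTs gone, Pad::InferShape no longer pins the input to rank 4: each output dimension is simply the input dimension plus the pads on each side of it. A minimal standalone sketch of the loop's arithmetic (example values only, not MindSpore API calls):

#include <cstdio>
#include <vector>

int main() {
  // Assumed example: rank-2 input of shape [2, 3], paddings laid out as
  // {before_0, after_0, before_1, after_1} = {1, 1, 2, 2}.
  std::vector<int> input_shape = {2, 3};
  std::vector<int> paddings = {1, 1, 2, 2};
  std::vector<int> output_shape;
  for (size_t i = 0; i < input_shape.size(); i++) {
    // Same formula as the InferShape loop above.
    output_shape.push_back(input_shape[i] + paddings[2 * i] + paddings[2 * i + 1]);
  }
  for (int d : output_shape) printf("%d ", d);  // prints: 4 7
  return 0;
}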
@@ -383,12 +383,14 @@ PadParameter *PopulatePadParameter(const lite::Primitive *primitive) {
     pad_param->constant_value_ = pad_node->constantValue();
   } else {
     MS_LOG(ERROR) << "Invalid padding mode: " << pad_param->pad_mode_;
     delete (pad_param);
     return nullptr;
   }
 
+  auto size = pad_node->paddings()->size();
+  if (size > MAX_PAD_SIZE) {
     MS_LOG(ERROR) << "Invalid padding size: " << size;
     delete (pad_param);
     return nullptr;
   }
 
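The populate check now only rejects paddings vectors longer than MAX_PAD_SIZE, so a node for a sub-4D input may carry fewer entries. The hunk does not show how the shorter vector is expanded into the kernel's fixed-size array; a plausible completion, assumed here to mirror the right-aligned in_/out_ shape arrays that PadCPUKernel::Init() builds further down, is to left-fill with zeros:

#include <cstdio>
#include <vector>

int main() {
  const int kMaxPadSize = 8;                      // assumed value of MAX_PAD_SIZE: 2 pads x 4 dims
  std::vector<int> node_paddings = {1, 1, 2, 2};  // assumed rank-2 source op
  int paddings[kMaxPadSize] = {0};                // leading (fake) dims keep zero padding
  size_t size = node_paddings.size();
  for (size_t i = 0; i < size; i++) {
    paddings[kMaxPadSize - size + i] = node_paddings[i];
  }
  for (int p : paddings) printf("%d ", p);        // prints: 0 0 0 0 1 1 2 2
  return 0;
}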
@@ -33,8 +33,6 @@ namespace mindspore::kernel {
 namespace {
 constexpr int kInputNum = 1;
 constexpr int kOutputNum = 1;
-constexpr int kInputRank = 4;
-constexpr int kPaddingsSize = 8;
 }  // namespace
 
 int PadCPUKernel::Init() {
@@ -52,21 +50,14 @@ int PadCPUKernel::Init() {
   }
 
   auto rank = input->shape().size();
-  if (rank != kInputRank) {
-    MS_LOG(ERROR) << "Pad input rank should be " << kInputRank << ", got " << rank;
+  if (rank > DEFAULT_PAD_NDIMS) {
+    MS_LOG(ERROR) << "Pad input rank should <= " << DEFAULT_PAD_NDIMS << ", got " << rank;
     return RET_ERROR;
   }
 
-  if (paddings_size_ != kPaddingsSize) {
-    MS_LOG(ERROR) << "Pad op paddings size should be 2*input_rank: " << 2 * rank << " but got " << paddings_size_;
-    return RET_ERROR;
-  }
-
-  for (auto pad : paddings_) {
-    if (pad < 0) {
-      MS_LOG(ERROR) << "Pad op paddings should be >= 0, but got " << pad;
-      return RET_ERROR;
-    }
-  }
+  for (int i = 0; i < rank; i++) {
+    in_[DEFAULT_PAD_NDIMS - rank + i] = input->shape()[i];
+    out_[DEFAULT_PAD_NDIMS - rank + i] = output->shape()[i];
+  }
   return RET_OK;
 }
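This is the heart of the sub-4D support: rather than rejecting rank != 4, Init() right-aligns the real shape into the fixed in_/out_ arrays declared in the kernel header, leaving the leading dimensions at their default of 1 (a size-1 dimension with zero padding is a no-op for the 4D kernel). A standalone sketch of the alignment (example shape assumed):

#include <cstdio>

int main() {
  const int kDefaultPadNdims = 4;  // assumed value of DEFAULT_PAD_NDIMS
  int shape[] = {5, 7};            // assumed rank-2 input
  int rank = 2;
  int in[4] = {1, 1, 1, 1};        // same defaults as in_ in the kernel header
  for (int i = 0; i < rank; i++) {
    in[kDefaultPadNdims - rank + i] = shape[i];
  }
  printf("%d %d %d %d\n", in[0], in[1], in[2], in[3]);  // prints: 1 1 5 7
  return 0;
}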
@@ -87,10 +78,8 @@ int PadCPUKernel::RunImpl(int task_id) {
 
   auto input_data = reinterpret_cast<float *>(input->Data());
   auto output_data = reinterpret_cast<float *>(output->Data());
-  auto input_shape = input->shape().data();
-  auto output_shape = output->shape().data();
 
-  Pad(input_data, output_data, input_shape, output_shape, paddings_.data(), task_id, context_->threadNum);
+  Pad(input_data, output_data, in_, out_, pad_param_->paddings_, task_id, context_->threadNum);
 
   return RET_OK;
 }
@@ -27,7 +27,9 @@ class PadCPUKernel : public LiteKernel {
  public:
   PadCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
                const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx)
-      : LiteKernel(parameter, inputs, outputs), context_(ctx) {}
+      : LiteKernel(parameter, inputs, outputs), context_(ctx) {
+    pad_param_ = reinterpret_cast<PadParameter *>(parameter);
+  }
 
   ~PadCPUKernel() {}
 
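Caching the parameter here lets RunImpl hand pad_param_->paddings_ directly to the opclib Pad function. The reinterpret_cast relies on the usual opclib convention in which each concrete parameter struct begins with its OpParameter member, so both pointers refer to the same object; a minimal sketch of that layout (struct fields are assumptions for illustration):

#include <cstdio>

struct OpParameter { int type_; };
struct PadParameter {
  OpParameter op_parameter_;  // must stay the first member for the cast to be valid
  int paddings_[8];
};

int main() {
  PadParameter pad_param = {{42}, {0, 0, 0, 0, 1, 1, 2, 2}};
  OpParameter *base = &pad_param.op_parameter_;          // what the kernel receives
  auto *param = reinterpret_cast<PadParameter *>(base);  // same address, wider view
  printf("type=%d, last pad=%d\n", param->op_parameter_.type_, param->paddings_[7]);
  return 0;
}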
@@ -37,9 +39,10 @@ class PadCPUKernel : public LiteKernel {
   int RunImpl(int task_id);
 
  private:
-  std::vector<int> paddings_;
-  size_t paddings_size_;
   const lite::Context *context_;
+  const PadParameter *pad_param_;
+  int in_[4] = {1, 1, 1, 1};
+  int out_[4] = {1, 1, 1, 1};
 };
 }  // namespace mindspore::kernel
 
@@ -41,6 +41,16 @@ void IndirectGemmFp32_Comm(float *output, const float *input, const float *weigh
 void IndirectGemmFp32(float *output, const float *input, const float *weight, const float *bias, size_t step, int ic4,
                       int output_channel, size_t offset, size_t relu, size_t relu6);
 
+inline int offset(const int *shape, const int dim0, const int dim1, const int dim2, const int dim3) {
+  return ((dim0 * shape[1] + dim1) * shape[2] + dim2) * shape[3] + dim3;
+}
+
+inline int offsetComm(const int *shape, const int dim0, const int dim1, const int dim2) {
+  return ((dim0 * shape[1] + dim1) * shape[2] + dim2) * shape[3];
+}
+
+inline int offset4d(const int *shape, const int *dims) { return offset(shape, dims[0], dims[1], dims[2], dims[3]); }
+
 #ifdef ENABLE_ARM64
 void BiasAdd(const float *bias, float *data, size_t oc4, size_t plan_size);
 void BiasAddRelu6(const float *bias, float *data, size_t oc4, size_t plan_size);
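These three helpers, relocated from the deleted offset_utils.h (removed at the bottom of this commit), compute row-major flat indices by Horner's rule. For shape = {2, 3, 4, 5}, offset(shape, 1, 2, 3, 4) = ((1 * 3 + 2) * 4 + 3) * 5 + 4 = 119, the final element of the 2 * 3 * 4 * 5 = 120-element buffer. offsetComm returns the start of a dim3 row (equivalent to offset with dim3 = 0), and offset4d is offset applied to a 4-entry index array.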
@@ -54,4 +64,3 @@ void Relu(float *data, size_t element4);
 #endif
 
-
 #endif /* MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_COMMON_FUNC_H_ */
@@ -15,6 +15,7 @@
  */
 
 #include "src/runtime/kernel/arm/opclib/fp32/pad.h"
+#include "src/runtime/kernel/arm/opclib/common_func.h"
 
 void Pad(const float *input_data, float *output_data, const int *input_shape, const int *output_shape,
          const int *paddings, const int tid, const int thread_num) {
@@ -25,10 +26,9 @@ void Pad(const float *input_data, float *output_data, const int *input_shape, co
     out[1] = in[1] + paddings[2];
     for (in[2] = 0; in[2] < input_shape[2]; in[2]++) {
       out[2] = in[2] + paddings[4];
-      for (in[3] = 0; in[3] < input_shape[3]; in[3]++) {
-        out[3] = in[3] + paddings[6];
-        output_data[offset4d(output_shape, out)] = input_data[offset4d(input_shape, in)];
-      }
+      float *dst = output_data + offset(output_shape, out[0], out[1], out[2], paddings[6]);
+      const float *src = input_data + offset(input_shape, in[0], in[1], in[2], 0);
+      memcpy(dst, src, input_shape[3] * sizeof(float));
     }
   }
 }
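Since dimension 3 is contiguous in this row-major layout, the per-element inner loop over in[3] collapses into one memcpy: a whole input row of input_shape[3] floats lands just after the row's left padding in the output. A toy standalone version of the same row copy (values assumed):

#include <cstdio>
#include <cstring>

int main() {
  // Assumed toy case: one row of 3 floats, padded by 1 on each side.
  const float src[3] = {1.0f, 2.0f, 3.0f};
  float dst[5] = {0};                              // output row, pre-filled with the pad value
  const int left_pad = 1;                          // plays the role of paddings[6]
  memcpy(dst + left_pad, src, 3 * sizeof(float));  // one contiguous copy per row
  for (float v : dst) printf("%g ", v);            // prints: 0 1 2 3 0
  return 0;
}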
@@ -21,7 +21,6 @@
 #endif
 #include <memory.h>
 #include <float.h>
-#include "src/runtime/kernel/arm/opclib/offset_utils.h"
 #include "src/runtime/kernel/arm/opclib/op_base.h"
 #include "src/runtime/kernel/arm/opclib/pad_parameter.h"
 
@@ -15,6 +15,7 @@
  */
 
 #include "src/runtime/kernel/arm/opclib/int8/pad.h"
+#include "src/runtime/kernel/arm/opclib/common_func.h"
 
 void PadConstant4D(const int8_t *in_data, int8_t *out_data, const int32_t *in_dims, const int32_t *out_dims,
                    const int32_t *paddings) {
@@ -19,7 +19,6 @@
 
 #include <string.h>
 #include "src/runtime/kernel/arm/opclib/op_base.h"
-#include "src/runtime/kernel/arm/opclib/offset_utils.h"
 #include "src/runtime/kernel/arm/opclib/pad_parameter.h"
 
 void PadConstant4D(const int8_t *in_data, int8_t *out_data, const int32_t *in_dims, const int32_t *out_dims,
@@ -1,34 +0,0 @@
-/**
- * Copyright 2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_OFFSET_UTILS_H_
-#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_OFFSET_UTILS_H_
-
-#ifdef ENABLE_NEON
-#include <arm_neon.h>
-#endif
-
-inline int offset(const int *shape, const int dim0, const int dim1, const int dim2, const int dim3) {
-  return ((dim0 * shape[1] + dim1) * shape[2] + dim2) * shape[3] + dim3;
-}
-
-inline int offsetComm(const int *shape, const int dim0, const int dim1, const int dim2) {
-  return ((dim0 * shape[1] + dim1) * shape[2] + dim2) * shape[3];
-}
-
-inline int offset4d(const int *shape, const int *dims) { return offset(shape, dims[0], dims[1], dims[2], dims[3]); }
-
-#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_OFFSET_UTILS_H_
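The file deleted above is opclib/offset_utils.h (its include guard names it); its three inline helpers moved verbatim into common_func.h earlier in this commit, which is why the .cc hunks add common_func.h while the pad headers drop offset_utils.h.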
@@ -15,8 +15,7 @@
  */
 #include <math.h>
 #include "src/runtime/kernel/arm/opclib/resize.h"
-#include "src/runtime/kernel/arm/opclib/offset_utils.h"
-#include "src/runtime/kernel/arm/opclib/op_base.h"
+#include "src/runtime/kernel/arm/opclib/common_func.h"
 
 int ResizeBilinear(const float *input_data, float *output_data, const int *input_shape, const int *output_shape,
                    bool align_corners, int tid, int thread_num) {