add bias_add, arithmetic, reduce C++ lib-free ops

This commit is contained in:
wangzhe 2020-09-16 10:24:18 +08:00
parent 3fe8916afa
commit 37fd058a6c
8 changed files with 643 additions and 1 deletion

View File

@ -10,6 +10,8 @@ file(GLOB KERNEL_SRC
${CMAKE_CURRENT_SOURCE_DIR}/../nnacl/fp32/arithmetic_self.c
${CMAKE_CURRENT_SOURCE_DIR}/../nnacl/fp32/arithmetic.c
${CMAKE_CURRENT_SOURCE_DIR}/../nnacl/fp32/matmul.c
${CMAKE_CURRENT_SOURCE_DIR}/../nnacl/fp32/reduce.c
${CMAKE_CURRENT_SOURCE_DIR}/../nnacl/fp32/arithmetic.c
${CMAKE_CURRENT_SOURCE_DIR}/src/kernel/fp32/*.cc
${CMAKE_CURRENT_SOURCE_DIR}/src/kernel/common/*.cc
)

View File

@ -0,0 +1,238 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "internal/src/kernel/fp32/arithmetic.h"
#include "internal/src/lite_log.h"
#include "internal/include/errorcode.h"
#include "internal/include/model.h"
#include "internal/include/ms_tensor.h"
#include "internal/include/lite_utils.h"
#include "src/runtime/allocator.h"
#include "nnacl/arithmetic_common.h"
#include "nnacl/fp32/arithmetic.h"
#include "schema/ops_generated.h"
typedef int (*ArithmeticRun)(float *input0, float *input1, float *output, int element_size);
typedef int (*ArithmeticOptRun)(float *input0, float *input1, float *output, int element_size,
ArithmeticParameter *param);
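// Recursively walks the broadcast dimensions; once past the last differing dimension (break_pos),
// the remaining data is contiguous and the element-wise kernel runs on out_count elements.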
int BroadcastRun(float *input0, float *input1, float *output, int dim, int out_count, int break_pos,
ArithmeticRun arithmetic_run, ArithmeticParameter *params) {
if (dim > break_pos) {
return arithmetic_run(input0, input1, output, out_count);
}
for (int i = 0; i < params->out_shape_[dim]; ++i) {
int pos0_ = params->in_shape0_[dim] == 1 ? 0 : i;
int pos1_ = params->in_shape1_[dim] == 1 ? 0 : i;
int error_code =
BroadcastRun(input0 + pos0_ * params->in_strides0_[dim], input1 + pos1_ * params->in_strides1_[dim],
output + i * params->out_strides_[dim], dim + 1, out_count, break_pos, arithmetic_run, params);
if (error_code != RET_OK) {
return error_code;
}
}
return RET_OK;
}
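// Computes the broadcast output shape, flags whether broadcasting is needed, records the last
// differing dimension (break_pos) and the element count of the matching tail (outside), and
// precomputes input/output strides for BroadcastRun.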
int CalBroadCasting(const TensorPtrVector &in_tensors, int *outside, int *break_pos, ArithmeticParameter *params) {
params->broadcasting_ = false;
for (int i = 0; i < params->ndim_; i++) {
if (params->in_shape0_[i] != params->in_shape1_[i]) {
if (params->in_shape0_[i] == 1) {
params->out_shape_[i] = params->in_shape1_[i];
} else if (params->in_shape1_[i] == 1) {
params->out_shape_[i] = params->in_shape0_[i];
} else {
LITE_ERROR_LOG("shapes of input tensors can not be broadCasted");
return RET_INPUT_TENSOR_ERROR;
}
params->broadcasting_ = true;
} else {
params->out_shape_[i] = params->in_shape0_[i];
}
}
if (params->broadcasting_) {
*outside = 1;
for (int i = static_cast<int>(params->ndim_) - 1; i >= 0; --i) {  // signed index so the loop terminates if ndim_ is unsigned
if (params->in_shape0_[i] != params->in_shape1_[i]) {
*break_pos = i;
break;
}
(*outside) *= params->out_shape_[i];
}
ComputeStrides(params->in_shape0_, params->in_strides0_, params->ndim_);
ComputeStrides(params->in_shape1_, params->in_strides1_, params->ndim_);
ComputeStrides(params->out_shape_, params->out_strides_, params->ndim_);
}
return RET_OK;
}
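// Dispatches to the broadcast path, the optimized kernel (when one input is a single element),
// or the plain element-wise kernel.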
int RunArithmetic(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, ArithmeticRun arithmetic_run,
ArithmeticOptRun arithmetic_opt_run, int outside, int break_pos, ArithmeticParameter *params) {
int error_code = RET_OK;
int count = out_tensors[0]->ElementsNum();
float *input0_data = reinterpret_cast<float *>(in_tensors[0]->data_);
float *input1_data1 = reinterpret_cast<float *>(in_tensors[1]->data_);
float *output_data = reinterpret_cast<float *>(out_tensors[0]->data_);
if (params->broadcasting_) {
error_code = BroadcastRun(input0_data, input1_data1, output_data, 0, outside, break_pos, arithmetic_run, params);
} else if (arithmetic_opt_run != NULL) {
error_code = arithmetic_opt_run(input0_data, input1_data1, output_data, count, params);
} else {
error_code = arithmetic_run(input0_data, input1_data1, output_data, count);
}
if (error_code != RET_OK) {
return error_code;
}
return RET_OK;
}
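// Aligns the two input ranks by left-padding the shorter shape with 1s, then derives the
// broadcast output shape, data type and format.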
int DoArithmeticInferShape(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, OpParameter *param) {
if (in_tensors.size() != 2 || in_tensors[0]->data_ == NULL || in_tensors[1]->data_ == NULL) {
LITE_ERROR_LOG("input tensors num not correct or input data is NULL!")
return RET_INPUT_TENSOR_ERROR;
}
if (out_tensors.size() != 1) {
LITE_ERROR_LOG("output tensors num not correct!")
return RET_ERROR;
}
ShapeVector in_shape0 = in_tensors[0]->shape_;
ShapeVector in_shape1 = in_tensors[1]->shape_;
int ndim0 = in_shape0.size();
int ndim1 = in_shape1.size();
ArithmeticParameter *arithmeticParameter = (ArithmeticParameter *)param;
if (ndim0 < ndim1) {
arithmeticParameter->ndim_ = ndim1;
int fill_dim_num = ndim1 - ndim0;
int j = 0;
for (size_t i = 0; i < ndim1; i++) {
if (i < fill_dim_num) {
arithmeticParameter->in_shape0_[i] = 1;
} else {
arithmeticParameter->in_shape0_[i] = in_shape0[j++];
}
arithmeticParameter->in_shape1_[i] = in_shape1[i];
}
} else if (ndim0 > ndim1) {
arithmeticParameter->ndim_ = ndim0;
int fill_dim_num = ndim0 - ndim1;
int j = 0;
for (size_t i = 0; i < ndim0; i++) {
if (i < fill_dim_num) {
arithmeticParameter->in_shape1_[i] = 1;
} else {
arithmeticParameter->in_shape1_[i] = in_shape1[j++];
}
arithmeticParameter->in_shape0_[i] = in_shape0[i];
}
} else {
arithmeticParameter->ndim_ = ndim0;
for (size_t i = 0; i < ndim0; i++) {
arithmeticParameter->in_shape0_[i] = in_shape0[i];
arithmeticParameter->in_shape1_[i] = in_shape1[i];
}
}
ShapeVector out_shape;
for (int i = 0; i < arithmeticParameter->ndim_; i++) {
if (arithmeticParameter->in_shape0_[i] != arithmeticParameter->in_shape1_[i]) {
if (arithmeticParameter->in_shape0_[i] == 1) {
out_shape.push_back(arithmeticParameter->in_shape1_[i]);
} else if (arithmeticParameter->in_shape1_[i] == 1) {
out_shape.push_back(arithmeticParameter->in_shape0_[i]);
} else {
LITE_ERROR_LOG("shapes of input tensors can not be broadcasted!")
return RET_INPUT_TENSOR_ERROR;
}
} else {
out_shape.push_back(arithmeticParameter->in_shape0_[i]);
}
}
out_tensors[0]->shape_ = out_shape;
out_tensors[0]->data_type_ = in_tensors[0]->data_type_;
out_tensors[0]->format_ = in_tensors[0]->format_;
return RET_OK;
}
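// Picks the element-wise kernel for the operator type, folding in a fused ReLU/ReLU6 activation.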
int ChooseKernel(const int kernel_type, ArithmeticRun *arithmetic_run, ArithmeticParameter *params) {
if (kernel_type == KernelType::Mul) {
if (params->activation_type_ == mindspore::schema::ActivationType_RELU) {
*arithmetic_run = ElementMulRelu;
} else if (params->activation_type_ == mindspore::schema::ActivationType_RELU6) {
*arithmetic_run = ElementMulRelu6;
} else {
*arithmetic_run = ElementMul;
}
} else {
LITE_ERROR_LOG("unsupported operator type");
return RET_ERROR;
}
return RET_OK;
}
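// Picks the optimized kernel used when one of the inputs is a single element; falls back silently
// if no optimized version exists.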
int ChooseOptKernel(const int kernel_type, ArithmeticOptRun *arithmetic_opt_run, ArithmeticParameter *params) {
if (kernel_type == KernelType::Mul) {
if (params->activation_type_ == mindspore::schema::ActivationType_RELU) {
*arithmetic_opt_run = ElementOptMulRelu;
} else if (params->activation_type_ == mindspore::schema::ActivationType_RELU6) {
*arithmetic_opt_run = ElementOptMulRelu6;
} else {
*arithmetic_opt_run = ElementOptMul;
}
} else {
LITE_INFO_LOG("kernel not have opt version");
}
return RET_OK;
}
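// Entry point: validates tensors, selects the kernels, sets up broadcasting (or the single-element
// fast path) and runs the arithmetic.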
int DoArithmetic(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, Node *node,
mindspore::lite::Allocator *allocator) {
if (in_tensors.size() != 2 || in_tensors[0]->data_ == NULL || in_tensors[1]->data_ == NULL) {
LITE_ERROR_LOG("input tensors num not correct or input data is NULL!")
return RET_INPUT_TENSOR_ERROR;
}
if (out_tensors.size() != 1 || out_tensors[0]->data_ == NULL) {
LITE_ERROR_LOG("output tensors num not correct or output data is NULL!")
return RET_ERROR;
}
if (allocator == NULL) {
LITE_ERROR_LOG("allocator is NULL!")
return RET_ERROR;
}
ArithmeticParameter *params = reinterpret_cast<ArithmeticParameter *>(node->primitive_);
ArithmeticRun arithmetic_run = NULL;
int kernel_type = params->op_parameter_.type_;
int status = ChooseKernel(kernel_type, &arithmetic_run, params);
if (status != RET_OK) {
return status;
}
int outside = 0;
int break_pos = 0;
// fast path when one of the inputs has only one element
params->in_elements_num0_ = in_tensors[0]->ElementsNum();
params->in_elements_num1_ = in_tensors[1]->ElementsNum();
params->out_elements_num_ = out_tensors[0]->ElementsNum();
ArithmeticOptRun arithmetic_opt_run = NULL;
if (params->in_elements_num0_ == 1 || params->in_elements_num1_ == 1) {
params->broadcasting_ = false;
ChooseOptKernel(kernel_type, &arithmetic_opt_run, params);
} else {
int ret = CalBroadCasting(in_tensors, &outside, &break_pos, params);
if (ret != RET_OK) {
return ret;
}
}
return RunArithmetic(in_tensors, out_tensors, arithmetic_run, arithmetic_opt_run, outside, break_pos, params);
}

View File

@ -0,0 +1,29 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef INTERNAL_SRC_KERNEL_FP32_ARITHMETIC_H_
#define INTERNAL_SRC_KERNEL_FP32_ARITHMETIC_H_
#include "internal/include/model.h"
#include "internal/include/lite_utils.h"
#include "src/runtime/allocator.h"
#include "nnacl/arithmetic_common.h"
int DoArithmeticInferShape(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, OpParameter *param);
int DoArithmetic(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, Node *node,
mindspore::lite::Allocator *allocator);
#endif // INTERNAL_SRC_KERNEL_FP32_ARITHMETIC_H_

View File

@ -0,0 +1,82 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "internal/src/kernel/fp32/bias_add.h"
#include "internal/include/model.h"
#include "internal/include/ms_tensor.h"
#include "internal/include/lite_utils.h"
#include "src/runtime/allocator.h"
#include "internal/src/lite_log.h"
#include "internal/include/errorcode.h"
#include "nnacl/arithmetic_common.h"
#include "nnacl/fp32/arithmetic.h"
int DoBiasAddInferShape(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, OpParameter *param) {
if (in_tensors.size() != 2 || in_tensors[0]->data_ == NULL || in_tensors[1]->data_ == NULL) {
LITE_ERROR_LOG("input tensors num not correct or input data is NULL!")
return RET_INPUT_TENSOR_ERROR;
}
if (out_tensors.size() != 1) {
LITE_ERROR_LOG("output tensors num not correct!")
return RET_ERROR;
}
out_tensors[0]->shape_ = in_tensors[0]->shape_;
out_tensors[0]->data_type_ = in_tensors[0]->data_type_;
out_tensors[0]->format_ = in_tensors[0]->format_;
return RET_OK;
}
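// Broadcasts the bias along the last dimension: in_shape1_ is set to all 1s except the channel
// dimension, and BroadcastAdd expands both operands into the temporary tile buffers.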
int DoBiasAdd(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, Node *node,
mindspore::lite::Allocator *allocator) {
if (in_tensors.size() != 2 || in_tensors[0]->data_ == NULL || in_tensors[1]->data_ == NULL) {
LITE_ERROR_LOG("input tensors num not correct or input data is NULL!")
return RET_INPUT_TENSOR_ERROR;
}
if (out_tensors.size() != 1 || out_tensors[0]->data_ == NULL) {
LITE_ERROR_LOG("output tensors num not correct or output data is NULL!")
return RET_ERROR;
}
if (allocator == NULL) {
LITE_ERROR_LOG("allocator is NULL!")
return RET_ERROR;
}
ArithmeticParameter *params = reinterpret_cast<ArithmeticParameter *>(node->primitive_);
ShapeVector dims = in_tensors[0]->shape_;
params->ndim_ = dims.size();
for (size_t i = 0; i < params->ndim_; i++) {
params->in_shape0_[i] = dims[i];
params->in_shape1_[i] = 1;
params->out_shape_[i] = dims[i];
}
params->in_shape1_[params->ndim_ - 1] = dims[params->ndim_ - 1];
float *in = reinterpret_cast<float *>(in_tensors[0]->data_);
float *bias = reinterpret_cast<float *>(in_tensors[1]->data_);
float *out = reinterpret_cast<float *>(out_tensors[0]->data_);
size_t data_size = in_tensors[0]->ElementsNum();
float *tile_in = reinterpret_cast<float *>(allocator->Malloc(data_size * sizeof(float)));
float *tile_bias = reinterpret_cast<float *>(allocator->Malloc(data_size * sizeof(float)));
if (tile_in == NULL || tile_bias == NULL) {
LITE_ERROR_LOG("Memory allocation failed!")
allocator->Free(tile_in);
allocator->Free(tile_bias);
return RET_ERROR;
}
BroadcastAdd(in, bias, tile_in, tile_bias, out, data_size, params);
allocator->Free(tile_in);
allocator->Free(tile_bias);
return RET_OK;
}

View File

@ -0,0 +1,28 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef INTERNAL_SRC_KERNEL_FP32_BIAS_ADD_H_
#define INTERNAL_SRC_KERNEL_FP32_BIAS_ADD_H_
#include "internal/include/model.h"
#include "internal/include/lite_utils.h"
#include "src/runtime/allocator.h"
int DoBiasAddInferShape(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, OpParameter *param);
int DoBiasAdd(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, Node *node,
mindspore::lite::Allocator *allocator);
#endif // INTERNAL_SRC_KERNEL_FP32_BIAS_ADD_H_

View File

@ -0,0 +1,233 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "internal/src/kernel/fp32/reduce.h"
#include <vector>
#include "internal/include/model.h"
#include "internal/include/lite_utils.h"
#include "src/runtime/allocator.h"
#include "internal/src/lite_log.h"
#include "internal/include/errorcode.h"
#include "nnacl/reduce_parameter.h"
#include "nnacl/fp32/reduce.h"
#include "schema/ops_generated.h"
typedef int (*Reducer)(const int outer_size, const int inner_size, const int axis_size, const float *src_data,
float *dst_data, const int tid, const int thread_num);
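// Allocates one intermediate buffer per reduced axis except the last; each buffer holds the
// tensor after collapsing that axis to size 1.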
int MallocTmpBuffer(std::vector<float *> *data_buffers, const ShapeVector &shape, const int *axes, const int num_axes,
mindspore::lite::Allocator *allocator) {
for (int i = 0; i < data_buffers->size(); ++i) {
if (data_buffers->at(i) != NULL) {
allocator->Free(data_buffers->at(i));
data_buffers->at(i) = NULL;
}
}
data_buffers->clear();
ShapeVector input_shape = shape;
const int rank = input_shape.size();
for (auto i = 0; i < num_axes - 1; i++) {
int axis = axes[i];
size_t size = 1;
for (int j = 0; j < rank; j++) {
if (axis != j) {
size *= input_shape[j];
}
}
float *buffer = reinterpret_cast<float *>(allocator->Malloc(size * sizeof(float)));
if (buffer == NULL) {
LITE_ERROR_LOG("Memory allocation failed!")
return RET_ERROR;
}
data_buffers->emplace_back(buffer);
input_shape[axis] = 1;
}
return RET_OK;
}
int FreeTmpBuffer(std::vector<float *> *data_buffers, mindspore::lite::Allocator *allocator) {
for (int i = 0; i < data_buffers->size(); ++i) {
allocator->Free(data_buffers->at(i));
}
data_buffers->clear();
return RET_OK;
}
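// Reduces the requested axes one at a time: intermediate results go to the temp buffers and the
// final axis writes directly into out_data.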
int RunReduce(Reducer reducer, std::vector<float *> data_buffers, float *in_data, float *out_data, Int32Vector axes,
ShapeVector shape) {
int rank = shape.size();
float *dst_data = NULL;
float *src_data = in_data;
ShapeVector tmp_shape = shape;
for (size_t i = 0; i < axes.size(); ++i) {
if (i != axes.size() - 1) {
dst_data = data_buffers[i];
} else {
dst_data = out_data;
}
int axis = axes[i];
int outer_size = 1;
for (int j = 0; j < axis; j++) {
outer_size *= tmp_shape[j];
}
int inner_size = 1;
for (int k = axis + 1; k < rank; k++) {
inner_size *= tmp_shape[k];
}
int axis_size = tmp_shape[axis];
int error_code = reducer(outer_size, inner_size, axis_size, src_data, dst_data, 0, 1);
if (error_code != RET_OK) {
LITE_ERROR_LOG("Reduce run error!")
return RET_ERROR;
}
tmp_shape[axis] = 1;
src_data = dst_data;
}
return RET_OK;
}
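// Normalizes negative axes, expands reduce_to_end into an explicit axis list, handles the
// reduce-all case (num_axes == 0) and builds the output shape honoring keep_dims.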
int DoReduceInferShape(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, OpParameter *param) {
if (in_tensors.size() != 1 || in_tensors[0]->data_ == NULL) {
LITE_ERROR_LOG("input tensors num not correct or input data is NULL!")
return RET_INPUT_TENSOR_ERROR;
}
if (out_tensors.size() != 1) {
LITE_ERROR_LOG("output tensors num not correct!")
return RET_ERROR;
}
ReduceParameter *reduceParameter = reinterpret_cast<ReduceParameter *>(param);
bool keep_dims = reduceParameter->keep_dims_;
int num_axes = reduceParameter->num_axes_;
ShapeVector in_shape = in_tensors[0]->shape_;
int rank = in_shape.size();
Int32Vector out_shape;
Int32Vector axes;
for (int i = 0; i < num_axes; ++i) {
if (reduceParameter->axes_[i] < -rank || reduceParameter->axes_[i] >= rank) {
LITE_ERROR_LOG("reduce_sum got invalid axis!")
return RET_ERROR;
}
if (reduceParameter->axes_[i] < 0) {
axes.push_back(reduceParameter->axes_[i] + rank);
} else {
axes.push_back(reduceParameter->axes_[i]);
}
}
if (reduceParameter->reduce_to_end_) {
if (num_axes != 1) {
LITE_ERROR_LOG("Reduce when reduce_to_end, num of axis should be 1!")
return RET_ERROR;
}
int begin_axis = axes[0];
num_axes = rank - begin_axis;
for (auto i = begin_axis + 1; i < rank; ++i) {
axes.push_back(i);
}
}
if (num_axes == 0) {
axes.resize(rank);
for (size_t i = 0; i < rank; i++) {
axes[i] = i;
if (keep_dims) {
out_shape.push_back(1);
}
}
reduceParameter->num_axes_ = axes.size();
for (int i = 0; i < axes.size(); ++i) {
reduceParameter->axes_[i] = axes[i];
}
out_tensors[0]->shape_ = out_shape;
out_tensors[0]->data_type_ = in_tensors[0]->data_type_;
out_tensors[0]->format_ = in_tensors[0]->format_;
return RET_OK;
}
// reduce on selected axes
for (size_t i = 0; i < rank; i++) {
bool reduce_axis = false;
for (size_t idx = 0; idx < num_axes; ++idx) {
if (axes[idx] == i) {
reduce_axis = true;
break;
}
}
if (reduce_axis) {
if (keep_dims) {
out_shape.push_back(1);
}
} else {
out_shape.push_back(in_shape[i]);
}
}
reduceParameter->num_axes_ = axes.size();
for (int i = 0; i < axes.size(); ++i) {
reduceParameter->axes_[i] = axes[i];
}
out_tensors[0]->shape_ = out_shape;
out_tensors[0]->data_type_ = in_tensors[0]->data_type_;
out_tensors[0]->format_ = in_tensors[0]->format_;
return RET_OK;
}
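// Selects the reduce kernel by mode (sum or mean), allocates the staging buffers and runs the
// per-axis reduction.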
int DoReduce(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, Node *node,
mindspore::lite::Allocator *allocator) {
if (in_tensors.size() != 1 || in_tensors[0]->data_ == NULL) {
LITE_ERROR_LOG("input tensors num not correct or input data is NULL!")
return RET_INPUT_TENSOR_ERROR;
}
if (out_tensors.size() != 1 || out_tensors[0]->data_ == NULL) {
LITE_ERROR_LOG("output tensors num not correct or output data is NULL!")
return RET_ERROR;
}
if (allocator == NULL) {
LITE_ERROR_LOG("allocator is NULL!")
return RET_ERROR;
}
ReduceParameter *params = reinterpret_cast<ReduceParameter *>(node->primitive_);
Reducer reducer = NULL;
if (params->mode_ == mindspore::schema::ReduceMode::ReduceMode_ReduceSum) {
reducer = ReduceSum;
} else if (params->mode_ == mindspore::schema::ReduceMode::ReduceMode_ReduceMean) {
reducer = ReduceMean;
} else {
LITE_ERROR_LOG("unsupported reduce mode!")
return RET_ERROR;
}
std::vector<float *> data_buffers;
int status = MallocTmpBuffer(&data_buffers, in_tensors[0]->shape_, params->axes_, params->num_axes_, allocator);
if (status != RET_OK) {
FreeTmpBuffer(&data_buffers, allocator);
return status;
}
Int32Vector axes;
for (int i = 0; i < params->num_axes_; ++i) {
axes.push_back(params->axes_[i]);
}
status = RunReduce(reducer, data_buffers, reinterpret_cast<float *>(in_tensors[0]->data_),
reinterpret_cast<float *>(out_tensors[0]->data_), axes, in_tensors[0]->shape_);
if (status != RET_OK) {
return status;
}
status = FreeTmpBuffer(&data_buffers, allocator);
if (status != RET_OK) {
return status;
}
return RET_OK;
}

View File

@ -0,0 +1,29 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef INTERNAL_SRC_KERNEL_FP32_REDUCE_H_
#define INTERNAL_SRC_KERNEL_FP32_REDUCE_H_
#include "internal/include/model.h"
#include "internal/include/ms_tensor.h"
#include "internal/include/lite_utils.h"
#include "src/runtime/allocator.h"
int DoReduceInferShape(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, OpParameter *param);
int DoReduce(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, Node *node,
mindspore::lite::Allocator *allocator);
#endif // INTERNAL_SRC_KERNEL_FP32_REDUCE_H_

View File

@ -33,6 +33,7 @@ class InferTest : public mindspore::CommonTest {
TEST_F(InferTest, TestSession) {
Model model;
Node node;
node.name_ = String("node");
model.nodes_.push_back(&node);
node.node_type_ = NodeType::NodeType_CNode;
@ -64,7 +65,7 @@ TEST_F(InferTest, TestSession) {
TensorPtrVector outvec = session.GetOutputs();
ASSERT_EQ(outvec.size(), 1);
for (int i = 0; i < kOutSize; ++i) {
std::cout << *(reinterpret_cast<float *>(outvec.at(0)->data_)+ i) << " ";
std::cout << *(reinterpret_cast<float *>(outvec.at(0)->data_) + i) << " ";
}
std::cout << "\n";
CompareOutputData(reinterpret_cast<float *>(outvec.at(0)->data_), expect_out, kOutSize, 0.000001);