!6372 add coefficient for reduce

Merge pull request !6372 from zhaozhenlong/lite/issue/reduce_coefficient
This commit is contained in:
mindspore-ci-bot 2020-09-18 10:28:55 +08:00 committed by Gitee
commit 7d6874b78f
8 changed files with 356 additions and 141 deletions

View File

@ -23,6 +23,7 @@ struct ReduceParameter {
OpParameter op_parameter_;
bool keep_dims_;
bool reduce_to_end_;
float coeff;
int axes_[REDUCE_MAX_AXES_NUM];
int num_axes_;
int mode_;

View File

@ -676,6 +676,7 @@ table Reduce {
keepDims: int;
mode: ReduceMode;
reduceToEnd: bool = false;
coeff: float = 1.0;
}
table Transpose {

View File

@ -24,11 +24,13 @@ std::vector<int> Reduce::GetAxes() const { return this->primitive_->value.AsRedu
int Reduce::GetKeepDims() const { return this->primitive_->value.AsReduce()->keepDims; }
int Reduce::GetMode() const { return this->primitive_->value.AsReduce()->mode; }
bool Reduce::GetReduceToEnd() const { return this->primitive_->value.AsReduce()->reduceToEnd; }
float Reduce::GetCoeff() const { return this->primitive_->value.AsReduce()->coeff; }
void Reduce::SetAxes(const std::vector<int> &axes) { this->primitive_->value.AsReduce()->axes = axes; }
void Reduce::SetKeepDims(int keep_dims) { this->primitive_->value.AsReduce()->keepDims = keep_dims; }
void Reduce::SetMode(int mode) { this->primitive_->value.AsReduce()->mode = (schema::ReduceMode)mode; }
void Reduce::SetReduceToEnd(bool reduce_to_end) { this->primitive_->value.AsReduce()->reduceToEnd = reduce_to_end; }
void Reduce::SetCoeff(float coeff) { this->primitive_->value.AsReduce()->coeff = coeff; }
int Reduce::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) {
if (this->primitive_ == nullptr) {
@ -101,6 +103,7 @@ std::vector<int> Reduce::GetAxes() const {
int Reduce::GetKeepDims() const { return this->primitive_->value_as_Reduce()->keepDims(); }
int Reduce::GetMode() const { return this->primitive_->value_as_Reduce()->mode(); }
bool Reduce::GetReduceToEnd() const { return this->primitive_->value_as_Reduce()->reduceToEnd(); }
float Reduce::GetCoeff() const { return this->primitive_->value_as_Reduce()->coeff(); }
int Reduce::UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) {
MS_ASSERT(nullptr != primitive);
MS_ASSERT(nullptr != fbb);

View File

@ -38,6 +38,7 @@ class Reduce : public PrimitiveC {
void SetKeepDims(int keep_dims);
void SetMode(int mode);
void SetReduceToEnd(bool reduce_to_end);
void SetCoeff(float coeff);
#else
Reduce() = default;
@ -48,6 +49,7 @@ class Reduce : public PrimitiveC {
int GetKeepDims() const;
int GetMode() const;
bool GetReduceToEnd() const;
float GetCoeff() const;
};
} // namespace lite
} // namespace mindspore

View File

@ -551,6 +551,7 @@ OpParameter *PopulateReduceParameter(const mindspore::lite::PrimitiveC *primitiv
auto reduce = reinterpret_cast<mindspore::lite::Reduce *>(const_cast<mindspore::lite::PrimitiveC *>(primitive));
reduce_param->keep_dims_ = reduce->GetKeepDims();
reduce_param->reduce_to_end_ = reduce->GetReduceToEnd();
reduce_param->coeff = reduce->GetCoeff();
auto axisVector = reduce->GetAxes();
if (axisVector.size() > REDUCE_MAX_AXES_NUM) {
MS_LOG(ERROR) << "Reduce axes size " << axisVector.size() << " exceed limit " << REDUCE_MAX_AXES_NUM;

View File

@ -30,6 +30,7 @@ using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_Mean;
using mindspore::schema::PrimitiveType_Reduce;
using mindspore::schema::ReduceMode;
using mindspore::schema::ReduceMode_ReduceASum;
using mindspore::schema::ReduceMode_ReduceMax;
using mindspore::schema::ReduceMode_ReduceMean;
using mindspore::schema::ReduceMode_ReduceMin;
@ -68,7 +69,11 @@ int ReduceCPUKernel::Init() {
break;
}
case static_cast<int>(ReduceMode_ReduceSumSquare): {
reducer_ = ReduceSumSquare;
reducer_ = ReduceSum;
break;
}
case static_cast<int>(ReduceMode_ReduceASum): {
reducer_ = ReduceSum;
break;
}
default:
@ -125,6 +130,7 @@ int ReduceCPUKernel::Run() {
}
src_data_ = in_tensors_.at(0)->MutableData();
PreProcess();
for (size_t i = 0; i < static_cast<size_t>(num_axes_); ++i) {
if (i != static_cast<size_t>(num_axes_ - 1)) {
dst_data_ = data_buffers_[i];
@ -142,10 +148,56 @@ int ReduceCPUKernel::Run() {
}
src_data_ = dst_data_;
}
if (reduce_param_->reduce_to_end_ && reduce_param_->coeff - 1.0f > 1e-5) {
ret = CalculateCoeffOutput();
if (ret != RET_OK) {
return ret;
}
}
FreeTmpBuffer();
return RET_OK;
}
void ReduceCPUKernel::PreProcess() {
if (data_type_ == kDataTypeInt) {
return;
}
int num = in_tensors_.at(0)->ElementsNum();
float *data = reinterpret_cast<float *>(in_tensors_.at(0)->MutableData());
if (data == nullptr) {
return;
}
if (reduce_param_->mode_ == static_cast<int>(ReduceMode_ReduceASum)) {
for (int i = 0; i < num; ++i) {
if (data[i] < 0.0f) {
data[i] = 0.0f - data[i];
}
}
}
if (reduce_param_->mode_ == static_cast<int>(ReduceMode_ReduceSumSquare)) {
for (int i = 0; i < num; ++i) {
data[i] = data[i] * data[i];
}
}
}
int ReduceCPUKernel::CalculateCoeffOutput() {
auto out_tensor = out_tensors_.at(0);
int num = out_tensor->ElementsNum();
if (data_type_ != kDataTypeFloat) {
return RET_ERROR;
}
float *out_data = reinterpret_cast<float *>(out_tensor->MutableData());
if (out_data == nullptr) {
return RET_NULL_PTR;
}
for (int i = 0; i < num; ++i) {
out_data[i] *= reduce_param_->coeff;
}
return RET_OK;
}
int ReduceCPUKernel::MallocTmpBuffer() {
data_buffers_.clear();
for (auto size : buffer_sizes_) {

View File

@ -36,7 +36,9 @@ class ReduceCPUKernel : public ReduceBaseCPUKernel {
ReduceCPUKernel(OpParameter *param, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
const mindspore::lite::PrimitiveC *primitive)
: ReduceBaseCPUKernel(param, inputs, outputs, ctx, primitive) {}
: ReduceBaseCPUKernel(param, inputs, outputs, ctx, primitive) {
reduce_param_ = reinterpret_cast<ReduceParameter *>(param);
}
~ReduceCPUKernel() {
src_data_ = nullptr;
dst_data_ = nullptr;
@ -50,6 +52,7 @@ class ReduceCPUKernel : public ReduceBaseCPUKernel {
int CallReduceUnit(int task_id);
private:
ReduceParameter *reduce_param_;
Reducer reducer_ = nullptr;
IntReducer int_reducer_ = nullptr;
std::vector<void *> data_buffers_;
@ -61,6 +64,8 @@ class ReduceCPUKernel : public ReduceBaseCPUKernel {
private:
int MallocTmpBuffer();
void FreeTmpBuffer();
int CalculateCoeffOutput();
void PreProcess();
};
} // namespace mindspore::kernel

View File

@ -17,18 +17,82 @@
#include "utils/log_adapter.h"
#include "common/common_test.h"
#include "mindspore/lite/nnacl/fp32/reduce.h"
#include "schema/inner/model_generated.h"
#include "src/tensor.h"
#include "mindspore/lite/src/kernel_registry.h"
#include "mindspore/lite/src/runtime/allocator.h"
using mindspore::lite::Allocator;
using mindspore::lite::Tensor;
using mindspore::schema::ReduceMode;
using mindspore::schema::ReduceMode_ReduceMax;
using mindspore::schema::ReduceMode_ReduceMean;
using mindspore::schema::ReduceMode_ReduceMin;
using mindspore::schema::ReduceMode_ReduceProd;
using mindspore::schema::ReduceMode_ReduceSum;
using mindspore::schema::ReduceMode_ReduceASum;
using mindspore::schema::ReduceMode_ReduceSumSquare;
namespace mindspore {
class TestReduceFp32 : public mindspore::CommonTest {
public:
TestReduceFp32() = default;
int tid = 0;
int thread_num = 1;
void Prepare(const std::vector<int> &in_shape, const std::vector<int> &out_shape, float *input_data,
float *output_data, ReduceMode mode, const int *axes, const int num_axes, bool reduce_to_end,
float coeff);
void TearDown() override;
public:
int tid_ = 0;
int thread_num_ = 1;
float err_tol = 1e-5;
ReduceParameter param_ = {};
Tensor in_tensor_;
Tensor out_tensor_;
std::vector<Tensor *> inputs{&in_tensor_};
std::vector<Tensor *> outputs{&out_tensor_};
kernel::KernelKey desc_ = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_Reduce};
kernel::KernelCreator creator_ = nullptr;
lite::InnerContext *ctx_ = nullptr;
kernel::LiteKernel *kernel_ = nullptr;
};
TEST_F(TestReduceFp32, Mean) {
// Per-test cleanup. The tensors were handed caller-owned stack arrays via
// SetData() in Prepare(), so detach them before the Tensor destructors run;
// otherwise the fixture would try to free memory it never owned.
void TestReduceFp32::TearDown() {
  delete ctx_;
  in_tensor_.SetData(nullptr);
  out_tensor_.SetData(nullptr);
}
void TestReduceFp32::Prepare(const std::vector<int> &in_shape, const std::vector<int> &out_shape, float *input_data,
float *output_data, ReduceMode mode, const int *axes, const int num_axes,
bool reduce_to_end, float coeff) {
in_tensor_.set_data_type(kNumberTypeFloat32);
in_tensor_.set_shape(in_shape);
in_tensor_.SetData(input_data);
out_tensor_.set_data_type(kNumberTypeFloat32);
out_tensor_.set_shape(out_shape);
out_tensor_.SetData(output_data);
param_.mode_ = static_cast<int>(mode);
param_.num_axes_ = num_axes;
memcpy(param_.axes_, axes, num_axes * sizeof(int));
param_.reduce_to_end_ = reduce_to_end;
param_.coeff = coeff;
ctx_ = new (std::nothrow) lite::InnerContext;
ASSERT_EQ(lite::RET_OK, ctx_->Init());
creator_ = lite::KernelRegistry::GetInstance()->GetCreator(desc_);
if (ctx_->allocator == nullptr) {
ctx_->allocator = Allocator::Create();
}
ctx_->thread_num_ = thread_num_;
kernel_ = creator_(inputs, outputs, reinterpret_cast<OpParameter *>(&param_), ctx_, desc_, nullptr);
}
TEST_F(TestReduceFp32, Mean1) {
/* 2 4 4 3 NHWC */
float in[96] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
@ -39,21 +103,52 @@ TEST_F(TestReduceFp32, Mean) {
float correct[24] = {18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0,
66.0, 67.0, 68.0, 69.0, 70.0, 71.0, 72.0, 73.0, 74.0, 75.0, 76.0, 77.0};
int input_shape[4] = {2, 4, 4, 3};
// int output_shape[4] = {2, 1, 4, 3};
std::vector<int> in_shape{2, 4, 4, 3};
std::vector<int> out_shape{2, 1, 4, 3};
int axes[1] = {1};
int axis_num = 1;
float out[24] = {0};
int outer_size = 2;
int inner_size = 12;
int axis_size = 4;
(void)ReduceMean(outer_size, inner_size, axis_size, in, out, tid, thread_num);
bool reduce_to_end = false;
float coeff = 1.0f;
thread_num_ = 2;
Prepare(in_shape, out_shape, in, out, ReduceMode_ReduceMean, axes, axis_num, reduce_to_end, coeff);
kernel_->Run();
int output_size = 24;
CompareOutputData(out, correct, output_size, err_tol);
}
TEST_F(TestReduceFp32, Mean2Thread) {
/* 2*4*4*3 NHWC */
// thread num 2 reduce_to_end
// Mean reduced from axis 1 to the end (reduce_to_end) with coeff 2.0, two
// threads: each batch of 48 elements collapses to its mean, then doubled.
TEST_F(TestReduceFp32, Mean2) {
  /* 2 4 4 3 NHWC */
  float input_data[96];
  for (int i = 0; i < 96; ++i) {
    input_data[i] = static_cast<float>(i);
  }
  // batch means are 23.5 and 71.5; coeff 2.0 doubles them
  float expect[2] = {47.0f, 143.0f};
  float output_data[24] = {0};
  std::vector<int> in_shape{2, 4, 4, 3};
  std::vector<int> out_shape{2, 1, 1, 1};
  int axes[1] = {1};
  thread_num_ = 2;
  Prepare(in_shape, out_shape, input_data, output_data, ReduceMode_ReduceMean, axes, 1, true, 2.0f);
  kernel_->Run();
  CompareOutputData(output_data, expect, 2, err_tol);
}
// thread num 1
TEST_F(TestReduceFp32, Mean3) {
/* 2 4 4 3 NHWC */
float in[96] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0,
@ -63,20 +158,19 @@ TEST_F(TestReduceFp32, Mean2Thread) {
float correct[24] = {18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0,
66.0, 67.0, 68.0, 69.0, 70.0, 71.0, 72.0, 73.0, 74.0, 75.0, 76.0, 77.0};
int input_shape[4] = {2, 4, 4, 3};
// int output_shape[4] = {2, 1, 4, 3};
std::vector<int> in_shape{2, 4, 4, 3};
std::vector<int> out_shape{2, 1, 4, 3};
int axes[1] = {1};
int axis_num = 1;
float out[24] = {0};
int outer_size = 2;
int inner_size = 12;
int axis_size = 4;
thread_num = 2;
tid = 0;
(void)ReduceMean(outer_size, inner_size, axis_size, in, out, tid, thread_num);
tid = 1;
(void)ReduceMean(outer_size, inner_size, axis_size, in, out, tid, thread_num);
bool reduce_to_end = false;
float coeff = 2.0f;
thread_num_ = 1;
int output_size = 24;
Prepare(in_shape, out_shape, in, out, ReduceMode_ReduceMean, axes, axis_num, reduce_to_end, coeff);
kernel_->Run();
int output_size = 2;
CompareOutputData(out, correct, output_size, err_tol);
}
@ -91,37 +185,16 @@ TEST_F(TestReduceFp32, MeanAllAxis) {
float correct[1] = {47.5};
float out[1] = {0};
int input_shape[4] = {2, 4, 4, 3};
int outer_size = 1;
int inner_size = 48;
int axis_size = 2;
float *src = in;
float dst1[48] = {0};
MS_ASSERT(dst != nullptr);
(void)ReduceMean(outer_size, inner_size, axis_size, src, dst1, tid, thread_num);
std::vector<int> in_shape{2, 4, 4, 3};
std::vector<int> out_shape{1, 1, 1, 1};
int axes[4] = {0, 1, 2, 3};
int axis_num = 4;
bool reduce_to_end = false;
float coeff = 0.0f;
thread_num_ = 2;
input_shape[0] = 1; // 1 4 4 3
outer_size = 1;
inner_size = 12;
axis_size = 4;
src = dst1;
float dst2[12] = {0};
(void)ReduceMean(outer_size, inner_size, axis_size, src, dst2, tid, thread_num);
input_shape[1] = 1; // 1 1 4 3
outer_size = 1;
inner_size = 3;
axis_size = 4;
src = dst2;
float dst3[3] = {0};
(void)ReduceMean(outer_size, inner_size, axis_size, src, dst3, tid, thread_num);
input_shape[2] = 1; // 1 1 1 3
outer_size = 1;
inner_size = 1;
axis_size = 3;
src = dst3;
(void)ReduceMean(outer_size, inner_size, axis_size, src, out, tid, thread_num);
Prepare(in_shape, out_shape, in, out, ReduceMode_ReduceMean, axes, axis_num, reduce_to_end, coeff);
kernel_->Run();
int output_size = 1;
CompareOutputData(out, correct, output_size, err_tol);
@ -138,20 +211,24 @@ TEST_F(TestReduceFp32, Sum) {
float correct[24] = {72.0, 76.0, 80.0, 84.0, 88.0, 92.0, 96.0, 100.0, 104.0, 108.0, 112.0, 116.0,
264.0, 268.0, 272.0, 276.0, 280.0, 284.0, 288.0, 292.0, 296.0, 300.0, 304.0, 308.0};
int input_shape[4] = {2, 4, 4, 3};
// int output_shape[4] = {2, 1, 4, 3};
std::vector<int> in_shape{2, 4, 4, 3};
std::vector<int> out_shape{2, 1, 4, 3};
int axes[1] = {1};
int axis_num = 1;
float out[24] = {0};
int outer_size = 2;
int inner_size = 12;
int axis_size = 4;
(void)ReduceSum(outer_size, inner_size, axis_size, in, out, tid, thread_num);
bool reduce_to_end = false;
float coeff = 1.0f;
thread_num_ = 2;
Prepare(in_shape, out_shape, in, out, ReduceMode_ReduceSum, axes, axis_num, reduce_to_end, coeff);
kernel_->Run();
int output_size = 24;
CompareOutputData(out, correct, output_size, err_tol);
}
TEST_F(TestReduceFp32, Sum2Thread) {
// sum reduce_to_end
TEST_F(TestReduceFp32, Sum2) {
/* 2*4*4*3 NHWC */
float in[96] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
@ -159,23 +236,51 @@ TEST_F(TestReduceFp32, Sum2Thread) {
48.0, 49.0, 50.0, 51.0, 52.0, 53.0, 54.0, 55.0, 56.0, 57.0, 58.0, 59.0, 60.0, 61.0, 62.0, 63.0,
64.0, 65.0, 66.0, 67.0, 68.0, 69.0, 70.0, 71.0, 72.0, 73.0, 74.0, 75.0, 76.0, 77.0, 78.0, 79.0,
80.0, 81.0, 82.0, 83.0, 84.0, 85.0, 86.0, 87.0, 88.0, 89.0, 90.0, 91.0, 92.0, 93.0, 94.0, 95.0};
float correct[24] = {72.0, 76.0, 80.0, 84.0, 88.0, 92.0, 96.0, 100.0, 104.0, 108.0, 112.0, 116.0,
264.0, 268.0, 272.0, 276.0, 280.0, 284.0, 288.0, 292.0, 296.0, 300.0, 304.0, 308.0};
float correct[32] = {6.0, 24.0, 42.0, 60.0, 78.0, 96.0, 114.0, 132.0, 150.0, 168.0, 186.0,
204.0, 222.0, 240.0, 258.0, 276.0, 294.0, 312.0, 330.0, 348.0, 366.0, 384.0,
402.0, 420.0, 438.0, 456.0, 474.0, 492.0, 510.0, 528.0, 546.0, 564.0};
int input_shape[4] = {2, 4, 4, 3};
// int output_shape[4] = {2, 1, 4, 3};
std::vector<int> in_shape{2, 4, 4, 3};
std::vector<int> out_shape{2, 4, 4, 1};
int axes[1] = {-1};
int axis_num = 1;
float out[32] = {0};
bool reduce_to_end = true;
float coeff = 2.0f;
thread_num_ = 2;
float out[24] = {0};
int outer_size = 2;
int inner_size = 12;
int axis_size = 4;
thread_num = 2;
tid = 0;
(void)ReduceSum(outer_size, inner_size, axis_size, in, out, tid, thread_num);
tid = 1;
(void)ReduceSum(outer_size, inner_size, axis_size, in, out, tid, thread_num);
Prepare(in_shape, out_shape, in, out, ReduceMode_ReduceSum, axes, axis_num, reduce_to_end, coeff);
kernel_->Run();
int output_size = 24;
int output_size = 32;
CompareOutputData(out, correct, output_size, err_tol);
}
// Sum over the last axis only, reduce_to_end = false: the coefficient (0.0
// here) must be ignored by the kernel.
TEST_F(TestReduceFp32, Sum3) {
  /* 2*4*4*3 NHWC */
  float input_data[96];
  for (int i = 0; i < 96; ++i) {
    input_data[i] = static_cast<float>(i);
  }
  // output i is the sum of one consecutive triple: 3i + (3i+1) + (3i+2) = 9i+3
  float expect[32];
  for (int i = 0; i < 32; ++i) {
    expect[i] = static_cast<float>(9 * i + 3);
  }
  float output_data[32] = {0};
  std::vector<int> in_shape{2, 4, 4, 3};
  std::vector<int> out_shape{2, 4, 4, 1};
  int axes[1] = {-1};
  thread_num_ = 2;
  Prepare(in_shape, out_shape, input_data, output_data, ReduceMode_ReduceSum, axes, 1, false, 0.0f);
  kernel_->Run();
  CompareOutputData(output_data, expect, 32, err_tol);
}
@ -189,38 +294,16 @@ TEST_F(TestReduceFp32, SumAllAxis) {
80.0, 81.0, 82.0, 83.0, 84.0, 85.0, 86.0, 87.0, 88.0, 89.0, 90.0, 91.0, 92.0, 93.0, 94.0, 95.0};
float correct[1] = {4560};
float out[1] = {0};
std::vector<int> in_shape{2, 4, 4, 3};
std::vector<int> out_shape{1, 1, 1, 1};
int axes[4] = {0};
int axis_num = 4;
bool reduce_to_end = true;
float coeff = 1.0f;
thread_num_ = 2;
int input_shape[4] = {2, 4, 4, 3};
int outer_size = 1;
int inner_size = 48;
int axis_size = 2;
float *src = in;
float dst1[48] = {0};
MS_ASSERT(dst != nullptr);
(void)ReduceSum(outer_size, inner_size, axis_size, src, dst1, tid, thread_num);
input_shape[0] = 1; // 1 4 4 3
outer_size = 1;
inner_size = 12;
axis_size = 4;
src = dst1;
float dst2[12] = {0};
(void)ReduceSum(outer_size, inner_size, axis_size, src, dst2, tid, thread_num);
input_shape[1] = 1; // 1 1 4 3
outer_size = 1;
inner_size = 3;
axis_size = 4;
src = dst2;
float dst3[3] = {0};
(void)ReduceSum(outer_size, inner_size, axis_size, src, dst3, tid, thread_num);
input_shape[2] = 1; // 1 1 1 3
outer_size = 1;
inner_size = 1;
axis_size = 3;
src = dst3;
(void)ReduceSum(outer_size, inner_size, axis_size, src, out, tid, thread_num);
Prepare(in_shape, out_shape, in, out, ReduceMode_ReduceSum, axes, axis_num, reduce_to_end, coeff);
kernel_->Run();
int output_size = 1;
CompareOutputData(out, correct, output_size, err_tol);
@ -237,14 +320,17 @@ TEST_F(TestReduceFp32, Max) {
float correct[24] = {36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0,
84.0, 85.0, 86.0, 87.0, 88.0, 89.0, 90.0, 91.0, 92.0, 93.0, 94.0, 95.0};
int input_shape[4] = {2, 4, 4, 3};
// int output_shape[4] = {2, 1, 4, 3};
std::vector<int> in_shape{2, 4, 4, 3};
std::vector<int> out_shape{2, 1, 4, 3};
int axes[1] = {1};
int axis_num = 1;
float out[24] = {0};
int outer_size = 2;
int inner_size = 12;
int axis_size = 4;
(void)ReduceMax(outer_size, inner_size, axis_size, in, out, tid, thread_num);
bool reduce_to_end = false;
float coeff = 1.0f;
thread_num_ = 2;
Prepare(in_shape, out_shape, in, out, ReduceMode_ReduceMax, axes, axis_num, reduce_to_end, coeff);
kernel_->Run();
int output_size = 24;
CompareOutputData(out, correct, output_size, err_tol);
@ -261,14 +347,17 @@ TEST_F(TestReduceFp32, Min) {
float correct[24] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0,
48.0, 49.0, 50.0, 51.0, 52.0, 53.0, 54.0, 55.0, 56.0, 57.0, 58.0, 59.0};
int input_shape[4] = {2, 4, 4, 3};
// int output_shape[4] = {2, 1, 4, 3};
std::vector<int> in_shape{2, 4, 4, 3};
std::vector<int> out_shape{2, 1, 4, 3};
int axes[1] = {1};
int axis_num = 1;
float out[24] = {0};
int outer_size = 2;
int inner_size = 12;
int axis_size = 4;
(void)ReduceMin(outer_size, inner_size, axis_size, in, out, tid, thread_num);
bool reduce_to_end = false;
float coeff = 1.0f;
thread_num_ = 2;
Prepare(in_shape, out_shape, in, out, ReduceMode_ReduceMin, axes, axis_num, reduce_to_end, coeff);
kernel_->Run();
int output_size = 24;
CompareOutputData(out, correct, output_size, err_tol);
@ -286,14 +375,17 @@ TEST_F(TestReduceFp32, Prod) {
225280.0, 280665.0, 344080.0, 416185.0, 17418240.0, 18546744.0, 19728400.0, 20964824.0,
22257664.0, 23608584.0, 25019280.0, 26491464.0, 28026880.0, 29627288.0, 31294480.0, 33030264.0};
int input_shape[4] = {2, 4, 4, 3};
// int output_shape[4] = {2, 1, 4, 3};
std::vector<int> in_shape{2, 4, 4, 3};
std::vector<int> out_shape{2, 1, 4, 3};
int axes[1] = {1};
int axis_num = 1;
float out[24] = {0};
int outer_size = 2;
int inner_size = 12;
int axis_size = 4;
(void)ReduceProd(outer_size, inner_size, axis_size, in, out, tid, thread_num);
bool reduce_to_end = false;
float coeff = 1.0f;
thread_num_ = 2;
Prepare(in_shape, out_shape, in, out, ReduceMode_ReduceProd, axes, axis_num, reduce_to_end, coeff);
kernel_->Run();
int output_size = 24;
CompareOutputData(out, correct, output_size, err_tol);
@ -307,20 +399,78 @@ TEST_F(TestReduceFp32, SumSquare) {
48.0, 49.0, 50.0, 51.0, 52.0, 53.0, 54.0, 55.0, 56.0, 57.0, 58.0, 59.0, 60.0, 61.0, 62.0, 63.0,
64.0, 65.0, 66.0, 67.0, 68.0, 69.0, 70.0, 71.0, 72.0, 73.0, 74.0, 75.0, 76.0, 77.0, 78.0, 79.0,
80.0, 81.0, 82.0, 83.0, 84.0, 85.0, 86.0, 87.0, 88.0, 89.0, 90.0, 91.0, 92.0, 93.0, 94.0, 95.0};
float correct[24] = {2016.0, 2164.0, 2320.0, 2484.0, 2656.0, 2836.0, 3024.0, 3220.0,
3424.0, 3636.0, 3856.0, 4084.0, 18144.0, 18676.0, 19216.0, 19764.0,
20320.0, 20884.0, 21456.0, 22036.0, 22624.0, 23220.0, 23824.0, 24436.0};
float correct[8] = {1012.0, 7636.0, 21172.0, 41620.0, 68980.0, 103252.0, 144436.0, 192532.0};
int input_shape[4] = {2, 4, 4, 3};
// int output_shape[4] = {2, 1, 4, 3};
std::vector<int> in_shape{2, 4, 4, 3};
std::vector<int> out_shape{2, 4, 1, 1};
int axes[1] = {2};
int axis_num = 1;
float out[8] = {0};
bool reduce_to_end = true;
float coeff = 2.0f;
thread_num_ = 2;
float out[24] = {0};
int outer_size = 2;
int inner_size = 12;
int axis_size = 4;
(void)ReduceSumSquare(outer_size, inner_size, axis_size, in, out, tid, thread_num);
Prepare(in_shape, out_shape, in, out, ReduceMode_ReduceSumSquare, axes, axis_num, reduce_to_end, coeff);
kernel_->Run();
int output_size = 24;
int output_size = 8;
CompareOutputData(out, correct, output_size, err_tol);
}
// SumSquare over the last axis with coeff 2.0 (reduce_to_end): output i is
// 2 * ((3i)^2 + (3i+1)^2 + (3i+2)^2) = 54*i^2 + 36*i + 10.
TEST_F(TestReduceFp32, SumSquare2) {
  /* 2*4*4*3 NHWC */
  float input_data[96];
  for (int i = 0; i < 96; ++i) {
    input_data[i] = static_cast<float>(i);
  }
  float expect[32];
  for (int i = 0; i < 32; ++i) {
    expect[i] = static_cast<float>(54 * i * i + 36 * i + 10);
  }
  float output_data[32] = {0};
  std::vector<int> in_shape{2, 4, 4, 3};
  std::vector<int> out_shape{2, 4, 4, 1};
  int axes[1] = {3};
  thread_num_ = 2;
  Prepare(in_shape, out_shape, input_data, output_data, ReduceMode_ReduceSumSquare, axes, 1, true, 2.0f);
  kernel_->Run();
  CompareOutputData(output_data, expect, 32, err_tol);
}
// ASum (sum of absolute values) over the last axis, coeff 1.0. All inputs are
// non-negative, so the result equals a plain sum: 9*i + 3 per triple.
TEST_F(TestReduceFp32, ASum) {
  /* 2*4*4*3 NHWC */
  float input_data[96];
  for (int i = 0; i < 96; ++i) {
    input_data[i] = static_cast<float>(i);
  }
  float expect[32];
  for (int i = 0; i < 32; ++i) {
    expect[i] = static_cast<float>(9 * i + 3);
  }
  float output_data[32] = {0};
  std::vector<int> in_shape{2, 4, 4, 3};
  std::vector<int> out_shape{2, 4, 4, 1};
  int axes[1] = {3};
  thread_num_ = 2;
  Prepare(in_shape, out_shape, input_data, output_data, ReduceMode_ReduceASum, axes, 1, true, 1.0f);
  kernel_->Run();
  CompareOutputData(output_data, expect, 32, err_tol);
}
} // namespace mindspore