add_reducesum_fp16

This commit is contained in:
sunsuodong 2021-07-14 11:38:59 +08:00
parent 749fdcaf68
commit ad9f90dc1e
3 changed files with 47 additions and 4 deletions

View File

@ -61,3 +61,43 @@ int ReduceMaxFp16(int outer_size, int inner_size, int axis_size, const float16_t
}
return NNACL_OK;
}
// Sums src_data along the reduced axis for one worker's share of the outer dimension.
//
// src_data is indexed as [outer_size][axis_size][inner_size] and dst_data as [outer_size][inner_size]:
//   dst_data[o][j] = sum over k in [0, axis_size) of src_data[o][k][j]
//
// The outer dimension is cut into slices of UP_DIV(outer_size, thread_num) rows; this call processes
// slice number `tid`. A worker whose slice is empty does nothing.
//
// Returns NNACL_OK in every case.
int ReduceSumFp16(int outer_size, int inner_size, int axis_size, const float16_t *src_data, float16_t *dst_data,
                  int tid, int thread_num) {
  int stride = UP_DIV(outer_size, thread_num);
  int start = stride * tid;
  int end = MSMIN(outer_size, start + stride);
  int num = end - start;
  if (num <= 0) {
    // Only `end` is clamped to outer_size, so `start` can lie past the last row for trailing workers.
    // Return before offsetting the pointers, otherwise they would be moved beyond the buffers.
    return NNACL_OK;
  }
#ifdef ENABLE_NEON
  // Largest multiple of C8NUM that fits in inner_size; the remainder is handled by the scalar tail.
  int block_c8 = inner_size - inner_size % C8NUM;
#endif
  int src_stride = axis_size * inner_size;  // elements per outer row in the source
  src_data += start * src_stride;
  dst_data += start * inner_size;
  for (int i = 0; i < num; i++, src_data += src_stride, dst_data += inner_size) {
    int j = 0;
#ifdef ENABLE_NEON
    // Vector path: C8NUM adjacent inner positions are reduced together per step.
    // NOTE(review): the accumulator here is fp16 while the scalar tail below accumulates in float,
    // so rounding can differ between the two paths for long axes - confirm this is acceptable.
    for (; j < block_c8; j += C8NUM) {
      const float16_t *inner_src = src_data + j;
      float16_t *inner_dst = dst_data + j;
      float16x8_t tmp = {0, 0, 0, 0, 0, 0, 0, 0};
      for (int k = 0; k < axis_size; k++) {
        tmp = vaddq_f16(tmp, vld1q_f16(inner_src + k * inner_size));
      }
      vst1q_f16(inner_dst, tmp);
    }
#endif
    // Scalar path: remaining inner positions (all of them when NEON is disabled).
    for (; j < inner_size; j++) {
      const float16_t *inner_src = src_data + j;
      float16_t *inner_dst = dst_data + j;
      float tmp = 0.0f;
      for (int k = 0; k < axis_size; k++) {
        tmp += inner_src[k * inner_size];
      }
      *inner_dst = (float16_t)tmp;
    }
  }
  return NNACL_OK;
}

View File

@ -26,6 +26,8 @@ int ReduceMeanFp16(const int outer_size, const int inner_size, const int axis_si
float16_t *dst_data, const int tid, const int thread_num);
// Declaration only; the definition lives outside this header.
// NOTE(review): presumably a max-reduction over the axis with the same (outer, axis, inner) layout and
// tid/thread_num work split as the other Reduce*Fp16 kernels - confirm against the definition.
int ReduceMaxFp16(int outer_size, int inner_size, int axis_size, const float16_t *src_data, float16_t *dst_data,
int tid, int thread_num);
// Sum-reduction over the middle axis. Treating src_data as [outer_size][axis_size][inner_size], writes
// dst_data[o][j] = sum over k of src_data[o][k][j] for the slice of outer rows assigned to worker `tid`
// out of `thread_num` workers. Returns NNACL_OK.
int ReduceSumFp16(int outer_size, int inner_size, int axis_size, const float16_t *src_data, float16_t *dst_data,
int tid, int thread_num);
#ifdef __cplusplus
}
#endif

View File

@ -49,6 +49,9 @@ int ReduceFp16CPUKernel::Init() {
case static_cast<int>(ReduceMode_ReduceMax):
reducer_ = ReduceMaxFp16;
break;
case static_cast<int>(ReduceMode_ReduceSum):
reducer_ = ReduceSumFp16;
break;
default:
MS_LOG(ERROR) << "Reduce unsupported reduce mode: " << mode_;
return RET_ERROR;
@ -142,11 +145,9 @@ int ReduceFp16CPUKernel::MallocTmpBuffer() {
kernel::InnerKernel *CpuReduceFp16KernelCreator(const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
const lite::Context *ctx, const kernel::KernelKey &desc) {
MS_ASSERT(opParameter != nullptr);
MS_ASSERT(desc.type == schema::PrimitiveType_ReduceFusion);
auto reduce_param = reinterpret_cast<ReduceParameter *>(opParameter);
if (reduce_param->mode_ != ReduceMode_ReduceMean && reduce_param->mode_ != ReduceMode_ReduceMax) {
if (reduce_param->mode_ != ReduceMode_ReduceMean && reduce_param->mode_ != ReduceMode_ReduceMax &&
reduce_param->mode_ != ReduceMode_ReduceSum) {
MS_LOG(ERROR) << "Reduce unsupported reduce mode: " << reduce_param->mode_;
return nullptr;
}