!44510 fix issue I5WGD9

Merge pull request !44510 from tan-wei-cheng/develop-twc-master
This commit is contained in:
i-robot 2022-11-02 03:01:00 +00:00 committed by Gitee
commit 6abef12861
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
4 changed files with 53 additions and 15 deletions

View File

@ -329,3 +329,31 @@ int ReduceSumDim2Axis1(size_t col_len, const float *src_data, float *dst_data) {
dst_data[0] = tmp;
return NNACL_OK;
}
/**
 * Computes the arithmetic mean of `size` contiguous floats.
 *
 * @param src_data  Input buffer; elements [0, size) are read.
 * @param mean      Output; receives the sum of the elements divided by `size`.
 * @param size      Number of elements to average; must be positive.
 * @return NNACL_OK on success; NNACL_NULL_PTR when a pointer is NULL or `size` is not positive.
 */
int ReduceMeanWithAxis(const float *src_data, float *mean, int64_t size) {
  // `mean` is written unconditionally at the end, so it is validated together with the input pointer.
  if (src_data == NULL || mean == NULL) {
    return NNACL_NULL_PTR;
  }
  // A non-positive count would divide by zero or by a negative number.
  // The error code used for size == 0 by the original implementation is kept.
  if (size <= 0) {
    return NNACL_NULL_PTR;
  }
  float sum = 0.0f;
  int64_t i = 0;
  // NOTE(review): the last argument is the literal 0, whereas ReduceDeviation hands `size` to its
  // SIMD helper. If that argument is the element count, the vector path never runs here and the
  // scalar loop does all the work. Confirm against ReduceSumByLastAxis's signature before changing.
  SIMD_RUN_NO_SCALAR(ReduceSumByLastAxis, i, src_data, &sum, 0);
  // Scalar loop covers every index from `i` onwards (presumably the macro advances `i` past the
  // elements it already consumed).
  for (; i < size; ++i) {
    sum += src_data[i];
  }
  *mean = sum / (float)size;
  return NNACL_OK;
}
/**
 * Accumulates the sum of squared differences from `mean` into `*deviation`.
 *
 * The result is ADDED to the value already stored in `*deviation`; the caller is responsible for
 * initialising it (typically to 0) before the call.
 *
 * @param src_data   Input buffer; elements [0, size) are read.
 * @param size       Number of elements; must be positive.
 * @param mean       Value subtracted from every element before squaring.
 * @param deviation  In/out accumulator for the squared differences.
 * @return NNACL_OK on success; NNACL_NULL_PTR when a pointer is NULL or `size` is not positive.
 */
int ReduceDeviation(const float *src_data, int64_t size, float mean, float *deviation) {
  // `deviation` is dereferenced by both the SIMD helper and the scalar loop, so reject NULL early.
  if (src_data == NULL || deviation == NULL) {
    return NNACL_NULL_PTR;
  }
  // `size` is forwarded to a helper that takes it as size_t; a negative value would turn into a
  // huge unsigned count there, so it is rejected alongside the original size == 0 case.
  if (size <= 0) {
    return NNACL_NULL_PTR;
  }
  int64_t i = 0;
  SIMD_RUN_NO_SCALAR(FloatReduceDeviation, i, src_data, mean, size, deviation);
  // Scalar loop handles the indices from `i` up to `size` (presumably the remainder left by the
  // SIMD step, or everything when no SIMD implementation is enabled).
  for (; i < size; ++i) {
    const float diff = src_data[i] - mean;
    *deviation += diff * diff;
  }
  return NNACL_OK;
}

View File

@ -55,6 +55,8 @@ int ReduceAll(int outer_size, int inner_size, int axis_size, const bool *src_dat
int thread_num);
// NOTE(review): presumably sum reductions over a 2-D float buffer along axis 0 / axis 1 (judging by
// the names only; confirm against the definitions).
int ReduceSumDim2Axis0(size_t col_size, size_t col_len, size_t row_len, const float *src_data, float *dst_data);
int ReduceSumDim2Axis1(size_t col_len, const float *src_data, float *dst_data);
// Writes the arithmetic mean of src_data[0..size) to *mean.
// Returns NNACL_OK on success; a NULL src_data or a zero size yields NNACL_NULL_PTR.
int ReduceMeanWithAxis(const float *src_data, float *mean, int64_t size);
// Adds the sum of (src_data[i] - mean)^2 for i in [0, size) to *deviation. The value is accumulated,
// not overwritten, so the caller must initialise *deviation before the call.
// Returns NNACL_OK on success; a NULL src_data or a zero size yields NNACL_NULL_PTR.
int ReduceDeviation(const float *src_data, int64_t size, float mean, float *deviation);
#ifdef ENABLE_NNACL_INFER_SHAPE
int ReduceInferShape(int **in_shape, size_t *dim_size, int *out_shape, int *in_format, int *out_format,

View File

@ -199,6 +199,20 @@ static inline int64_t ReduceSumDim2Axis0@SIMD_INSTRUCTION@(int64_t index, size_
return index;
}
// Vector step of the squared-deviation reduction.
// Starting at `index`, consumes BLOCK_NUM floats per iteration while a full block still fits in
// `size`, accumulating (src_data[i] - mean)^2 lane-wise. The folded lane total is then ADDED to
// *deviation (the accumulator is not reset here). Returns the first index that was not processed
// so the caller can finish the tail.
static inline int64_t FloatReduceDeviation@SIMD_INSTRUCTION@(int64_t index, const float *src_data, float mean, size_t size, float *deviation) {
  SIMD_F32 fs_deviation = SIMD_MOV_F32(0);
  SIMD_F32 fs_mean = SIMD_MOV_F32(mean);
  // The bound is computed in int64_t: doing it in size_t and narrowing to int wraps around for
  // size < BLOCK_NUM - 1 and truncates for sizes above INT_MAX, while `index` itself is 64-bit.
  const int64_t block_max_size = (int64_t)size - BLOCK_NUM + 1;
  for (; index < block_max_size; index += BLOCK_NUM) {
    SIMD_F32 fs_sub = SIMD_LD_F32(src_data + index);
    fs_sub = SIMD_SUB_F32(fs_sub, fs_mean);
    SIMD_F32 fs_pow = SIMD_MUL_F32(fs_sub, fs_sub);
    fs_deviation = SIMD_ADD_F32(fs_deviation, fs_pow);
  }
  *deviation += SIMD_GET_SUM_F32(fs_deviation);
  return index;
}
@SIMD_INSTRUCTION_END@
#ifdef __cplusplus
}

View File

@ -125,30 +125,24 @@ void ReduceStdCpuKernelMod::RunReduceStdWithSAxis(const std::vector<kernel::Addr
for (size_t i = 0; i < dimension; ++i) {
transpose_shape[i] = input_shape_[axes[i]];
}
TransposeIterator base_iter(std::move(transpose_shape), std::move(axes), input_shape_);
auto task = [this, &base_iter, input_addr, output_mean_addr, output_std_addr, stride](size_t start, size_t end) {
auto iter = base_iter;
iter.SetPos(start * stride);
for (size_t i = start; i < end; ++i) {
float mean = 0.0;
std::vector<float> src_data(stride);
for (size_t j = 0; j < stride; ++j) {
mean += static_cast<float>(input_addr[iter.GetPos()]);
src_data[j] = static_cast<float>(input_addr[iter.GetPos()]);
iter.GenNextPos();
}
mean = mean / SizeToFloat(stride);
float mean = 0.0f;
ReduceMeanWithAxis(src_data.data(), &mean, stride);
output_mean_addr[i] = static_cast<T>(mean);
}
iter = base_iter;
iter.SetPos(start * stride);
for (size_t i = start; i < end; ++i) {
float deviation = 0.0;
for (size_t j = 0; j < stride; ++j) {
deviation += std::pow(static_cast<float>(input_addr[iter.GetPos()] - output_mean_addr[i]), kPowExp);
iter.GenNextPos();
}
float length = unbiased_ ? static_cast<float>(stride - 1) : static_cast<float>(stride);
deviation = std::sqrt(deviation / length);
float deviation = 0.0f;
float size = unbiased_ ? static_cast<float>(stride - 1) : static_cast<float>(stride);
ReduceDeviation(src_data.data(), stride, mean, &deviation);
deviation = std::sqrt(deviation / SizeToFloat(size));
output_std_addr[i] = static_cast<T>(deviation);
}
};