!44510 fix issue I5WGD9
Merge pull request !44510 from tan-wei-cheng/develop-twc-master
This commit is contained in:
commit
6abef12861
|
@ -329,3 +329,31 @@ int ReduceSumDim2Axis1(size_t col_len, const float *src_data, float *dst_data) {
|
|||
dst_data[0] = tmp;
|
||||
return NNACL_OK;
|
||||
}
|
||||
|
||||
int ReduceMeanWithAxis(const float *src_data, float *mean, int64_t size) {
|
||||
if (size == 0 || src_data == NULL) {
|
||||
return NNACL_NULL_PTR;
|
||||
}
|
||||
float sum = 0.0;
|
||||
int64_t i = 0;
|
||||
SIMD_RUN_NO_SCALAR(ReduceSumByLastAxis, i, src_data, &sum, 0);
|
||||
for (; i < size; ++i) {
|
||||
sum += src_data[i];
|
||||
}
|
||||
*mean = sum / size;
|
||||
return NNACL_OK;
|
||||
}
|
||||
|
||||
int ReduceDeviation(const float *src_data, int64_t size, float mean, float *deviation) {
|
||||
if (size == 0 || src_data == NULL) {
|
||||
return NNACL_NULL_PTR;
|
||||
}
|
||||
int64_t i = 0;
|
||||
SIMD_RUN_NO_SCALAR(FloatReduceDeviation, i, src_data, mean, size, deviation);
|
||||
for (; i < size; ++i) {
|
||||
float tmp = src_data[i] - mean;
|
||||
tmp = tmp * tmp;
|
||||
*deviation += tmp;
|
||||
}
|
||||
return NNACL_OK;
|
||||
}
|
||||
|
|
|
@ -55,6 +55,8 @@ int ReduceAll(int outer_size, int inner_size, int axis_size, const bool *src_dat
|
|||
int thread_num);
|
||||
int ReduceSumDim2Axis0(size_t col_size, size_t col_len, size_t row_len, const float *src_data, float *dst_data);
|
||||
int ReduceSumDim2Axis1(size_t col_len, const float *src_data, float *dst_data);
|
||||
// Writes the mean of the first `size` floats of `src_data` to `*mean`.
// Returns NNACL_NULL_PTR when `size` is 0 or `src_data` is NULL, NNACL_OK otherwise.
int ReduceMeanWithAxis(const float *src_data, float *mean, int64_t size);
|
||||
// Adds the sum of squared differences (src_data[i] - mean)^2 over the first `size` floats to
// `*deviation`; the output is accumulated into, so the caller must initialize it beforehand.
// Returns NNACL_NULL_PTR when `size` is 0 or `src_data` is NULL, NNACL_OK otherwise.
int ReduceDeviation(const float *src_data, int64_t size, float mean, float *deviation);
|
||||
|
||||
#ifdef ENABLE_NNACL_INFER_SHAPE
|
||||
int ReduceInferShape(int **in_shape, size_t *dim_size, int *out_shape, int *in_format, int *out_format,
|
||||
|
|
|
@ -199,6 +199,20 @@ static inline int64_t ReduceSumDim2Axis0@SIMD_INSTRUCTION@(int64_t index, size_
|
|||
return index;
|
||||
}
|
||||
|
||||
// SIMD kernel for the squared-deviation sum: starting at `index`, processes `src_data` in steps of
// BLOCK_NUM elements while a full block still fits inside `size`, accumulating
// (src_data[k] - mean)^2 into a vector accumulator, then adds the reduced accumulator to `*deviation`.
//
// Returns the index of the first element that was NOT processed, so the caller can finish the
// remaining tail with another kernel or a scalar loop.
//
// `*deviation` is accumulated into (+=), never reset here.
static inline int64_t FloatReduceDeviation@SIMD_INSTRUCTION@(int64_t index, const float *src_data, float mean,
                                                             size_t size, float *deviation) {
  SIMD_F32 fs_deviation = SIMD_MOV_F32(0);
  SIMD_F32 fs_mean = SIMD_MOV_F32(mean);
  // Compute the bound in signed 64-bit arithmetic: `size - BLOCK_NUM + 1` evaluated as size_t wraps
  // around when size < BLOCK_NUM - 1, and storing it in an int truncates sizes above INT_MAX.
  const int64_t block_max_size = (int64_t)size - BLOCK_NUM + 1;
  for (; index < block_max_size; index += BLOCK_NUM) {
    SIMD_F32 fs_sub = SIMD_LD_F32(src_data + index);

    fs_sub = SIMD_SUB_F32(fs_sub, fs_mean);
    SIMD_F32 fs_pow = SIMD_MUL_F32(fs_sub, fs_sub);
    fs_deviation = SIMD_ADD_F32(fs_deviation, fs_pow);
  }
  // Presumably a horizontal sum of the vector lanes - confirm against the SIMD_GET_SUM_F32 definition.
  *deviation += SIMD_GET_SUM_F32(fs_deviation);
  return index;
}
|
||||
|
||||
@SIMD_INSTRUCTION_END@
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
@ -125,30 +125,24 @@ void ReduceStdCpuKernelMod::RunReduceStdWithSAxis(const std::vector<kernel::Addr
|
|||
for (size_t i = 0; i < dimension; ++i) {
|
||||
transpose_shape[i] = input_shape_[axes[i]];
|
||||
}
|
||||
|
||||
TransposeIterator base_iter(std::move(transpose_shape), std::move(axes), input_shape_);
|
||||
auto task = [this, &base_iter, input_addr, output_mean_addr, output_std_addr, stride](size_t start, size_t end) {
|
||||
auto iter = base_iter;
|
||||
iter.SetPos(start * stride);
|
||||
for (size_t i = start; i < end; ++i) {
|
||||
float mean = 0.0;
|
||||
std::vector<float> src_data(stride);
|
||||
for (size_t j = 0; j < stride; ++j) {
|
||||
mean += static_cast<float>(input_addr[iter.GetPos()]);
|
||||
src_data[j] = static_cast<float>(input_addr[iter.GetPos()]);
|
||||
iter.GenNextPos();
|
||||
}
|
||||
mean = mean / SizeToFloat(stride);
|
||||
float mean = 0.0f;
|
||||
ReduceMeanWithAxis(src_data.data(), &mean, stride);
|
||||
output_mean_addr[i] = static_cast<T>(mean);
|
||||
}
|
||||
iter = base_iter;
|
||||
iter.SetPos(start * stride);
|
||||
for (size_t i = start; i < end; ++i) {
|
||||
float deviation = 0.0;
|
||||
for (size_t j = 0; j < stride; ++j) {
|
||||
deviation += std::pow(static_cast<float>(input_addr[iter.GetPos()] - output_mean_addr[i]), kPowExp);
|
||||
iter.GenNextPos();
|
||||
}
|
||||
float length = unbiased_ ? static_cast<float>(stride - 1) : static_cast<float>(stride);
|
||||
deviation = std::sqrt(deviation / length);
|
||||
|
||||
float deviation = 0.0f;
|
||||
float size = unbiased_ ? static_cast<float>(stride - 1) : static_cast<float>(stride);
|
||||
ReduceDeviation(src_data.data(), stride, mean, &deviation);
|
||||
deviation = std::sqrt(deviation / SizeToFloat(size));
|
||||
output_std_addr[i] = static_cast<T>(deviation);
|
||||
}
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue