!49848 [MS][LITE] logical and neon/sse/avx/avx512 optimation
Merge pull request !49848 from Greatpan/compare_avx512
This commit is contained in:
commit
96a7881414
|
@ -1,5 +1,5 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
* Copyright 2020-2023 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
|
@ -16,20 +16,20 @@
|
|||
|
||||
#include "nnacl/fp32/arithmetic_compare_fp32.h"
|
||||
|
||||
bool EqualFp32(float x, float y) { return x == y; }
|
||||
bool NotEqualFp32(float x, float y) { return x != y; }
|
||||
bool LessFp32(float x, float y) { return x < y; }
|
||||
bool LessEqualFp32(float x, float y) { return x <= y; }
|
||||
bool GreaterFp32(float x, float y) { return x > y; }
|
||||
bool GreaterEqualFp32(float x, float y) { return x >= y; }
|
||||
inline bool EqualFp32(float x, float y) { return x == y; }
|
||||
inline bool NotEqualFp32(float x, float y) { return x != y; }
|
||||
inline bool LessFp32(float x, float y) { return x < y; }
|
||||
inline bool LessEqualFp32(float x, float y) { return x <= y; }
|
||||
inline bool GreaterFp32(float x, float y) { return x > y; }
|
||||
inline bool GreaterEqualFp32(float x, float y) { return x >= y; }
|
||||
|
||||
bool EqualInt32(int x, int y) { return x == y; }
|
||||
bool NotEqualInt32(int x, int y) { return x != y; }
|
||||
bool NotEqualInt64(int64_t x, int64_t y) { return x != y; }
|
||||
bool LessInt32(int x, int y) { return x < y; }
|
||||
bool LessEqualInt32(int x, int y) { return x <= y; }
|
||||
bool GreaterInt32(int x, int y) { return x > y; }
|
||||
bool GreaterEqualInt32(int x, int y) { return x >= y; }
|
||||
inline bool EqualInt32(int x, int y) { return x == y; }
|
||||
inline bool NotEqualInt32(int x, int y) { return x != y; }
|
||||
inline bool NotEqualInt64(int64_t x, int64_t y) { return x != y; }
|
||||
inline bool LessInt32(int x, int y) { return x < y; }
|
||||
inline bool LessEqualInt32(int x, int y) { return x <= y; }
|
||||
inline bool GreaterInt32(int x, int y) { return x > y; }
|
||||
inline bool GreaterEqualInt32(int x, int y) { return x >= y; }
|
||||
|
||||
#define ELEMENT_COMPARE(input0, input1, output, element_size, compare_func) \
|
||||
do { \
|
||||
|
|
|
@ -188,18 +188,8 @@ int ElementOptFloorDivInt(const int *in0, const int *in1, int *out, int size, co
|
|||
|
||||
int ElementLogicalAnd(const float *in0, const float *in1, float *out, int size) {
|
||||
int index = 0;
|
||||
#ifdef ENABLE_NEON
|
||||
float32x4_t vtrue = vdupq_n_f32(1);
|
||||
float32x4_t vfalse = vdupq_n_f32(0);
|
||||
uint32x4_t mask = vmovq_n_u32(((uint32_t)(1u << 31) - 1));
|
||||
uint32x4_t zeros = vdupq_n_u32(0);
|
||||
for (; index <= size - 4; index += C4NUM) {
|
||||
uint32x4_t vin0 = vandq_u32(vreinterpretq_u32_f32(vld1q_f32(in0 + index)), mask);
|
||||
uint32x4_t vin1 = vandq_u32(vreinterpretq_u32_f32(vld1q_f32(in1 + index)), mask);
|
||||
float32x4_t vout = vbslq_f32(vceqq_u32(vandq_u32(vin0, vin1), zeros), vfalse, vtrue);
|
||||
vst1q_f32(out + index, vout);
|
||||
}
|
||||
#endif
|
||||
|
||||
SIMD_RUN_NO_SCALAR(ElementLogicalAnd, index, in0, in1, out, size);
|
||||
for (; index < size; index++) {
|
||||
out[index] = (float)((bool)(in0[index]) & (bool)(in1[index]));
|
||||
}
|
||||
|
@ -208,6 +198,7 @@ int ElementLogicalAnd(const float *in0, const float *in1, float *out, int size)
|
|||
|
||||
int ElementOptLogicalAnd(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param) {
|
||||
int index = 0;
|
||||
SIMD_RUN_NO_SCALAR(ElementOptLogicalAnd, index, in0, in1, out, size, param);
|
||||
if (param->in_elements_num0_ == 1) {
|
||||
for (; index < size; index++) {
|
||||
out[index] = (float)((bool)(in0[0]) & (bool)(in1[index]));
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
* Copyright 2022-2023 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
|
@ -13,7 +13,7 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// clang-format off
|
||||
#ifndef MINDSPORE_NNACL_ARITHMETIC_@SIMD_INSTRUCTION@_H_
|
||||
#define MINDSPORE_NNACL_ARITHMETIC_@SIMD_INSTRUCTION@_H_
|
||||
|
||||
|
@ -250,6 +250,36 @@ static inline size_t AssignSubOpt@SIMD_INSTRUCTION@(int index, float *in0, const
|
|||
return index;
|
||||
}
|
||||
|
||||
int ElementLogicalAnd@SIMD_INSTRUCTION@(int index, const float *in0, const float *in1, float *out, int size) {
|
||||
for (int block_max_size = size - BLOCK_NUM + 1; index < block_max_size; index += BLOCK_NUM) {
|
||||
SIMD_F32 in0_tmp = SIMD_LD_F32(in0 + index);
|
||||
SIMD_F32 in1_tmp = SIMD_LD_F32(in1 + index);
|
||||
SIMD_F32 out_tmp = SIMD_AND_F32(SIMD_GETSIGN_F32(in0_tmp), SIMD_GETSIGN_F32(in1_tmp));
|
||||
SIMD_ST_F32(out + index, out_tmp);
|
||||
}
|
||||
return index;
|
||||
}
|
||||
|
||||
int ElementOptLogicalAnd@SIMD_INSTRUCTION@(int index, const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param) {
|
||||
if (param->in_elements_num0_ == 1) {
|
||||
SIMD_F32 in0_tmp = SIMD_MOV_F32(*in0);
|
||||
for (int block_max_size = size - BLOCK_NUM + 1; index < block_max_size; index += BLOCK_NUM) {
|
||||
SIMD_F32 in1_tmp = SIMD_LD_F32(in1 + index);
|
||||
SIMD_F32 out_tmp = SIMD_AND_F32(SIMD_GETSIGN_F32(in0_tmp), SIMD_GETSIGN_F32(in1_tmp));
|
||||
SIMD_ST_F32(out + index, out_tmp);
|
||||
}
|
||||
} else {
|
||||
SIMD_F32 in1_tmp = SIMD_MOV_F32(*in1);
|
||||
for (int block_max_size = size - BLOCK_NUM + 1; index < block_max_size; index += BLOCK_NUM) {
|
||||
SIMD_F32 in0_tmp = SIMD_LD_F32(in0 + index);
|
||||
SIMD_F32 out_tmp = SIMD_AND_F32(SIMD_GETSIGN_F32(in0_tmp), SIMD_GETSIGN_F32(in1_tmp));
|
||||
SIMD_ST_F32(out + index, out_tmp);
|
||||
}
|
||||
}
|
||||
|
||||
return index;
|
||||
}
|
||||
|
||||
@SIMD_INSTRUCTION_END@
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue