[ARM] ACLE Chapter 9 intrinsics
Implemented the remaining integer data-processing intrinsics from the ARM ACLE v2.1 spec, such as parallel arithmetic and DSP-style multiplications.
Differential Revision: https://reviews.llvm.org/D32282
llvm-svn: 302131
This commit is contained in: parent 927edebd04, commit b9ea36f9c1
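As context for the diff below, a minimal usage sketch of the kind of intrinsic this commit exposes; it is not part of the commit and assumes a 32-bit ARM target built with the DSP extension (so __ARM_FEATURE_DSP is set):

#include <arm_acle.h>
#include <stdint.h>

/* Multiply the bottom 16-bit halves of a and b and accumulate into acc;
   lowers to SMLABB via __builtin_arm_smlabb. */
int32_t mac_bottom(int32_t acc, int32_t a, int32_t b) {
  return __smlabb(a, b, acc);
}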

@@ -25,11 +25,93 @@
// In libgcc
BUILTIN(__clear_cache, "vv*v*", "i")

// 16-bit multiplications
BUILTIN(__builtin_arm_smulbb, "iii", "nc")
BUILTIN(__builtin_arm_smulbt, "iii", "nc")
BUILTIN(__builtin_arm_smultb, "iii", "nc")
BUILTIN(__builtin_arm_smultt, "iii", "nc")
BUILTIN(__builtin_arm_smulwb, "iii", "nc")
BUILTIN(__builtin_arm_smulwt, "iii", "nc")

// Saturating arithmetic
BUILTIN(__builtin_arm_qadd, "iii", "nc")
BUILTIN(__builtin_arm_qsub, "iii", "nc")
BUILTIN(__builtin_arm_ssat, "iiUi", "nc")
-BUILTIN(__builtin_arm_usat, "UiUiUi", "nc")
+BUILTIN(__builtin_arm_usat, "UiiUi", "nc")

BUILTIN(__builtin_arm_smlabb, "iiii", "nc")
BUILTIN(__builtin_arm_smlabt, "iiii", "nc")
BUILTIN(__builtin_arm_smlatb, "iiii", "nc")
BUILTIN(__builtin_arm_smlatt, "iiii", "nc")
BUILTIN(__builtin_arm_smlawb, "iiii", "nc")
BUILTIN(__builtin_arm_smlawt, "iiii", "nc")

BUILTIN(__builtin_arm_ssat16, "iii", "nc")
BUILTIN(__builtin_arm_usat16, "iii", "nc")

BUILTIN(__builtin_arm_sxtab16, "iii", "nc")
BUILTIN(__builtin_arm_sxtb16, "ii", "nc")
BUILTIN(__builtin_arm_uxtab16, "iii", "nc")
BUILTIN(__builtin_arm_uxtb16, "ii", "nc")

BUILTIN(__builtin_arm_sel, "iii", "nc")

BUILTIN(__builtin_arm_qadd8, "iii", "nc")
BUILTIN(__builtin_arm_qsub8, "iii", "nc")
BUILTIN(__builtin_arm_sadd8, "iii", "nc")
BUILTIN(__builtin_arm_shadd8, "iii", "nc")
BUILTIN(__builtin_arm_shsub8, "iii", "nc")
BUILTIN(__builtin_arm_ssub8, "iii", "nc")
BUILTIN(__builtin_arm_uadd8, "UiUiUi", "nc")
BUILTIN(__builtin_arm_uhadd8, "UiUiUi", "nc")
BUILTIN(__builtin_arm_uhsub8, "UiUiUi", "nc")
BUILTIN(__builtin_arm_uqadd8, "UiUiUi", "nc")
BUILTIN(__builtin_arm_uqsub8, "UiUiUi", "nc")
BUILTIN(__builtin_arm_usub8, "UiUiUi", "nc")

// Sum of 8-bit absolute differences
BUILTIN(__builtin_arm_usad8, "UiUiUi", "nc")
BUILTIN(__builtin_arm_usada8, "UiUiUiUi", "nc")

// Parallel 16-bit addition and subtraction
BUILTIN(__builtin_arm_qadd16, "iii", "nc")
BUILTIN(__builtin_arm_qasx, "iii", "nc")
BUILTIN(__builtin_arm_qsax, "iii", "nc")
BUILTIN(__builtin_arm_qsub16, "iii", "nc")
BUILTIN(__builtin_arm_sadd16, "iii", "nc")
BUILTIN(__builtin_arm_sasx, "iii", "nc")
BUILTIN(__builtin_arm_shadd16, "iii", "nc")
BUILTIN(__builtin_arm_shasx, "iii", "nc")
BUILTIN(__builtin_arm_shsax, "iii", "nc")
BUILTIN(__builtin_arm_shsub16, "iii", "nc")
BUILTIN(__builtin_arm_ssax, "iii", "nc")
BUILTIN(__builtin_arm_ssub16, "iii", "nc")
BUILTIN(__builtin_arm_uadd16, "UiUiUi", "nc")
BUILTIN(__builtin_arm_uasx, "UiUiUi", "nc")
BUILTIN(__builtin_arm_uhadd16, "UiUiUi", "nc")
BUILTIN(__builtin_arm_uhasx, "UiUiUi", "nc")
BUILTIN(__builtin_arm_uhsax, "UiUiUi", "nc")
BUILTIN(__builtin_arm_uhsub16, "UiUiUi", "nc")
BUILTIN(__builtin_arm_uqadd16, "UiUiUi", "nc")
BUILTIN(__builtin_arm_uqasx, "UiUiUi", "nc")
BUILTIN(__builtin_arm_uqsax, "UiUiUi", "nc")
BUILTIN(__builtin_arm_uqsub16, "UiUiUi", "nc")
BUILTIN(__builtin_arm_usax, "UiUiUi", "nc")
BUILTIN(__builtin_arm_usub16, "UiUiUi", "nc")

// Parallel 16-bit multiplication
BUILTIN(__builtin_arm_smlad, "iiii", "nc")
BUILTIN(__builtin_arm_smladx, "iiii", "nc")
BUILTIN(__builtin_arm_smlald, "LLiiiLLi", "nc")
BUILTIN(__builtin_arm_smlaldx, "LLiiiLLi", "nc")
BUILTIN(__builtin_arm_smlsd, "iiii", "nc")
BUILTIN(__builtin_arm_smlsdx, "iiii", "nc")
BUILTIN(__builtin_arm_smlsld, "LLiiiLLi", "nc")
BUILTIN(__builtin_arm_smlsldx, "LLiiiLLi", "nc")
BUILTIN(__builtin_arm_smuad, "iii", "nc")
BUILTIN(__builtin_arm_smuadx, "iii", "nc")
BUILTIN(__builtin_arm_smusd, "iii", "nc")
BUILTIN(__builtin_arm_smusdx, "iii", "nc")

// Bit manipulation
BUILTIN(__builtin_arm_rbit, "UiUi", "nc")
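A reading aid for the table above (my summary of clang's Builtins.def encoding, not part of the diff): the leading type letters give the return type and the remainder the parameters, with 'i' meaning int, 'Ui' unsigned int, and 'LLi' long long; the attribute string "nc" marks a builtin nothrow and const. So "LLiiiLLi" for __builtin_arm_smlald reads as long long f(int, int, long long):

static long long smlald_example(void) {
  /* Halfword lanes (1,2) and (3,4), little-endian lane order assumed:
     dual multiply-accumulate = 1*3 + 2*4 + 0 = 11. */
  return __builtin_arm_smlald(0x00020001, 0x00040003, 0LL);
}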

@@ -224,6 +224,36 @@ __rbitl(unsigned long __t) {
#endif
}

/*
 * 9.3 16-bit multiplications
 */
#if __ARM_FEATURE_DSP
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))
__smulbb(int32_t __a, int32_t __b) {
  return __builtin_arm_smulbb(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))
__smulbt(int32_t __a, int32_t __b) {
  return __builtin_arm_smulbt(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))
__smultb(int32_t __a, int32_t __b) {
  return __builtin_arm_smultb(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))
__smultt(int32_t __a, int32_t __b) {
  return __builtin_arm_smultt(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))
__smulwb(int32_t __a, int32_t __b) {
  return __builtin_arm_smulwb(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))
__smulwt(int32_t __a, int32_t __b) {
  return __builtin_arm_smulwt(__a, __b);
}
#endif

/*
 * 9.4 Saturating intrinsics
 *

@@ -231,13 +261,13 @@ __rbitl(unsigned long __t) {
 * intrinsics are implemented and the flag is enabled.
 */
/* 9.4.1 Width-specified saturation intrinsics */
#if __ARM_32BIT_STATE
#if __ARM_FEATURE_SAT
#define __ssat(x, y) __builtin_arm_ssat(x, y)
#define __usat(x, y) __builtin_arm_usat(x, y)
#endif

/* 9.4.2 Saturating addition and subtraction intrinsics */
#if __ARM_32BIT_STATE
#if __ARM_FEATURE_DSP
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__qadd(int32_t __t, int32_t __v) {
  return __builtin_arm_qadd(__t, __v);
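A quick illustration of the saturating behaviour these wrappers expose (a sketch assuming both __ARM_FEATURE_SAT and __ARM_FEATURE_DSP are set; not part of the commit):

#include <arm_acle.h>
#include <limits.h>
#include <stdint.h>

void saturation_examples(void) {
  int32_t a = __qadd(INT_MAX, 1); /* clamps to INT_MAX instead of wrapping */
  int32_t b = __ssat(70000, 16);  /* clamps to the signed 16-bit maximum, 32767 */
  (void)a; (void)b;
}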

@@ -254,6 +284,290 @@ __qdbl(int32_t __t) {
}
#endif

/* 9.4.3 Accumulating multiplications */
#if __ARM_FEATURE_DSP
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlabb(int32_t __a, int32_t __b, int32_t __c) {
  return __builtin_arm_smlabb(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlabt(int32_t __a, int32_t __b, int32_t __c) {
  return __builtin_arm_smlabt(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlatb(int32_t __a, int32_t __b, int32_t __c) {
  return __builtin_arm_smlatb(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlatt(int32_t __a, int32_t __b, int32_t __c) {
  return __builtin_arm_smlatt(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlawb(int32_t __a, int32_t __b, int32_t __c) {
  return __builtin_arm_smlawb(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlawt(int32_t __a, int32_t __b, int32_t __c) {
  return __builtin_arm_smlawt(__a, __b, __c);
}
#endif
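The b/t suffixes select the bottom or top halfword of each operand. A rough scalar model of __smlatb, for instance (my reading of the ACLE semantics; it ignores the Q flag that the real instruction sets on accumulator overflow):

static int32_t smlatb_model(int32_t a, int32_t b, int32_t c) {
  /* top halfword of a times bottom halfword of b, plus accumulator */
  return (int16_t)(a >> 16) * (int16_t)b + c;
}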

/* 9.5.4 Parallel 16-bit saturation */
#if __ARM_FEATURE_SIMD32
#define __ssat16(x, y) __builtin_arm_ssat16(x, y)
#define __usat16(x, y) __builtin_arm_usat16(x, y)
#endif

/* 9.5.5 Packing and unpacking */
#if __ARM_FEATURE_SIMD32
typedef int32_t int8x4_t;
typedef int32_t int16x2_t;
typedef uint32_t uint8x4_t;
typedef uint32_t uint16x2_t;

static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__sxtab16(int16x2_t __a, int8x4_t __b) {
  return __builtin_arm_sxtab16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__sxtb16(int8x4_t __a) {
  return __builtin_arm_sxtb16(__a);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__uxtab16(int16x2_t __a, int8x4_t __b) {
  return __builtin_arm_uxtab16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__uxtb16(int8x4_t __a) {
  return __builtin_arm_uxtb16(__a);
}
#endif
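These vector typedefs are plain 32-bit integers carrying packed lanes, so values can be assembled by hand; a hypothetical helper (little-endian lane order assumed, not part of the header):

static int16x2_t pack_int16x2(int16_t lo, int16_t hi) {
  return (int16x2_t)((uint16_t)lo | ((uint32_t)(uint16_t)hi << 16));
}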

/* 9.5.6 Parallel selection */
#if __ARM_FEATURE_SIMD32
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__sel(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_sel(__a, __b);
}
#endif

/* 9.5.7 Parallel 8-bit addition and subtraction */
#if __ARM_FEATURE_SIMD32
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__qadd8(int8x4_t __a, int8x4_t __b) {
  return __builtin_arm_qadd8(__a, __b);
}
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__qsub8(int8x4_t __a, int8x4_t __b) {
  return __builtin_arm_qsub8(__a, __b);
}
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__sadd8(int8x4_t __a, int8x4_t __b) {
  return __builtin_arm_sadd8(__a, __b);
}
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__shadd8(int8x4_t __a, int8x4_t __b) {
  return __builtin_arm_shadd8(__a, __b);
}
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__shsub8(int8x4_t __a, int8x4_t __b) {
  return __builtin_arm_shsub8(__a, __b);
}
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__ssub8(int8x4_t __a, int8x4_t __b) {
  return __builtin_arm_ssub8(__a, __b);
}
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__uadd8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_uadd8(__a, __b);
}
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__uhadd8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_uhadd8(__a, __b);
}
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__uhsub8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_uhsub8(__a, __b);
}
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__uqadd8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_uqadd8(__a, __b);
}
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__uqsub8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_uqsub8(__a, __b);
}
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__usub8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_usub8(__a, __b);
}
#endif

/* 9.5.8 Sum of 8-bit absolute differences */
#if __ARM_FEATURE_SIMD32
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__usad8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_usad8(__a, __b);
}
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__usada8(uint8x4_t __a, uint8x4_t __b, uint32_t __c) {
  return __builtin_arm_usada8(__a, __b, __c);
}
#endif

/* 9.5.9 Parallel 16-bit addition and subtraction */
#if __ARM_FEATURE_SIMD32
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__qadd16(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_qadd16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__qasx(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_qasx(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__qsax(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_qsax(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__qsub16(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_qsub16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__sadd16(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_sadd16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__sasx(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_sasx(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__shadd16(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_shadd16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__shasx(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_shasx(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__shsax(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_shsax(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__shsub16(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_shsub16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__ssax(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_ssax(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__ssub16(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_ssub16(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uadd16(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uadd16(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uasx(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uasx(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uhadd16(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uhadd16(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uhasx(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uhasx(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uhsax(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uhsax(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uhsub16(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uhsub16(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uqadd16(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uqadd16(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uqasx(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uqasx(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uqsax(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uqsax(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uqsub16(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uqsub16(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__usax(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_usax(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__usub16(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_usub16(__a, __b);
}
#endif
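Each of these operates on the two 16-bit lanes independently. A rough scalar model of __sadd16, reusing the header's typedefs (the real instruction also sets the per-lane GE flags that __sel consumes, which this sketch omits):

static int16x2_t sadd16_model(int16x2_t a, int16x2_t b) {
  int16_t lo = (int16_t)a + (int16_t)b;                 /* lanes wrap modulo 2^16 */
  int16_t hi = (int16_t)(a >> 16) + (int16_t)(b >> 16);
  return (int16x2_t)((uint16_t)lo | ((uint32_t)(uint16_t)hi << 16));
}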

/* 9.5.10 Parallel 16-bit multiplications */
#if __ARM_FEATURE_SIMD32
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlad(int16x2_t __a, int16x2_t __b, int32_t __c) {
  return __builtin_arm_smlad(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smladx(int16x2_t __a, int16x2_t __b, int32_t __c) {
  return __builtin_arm_smladx(__a, __b, __c);
}
static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))
__smlald(int16x2_t __a, int16x2_t __b, int64_t __c) {
  return __builtin_arm_smlald(__a, __b, __c);
}
static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))
__smlaldx(int16x2_t __a, int16x2_t __b, int64_t __c) {
  return __builtin_arm_smlaldx(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlsd(int16x2_t __a, int16x2_t __b, int32_t __c) {
  return __builtin_arm_smlsd(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlsdx(int16x2_t __a, int16x2_t __b, int32_t __c) {
  return __builtin_arm_smlsdx(__a, __b, __c);
}
static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))
__smlsld(int16x2_t __a, int16x2_t __b, int64_t __c) {
  return __builtin_arm_smlsld(__a, __b, __c);
}
static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))
__smlsldx(int16x2_t __a, int16x2_t __b, int64_t __c) {
  return __builtin_arm_smlsldx(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smuad(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_smuad(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smuadx(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_smuadx(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smusd(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_smusd(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smusdx(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_smusdx(__a, __b);
}
#endif
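And a scalar model of the dual multiplies, using __smuad as the example (bottom*bottom plus top*top; the Q flag set on overflow is again omitted):

static int32_t smuad_model(int16x2_t a, int16x2_t b) {
  return (int16_t)a * (int16_t)b + (int16_t)(a >> 16) * (int16_t)(b >> 16);
}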

/* 9.7 CRC32 intrinsics */
#if __ARM_FEATURE_CRC32
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))

@@ -76,7 +76,7 @@ void test_dbg(void) {
// AArch32: call i32 @llvm.arm.strex
// AArch64: call i64 @llvm.aarch64.ldxr
// AArch64: call i32 @llvm.aarch64.stxr
-uint32_t test_swp(uint32_t x, volatile void *p) {
+void test_swp(uint32_t x, volatile void *p) {
  __swp(x, p);
}

@@ -118,6 +118,7 @@ void test_nop(void) {
}

/* 9 DATA-PROCESSING INTRINSICS */

/* 9.2 Miscellaneous data-processing intrinsics */
// ARM-LABEL: test_ror
// ARM: lshr

@@ -266,8 +267,7 @@ uint64_t test_rbitll(uint64_t t) {
}

/* 9.4 Saturating intrinsics */
#ifdef __ARM_32BIT_STATE

#ifdef __ARM_FEATURE_SAT
/* 9.4.1 Width-specified saturation intrinsics */
// AArch32-LABEL: test_ssat
// AArch32: call i32 @llvm.arm.ssat(i32 %t, i32 1)

@@ -277,11 +277,13 @@ int32_t test_ssat(int32_t t) {

// AArch32-LABEL: test_usat
// AArch32: call i32 @llvm.arm.usat(i32 %t, i32 2)
-int32_t test_usat(int32_t t) {
+uint32_t test_usat(int32_t t) {
  return __usat(t, 2);
}
#endif

/* 9.4.2 Saturating addition and subtraction intrinsics */
#ifdef __ARM_FEATURE_DSP
// AArch32-LABEL: test_qadd
// AArch32: call i32 @llvm.arm.qadd(i32 %a, i32 %b)
int32_t test_qadd(int32_t a, int32_t b) {

@@ -304,6 +306,389 @@ int32_t test_qdbl() {
}
#endif

/*
 * 9.3 16-bit multiplications
 */
#if __ARM_FEATURE_DSP
// AArch32-LABEL: test_smulbb
// AArch32: call i32 @llvm.arm.smulbb
int32_t test_smulbb(int32_t a, int32_t b) {
  return __smulbb(a, b);
}
// AArch32-LABEL: test_smulbt
// AArch32: call i32 @llvm.arm.smulbt
int32_t test_smulbt(int32_t a, int32_t b) {
  return __smulbt(a, b);
}
// AArch32-LABEL: test_smultb
// AArch32: call i32 @llvm.arm.smultb
int32_t test_smultb(int32_t a, int32_t b) {
  return __smultb(a, b);
}
// AArch32-LABEL: test_smultt
// AArch32: call i32 @llvm.arm.smultt
int32_t test_smultt(int32_t a, int32_t b) {
  return __smultt(a, b);
}
// AArch32-LABEL: test_smulwb
// AArch32: call i32 @llvm.arm.smulwb
int32_t test_smulwb(int32_t a, int32_t b) {
  return __smulwb(a, b);
}
// AArch32-LABEL: test_smulwt
// AArch32: call i32 @llvm.arm.smulwt
int32_t test_smulwt(int32_t a, int32_t b) {
  return __smulwt(a, b);
}
#endif

/* 9.4.3 Accumulating multiplications */
#if __ARM_FEATURE_DSP
// AArch32-LABEL: test_smlabb
// AArch32: call i32 @llvm.arm.smlabb(i32 %a, i32 %b, i32 %c)
int32_t test_smlabb(int32_t a, int32_t b, int32_t c) {
  return __smlabb(a, b, c);
}
// AArch32-LABEL: test_smlabt
// AArch32: call i32 @llvm.arm.smlabt(i32 %a, i32 %b, i32 %c)
int32_t test_smlabt(int32_t a, int32_t b, int32_t c) {
  return __smlabt(a, b, c);
}
// AArch32-LABEL: test_smlatb
// AArch32: call i32 @llvm.arm.smlatb(i32 %a, i32 %b, i32 %c)
int32_t test_smlatb(int32_t a, int32_t b, int32_t c) {
  return __smlatb(a, b, c);
}
// AArch32-LABEL: test_smlatt
// AArch32: call i32 @llvm.arm.smlatt(i32 %a, i32 %b, i32 %c)
int32_t test_smlatt(int32_t a, int32_t b, int32_t c) {
  return __smlatt(a, b, c);
}
// AArch32-LABEL: test_smlawb
// AArch32: call i32 @llvm.arm.smlawb(i32 %a, i32 %b, i32 %c)
int32_t test_smlawb(int32_t a, int32_t b, int32_t c) {
  return __smlawb(a, b, c);
}
// AArch32-LABEL: test_smlawt
// AArch32: call i32 @llvm.arm.smlawt(i32 %a, i32 %b, i32 %c)
int32_t test_smlawt(int32_t a, int32_t b, int32_t c) {
  return __smlawt(a, b, c);
}
#endif

/* 9.5.4 Parallel 16-bit saturation */
#if __ARM_FEATURE_SIMD32
// AArch32-LABEL: test_ssat16
// AArch32: call i32 @llvm.arm.ssat16
int16x2_t test_ssat16(int16x2_t a) {
  return __ssat16(a, 15);
}
// AArch32-LABEL: test_usat16
// AArch32: call i32 @llvm.arm.usat16
uint16x2_t test_usat16(int16x2_t a) {
  return __usat16(a, 15);
}
#endif

/* 9.5.5 Packing and unpacking */
#if __ARM_FEATURE_SIMD32
// AArch32-LABEL: test_sxtab16
// AArch32: call i32 @llvm.arm.sxtab16
int16x2_t test_sxtab16(int16x2_t a, int8x4_t b) {
  return __sxtab16(a, b);
}
// AArch32-LABEL: test_sxtb16
// AArch32: call i32 @llvm.arm.sxtb16
int16x2_t test_sxtb16(int8x4_t a) {
  return __sxtb16(a);
}
// AArch32-LABEL: test_uxtab16
// AArch32: call i32 @llvm.arm.uxtab16
int16x2_t test_uxtab16(int16x2_t a, int8x4_t b) {
  return __uxtab16(a, b);
}
// AArch32-LABEL: test_uxtb16
// AArch32: call i32 @llvm.arm.uxtb16
int16x2_t test_uxtb16(int8x4_t a) {
  return __uxtb16(a);
}
#endif

/* 9.5.6 Parallel selection */
#if __ARM_FEATURE_SIMD32
// AArch32-LABEL: test_sel
// AArch32: call i32 @llvm.arm.sel
uint8x4_t test_sel(uint8x4_t a, uint8x4_t b) {
  return __sel(a, b);
}
#endif

/* 9.5.7 Parallel 8-bit addition and subtraction */
#if __ARM_FEATURE_SIMD32
// AArch32-LABEL: test_qadd8
// AArch32: call i32 @llvm.arm.qadd8
int8x4_t test_qadd8(int8x4_t a, int8x4_t b) {
  return __qadd8(a, b);
}
// AArch32-LABEL: test_qsub8
// AArch32: call i32 @llvm.arm.qsub8
int8x4_t test_qsub8(int8x4_t a, int8x4_t b) {
  return __qsub8(a, b);
}
// AArch32-LABEL: test_sadd8
// AArch32: call i32 @llvm.arm.sadd8
int8x4_t test_sadd8(int8x4_t a, int8x4_t b) {
  return __sadd8(a, b);
}
// AArch32-LABEL: test_shadd8
// AArch32: call i32 @llvm.arm.shadd8
int8x4_t test_shadd8(int8x4_t a, int8x4_t b) {
  return __shadd8(a, b);
}
// AArch32-LABEL: test_shsub8
// AArch32: call i32 @llvm.arm.shsub8
int8x4_t test_shsub8(int8x4_t a, int8x4_t b) {
  return __shsub8(a, b);
}
// AArch32-LABEL: test_ssub8
// AArch32: call i32 @llvm.arm.ssub8
int8x4_t test_ssub8(int8x4_t a, int8x4_t b) {
  return __ssub8(a, b);
}
// AArch32-LABEL: test_uadd8
// AArch32: call i32 @llvm.arm.uadd8
uint8x4_t test_uadd8(uint8x4_t a, uint8x4_t b) {
  return __uadd8(a, b);
}
// AArch32-LABEL: test_uhadd8
// AArch32: call i32 @llvm.arm.uhadd8
uint8x4_t test_uhadd8(uint8x4_t a, uint8x4_t b) {
  return __uhadd8(a, b);
}
// AArch32-LABEL: test_uhsub8
// AArch32: call i32 @llvm.arm.uhsub8
uint8x4_t test_uhsub8(uint8x4_t a, uint8x4_t b) {
  return __uhsub8(a, b);
}
// AArch32-LABEL: test_uqadd8
// AArch32: call i32 @llvm.arm.uqadd8
uint8x4_t test_uqadd8(uint8x4_t a, uint8x4_t b) {
  return __uqadd8(a, b);
}
// AArch32-LABEL: test_uqsub8
// AArch32: call i32 @llvm.arm.uqsub8
uint8x4_t test_uqsub8(uint8x4_t a, uint8x4_t b) {
  return __uqsub8(a, b);
}
// AArch32-LABEL: test_usub8
// AArch32: call i32 @llvm.arm.usub8
uint8x4_t test_usub8(uint8x4_t a, uint8x4_t b) {
  return __usub8(a, b);
}
#endif

/* 9.5.8 Sum of 8-bit absolute differences */
#if __ARM_FEATURE_SIMD32
// AArch32-LABEL: test_usad8
// AArch32: call i32 @llvm.arm.usad8
uint32_t test_usad8(uint8x4_t a, uint8x4_t b) {
  return __usad8(a, b);
}
// AArch32-LABEL: test_usada8
// AArch32: call i32 @llvm.arm.usada8
uint32_t test_usada8(uint8_t a, uint8_t b, uint8_t c) {
  return __usada8(a, b, c);
}
#endif

/* 9.5.9 Parallel 16-bit addition and subtraction */
#if __ARM_FEATURE_SIMD32
// AArch32-LABEL: test_qadd16
// AArch32: call i32 @llvm.arm.qadd16
int16x2_t test_qadd16(int16x2_t a, int16x2_t b) {
  return __qadd16(a, b);
}
// AArch32-LABEL: test_qasx
// AArch32: call i32 @llvm.arm.qasx
int16x2_t test_qasx(int16x2_t a, int16x2_t b) {
  return __qasx(a, b);
}
// AArch32-LABEL: test_qsax
// AArch32: call i32 @llvm.arm.qsax
int16x2_t test_qsax(int16x2_t a, int16x2_t b) {
  return __qsax(a, b);
}
// AArch32-LABEL: test_qsub16
// AArch32: call i32 @llvm.arm.qsub16
int16x2_t test_qsub16(int16x2_t a, int16x2_t b) {
  return __qsub16(a, b);
}
// AArch32-LABEL: test_sadd16
// AArch32: call i32 @llvm.arm.sadd16
int16x2_t test_sadd16(int16x2_t a, int16x2_t b) {
  return __sadd16(a, b);
}
// AArch32-LABEL: test_sasx
// AArch32: call i32 @llvm.arm.sasx
int16x2_t test_sasx(int16x2_t a, int16x2_t b) {
  return __sasx(a, b);
}
// AArch32-LABEL: test_shadd16
// AArch32: call i32 @llvm.arm.shadd16
int16x2_t test_shadd16(int16x2_t a, int16x2_t b) {
  return __shadd16(a, b);
}
// AArch32-LABEL: test_shasx
// AArch32: call i32 @llvm.arm.shasx
int16x2_t test_shasx(int16x2_t a, int16x2_t b) {
  return __shasx(a, b);
}
// AArch32-LABEL: test_shsax
// AArch32: call i32 @llvm.arm.shsax
int16x2_t test_shsax(int16x2_t a, int16x2_t b) {
  return __shsax(a, b);
}
// AArch32-LABEL: test_shsub16
// AArch32: call i32 @llvm.arm.shsub16
int16x2_t test_shsub16(int16x2_t a, int16x2_t b) {
  return __shsub16(a, b);
}
// AArch32-LABEL: test_ssax
// AArch32: call i32 @llvm.arm.ssax
int16x2_t test_ssax(int16x2_t a, int16x2_t b) {
  return __ssax(a, b);
}
// AArch32-LABEL: test_ssub16
// AArch32: call i32 @llvm.arm.ssub16
int16x2_t test_ssub16(int16x2_t a, int16x2_t b) {
  return __ssub16(a, b);
}
// AArch32-LABEL: test_uadd16
// AArch32: call i32 @llvm.arm.uadd16
uint16x2_t test_uadd16(uint16x2_t a, uint16x2_t b) {
  return __uadd16(a, b);
}
// AArch32-LABEL: test_uasx
// AArch32: call i32 @llvm.arm.uasx
uint16x2_t test_uasx(uint16x2_t a, uint16x2_t b) {
  return __uasx(a, b);
}
// AArch32-LABEL: test_uhadd16
// AArch32: call i32 @llvm.arm.uhadd16
uint16x2_t test_uhadd16(uint16x2_t a, uint16x2_t b) {
  return __uhadd16(a, b);
}
// AArch32-LABEL: test_uhasx
// AArch32: call i32 @llvm.arm.uhasx
uint16x2_t test_uhasx(uint16x2_t a, uint16x2_t b) {
  return __uhasx(a, b);
}
// AArch32-LABEL: test_uhsax
// AArch32: call i32 @llvm.arm.uhsax
uint16x2_t test_uhsax(uint16x2_t a, uint16x2_t b) {
  return __uhsax(a, b);
}
// AArch32-LABEL: test_uhsub16
// AArch32: call i32 @llvm.arm.uhsub16
uint16x2_t test_uhsub16(uint16x2_t a, uint16x2_t b) {
  return __uhsub16(a, b);
}
// AArch32-LABEL: test_uqadd16
// AArch32: call i32 @llvm.arm.uqadd16
uint16x2_t test_uqadd16(uint16x2_t a, uint16x2_t b) {
  return __uqadd16(a, b);
}
// AArch32-LABEL: test_uqasx
// AArch32: call i32 @llvm.arm.uqasx
uint16x2_t test_uqasx(uint16x2_t a, uint16x2_t b) {
  return __uqasx(a, b);
}
// AArch32-LABEL: test_uqsax
// AArch32: call i32 @llvm.arm.uqsax
uint16x2_t test_uqsax(uint16x2_t a, uint16x2_t b) {
  return __uqsax(a, b);
}
// AArch32-LABEL: test_uqsub16
// AArch32: call i32 @llvm.arm.uqsub16
uint16x2_t test_uqsub16(uint16x2_t a, uint16x2_t b) {
  return __uqsub16(a, b);
}
// AArch32-LABEL: test_usax
// AArch32: call i32 @llvm.arm.usax
uint16x2_t test_usax(uint16x2_t a, uint16x2_t b) {
  return __usax(a, b);
}
// AArch32-LABEL: test_usub16
// AArch32: call i32 @llvm.arm.usub16
uint16x2_t test_usub16(uint16x2_t a, uint16x2_t b) {
  return __usub16(a, b);
}
#endif

/* 9.5.10 Parallel 16-bit multiplications */
#if __ARM_FEATURE_SIMD32
// AArch32-LABEL: test_smlad
// AArch32: call i32 @llvm.arm.smlad
int32_t test_smlad(int16x2_t a, int16x2_t b, int32_t c) {
  return __smlad(a, b, c);
}
// AArch32-LABEL: test_smladx
// AArch32: call i32 @llvm.arm.smladx
int32_t test_smladx(int16x2_t a, int16x2_t b, int32_t c) {
  return __smladx(a, b, c);
}
// AArch32-LABEL: test_smlald
// AArch32: call i64 @llvm.arm.smlald
int64_t test_smlald(int16x2_t a, int16x2_t b, int64_t c) {
  return __smlald(a, b, c);
}
// AArch32-LABEL: test_smlaldx
// AArch32: call i64 @llvm.arm.smlaldx
int64_t test_smlaldx(int16x2_t a, int16x2_t b, int64_t c) {
  return __smlaldx(a, b, c);
}
// AArch32-LABEL: test_smlsd
// AArch32: call i32 @llvm.arm.smlsd
int32_t test_smlsd(int16x2_t a, int16x2_t b, int32_t c) {
  return __smlsd(a, b, c);
}
// AArch32-LABEL: test_smlsdx
// AArch32: call i32 @llvm.arm.smlsdx
int32_t test_smlsdx(int16x2_t a, int16x2_t b, int32_t c) {
  return __smlsdx(a, b, c);
}
// AArch32-LABEL: test_smlsld
// AArch32: call i64 @llvm.arm.smlsld
int64_t test_smlsld(int16x2_t a, int16x2_t b, int64_t c) {
  return __smlsld(a, b, c);
}
// AArch32-LABEL: test_smlsldx
// AArch32: call i64 @llvm.arm.smlsldx
int64_t test_smlsldx(int16x2_t a, int16x2_t b, int64_t c) {
  return __smlsldx(a, b, c);
}
// AArch32-LABEL: test_smuad
// AArch32: call i32 @llvm.arm.smuad
int32_t test_smuad(int16x2_t a, int16x2_t b) {
  return __smuad(a, b);
}
// AArch32-LABEL: test_smuadx
// AArch32: call i32 @llvm.arm.smuadx
int32_t test_smuadx(int16x2_t a, int16x2_t b) {
  return __smuadx(a, b);
}
// AArch32-LABEL: test_smusd
// AArch32: call i32 @llvm.arm.smusd
int32_t test_smusd(int16x2_t a, int16x2_t b) {
  return __smusd(a, b);
}
// AArch32-LABEL: test_smusdx
// AArch32: call i32 @llvm.arm.smusdx
int32_t test_smusdx(int16x2_t a, int16x2_t b) {
  return __smusdx(a, b);
}
#endif

/* 9.7 CRC32 intrinsics */
// ARM-LABEL: test_crc32b
// AArch32: call i32 @llvm.arm.crc32b