forked from OSchip/llvm-project
[X86] Add missing typecasts in intrinsic macros. This should make them more robust against inputs that aren't already the right type.
llvm-svn: 252700
This commit is contained in:
parent
b9b0c9c61d
commit
d619eaaae4
|
@ -59,7 +59,7 @@ _mm_aesimc_si128(__m128i __V)
|
|||
}
|
||||
|
||||
/* Expands to __builtin_ia32_aeskeygenassist128. C is converted to __m128i
 * and then viewed as __v2di, R is converted to int, and the builtin's result
 * is cast to __m128i. The whole expansion is parenthesized so the leading
 * cast cannot be separated from the call by an adjacent operator (for
 * example `sizeof` or a postfix subscript at the use site).
 */
#define _mm_aeskeygenassist_si128(C, R) \
  ((__m128i)__builtin_ia32_aeskeygenassist128((__v2di)(__m128i)(C), (int)(R)))
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
|
||||
|
|
|
@ -882,20 +882,18 @@ _mm256_permutevar8x32_ps(__m256 __a, __m256 __b)
|
|||
(__m256i)__builtin_ia32_permti256((__m256i)(V1), (__m256i)(V2), (M)); })
|
||||
|
||||
/* Extracts one pair of 64-bit elements from V as an __m128i.
 * Only bit 0 of M is examined: when it is set the shuffle picks elements
 * 2 and 3 of V, otherwise elements 0 and 1. V is converted to __m256i before
 * being viewed as __v4di, so differently typed 256-bit vectors are accepted.
 * The second shuffle operand is the result of _mm256_setzero_si256(); all
 * indices are below 4, so no element is ever taken from it.
 */
#define _mm256_extracti128_si256(V, M) __extension__ ({ \
  (__m128i)__builtin_shufflevector((__v4di)(__m256i)(V), \
                                   (__v4di)_mm256_setzero_si256(), \
                                   (((M) & 1) ? 2 : 0), \
                                   (((M) & 1) ? 3 : 1) ); })
|
||||
|
||||
/* Builds an __m256i from V1 with one pair of 64-bit elements replaced by V2.
 * The shuffle sees V1 as elements 0..3 and the widened V2 (via
 * _mm256_castsi128_si256) as elements 4..7. Only bit 0 of M is examined:
 *   bit clear -> result is {4, 5, 2, 3}: V2 in the low pair, V1's high pair
 *   bit set   -> result is {0, 1, 4, 5}: V1's low pair, V2 in the high pair
 * V1 is converted to __m256i and V2 to __m128i before use.
 */
#define _mm256_inserti128_si256(V1, V2, M) __extension__ ({ \
  (__m256i)__builtin_shufflevector((__v4di)(__m256i)(V1), \
                                   (__v4di)_mm256_castsi128_si256((__m128i)(V2)), \
                                   (((M) & 1) ? 0 : 4), \
                                   (((M) & 1) ? 1 : 5), \
                                   (((M) & 1) ? 4 : 2), \
                                   (((M) & 1) ? 5 : 3) ); })
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_maskload_epi32(int const *__X, __m256i __M)
|
||||
|
|
|
@ -1157,7 +1157,7 @@ _mm256_castsi128_si256(__m128i __a)
|
|||
*/
|
||||
#define _mm256_insertf128_ps(V1, V2, M) __extension__ ({ \
|
||||
(__m256)__builtin_shufflevector( \
|
||||
(__v8sf)(V1), \
|
||||
(__v8sf)(__m256)(V1), \
|
||||
(__v8sf)_mm256_castps128_ps256((__m128)(V2)), \
|
||||
(((M) & 1) ? 0 : 8), \
|
||||
(((M) & 1) ? 1 : 9), \
|
||||
|
@ -1170,7 +1170,7 @@ _mm256_castsi128_si256(__m128i __a)
|
|||
|
||||
#define _mm256_insertf128_pd(V1, V2, M) __extension__ ({ \
|
||||
(__m256d)__builtin_shufflevector( \
|
||||
(__v4df)(V1), \
|
||||
(__v4df)(__m256d)(V1), \
|
||||
(__v4df)_mm256_castpd128_pd256((__m128d)(V2)), \
|
||||
(((M) & 1) ? 0 : 4), \
|
||||
(((M) & 1) ? 1 : 5), \
|
||||
|
@ -1179,7 +1179,7 @@ _mm256_castsi128_si256(__m128i __a)
|
|||
|
||||
#define _mm256_insertf128_si256(V1, V2, M) __extension__ ({ \
|
||||
(__m256i)__builtin_shufflevector( \
|
||||
(__v4di)(V1), \
|
||||
(__v4di)(__m256i)(V1), \
|
||||
(__v4di)_mm256_castsi128_si256((__m128i)(V2)), \
|
||||
(((M) & 1) ? 0 : 4), \
|
||||
(((M) & 1) ? 1 : 5), \
|
||||
|
@ -1193,7 +1193,7 @@ _mm256_castsi128_si256(__m128i __a)
|
|||
*/
|
||||
#define _mm256_extractf128_ps(V, M) __extension__ ({ \
|
||||
(__m128)__builtin_shufflevector( \
|
||||
(__v8sf)(V), \
|
||||
(__v8sf)(__m256)(V), \
|
||||
(__v8sf)(_mm256_setzero_ps()), \
|
||||
(((M) & 1) ? 4 : 0), \
|
||||
(((M) & 1) ? 5 : 1), \
|
||||
|
@ -1202,14 +1202,14 @@ _mm256_castsi128_si256(__m128i __a)
|
|||
|
||||
/* Extracts one pair of double elements from V as an __m128d.
 * Only bit 0 of M is examined: when set the shuffle picks elements 2 and 3
 * of V, otherwise elements 0 and 1. V is converted to __m256d before being
 * viewed as __v4df. The second shuffle operand is the result of
 * _mm256_setzero_pd(); all indices are below 4, so it is never selected from.
 */
#define _mm256_extractf128_pd(V, M) __extension__ ({ \
  (__m128d)__builtin_shufflevector( \
    (__v4df)(__m256d)(V), \
    (__v4df)(_mm256_setzero_pd()), \
    (((M) & 1) ? 2 : 0), \
    (((M) & 1) ? 3 : 1) );})
|
||||
|
||||
/* Extracts one pair of 64-bit integer elements from V as an __m128i.
 * Only bit 0 of M is examined: when set the shuffle picks elements 2 and 3
 * of V, otherwise elements 0 and 1. V is converted to __m256i before being
 * viewed as __v4di. The second shuffle operand is the result of
 * _mm256_setzero_si256(); all indices are below 4, so it is never selected
 * from.
 */
#define _mm256_extractf128_si256(V, M) __extension__ ({ \
  (__m128i)__builtin_shufflevector( \
    (__v4di)(__m256i)(V), \
    (__v4di)(_mm256_setzero_si256()), \
    (((M) & 1) ? 2 : 0), \
    (((M) & 1) ? 3 : 1) );})
|
||||
|
|
|
@ -1439,8 +1439,8 @@ _mm_movemask_pd(__m128d __a)
|
|||
}
|
||||
|
||||
/* Builds an __m128d from one element of a and one element of b.
 * The shuffle sees a as elements 0..1 and b as elements 2..3:
 *   result[0] = a[i & 1]
 *   result[1] = b[(i >> 1) & 1]   (shuffle index 2 or 3)
 * Both operands are converted to __m128d and viewed as __v2df, and the
 * shuffle result is cast back to __m128d.
 */
#define _mm_shuffle_pd(a, b, i) __extension__ ({ \
  (__m128d)__builtin_shufflevector((__v2df)(__m128d)(a), \
                                   (__v2df)(__m128d)(b), \
                                   (i) & 1, (((i) & 2) >> 1) + 2); })
|
||||
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS
|
||||
_mm_castpd_ps(__m128d __a)
|
||||
|
|
|
@ -32,7 +32,7 @@
|
|||
/* Default attributes for the functions in this file: always inlined, no
 * debug info, and compiled for the "sha" target feature. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sha")))
|
||||
|
||||
/* Expands to __builtin_ia32_sha1rnds4. V1 and V2 are converted to __m128i
 * and viewed as __v4si; M is passed through unchanged. The builtin's result
 * is cast to __m128i so the macro yields the same type as the other
 * intrinsics in this header, regardless of the builtin's own vector type.
 */
#define _mm_sha1rnds4_epu32(V1, V2, M) __extension__ ({ \
  (__m128i)__builtin_ia32_sha1rnds4((__v4si)(__m128i)(V1), \
                                    (__v4si)(__m128i)(V2), (M)); })
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm_sha1nexte_epu32(__m128i __X, __m128i __Y)
|
||||
|
|
|
@ -199,7 +199,7 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
|
|||
/* Expands to __builtin_ia32_insertps128. X and Y are converted to __m128 and
 * viewed as __v4sf, N is passed through unchanged, and the result is cast to
 * __m128. The casts and outer parentheses match the other typed intrinsic
 * macros in this header.
 */
#define _mm_insert_ps(X, Y, N) \
  ((__m128)__builtin_ia32_insertps128((__v4sf)(__m128)(X), \
                                      (__v4sf)(__m128)(Y), (N)))
|
||||
/* Returns, as an int, the bit pattern of float element (N & 3) of X.
 * X is converted to __m128 and viewed as __v4sf; the selected element is
 * written to the float member of a union and read back through its int
 * member, so no numeric float-to-int conversion takes place.
 */
#define _mm_extract_ps(X, N) (__extension__                      \
                              ({ union { int __i; float __f; } __t;  \
                                 __v4sf __a = (__v4sf)(__m128)(X);       \
                                 __t.__f = __a[(N) & 3];                 \
                                 __t.__i;}))
|
||||
|
||||
|
@ -217,29 +217,34 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
|
|||
_MM_MK_INSERTPS_NDX((N), 0, 0x0e))
|
||||
|
||||
/* Insert int into packed integer array at index. */
|
||||
/* Returns a copy of X with 8-bit element (N & 15) replaced by I.
 * X is converted to __m128i and viewed as __v16qi for the element store; the
 * result is cast back to __m128i so the macro's type does not depend on lax
 * vector conversions at the use site.
 */
#define _mm_insert_epi8(X, I, N) (__extension__                           \
                                  ({ __v16qi __a = (__v16qi)(__m128i)(X); \
                                     __a[(N) & 15] = (I);                 \
                                     (__m128i)__a;}))
/* Returns a copy of X with 32-bit element (N & 3) replaced by I.
 * X is converted to __m128i and viewed as __v4si for the element store; the
 * result is cast back to __m128i.
 */
#define _mm_insert_epi32(X, I, N) (__extension__                         \
                                   ({ __v4si __a = (__v4si)(__m128i)(X); \
                                      __a[(N) & 3] = (I);                \
                                      (__m128i)__a;}))
|
||||
#ifdef __x86_64__
|
||||
/* Returns a copy of X with 64-bit element (N & 1) replaced by I.
 * X is converted to __m128i and viewed as __v2di for the element store; the
 * result is cast back to __m128i.
 */
#define _mm_insert_epi64(X, I, N) (__extension__                         \
                                   ({ __v2di __a = (__v2di)(__m128i)(X); \
                                      __a[(N) & 1] = (I);                \
                                      (__m128i)__a;}))
|
||||
#endif /* __x86_64__ */
|
||||
|
||||
/* Extract int from packed integer array at index. This returns the element
|
||||
* as a zero extended value, so it is unsigned.
|
||||
*/
|
||||
/* Returns 8-bit element (N & 15) of X, zero-extended to int.
 * X is converted to __m128i and viewed as __v16qi; the element goes through
 * unsigned char first so the result is always in 0..255.
 */
#define _mm_extract_epi8(X, N) (__extension__                           \
                                ({ __v16qi __a = (__v16qi)(__m128i)(X); \
                                   (int)(unsigned char) __a[(N) & 15];}))
/* Returns 32-bit element (N & 3) of X as an int.
 * X is converted to __m128i and viewed as __v4si.
 */
#define _mm_extract_epi32(X, N) (__extension__                         \
                                 ({ __v4si __a = (__v4si)(__m128i)(X); \
                                    (int)__a[(N) & 3];}))
|
||||
#ifdef __x86_64__
|
||||
/* Returns 64-bit element (N & 1) of X as a long long.
 * X is converted to __m128i and viewed as __v2di.
 */
#define _mm_extract_epi64(X, N) (__extension__                         \
                                 ({ __v2di __a = (__v2di)(__m128i)(X); \
                                    (long long)__a[(N) & 1];}))
|
||||
#endif /* __x86_64__ */
|
||||
|
||||
/* SSE4 128-bit Packed Integer Comparisons. */
|
||||
|
@ -406,36 +411,59 @@ _mm_minpos_epu16(__m128i __V)
|
|||
#define _SIDD_UNIT_MASK 0x40
|
||||
|
||||
/* SSE4.2 Packed Comparison Intrinsics. */
|
||||
/* Each macro forwards to the builtin of the matching name. A and B are
 * converted to __m128i and viewed as __v16qi, and M is converted to int.
 * _mm_cmpistrm casts the result to __m128i, _mm_cmpistri to int.
 * The expansions are fully parenthesized: they begin with a cast, and
 * without the outer parentheses a use such as `sizeof _mm_cmpistri(...)`
 * would parse as `sizeof(int)` followed by a stray call.
 */
#define _mm_cmpistrm(A, B, M) \
  ((__m128i)__builtin_ia32_pcmpistrm128((__v16qi)(__m128i)(A), \
                                        (__v16qi)(__m128i)(B), (int)(M)))
#define _mm_cmpistri(A, B, M) \
  ((int)__builtin_ia32_pcmpistri128((__v16qi)(__m128i)(A), \
                                    (__v16qi)(__m128i)(B), (int)(M)))
|
||||
|
||||
/* Each macro forwards to the builtin of the matching name. A and B are
 * converted to __m128i and viewed as __v16qi; LA, LB and M are converted to
 * int. _mm_cmpestrm casts the result to __m128i, _mm_cmpestri to int.
 * The expansions are fully parenthesized so the leading cast stays attached
 * to the call whatever operator surrounds the macro at the use site.
 */
#define _mm_cmpestrm(A, LA, B, LB, M) \
  ((__m128i)__builtin_ia32_pcmpestrm128((__v16qi)(__m128i)(A), (int)(LA), \
                                        (__v16qi)(__m128i)(B), (int)(LB), \
                                        (int)(M)))
#define _mm_cmpestri(A, LA, B, LB, M) \
  ((int)__builtin_ia32_pcmpestri128((__v16qi)(__m128i)(A), (int)(LA), \
                                    (__v16qi)(__m128i)(B), (int)(LB), \
                                    (int)(M)))
|
||||
|
||||
/* SSE4.2 Packed Comparison Intrinsics and EFlag Reading. */
|
||||
/* Five macros of identical shape; each forwards to the builtin whose name
 * ends in the same letter (a, c, o, s, z). A and B are converted to __m128i
 * and viewed as __v16qi, M is converted to int, and the builtin's result is
 * cast to int. The expansions are fully parenthesized so the leading cast
 * stays attached to the call at every use site.
 */
#define _mm_cmpistra(A, B, M) \
  ((int)__builtin_ia32_pcmpistria128((__v16qi)(__m128i)(A), \
                                     (__v16qi)(__m128i)(B), (int)(M)))
#define _mm_cmpistrc(A, B, M) \
  ((int)__builtin_ia32_pcmpistric128((__v16qi)(__m128i)(A), \
                                     (__v16qi)(__m128i)(B), (int)(M)))
#define _mm_cmpistro(A, B, M) \
  ((int)__builtin_ia32_pcmpistrio128((__v16qi)(__m128i)(A), \
                                     (__v16qi)(__m128i)(B), (int)(M)))
#define _mm_cmpistrs(A, B, M) \
  ((int)__builtin_ia32_pcmpistris128((__v16qi)(__m128i)(A), \
                                     (__v16qi)(__m128i)(B), (int)(M)))
#define _mm_cmpistrz(A, B, M) \
  ((int)__builtin_ia32_pcmpistriz128((__v16qi)(__m128i)(A), \
                                     (__v16qi)(__m128i)(B), (int)(M)))
|
||||
|
||||
/* Five macros of identical shape; each forwards to the builtin whose name
 * ends in the same letter (a, c, o, s, z). A and B are converted to __m128i
 * and viewed as __v16qi; LA, LB and M are converted to int; the builtin's
 * result is cast to int. The expansions are fully parenthesized so the
 * leading cast stays attached to the call at every use site.
 */
#define _mm_cmpestra(A, LA, B, LB, M) \
  ((int)__builtin_ia32_pcmpestria128((__v16qi)(__m128i)(A), (int)(LA), \
                                     (__v16qi)(__m128i)(B), (int)(LB), \
                                     (int)(M)))
#define _mm_cmpestrc(A, LA, B, LB, M) \
  ((int)__builtin_ia32_pcmpestric128((__v16qi)(__m128i)(A), (int)(LA), \
                                     (__v16qi)(__m128i)(B), (int)(LB), \
                                     (int)(M)))
#define _mm_cmpestro(A, LA, B, LB, M) \
  ((int)__builtin_ia32_pcmpestrio128((__v16qi)(__m128i)(A), (int)(LA), \
                                     (__v16qi)(__m128i)(B), (int)(LB), \
                                     (int)(M)))
#define _mm_cmpestrs(A, LA, B, LB, M) \
  ((int)__builtin_ia32_pcmpestris128((__v16qi)(__m128i)(A), (int)(LA), \
                                     (__v16qi)(__m128i)(B), (int)(LB), \
                                     (int)(M)))
#define _mm_cmpestrz(A, LA, B, LB, M) \
  ((int)__builtin_ia32_pcmpestriz128((__v16qi)(__m128i)(A), (int)(LA), \
                                     (__v16qi)(__m128i)(B), (int)(LB), \
                                     (int)(M)))
|
||||
|
||||
/* SSE4.2 Compare Packed Data -- Greater Than. */
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
|
|
|
@ -31,7 +31,9 @@
|
|||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("tbm")))
|
||||
|
||||
/* Expands to __builtin_ia32_bextri_u32 with both arguments converted to
 * unsigned int and the result cast to unsigned int, so callers may pass
 * operands of other integer types.
 */
#define __bextri_u32(a, b) \
  ((unsigned int)__builtin_ia32_bextri_u32((unsigned int)(a), \
                                           (unsigned int)(b)))
|
||||
|
||||
static __inline__ unsigned int __DEFAULT_FN_ATTRS
|
||||
__blcfill_u32(unsigned int a)
|
||||
|
@ -88,7 +90,9 @@ __tzmsk_u32(unsigned int a)
|
|||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
/* Expands to __builtin_ia32_bextri_u64 with both arguments converted to
 * unsigned long long and the result cast to unsigned long long, so callers
 * may pass operands of other integer types.
 */
#define __bextri_u64(a, b) \
  ((unsigned long long)__builtin_ia32_bextri_u64((unsigned long long)(a), \
                                                 (unsigned long long)(b)))
|
||||
|
||||
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
|
||||
__blcfill_u64(unsigned long long a)
|
||||
|
|
Loading…
Reference in New Issue