From d619eaaae421f70574b9618bb51f8c6959491fa5 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Wed, 11 Nov 2015 03:47:10 +0000
Subject: [PATCH] [X86] Add missing typecasts in intrinsic macros.

This should make them more robust against inputs that aren't already the right type.

llvm-svn: 252700
---
 clang/lib/Headers/__wmmintrin_aes.h |  2 +-
 clang/lib/Headers/avx2intrin.h      | 22 ++++---
 clang/lib/Headers/avxintrin.h       | 12 ++--
 clang/lib/Headers/emmintrin.h       |  4 +-
 clang/lib/Headers/shaintrin.h       |  2 +-
 clang/lib/Headers/smmintrin.h       | 90 +++++++++++++++++++----------
 clang/lib/Headers/tbmintrin.h       |  8 ++-
 7 files changed, 85 insertions(+), 55 deletions(-)

diff --git a/clang/lib/Headers/__wmmintrin_aes.h b/clang/lib/Headers/__wmmintrin_aes.h
index 81b2b8d0b0a4..100799ebfdb8 100644
--- a/clang/lib/Headers/__wmmintrin_aes.h
+++ b/clang/lib/Headers/__wmmintrin_aes.h
@@ -59,7 +59,7 @@ _mm_aesimc_si128(__m128i __V)
 }
 
 #define _mm_aeskeygenassist_si128(C, R) \
-  __builtin_ia32_aeskeygenassist128((C), (R))
+  (__m128i)__builtin_ia32_aeskeygenassist128((__v2di)(__m128i)(C), (int)(R))
 
 #undef __DEFAULT_FN_ATTRS
 
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index 73e32f71457f..84b40b11ea4b 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -882,20 +882,18 @@ _mm256_permutevar8x32_ps(__m256 __a, __m256 __b)
   (__m256i)__builtin_ia32_permti256((__m256i)(V1), (__m256i)(V2), (M)); })
 
 #define _mm256_extracti128_si256(V, M) __extension__ ({ \
-  (__m128i)__builtin_shufflevector( \
-    (__v4di)(V), \
-    (__v4di)(_mm256_setzero_si256()), \
-    (((M) & 1) ? 2 : 0), \
-    (((M) & 1) ? 3 : 1) );})
+  (__m128i)__builtin_shufflevector((__v4di)(__m256i)(V), \
+                                   (__v4di)_mm256_setzero_si256(), \
+                                   (((M) & 1) ? 2 : 0), \
+                                   (((M) & 1) ? 3 : 1) ); })
 
 #define _mm256_inserti128_si256(V1, V2, M) __extension__ ({ \
-  (__m256i)__builtin_shufflevector( \
-    (__v4di)(V1), \
-    (__v4di)_mm256_castsi128_si256((__m128i)(V2)), \
-    (((M) & 1) ? 0 : 4), \
-    (((M) & 1) ? 1 : 5), \
-    (((M) & 1) ? 4 : 2), \
-    (((M) & 1) ? 5 : 3) );})
+  (__m256i)__builtin_shufflevector((__v4di)(__m256i)(V1), \
+                                   (__v4di)_mm256_castsi128_si256((__m128i)(V2)), \
+                                   (((M) & 1) ? 0 : 4), \
+                                   (((M) & 1) ? 1 : 5), \
+                                   (((M) & 1) ? 4 : 2), \
+                                   (((M) & 1) ? 5 : 3) ); })
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_maskload_epi32(int const *__X, __m256i __M)
diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index 292f10d9e935..6d1ca5473dcf 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -1157,7 +1157,7 @@ _mm256_castsi128_si256(__m128i __a)
 */
 #define _mm256_insertf128_ps(V1, V2, M) __extension__ ({ \
   (__m256)__builtin_shufflevector( \
-    (__v8sf)(V1), \
+    (__v8sf)(__m256)(V1), \
     (__v8sf)_mm256_castps128_ps256((__m128)(V2)), \
     (((M) & 1) ? 0 : 8), \
     (((M) & 1) ? 1 : 9), \
@@ -1170,7 +1170,7 @@ _mm256_castsi128_si256(__m128i __a)
 
 #define _mm256_insertf128_pd(V1, V2, M) __extension__ ({ \
   (__m256d)__builtin_shufflevector( \
-    (__v4df)(V1), \
+    (__v4df)(__m256d)(V1), \
     (__v4df)_mm256_castpd128_pd256((__m128d)(V2)), \
     (((M) & 1) ? 0 : 4), \
     (((M) & 1) ? 1 : 5), \
@@ -1179,7 +1179,7 @@ _mm256_castsi128_si256(__m128i __a)
 
 #define _mm256_insertf128_si256(V1, V2, M) __extension__ ({ \
   (__m256i)__builtin_shufflevector( \
-    (__v4di)(V1), \
+    (__v4di)(__m256i)(V1), \
     (__v4di)_mm256_castsi128_si256((__m128i)(V2)), \
     (((M) & 1) ? 0 : 4), \
     (((M) & 1) ? 1 : 5), \
@@ -1193,7 +1193,7 @@ _mm256_castsi128_si256(__m128i __a)
 */
 #define _mm256_extractf128_ps(V, M) __extension__ ({ \
   (__m128)__builtin_shufflevector( \
-    (__v8sf)(V), \
+    (__v8sf)(__m256)(V), \
     (__v8sf)(_mm256_setzero_ps()), \
     (((M) & 1) ? 4 : 0), \
     (((M) & 1) ? 5 : 1), \
@@ -1202,14 +1202,14 @@ _mm256_castsi128_si256(__m128i __a)
 
 #define _mm256_extractf128_pd(V, M) __extension__ ({ \
   (__m128d)__builtin_shufflevector( \
-    (__v4df)(V), \
+    (__v4df)(__m256d)(V), \
     (__v4df)(_mm256_setzero_pd()), \
     (((M) & 1) ? 2 : 0), \
     (((M) & 1) ? 3 : 1) );})
 
 #define _mm256_extractf128_si256(V, M) __extension__ ({ \
   (__m128i)__builtin_shufflevector( \
-    (__v4di)(V), \
+    (__v4di)(__m256i)(V), \
     (__v4di)(_mm256_setzero_si256()), \
     (((M) & 1) ? 2 : 0), \
     (((M) & 1) ? 3 : 1) );})
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index 136afc73f13a..cb216c07e949 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -1439,8 +1439,8 @@ _mm_movemask_pd(__m128d __a)
 }
 
 #define _mm_shuffle_pd(a, b, i) __extension__ ({ \
-  __builtin_shufflevector((__m128d)(a), (__m128d)(b), \
-                          (i) & 1, (((i) & 2) >> 1) + 2); })
+  (__m128d)__builtin_shufflevector((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \
+                                   (i) & 1, (((i) & 2) >> 1) + 2); })
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_castpd_ps(__m128d __a)
diff --git a/clang/lib/Headers/shaintrin.h b/clang/lib/Headers/shaintrin.h
index 8602d0249d48..9b5d21800819 100644
--- a/clang/lib/Headers/shaintrin.h
+++ b/clang/lib/Headers/shaintrin.h
@@ -32,7 +32,7 @@
 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sha")))
 
 #define _mm_sha1rnds4_epu32(V1, V2, M) __extension__ ({ \
-  __builtin_ia32_sha1rnds4((V1), (V2), (M)); })
+  __builtin_ia32_sha1rnds4((__v4si)(__m128i)(V1), (__v4si)(__m128i)(V2), (M)); })
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_sha1nexte_epu32(__m128i __X, __m128i __Y)
diff --git a/clang/lib/Headers/smmintrin.h b/clang/lib/Headers/smmintrin.h
index 5973300416d6..69ad07f42ad6 100644
--- a/clang/lib/Headers/smmintrin.h
+++ b/clang/lib/Headers/smmintrin.h
@@ -199,7 +199,7 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
 #define _mm_insert_ps(X, Y, N) __builtin_ia32_insertps128((X), (Y), (N))
 #define _mm_extract_ps(X, N) (__extension__ \
                       ({ union { int __i; float __f; } __t; \
-                         __v4sf __a = (__v4sf)(X); \
+                         __v4sf __a = (__v4sf)(__m128)(X); \
                          __t.__f = __a[(N) & 3]; \
                          __t.__i;}))
 
@@ -217,29 +217,34 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
                                             _MM_MK_INSERTPS_NDX((N), 0, 0x0e))
 
 /* Insert int into packed integer array at index. */
-#define _mm_insert_epi8(X, I, N) (__extension__ ({ __v16qi __a = (__v16qi)(X); \
-                                                   __a[(N) & 15] = (I); \
-                                                   __a;}))
-#define _mm_insert_epi32(X, I, N) (__extension__ ({ __v4si __a = (__v4si)(X); \
-                                                    __a[(N) & 3] = (I); \
-                                                    __a;}))
+#define _mm_insert_epi8(X, I, N) (__extension__ \
+                                  ({ __v16qi __a = (__v16qi)(__m128i)(X); \
+                                     __a[(N) & 15] = (I); \
+                                     __a;}))
+#define _mm_insert_epi32(X, I, N) (__extension__ \
+                                   ({ __v4si __a = (__v4si)(__m128i)(X); \
+                                      __a[(N) & 3] = (I); \
+                                      __a;}))
 #ifdef __x86_64__
-#define _mm_insert_epi64(X, I, N) (__extension__ ({ __v2di __a = (__v2di)(X); \
-                                                    __a[(N) & 1] = (I); \
-                                                    __a;}))
+#define _mm_insert_epi64(X, I, N) (__extension__ \
+                                   ({ __v2di __a = (__v2di)(__m128i)(X); \
+                                      __a[(N) & 1] = (I); \
+                                      __a;}))
 #endif /* __x86_64__ */
 
 /* Extract int from packed integer array at index.  This returns the element
  * as a zero extended value, so it is unsigned.
 */
-#define _mm_extract_epi8(X, N) (__extension__ ({ __v16qi __a = (__v16qi)(X); \
-                                                 (int)(unsigned char) \
-                                                     __a[(N) & 15];}))
-#define _mm_extract_epi32(X, N) (__extension__ ({ __v4si __a = (__v4si)(X); \
-                                                  __a[(N) & 3];}))
+#define _mm_extract_epi8(X, N) (__extension__ \
+                                ({ __v16qi __a = (__v16qi)(__m128i)(X); \
+                                   (int)(unsigned char) __a[(N) & 15];}))
+#define _mm_extract_epi32(X, N) (__extension__ \
+                                 ({ __v4si __a = (__v4si)(__m128i)(X); \
+                                    (int)__a[(N) & 3];}))
 #ifdef __x86_64__
-#define _mm_extract_epi64(X, N) (__extension__ ({ __v2di __a = (__v2di)(X); \
-                                                  __a[(N) & 1];}))
+#define _mm_extract_epi64(X, N) (__extension__ \
+                                 ({ __v2di __a = (__v2di)(__m128i)(X); \
+                                    (long long)__a[(N) & 1];}))
 #endif /* __x86_64 */
 
 /* SSE4 128-bit Packed Integer Comparisons. */
@@ -406,36 +411,59 @@ _mm_minpos_epu16(__m128i __V)
 #define _SIDD_UNIT_MASK 0x40
 
 /* SSE4.2 Packed Comparison Intrinsics. */
-#define _mm_cmpistrm(A, B, M) __builtin_ia32_pcmpistrm128((A), (B), (M))
-#define _mm_cmpistri(A, B, M) __builtin_ia32_pcmpistri128((A), (B), (M))
+#define _mm_cmpistrm(A, B, M) \
+  (__m128i)__builtin_ia32_pcmpistrm128((__v16qi)(__m128i)(A), \
+                                       (__v16qi)(__m128i)(B), (int)(M))
+#define _mm_cmpistri(A, B, M) \
+  (int)__builtin_ia32_pcmpistri128((__v16qi)(__m128i)(A), \
+                                   (__v16qi)(__m128i)(B), (int)(M))
 
 #define _mm_cmpestrm(A, LA, B, LB, M) \
-  __builtin_ia32_pcmpestrm128((A), (LA), (B), (LB), (M))
+  (__m128i)__builtin_ia32_pcmpestrm128((__v16qi)(__m128i)(A), (int)(LA), \
+                                       (__v16qi)(__m128i)(B), (int)(LB), \
+                                       (int)(M))
 #define _mm_cmpestri(A, LA, B, LB, M) \
-  __builtin_ia32_pcmpestri128((A), (LA), (B), (LB), (M))
+  (int)__builtin_ia32_pcmpestri128((__v16qi)(__m128i)(A), (int)(LA), \
+                                   (__v16qi)(__m128i)(B), (int)(LB), \
+                                   (int)(M))
 
 /* SSE4.2 Packed Comparison Intrinsics and EFlag Reading. */
 #define _mm_cmpistra(A, B, M) \
-  __builtin_ia32_pcmpistria128((A), (B), (M))
+  (int)__builtin_ia32_pcmpistria128((__v16qi)(__m128i)(A), \
+                                    (__v16qi)(__m128i)(B), (int)(M))
 #define _mm_cmpistrc(A, B, M) \
-  __builtin_ia32_pcmpistric128((A), (B), (M))
+  (int)__builtin_ia32_pcmpistric128((__v16qi)(__m128i)(A), \
+                                    (__v16qi)(__m128i)(B), (int)(M))
 #define _mm_cmpistro(A, B, M) \
-  __builtin_ia32_pcmpistrio128((A), (B), (M))
+  (int)__builtin_ia32_pcmpistrio128((__v16qi)(__m128i)(A), \
+                                    (__v16qi)(__m128i)(B), (int)(M))
 #define _mm_cmpistrs(A, B, M) \
-  __builtin_ia32_pcmpistris128((A), (B), (M))
+  (int)__builtin_ia32_pcmpistris128((__v16qi)(__m128i)(A), \
+                                    (__v16qi)(__m128i)(B), (int)(M))
 #define _mm_cmpistrz(A, B, M) \
-  __builtin_ia32_pcmpistriz128((A), (B), (M))
+  (int)__builtin_ia32_pcmpistriz128((__v16qi)(__m128i)(A), \
+                                    (__v16qi)(__m128i)(B), (int)(M))
 
 #define _mm_cmpestra(A, LA, B, LB, M) \
-  __builtin_ia32_pcmpestria128((A), (LA), (B), (LB), (M))
+  (int)__builtin_ia32_pcmpestria128((__v16qi)(__m128i)(A), (int)(LA), \
+                                    (__v16qi)(__m128i)(B), (int)(LB), \
+                                    (int)(M))
 #define _mm_cmpestrc(A, LA, B, LB, M) \
-  __builtin_ia32_pcmpestric128((A), (LA), (B), (LB), (M))
+  (int)__builtin_ia32_pcmpestric128((__v16qi)(__m128i)(A), (int)(LA), \
+                                    (__v16qi)(__m128i)(B), (int)(LB), \
+                                    (int)(M))
 #define _mm_cmpestro(A, LA, B, LB, M) \
-  __builtin_ia32_pcmpestrio128((A), (LA), (B), (LB), (M))
+  (int)__builtin_ia32_pcmpestrio128((__v16qi)(__m128i)(A), (int)(LA), \
+                                    (__v16qi)(__m128i)(B), (int)(LB), \
+                                    (int)(M))
 #define _mm_cmpestrs(A, LA, B, LB, M) \
-  __builtin_ia32_pcmpestris128((A), (LA), (B), (LB), (M))
+  (int)__builtin_ia32_pcmpestris128((__v16qi)(__m128i)(A), (int)(LA), \
+                                    (__v16qi)(__m128i)(B), (int)(LB), \
+                                    (int)(M))
 #define _mm_cmpestrz(A, LA, B, LB, M) \
-  __builtin_ia32_pcmpestriz128((A), (LA), (B), (LB), (M))
+  (int)__builtin_ia32_pcmpestriz128((__v16qi)(__m128i)(A), (int)(LA), \
+                                    (__v16qi)(__m128i)(B), (int)(LB), \
+                                    (int)(M))
 
 /* SSE4.2 Compare Packed Data -- Greater Than. */
 static __inline__ __m128i __DEFAULT_FN_ATTRS
diff --git a/clang/lib/Headers/tbmintrin.h b/clang/lib/Headers/tbmintrin.h
index 62f613f9ee5c..785961c6ab86 100644
--- a/clang/lib/Headers/tbmintrin.h
+++ b/clang/lib/Headers/tbmintrin.h
@@ -31,7 +31,9 @@
 /* Define the default attributes for the functions in this file. */
 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("tbm")))
 
-#define __bextri_u32(a, b) (__builtin_ia32_bextri_u32((a), (b)))
+#define __bextri_u32(a, b) \
+  ((unsigned int)__builtin_ia32_bextri_u32((unsigned int)(a), \
+                                           (unsigned int)(b)))
 
 static __inline__ unsigned int __DEFAULT_FN_ATTRS
 __blcfill_u32(unsigned int a)
@@ -88,7 +90,9 @@ __tzmsk_u32(unsigned int a)
 }
 
 #ifdef __x86_64__
-#define __bextri_u64(a, b) (__builtin_ia32_bextri_u64((a), (int)(b)))
+#define __bextri_u64(a, b) \
+  ((unsigned long long)__builtin_ia32_bextri_u64((unsigned long long)(a), \
+                                                 (unsigned long long)(b)))
 
 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
 __blcfill_u64(unsigned long long a