From c94122e05bf086f3630ec8d3a4b319df8a31ff4d Mon Sep 17 00:00:00 2001 From: Manman Ren Date: Wed, 23 Oct 2013 20:33:14 +0000 Subject: [PATCH] Intrinsics: fix extract & insert when index is out of bound. Now, all extract & insert intrinsics should apply the correct bitwise AND mask to the index, so that its higher bits are ignored. rdar://15250497 llvm-svn: 193267 --- clang/lib/Headers/avxintrin.h | 8 ++++---- clang/lib/Headers/smmintrin.h | 14 +++++++------- clang/test/CodeGen/avx-builtins.c | 18 ++++++++++++++++++ clang/test/CodeGen/sse-builtins.c | 24 ++++++++++++++++++++++++ 4 files changed, 53 insertions(+), 11 deletions(-) diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index 50454f265368..141c4d994bbc 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -435,21 +435,21 @@ static __inline int __attribute__((__always_inline__, __nodebug__)) _mm256_extract_epi32(__m256i __a, int const __imm) { __v8si __b = (__v8si)__a; - return __b[__imm]; + return __b[__imm & 7]; } static __inline int __attribute__((__always_inline__, __nodebug__)) _mm256_extract_epi16(__m256i __a, int const __imm) { __v16hi __b = (__v16hi)__a; - return __b[__imm]; + return __b[__imm & 15]; } static __inline int __attribute__((__always_inline__, __nodebug__)) _mm256_extract_epi8(__m256i __a, int const __imm) { __v32qi __b = (__v32qi)__a; - return __b[__imm]; + return __b[__imm & 31]; } #ifdef __x86_64__ @@ -457,7 +457,7 @@ static __inline long long __attribute__((__always_inline__, __nodebug__)) _mm256_extract_epi64(__m256i __a, const int __imm) { __v4di __b = (__v4di)__a; - return __b[__imm]; + return __b[__imm & 3]; } #endif diff --git a/clang/lib/Headers/smmintrin.h b/clang/lib/Headers/smmintrin.h index 5b6db6c4456d..53b3ccb4310c 100644 --- a/clang/lib/Headers/smmintrin.h +++ b/clang/lib/Headers/smmintrin.h @@ -197,7 +197,7 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2) #define _mm_extract_ps(X, N) (__extension__ \ ({ union { int __i; float __f; } __t; \ __v4sf __a = 
(__v4sf)(X); \ - __t.__f = __a[N]; \ + __t.__f = __a[(N) & 3]; \ __t.__i;})) /* Miscellaneous insert and extract macros. */ @@ -215,14 +215,14 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2) /* Insert int into packed integer array at index. */ #define _mm_insert_epi8(X, I, N) (__extension__ ({ __v16qi __a = (__v16qi)(X); \ - __a[(N)] = (I); \ + __a[(N) & 15] = (I); \ __a;})) #define _mm_insert_epi32(X, I, N) (__extension__ ({ __v4si __a = (__v4si)(X); \ - __a[(N)] = (I); \ + __a[(N) & 3] = (I); \ __a;})) #ifdef __x86_64__ #define _mm_insert_epi64(X, I, N) (__extension__ ({ __v2di __a = (__v2di)(X); \ - __a[(N)] = (I); \ + __a[(N) & 1] = (I); \ __a;})) #endif /* __x86_64__ */ @@ -231,12 +231,12 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2) */ #define _mm_extract_epi8(X, N) (__extension__ ({ __v16qi __a = (__v16qi)(X); \ (int)(unsigned char) \ - __a[(N)];})) + __a[(N) & 15];})) #define _mm_extract_epi32(X, N) (__extension__ ({ __v4si __a = (__v4si)(X); \ - __a[(N)];})) + __a[(N) & 3];})) #ifdef __x86_64__ #define _mm_extract_epi64(X, N) (__extension__ ({ __v2di __a = (__v2di)(X); \ - __a[(N)];})) + __a[(N) & 1];})) #endif /* __x86_64 */ /* SSE4 128-bit Packed Integer Comparisons. 
*/ diff --git a/clang/test/CodeGen/avx-builtins.c b/clang/test/CodeGen/avx-builtins.c index 0e5a741bcf67..c88946fe8567 100644 --- a/clang/test/CodeGen/avx-builtins.c +++ b/clang/test/CodeGen/avx-builtins.c @@ -93,3 +93,21 @@ int test_mm_cmpistrz(__m128i A, __m128i B) { // CHECK: @llvm.x86.sse42.pcmpistriz128 return _mm_cmpistrz(A, B, 7); } + +int test_extract_epi32(__m256i __a) { + // CHECK-LABEL: @test_extract_epi32 + // CHECK: extractelement <8 x i32> %{{.*}}, i32 0 + return _mm256_extract_epi32(__a, 8); +} + +int test_extract_epi16(__m256i __a) { + // CHECK-LABEL: @test_extract_epi16 + // CHECK: extractelement <16 x i16> %{{.*}}, i32 0 + return _mm256_extract_epi16(__a, 16); +} + +int test_extract_epi8(__m256i __a) { + // CHECK-LABEL: @test_extract_epi8 + // CHECK: extractelement <32 x i8> %{{.*}}, i32 0 + return _mm256_extract_epi8(__a, 32); +} diff --git a/clang/test/CodeGen/sse-builtins.c b/clang/test/CodeGen/sse-builtins.c index cee9b3c2a5df..1f5cb8e9e107 100644 --- a/clang/test/CodeGen/sse-builtins.c +++ b/clang/test/CodeGen/sse-builtins.c @@ -213,3 +213,27 @@ void test_extract_epi16(__m128i __a) { // CHECK: extractelement <8 x i16> %{{.*}}, i32 [[x]] _mm_extract_epi16(__a, 8); } + +int test_extract_ps(__m128i __a) { + // CHECK-LABEL: @test_extract_ps + // CHECK: extractelement <4 x float> %{{.*}}, i32 0 + return _mm_extract_ps(__a, 4); +} + +int test_extract_epi8(__m128i __a) { + // CHECK-LABEL: @test_extract_epi8 + // CHECK: extractelement <16 x i8> %{{.*}}, i32 0 + return _mm_extract_epi8(__a, 16); +} + +int test_extract_epi32(__m128i __a) { + // CHECK-LABEL: @test_extract_epi32 + // CHECK: extractelement <4 x i32> %{{.*}}, i32 0 + return _mm_extract_epi32(__a, 4); +} + +void test_insert_epi32(__m128i __a, int b) { + // CHECK-LABEL: @test_insert_epi32 + // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 0 + _mm_insert_epi32(__a, b, 4); +}