forked from OSchip/llvm-project
[X86, AVX2] Replace inserti128 and extracti128 intrinsics with generic shuffles
This is nearly identical to the v*f128_si256 parts of r231792 and r232052. AVX2 introduced proper integer variants of the hacked integer insert/extract C intrinsics that were created for this same functionality with AVX1. This should complete the front-end fixes for insert/extract128 intrinsics. Corresponding LLVM patch to follow. llvm-svn: 232109
This commit is contained in:
parent
fb53eded5f
commit
0a6da5de55
|
@ -587,8 +587,6 @@ BUILTIN(__builtin_ia32_pbroadcastq128, "V2LLiV2LLi", "")
|
|||
BUILTIN(__builtin_ia32_permvarsi256, "V8iV8iV8i", "")
|
||||
BUILTIN(__builtin_ia32_permvarsf256, "V8fV8fV8f", "")
|
||||
BUILTIN(__builtin_ia32_permti256, "V4LLiV4LLiV4LLiIc", "")
|
||||
BUILTIN(__builtin_ia32_extract128i256, "V2LLiV4LLiIc", "")
|
||||
BUILTIN(__builtin_ia32_insert128i256, "V4LLiV4LLiV2LLiIc", "")
|
||||
BUILTIN(__builtin_ia32_maskloadd256, "V8iV8iC*V8i", "")
|
||||
BUILTIN(__builtin_ia32_maskloadq256, "V4LLiV4LLiC*V4LLi", "")
|
||||
BUILTIN(__builtin_ia32_maskloadd, "V4iV4iC*V4i", "")
|
||||
|
|
|
@ -874,14 +874,21 @@ _mm256_permutevar8x32_ps(__m256 __a, __m256 __b)
|
|||
__m256i __V2 = (V2); \
|
||||
(__m256i)__builtin_ia32_permti256(__V1, __V2, (M)); })
|
||||
|
||||
#define _mm256_extracti128_si256(A, O) __extension__ ({ \
|
||||
__m256i __A = (A); \
|
||||
(__m128i)__builtin_ia32_extract128i256(__A, (O)); })
|
||||
#define _mm256_extracti128_si256(V, M) __extension__ ({ \
|
||||
(__m128i)__builtin_shufflevector( \
|
||||
(__v4di)(V), \
|
||||
(__v4di)(_mm256_setzero_si256()), \
|
||||
(((M) & 1) ? 2 : 0), \
|
||||
(((M) & 1) ? 3 : 1) );})
|
||||
|
||||
#define _mm256_inserti128_si256(V1, V2, O) __extension__ ({ \
|
||||
__m256i __V1 = (V1); \
|
||||
__m128i __V2 = (V2); \
|
||||
(__m256i)__builtin_ia32_insert128i256(__V1, __V2, (O)); })
|
||||
#define _mm256_inserti128_si256(V1, V2, M) __extension__ ({ \
|
||||
(__m256i)__builtin_shufflevector( \
|
||||
(__v4di)(V1), \
|
||||
(__v4di)_mm256_castsi128_si256((__m128i)(V2)), \
|
||||
(((M) & 1) ? 0 : 4), \
|
||||
(((M) & 1) ? 1 : 5), \
|
||||
(((M) & 1) ? 4 : 2), \
|
||||
(((M) & 1) ? 5 : 3) );})
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_maskload_epi32(int const *__X, __m256i __M)
|
||||
|
|
|
@ -882,8 +882,6 @@ bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
|
|||
switch (BuiltinID) {
|
||||
default: return false;
|
||||
case X86::BI_mm_prefetch: i = 1; l = 0; u = 3; break;
|
||||
case X86::BI__builtin_ia32_extract128i256: i = 1, l = 0, u = 1; break;
|
||||
case X86::BI__builtin_ia32_insert128i256: i = 2, l = 0; u = 1; break;
|
||||
case X86::BI__builtin_ia32_sha1rnds4: i = 2, l = 0; u = 3; break;
|
||||
case X86::BI__builtin_ia32_vpermil2pd:
|
||||
case X86::BI__builtin_ia32_vpermil2pd256:
|
||||
|
|
|
@ -695,16 +695,44 @@ __m256i test_mm256_permute2x128_si256(__m256i a, __m256i b) {
|
|||
return _mm256_permute2x128_si256(a, b, 0x31);
|
||||
}
|
||||
|
||||
__m128i test_mm256_extracti128_si256(__m256i a) {
|
||||
// CHECK: @llvm.x86.avx2.vextracti128
|
||||
__m128i test_mm256_extracti128_si256_0(__m256i a) {
|
||||
// CHECK-LABEL: @test_mm256_extracti128_si256_0
|
||||
// CHECK: shufflevector{{.*}}<i32 0, i32 1>
|
||||
return _mm256_extracti128_si256(a, 0);
|
||||
}
|
||||
|
||||
__m128i test_mm256_extracti128_si256_1(__m256i a) {
|
||||
// CHECK-LABEL: @test_mm256_extracti128_si256_1
|
||||
// CHECK: shufflevector{{.*}}<i32 2, i32 3>
|
||||
return _mm256_extracti128_si256(a, 1);
|
||||
}
|
||||
|
||||
__m256i test_mm256_inserti128_si256(__m256i a, __m128i b) {
|
||||
// CHECK: @llvm.x86.avx2.vinserti128
|
||||
// Immediate should be truncated to one bit.
|
||||
__m128i test_mm256_extracti128_si256_2(__m256i a) {
|
||||
// CHECK-LABEL: @test_mm256_extracti128_si256_2
|
||||
// CHECK: shufflevector{{.*}}<i32 0, i32 1>
|
||||
return _mm256_extracti128_si256(a, 2);
|
||||
}
|
||||
|
||||
__m256i test_mm256_inserti128_si256_0(__m256i a, __m128i b) {
|
||||
// CHECK-LABEL: @test_mm256_inserti128_si256_0
|
||||
// CHECK: shufflevector{{.*}}<i32 4, i32 5, i32 2, i32 3>
|
||||
return _mm256_inserti128_si256(a, b, 0);
|
||||
}
|
||||
|
||||
__m256i test_mm256_inserti128_si256_1(__m256i a, __m128i b) {
|
||||
// CHECK-LABEL: @test_mm256_inserti128_si256_1
|
||||
// CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 4, i32 5>
|
||||
return _mm256_inserti128_si256(a, b, 1);
|
||||
}
|
||||
|
||||
// Immediate should be truncated to one bit.
|
||||
__m256i test_mm256_inserti128_si256_2(__m256i a, __m128i b) {
|
||||
// CHECK-LABEL: @test_mm256_inserti128_si256_2
|
||||
// CHECK: shufflevector{{.*}}<i32 4, i32 5, i32 2, i32 3>
|
||||
return _mm256_inserti128_si256(a, b, 2);
|
||||
}
|
||||
|
||||
__m256i test_mm256_maskload_epi32(int const *a, __m256i m) {
|
||||
// CHECK: @llvm.x86.avx2.maskload.d.256
|
||||
return _mm256_maskload_epi32(a, m);
|
||||
|
|
Loading…
Reference in New Issue