forked from OSchip/llvm-project
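Convert the vperm2f128 and vperm2i128 intrinsics back to using LLVM intrinsics. Unfortunately, these instructions have behavior that can't be modeled with shufflevector: bits 3 and 7 of the immediate zero the corresponding 128-bit half of the result, which the previous shufflevector-based macros ignored by masking the immediate with 0x3 and 0x30.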
llvm-svn: 154906
This commit is contained in:
parent
6c4eea4db7
commit
26e74e50b6
|
@ -412,6 +412,9 @@ BUILTIN(__builtin_ia32_cvtps2pd256, "V4dV4f", "")
|
|||
BUILTIN(__builtin_ia32_cvttpd2dq256, "V4iV4d", "")
|
||||
BUILTIN(__builtin_ia32_cvtpd2dq256, "V4iV4d", "")
|
||||
BUILTIN(__builtin_ia32_cvttps2dq256, "V8iV8f", "")
|
||||
BUILTIN(__builtin_ia32_vperm2f128_pd256, "V4dV4dV4dIc", "")
|
||||
BUILTIN(__builtin_ia32_vperm2f128_ps256, "V8fV8fV8fIc", "")
|
||||
BUILTIN(__builtin_ia32_vperm2f128_si256, "V8iV8iV8iIc", "")
|
||||
BUILTIN(__builtin_ia32_vinsertf128_pd256, "V4dV4dV2dIc", "")
|
||||
BUILTIN(__builtin_ia32_vinsertf128_ps256, "V8fV8fV4fIc", "")
|
||||
BUILTIN(__builtin_ia32_vinsertf128_si256, "V8iV8iV4iIc", "")
|
||||
|
@ -561,6 +564,7 @@ BUILTIN(__builtin_ia32_pbroadcastd128, "V4iV4i", "")
|
|||
BUILTIN(__builtin_ia32_pbroadcastq128, "V2LLiV2LLi", "")
|
||||
BUILTIN(__builtin_ia32_permvarsi256, "V8iV8iV8i", "")
|
||||
BUILTIN(__builtin_ia32_permvarsf256, "V8fV8fV8f", "")
|
||||
BUILTIN(__builtin_ia32_permti256, "V4LLiV4LLiV4LLiIc", "")
|
||||
BUILTIN(__builtin_ia32_extract128i256, "V2LLiV4LLiIc", "")
|
||||
BUILTIN(__builtin_ia32_insert128i256, "V4LLiV4LLiV2LLiIc", "")
|
||||
BUILTIN(__builtin_ia32_maskloadd256, "V8iV8iC*V8i", "")
|
||||
|
|
|
@ -841,11 +841,7 @@ _mm256_permutevar8x32_ps(__m256 a, __m256 b)
|
|||
#define _mm256_permute2x128_si256(V1, V2, M) __extension__ ({ \
|
||||
__m256i __V1 = (V1); \
|
||||
__m256i __V2 = (V2); \
|
||||
__builtin_shufflevector(__V1, __V2, \
|
||||
((M) & 0x3) * 2, \
|
||||
((M) & 0x3) * 2 + 1, \
|
||||
(((M) & 0x30) >> 4) * 2, \
|
||||
(((M) & 0x30) >> 4) * 2 + 1); })
|
||||
(__m256i)__builtin_ia32_permti256(__V1, __V2, (M)); })
|
||||
|
||||
#define _mm256_extracti128_si256(A, O) __extension__ ({ \
|
||||
__m256i __A = (A); \
|
||||
|
|
|
@ -289,37 +289,17 @@ _mm256_permutevar_ps(__m256 a, __m256i c)
|
|||
#define _mm256_permute2f128_pd(V1, V2, M) __extension__ ({ \
|
||||
__m256d __V1 = (V1); \
|
||||
__m256d __V2 = (V2); \
|
||||
(__m256d)__builtin_shufflevector((__v4df)__V1, (__v4df)__V2, \
|
||||
((M) & 0x3) * 2, \
|
||||
((M) & 0x3) * 2 + 1, \
|
||||
(((M) & 0x30) >> 4) * 2, \
|
||||
(((M) & 0x30) >> 4) * 2 + 1); })
|
||||
(__m256d)__builtin_ia32_vperm2f128_pd256((__v4df)__V1, (__v4df)__V2, (M)); })
|
||||
|
||||
#define _mm256_permute2f128_ps(V1, V2, M) __extension__ ({ \
|
||||
__m256 __V1 = (V1); \
|
||||
__m256 __V2 = (V2); \
|
||||
(__m256)__builtin_shufflevector((__v8sf)__V1, (__v8sf)__V2, \
|
||||
((M) & 0x3) * 4, \
|
||||
((M) & 0x3) * 4 + 1, \
|
||||
((M) & 0x3) * 4 + 2, \
|
||||
((M) & 0x3) * 4 + 3, \
|
||||
(((M) & 0x30) >> 4) * 4, \
|
||||
(((M) & 0x30) >> 4) * 4 + 1, \
|
||||
(((M) & 0x30) >> 4) * 4 + 2, \
|
||||
(((M) & 0x30) >> 4) * 4 + 3); })
|
||||
(__m256)__builtin_ia32_vperm2f128_ps256((__v8sf)__V1, (__v8sf)__V2, (M)); })
|
||||
|
||||
#define _mm256_permute2f128_si256(V1, V2, M) __extension__ ({ \
|
||||
__m256i __V1 = (V1); \
|
||||
__m256i __V2 = (V2); \
|
||||
(__m256i)__builtin_shufflevector((__v8si)__V1, (__v8si)__V2, \
|
||||
((M) & 0x3) * 4, \
|
||||
((M) & 0x3) * 4 + 1, \
|
||||
((M) & 0x3) * 4 + 2, \
|
||||
((M) & 0x3) * 4 + 3, \
|
||||
(((M) & 0x30) >> 4) * 4, \
|
||||
(((M) & 0x30) >> 4) * 4 + 1, \
|
||||
(((M) & 0x30) >> 4) * 4 + 2, \
|
||||
(((M) & 0x30) >> 4) * 4 + 3); })
|
||||
(__m256i)__builtin_ia32_vperm2f128_si256((__v8si)__V1, (__v8si)__V2, (M)); })
|
||||
|
||||
/* Vector Blend */
|
||||
#define _mm256_blend_pd(V1, V2, M) __extension__ ({ \
|
||||
|
|
|
@ -48,18 +48,18 @@ __m256 test_mm256_permute_ps(__m256 a) {
|
|||
|
||||
__m256d test_mm256_permute2f128_pd(__m256d a, __m256d b) {
|
||||
// Check if the mask is correct
|
||||
// CHECK: shufflevector{{.*}}<i32 2, i32 3, i32 6, i32 7>
|
||||
// CHECK: @llvm.x86.avx.vperm2f128.pd.256
|
||||
return _mm256_permute2f128_pd(a, b, 0x31);
|
||||
}
|
||||
|
||||
__m256 test_mm256_permute2f128_ps(__m256 a, __m256 b) {
|
||||
// Check if the mask is correct
|
||||
// CHECK: shufflevector{{.*}}<i32 12, i32 13, i32 14, i32 15, i32 4, i32 5, i32 6, i32 7>
|
||||
// CHECK: @llvm.x86.avx.vperm2f128.ps.256
|
||||
return _mm256_permute2f128_ps(a, b, 0x13);
|
||||
}
|
||||
|
||||
__m256i test_mm256_permute2f128_si256(__m256i a, __m256i b) {
|
||||
// Check if the mask is correct
|
||||
// CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
|
||||
// CHECK: @llvm.x86.avx.vperm2f128.si.256
|
||||
return _mm256_permute2f128_si256(a, b, 0x20);
|
||||
}
|
||||
|
|
|
@ -677,7 +677,7 @@ __m256i test_mm256_permute4x64_epi64(__m256i a) {
|
|||
}
|
||||
|
||||
__m256i test_mm256_permute2x128_si256(__m256i a, __m256i b) {
|
||||
// CHECK: shufflevector{{.*}}<i32 2, i32 3, i32 6, i32 7>
|
||||
// CHECK: @llvm.x86.avx2.vperm2i128
|
||||
return _mm256_permute2x128_si256(a, b, 0x31);
|
||||
}
|
||||
|
||||
|
|
|
@ -414,6 +414,9 @@ void f0() {
|
|||
tmp_V4i = __builtin_ia32_cvttpd2dq256(tmp_V4d);
|
||||
tmp_V4i = __builtin_ia32_cvtpd2dq256(tmp_V4d);
|
||||
tmp_V8i = __builtin_ia32_cvttps2dq256(tmp_V8f);
|
||||
tmp_V4d = __builtin_ia32_vperm2f128_pd256(tmp_V4d, tmp_V4d, 0x7);
|
||||
tmp_V8f = __builtin_ia32_vperm2f128_ps256(tmp_V8f, tmp_V8f, 0x7);
|
||||
tmp_V8i = __builtin_ia32_vperm2f128_si256(tmp_V8i, tmp_V8i, 0x7);
|
||||
tmp_V4d = __builtin_ia32_vinsertf128_pd256(tmp_V4d, tmp_V2d, 0x7);
|
||||
tmp_V8f = __builtin_ia32_vinsertf128_ps256(tmp_V8f, tmp_V4f, 0x7);
|
||||
tmp_V8i = __builtin_ia32_vinsertf128_si256(tmp_V8i, tmp_V4i, 0x7);
|
||||
|
|
Loading…
Reference in New Issue