Remove the 256-bit shuffle built-ins and make the AVX intrinsics call LLVM's __builtin_shufflevector with the appropriate arguments

llvm-svn: 110766
This commit is contained in:
Bruno Cardoso Lopes 2010-08-11 01:17:34 +00:00
parent 79937dfc5b
commit e712a135b7
3 changed files with 12 additions and 14 deletions

View File

@ -346,8 +346,6 @@ BUILTIN(__builtin_ia32_blendps256, "V8fV8fV8fi", "")
// Declarations of x86 builtins whose names end in "256". Each entry passes
// three arguments: the builtin's name and two string literals, the second of
// which is empty for every entry in this run.
// NOTE(review): the first string presumably encodes the return type followed
// by the parameter types (e.g. "V4d" = vector of 4 doubles, trailing "c"/"i" =
// integer immediate) -- confirm against the file that defines BUILTIN.
BUILTIN(__builtin_ia32_blendvpd256, "V4dV4dV4dV4d", "")
BUILTIN(__builtin_ia32_blendvps256, "V8fV8fV8fV8f", "")
BUILTIN(__builtin_ia32_dpps256, "V8fV8fV8fi", "")
BUILTIN(__builtin_ia32_shufpd256, "V4dV4dV4dc", "")
BUILTIN(__builtin_ia32_shufps256, "V8fV8fV8fc", "")
BUILTIN(__builtin_ia32_cmppd256, "V4dV4dV4dc", "")
BUILTIN(__builtin_ia32_cmpps256, "V8fV8fV8fc", "")
BUILTIN(__builtin_ia32_vextractf128_pd256, "V2dV4dc", "")

View File

@ -340,17 +340,19 @@ _mm256_dp_ps(__m256 a, __m256 b, const int c)
}
/* Vector shuffle */
/* Builtin-based form of the 256-bit double shuffle: reinterprets both
 * operands as __v4df, hands them together with the selector s to
 * __builtin_ia32_shufpd256, and returns the result as __m256d. */
static __inline __m256d __attribute__((__always_inline__, __nodebug__))
_mm256_shuffle_pd(__m256d a, __m256d b, const int s)
{
  __v4df first = (__v4df)a;
  __v4df second = (__v4df)b;

  return (__m256d)__builtin_ia32_shufpd256(first, second, s);
}
#define _mm256_shuffle_ps(a, b, mask) \
(__builtin_shufflevector((__v8sf)(a), (__v8sf)(b), \
(mask) & 0x3, ((mask) & 0xc) >> 2, \
(((mask) & 0x30) >> 4) + 8, (((mask) & 0xc0) >> 6) + 8 \
(mask) & 0x3 + 4, (((mask) & 0xc) >> 2) + 4, \
(((mask) & 0x30) >> 4) + 12, (((mask) & 0xc0) >> 6) + 12))
/* Builtin-based form of the 256-bit float shuffle: reinterprets both
 * operands as __v8sf, hands them together with the selector s to
 * __builtin_ia32_shufps256, and returns the result as __m256. */
static __inline __m256 __attribute__((__always_inline__, __nodebug__))
_mm256_shuffle_ps(__m256 a, __m256 b, const int s)
{
  __v8sf first = (__v8sf)a;
  __v8sf second = (__v8sf)b;

  return (__m256)__builtin_ia32_shufps256(first, second, s);
}
#define _mm256_shuffle_pd(a, b, mask) \
(__builtin_shufflevector((__v4df)(a), (__v4df)(b), \
(mask) & 0x1, \
(((mask) & 0x2) >> 1) + 4, \
(((mask) & 0x4) >> 2) + 2, \
(((mask) & 0x8) >> 3) + 6))
/* Compare */
#define _CMP_EQ_OQ 0x00 /* Equal (ordered, non-signaling) */

View File

@ -409,8 +409,6 @@ void f0() {
tmp_V4d = __builtin_ia32_blendvpd256(tmp_V4d, tmp_V4d, tmp_V4d);
tmp_V8f = __builtin_ia32_blendvps256(tmp_V8f, tmp_V8f, tmp_V8f);
tmp_V8f = __builtin_ia32_dpps256(tmp_V8f, tmp_V8f, 0x7);
tmp_V4d = __builtin_ia32_shufpd256(tmp_V4d, tmp_V4d, 0x7);
tmp_V8f = __builtin_ia32_shufps256(tmp_V8f, tmp_V8f, 0x7);
tmp_V4d = __builtin_ia32_cmppd256(tmp_V4d, tmp_V4d, 0);
tmp_V8f = __builtin_ia32_cmpps256(tmp_V8f, tmp_V8f, 0);
tmp_V2d = __builtin_ia32_vextractf128_pd256(tmp_V4d, 0x7);