forked from OSchip/llvm-project
Make the replicate intrinsics use shufflevector instead of the dup builtins, and remove the dup builtins
llvm-svn: 110646
This commit is contained in:
parent
85da72a88f
commit
3d3fc1d075
|
@ -371,9 +371,6 @@ BUILTIN(__builtin_ia32_vpermilps256, "V8fV8fc", "")
|
||||||
BUILTIN(__builtin_ia32_vinsertf128_pd256, "V4dV4dV2dc", "")
|
BUILTIN(__builtin_ia32_vinsertf128_pd256, "V4dV4dV2dc", "")
|
||||||
BUILTIN(__builtin_ia32_vinsertf128_ps256, "V8fV8fV4fc", "")
|
BUILTIN(__builtin_ia32_vinsertf128_ps256, "V8fV8fV4fc", "")
|
||||||
BUILTIN(__builtin_ia32_vinsertf128_si256, "V8iV8iV4ic", "")
|
BUILTIN(__builtin_ia32_vinsertf128_si256, "V8iV8iV4ic", "")
|
||||||
BUILTIN(__builtin_ia32_movshdup256, "V8fV8f", "")
|
|
||||||
BUILTIN(__builtin_ia32_movsldup256, "V8fV8f", "")
|
|
||||||
BUILTIN(__builtin_ia32_movddup256, "V4dV4d", "")
|
|
||||||
BUILTIN(__builtin_ia32_sqrtpd256, "V4dV4d", "")
|
BUILTIN(__builtin_ia32_sqrtpd256, "V4dV4d", "")
|
||||||
BUILTIN(__builtin_ia32_sqrtps256, "V8fV8f", "")
|
BUILTIN(__builtin_ia32_sqrtps256, "V8fV8f", "")
|
||||||
BUILTIN(__builtin_ia32_sqrtps_nr256, "V8fV8f", "")
|
BUILTIN(__builtin_ia32_sqrtps_nr256, "V8fV8f", "")
|
||||||
|
|
|
@ -577,19 +577,19 @@ _mm256_cvttps_epi32(__m256 a)
|
||||||
static __inline __m256 __attribute__((__always_inline__, __nodebug__))
|
static __inline __m256 __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_movehdup_ps(__m256 a)
|
_mm256_movehdup_ps(__m256 a)
|
||||||
{
|
{
|
||||||
return (__m256)__builtin_ia32_movshdup256((__v8sf)a);
|
return __builtin_shufflevector(a, a, 1, 1, 3, 3, 5, 5, 7, 7);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline __m256 __attribute__((__always_inline__, __nodebug__))
|
static __inline __m256 __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_moveldup_ps(__m256 a)
|
_mm256_moveldup_ps(__m256 a)
|
||||||
{
|
{
|
||||||
return (__m256)__builtin_ia32_movsldup256((__v8sf)a);
|
return __builtin_shufflevector(a, a, 0, 0, 2, 2, 4, 4, 6, 6);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline __m256d __attribute__((__always_inline__, __nodebug__))
|
static __inline __m256d __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_movedup_pd(__m256d a)
|
_mm256_movedup_pd(__m256d a)
|
||||||
{
|
{
|
||||||
return (__m256d)__builtin_ia32_movddup256((__v4df)a);
|
return __builtin_shufflevector(a, a, 0, 0, 2, 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Unpack and Interleave */
|
/* Unpack and Interleave */
|
||||||
|
|
|
@ -434,9 +434,6 @@ void f0() {
|
||||||
tmp_V4d = __builtin_ia32_vinsertf128_pd256(tmp_V4d, tmp_V2d, 0x7);
|
tmp_V4d = __builtin_ia32_vinsertf128_pd256(tmp_V4d, tmp_V2d, 0x7);
|
||||||
tmp_V8f = __builtin_ia32_vinsertf128_ps256(tmp_V8f, tmp_V4f, 0x7);
|
tmp_V8f = __builtin_ia32_vinsertf128_ps256(tmp_V8f, tmp_V4f, 0x7);
|
||||||
tmp_V8i = __builtin_ia32_vinsertf128_si256(tmp_V8i, tmp_V4i, 0x7);
|
tmp_V8i = __builtin_ia32_vinsertf128_si256(tmp_V8i, tmp_V4i, 0x7);
|
||||||
tmp_V8f = __builtin_ia32_movshdup256(tmp_V8f);
|
|
||||||
tmp_V8f = __builtin_ia32_movsldup256(tmp_V8f);
|
|
||||||
tmp_V4d = __builtin_ia32_movddup256(tmp_V4d);
|
|
||||||
tmp_V4d = __builtin_ia32_sqrtpd256(tmp_V4d);
|
tmp_V4d = __builtin_ia32_sqrtpd256(tmp_V4d);
|
||||||
tmp_V8f = __builtin_ia32_sqrtps256(tmp_V8f);
|
tmp_V8f = __builtin_ia32_sqrtps256(tmp_V8f);
|
||||||
tmp_V8f = __builtin_ia32_sqrtps_nr256(tmp_V8f);
|
tmp_V8f = __builtin_ia32_sqrtps_nr256(tmp_V8f);
|
||||||
|
|
Loading…
Reference in New Issue