Get rid of the storelv4si builtin as it can be expressed directly. This is
general goodness because it provides opportunities to clean things up. For example,

uint64_t t1(__m128i vA)
{
  uint64_t Alo;
  _mm_storel_epi64((__m128i*)&Alo, vA);
  return Alo;
}

was generating 

	movq	%xmm0, -8(%rbp)
	movq	-8(%rbp), %rax

and now generates

	movd	%xmm0, %rax
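
For reference, a minimal standalone sketch of the "expressed directly" pattern
that replaces the builtin; the helper name storel_epi64_direct and the main()
driver are illustrative only and not part of this change:

#include <emmintrin.h>
#include <stdint.h>

/* Illustrative helper mirroring the new emmintrin.h code: store the low
   64 bits of a through a packed, may_alias struct so the destination may
   be unaligned and of any type.  __m128i is a vector of two long long,
   so a[0] is the low element. */
static inline void storel_epi64_direct(void *p, __m128i a)
{
  struct low64 {
    long long u;
  } __attribute__((__packed__, __may_alias__));
  ((struct low64 *)p)->u = a[0];
}

int main(void)
{
  uint64_t lo = 0;
  __m128i v = _mm_set_epi64x(1, 42);  /* high = 1, low = 42 */
  storel_epi64_direct(&lo, v);
  return lo == 42 ? 0 : 1;            /* expect the low 64 bits */
}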

rdar://11282581

llvm-svn: 155924
Chad Rosier 2012-05-01 18:11:51 +00:00
parent 8568fb91ea
commit 87622b8b84
4 changed files with 10 additions and 4 deletions

@@ -303,8 +303,6 @@ BUILTIN(__builtin_ia32_lddqu, "V16ccC*", "")
 BUILTIN(__builtin_ia32_palignr128, "V16cV16cV16cIc", "")
 BUILTIN(__builtin_ia32_insertps128, "V4fV4fV4fi", "")
-BUILTIN(__builtin_ia32_storelv4si, "vV2i*V2LLi", "")
 BUILTIN(__builtin_ia32_pblendvb128, "V16cV16cV16cV16c", "")
 BUILTIN(__builtin_ia32_pblendw128, "V8sV8sV8sIi", "")
 BUILTIN(__builtin_ia32_blendpd, "V2dV2dV2dIi", "")

@@ -1186,7 +1186,10 @@ _mm_maskmoveu_si128(__m128i d, __m128i n, char *p)
 static __inline__ void __attribute__((__always_inline__, __nodebug__))
 _mm_storel_epi64(__m128i *p, __m128i a)
 {
-  __builtin_ia32_storelv4si((__v2si *)p, a);
+  struct __mm_storel_epi64_struct {
+    long long u;
+  } __attribute__((__packed__, __may_alias__));
+  ((struct __mm_storel_epi64_struct*)p)->u = a[0];
 }
 
 static __inline__ void __attribute__((__always_inline__, __nodebug__))
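
A note on the struct trick above: the __packed__ and __may_alias__ attributes
are what make a plain 64-bit store legal for arbitrary destinations, and they
are why the new test below checks for an "align 1" store.  A rough comparison,
using hypothetical helper names that are not part of the change:

#include <emmintrin.h>

/* Hypothetical naive variant: the cast claims an 8-byte-aligned long long
   object lives at *p, which callers of _mm_storel_epi64 do not guarantee
   (p may point into a char buffer or an unaligned field). */
static inline void storel_naive(__m128i *p, __m128i a)
{
  *(long long *)p = a[0];
}

/* Pattern used by the new header code: __packed__ drops the alignment
   assumption (hence the "align 1" store) and __may_alias__ exempts the
   access from strict-aliasing restrictions. */
static inline void storel_attr(__m128i *p, __m128i a)
{
  struct low64 {
    long long u;
  } __attribute__((__packed__, __may_alias__));
  ((struct low64 *)p)->u = a[0];
}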

@@ -344,7 +344,6 @@ void f0() {
   tmp_V16c = __builtin_ia32_lddqu(tmp_cCp);
   tmp_V2LLi = __builtin_ia32_palignr128(tmp_V2LLi, tmp_V2LLi, imm_i);
   tmp_V1LLi = __builtin_ia32_palignr(tmp_V1LLi, tmp_V1LLi, imm_i);
-  (void) __builtin_ia32_storelv4si(tmp_V2ip, tmp_V2LLi);
 #ifdef USE_SSE4
   tmp_V16c = __builtin_ia32_pblendvb128(tmp_V16c, tmp_V16c, tmp_V16c);
   tmp_V8s = __builtin_ia32_pblendw128(tmp_V8s, tmp_V8s, imm_i_0_256);

@@ -151,3 +151,9 @@ __m128d test_mm_round_sd(__m128d x, __m128d y) {
   // CHECK: @llvm.x86.sse41.round.sd
   return _mm_round_sd(x, y, 2);
 }
+
+void test_storel_epi64(__m128i x, void* y) {
+  // CHECK: define void @test_storel_epi64
+  // CHECK: store {{.*}} i64* {{.*}}, align 1{{$}}
+  _mm_storel_epi64(y, x);
+}