When we see an unaligned load from an insufficiently aligned global or
alloca, increase the alignment of the load, turning it into an aligned load.
This allows us to compile:
#include <xmmintrin.h>
__m128i foo(__m128i x){
  static const unsigned int c_0[4] = { 0, 0, 0, 0 };
  __m128i v_Zero = _mm_loadu_si128((__m128i*)c_0);
  x = _mm_unpacklo_epi8(x, v_Zero);
  return x;
}
into:
_foo:
        punpcklbw _c_0.5944, %xmm0
        ret
        .data
        .lcomm _c_0.5944,16,4   # c_0.5944
instead of:
_foo:
        movdqu _c_0.5944, %xmm1
        punpcklbw %xmm1, %xmm0
        ret
        .data
        .lcomm _c_0.5944,16,2   # c_0.5944
llvm-svn: 40971
2007-08-10 03:05:49 +08:00
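
A rough IR-level sketch of the alloca case mentioned above (a hypothetical
example, not part of this commit; the function and value names are made up):

declare <16 x i8> @llvm.x86.sse2.loadu.dq(i8*)

define <16 x i8> @stack_zero() {
entry:
	%buf = alloca [4 x i32], align 4        ; only 4-byte aligned stack slot
	%p = bitcast [4 x i32]* %buf to i8*
	%v = call <16 x i8> @llvm.x86.sse2.loadu.dq( i8* %p )
	ret <16 x i8> %v
}

The expectation is that -instcombine can raise the alloca to align 16 and then
rewrite the intrinsic call as an ordinary aligned vector load, just as it does
for the global in the test below.
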
; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {GLOBAL.*align 16}
; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {tmp = load}
@GLOBAL = internal global [4 x i32] zeroinitializer
declare <16 x i8> @llvm.x86.sse2.loadu.dq(i8*)
define <16 x i8> @foo(<2 x i64> %x) {
entry:
%tmp = tail call <16 x i8> @llvm.x86.sse2.loadu.dq( i8* bitcast ([4 x i32]* @GLOBAL to i8*) )
ret <16 x i8> %tmp
}
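
The two RUN-line greps encode the expected result: after -instcombine, @GLOBAL
should carry an explicit "align 16" and %tmp should become a plain load rather
than an intrinsic call. Roughly (a sketch of the expected output, not captured
from an actual run):

@GLOBAL = internal global [4 x i32] zeroinitializer, align 16

define <16 x i8> @foo(<2 x i64> %x) {
entry:
	%tmp = load <16 x i8>* bitcast ([4 x i32]* @GLOBAL to <16 x i8>*), align 16
	ret <16 x i8> %tmp
}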