When we see an unaligned load from an insufficiently aligned global or
alloca, increase the alignment of the load, turning it into an aligned load.
This allows us to compile:
#include <xmmintrin.h>
__m128i foo(__m128i x){
  static const unsigned int c_0[4] = { 0, 0, 0, 0 };
  __m128i v_Zero = _mm_loadu_si128((__m128i*)c_0);
  x = _mm_unpacklo_epi8(x, v_Zero);
  return x;
}
into:
_foo:
        punpcklbw _c_0.5944, %xmm0
        ret
        .data
        .lcomm _c_0.5944,16,4   # c_0.5944
instead of:
_foo:
        movdqu _c_0.5944, %xmm1
        punpcklbw %xmm1, %xmm0
        ret
        .data
        .lcomm _c_0.5944,16,2   # c_0.5944
llvm-svn: 40971
2007-08-10 03:05:49 +08:00
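
A rough IR-level sketch of the alloca case mentioned above (a hypothetical
example, not part of this commit; the function and value names are made up):

declare <16 x i8> @llvm.x86.sse2.loadu.dq(i8*)

define <16 x i8> @stack_zero() {
entry:
	%buf = alloca [4 x i32], align 4        ; only 4-byte aligned stack slot
	%p = bitcast [4 x i32]* %buf to i8*
	%v = call <16 x i8> @llvm.x86.sse2.loadu.dq( i8* %p )
	ret <16 x i8> %v
}

The expectation is that -instcombine can raise the alloca to align 16 and then
rewrite the intrinsic call as an ordinary aligned vector load, just as it does
for the global in the test below.
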
; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {GLOBAL.*align 16}
; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {tmp = load}
@GLOBAL = internal global [4 x i32] zeroinitializer
declare <16 x i8> @llvm.x86.sse2.loadu.dq(i8*)
define <16 x i8> @foo(<2 x i64> %x) {
entry:
%tmp = tail call <16 x i8> @llvm.x86.sse2.loadu.dq( i8* bitcast ([4 x i32]* @GLOBAL to i8*) )
ret <16 x i8> %tmp
}
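
The two RUN-line greps encode the expected result: after -instcombine, @GLOBAL
should carry an explicit "align 16" and %tmp should become a plain load rather
than an intrinsic call. Roughly (a sketch of the expected output, not captured
from an actual run):

@GLOBAL = internal global [4 x i32] zeroinitializer, align 16

define <16 x i8> @foo(<2 x i64> %x) {
entry:
	%tmp = load <16 x i8>* bitcast ([4 x i32]* @GLOBAL to <16 x i8>*), align 16
	ret <16 x i8> %tmp
}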