llvm-project/llvm/test/CodeGen/X86/widen_cast-1.ll

; RUN: llc -march=x86 -mcpu=generic -mattr=+sse4.2 < %s | FileCheck %s
; RUN: llc -march=x86 -mcpu=atom < %s | FileCheck -check-prefix=ATOM %s

; CHECK: movl
; CHECK: paddd
; CHECK: movlpd

; The scheduler for Atom may produce a different instruction order, so that target is checked under its own prefix
; ATOM: movl
; ATOM: paddd
; ATOM: movlpd

; bitcast a v4i16 to v2i32

; convert(dst, src) - for i = 0 while i < 4: load the <4 x i16> vector at
; src[i], add 1 to every i16 lane, reinterpret the result as <2 x i32> and
; store it to dst[i].
; Both pointer arguments and the loop counter are kept in stack slots and are
; reloaded from memory at each use.
define void @convert(<2 x i32>* %dst, <4 x i16>* %src) nounwind {
entry:
	; Stack slots for the two pointer arguments and the loop counter %i.
	%dst.addr = alloca <2 x i32>*		; <<2 x i32>**> [#uses=2]
	%src.addr = alloca <4 x i16>*		; <<4 x i16>**> [#uses=2]
	%i = alloca i32, align 4		; <i32*> [#uses=6]
	; Spill the incoming arguments to their slots and start with i = 0.
	store <2 x i32>* %dst, <2 x i32>** %dst.addr
	store <4 x i16>* %src, <4 x i16>** %src.addr
	store i32 0, i32* %i
	br label %forcond

; Loop test: keep iterating while i < 4 (signed comparison).
forcond:		; preds = %forinc, %entry
	%tmp = load i32* %i		; <i32> [#uses=1]
	%cmp = icmp slt i32 %tmp, 4		; <i1> [#uses=1]
	br i1 %cmp, label %forbody, label %afterfor

; Loop body: dst[i] = bitcast(src[i] + <1, 1, 1, 1>).
forbody:		; preds = %forcond
	; %arrayidx = &dst[i]
	%tmp1 = load i32* %i		; <i32> [#uses=1]
	%tmp2 = load <2 x i32>** %dst.addr		; <<2 x i32>*> [#uses=1]
	%arrayidx = getelementptr <2 x i32>* %tmp2, i32 %tmp1		; <<2 x i32>*> [#uses=1]
	; %arrayidx5 = &src[i]
	%tmp3 = load i32* %i		; <i32> [#uses=1]
	%tmp4 = load <4 x i16>** %src.addr		; <<4 x i16>*> [#uses=1]
	%arrayidx5 = getelementptr <4 x i16>* %tmp4, i32 %tmp3		; <<4 x i16>*> [#uses=1]
	%tmp6 = load <4 x i16>* %arrayidx5		; <<4 x i16>> [#uses=1]
	; The add is done on four i16 lanes; the bitcast then reinterprets the
	; same 64 bits as two i32 lanes before the store.
	%add = add <4 x i16> %tmp6, < i16 1, i16 1, i16 1, i16 1 >		; <<4 x i16>> [#uses=1]
	%conv = bitcast <4 x i16> %add to <2 x i32>		; <<2 x i32>> [#uses=1]
	store <2 x i32> %conv, <2 x i32>* %arrayidx
	br label %forinc

; Loop increment: i = i + 1, then back to the loop test.
forinc:		; preds = %forbody
	%tmp7 = load i32* %i		; <i32> [#uses=1]
	%inc = add i32 %tmp7, 1		; <i32> [#uses=1]
	store i32 %inc, i32* %i
	br label %forcond

; Loop exit.
afterfor:		; preds = %forcond
	ret void
}
Rename features to match what gcc and clang use. There is no advantage in being different and using the same names simplifies clang a bit. llvm-svn: 189141 2013-08-24 04:21:34 +08:00			`; RUN: llc -march=x86 -mcpu=generic -mattr=+sse4.2 < %s \| FileCheck %s`
Add test for ATOM ISA SSSE3 - Remove SSE4.1 feature in other ATOM-based test cases llvm-svn: 166699 2012-10-26 01:50:05 +08:00			`; RUN: llc -march=x86 -mcpu=atom < %s \| FileCheck -check-prefix=ATOM %s`
This patch fixes 8 out of 20 unexpected failures in "make check" when run on an Intel Atom processor. The failures have arisen due to changes elsewhere in the trunk over the past 8 weeks or so. These failures were not detected by the Atom buildbot because the CPU on the Atom buildbot was not being detected as an Atom CPU. The fix for this problem is in Host.cpp and X86Subtarget.cpp, but shall remain commented out until the current set of Atom test failures are fixed. Patch by Andy Zhang and Tyler Nowicki! llvm-svn: 160451 2012-07-19 04:49:17 +08:00
Revert "Temporarily enable MI-Sched on X86." This reverts commit 98a9b72e8c56dc13a2617de84503a3d78352789c. llvm-svn: 184823 2013-06-25 10:48:58 +08:00			`; CHECK: movl`
Enable MI Sched for x86. This changes the SelectionDAG scheduling preference to source order. Soon, the SelectionDAG scheduler can be bypassed saving a nice chunk of compile time. Performance differences that result from this change are often a consequence of register coalescing. The register coalescer is far from perfect. Bugs can be filed for deficiencies. On x86 SandyBridge/Haswell, the source order schedule is often preserved, particularly for small blocks. Register pressure is generally improved over the SD scheduler's ILP mode. However, we are still able to handle large blocks that require latency hiding, unlike the SD scheduler's BURR mode. MI scheduler also attempts to discover the critical path in single-block loops and adjust heuristics accordingly. The MI scheduler relies on the new machine model. This is currently unimplemented for AVX, so we may not be generating the best code yet. Unit tests are updated so they don't depend on SD scheduling heuristics. llvm-svn: 192750 2013-10-16 07:33:07 +08:00			`; CHECK: paddd`
When ext-loading and trunc-storing vectors to memory, on x86 32bit systems, allow loads/stores of 64bit values from xmm registers. llvm-svn: 160044 2012-07-11 21:27:05 +08:00			`; CHECK: movlpd`
Added some basic test cases for r61209 llvm-svn: 61210 2008-12-19 04:05:58 +08:00
This patch fixes 8 out of 20 unexpected failures in "make check" when run on an Intel Atom processor. The failures have arisen due to changes elsewhere in the trunk over the past 8 weeks or so. These failures were not detected by the Atom buildbot because the CPU on the Atom buildbot was not being detected as an Atom CPU. The fix for this problem is in Host.cpp and X86Subtarget.cpp, but shall remain commented out until the current set of Atom test failures are fixed. Patch by Andy Zhang and Tyler Nowicki! llvm-svn: 160451 2012-07-19 04:49:17 +08:00			`; Scheduler causes produce a different instruction order`
			`; ATOM: movl`
			`; ATOM: paddd`
			`; ATOM: movlpd`

Added some basic test cases for r61209 llvm-svn: 61210 2008-12-19 04:05:58 +08:00			`; bitcast a v4i16 to v2i32`

			`define void @convert(<2 x i32>* %dst, <4 x i16>* %src) nounwind {`
			`entry:`
			`%dst.addr = alloca <2 x i32>* ; <<2 x i32>**> [#uses=2]`
			`%src.addr = alloca <4 x i16>* ; <<4 x i16>**> [#uses=2]`
			`%i = alloca i32, align 4 ; <i32*> [#uses=6]`
			`store <2 x i32>* %dst, <2 x i32>** %dst.addr`
			`store <4 x i16>* %src, <4 x i16>** %src.addr`
			`store i32 0, i32* %i`
			`br label %forcond`

			`forcond: ; preds = %forinc, %entry`
			`%tmp = load i32* %i ; <i32> [#uses=1]`
			`%cmp = icmp slt i32 %tmp, 4 ; <i1> [#uses=1]`
			`br i1 %cmp, label %forbody, label %afterfor`

			`forbody: ; preds = %forcond`
			`%tmp1 = load i32* %i ; <i32> [#uses=1]`
			`%tmp2 = load <2 x i32>** %dst.addr ; <<2 x i32>*> [#uses=1]`
			`%arrayidx = getelementptr <2 x i32>* %tmp2, i32 %tmp1 ; <<2 x i32>*> [#uses=1]`
			`%tmp3 = load i32* %i ; <i32> [#uses=1]`
			`%tmp4 = load <4 x i16>** %src.addr ; <<4 x i16>*> [#uses=1]`
			`%arrayidx5 = getelementptr <4 x i16>* %tmp4, i32 %tmp3 ; <<4 x i16>*> [#uses=1]`
			`%tmp6 = load <4 x i16>* %arrayidx5 ; <<4 x i16>> [#uses=1]`
			`%add = add <4 x i16> %tmp6, < i16 1, i16 1, i16 1, i16 1 > ; <<4 x i16>> [#uses=1]`
			`%conv = bitcast <4 x i16> %add to <2 x i32> ; <<2 x i32>> [#uses=1]`
			`store <2 x i32> %conv, <2 x i32>* %arrayidx`
			`br label %forinc`

			`forinc: ; preds = %forbody`
			`%tmp7 = load i32* %i ; <i32> [#uses=1]`
			`%inc = add i32 %tmp7, 1 ; <i32> [#uses=1]`
			`store i32 %inc, i32* %i`
			`br label %forcond`

			`afterfor: ; preds = %forcond`
			`ret void`
			`}`