llvm-project/llvm/test/CodeGen/X86/fold-load.ll

; RUN: llc < %s -mcpu=generic -march=x86 | FileCheck %s
	; Aggregate types referenced by @test1 below.
	; NOTE(review): the names suggest these were reduced from GNU obstack C code — confirm.
	%struct._obstack_chunk = type { i8*, %struct._obstack_chunk*, [4 x i8] }
	; Eleven-field struct; @test1 addresses field index 10, the trailing i8.
	%struct.obstack = type { i32, %struct._obstack_chunk*, i8*, i8*, i8*, i32, i32, %struct._obstack_chunk* (...)*, void (...)*, i8*, i8 }
; Declared external: this file only takes its address (constant GEP in @test1).
@stmt_obstack = external global %struct.obstack		; <%struct.obstack*> [#uses=1]

; This should just not crash.
; No FileCheck directives are attached to this function: the only requirement
; is that llc gets through it. The body is deliberately odd (constant branch
; condition, a select with identical arms, an i32 load through a bitcast of a
; pointer to an i8 field, and a call through a null callee), so keep the
; instructions exactly as they are.
define void @test1() nounwind {
entry:
	; Constant-true condition: %cond_next is never taken from here.
	br i1 true, label %cond_true, label %cond_next

cond_true:		; preds = %entry
	; Both select arms are 0, so the value passed to the call below is 0.
	%new_size.0.i = select i1 false, i32 0, i32 0		; <i32> [#uses=1]
	; i32 load from the address of field 10 of @stmt_obstack (declared as i8),
	; reached through a constant-expression GEP + bitcast.
	%tmp.i = load i32* bitcast (i8* getelementptr (%struct.obstack* @stmt_obstack, i32 0, i32 10) to i32*)		; <i32> [#uses=1]
	; Only bit 0 of the loaded value decides the branch: trunc -> and 1 -> icmp eq 0.
	%tmp.i.upgrd.1 = trunc i32 %tmp.i to i8		; <i8> [#uses=1]
	%tmp21.i = and i8 %tmp.i.upgrd.1, 1		; <i8> [#uses=1]
	%tmp22.i = icmp eq i8 %tmp21.i, 0		; <i1> [#uses=1]
	br i1 %tmp22.i, label %cond_false30.i, label %cond_true23.i

cond_true23.i:		; preds = %cond_true
	ret void

cond_false30.i:		; preds = %cond_true
	; Tail call whose callee is the constant null; the result is unused.
	%tmp35.i = tail call %struct._obstack_chunk* null( i32 %new_size.0.i )		; <%struct._obstack_chunk*> [#uses=0]
	ret void

cond_next:		; preds = %entry
	ret void
}


; @test2: the i16 load of %P has two users, the zext (whose result is masked to
; the low 8 bits and returned) and the store to %Q in block %L. The FileCheck
; directives at the end of the body expect the pointer argument to be fetched
; from 4(%esp) into %eax, immediately followed by a movzwl from (%eax).
; NOTE(review): the "[#uses=11]" annotation on %A looks stale; only two uses of
; %A are visible in this function.
define i32 @test2(i16* %P, i16* %Q) nounwind {
  %A = load i16* %P, align 4                      ; <i16> [#uses=11]
  %C = zext i16 %A to i32                         ; <i32> [#uses=1]
  %D = and i32 %C, 255                            ; <i32> [#uses=1]
  br label %L
L:

  ; Second use of the full 16-bit value, in a different block from the load.
  store i16 %A, i16* %Q
  ret i32 %D

; CHECK-LABEL: test2:
; CHECK: 	movl	4(%esp), %eax
; CHECK-NEXT:	movzwl	(%eax), %e{{..}}

}

; rdar://10554090
; xor in exit block will be CSE'ed and load will be folded to xor in entry.
; The directives below look for both pointer arguments being read from the
; stack (8(%esp), then 4(%esp)), an xorl whose first operand is a memory
; reference through a register, and then a jump instruction.
define i1 @test3(i32* %P, i32* %Q) nounwind {
; CHECK-LABEL: test3:
; CHECK: movl 8(%esp), %e
; CHECK: movl 4(%esp), %e
; CHECK: xorl (%e
; CHECK: j
entry:
  %0 = load i32* %P, align 4
  %1 = load i32* %Q, align 4
  ; First xor of the two loaded values; the branch depends on it under mask 89947.
  %2 = xor i32 %0, %1
  %3 = and i32 %2, 89947
  %4 = icmp eq i32 %3, 0
  br i1 %4, label %exit, label %land.end

exit:
  ; Same two operands as %2 in the opposite order; this is the xor the header
  ; comment expects to be CSE'ed with the one in entry.
  %shr.i.i19 = xor i32 %1, %0
  %5 = and i32 %shr.i.i19, 3456789123
  %6 = icmp eq i32 %5, 0
  br label %land.end

land.end:
  ; false when arriving straight from entry, otherwise the comparison from %exit.
  %7 = phi i1 [ %6, %exit ], [ false, %entry ]
  ret i1 %7
}
Instruction scheduling itinerary for Intel Atom. Adds an instruction itinerary to all x86 instructions, giving each a default latency of 1, using the InstrItinClass IIC_DEFAULT. Sets specific latencies for Atom for the instructions in files X86InstrCMovSetCC.td, X86InstrArithmetic.td, X86InstrControl.td, and X86InstrShiftRotate.td. The Atom latencies for the remainder of the x86 instructions will be set in subsequent patches. Adds a test to verify that the scheduler is working. Also changes the scheduling preference to "Hybrid" for i386 Atom, while leaving x86_64 as ILP. Patch by Preston Gurd! llvm-svn: 149558 2012-02-02 07:20:51 +08:00			`; RUN: llc < %s -mcpu=generic -march=x86 | FileCheck %s`
Remove llvm-upgrade and update tests. llvm-svn: 47432 2008-02-21 15:42:26 +08:00			`%struct._obstack_chunk = type { i8*, %struct._obstack_chunk*, [4 x i8] }`
			`%struct.obstack = type { i32, %struct._obstack_chunk*, i8*, i8*, i8*, i32, i32, %struct._obstack_chunk* (...)*, void (...)*, i8*, i8 }`
			`@stmt_obstack = external global %struct.obstack ; <%struct.obstack*> [#uses=1]`
testcase, ensure this never breaks. llvm-svn: 30137 2006-09-07 05:54:59 +08:00
Fix rdar://7517201, a regression introduced by r92849. When folding a and(any_ext(load)) both the any_ext and the load have to have only a single use. This removes the anyext-uses.ll testcase which started failing because it is unreduced and unclear what it is testing. llvm-svn: 92950 2010-01-08 05:59:23 +08:00			`; This should just not crash.`
			`define void @test1() nounwind {`
testcase, ensure this never breaks. llvm-svn: 30137 2006-09-07 05:54:59 +08:00			`entry:`
Fix rdar://7517201, a regression introduced by r92849. When folding a and(any_ext(load)) both the any_ext and the load have to have only a single use. This removes the anyext-uses.ll testcase which started failing because it is unreduced and unclear what it is testing. llvm-svn: 92950 2010-01-08 05:59:23 +08:00			`br i1 true, label %cond_true, label %cond_next`
testcase, ensure this never breaks. llvm-svn: 30137 2006-09-07 05:54:59 +08:00
			`cond_true: ; preds = %entry`
Remove llvm-upgrade and update tests. llvm-svn: 47432 2008-02-21 15:42:26 +08:00			`%new_size.0.i = select i1 false, i32 0, i32 0 ; <i32> [#uses=1]`
			`%tmp.i = load i32* bitcast (i8* getelementptr (%struct.obstack* @stmt_obstack, i32 0, i32 10) to i32*) ; <i32> [#uses=1]`
			`%tmp.i.upgrd.1 = trunc i32 %tmp.i to i8 ; <i8> [#uses=1]`
			`%tmp21.i = and i8 %tmp.i.upgrd.1, 1 ; <i8> [#uses=1]`
			`%tmp22.i = icmp eq i8 %tmp21.i, 0 ; <i1> [#uses=1]`
			`br i1 %tmp22.i, label %cond_false30.i, label %cond_true23.i`
testcase, ensure this never breaks. llvm-svn: 30137 2006-09-07 05:54:59 +08:00
			`cond_true23.i: ; preds = %cond_true`
			`ret void`

			`cond_false30.i: ; preds = %cond_true`
Remove llvm-upgrade and update tests. llvm-svn: 47432 2008-02-21 15:42:26 +08:00			`%tmp35.i = tail call %struct._obstack_chunk* null( i32 %new_size.0.i ) ; <%struct._obstack_chunk*> [#uses=0]`
testcase, ensure this never breaks. llvm-svn: 30137 2006-09-07 05:54:59 +08:00			`ret void`

			`cond_next: ; preds = %entry`
			`ret void`
			`}`
Fix rdar://7517201, a regression introduced by r92849. When folding a and(any_ext(load)) both the any_ext and the load have to have only a single use. This removes the anyext-uses.ll testcase which started failing because it is unreduced and unclear what it is testing. llvm-svn: 92950 2010-01-08 05:59:23 +08:00


			`define i32 @test2(i16* %P, i16* %Q) nounwind {`
			`%A = load i16* %P, align 4 ; <i16> [#uses=11]`
			`%C = zext i16 %A to i32 ; <i32> [#uses=1]`
			`%D = and i32 %C, 255 ; <i32> [#uses=1]`
			`br label %L`
			`L:`

			`store i16 %A, i16* %Q`
			`ret i32 %D`
Enable MI Sched for x86. This changes the SelectionDAG scheduling preference to source order. Soon, the SelectionDAG scheduler can be bypassed saving a nice chunk of compile time. Performance differences that result from this change are often a consequence of register coalescing. The register coalescer is far from perfect. Bugs can be filed for deficiencies. On x86 SandyBridge/Haswell, the source order schedule is often preserved, particularly for small blocks. Register pressure is generally improved over the SD scheduler's ILP mode. However, we are still able to handle large blocks that require latency hiding, unlike the SD scheduler's BURR mode. MI scheduler also attempts to discover the critical path in single-block loops and adjust heuristics accordingly. The MI scheduler relies on the new machine model. This is currently unimplemented for AVX, so we may not be generating the best code yet. Unit tests are updated so they don't depend on SD scheduling heuristics. llvm-svn: 192750 2013-10-16 07:33:07 +08:00
Convert CodeGen/*/*.ll tests to use the new CHECK-LABEL for easier debugging. No functionality change and all tests pass after conversion. This was done with the following sed invocation to catch label lines demarking function boundaries: sed -i '' "s/^;\( *\)\([A-Z0-9_]*\):\( *\)test\([A-Za-z0-9_-]*\):\( *\)$/;\1\2-LABEL:\3test\4:\5/g" test/CodeGen/*/*.ll which was written conservatively to avoid false positives rather than false negatives. I scanned through all the changes and everything looks correct. llvm-svn: 186258 2013-07-14 04:38:47 +08:00			`; CHECK-LABEL: test2:`
Fix rdar://7517201, a regression introduced by r92849. When folding a and(any_ext(load)) both the any_ext and the load have to have only a single use. This removes the anyext-uses.ll testcase which started failing because it is unreduced and unclear what it is testing. llvm-svn: 92950 2010-01-08 05:59:23 +08:00			`; CHECK: movl 4(%esp), %eax`
Enable MI Sched for x86. This changes the SelectionDAG scheduling preference to source order. Soon, the SelectionDAG scheduler can be bypassed saving a nice chunk of compile time. Performance differences that result from this change are often a consequence of register coalescing. The register coalescer is far from perfect. Bugs can be filed for deficiencies. On x86 SandyBridge/Haswell, the source order schedule is often preserved, particularly for small blocks. Register pressure is generally improved over the SD scheduler's ILP mode. However, we are still able to handle large blocks that require latency hiding, unlike the SD scheduler's BURR mode. MI scheduler also attempts to discover the critical path in single-block loops and adjust heuristics accordingly. The MI scheduler relies on the new machine model. This is currently unimplemented for AVX, so we may not be generating the best code yet. Unit tests are updated so they don't depend on SD scheduling heuristics. llvm-svn: 192750 2013-10-16 07:33:07 +08:00			`; CHECK-NEXT: movzwl (%eax), %e{{..}}`
Fix rdar://7517201, a regression introduced by r92849. When folding a and(any_ext(load)) both the any_ext and the load have to have only a single use. This removes the anyext-uses.ll testcase which started failing because it is unreduced and unclear what it is testing. llvm-svn: 92950 2010-01-08 05:59:23 +08:00
			`}`

X86 Peephole: fold loads to the source register operand if possible. Machine CSE and other optimizations can remove instructions so folding is possible at peephole while not possible at ISel. This patch is a rework of r160919 and was tested on clang self-host on my local machine. rdar://10554090 and rdar://11873276 llvm-svn: 161152 2012-08-02 08:56:42 +08:00			`; rdar://10554090`
			`; xor in exit block will be CSE'ed and load will be folded to xor in entry.`
			`define i1 @test3(i32* %P, i32* %Q) nounwind {`
Convert CodeGen/*/*.ll tests to use the new CHECK-LABEL for easier debugging. No functionality change and all tests pass after conversion. This was done with the following sed invocation to catch label lines demarking function boundaries: sed -i '' "s/^;\( *\)\([A-Z0-9_]*\):\( *\)test\([A-Za-z0-9_-]*\):\( *\)$/;\1\2-LABEL:\3test\4:\5/g" test/CodeGen/*/*.ll which was written conservatively to avoid false positives rather than false negatives. I scanned through all the changes and everything looks correct. llvm-svn: 186258 2013-07-14 04:38:47 +08:00			`; CHECK-LABEL: test3:`
MachineSink: Fix and tweak critical-edge breaking heuristic. Per original comment, the intention of this loop is to go ahead and break the critical edge (in order to sink this instruction) if there's reason to believe doing so might "unblock" the sinking of additional instructions that define registers used by this one. The idea is that if we have a few instructions to sink "together" breaking the edge might be worthwhile. This commit makes a few small changes to help better realize this goal: First, modify the loop to ignore registers defined by this instruction. We don't sink definitions of physical registers, and sinking an SSA definition isn't going to unblock an upstream instruction. Second, ignore uses of physical registers. Instructions that define physical registers are rejected for sinking, and so moving this one won't enable moving any defining instructions. As an added bonus, while virtual register use-def chains are generally small due to SSA goodness, iteration over the uses and definitions (used by hasOneNonDBGUse) for physical registers like EFLAGS can be rather expensive in practice. (This is the original reason for looking at this) Finally, to keep things simple continue to only consider this trick for registers that have a single use (via hasOneNonDBGUse), but to avoid spuriously breaking critical edges only do so if the definition resides in the same MBB and therefore this one directly blocks it from being sunk as well. If sinking them together is meant to be, let the iterative nature of this pass sink the definition into this block first. Update tests to accommodate this change, add new testcase where sinking avoids pipeline stalls. llvm-svn: 192608 2013-10-15 00:57:17 +08:00			`; CHECK: movl 8(%esp), %e`
			`; CHECK: movl 4(%esp), %e`
			`; CHECK: xorl (%e`
X86 Peephole: fold loads to the source register operand if possible. Machine CSE and other optimizations can remove instructions so folding is possible at peephole while not possible at ISel. This patch is a rework of r160919 and was tested on clang self-host on my local machine. rdar://10554090 and rdar://11873276 llvm-svn: 161152 2012-08-02 08:56:42 +08:00			`; CHECK: j`
			`entry:`
			`%0 = load i32* %P, align 4`
			`%1 = load i32* %Q, align 4`
			`%2 = xor i32 %0, %1`
Reapply r162160 with a fix: Optimize Arith->Trunc->SETCC sequence to allow better compare/branch code. llvm-svn: 162172 2012-08-19 01:53:03 +08:00			`%3 = and i32 %2, 89947`
X86 Peephole: fold loads to the source register operand if possible. Machine CSE and other optimizations can remove instructions so folding is possible at peephole while not possible at ISel. This patch is a rework of r160919 and was tested on clang self-host on my local machine. rdar://10554090 and rdar://11873276 llvm-svn: 161152 2012-08-02 08:56:42 +08:00			`%4 = icmp eq i32 %3, 0`
			`br i1 %4, label %exit, label %land.end`

			`exit:`
			`%shr.i.i19 = xor i32 %1, %0`
Reapply r162160 with a fix: Optimize Arith->Trunc->SETCC sequence to allow better compare/branch code. llvm-svn: 162172 2012-08-19 01:53:03 +08:00			`%5 = and i32 %shr.i.i19, 3456789123`
X86 Peephole: fold loads to the source register operand if possible. Machine CSE and other optimizations can remove instructions so folding is possible at peephole while not possible at ISel. This patch is a rework of r160919 and was tested on clang self-host on my local machine. rdar://10554090 and rdar://11873276 llvm-svn: 161152 2012-08-02 08:56:42 +08:00			`%6 = icmp eq i32 %5, 0`
			`br label %land.end`

			`land.end:`
			`%7 = phi i1 [ %6, %exit ], [ false, %entry ]`
			`ret i1 %7`
			`}`