llvm-project/llvm/test/Transforms/SimpleLoopUnswitch/2007-05-09-tl.ll

; RUN: opt < %s -simple-loop-unswitch -disable-output
; PR1333

; Reduced input for PR1333 (referenced at the top of this file). The RUN line
; discards opt's output and pipes nothing to FileCheck, so the only thing this
; test checks is that the pass finishes on this CFG.
;
; CFG shape:
;   - %entry computes %tmp6 once and branches unconditionally to %tailrecurse.
;   - %tailrecurse is a single-block loop. It switches on %tmp6; cases 205 and
;     210 both branch back to %tailrecurse (two edges to the same loop header),
;     while every other case and the default leave the loop.
;   - Each exit block (%bb10 ... %bb96) contains only `ret void`; many of them
;     are reached through several distinct case values.
; NOTE(review): the doubled self-edge combined with many multi-predecessor
; exits is presumably what triggered the original failure -- confirm against
; PR1333 before simplifying this input any further.
define void @pp_cxx_expression() {
entry:
	; The switch condition is defined outside the loop, so it is loop-invariant.
	; `lshr i32 0, 24` evaluates to 0, and no case below matches 0, so actually
	; executing this function would take the default edge to %bb96.
	%tmp6 = lshr i32 0, 24		; <i32> [#uses=1]
	br label %tailrecurse

; Loop header and sole loop block: reached from %entry and from its own switch.
tailrecurse:		; preds = %tailrecurse, %tailrecurse, %entry
	switch i32 %tmp6, label %bb96 [
		 i32 24, label %bb10
		 i32 25, label %bb10
		 i32 28, label %bb10
		 i32 29, label %bb48
		 i32 31, label %bb48
		 i32 32, label %bb48
		 i32 33, label %bb48
		 i32 34, label %bb48
		 i32 36, label %bb15
		 i32 51, label %bb89
		 i32 52, label %bb89
		 i32 54, label %bb83
		 i32 57, label %bb59
		 i32 63, label %bb80
		 i32 64, label %bb80
		 i32 68, label %bb80
		 i32 169, label %bb75
		 i32 170, label %bb19
		 i32 171, label %bb63
		 i32 172, label %bb63
		 i32 173, label %bb67
		 i32 174, label %bb67
		 i32 175, label %bb19
		 i32 176, label %bb75
		 i32 178, label %bb59
		 i32 179, label %bb89
		 i32 180, label %bb59
		 i32 182, label %bb48
		 i32 183, label %bb48
		 i32 184, label %bb48
		 i32 185, label %bb48
		 i32 186, label %bb48
		 i32 195, label %bb48
		 i32 196, label %bb59
		 i32 197, label %bb89
		 i32 198, label %bb70
		 i32 199, label %bb59
		 i32 200, label %bb59
		 i32 201, label %bb59
		 i32 202, label %bb59
		 i32 203, label %bb75
		 i32 204, label %bb59
		 i32 205, label %tailrecurse	; loops back to the header
		 i32 210, label %tailrecurse	; second edge back to the same header
	]

; Everything below is a loop exit that returns immediately. The "preds" lists
; repeat %tailrecurse once per switch case that targets the block.
bb10:		; preds = %tailrecurse, %tailrecurse, %tailrecurse
	ret void

bb15:		; preds = %tailrecurse
	ret void

bb19:		; preds = %tailrecurse, %tailrecurse
	ret void

bb48:		; preds = %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse
	ret void

bb59:		; preds = %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse
	ret void

bb63:		; preds = %tailrecurse, %tailrecurse
	ret void

bb67:		; preds = %tailrecurse, %tailrecurse
	ret void

bb70:		; preds = %tailrecurse
	ret void

bb75:		; preds = %tailrecurse, %tailrecurse, %tailrecurse
	ret void

bb80:		; preds = %tailrecurse, %tailrecurse, %tailrecurse
	ret void

bb83:		; preds = %tailrecurse
	ret void

bb89:		; preds = %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse
	ret void

; Default destination of the switch.
bb96:		; preds = %tailrecurse
	ret void
}
[PM/LoopUnswitch] Introduce a new, simpler loop unswitch pass. Currently, this pass only focuses on trivial loop unswitching. At that reduced problem it remains significantly better than the current loop unswitch: - Old pass is worse than cubic complexity. New pass is (I think) linear. - New pass is much simpler in its design by focusing on full unswitching. (See below for details on this). - New pass doesn't carry state for thresholds between pass iterations. - New pass doesn't carry state for correctness (both miscompile and infloop) between pass iterations. - New pass produces substantially better code after unswitching. - New pass can handle more trivial unswitch cases. - New pass doesn't recompute the dominator tree for the entire function and instead incrementally updates it. I've ported all of the trivial unswitching test cases from the old pass to the new one to make sure that major functionality isn't lost in the process. For several of the test cases I've worked to improve the precision and rigor of the CHECKs, but for many I've just updated them to handle the new IR produced. My initial motivation was the fact that the old pass carried state in very unreliable ways between pass iterations, and these mechanisms were incompatible with the new pass manager. However, I discovered many more improvements to make along the way. This pass makes two very significant assumptions that enable most of these improvements: 1) Focus on full unswitching -- that is, completely removing whatever control flow construct is being unswitched from the loop. In the case of trivial unswitching, this means removing the trivial (exiting) edge. In non-trivial unswitching, this means removing the branch or switch itself. This is in opposition to partial unswitching where some part of the unswitched control flow remains in the loop. Partial unswitching only really applies to switches and to folded branches. These are very similar to full unrolling and partial unrolling. 
The full form is an effective canonicalization, the partial form needs a complex cost model, cannot be iterated, isn't canonicalizing, and should be a separate pass that runs very late (much like unrolling). 2) Leverage LLVM's Loop machinery to the fullest. The original unswitch dates from a time when a great deal of LLVM's loop infrastructure was missing, ineffective, and/or unreliable. As a consequence, a lot of complexity was added which we no longer need. With these two overarching principles, I think we can build a fast and effective unswitcher that fits in well in the new PM and in the canonicalization pipeline. Some of the remaining functionality around partial unswitching may not be relevant today (not many test cases or benchmarks I can find) but if they are I'd like to add support for them as a separate layer that runs very late in the pipeline. Purely to make reviewing and introducing this code more manageable, I've split this into first a trivial-unswitch-only pass and in the next patch I'll add support for full non-trivial unswitching against a fixed threshold, exactly like full unrolling. I even plan to re-use the unrolling thresholds, as these are incredibly similar cost tradeoffs: we're cloning a loop body in order to end up with simplified control flow. We should only do that when the total growth is reasonably small. One of the biggest changes with this pass compared to the previous one is that previously, each individual trivial exiting edge from a switch was unswitched separately as a branch. Now, we unswitch the entire switch at once, with cases going to the various destinations. This lets us unswitch multiple exiting edges in a single operation and also avoids numerous extremely bad behaviors, where we would introduce 1000s of branches to test for thousands of possible values, all of which would take the exact same exit path bypassing the loop. Now we will use a switch with 1000s of cases that can be efficiently lowered into a jumptable. 
This avoids relying on somehow forming a switch out of the branches or getting horrible code if that fails for any reason. Another significant change is that this pass actively updates the CFG based on unswitching. For trivial unswitching, this is actually very easy because of the definition of loop simplified form. Doing this makes the code coming out of loop unswitch dramatically more friendly. We still should run loop-simplifycfg (at the least) after this to clean up, but it will have to do a lot less work. Finally, this pass makes far fewer attempts to simplify instructions based on the unswitch. Something like loop-instsimplify, instcombine, or GVN can be used to do increasingly powerful simplifications based on the now dominating predicate. The old simplifications are things that something like loop-instsimplify should get today or a very, very basic loop-instcombine could get. Keeping that logic separate is a big simplifying technique. Most of the code in this pass that isn't in the old one has to do with achieving specific goals: - Updating the dominator tree as we go - Unswitching all cases in a switch in a single step. I think it is still shorter than just the trivial unswitching code in the old pass despite having this functionality. Differential Revision: https://reviews.llvm.org/D32409 llvm-svn: 301576 2017-04-28 02:45:20 +08:00			`; RUN: opt < %s -simple-loop-unswitch -disable-output`
			`; PR1333`

			`define void @pp_cxx_expression() {`
			`entry:`
			`%tmp6 = lshr i32 0, 24 ; <i32> [#uses=1]`
			`br label %tailrecurse`

			`tailrecurse: ; preds = %tailrecurse, %tailrecurse, %entry`
			`switch i32 %tmp6, label %bb96 [`
			`i32 24, label %bb10`
			`i32 25, label %bb10`
			`i32 28, label %bb10`
			`i32 29, label %bb48`
			`i32 31, label %bb48`
			`i32 32, label %bb48`
			`i32 33, label %bb48`
			`i32 34, label %bb48`
			`i32 36, label %bb15`
			`i32 51, label %bb89`
			`i32 52, label %bb89`
			`i32 54, label %bb83`
			`i32 57, label %bb59`
			`i32 63, label %bb80`
			`i32 64, label %bb80`
			`i32 68, label %bb80`
			`i32 169, label %bb75`
			`i32 170, label %bb19`
			`i32 171, label %bb63`
			`i32 172, label %bb63`
			`i32 173, label %bb67`
			`i32 174, label %bb67`
			`i32 175, label %bb19`
			`i32 176, label %bb75`
			`i32 178, label %bb59`
			`i32 179, label %bb89`
			`i32 180, label %bb59`
			`i32 182, label %bb48`
			`i32 183, label %bb48`
			`i32 184, label %bb48`
			`i32 185, label %bb48`
			`i32 186, label %bb48`
			`i32 195, label %bb48`
			`i32 196, label %bb59`
			`i32 197, label %bb89`
			`i32 198, label %bb70`
			`i32 199, label %bb59`
			`i32 200, label %bb59`
			`i32 201, label %bb59`
			`i32 202, label %bb59`
			`i32 203, label %bb75`
			`i32 204, label %bb59`
			`i32 205, label %tailrecurse`
			`i32 210, label %tailrecurse`
			`]`

			`bb10: ; preds = %tailrecurse, %tailrecurse, %tailrecurse`
			`ret void`

			`bb15: ; preds = %tailrecurse`
			`ret void`

			`bb19: ; preds = %tailrecurse, %tailrecurse`
			`ret void`

			`bb48: ; preds = %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse`
			`ret void`

			`bb59: ; preds = %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse`
			`ret void`

			`bb63: ; preds = %tailrecurse, %tailrecurse`
			`ret void`

			`bb67: ; preds = %tailrecurse, %tailrecurse`
			`ret void`

			`bb70: ; preds = %tailrecurse`
			`ret void`

			`bb75: ; preds = %tailrecurse, %tailrecurse, %tailrecurse`
			`ret void`

			`bb80: ; preds = %tailrecurse, %tailrecurse, %tailrecurse`
			`ret void`

			`bb83: ; preds = %tailrecurse`
			`ret void`

			`bb89: ; preds = %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse`
			`ret void`

			`bb96: ; preds = %tailrecurse`
			`ret void`
			`}`