2014-04-04 07:47:24 +08:00
|
|
|
; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 -arm-adjust-jump-tables=0 %s -o - | FileCheck %s
|
[Thumb-1] Synthesize TBB/TBH instructions to make use of compressed jump tables
[Reapplying r284580 and r285917 with fix and testing to ensure emitted jump tables for Thumb-1 have 4-byte alignment]
The TBB and TBH instructions in Thumb-2 allow jump tables to be compressed into sequences of bytes or shorts respectively. These instructions do not exist in Thumb-1, however it is possible to synthesize them out of a sequence of other instructions.
It turns out this sequence is so short that it's almost never a loss for performance and is ALWAYS a significant win for code size.
TBB example:
Before: lsls r0, r0, #2        After: add  r0, pc
        adr  r1, .LJTI0_0             ldrb r0, [r0, #6]
        ldr  r0, [r0, r1]             lsls r0, r0, #1
        mov  pc, r0                   add  pc, r0
=> No change in prologue code size or dynamic instruction count. Jump table shrunk by a factor of 4.
The only case that can increase dynamic instruction count is the TBH case:
Before: lsls r0, r4, #2        After: lsls r4, r4, #1
        adr  r1, .LJTI0_0             add  r4, pc
        ldr  r0, [r0, r1]             ldrh r4, [r4, #6]
        mov  pc, r0                   lsls r4, r4, #1
                                      add  pc, r4
=> 1 more instruction in prologue. Jump table shrunk by a factor of 2.
So there is an argument that this should be disabled when optimizing for performance (and a TBH needs to be generated). I'm not so sure about that in practice, because on small cores with Thumb-1 performance is often tied to code size. But I'm willing to turn it off when optimizing for performance if people want (also note that TBHs are fairly rare in practice!)
llvm-svn: 285690
2016-11-01 21:37:41 +08:00
|
|
|
; RUN: llc -mtriple=thumbv6-eabi -mcpu=cortex-m0 -arm-adjust-jump-tables=0 %s -o - | FileCheck %s
|
2009-07-30 07:20:20 +08:00
|
|
|
|
|
|
|
; Do not use tbb / tbh if any destination is before the jumptable.
|
|
|
|
; rdar://7102917
|
|
|
|
|
2012-04-16 21:49:17 +08:00
|
|
|
define i16 @main__getopt_internal_2E_exit_2E_ce(i32, i1 %b) nounwind {
|
2012-05-22 07:50:00 +08:00
|
|
|
; CHECK: main__getopt_internal_2E_exit_2E_ce
|
|
|
|
; CHECK-NOT: tbb
|
|
|
|
; CHECK-NOT: tbh
|
|
|
|
; 32-bit jump tables use explicit branches, not data regions, so make sure
|
|
|
|
; we don't annotate this region.
|
|
|
|
; CHECK-NOT: data_region
|
2012-04-16 21:49:17 +08:00
|
|
|
entry:
|
|
|
|
br i1 %b, label %codeRepl127.exitStub, label %newFuncRoot
|
|
|
|
|
2009-07-30 07:20:20 +08:00
|
|
|
codeRepl127.exitStub: ; preds = %_getopt_internal.exit.ce
|
2012-04-16 21:49:17 +08:00
|
|
|
; Add an explicit edge back to before the jump table to ensure this block
|
|
|
|
; is placed first.
|
|
|
|
br i1 %b, label %newFuncRoot, label %codeRepl127.exitStub.exit
|
|
|
|
|
|
|
|
codeRepl127.exitStub.exit:
|
2009-07-30 07:20:20 +08:00
|
|
|
ret i16 0
|
|
|
|
|
|
|
|
parse_options.exit.loopexit.exitStub: ; preds = %_getopt_internal.exit.ce
|
|
|
|
ret i16 1
|
|
|
|
|
|
|
|
bb1.i.exitStub: ; preds = %_getopt_internal.exit.ce
|
|
|
|
ret i16 2
|
|
|
|
|
|
|
|
bb90.i.exitStub: ; preds = %_getopt_internal.exit.ce
|
|
|
|
ret i16 3
|
|
|
|
|
|
|
|
codeRepl104.exitStub: ; preds = %_getopt_internal.exit.ce
|
|
|
|
ret i16 4
|
|
|
|
|
|
|
|
codeRepl113.exitStub: ; preds = %_getopt_internal.exit.ce
|
|
|
|
ret i16 5
|
|
|
|
|
|
|
|
codeRepl51.exitStub: ; preds = %_getopt_internal.exit.ce
|
|
|
|
ret i16 6
|
|
|
|
|
|
|
|
codeRepl70.exitStub: ; preds = %_getopt_internal.exit.ce
|
|
|
|
ret i16 7
|
|
|
|
|
|
|
|
codeRepl119.exitStub: ; preds = %_getopt_internal.exit.ce
|
|
|
|
ret i16 8
|
|
|
|
|
|
|
|
codeRepl93.exitStub: ; preds = %_getopt_internal.exit.ce
|
|
|
|
ret i16 9
|
|
|
|
|
|
|
|
codeRepl101.exitStub: ; preds = %_getopt_internal.exit.ce
|
|
|
|
ret i16 10
|
|
|
|
|
|
|
|
codeRepl120.exitStub: ; preds = %_getopt_internal.exit.ce
|
|
|
|
ret i16 11
|
|
|
|
|
|
|
|
codeRepl89.exitStub: ; preds = %_getopt_internal.exit.ce
|
|
|
|
ret i16 12
|
|
|
|
|
|
|
|
codeRepl45.exitStub: ; preds = %_getopt_internal.exit.ce
|
|
|
|
ret i16 13
|
|
|
|
|
|
|
|
codeRepl58.exitStub: ; preds = %_getopt_internal.exit.ce
|
|
|
|
ret i16 14
|
|
|
|
|
|
|
|
codeRepl46.exitStub: ; preds = %_getopt_internal.exit.ce
|
|
|
|
ret i16 15
|
|
|
|
|
|
|
|
codeRepl50.exitStub: ; preds = %_getopt_internal.exit.ce
|
|
|
|
ret i16 16
|
|
|
|
|
|
|
|
codeRepl52.exitStub: ; preds = %_getopt_internal.exit.ce
|
|
|
|
ret i16 17
|
|
|
|
|
|
|
|
codeRepl53.exitStub: ; preds = %_getopt_internal.exit.ce
|
|
|
|
ret i16 18
|
|
|
|
|
|
|
|
codeRepl61.exitStub: ; preds = %_getopt_internal.exit.ce
|
|
|
|
ret i16 19
|
|
|
|
|
|
|
|
codeRepl85.exitStub: ; preds = %_getopt_internal.exit.ce
|
|
|
|
ret i16 20
|
|
|
|
|
|
|
|
codeRepl97.exitStub: ; preds = %_getopt_internal.exit.ce
|
|
|
|
ret i16 21
|
|
|
|
|
|
|
|
codeRepl79.exitStub: ; preds = %_getopt_internal.exit.ce
|
|
|
|
ret i16 22
|
|
|
|
|
|
|
|
codeRepl102.exitStub: ; preds = %_getopt_internal.exit.ce
|
|
|
|
ret i16 23
|
|
|
|
|
|
|
|
codeRepl54.exitStub: ; preds = %_getopt_internal.exit.ce
|
|
|
|
ret i16 24
|
|
|
|
|
|
|
|
codeRepl57.exitStub: ; preds = %_getopt_internal.exit.ce
|
|
|
|
ret i16 25
|
|
|
|
|
|
|
|
codeRepl103.exitStub: ; preds = %_getopt_internal.exit.ce
|
|
|
|
ret i16 26
|
|
|
|
|
Generalize MergeBlockIntoPredecessor. Replace uses of MergeBasicBlockIntoOnlyPred.
Summary:
Two utility methods have essentially the same functionality. This is an attempt to merge them into one.
1. lib/Transforms/Utils/Local.cpp : MergeBasicBlockIntoOnlyPred
2. lib/Transforms/Utils/BasicBlockUtils.cpp : MergeBlockIntoPredecessor
Prior to the patch:
1. MergeBasicBlockIntoOnlyPred
Updates either DomTree or DeferredDominance
Moves all instructions from Pred to BB, deletes Pred
Asserts BB has single predecessor
If address was taken, replace the block address with constant 1 (?)
2. MergeBlockIntoPredecessor
Updates DomTree, LoopInfo and MemoryDependenceResults
Moves all instructions from BB to Pred, deletes BB
Returns if BB doesn't have a single predecessor
Returns if BB's address was taken
After the patch:
Method 2. MergeBlockIntoPredecessor is attempting to become the new default:
Updates DomTree or DeferredDominance, and LoopInfo and MemoryDependenceResults
Moves all instructions from BB to Pred, deletes BB
Returns if BB doesn't have a single predecessor
Returns if BB's address was taken
Uses of MergeBasicBlockIntoOnlyPred that need to be replaced:
1. lib/Transforms/Scalar/LoopSimplifyCFG.cpp
Updated in this patch. No challenges.
2. lib/CodeGen/CodeGenPrepare.cpp
Updated in this patch.
i. eliminateFallThrough is straightforward, but I added a temporary array to avoid iterator invalidation.
ii. eliminateMostlyEmptyBlock(s) methods also now use a temporary array for blocks
Some interesting aspects:
- Since Pred is not deleted (BB is), the entry block does not need updating.
- The entry block was being updated with the deleted block in eliminateMostlyEmptyBlock. Added assert to make obvious that BB=SinglePred.
- isMergingEmptyBlockProfitable assumes BB is the one to be deleted.
- eliminateMostlyEmptyBlock(BB) does not delete BB on one path, it deletes its unique predecessor instead.
- Adding some test owners as subscribers for the interesting tests modified:
test/CodeGen/X86/avx-cmp.ll
test/CodeGen/AMDGPU/nested-loop-conditions.ll
test/CodeGen/AMDGPU/si-annotate-cf.ll
test/CodeGen/X86/hoist-spill.ll
test/CodeGen/X86/2006-11-17-IllegalMove.ll
3. lib/Transforms/Scalar/JumpThreading.cpp
Not covered in this patch. It is the only use case using the DeferredDominance.
I would defer to Brian Rzycki to make this replacement.
Reviewers: chandlerc, spatel, davide, brzycki, bkramer, javed.absar
Subscribers: qcolombet, sanjoy, nemanjai, nhaehnle, jlebar, tpr, kbarton, RKSimon, wmi, arsenm, llvm-commits
Differential Revision: https://reviews.llvm.org/D48202
llvm-svn: 335183
2018-06-21 06:01:04 +08:00
|
|
|
newFuncRoot:
|
|
|
|
br label %_getopt_internal.exit.ce
|
|
|
|
|
2009-07-30 07:20:20 +08:00
|
|
|
_getopt_internal.exit.ce: ; preds = %newFuncRoot
|
|
|
|
switch i32 %0, label %codeRepl127.exitStub [
|
|
|
|
i32 -1, label %parse_options.exit.loopexit.exitStub
|
|
|
|
i32 0, label %bb1.i.exitStub
|
|
|
|
i32 63, label %bb90.i.exitStub
|
|
|
|
i32 66, label %codeRepl104.exitStub
|
|
|
|
i32 67, label %codeRepl113.exitStub
|
|
|
|
i32 71, label %codeRepl51.exitStub
|
|
|
|
i32 77, label %codeRepl70.exitStub
|
|
|
|
i32 78, label %codeRepl119.exitStub
|
|
|
|
i32 80, label %codeRepl93.exitStub
|
|
|
|
i32 81, label %codeRepl101.exitStub
|
|
|
|
i32 82, label %codeRepl120.exitStub
|
|
|
|
i32 88, label %codeRepl89.exitStub
|
|
|
|
i32 97, label %codeRepl45.exitStub
|
|
|
|
i32 98, label %codeRepl58.exitStub
|
|
|
|
i32 99, label %codeRepl46.exitStub
|
|
|
|
i32 100, label %codeRepl50.exitStub
|
|
|
|
i32 104, label %codeRepl52.exitStub
|
|
|
|
i32 108, label %codeRepl53.exitStub
|
|
|
|
i32 109, label %codeRepl61.exitStub
|
|
|
|
i32 110, label %codeRepl85.exitStub
|
|
|
|
i32 111, label %codeRepl97.exitStub
|
|
|
|
i32 113, label %codeRepl79.exitStub
|
|
|
|
i32 114, label %codeRepl102.exitStub
|
|
|
|
i32 115, label %codeRepl54.exitStub
|
|
|
|
i32 116, label %codeRepl57.exitStub
|
|
|
|
i32 118, label %codeRepl103.exitStub
|
|
|
|
]
|
|
|
|
}
|