forked from OSchip/llvm-project
AArch64CollectLOH: Rewrite as block-local analysis.
Re-apply r288561: This time with a fix where the ADDs that are part of a 3-instruction LOH would not invalidate the "LastAdrp" state. This fixes http://llvm.org/PR31361. Previously this pass was using up to 5% compile time in some cases, which is a bit much for what it is doing. The pass featured a full-blown data-flow analysis which in the default configuration was restricted to a single block. This rewrites the pass under the assumption that we only ever work on a single block. This is done in a single pass maintaining a state machine per general-purpose register to catch LOH patterns. Differential Revision: https://reviews.llvm.org/D27329 This reverts commit 9e6cedb0a4f14364d6511597a9160305e7d34493. llvm-svn: 291266
This commit is contained in:
parent
2715d92389
commit
258b847c4f
File diff suppressed because it is too large
Load Diff
|
@ -1,4 +1,4 @@
|
|||
; RUN: llc -mtriple=arm64-apple-ios -O3 -aarch64-enable-collect-loh -aarch64-collect-loh-bb-only=true -aarch64-collect-loh-pre-collect-register=false < %s -o - | FileCheck %s
|
||||
; RUN: llc -o - %s -mtriple=arm64-apple-ios -O3 -aarch64-enable-collect-loh | FileCheck %s
|
||||
; Check that the LOH analysis does not crash when the analysed chain
|
||||
; contains instructions that are filtered out.
|
||||
;
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
; RUN: llc -mtriple=arm64-apple-ios -O2 -aarch64-enable-collect-loh -aarch64-collect-loh-bb-only=false < %s -o - | FileCheck %s
|
||||
; RUN: llc -o - %s -mtriple=arm64-apple-ios -O2 | FileCheck %s
|
||||
; Test case for <rdar://problem/15942912>.
|
||||
; AdrpAddStr cannot be used when the store uses the same
|
||||
; register as address and value. Indeed, the related
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
; RUN: llc -mtriple=arm64-apple-ios -O2 -aarch64-enable-collect-loh -aarch64-collect-loh-bb-only=false < %s -o - | FileCheck %s
|
||||
; RUN: llc -mtriple=arm64-linux-gnu -O2 -aarch64-enable-collect-loh -aarch64-collect-loh-bb-only=false < %s -o - | FileCheck %s --check-prefix=CHECK-ELF
|
||||
; RUN: llc -o - %s -mtriple=arm64-apple-ios -O2 | FileCheck %s
|
||||
; RUN: llc -o - %s -mtriple=arm64-linux-gnu -O2 | FileCheck %s --check-prefix=CHECK-ELF
|
||||
|
||||
; CHECK-ELF-NOT: .loh
|
||||
; CHECK-ELF-NOT: AdrpAdrp
|
||||
|
@ -633,11 +633,14 @@ define void @setL(<1 x i8> %t) {
|
|||
; a tuple register to appear in the lowering. Thus, the target
|
||||
; cpu is required to have the problem reproduced.
|
||||
; CHECK-LABEL: _uninterestingSub
|
||||
; CHECK: [[LOH_LABEL0:Lloh[0-9]+]]:
|
||||
; CHECK: adrp [[ADRP_REG:x[0-9]+]], [[CONSTPOOL:lCPI[0-9]+_[0-9]+]]@PAGE
|
||||
; CHECK-NEXT: ldr q[[IDX:[0-9]+]], {{\[}}[[ADRP_REG]], [[CONSTPOOL]]@PAGEOFF]
|
||||
; CHECK: [[LOH_LABEL1:Lloh[0-9]+]]:
|
||||
; CHECK: ldr q[[IDX:[0-9]+]], {{\[}}[[ADRP_REG]], [[CONSTPOOL]]@PAGEOFF]
|
||||
; The tuple comes from the next instruction.
|
||||
; CHECK-NEXT: tbl.16b v{{[0-9]+}}, { v{{[0-9]+}}, v{{[0-9]+}} }, v[[IDX]]
|
||||
; CHECK: ret
|
||||
; CHECK: .loh AdrpLdr [[LOH_LABEL0]], [[LOH_LABEL1]]
|
||||
define void @uninterestingSub(i8* nocapture %row) #0 {
|
||||
%tmp = bitcast i8* %row to <16 x i8>*
|
||||
%tmp1 = load <16 x i8>, <16 x i8>* %tmp, align 16
|
||||
|
@ -664,10 +667,10 @@ entry:
|
|||
if.then.i:
|
||||
ret void
|
||||
if.end.i:
|
||||
; CHECK: .loh AdrpAdrp Lloh91, Lloh93
|
||||
; CHECK: .loh AdrpLdr Lloh91, Lloh92
|
||||
; CHECK: .loh AdrpLdrGot Lloh93, Lloh95
|
||||
; CHECK: .loh AdrpLdrGot Lloh94, Lloh96
|
||||
; CHECK: .loh AdrpLdrGot
|
||||
; CHECK: .loh AdrpLdrGot
|
||||
; CHECK: .loh AdrpAdrp
|
||||
; CHECK: .loh AdrpLdr
|
||||
%mul.i.i.i = fmul double undef, 1.000000e-06
|
||||
%add.i.i.i = fadd double undef, %mul.i.i.i
|
||||
%sub.i.i = fsub double %add.i.i.i, undef
|
||||
|
|
|
@ -0,0 +1,193 @@
|
|||
# RUN: llc -o /dev/null %s -mtriple=aarch64-apple-ios -run-pass=aarch64-collect-loh -debug-only=aarch64-collect-loh 2>&1 | FileCheck %s
|
||||
# REQUIRES: asserts
|
||||
--- |
|
||||
define void @func0() { ret void }
|
||||
|
||||
declare void @extfunc()
|
||||
|
||||
@g0 = external global i32
|
||||
@g1 = external global i32
|
||||
@g2 = external global i32
|
||||
@g3 = external global i32
|
||||
@g4 = external global i32
|
||||
@g5 = external global i32
|
||||
...
|
||||
---
|
||||
# Check various LOH variants. Remember that the algorithm walks the basic
|
||||
# blocks backwards.
|
||||
# CHECK-LABEL: ********** AArch64 Collect LOH **********
|
||||
# CHECK-LABEL: Looking in function func0
|
||||
name: func0
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
; CHECK: Adding MCLOH_AdrpAdrp:
|
||||
; CHECK-NEXT: %X1<def> = ADRP <ga:@g3>
|
||||
; CHECK-NEXT: %X1<def> = ADRP <ga:@g4>
|
||||
; CHECK-NEXT: Adding MCLOH_AdrpAdrp:
|
||||
; CHECK-NEXT: %X1<def> = ADRP <ga:@g2>
|
||||
; CHECK-NEXT: %X1<def> = ADRP <ga:@g3>
|
||||
; CHECK-NEXT: Adding MCLOH_AdrpAdrp:
|
||||
; CHECK-NEXT: %X0<def> = ADRP <ga:@g0>
|
||||
; CHECK-NEXT: %X0<def> = ADRP <ga:@g1>
|
||||
%x0 = ADRP target-flags(aarch64-page) @g0
|
||||
%x0 = ADRP target-flags(aarch64-page) @g1
|
||||
%x1 = ADRP target-flags(aarch64-page) @g2
|
||||
%x1 = ADRP target-flags(aarch64-page) @g3
|
||||
%x1 = ADRP target-flags(aarch64-page) @g4
|
||||
|
||||
bb.1:
|
||||
; CHECK-NEXT: Adding MCLOH_AdrpAdd:
|
||||
; CHECK-NEXT: %X20<def> = ADRP <ga:@g0>
|
||||
; CHECK-NEXT: %X3<def> = ADDXri %X20, <ga:@g0>
|
||||
; CHECK-NEXT: Adding MCLOH_AdrpAdd:
|
||||
; CHECK-NEXT: %X1<def> = ADRP <ga:@g0>
|
||||
; CHECK-NEXT: %X1<def> = ADDXri %X1, <ga:@g0>
|
||||
%x1 = ADRP target-flags(aarch64-page) @g0
|
||||
%x9 = SUBXri undef %x11, 5, 0 ; should not affect MCLOH formation
|
||||
%x1 = ADDXri %x1, target-flags(aarch64-pageoff) @g0, 0
|
||||
%x20 = ADRP target-flags(aarch64-page) @g0
|
||||
BL @extfunc, csr_aarch64_aapcs ; should not clobber X20
|
||||
%x3 = ADDXri %x20, target-flags(aarch64-pageoff) @g0, 0
|
||||
|
||||
bb.2:
|
||||
; CHECK-NOT: MCLOH_AdrpAdd
|
||||
%x9 = ADRP target-flags(aarch64-page) @g0
|
||||
BL @extfunc, csr_aarch64_aapcs ; clobbers x9
|
||||
; Verification requires the use of 'undef' in front of the clobbered %x9
|
||||
%x9 = ADDXri undef %x9, target-flags(aarch64-pageoff) @g0, 0
|
||||
|
||||
bb.3:
|
||||
; CHECK-NOT: MCLOH_AdrpAdd
|
||||
%x10 = ADRP target-flags(aarch64-page) @g0
|
||||
HINT 0, implicit def %x10 ; clobbers x10
|
||||
%x10 = ADDXri %x10, target-flags(aarch64-pageoff) @g0, 0
|
||||
|
||||
bb.4:
|
||||
; Cannot produce an LOH for multiple users
|
||||
; CHECK-NOT: MCLOH_AdrpAdd
|
||||
%x10 = ADRP target-flags(aarch64-page) @g0
|
||||
HINT 0, implicit def %x10 ; clobbers x10
|
||||
%x11 = ADDXri %x10, target-flags(aarch64-pageoff) @g0, 0
|
||||
%x12 = ADDXri %x10, target-flags(aarch64-pageoff) @g0, 0
|
||||
|
||||
bb.5:
|
||||
; CHECK-NEXT: Adding MCLOH_AdrpLdr:
|
||||
; CHECK-NEXT: %X5<def> = ADRP <ga:@g2>
|
||||
; CHECK-NEXT: %S6<def> = LDRSui %X5, <ga:@g2>
|
||||
; CHECK-NEXT: Adding MCLOH_AdrpLdr:
|
||||
; CHECK-NEXT: %X4<def> = ADRP <ga:@g2>
|
||||
; CHECK-NEXT: %X4<def> = LDRXui %X4, <ga:@g2>
|
||||
%x4 = ADRP target-flags(aarch64-page) @g2
|
||||
%x4 = LDRXui %x4, target-flags(aarch64-pageoff) @g2
|
||||
%x5 = ADRP target-flags(aarch64-page) @g2
|
||||
%s6 = LDRSui %x5, target-flags(aarch64-pageoff) @g2
|
||||
|
||||
bb.6:
|
||||
; CHECK-NEXT: Adding MCLOH_AdrpLdrGot:
|
||||
; CHECK-NEXT: %X5<def> = ADRP <ga:@g2>
|
||||
; CHECK-NEXT: %X6<def> = LDRXui %X5, <ga:@g2>
|
||||
; CHECK-NEXT: Adding MCLOH_AdrpLdrGot:
|
||||
; CHECK-NEXT: %X4<def> = ADRP <ga:@g2>
|
||||
; CHECK-NEXT: %X4<def> = LDRXui %X4, <ga:@g2>
|
||||
%x4 = ADRP target-flags(aarch64-page, aarch64-got) @g2
|
||||
%x4 = LDRXui %x4, target-flags(aarch64-pageoff, aarch64-got) @g2
|
||||
%x5 = ADRP target-flags(aarch64-page, aarch64-got) @g2
|
||||
%x6 = LDRXui %x5, target-flags(aarch64-pageoff, aarch64-got) @g2
|
||||
|
||||
bb.7:
|
||||
; CHECK-NOT: Adding MCLOH_AdrpLdrGot:
|
||||
; Loading a float value from the GOT makes no sense so this should not
|
||||
; produce an LOH.
|
||||
%x11 = ADRP target-flags(aarch64-page, aarch64-got) @g5
|
||||
%s11 = LDRSui %x11, target-flags(aarch64-pageoff, aarch64-got) @g5
|
||||
|
||||
bb.8:
|
||||
; CHECK-NEXT: Adding MCLOH_AdrpAddLdr:
|
||||
; CHECK-NEXT: %X7<def> = ADRP <ga:@g3>[TF=1]
|
||||
; CHECK-NEXT: %X8<def> = ADDXri %X7, <ga:@g3>
|
||||
; CHECK-NEXT: %D1<def> = LDRDui %X8, 8
|
||||
%x7 = ADRP target-flags(aarch64-page) @g3
|
||||
%x8 = ADDXri %x7, target-flags(aarch64-pageoff) @g3, 0
|
||||
%d1 = LDRDui %x8, 8
|
||||
|
||||
bb.9:
|
||||
; CHECK-NEXT: Adding MCLOH_AdrpAdd:
|
||||
; CHECK-NEXT: %X3<def> = ADRP <ga:@g3>
|
||||
; CHECK-NEXT: %X3<def> = ADDXri %X3, <ga:@g3>
|
||||
; CHECK-NEXT: Adding MCLOH_AdrpAdd:
|
||||
; CHECK-NEXT: %X5<def> = ADRP <ga:@g3>
|
||||
; CHECK-NEXT: %X2<def> = ADDXri %X5, <ga:@g3>
|
||||
; CHECK-NEXT: Adding MCLOH_AdrpAddStr:
|
||||
; CHECK-NEXT: %X1<def> = ADRP <ga:@g3>
|
||||
; CHECK-NEXT: %X1<def> = ADDXri %X1, <ga:@g3>
|
||||
; CHECK-NEXT: STRXui %XZR, %X1, 16
|
||||
%x1 = ADRP target-flags(aarch64-page) @g3
|
||||
%x1 = ADDXri %x1, target-flags(aarch64-pageoff) @g3, 0
|
||||
STRXui %xzr, %x1, 16
|
||||
|
||||
; This sequence should just produce an AdrpAdd (not AdrpAddStr)
|
||||
%x5 = ADRP target-flags(aarch64-page) @g3
|
||||
%x2 = ADDXri %x5, target-flags(aarch64-pageoff) @g3, 0
|
||||
STRXui %x2, undef %x11, 16
|
||||
|
||||
; This sequence should just produce an AdrpAdd (not AdrpAddStr)
|
||||
%x3 = ADRP target-flags(aarch64-page) @g3
|
||||
%x3 = ADDXri %x3, target-flags(aarch64-pageoff) @g3, 0
|
||||
STRXui %x3, %x3, 16
|
||||
|
||||
bb.10:
|
||||
; CHECK-NEXT: Adding MCLOH_AdrpLdr:
|
||||
; CHECK-NEXT: %X2<def> = ADRP <ga:@g3>
|
||||
; CHECK-NEXT: %X2<def> = LDRXui %X2, <ga:@g3>
|
||||
; CHECK-NEXT: Adding MCLOH_AdrpLdrGotLdr:
|
||||
; CHECK-NEXT: %X1<def> = ADRP <ga:@g4>
|
||||
; CHECK-NEXT: %X1<def> = LDRXui %X1, <ga:@g4>
|
||||
; CHECK-NEXT: %X1<def> = LDRXui %X1, 24
|
||||
%x1 = ADRP target-flags(aarch64-page, aarch64-got) @g4
|
||||
%x1 = LDRXui %x1, target-flags(aarch64-pageoff, aarch64-got) @g4
|
||||
%x1 = LDRXui %x1, 24
|
||||
; Should just produce a MCLOH_AdrpLdr (not MCLOH_AdrpLdrGotLdr)
|
||||
%x2 = ADRP target-flags(aarch64-page) @g3
|
||||
%x2 = LDRXui %x2, target-flags(aarch64-pageoff) @g3
|
||||
%x2 = LDRXui %x2, 24
|
||||
|
||||
bb.11:
|
||||
; CHECK-NEXT: Adding MCLOH_AdrpLdr
|
||||
; CHECK-NEXT: %X5<def> = ADRP <ga:@g1>
|
||||
; CHECK-NEXT: %X5<def> = LDRXui %X5, <ga:@g1>
|
||||
; CHECK-NEXT: Adding MCLOH_AdrpLdrGotStr:
|
||||
; CHECK-NEXT: %X1<def> = ADRP <ga:@g4>
|
||||
; CHECK-NEXT: %X1<def> = LDRXui %X1, <ga:@g4>
|
||||
; CHECK-NEXT: STRXui %XZR, %X1, 32
|
||||
%x1 = ADRP target-flags(aarch64-page, aarch64-got) @g4
|
||||
%x1 = LDRXui %x1, target-flags(aarch64-pageoff, aarch64-got) @g4
|
||||
STRXui %xzr, %x1, 32
|
||||
; Should just produce a MCLOH_AdrpLdr (not MCLOH_AdrpLdrGotStr)
|
||||
%x5 = ADRP target-flags(aarch64-page) @g1
|
||||
%x5 = LDRXui %x5, target-flags(aarch64-pageoff) @g1
|
||||
STRXui undef %x11, %x5, 32
|
||||
|
||||
bb.12:
|
||||
; CHECK-NOT: MCLOH_AdrpAdrp
|
||||
; CHECK: Adding MCLOH_AdrpAddLdr
|
||||
; %X9<def> = ADRP <ga:@g4>
|
||||
; %X9<def> = ADDXri %X9, <ga:@g4>
|
||||
; %X5<def> = LDRXui %X9, 0
|
||||
%x9 = ADRP target-flags(aarch64-page, aarch64-got) @g4
|
||||
%x9 = ADDXri %x9, target-flags(aarch64-pageoff, aarch64-got) @g4, 0
|
||||
%x5 = LDRXui %x9, 0
|
||||
%x9 = ADRP target-flags(aarch64-page, aarch64-got) @g5
|
||||
|
||||
bb.13:
|
||||
successors: %bb.14
|
||||
; Cannot produce an LOH for multiple users
|
||||
; CHECK-NOT: MCLOH_AdrpAdd
|
||||
%x10 = ADRP target-flags(aarch64-page) @g0
|
||||
%x11 = ADDXri %x10, target-flags(aarch64-pageoff) @g0, 0
|
||||
B %bb.14
|
||||
|
||||
bb.14:
|
||||
liveins: %x10
|
||||
%x12 = ADDXri %x10, target-flags(aarch64-pageoff) @g0, 0
|
||||
...
|
Loading…
Reference in New Issue