[GreedyRA ORE] Separate Folded Reloads and Zero Cost Folded Reloads

Patchpoint instructions have operands for which using the value directly
from the stack is actually zero cost (or costs the same as using a register).
For statistics purposes it makes sense to report them separately.

Switch from counting the instructions related to stack spills/reloads to
counting the number of stack slots referenced.

Reviewers: reames, MatzeB, anemet, thegameg
Reviewed By: reames
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D100016
This commit is contained in:
Serguei Katkov 2021-04-14 13:30:58 +07:00
parent 530456caf9
commit cf0d3477aa
2 changed files with 147 additions and 6 deletions

View File

@ -551,16 +551,19 @@ private:
struct RAGreedyStats {
unsigned Reloads = 0;
unsigned FoldedReloads = 0;
unsigned ZeroCostFoldedReloads = 0;
unsigned Spills = 0;
unsigned FoldedSpills = 0;
bool isEmpty() {
return !(Reloads || FoldedReloads || Spills || FoldedSpills);
return !(Reloads || FoldedReloads || Spills || FoldedSpills ||
ZeroCostFoldedReloads);
}
void add(RAGreedyStats other) {
Reloads += other.Reloads;
FoldedReloads += other.FoldedReloads;
ZeroCostFoldedReloads += other.ZeroCostFoldedReloads;
Spills += other.Spills;
FoldedSpills += other.FoldedSpills;
}
@ -3139,6 +3142,9 @@ void RAGreedy::RAGreedyStats::report(MachineOptimizationRemarkMissed &R) {
R << NV("NumReloads", Reloads) << " reloads ";
if (FoldedReloads)
R << NV("NumFoldedReloads", FoldedReloads) << " folded reloads ";
if (ZeroCostFoldedReloads)
R << NV("NumZeroCostFoldedReloads", ZeroCostFoldedReloads)
<< " zero cost folded reloads ";
}
RAGreedy::RAGreedyStats
@ -3151,6 +3157,11 @@ RAGreedy::computeNumberOfSplillsReloads(MachineBasicBlock &MBB) {
return MFI.isSpillSlotObjectIndex(cast<FixedStackPseudoSourceValue>(
A->getPseudoValue())->getFrameIndex());
};
auto isPatchpointInstr = [](const MachineInstr &MI) {
return MI.getOpcode() == TargetOpcode::PATCHPOINT ||
MI.getOpcode() == TargetOpcode::STACKMAP ||
MI.getOpcode() == TargetOpcode::STATEPOINT;
};
for (MachineInstr &MI : MBB) {
SmallVector<const MachineMemOperand *, 2> Accesses;
@ -3164,13 +3175,35 @@ RAGreedy::computeNumberOfSplillsReloads(MachineBasicBlock &MBB) {
}
if (TII->hasLoadFromStackSlot(MI, Accesses) &&
llvm::any_of(Accesses, isSpillSlotAccess)) {
++Stats.FoldedReloads;
if (!isPatchpointInstr(MI)) {
Stats.FoldedReloads += Accesses.size();
continue;
}
// For statepoint there may be folded and zero cost folded stack reloads.
std::pair<unsigned, unsigned> NonZeroCostRange =
TII->getPatchpointUnfoldableRange(MI);
SmallSet<unsigned, 16> FoldedReloads;
SmallSet<unsigned, 16> ZeroCostFoldedReloads;
for (unsigned Idx = 0, E = MI.getNumOperands(); Idx < E; ++Idx) {
MachineOperand &MO = MI.getOperand(Idx);
if (!MO.isFI() || !MFI.isSpillSlotObjectIndex(MO.getIndex()))
continue;
if (Idx >= NonZeroCostRange.first && Idx < NonZeroCostRange.second)
FoldedReloads.insert(MO.getIndex());
else
ZeroCostFoldedReloads.insert(MO.getIndex());
}
// If stack slot is used in folded reload it is not zero cost then.
for (unsigned Slot : FoldedReloads)
ZeroCostFoldedReloads.erase(Slot);
Stats.FoldedReloads += FoldedReloads.size();
Stats.ZeroCostFoldedReloads += ZeroCostFoldedReloads.size();
continue;
}
Accesses.clear();
if (TII->hasStoreToStackSlot(MI, Accesses) &&
llvm::any_of(Accesses, isSpillSlotAccess)) {
++Stats.FoldedSpills;
Stats.FoldedSpills += Accesses.size();
}
}
return Stats;

View File

@ -1,9 +1,22 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -O3 -use-registers-for-deopt-values -restrict-statepoint-remat=true < %s 2>&1 | FileCheck %s
; RUN: llc -verify-machineinstrs -O3 -use-registers-for-deopt-values -restrict-statepoint-remat=true -pass-remarks-filter=regalloc -pass-remarks-output=%t.yaml -stop-after=greedy -o - < %s 2>&1 | FileCheck %s
; RUN: cat %t.yaml | FileCheck -check-prefix=YAML %s
target triple = "x86_64-unknown-linux-gnu"
; CHECK-NOT: error: ran out of registers during register allocation
;CHECK-NOT: error: ran out of registers during register allocation
;YAML: --- !Missed
;YAML: Pass: regalloc
;YAML: Name: SpillReload
;YAML: Function: barney
;YAML: Args:
;YAML: - NumSpills: '10'
;YAML: - String: ' spills '
;YAML: - NumReloads: '7'
;YAML: - String: ' reloads '
;YAML: - NumZeroCostFoldedReloads: '20'
;YAML: - String: ' zero cost folded reloads '
;YAML: - String: generated in function
define void @barney(i8 addrspace(1)* %arg, double %arg1, double %arg2, double %arg3, double %arg4, double %arg5, double %arg6, double %arg7, double %arg8, double %arg9, double %arg10, double %arg11, double %arg12) gc "statepoint-example" personality i32* ()* @widget {
bb:
@ -46,3 +59,98 @@ declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 , i32 , void ()*,
declare token @llvm.experimental.gc.statepoint.p0f_i32p1i8f64f64f64f64f64f64f64f64f64f(i64 , i32 , i32 (i8 addrspace(1)*, double, double, double, double, double, double, double, double, double)*, i32 , i32 , ...)
declare token @llvm.experimental.gc.statepoint.p0f_i32i32p1i8i32f(i64 , i32 , i32 (i32, i8 addrspace(1)*, i32)*, i32 , i32 , ...)
declare token @llvm.experimental.gc.statepoint.p0f_isVoidp1i8f64f64f64f64f64f64f64f64f64i32f(i64 , i32 , void (i8 addrspace(1)*, double, double, double, double, double, double, double, double, double, i32)*, i32 , i32 , ...)
;CHECK: body: |
;CHECK: bb.0.bb:
;CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000)
;CHECK: liveins: $rdi, $xmm0, $xmm1, $xmm2, $xmm3, $xmm4, $xmm5, $xmm6, $xmm7
;CHECK: %49:fr64 = COPY $xmm7
;CHECK: %10:fr64 = COPY $xmm6
;CHECK: %41:fr64 = COPY $xmm5
;CHECK: %45:fr64 = COPY $xmm4
;CHECK: %53:fr64 = COPY $xmm3
;CHECK: %6:fr64 = COPY $xmm2
;CHECK: %58:fr64 = COPY $xmm1
;CHECK: %62:fr64 = COPY $xmm0
;CHECK: %3:gr64 = COPY $rdi
;CHECK: %76:fr64 = MOVSDrm_alt %fixed-stack.0, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.0)
;CHECK: %14:fr64 = MOVSDrm_alt %fixed-stack.1, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.1, align 16)
;CHECK: %66:fr64 = MOVSDrm_alt %fixed-stack.2, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.2)
;CHECK: %71:fr64 = MOVSDrm_alt %fixed-stack.3, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.3, align 16)
;CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, %3 :: (store 8 into %stack.0)
;CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
;CHECK: STATEPOINT 2882400000, 0, 0, target-flags(x86-plt) @blam, 2, 9, 2, 0, 2, 59, 2, 0, 2, 1, 2, 0, 2, 0, 2, 0, 2, 26, 2, 0, 2, 0, 1, 8, %stack.0, 0, 2, 4, %62, 2, 7, 2, 0, 2, 4, %58, 2, 7, 2, 0, 2, 4, %6, 2, 7, 2, 0, 2, 4, %53, 2, 7, 2, 0, 2, 4, %45, 2, 7, 2, 0, 2, 4, %41, 2, 7, 2, 0, 2, 4, %10, 2, 7, 2, 0, 2, 4, %49, 2, 7, 2, 0, 2, 4, %71, 2, 7, 2, 0, 2, 4, %66, 2, 7, 2, 0, 2, 4, %14, 2, 7, 2, 0, 2, 4, %76, 2, 7, 2, 0, 2, 7, 2, 0, 2, 1, 1, 8, %stack.0, 0, 2, 0, 2, 1, 0, 0, csr_64_mostregs, implicit-def $rsp, implicit-def $ssp :: (volatile load store 8 on %stack.0)
;CHECK: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
;CHECK: %17:gr32 = MOV32r0 implicit-def dead $eflags
;CHECK: TEST8rr %17.sub_8bit, %17.sub_8bit, implicit-def $eflags
;CHECK: MOVSDmr %stack.1, 1, $noreg, 0, $noreg, %41 :: (store 8 into %stack.1)
;CHECK: MOVSDmr %stack.2, 1, $noreg, 0, $noreg, %45 :: (store 8 into %stack.2)
;CHECK: MOVSDmr %stack.5, 1, $noreg, 0, $noreg, %58 :: (store 8 into %stack.5)
;CHECK: MOVSDmr %stack.6, 1, $noreg, 0, $noreg, %62 :: (store 8 into %stack.6)
;CHECK: JCC_1 %bb.2, 4, implicit killed $eflags
;CHECK: bb.1:
;CHECK: successors: %bb.3(0x80000000)
;CHECK: %54:fr64 = MOVSDrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load 8 from constant-pool)
;CHECK: MOVSDmr %stack.3, 1, $noreg, 0, $noreg, %54 :: (store 8 into %stack.3)
;CHECK: MOVSDmr %stack.4, 1, $noreg, 0, $noreg, %54 :: (store 8 into %stack.4)
;CHECK: MOVSDmr %stack.7, 1, $noreg, 0, $noreg, %54 :: (store 8 into %stack.7)
;CHECK: JMP_1 %bb.3
;CHECK: bb.2.bb13:
;CHECK: successors: %bb.3(0x80000000)
;CHECK: ADJCALLSTACKDOWN64 8, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
;CHECK: MOVSDmr $rsp, 1, $noreg, 0, $noreg, %14 :: (store 8 into stack)
;CHECK: dead $edi = MOV32r0 implicit-def dead $eflags, implicit-def $rdi
;CHECK: $xmm0 = COPY %62
;CHECK: $xmm1 = COPY %58
;CHECK: $xmm2 = COPY %6
;CHECK: $xmm3 = COPY %45
;CHECK: $xmm4 = COPY %41
;CHECK: $xmm5 = COPY %10
;CHECK: $xmm6 = COPY %71
;CHECK: $xmm7 = COPY %66
;CHECK: MOVSDmr %stack.3, 1, $noreg, 0, $noreg, %49 :: (store 8 into %stack.3)
;CHECK: MOVSDmr %stack.4, 1, $noreg, 0, $noreg, %53 :: (store 8 into %stack.4)
;CHECK: MOVSDmr %stack.7, 1, $noreg, 0, $noreg, %76 :: (store 8 into %stack.7)
;CHECK: STATEPOINT 2, 5, 9, undef %22:gr64, $rdi, $xmm0, $xmm1, $xmm2, $xmm3, $xmm4, $xmm5, $xmm6, $xmm7, 2, 0, 2, 0, 2, 59, 2, 0, 2, 2, 2, 0, 2, 70, 2, 0, 2, 26, 2, 0, 2, 0, 2, 0, 2, 4, 1, 8, %stack.6, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.5, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.4, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.2, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.1, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.3, 0, 2, 7, 2, 0, 2, 4, 1, 8, %fixed-stack.3, 0, 2, 7, 2, 0, 2, 4, 1, 8, %fixed-stack.2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 4, 1, 8, %fixed-stack.0, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 1, 2, 0, 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def dead $eax :: (load 8 from %stack.1), (load 8 from %stack.2), (load 8 from %stack.3), (load 8 from %stack.4), (load 8 from %stack.5), (load 8 from %stack.6), (load 8 from %fixed-stack.2), (load 8 from %fixed-stack.3, align 16), (load 8 from %fixed-stack.0)
;CHECK: ADJCALLSTACKUP64 8, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
;CHECK: bb.3.bb15:
;CHECK: successors: %bb.7(0x7ffff800), %bb.4(0x00000800)
;CHECK: %24:gr32 = MOV32r0 implicit-def dead $eflags
;CHECK: TEST8rr %24.sub_8bit, %24.sub_8bit, implicit-def $eflags
;CHECK: JCC_1 %bb.7, 5, implicit killed $eflags
;CHECK: JMP_1 %bb.4
;CHECK: bb.4.bb19:
;CHECK: successors: %bb.5(0x00000000), %bb.6(0x80000000)
;CHECK: EH_LABEL <mcsymbol >
;CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
;CHECK: $edx = MOV32r0 implicit-def dead $eflags
;CHECK: STATEPOINT 1, 16, 3, undef %29:gr64, undef $edi, undef $rsi, $edx, 2, 0, 2, 0, 2, 105, 2, 0, 2, 2, 2, 0, 2, 97, 2, 0, 2, 26, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 0, 2, 2, 2, 3, 2, 0, 2, 20, 2, 0, 2, 0, 2, 4278124286, 2, 4, 1, 8, %stack.6, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.5, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.4, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.2, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.1, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.3, 0, 2, 7, 2, 0, 2, 4, 1, 8, %fixed-stack.3, 0, 2, 7, 2, 0, 2, 4, 1, 8, %fixed-stack.2, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.7, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 1, 2, 4278124286, 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def dead $eax :: (load 8 from %stack.1), (load 8 from %stack.2), (load 8 from %stack.3), (load 8 from %stack.4), (load 8 from %stack.5), (load 8 from %stack.6), (load 8 from %fixed-stack.2), (load 8 from %fixed-stack.3, align 16), (load 8 from %stack.7)
;CHECK: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
;CHECK: EH_LABEL <mcsymbol >
;CHECK: JMP_1 %bb.5
;CHECK: bb.5.bb21:
;CHECK: successors:
;CHECK: ADJCALLSTACKDOWN64 8, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
;CHECK: %81:fr64 = MOVSDrm_alt %stack.7, 1, $noreg, 0, $noreg :: (load 8 from %stack.7)
;CHECK: MOVSDmr $rsp, 1, $noreg, 0, $noreg, %81 :: (store 8 into stack)
;CHECK: $xmm0 = MOVSDrm_alt %stack.6, 1, $noreg, 0, $noreg :: (load 8 from %stack.6)
;CHECK: $xmm1 = MOVSDrm_alt %stack.5, 1, $noreg, 0, $noreg :: (load 8 from %stack.5)
;CHECK: $xmm2 = MOVSDrm_alt %stack.4, 1, $noreg, 0, $noreg :: (load 8 from %stack.4)
;CHECK: $xmm3 = MOVSDrm_alt %stack.2, 1, $noreg, 0, $noreg :: (load 8 from %stack.2)
;CHECK: $xmm4 = MOVSDrm_alt %stack.1, 1, $noreg, 0, $noreg :: (load 8 from %stack.1)
;CHECK: $xmm5 = MOVSDrm_alt %stack.3, 1, $noreg, 0, $noreg :: (load 8 from %stack.3)
;CHECK: %74:fr64 = MOVSDrm_alt %fixed-stack.3, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.3, align 16)
;CHECK: %95:fr64 = COPY %74
;CHECK: $xmm6 = COPY %95
;CHECK: $esi = MOV32ri 51
;CHECK: %69:fr64 = MOVSDrm_alt %fixed-stack.2, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.2)
;CHECK: %97:fr64 = COPY %69
;CHECK: $xmm7 = COPY %97
;CHECK: STATEPOINT 2, 5, 10, undef %36:gr64, undef $rdi, $xmm0, $xmm1, $xmm2, $xmm3, $xmm4, $xmm5, $xmm6, $xmm7, killed $esi, 2, 0, 2, 0, 2, 105, 2, 0, 2, 2, 2, 0, 2, 97, 2, 0, 2, 26, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 2, 2, 2, 2, 46, 2, 0, 2, 20, 2, 0, 2, 0, 2, 4278124286, 2, 4, 1, 8, %stack.6, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.5, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.4, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.2, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.1, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.3, 0, 2, 7, 2, 0, 2, 4, 1, 8, %fixed-stack.3, 0, 2, 7, 2, 0, 2, 4, 1, 8, %fixed-stack.2, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.7, 0, 2, 7, 2, 0, 2, 3, 2, 51, 2, 1, 2, 4278124286, 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp :: (load 8 from %stack.7), (load 8 from %stack.6), (load 8 from %stack.5), (load 8 from %stack.4), (load 8 from %stack.2), (load 8 from %stack.1), (load 8 from %stack.3), (load 8 from %fixed-stack.3, align 16), (load 8 from %fixed-stack.2)
;CHECK: ADJCALLSTACKUP64 8, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
;CHECK: bb.6.bb23 (landing-pad):
;CHECK: liveins: $rax, $rdx
;CHECK: EH_LABEL <mcsymbol >
;CHECK: RET 0
;CHECK: bb.7.bb25:
;CHECK: RET 0