forked from OSchip/llvm-project
[Machinesink] add one more profitable loop related pattern
Reviewed By: qcolombet Differential Revision: https://reviews.llvm.org/D86925
This commit is contained in:
parent
55f727306e
commit
c8f6c0f961
|
@ -596,9 +596,55 @@ bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr &MI,
|
|||
FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge, AllSuccessors))
|
||||
return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2, AllSuccessors);
|
||||
|
||||
// If SuccToSinkTo is final destination and it is a post dominator of current
|
||||
// block then it is not profitable to sink MI into SuccToSinkTo block.
|
||||
return false;
|
||||
MachineLoop *ML = LI->getLoopFor(MBB);
|
||||
|
||||
// If the instruction is not inside a loop, it is not profitable to sink MI to
|
||||
// a post-dominating block SuccToSinkTo.
|
||||
if (!ML)
|
||||
return false;
|
||||
|
||||
// If this instruction is inside a loop and sinking this instruction can make
|
||||
// more registers' live ranges shorter, it is still profitable.
|
||||
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
|
||||
const MachineOperand &MO = MI.getOperand(i);
|
||||
// Ignore non-register operands.
|
||||
if (!MO.isReg())
|
||||
continue;
|
||||
Register Reg = MO.getReg();
|
||||
if (Reg == 0)
|
||||
continue;
|
||||
|
||||
// Don't handle physical register.
|
||||
if (Register::isPhysicalRegister(Reg))
|
||||
return false;
|
||||
|
||||
// Users for the defs are all dominated by SuccToSinkTo.
|
||||
if (MO.isDef()) {
|
||||
// This def register's live range is shortened after sinking.
|
||||
bool LocalUse = false;
|
||||
if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, MBB, BreakPHIEdge,
|
||||
LocalUse))
|
||||
return false;
|
||||
} else {
|
||||
MachineInstr *DefMI = MRI->getVRegDef(Reg);
|
||||
// DefMI is defined outside of loop. There should be no live range
|
||||
// impact for this operand. Definition outside of loop means:
|
||||
// 1: definition is outside of loop.
|
||||
// 2: definition is in this loop, but it is a PHI in the loop header.
|
||||
if (LI->getLoopFor(DefMI->getParent()) != ML ||
|
||||
(DefMI->isPHI() && LI->isLoopHeader(DefMI->getParent())))
|
||||
continue;
|
||||
// DefMI is inside the loop. Mark it as not profitable as sinking MI will
|
||||
// enlarge DefMI live range.
|
||||
// FIXME: check the register pressure in block SuccToSinkTo, if it is
|
||||
// smaller than the limit after sinking, it is still profitable to sink.
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// If MI is in loop and all its operands are alive across the whole loop, it
|
||||
// is profitable to sink MI.
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Get the sorted sequence of successors for this MachineBasicBlock, possibly
|
||||
|
|
|
@ -370,7 +370,6 @@ body: |
|
|||
; CHECK: [[PHI5:%[0-9]+]]:gprc = PHI [[LI2]], %bb.2, %27, %bb.17
|
||||
; CHECK: [[PHI6:%[0-9]+]]:g8rc_and_g8rc_nox0 = PHI [[ADDI8_]], %bb.2, %55, %bb.17
|
||||
; CHECK: [[PHI7:%[0-9]+]]:g8rc_and_g8rc_nox0 = PHI [[ADDI8_1]], %bb.2, %15, %bb.17
|
||||
; CHECK: [[ADDI8_4:%[0-9]+]]:g8rc_and_g8rc_nox0 = ADDI8 [[PHI7]], 8
|
||||
; CHECK: [[LWZU:%[0-9]+]]:gprc, [[LWZU1:%[0-9]+]]:g8rc_and_g8rc_nox0 = LWZU 8, [[PHI6]] :: (load 4 from %ir.46, !tbaa !2)
|
||||
; CHECK: [[COPY10:%[0-9]+]]:gprc_and_gprc_nor0 = COPY [[PHI4]].sub_32
|
||||
; CHECK: [[MULHWU1:%[0-9]+]]:gprc = MULHWU [[COPY10]], [[ORI]]
|
||||
|
@ -396,6 +395,7 @@ body: |
|
|||
; CHECK: bb.12 (%ir-block.60):
|
||||
; CHECK: successors: %bb.15(0x2aaaaaab), %bb.13(0x55555555)
|
||||
; CHECK: [[PHI8:%[0-9]+]]:gprc = PHI [[ADDI2]], %bb.11, [[ISEL1]], %bb.10
|
||||
; CHECK: [[ADDI8_4:%[0-9]+]]:g8rc_and_g8rc_nox0 = ADDI8 [[PHI7]], 8
|
||||
; CHECK: [[COPY13:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY [[ADDI8_4]]
|
||||
; CHECK: [[ADD4_3:%[0-9]+]]:gprc = nsw ADD4 [[PHI8]], [[ADD4_2]]
|
||||
; CHECK: STW killed [[ADD4_3]], 0, [[ADDI8_4]] :: (store 4 into %ir.44, !tbaa !2)
|
||||
|
|
|
@ -1,97 +0,0 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu \
|
||||
; RUN: -ppc-asm-full-reg-names -verify-machineinstrs < %s | FileCheck %s
|
||||
|
||||
define signext i32 @foo(i32 signext %0, i32 signext %1, i32* %2, i32* %3, i32 signext %4) {
|
||||
; CHECK-LABEL: foo:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: cmpwi r7, 1
|
||||
; CHECK-NEXT: blt cr0, .LBB0_8
|
||||
; CHECK-NEXT: # %bb.1:
|
||||
; CHECK-NEXT: addi r4, r5, -4
|
||||
; CHECK-NEXT: addi r8, r6, -4
|
||||
; CHECK-NEXT: clrldi r7, r7, 32
|
||||
; CHECK-NEXT: li r5, 0
|
||||
; CHECK-NEXT: mtctr r7
|
||||
; CHECK-NEXT: lis r7, -30584
|
||||
; CHECK-NEXT: li r6, 0
|
||||
; CHECK-NEXT: cmplwi r3, 3
|
||||
; CHECK-NEXT: cmplwi cr1, r3, 1
|
||||
; CHECK-NEXT: ori r7, r7, 34953
|
||||
; CHECK-NEXT: b .LBB0_4
|
||||
; CHECK-NEXT: .p2align 4
|
||||
; CHECK-NEXT: .LBB0_2:
|
||||
; CHECK-NEXT: mulhwu r9, r6, r7
|
||||
; CHECK-NEXT: srwi r9, r9, 4
|
||||
; CHECK-NEXT: mulli r9, r9, 30
|
||||
; CHECK-NEXT: sub r9, r6, r9
|
||||
; CHECK-NEXT: .LBB0_3:
|
||||
; CHECK-NEXT: addi r6, r6, 1
|
||||
; CHECK-NEXT: add r9, r9, r5
|
||||
; CHECK-NEXT: stw r9, 4(r8)
|
||||
; CHECK-NEXT: mr r8, r3
|
||||
; CHECK-NEXT: bdz .LBB0_8
|
||||
; CHECK-NEXT: .LBB0_4:
|
||||
; CHECK-NEXT: lwzu r9, 4(r4)
|
||||
; CHECK-NEXT: addi r3, r8, 4
|
||||
; CHECK-NEXT: add r5, r9, r5
|
||||
; CHECK-NEXT: beq cr0, .LBB0_7
|
||||
; CHECK-NEXT: # %bb.5:
|
||||
; CHECK-NEXT: bne cr1, .LBB0_2
|
||||
; CHECK-NEXT: # %bb.6:
|
||||
; CHECK-NEXT: slwi r9, r6, 1
|
||||
; CHECK-NEXT: b .LBB0_3
|
||||
; CHECK-NEXT: .p2align 4
|
||||
; CHECK-NEXT: .LBB0_7:
|
||||
; CHECK-NEXT: addi r9, r6, 100
|
||||
; CHECK-NEXT: b .LBB0_3
|
||||
; CHECK-NEXT: .LBB0_8:
|
||||
; CHECK-NEXT: li r3, 0
|
||||
; CHECK-NEXT: blr
|
||||
%6 = icmp sgt i32 %4, 0
|
||||
br i1 %6, label %7, label %9
|
||||
|
||||
7: ; preds = %5
|
||||
%8 = zext i32 %4 to i64
|
||||
br label %10
|
||||
|
||||
9: ; preds = %25, %5
|
||||
ret i32 undef
|
||||
|
||||
10: ; preds = %7, %25
|
||||
%11 = phi i64 [ 0, %7 ], [ %29, %25 ]
|
||||
%12 = phi i32 [ 0, %7 ], [ %30, %25 ]
|
||||
%13 = phi i32 [ 0, %7 ], [ %16, %25 ]
|
||||
%14 = getelementptr inbounds i32, i32* %2, i64 %11
|
||||
%15 = load i32, i32* %14, align 4
|
||||
%16 = add nsw i32 %15, %13
|
||||
switch i32 %0, label %22 [
|
||||
i32 1, label %17
|
||||
i32 3, label %20
|
||||
]
|
||||
|
||||
17: ; preds = %10
|
||||
%18 = trunc i64 %11 to i32
|
||||
%19 = shl i32 %18, 1
|
||||
br label %25
|
||||
|
||||
20: ; preds = %10
|
||||
%21 = add nuw nsw i32 %12, 100
|
||||
br label %25
|
||||
|
||||
22: ; preds = %10
|
||||
%23 = trunc i64 %11 to i32
|
||||
%24 = urem i32 %23, 30
|
||||
br label %25
|
||||
|
||||
25: ; preds = %22, %20, %17
|
||||
%26 = phi i32 [ %24, %22 ], [ %21, %20 ], [ %19, %17 ]
|
||||
%27 = add nsw i32 %26, %16
|
||||
%28 = getelementptr inbounds i32, i32* %3, i64 %11
|
||||
store i32 %27, i32* %28, align 4
|
||||
%29 = add nuw nsw i64 %11, 1
|
||||
%30 = add nuw nsw i32 %12, 1
|
||||
%31 = icmp eq i64 %29, %8
|
||||
br i1 %31, label %9, label %10
|
||||
}
|
||||
|
Loading…
Reference in New Issue