[Machinesink] add one more profitable loop related pattern

Reviewed By: qcolombet Differential Revision: https://reviews.llvm.org/D86925
2020-09-26 08:30:48 -04:00 · 2020-09-26 08:30:48 -04:00 · c8f6c0f961
parent 55f727306e
commit c8f6c0f961
3 changed files with 50 additions and 101 deletions
--- a/llvm/lib/CodeGen/MachineSink.cpp
+++ b/llvm/lib/CodeGen/MachineSink.cpp
@ -596,9 +596,55 @@ bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr &MI,
          FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge, AllSuccessors))
    return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2, AllSuccessors);

-  // If SuccToSinkTo is final destination and it is a post dominator of current
-  // block then it is not profitable to sink MI into SuccToSinkTo block.
-  return false;
+  MachineLoop *ML = LI->getLoopFor(MBB);
+
+  // If the instruction is not inside a loop, it is not profitable to sink MI to
+  // a post-dominating block SuccToSinkTo.
+  if (!ML)
+    return false;
+
+  // If this instruction is inside a loop and sinking it can shorten the live
+  // ranges of more registers, it is still profitable.
+  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI.getOperand(i);
+    // Ignore non-register operands.
+    if (!MO.isReg())
+      continue;
+    Register Reg = MO.getReg();
+    if (Reg == 0)
+      continue;
+
+    // Don't handle physical register.
+    if (Register::isPhysicalRegister(Reg))
+      return false;
+
+    // Users for the defs are all dominated by SuccToSinkTo.
+    if (MO.isDef()) {
+      // This def register's live range is shortened after sinking.
+      bool LocalUse = false;
+      if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, MBB, BreakPHIEdge,
+                                   LocalUse))
+        return false;
+    } else {
+      MachineInstr *DefMI = MRI->getVRegDef(Reg);
+      // DefMI is defined outside of loop. There should be no live range
+      // impact for this operand. Definition outside of loop means:
+      // 1: definition is outside of loop.
+      // 2: definition is in this loop, but it is a PHI in the loop header.
+      if (LI->getLoopFor(DefMI->getParent()) != ML ||
+          (DefMI->isPHI() && LI->isLoopHeader(DefMI->getParent())))
+        continue;
+      // DefMI is inside the loop. Mark it as not profitable as sinking MI will
+      // enlarge DefMI's live range.
+      // FIXME: check the register pressure in block SuccToSinkTo, if it is
+      // smaller than the limit after sinking, it is still profitable to sink.
+      return false;
+    }
+  }
+
+  // If MI is in a loop and all its operands are live across the whole loop, it
+  // is profitable to sink MI.
+  return true;
 }

 /// Get the sorted sequence of successors for this MachineBasicBlock, possibly
--- a/llvm/test/CodeGen/PowerPC/sink-down-more-instructions-1.mir
+++ b/llvm/test/CodeGen/PowerPC/sink-down-more-instructions-1.mir
@ -370,7 +370,6 @@ body:             |
  ; CHECK:   [[PHI5:%[0-9]+]]:gprc = PHI [[LI2]], %bb.2, %27, %bb.17
  ; CHECK:   [[PHI6:%[0-9]+]]:g8rc_and_g8rc_nox0 = PHI [[ADDI8_]], %bb.2, %55, %bb.17
  ; CHECK:   [[PHI7:%[0-9]+]]:g8rc_and_g8rc_nox0 = PHI [[ADDI8_1]], %bb.2, %15, %bb.17
-  ; CHECK:   [[ADDI8_4:%[0-9]+]]:g8rc_and_g8rc_nox0 = ADDI8 [[PHI7]], 8
  ; CHECK:   [[LWZU:%[0-9]+]]:gprc, [[LWZU1:%[0-9]+]]:g8rc_and_g8rc_nox0 = LWZU 8, [[PHI6]] :: (load 4 from %ir.46, !tbaa !2)
  ; CHECK:   [[COPY10:%[0-9]+]]:gprc_and_gprc_nor0 = COPY [[PHI4]].sub_32
  ; CHECK:   [[MULHWU1:%[0-9]+]]:gprc = MULHWU [[COPY10]], [[ORI]]
@ -396,6 +395,7 @@ body:             |
  ; CHECK: bb.12 (%ir-block.60):
  ; CHECK:   successors: %bb.15(0x2aaaaaab), %bb.13(0x55555555)
  ; CHECK:   [[PHI8:%[0-9]+]]:gprc = PHI [[ADDI2]], %bb.11, [[ISEL1]], %bb.10
+  ; CHECK:   [[ADDI8_4:%[0-9]+]]:g8rc_and_g8rc_nox0 = ADDI8 [[PHI7]], 8
  ; CHECK:   [[COPY13:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY [[ADDI8_4]]
  ; CHECK:   [[ADD4_3:%[0-9]+]]:gprc = nsw ADD4 [[PHI8]], [[ADD4_2]]
  ; CHECK:   STW killed [[ADD4_3]], 0, [[ADDI8_4]] :: (store 4 into %ir.44, !tbaa !2)
--- a/llvm/test/CodeGen/PowerPC/sink-down-more-instructions.ll
+++ b/llvm/test/CodeGen/PowerPC/sink-down-more-instructions.ll
@ -1,97 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN:  llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu \
-; RUN:      -ppc-asm-full-reg-names -verify-machineinstrs < %s | FileCheck %s
-
-define signext i32 @foo(i32 signext %0, i32 signext %1, i32* %2, i32* %3, i32 signext %4) {
-; CHECK-LABEL: foo:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    cmpwi r7, 1
-; CHECK-NEXT:    blt cr0, .LBB0_8
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    addi r4, r5, -4
-; CHECK-NEXT:    addi r8, r6, -4
-; CHECK-NEXT:    clrldi r7, r7, 32
-; CHECK-NEXT:    li r5, 0
-; CHECK-NEXT:    mtctr r7
-; CHECK-NEXT:    lis r7, -30584
-; CHECK-NEXT:    li r6, 0
-; CHECK-NEXT:    cmplwi r3, 3
-; CHECK-NEXT:    cmplwi cr1, r3, 1
-; CHECK-NEXT:    ori r7, r7, 34953
-; CHECK-NEXT:    b .LBB0_4
-; CHECK-NEXT:    .p2align 4
-; CHECK-NEXT:  .LBB0_2:
-; CHECK-NEXT:    mulhwu r9, r6, r7
-; CHECK-NEXT:    srwi r9, r9, 4
-; CHECK-NEXT:    mulli r9, r9, 30
-; CHECK-NEXT:    sub r9, r6, r9
-; CHECK-NEXT:  .LBB0_3:
-; CHECK-NEXT:    addi r6, r6, 1
-; CHECK-NEXT:    add r9, r9, r5
-; CHECK-NEXT:    stw r9, 4(r8)
-; CHECK-NEXT:    mr r8, r3
-; CHECK-NEXT:    bdz .LBB0_8
-; CHECK-NEXT:  .LBB0_4:
-; CHECK-NEXT:    lwzu r9, 4(r4)
-; CHECK-NEXT:    addi r3, r8, 4
-; CHECK-NEXT:    add r5, r9, r5
-; CHECK-NEXT:    beq cr0, .LBB0_7
-; CHECK-NEXT:  # %bb.5:
-; CHECK-NEXT:    bne cr1, .LBB0_2
-; CHECK-NEXT:  # %bb.6:
-; CHECK-NEXT:    slwi r9, r6, 1
-; CHECK-NEXT:    b .LBB0_3
-; CHECK-NEXT:    .p2align 4
-; CHECK-NEXT:  .LBB0_7:
-; CHECK-NEXT:    addi r9, r6, 100
-; CHECK-NEXT:    b .LBB0_3
-; CHECK-NEXT:  .LBB0_8:
-; CHECK-NEXT:    li r3, 0
-; CHECK-NEXT:    blr
-  %6 = icmp sgt i32 %4, 0
-  br i1 %6, label %7, label %9
-
-7:                                                ; preds = %5
-  %8 = zext i32 %4 to i64
-  br label %10
-
-9:                                                ; preds = %25, %5
-  ret i32 undef
-
-10:                                               ; preds = %7, %25
-  %11 = phi i64 [ 0, %7 ], [ %29, %25 ]
-  %12 = phi i32 [ 0, %7 ], [ %30, %25 ]
-  %13 = phi i32 [ 0, %7 ], [ %16, %25 ]
-  %14 = getelementptr inbounds i32, i32* %2, i64 %11
-  %15 = load i32, i32* %14, align 4
-  %16 = add nsw i32 %15, %13
-  switch i32 %0, label %22 [
-    i32 1, label %17
-    i32 3, label %20
-  ]
-
-17:                                               ; preds = %10
-  %18 = trunc i64 %11 to i32
-  %19 = shl i32 %18, 1
-  br label %25
-
-20:                                               ; preds = %10
-  %21 = add nuw nsw i32 %12, 100
-  br label %25
-
-22:                                               ; preds = %10
-  %23 = trunc i64 %11 to i32
-  %24 = urem i32 %23, 30
-  br label %25
-
-25:                                               ; preds = %22, %20, %17
-  %26 = phi i32 [ %24, %22 ], [ %21, %20 ], [ %19, %17 ]
-  %27 = add nsw i32 %26, %16
-  %28 = getelementptr inbounds i32, i32* %3, i64 %11
-  store i32 %27, i32* %28, align 4
-  %29 = add nuw nsw i64 %11, 1
-  %30 = add nuw nsw i32 %12, 1
-  %31 = icmp eq i64 %29, %8
-  br i1 %31, label %9, label %10
-}
-