forked from OSchip/llvm-project
[PowerPC] Extend folding RLWINM + RLWINM to post-RA.
Summary: This patch depends on D89846. We already have patterns to fold two RLWINMs in ppc-mi-peephole, but some RLWINMs are only generated after RA (for example, by rGc4690b007743). If an RLWINM generated after RA is followed by another RLWINM, we want to perform the same optimization after RA, too. Reviewed By: shchenz, steven.zhang Differential Revision: https://reviews.llvm.org/D89855
This commit is contained in:
parent
529ba612b0
commit
119ab2181e
|
@ -3190,18 +3190,55 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
|
|||
return false;
|
||||
}
|
||||
|
||||
bool PPCInstrInfo::combineRLWINM(MachineInstr &MI,
|
||||
MachineInstr **ToErase) const {
|
||||
// This function tries to combine two RLWINMs. We not only perform such
|
||||
// optimization in SSA, but also after RA, since some RLWINMs are generated after
|
||||
// RA.
|
||||
bool PPCInstrInfo::simplifyRotateAndMaskInstr(MachineInstr &MI,
|
||||
MachineInstr *&ToErase) const {
|
||||
bool Is64Bit = false;
|
||||
switch (MI.getOpcode()) {
|
||||
case PPC::RLWINM:
|
||||
case PPC::RLWINM_rec:
|
||||
break;
|
||||
case PPC::RLWINM8:
|
||||
case PPC::RLWINM8_rec:
|
||||
Is64Bit = true;
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
|
||||
unsigned FoldingReg = MI.getOperand(1).getReg();
|
||||
if (!Register::isVirtualRegister(FoldingReg))
|
||||
Register FoldingReg = MI.getOperand(1).getReg();
|
||||
MachineInstr *SrcMI = nullptr;
|
||||
bool NoUse = false;
|
||||
if (MRI->isSSA()) {
|
||||
if (!Register::isVirtualRegister(FoldingReg))
|
||||
return false;
|
||||
SrcMI = MRI->getVRegDef(FoldingReg);
|
||||
} else {
|
||||
bool OtherIntermediateUse = false;
|
||||
SrcMI = getDefMIPostRA(FoldingReg, MI, OtherIntermediateUse);
|
||||
NoUse = !OtherIntermediateUse && MI.getOperand(1).isKill();
|
||||
}
|
||||
if (!SrcMI)
|
||||
return false;
|
||||
MachineInstr *SrcMI = MRI->getVRegDef(FoldingReg);
|
||||
if (SrcMI->getOpcode() != PPC::RLWINM &&
|
||||
SrcMI->getOpcode() != PPC::RLWINM_rec &&
|
||||
SrcMI->getOpcode() != PPC::RLWINM8 &&
|
||||
SrcMI->getOpcode() != PPC::RLWINM8_rec)
|
||||
// TODO: The pairs of RLWINM8(RLWINM) or RLWINM(RLWINM8) never occur before
|
||||
// RA, only after RA. We can fold RLWINM8(RLWINM) -> RLWINM8, or
|
||||
// RLWINM(RLWINM8) -> RLWINM.
|
||||
switch (SrcMI->getOpcode()) {
|
||||
case PPC::RLWINM:
|
||||
case PPC::RLWINM_rec:
|
||||
if (Is64Bit)
|
||||
return false;
|
||||
break;
|
||||
case PPC::RLWINM8:
|
||||
case PPC::RLWINM8_rec:
|
||||
if (!Is64Bit)
|
||||
return false;
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
assert((MI.getOperand(2).isImm() && MI.getOperand(3).isImm() &&
|
||||
MI.getOperand(4).isImm() && SrcMI->getOperand(2).isImm() &&
|
||||
SrcMI->getOperand(3).isImm() && SrcMI->getOperand(4).isImm()) &&
|
||||
|
@ -3256,8 +3293,6 @@ bool PPCInstrInfo::combineRLWINM(MachineInstr &MI,
|
|||
|
||||
// If final mask is 0, MI result should be 0 too.
|
||||
if (FinalMask.isNullValue()) {
|
||||
bool Is64Bit =
|
||||
(MI.getOpcode() == PPC::RLWINM8 || MI.getOpcode() == PPC::RLWINM8_rec);
|
||||
Simplified = true;
|
||||
LLVM_DEBUG(dbgs() << "Replace Instr: ");
|
||||
LLVM_DEBUG(MI.dump());
|
||||
|
@ -3315,14 +3350,15 @@ bool PPCInstrInfo::combineRLWINM(MachineInstr &MI,
|
|||
LLVM_DEBUG(dbgs() << "To: ");
|
||||
LLVM_DEBUG(MI.dump());
|
||||
}
|
||||
if (Simplified & MRI->use_nodbg_empty(FoldingReg) &&
|
||||
!SrcMI->hasImplicitDef()) {
|
||||
// If FoldingReg has no non-debug use and it has no implicit def (it
|
||||
// is not RLWINMO or RLWINM8o), it's safe to delete its def SrcMI.
|
||||
// Otherwise keep it.
|
||||
*ToErase = SrcMI;
|
||||
LLVM_DEBUG(dbgs() << "Delete dead instruction: ");
|
||||
LLVM_DEBUG(SrcMI->dump());
|
||||
if (Simplified && !SrcMI->hasImplicitDef()) {
|
||||
// If SrcMI has no implicit def, and FoldingReg has no non-debug use or
|
||||
// its flag is "killed", it's safe to delete SrcMI. Otherwise keep it.
|
||||
if ((!MRI->isSSA() && NoUse) ||
|
||||
(MRI->isSSA() && MRI->use_nodbg_empty(FoldingReg))) {
|
||||
ToErase = SrcMI;
|
||||
LLVM_DEBUG(dbgs() << "Delete dead instruction: ");
|
||||
LLVM_DEBUG(SrcMI->dump());
|
||||
}
|
||||
}
|
||||
return Simplified;
|
||||
}
|
||||
|
|
|
@ -564,7 +564,8 @@ public:
|
|||
bool convertToImmediateForm(MachineInstr &MI,
|
||||
MachineInstr **KilledDef = nullptr) const;
|
||||
bool foldFrameOffset(MachineInstr &MI) const;
|
||||
bool combineRLWINM(MachineInstr &MI, MachineInstr **ToErase = nullptr) const;
|
||||
bool simplifyRotateAndMaskInstr(MachineInstr &MI,
|
||||
MachineInstr *&ToErase) const;
|
||||
bool isADDIInstrEligibleForFolding(MachineInstr &ADDIMI, int64_t &Imm) const;
|
||||
bool isADDInstrEligibleForFolding(MachineInstr &ADDMI) const;
|
||||
bool isImmInstrEligibleForFolding(MachineInstr &MI, unsigned &BaseReg,
|
||||
|
|
|
@ -848,7 +848,7 @@ bool PPCMIPeephole::simplifyCode(void) {
|
|||
case PPC::RLWINM_rec:
|
||||
case PPC::RLWINM8:
|
||||
case PPC::RLWINM8_rec: {
|
||||
Simplified = TII->combineRLWINM(MI, &ToErase);
|
||||
Simplified = TII->simplifyRotateAndMaskInstr(MI, ToErase);
|
||||
if (Simplified)
|
||||
++NumRotatesCollapsed;
|
||||
break;
|
||||
|
|
|
@ -37,6 +37,8 @@ STATISTIC(NumberOfSelfCopies,
|
|||
"Number of self copy instructions eliminated");
|
||||
STATISTIC(NumFrameOffFoldInPreEmit,
|
||||
"Number of folding frame offset by using r+r in pre-emit peephole");
|
||||
STATISTIC(NumRotateInstrFoldInPreEmit,
|
||||
"Number of folding Rotate instructions in pre-emit peephole");
|
||||
|
||||
static cl::opt<bool>
|
||||
EnablePCRelLinkerOpt("ppc-pcrel-linker-opt", cl::Hidden, cl::init(true),
|
||||
|
@ -413,6 +415,13 @@ static bool hasPCRelativeForm(MachineInstr &Use) {
|
|||
LLVM_DEBUG(dbgs() << "Frame offset folding by using index form: ");
|
||||
LLVM_DEBUG(MI.dump());
|
||||
}
|
||||
MachineInstr *ToErase = nullptr;
|
||||
if (TII->simplifyRotateAndMaskInstr(MI, ToErase)) {
|
||||
Changed = true;
|
||||
NumRotateInstrFoldInPreEmit++;
|
||||
if (ToErase)
|
||||
InstrsToErase.push_back(ToErase);
|
||||
}
|
||||
}
|
||||
|
||||
// Eliminate conditional branch based on a constant CR bit by
|
||||
|
|
|
@ -0,0 +1,104 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown -stop-after \
|
||||
# RUN: ppc-pre-emit-peephole %s -o - | FileCheck %s
|
||||
|
||||
---
|
||||
name: testFoldRLWINM
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $r3
|
||||
; CHECK-LABEL: name: testFoldRLWINM
|
||||
; CHECK: liveins: $r3
|
||||
; CHECK: renamable $r3 = RLWINM killed renamable $r3, 14, 0, 12, implicit-def $x3
|
||||
; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $x3
|
||||
$r3 = RLWINM killed $r3, 27, 5, 31
|
||||
dead renamable $r3 = RLWINM killed renamable $r3, 19, 0, 12, implicit-def $x3
|
||||
BLR8 implicit $lr8, implicit $rm, implicit killed $x3
|
||||
...
|
||||
---
|
||||
name: testFoldRLWINMSrcFullMask
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $r3
|
||||
; CHECK-LABEL: name: testFoldRLWINMSrcFullMask
|
||||
; CHECK: liveins: $r3
|
||||
; CHECK: renamable $r3 = RLWINM killed renamable $r3, 14, 0, 12, implicit-def $x3
|
||||
; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $x3
|
||||
$r3 = RLWINM killed $r3, 27, 0, 31
|
||||
dead renamable $r3 = RLWINM killed renamable $r3, 19, 0, 12, implicit-def $x3
|
||||
BLR8 implicit $lr8, implicit $rm, implicit killed $x3
|
||||
...
|
||||
---
|
||||
name: testFoldRLWINMSrcWrapped
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $r3
|
||||
; CHECK-LABEL: name: testFoldRLWINMSrcWrapped
|
||||
; CHECK: liveins: $r3
|
||||
; CHECK: renamable $r3 = RLWINM killed renamable $r3, 14, 11, 12, implicit-def $x3
|
||||
; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $x3
|
||||
$r3 = RLWINM killed $r3, 27, 30, 10
|
||||
dead renamable $r3 = RLWINM killed renamable $r3, 19, 0, 12, implicit-def $x3
|
||||
BLR8 implicit $lr8, implicit $rm, implicit killed $x3
|
||||
...
|
||||
---
|
||||
name: testFoldRLWINMToZero
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $r3
|
||||
; CHECK-LABEL: name: testFoldRLWINMToZero
|
||||
; CHECK: liveins: $r3
|
||||
; CHECK: renamable $r3 = LI 0, implicit-def $x3
|
||||
; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $x3
|
||||
$r3 = RLWINM killed $r3, 27, 5, 10
|
||||
dead renamable $r3 = RLWINM killed renamable $r3, 8, 5, 10, implicit-def $x3
|
||||
BLR8 implicit $lr8, implicit $rm, implicit killed $x3
|
||||
...
|
||||
---
|
||||
name: testFoldRLWINM_recToZero
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $r3
|
||||
; CHECK-LABEL: name: testFoldRLWINM_recToZero
|
||||
; CHECK: liveins: $r3
|
||||
; CHECK: dead renamable $r3 = ANDI_rec killed renamable $r3, 0, implicit-def $cr0
|
||||
; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $cr0
|
||||
$r3 = RLWINM killed $r3, 27, 5, 10
|
||||
dead renamable $r3 = RLWINM_rec killed renamable $r3, 8, 5, 10, implicit-def $cr0
|
||||
BLR8 implicit $lr8, implicit $rm, implicit killed $cr0
|
||||
...
|
||||
---
|
||||
name: testFoldRLWINMoToZeroSrcCanNotBeDeleted
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $r3
|
||||
; CHECK-LABEL: name: testFoldRLWINMoToZeroSrcCanNotBeDeleted
|
||||
; CHECK: liveins: $r3
|
||||
; CHECK: $r3 = RLWINM_rec $r3, 27, 5, 10, implicit-def dead $cr0
|
||||
; CHECK: dead renamable $r3 = ANDI_rec killed renamable $r3, 0, implicit-def $cr0
|
||||
; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $cr0
|
||||
$r3 = RLWINM_rec $r3, 27, 5, 10, implicit-def $cr0
|
||||
dead renamable $r3 = RLWINM_rec killed renamable $r3, 8, 5, 10, implicit-def $cr0
|
||||
BLR8 implicit $lr8, implicit $rm, implicit killed $cr0
|
||||
...
|
||||
---
|
||||
name: testFoldRLWINMInvalidMask
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $r3
|
||||
; CHECK-LABEL: name: testFoldRLWINMInvalidMask
|
||||
; CHECK: liveins: $r3
|
||||
; CHECK: $r3 = RLWINM killed $r3, 20, 5, 31
|
||||
; CHECK: renamable $r3 = RLWINM killed renamable $r3, 19, 10, 20, implicit-def $x3
|
||||
; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $x3
|
||||
$r3 = RLWINM killed $r3, 20, 5, 31
|
||||
dead renamable $r3 = RLWINM killed renamable $r3, 19, 10, 20, implicit-def $x3
|
||||
BLR8 implicit $lr8, implicit $rm, implicit killed $x3
|
||||
...
|
|
@ -14,9 +14,8 @@ define void @phi5() unnamed_addr {
|
|||
; CHECK-NEXT: # %bb.2:
|
||||
; CHECK-NEXT: lhz 3, 0(3)
|
||||
; CHECK-NEXT: slwi 3, 3, 15
|
||||
; CHECK-NEXT: clrlwi 3, 3, 31
|
||||
; CHECK-NEXT: rlwinm 4, 3, 31, 17, 31
|
||||
; CHECK-NEXT: or 3, 3, 4
|
||||
; CHECK-NEXT: li 4, 0
|
||||
; CHECK-NEXT: ori 3, 4, 0
|
||||
; CHECK-NEXT: rlwimi 3, 3, 15, 0, 16
|
||||
; CHECK-NEXT: # %bb.3:
|
||||
; CHECK-NEXT: blr
|
||||
|
|
|
@ -131,8 +131,7 @@ define i32 @xvtdivdp_shift(<2 x double> %a, <2 x double> %b) {
|
|||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: xvtdivdp cr0, v2, v3
|
||||
; CHECK-NEXT: mfocrf r3, 128
|
||||
; CHECK-NEXT: srwi r3, r3, 28
|
||||
; CHECK-NEXT: rlwinm r3, r3, 28, 31, 31
|
||||
; CHECK-NEXT: li r3, 0
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%0 = tail call i32 @llvm.ppc.vsx.xvtdivdp(<2 x double> %a, <2 x double> %b)
|
||||
|
|
Loading…
Reference in New Issue