forked from OSchip/llvm-project
[PowerPC][Peephole] Combine extsw and sldi after instruction selection
Summary: During instruction selection, `extsw` and `sldi` are combined into a single `extswsli` only when they are in the same basic block (BB). This patch adds a peephole that runs after instruction selection and handles the case where `extsw` and `sldi` are not in the same BB. Differential Revision: https://reviews.llvm.org/D63806 llvm-svn: 365430
This commit is contained in:
parent
25ab27e6ef
commit
1931ed73c3
|
@ -122,7 +122,7 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C],
|
||||||
(instrs
|
(instrs
|
||||||
(instregex "S(L|R)D$"),
|
(instregex "S(L|R)D$"),
|
||||||
(instregex "SRAD(I)?$"),
|
(instregex "SRAD(I)?$"),
|
||||||
(instregex "EXTSWSLI$"),
|
(instregex "EXTSWSLI_32_64$"),
|
||||||
(instregex "MFV(S)?RD$"),
|
(instregex "MFV(S)?RD$"),
|
||||||
(instregex "MTVSRD$"),
|
(instregex "MTVSRD$"),
|
||||||
(instregex "MTVSRW(A|Z)$"),
|
(instregex "MTVSRW(A|Z)$"),
|
||||||
|
@ -158,6 +158,7 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C],
|
||||||
XSNEGDP,
|
XSNEGDP,
|
||||||
XSCPSGNDP,
|
XSCPSGNDP,
|
||||||
MFVSRWZ,
|
MFVSRWZ,
|
||||||
|
EXTSWSLI,
|
||||||
SRADI_32,
|
SRADI_32,
|
||||||
RLDIC,
|
RLDIC,
|
||||||
RFEBB,
|
RFEBB,
|
||||||
|
@ -1101,8 +1102,9 @@ def : InstRW<[P9_ALUOpAndALUOpAndALUOp_6C, IP_EXEC_1C, IP_EXEC_1C, IP_EXEC_1C,
|
||||||
// The two ops cannot be done in parallel.
|
// The two ops cannot be done in parallel.
|
||||||
def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
|
def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
|
||||||
(instrs
|
(instrs
|
||||||
(instregex "EXTSWSLIo$"),
|
(instregex "EXTSWSLI_32_64o$"),
|
||||||
(instregex "SRAD(I)?o$"),
|
(instregex "SRAD(I)?o$"),
|
||||||
|
EXTSWSLIo,
|
||||||
SLDo,
|
SLDo,
|
||||||
SRDo,
|
SRDo,
|
||||||
RLDICo
|
RLDICo
|
||||||
|
|
|
@ -725,10 +725,17 @@ defm SRADI : XSForm_1rc<31, 413, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH),
|
||||||
"sradi", "$rA, $rS, $SH", IIC_IntRotateDI,
|
"sradi", "$rA, $rS, $SH", IIC_IntRotateDI,
|
||||||
[(set i64:$rA, (sra i64:$rS, (i32 imm:$SH)))]>, isPPC64;
|
[(set i64:$rA, (sra i64:$rS, (i32 imm:$SH)))]>, isPPC64;
|
||||||
|
|
||||||
defm EXTSWSLI : XSForm_1r<31, 445, (outs g8rc:$rA), (ins gprc:$rS, u6imm:$SH),
|
let Interpretation64Bit = 1, isCodeGenOnly = 1 in
|
||||||
"extswsli", "$rA, $rS, $SH", IIC_IntRotateDI,
|
defm EXTSWSLI_32_64 : XSForm_1r<31, 445, (outs g8rc:$rA),
|
||||||
[(set i64:$rA, (PPCextswsli i32:$rS, (i32 imm:$SH)))]>,
|
(ins gprc:$rS, u6imm:$SH),
|
||||||
isPPC64, Requires<[IsISA3_0]>;
|
"extswsli", "$rA, $rS, $SH", IIC_IntRotateDI,
|
||||||
|
[(set i64:$rA,
|
||||||
|
(PPCextswsli i32:$rS, (i32 imm:$SH)))]>,
|
||||||
|
isPPC64, Requires<[IsISA3_0]>;
|
||||||
|
|
||||||
|
defm EXTSWSLI : XSForm_1rc<31, 445, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH),
|
||||||
|
"extswsli", "$rA, $rS, $SH", IIC_IntRotateDI,
|
||||||
|
[]>, isPPC64, Requires<[IsISA3_0]>;
|
||||||
|
|
||||||
// For fast-isel:
|
// For fast-isel:
|
||||||
let isCodeGenOnly = 1, Defs = [CARRY] in
|
let isCodeGenOnly = 1, Defs = [CARRY] in
|
||||||
|
|
|
@ -53,6 +53,8 @@ STATISTIC(NumFixedPointIterations,
|
||||||
"to reg-imm ones");
|
"to reg-imm ones");
|
||||||
STATISTIC(NumRotatesCollapsed,
|
STATISTIC(NumRotatesCollapsed,
|
||||||
"Number of pairs of rotate left, clear left/right collapsed");
|
"Number of pairs of rotate left, clear left/right collapsed");
|
||||||
|
STATISTIC(NumEXTSWAndSLDICombined,
|
||||||
|
"Number of pairs of EXTSW and SLDI combined as EXTSWSLI");
|
||||||
|
|
||||||
static cl::opt<bool>
|
static cl::opt<bool>
|
||||||
FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true),
|
FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true),
|
||||||
|
@ -101,6 +103,7 @@ private:
|
||||||
// Perform peepholes.
|
// Perform peepholes.
|
||||||
bool eliminateRedundantCompare(void);
|
bool eliminateRedundantCompare(void);
|
||||||
bool eliminateRedundantTOCSaves(std::map<MachineInstr *, bool> &TOCSaves);
|
bool eliminateRedundantTOCSaves(std::map<MachineInstr *, bool> &TOCSaves);
|
||||||
|
bool combineSEXTAndSHL(MachineInstr &MI, MachineInstr *&ToErase);
|
||||||
bool emitRLDICWhenLoweringJumpTables(MachineInstr &MI);
|
bool emitRLDICWhenLoweringJumpTables(MachineInstr &MI);
|
||||||
void UpdateTOCSaves(std::map<MachineInstr *, bool> &TOCSaves,
|
void UpdateTOCSaves(std::map<MachineInstr *, bool> &TOCSaves,
|
||||||
MachineInstr *MI);
|
MachineInstr *MI);
|
||||||
|
@ -799,7 +802,8 @@ bool PPCMIPeephole::simplifyCode(void) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case PPC::RLDICR: {
|
case PPC::RLDICR: {
|
||||||
Simplified |= emitRLDICWhenLoweringJumpTables(MI);
|
Simplified |= emitRLDICWhenLoweringJumpTables(MI) ||
|
||||||
|
combineSEXTAndSHL(MI, ToErase);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1379,6 +1383,72 @@ bool PPCMIPeephole::emitRLDICWhenLoweringJumpTables(MachineInstr &MI) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// For case in LLVM IR
|
||||||
|
// entry:
|
||||||
|
// %iconv = sext i32 %index to i64
|
||||||
|
// br i1 undef label %true, label %false
|
||||||
|
// true:
|
||||||
|
// %ptr = getelementptr inbounds i32, i32* null, i64 %iconv
|
||||||
|
// ...
|
||||||
|
// PPCISelLowering::combineSHL fails to combine, because sext and shl are in
|
||||||
|
// different BBs when conducting instruction selection. We can do a peephole
|
||||||
|
// optimization to combine these two instructions into extswsli after
|
||||||
|
// instruction selection.
|
||||||
|
bool PPCMIPeephole::combineSEXTAndSHL(MachineInstr &MI,
|
||||||
|
MachineInstr *&ToErase) {
|
||||||
|
if (MI.getOpcode() != PPC::RLDICR)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (!MF->getSubtarget<PPCSubtarget>().isISA3_0())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
assert(MI.getNumOperands() == 4 && "RLDICR should have 4 operands");
|
||||||
|
|
||||||
|
MachineOperand MOpSHMI = MI.getOperand(2);
|
||||||
|
MachineOperand MOpMEMI = MI.getOperand(3);
|
||||||
|
if (!(MOpSHMI.isImm() && MOpMEMI.isImm()))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
uint64_t SHMI = MOpSHMI.getImm();
|
||||||
|
uint64_t MEMI = MOpMEMI.getImm();
|
||||||
|
if (SHMI + MEMI != 63)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
unsigned SrcReg = MI.getOperand(1).getReg();
|
||||||
|
if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
|
||||||
|
if (SrcMI->getOpcode() != PPC::EXTSW &&
|
||||||
|
SrcMI->getOpcode() != PPC::EXTSW_32_64)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// If the register defined by extsw has more than one use, combination is not
|
||||||
|
// needed.
|
||||||
|
if (!MRI->hasOneNonDBGUse(SrcReg))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
LLVM_DEBUG(dbgs() << "Combining pair: ");
|
||||||
|
LLVM_DEBUG(SrcMI->dump());
|
||||||
|
LLVM_DEBUG(MI.dump());
|
||||||
|
|
||||||
|
MachineInstr *NewInstr =
|
||||||
|
BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(),
|
||||||
|
SrcMI->getOpcode() == PPC::EXTSW ? TII->get(PPC::EXTSWSLI)
|
||||||
|
: TII->get(PPC::EXTSWSLI_32_64),
|
||||||
|
MI.getOperand(0).getReg())
|
||||||
|
.add(SrcMI->getOperand(1))
|
||||||
|
.add(MOpSHMI);
|
||||||
|
|
||||||
|
LLVM_DEBUG(dbgs() << "TO: ");
|
||||||
|
LLVM_DEBUG(NewInstr->dump());
|
||||||
|
++NumEXTSWAndSLDICombined;
|
||||||
|
ToErase = &MI;
|
||||||
|
// SrcMI, which is extsw, is of no use now, erase it.
|
||||||
|
SrcMI->eraseFromParent();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
} // end default namespace
|
} // end default namespace
|
||||||
|
|
||||||
INITIALIZE_PASS_BEGIN(PPCMIPeephole, DEBUG_TYPE,
|
INITIALIZE_PASS_BEGIN(PPCMIPeephole, DEBUG_TYPE,
|
||||||
|
|
|
@ -42,8 +42,7 @@ define dso_local i32 @poc(i32* %base, i32 %index, i1 %flag, i32 %default) {
|
||||||
; CHECK-P9-NEXT: andi. r5, r5, 1
|
; CHECK-P9-NEXT: andi. r5, r5, 1
|
||||||
; CHECK-P9-NEXT: bc 4, gt, .LBB0_2
|
; CHECK-P9-NEXT: bc 4, gt, .LBB0_2
|
||||||
; CHECK-P9-NEXT: # %bb.1: # %true
|
; CHECK-P9-NEXT: # %bb.1: # %true
|
||||||
; CHECK-P9-NEXT: extsw r4, r4
|
; CHECK-P9-NEXT: extswsli r4, r4, 2
|
||||||
; CHECK-P9-NEXT: sldi r4, r4, 2
|
|
||||||
; CHECK-P9-NEXT: lwzx r3, r3, r4
|
; CHECK-P9-NEXT: lwzx r3, r3, r4
|
||||||
; CHECK-P9-NEXT: blr
|
; CHECK-P9-NEXT: blr
|
||||||
; CHECK-P9-NEXT: .LBB0_2: # %false
|
; CHECK-P9-NEXT: .LBB0_2: # %false
|
||||||
|
@ -55,8 +54,7 @@ define dso_local i32 @poc(i32* %base, i32 %index, i1 %flag, i32 %default) {
|
||||||
; CHECK-P9-BE-NEXT: andi. r5, r5, 1
|
; CHECK-P9-BE-NEXT: andi. r5, r5, 1
|
||||||
; CHECK-P9-BE-NEXT: bc 4, gt, .LBB0_2
|
; CHECK-P9-BE-NEXT: bc 4, gt, .LBB0_2
|
||||||
; CHECK-P9-BE-NEXT: # %bb.1: # %true
|
; CHECK-P9-BE-NEXT: # %bb.1: # %true
|
||||||
; CHECK-P9-BE-NEXT: extsw r4, r4
|
; CHECK-P9-BE-NEXT: extswsli r4, r4, 2
|
||||||
; CHECK-P9-BE-NEXT: sldi r4, r4, 2
|
|
||||||
; CHECK-P9-BE-NEXT: lwzx r3, r3, r4
|
; CHECK-P9-BE-NEXT: lwzx r3, r3, r4
|
||||||
; CHECK-P9-BE-NEXT: blr
|
; CHECK-P9-BE-NEXT: blr
|
||||||
; CHECK-P9-BE-NEXT: .LBB0_2: # %false
|
; CHECK-P9-BE-NEXT: .LBB0_2: # %false
|
||||||
|
@ -74,3 +72,131 @@ true:
|
||||||
false:
|
false:
|
||||||
ret i32 %default
|
ret i32 %default
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; Positive test: the sext of %index lives in %entry and its only use is the
; i64 getelementptr in %true. The CHECK-P9 and CHECK-P9-BE prefixes expect a
; single `extswsli r4, r4, 3`; the CHECK and CHECK-BE prefixes expect the
; separate `extsw` + `sldi` pair.
define dso_local i64 @poc_i64(i64* %base, i32 %index, i1 %flag, i64 %default) {
|
||||||
|
; CHECK-LABEL: poc_i64:
|
||||||
|
; CHECK: # %bb.0: # %entry
|
||||||
|
; CHECK-NEXT: andi. r5, r5, 1
|
||||||
|
; CHECK-NEXT: bc 4, gt, .LBB1_2
|
||||||
|
; CHECK-NEXT: # %bb.1: # %true
|
||||||
|
; CHECK-NEXT: extsw r4, r4
|
||||||
|
; CHECK-NEXT: sldi r4, r4, 3
|
||||||
|
; CHECK-NEXT: ldx r3, r3, r4
|
||||||
|
; CHECK-NEXT: blr
|
||||||
|
; CHECK-NEXT: .LBB1_2: # %false
|
||||||
|
; CHECK-NEXT: mr r3, r6
|
||||||
|
; CHECK-NEXT: blr
|
||||||
|
;
|
||||||
|
; CHECK-BE-LABEL: poc_i64:
|
||||||
|
; CHECK-BE: # %bb.0: # %entry
|
||||||
|
; CHECK-BE-NEXT: andi. r5, r5, 1
|
||||||
|
; CHECK-BE-NEXT: bc 4, gt, .LBB1_2
|
||||||
|
; CHECK-BE-NEXT: # %bb.1: # %true
|
||||||
|
; CHECK-BE-NEXT: extsw r4, r4
|
||||||
|
; CHECK-BE-NEXT: sldi r4, r4, 3
|
||||||
|
; CHECK-BE-NEXT: ldx r3, r3, r4
|
||||||
|
; CHECK-BE-NEXT: blr
|
||||||
|
; CHECK-BE-NEXT: .LBB1_2: # %false
|
||||||
|
; CHECK-BE-NEXT: mr r3, r6
|
||||||
|
; CHECK-BE-NEXT: blr
|
||||||
|
;
|
||||||
|
; CHECK-P9-LABEL: poc_i64:
|
||||||
|
; CHECK-P9: # %bb.0: # %entry
|
||||||
|
; CHECK-P9-NEXT: andi. r5, r5, 1
|
||||||
|
; CHECK-P9-NEXT: bc 4, gt, .LBB1_2
|
||||||
|
; CHECK-P9-NEXT: # %bb.1: # %true
|
||||||
|
; CHECK-P9-NEXT: extswsli r4, r4, 3
|
||||||
|
; CHECK-P9-NEXT: ldx r3, r3, r4
|
||||||
|
; CHECK-P9-NEXT: blr
|
||||||
|
; CHECK-P9-NEXT: .LBB1_2: # %false
|
||||||
|
; CHECK-P9-NEXT: mr r3, r6
|
||||||
|
; CHECK-P9-NEXT: blr
|
||||||
|
;
|
||||||
|
; CHECK-P9-BE-LABEL: poc_i64:
|
||||||
|
; CHECK-P9-BE: # %bb.0: # %entry
|
||||||
|
; CHECK-P9-BE-NEXT: andi. r5, r5, 1
|
||||||
|
; CHECK-P9-BE-NEXT: bc 4, gt, .LBB1_2
|
||||||
|
; CHECK-P9-BE-NEXT: # %bb.1: # %true
|
||||||
|
; CHECK-P9-BE-NEXT: extswsli r4, r4, 3
|
||||||
|
; CHECK-P9-BE-NEXT: ldx r3, r3, r4
|
||||||
|
; CHECK-P9-BE-NEXT: blr
|
||||||
|
; CHECK-P9-BE-NEXT: .LBB1_2: # %false
|
||||||
|
; CHECK-P9-BE-NEXT: mr r3, r6
|
||||||
|
; CHECK-P9-BE-NEXT: blr
|
||||||
|
entry:
|
||||||
|
%iconv = sext i32 %index to i64
|
||||||
|
br i1 %flag, label %true, label %false
|
||||||
|
|
||||||
|
true:
|
||||||
|
%ptr = getelementptr inbounds i64, i64* %base, i64 %iconv
|
||||||
|
%value = load i64, i64* %ptr, align 8
|
||||||
|
ret i64 %value
|
||||||
|
|
||||||
|
false:
|
||||||
|
ret i64 %default
|
||||||
|
}
|
||||||
|
|
||||||
|
; Negative test: %iconv is used by the getelementptr in %true and is also
; returned from %false, so the sign-extended value has more than one use.
; All four check prefixes expect a standalone `extsw` in the entry block and a
; separate `sldi` in %true; no `extswsli` is expected.
define dso_local i64 @no_extswsli(i64* %base, i32 %index, i1 %flag) {
|
||||||
|
; CHECK-LABEL: no_extswsli:
|
||||||
|
; CHECK: # %bb.0: # %entry
|
||||||
|
; CHECK-NEXT: andi. r5, r5, 1
|
||||||
|
; CHECK-NEXT: extsw r4, r4
|
||||||
|
; CHECK-NEXT: bc 4, gt, .LBB2_2
|
||||||
|
; CHECK-NEXT: # %bb.1: # %true
|
||||||
|
; CHECK-NEXT: sldi r4, r4, 3
|
||||||
|
; CHECK-NEXT: ldx r3, r3, r4
|
||||||
|
; CHECK-NEXT: blr
|
||||||
|
; CHECK-NEXT: .LBB2_2: # %false
|
||||||
|
; CHECK-NEXT: mr r3, r4
|
||||||
|
; CHECK-NEXT: blr
|
||||||
|
;
|
||||||
|
; CHECK-BE-LABEL: no_extswsli:
|
||||||
|
; CHECK-BE: # %bb.0: # %entry
|
||||||
|
; CHECK-BE-NEXT: andi. r5, r5, 1
|
||||||
|
; CHECK-BE-NEXT: extsw r4, r4
|
||||||
|
; CHECK-BE-NEXT: bc 4, gt, .LBB2_2
|
||||||
|
; CHECK-BE-NEXT: # %bb.1: # %true
|
||||||
|
; CHECK-BE-NEXT: sldi r4, r4, 3
|
||||||
|
; CHECK-BE-NEXT: ldx r3, r3, r4
|
||||||
|
; CHECK-BE-NEXT: blr
|
||||||
|
; CHECK-BE-NEXT: .LBB2_2: # %false
|
||||||
|
; CHECK-BE-NEXT: mr r3, r4
|
||||||
|
; CHECK-BE-NEXT: blr
|
||||||
|
;
|
||||||
|
; CHECK-P9-LABEL: no_extswsli:
|
||||||
|
; CHECK-P9: # %bb.0: # %entry
|
||||||
|
; CHECK-P9-NEXT: extsw r4, r4
|
||||||
|
; CHECK-P9-NEXT: andi. r5, r5, 1
|
||||||
|
; CHECK-P9-NEXT: bc 4, gt, .LBB2_2
|
||||||
|
; CHECK-P9-NEXT: # %bb.1: # %true
|
||||||
|
; CHECK-P9-NEXT: sldi r4, r4, 3
|
||||||
|
; CHECK-P9-NEXT: ldx r3, r3, r4
|
||||||
|
; CHECK-P9-NEXT: blr
|
||||||
|
; CHECK-P9-NEXT: .LBB2_2: # %false
|
||||||
|
; CHECK-P9-NEXT: mr r3, r4
|
||||||
|
; CHECK-P9-NEXT: blr
|
||||||
|
;
|
||||||
|
; CHECK-P9-BE-LABEL: no_extswsli:
|
||||||
|
; CHECK-P9-BE: # %bb.0: # %entry
|
||||||
|
; CHECK-P9-BE-NEXT: extsw r4, r4
|
||||||
|
; CHECK-P9-BE-NEXT: andi. r5, r5, 1
|
||||||
|
; CHECK-P9-BE-NEXT: bc 4, gt, .LBB2_2
|
||||||
|
; CHECK-P9-BE-NEXT: # %bb.1: # %true
|
||||||
|
; CHECK-P9-BE-NEXT: sldi r4, r4, 3
|
||||||
|
; CHECK-P9-BE-NEXT: ldx r3, r3, r4
|
||||||
|
; CHECK-P9-BE-NEXT: blr
|
||||||
|
; CHECK-P9-BE-NEXT: .LBB2_2: # %false
|
||||||
|
; CHECK-P9-BE-NEXT: mr r3, r4
|
||||||
|
; CHECK-P9-BE-NEXT: blr
|
||||||
|
entry:
|
||||||
|
%iconv = sext i32 %index to i64
|
||||||
|
br i1 %flag, label %true, label %false
|
||||||
|
|
||||||
|
true:
|
||||||
|
%ptr = getelementptr inbounds i64, i64* %base, i64 %iconv
|
||||||
|
%value = load i64, i64* %ptr, align 8
|
||||||
|
ret i64 %value
|
||||||
|
|
||||||
|
false:
|
||||||
|
ret i64 %iconv
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue