diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 140b63f7570f..030aa4908678 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -3618,6 +3618,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4( if (GPR_idx != Num_GPR_Regs) { unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass); + FuncInfo->addLiveInAttr(VReg, Flags); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store; @@ -3652,6 +3653,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4( break; unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); + FuncInfo->addLiveInAttr(VReg, Flags); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Addr = FIN; if (j) { @@ -3688,6 +3690,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4( // types to avoid forcing arguments to memory unnecessarily. if (GPR_idx != Num_GPR_Regs) { unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass); + FuncInfo->addLiveInAttr(VReg, Flags); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1) @@ -3733,6 +3736,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4( // since otherwise we never run out of FPRs before running out // of GPRs. 
unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass); + FuncInfo->addLiveInAttr(VReg, Flags); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); if (ObjectVT == MVT::f32) { diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index a71841fd6077..9da20d9bc6f5 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -260,6 +260,7 @@ bool PPCInstrInfo::isCoalescableExtInstr(const MachineInstr &MI, switch (MI.getOpcode()) { default: return false; case PPC::EXTSW: + case PPC::EXTSW_32: case PPC::EXTSW_32_64: SrcReg = MI.getOperand(1).getReg(); DstReg = MI.getOperand(0).getReg(); @@ -2103,3 +2104,241 @@ PPCInstrInfo::updatedRC(const TargetRegisterClass *RC) const { int PPCInstrInfo::getRecordFormOpcode(unsigned Opcode) { return PPC::getRecordFormOpcode(Opcode); } + +// This function returns true if the machine instruction +// always outputs a value by sign-extending a 32 bit value, +// i.e. 0 to 31-th bits are same as 32-th bit. 
+static bool isSignExtendingOp(const MachineInstr &MI) { + int Opcode = MI.getOpcode(); + if (Opcode == PPC::LI || Opcode == PPC::LI8 || + Opcode == PPC::LIS || Opcode == PPC::LIS8 || + Opcode == PPC::SRAW || Opcode == PPC::SRAWo || + Opcode == PPC::SRAWI || Opcode == PPC::SRAWIo || + Opcode == PPC::LWA || Opcode == PPC::LWAX || + Opcode == PPC::LWA_32 || Opcode == PPC::LWAX_32 || + Opcode == PPC::LHA || Opcode == PPC::LHAX || + Opcode == PPC::LHA8 || Opcode == PPC::LHAX8 || + Opcode == PPC::LBZ || Opcode == PPC::LBZX || + Opcode == PPC::LBZ8 || Opcode == PPC::LBZX8 || + Opcode == PPC::LBZU || Opcode == PPC::LBZUX || + Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8 || + Opcode == PPC::LHZ || Opcode == PPC::LHZX || + Opcode == PPC::LHZ8 || Opcode == PPC::LHZX8 || + Opcode == PPC::LHZU || Opcode == PPC::LHZUX || + Opcode == PPC::LHZU8 || Opcode == PPC::LHZUX8 || + Opcode == PPC::EXTSB || Opcode == PPC::EXTSBo || + Opcode == PPC::EXTSH || Opcode == PPC::EXTSHo || + Opcode == PPC::EXTSB8 || Opcode == PPC::EXTSH8 || + Opcode == PPC::EXTSW || Opcode == PPC::EXTSWo || + Opcode == PPC::EXTSH8_32_64 || Opcode == PPC::EXTSW_32_64 || + Opcode == PPC::EXTSB8_32_64) + return true; + + if (Opcode == PPC::RLDICL && MI.getOperand(3).getImm() >= 33) + return true; + + if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINMo || + Opcode == PPC::RLWNM || Opcode == PPC::RLWNMo) && + MI.getOperand(3).getImm() > 0 && + MI.getOperand(3).getImm() <= MI.getOperand(4).getImm()) + return true; + + return false; +} + +// This function returns true if the machine instruction +// always outputs zeros in higher 32 bits. +static bool isZeroExtendingOp(const MachineInstr &MI) { + int Opcode = MI.getOpcode(); + // The 16-bit immediate is sign-extended in li/lis. + // If the most significant bit is zero, all higher bits are zero. 
+ if (Opcode == PPC::LI || Opcode == PPC::LI8 || + Opcode == PPC::LIS || Opcode == PPC::LIS8) { + int64_t Imm = MI.getOperand(1).getImm(); + if (((uint64_t)Imm & ~0x7FFFuLL) == 0) + return true; + } + + // We have some variations of rotate-and-mask instructions + // that clear higher 32-bits. + if ((Opcode == PPC::RLDICL || Opcode == PPC::RLDICLo || + Opcode == PPC::RLDCL || Opcode == PPC::RLDCLo || + Opcode == PPC::RLDICL_32_64) && + MI.getOperand(3).getImm() >= 32) + return true; + + if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDICo) && + MI.getOperand(3).getImm() >= 32 && + MI.getOperand(3).getImm() <= 63 - MI.getOperand(2).getImm()) + return true; + + if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINMo || + Opcode == PPC::RLWNM || Opcode == PPC::RLWNMo || + Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) && + MI.getOperand(3).getImm() <= MI.getOperand(4).getImm()) + return true; + + // There are other instructions that clear higher 32-bits. + if (Opcode == PPC::CNTLZW || Opcode == PPC::CNTLZWo || + Opcode == PPC::CNTTZW || Opcode == PPC::CNTTZWo || + Opcode == PPC::CNTLZW8 || Opcode == PPC::CNTTZW8 || + Opcode == PPC::CNTLZD || Opcode == PPC::CNTLZDo || + Opcode == PPC::CNTTZD || Opcode == PPC::CNTTZDo || + Opcode == PPC::POPCNTD || Opcode == PPC::POPCNTW || + Opcode == PPC::SLW || Opcode == PPC::SLWo || + Opcode == PPC::SRW || Opcode == PPC::SRWo || + Opcode == PPC::SLW8 || Opcode == PPC::SRW8 || + Opcode == PPC::SLWI || Opcode == PPC::SLWIo || + Opcode == PPC::SRWI || Opcode == PPC::SRWIo || + Opcode == PPC::LWZ || Opcode == PPC::LWZX || + Opcode == PPC::LWZU || Opcode == PPC::LWZUX || + Opcode == PPC::LWBRX || Opcode == PPC::LHBRX || + Opcode == PPC::LHZ || Opcode == PPC::LHZX || + Opcode == PPC::LHZU || Opcode == PPC::LHZUX || + Opcode == PPC::LBZ || Opcode == PPC::LBZX || + Opcode == PPC::LBZU || Opcode == PPC::LBZUX || + Opcode == PPC::LWZ8 || Opcode == PPC::LWZX8 || + Opcode == PPC::LWZU8 || Opcode == PPC::LWZUX8 || + Opcode == PPC::LWBRX8 || Opcode 
== PPC::LHBRX8 || + Opcode == PPC::LHZ8 || Opcode == PPC::LHZX8 || + Opcode == PPC::LHZU8 || Opcode == PPC::LHZUX8 || + Opcode == PPC::LBZ8 || Opcode == PPC::LBZX8 || + Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8 || + Opcode == PPC::ANDIo || Opcode == PPC::ANDISo || + Opcode == PPC::ROTRWI || Opcode == PPC::ROTRWIo || + Opcode == PPC::EXTLWI || Opcode == PPC::EXTLWIo || + Opcode == PPC::MFVSRWZ) + return true; + + return false; +} + +// We limit the max depth to track incoming values of PHIs or binary ops +// (e.g. AND) to avoid exsessive cost. +const unsigned MAX_DEPTH = 1; + +bool +PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt, + const unsigned Depth) const { + const MachineFunction *MF = MI.getParent()->getParent(); + const MachineRegisterInfo *MRI = &MF->getRegInfo(); + + switch (MI.getOpcode()) { + case PPC::COPY: { + unsigned SrcReg = MI.getOperand(1).getReg(); + + // In both ELFv1 and v2 ABI, method parameters and the return value + // are sign- or zero-extended. + if (MF->getSubtarget().isSVR4ABI()) { + const PPCFunctionInfo *FuncInfo = MF->getInfo(); + // We check the ZExt/SExt flags for a method parameter. + if (MI.getParent()->getBasicBlock() == + &MF->getFunction()->getEntryBlock()) { + unsigned VReg = MI.getOperand(0).getReg(); + if (MF->getRegInfo().isLiveIn(VReg)) + return SignExt ? FuncInfo->isLiveInSExt(VReg) : + FuncInfo->isLiveInZExt(VReg); + } + + // For a method return value, we check the ZExt/SExt flags in attribute. + // We assume the following code sequence for method call. + // ADJCALLSTACKDOWN 32, %R1, %R1 + // BL8_NOP ,... 
+ // ADJCALLSTACKUP 32, 0, %R1, %R1 + // %vreg5 = COPY %X3; G8RC:%vreg5 + if (SrcReg == PPC::X3) { + const MachineBasicBlock *MBB = MI.getParent(); + MachineBasicBlock::const_instr_iterator II = + MachineBasicBlock::const_instr_iterator(&MI); + if (II != MBB->instr_begin() && + (--II)->getOpcode() == PPC::ADJCALLSTACKUP) { + const MachineInstr &CallMI = *(--II); + if (CallMI.isCall() && CallMI.getOperand(0).isGlobal()) { + const Function *CalleeFn = + dyn_cast(CallMI.getOperand(0).getGlobal()); + const IntegerType *IntTy = + dyn_cast(CalleeFn->getReturnType()); + const AttributeSet &Attrs = + CalleeFn->getAttributes().getRetAttributes(); + if (IntTy && IntTy->getBitWidth() <= 32) + return Attrs.hasAttribute(SignExt ? Attribute::SExt : + Attribute::ZExt); + } + } + } + } + + // If this is a copy from another register, we recursively check source. + if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + return false; + const MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); + if (SrcMI != NULL) + return isSignOrZeroExtended(*SrcMI, SignExt, Depth); + + return false; + } + + case PPC::ANDIo: + case PPC::ANDISo: + case PPC::ORI: + case PPC::ORIS: + case PPC::XORI: + case PPC::XORIS: + case PPC::ANDIo8: + case PPC::ANDISo8: + case PPC::ORI8: + case PPC::ORIS8: + case PPC::XORI8: + case PPC::XORIS8: { + // logical operation with 16-bit immediate does not change the upper bits. + // So, we track the operand register as we do for register copy. + unsigned SrcReg = MI.getOperand(1).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + return false; + const MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); + if (SrcMI != NULL) + return isSignOrZeroExtended(*SrcMI, SignExt, Depth); + + return false; + } + + // If all incoming values are sign-/zero-extended, + // the output of AND, OR, ISEL or PHI is also sign-/zero-extended. 
+ case PPC::AND: + case PPC::AND8: + case PPC::OR: + case PPC::OR8: + case PPC::ISEL: + case PPC::PHI: { + if (Depth >= MAX_DEPTH) + return false; + + // The input registers for PHI are operand 1, 3, ... + // The input registers for others are operand 1 and 2. + unsigned E = 3, D = 1; + if (MI.getOpcode() == PPC::PHI) { + E = MI.getNumOperands(); + D = 2; + } + + for (unsigned I = 1; I != E; I += D) { + if (MI.getOperand(I).isReg()) { + unsigned SrcReg = MI.getOperand(I).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + return false; + const MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); + if (SrcMI == NULL || !isSignOrZeroExtended(*SrcMI, SignExt, Depth+1)) + return false; + } + else + return false; + } + return true; + } + + default: + return SignExt?isSignExtendingOp(MI): + isZeroExtendingOp(MI); + } + return false; +} diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h index b0629c88cf57..ab86a54f6fea 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -293,6 +293,21 @@ public: } const TargetRegisterClass *updatedRC(const TargetRegisterClass *RC) const; static int getRecordFormOpcode(unsigned Opcode); + + bool isSignOrZeroExtended(const MachineInstr &MI, bool SignExt, + const unsigned PhiDepth) const; + + /// Return true if the output of the instruction is always a sign-extended, + /// i.e. 0 to 31-th bits are same as 32-th bit. + bool isSignExtended(const MachineInstr &MI, const unsigned depth = 0) const { + return isSignOrZeroExtended(MI, true, depth); + } + + /// Return true if the output of the instruction is always zero-extended, + /// i.e. 
0 to 31-th bits are all zeros + bool isZeroExtended(const MachineInstr &MI, const unsigned depth = 0) const { + return isSignOrZeroExtended(MI, false, depth); + } }; } diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp index 8e7e067a21ed..d135287a8456 100644 --- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp +++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp @@ -29,14 +29,27 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Debug.h" +#include "llvm/ADT/Statistic.h" #include "MCTargetDesc/PPCPredicates.h" using namespace llvm; #define DEBUG_TYPE "ppc-mi-peepholes" +STATISTIC(NumEliminatedSExt, "Number of eliminated sign-extensions"); +STATISTIC(NumEliminatedZExt, "Number of eliminated zero-extensions"); STATISTIC(NumOptADDLIs, "Number of optimized ADD instruction fed by LI"); +static cl::opt + EnableSExtElimination("ppc-eliminate-signext", + cl::desc("enable elimination of sign-extensions"), + cl::init(true), cl::Hidden); + +static cl::opt + EnableZExtElimination("ppc-eliminate-zeroext", + cl::desc("enable elimination of zero-extensions"), + cl::init(true), cl::Hidden); + namespace llvm { void initializePPCMIPeepholePass(PassRegistry&); } @@ -110,6 +123,59 @@ static MachineInstr *getVRegDefOrNull(MachineOperand *Op, return MRI->getVRegDef(Reg); } +// This function returns number of known zero bits in output of MI +// starting from the most significant bit. 
+static unsigned
+getKnownLeadingZeroCount(MachineInstr *MI, const PPCInstrInfo *TII) {
+  unsigned Opcode = MI->getOpcode(); // Classified by opcode and immediates.
+  if (Opcode == PPC::RLDICL || Opcode == PPC::RLDICLo ||
+      Opcode == PPC::RLDCL || Opcode == PPC::RLDCLo)
+    return MI->getOperand(3).getImm(); // Operand 3 is used as the count.
+
+  if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDICo) &&
+       MI->getOperand(3).getImm() <= 63 - MI->getOperand(2).getImm())
+    return MI->getOperand(3).getImm(); // Only if op3 <= 63 - op2.
+
+  if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINMo ||
+       Opcode == PPC::RLWNM || Opcode == PPC::RLWNMo ||
+       Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) &&
+       MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
+    return 32 + MI->getOperand(3).getImm(); // Upper word plus operand 3.
+
+  if (Opcode == PPC::ANDIo) {
+    uint16_t Imm = MI->getOperand(2).getImm(); // Truncated to 16 bits.
+    return 48 + countLeadingZeros(Imm); // 48 upper bits + zeros of the mask.
+  }
+
+  if (Opcode == PPC::CNTLZW || Opcode == PPC::CNTLZWo ||
+      Opcode == PPC::CNTTZW || Opcode == PPC::CNTTZWo ||
+      Opcode == PPC::CNTLZW8 || Opcode == PPC::CNTTZW8)
+    // The result ranges from 0 to 32.
+    return 58; // A value of at most 32 fits in the low 6 bits.
+
+  if (Opcode == PPC::CNTLZD || Opcode == PPC::CNTLZDo ||
+      Opcode == PPC::CNTTZD || Opcode == PPC::CNTTZDo)
+    // The result ranges from 0 to 64.
+    return 57; // A value of at most 64 fits in the low 7 bits.
+
+  if (Opcode == PPC::LHZ || Opcode == PPC::LHZX ||
+      Opcode == PPC::LHZ8 || Opcode == PPC::LHZX8 ||
+      Opcode == PPC::LHZU || Opcode == PPC::LHZUX ||
+      Opcode == PPC::LHZU8 || Opcode == PPC::LHZUX8)
+    return 48; // Halfword load: 64 - 16.
+
+  if (Opcode == PPC::LBZ || Opcode == PPC::LBZX ||
+      Opcode == PPC::LBZ8 || Opcode == PPC::LBZX8 ||
+      Opcode == PPC::LBZU || Opcode == PPC::LBZUX ||
+      Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8)
+    return 56; // Byte load: 64 - 8.
+
+  if (TII->isZeroExtended(*MI)) // Fallback: generic zero-extension query.
+    return 32;
+
+  return 0; // No leading zero bits are known.
+}
+
 // Perform peephole optimizations.
bool PPCMIPeephole::simplifyCode(void) { bool Simplified = false; @@ -367,6 +433,156 @@ bool PPCMIPeephole::simplifyCode(void) { } break; } + case PPC::EXTSH: + case PPC::EXTSH8: + case PPC::EXTSH8_32_64: { + if (!EnableSExtElimination) break; + unsigned NarrowReg = MI.getOperand(1).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(NarrowReg)) + break; + + MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg); + // If we've used a zero-extending load that we will sign-extend, + // just do a sign-extending load. + if (SrcMI->getOpcode() == PPC::LHZ || + SrcMI->getOpcode() == PPC::LHZX) { + if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg())) + break; + auto is64Bit = [] (unsigned Opcode) { + return Opcode == PPC::EXTSH8; + }; + auto isXForm = [] (unsigned Opcode) { + return Opcode == PPC::LHZX; + }; + auto getSextLoadOp = [] (bool is64Bit, bool isXForm) { + if (is64Bit) + if (isXForm) return PPC::LHAX8; + else return PPC::LHA8; + else + if (isXForm) return PPC::LHAX; + else return PPC::LHA; + }; + unsigned Opc = getSextLoadOp(is64Bit(MI.getOpcode()), + isXForm(SrcMI->getOpcode())); + DEBUG(dbgs() << "Zero-extending load\n"); + DEBUG(SrcMI->dump()); + DEBUG(dbgs() << "and sign-extension\n"); + DEBUG(MI.dump()); + DEBUG(dbgs() << "are merged into sign-extending load\n"); + SrcMI->setDesc(TII->get(Opc)); + SrcMI->getOperand(0).setReg(MI.getOperand(0).getReg()); + ToErase = &MI; + Simplified = true; + NumEliminatedSExt++; + } + break; + } + case PPC::EXTSW: + case PPC::EXTSW_32: + case PPC::EXTSW_32_64: { + if (!EnableSExtElimination) break; + unsigned NarrowReg = MI.getOperand(1).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(NarrowReg)) + break; + + MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg); + // If we've used a zero-extending load that we will sign-extend, + // just do a sign-extending load. 
+ if (SrcMI->getOpcode() == PPC::LWZ || + SrcMI->getOpcode() == PPC::LWZX) { + if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg())) + break; + auto is64Bit = [] (unsigned Opcode) { + return Opcode == PPC::EXTSW || Opcode == PPC::EXTSW_32_64; + }; + auto isXForm = [] (unsigned Opcode) { + return Opcode == PPC::LWZX; + }; + auto getSextLoadOp = [] (bool is64Bit, bool isXForm) { + if (is64Bit) + if (isXForm) return PPC::LWAX; + else return PPC::LWA; + else + if (isXForm) return PPC::LWAX_32; + else return PPC::LWA_32; + }; + unsigned Opc = getSextLoadOp(is64Bit(MI.getOpcode()), + isXForm(SrcMI->getOpcode())); + DEBUG(dbgs() << "Zero-extending load\n"); + DEBUG(SrcMI->dump()); + DEBUG(dbgs() << "and sign-extension\n"); + DEBUG(MI.dump()); + DEBUG(dbgs() << "are merged into sign-extending load\n"); + SrcMI->setDesc(TII->get(Opc)); + SrcMI->getOperand(0).setReg(MI.getOperand(0).getReg()); + ToErase = &MI; + Simplified = true; + NumEliminatedSExt++; + } else if (MI.getOpcode() == PPC::EXTSW_32_64 && + TII->isSignExtended(*SrcMI)) { + // We can eliminate EXTSW if the input is known to be already + // sign-extended. + DEBUG(dbgs() << "Removing redundant sign-extension\n"); + unsigned TmpReg = + MF->getRegInfo().createVirtualRegister(&PPC::G8RCRegClass); + BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::IMPLICIT_DEF), + TmpReg); + BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::INSERT_SUBREG), + MI.getOperand(0).getReg()) + .addReg(TmpReg) + .addReg(NarrowReg) + .addImm(PPC::sub_32); + ToErase = &MI; + Simplified = true; + NumEliminatedSExt++; + } + break; + } + case PPC::RLDICL: { + // We can eliminate RLDICL (e.g. for zero-extension) + // if all bits to clear are already zero in the input. + // This code assume following code sequence for zero-extension. 
+ // %vreg6 = COPY %vreg5:sub_32; (optional) + // %vreg8 = IMPLICIT_DEF; + // %vreg7 = INSERT_SUBREG %vreg8, %vreg6, sub_32; + if (!EnableZExtElimination) break; + + if (MI.getOperand(2).getImm() != 0) + break; + + unsigned SrcReg = MI.getOperand(1).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + break; + + MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); + if (!(SrcMI && SrcMI->getOpcode() == PPC::INSERT_SUBREG && + SrcMI->getOperand(0).isReg() && SrcMI->getOperand(1).isReg())) + break; + + MachineInstr *ImpDefMI, *SubRegMI; + ImpDefMI = MRI->getVRegDef(SrcMI->getOperand(1).getReg()); + SubRegMI = MRI->getVRegDef(SrcMI->getOperand(2).getReg()); + if (ImpDefMI->getOpcode() != PPC::IMPLICIT_DEF) break; + + SrcMI = SubRegMI; + if (SubRegMI->getOpcode() == PPC::COPY) { + unsigned CopyReg = SubRegMI->getOperand(1).getReg(); + if (TargetRegisterInfo::isVirtualRegister(CopyReg)) + SrcMI = MRI->getVRegDef(CopyReg); + } + + unsigned KnownZeroCount = getKnownLeadingZeroCount(SrcMI, TII); + if (MI.getOperand(3).getImm() <= KnownZeroCount) { + DEBUG(dbgs() << "Removing redundant zero-extension\n"); + BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY), + MI.getOperand(0).getReg()) + .addReg(SrcReg); + ToErase = &MI; + Simplified = true; + NumEliminatedZExt++; + } + break; + } // TODO: Any instruction that has an immediate form fed only by a PHI // whose operands are all load immediate can be folded away. 
We currently diff --git a/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp b/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp index bc2d9a08b5e8..3923417257e8 100644 --- a/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp @@ -43,3 +43,17 @@ MCSymbol *PPCFunctionInfo::getTOCOffsetSymbol() const { "func_toc" + Twine(MF.getFunctionNumber())); } + +bool PPCFunctionInfo::isLiveInSExt(unsigned VReg) const { + for (const std::pair &LiveIn : LiveInAttrs) + if (LiveIn.first == VReg) + return LiveIn.second.isSExt(); + return false; +} + +bool PPCFunctionInfo::isLiveInZExt(unsigned VReg) const { + for (const std::pair &LiveIn : LiveInAttrs) + if (LiveIn.first == VReg) + return LiveIn.second.isZExt(); + return false; +} diff --git a/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h index 202e10058b73..34371f7bede7 100644 --- a/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h +++ b/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h @@ -16,6 +16,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Target/TargetCallingConv.h" namespace llvm { @@ -113,6 +114,10 @@ class PPCFunctionInfo : public MachineFunctionInfo { /// copies bool IsSplitCSR = false; + /// We keep track attributes for each live-in virtual registers + /// to use SExt/ZExt flags in later optimization. + std::vector> LiveInAttrs; + public: explicit PPCFunctionInfo(MachineFunction &MF) : MF(MF) {} @@ -175,6 +180,19 @@ public: unsigned getVarArgsNumFPR() const { return VarArgsNumFPR; } void setVarArgsNumFPR(unsigned Num) { VarArgsNumFPR = Num; } + /// This function associates attributes for each live-in virtual register. + void addLiveInAttr(unsigned VReg, ISD::ArgFlagsTy Flags) { + LiveInAttrs.push_back(std::make_pair(VReg, Flags)); + } + + /// This function returns true if the spesified vreg is + /// a live-in register and sign-extended. 
+ bool isLiveInSExt(unsigned VReg) const; + + /// This function returns true if the specified vreg is + /// a live-in register and zero-extended. + bool isLiveInZExt(unsigned VReg) const; + int getCRSpillFrameIndex() const { return CRSpillFrameIndex; } void setCRSpillFrameIndex(int idx) { CRSpillFrameIndex = idx; } diff --git a/llvm/test/CodeGen/PowerPC/expand-isel.ll b/llvm/test/CodeGen/PowerPC/expand-isel.ll index 84d17baf13ae..76479705f020 100644 --- a/llvm/test/CodeGen/PowerPC/expand-isel.ll +++ b/llvm/test/CodeGen/PowerPC/expand-isel.ll @@ -215,9 +215,7 @@ cleanup: ; CHECK-LABEL: @testComplexISEL ; CHECK-DAG: [[LI:r[0-9]+]], 1 ; CHECK-DAG: cmplwi [[LD:r[0-9]+]], 0 -; CHECK: beq cr0, [[EQ:.LBB[0-9_]+]] -; CHECK: blr -; CHECK: [[EQ]] +; CHECK: bnelr cr0 ; CHECK: xor [[XOR:r[0-9]+]] ; CHECK: cntlzd [[CZ:r[0-9]+]], [[XOR]] ; CHECK: rldicl [[SH:r[0-9]+]], [[CZ]], 58, 63 diff --git a/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll b/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll index cc8502732022..27e9c1b8819d 100644 --- a/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll +++ b/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll @@ -45,13 +45,9 @@ define signext i32 @zeroEqualityTest01(i8* %x, i8* %y) { ; CHECK-NEXT: ld 4, 8(4) ; CHECK-NEXT: cmpld 3, 4 ; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: beq 0, .LBB1_3 +; CHECK-NEXT: beqlr 0 ; CHECK-NEXT: .LBB1_2: # %res_block ; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: clrldi 3, 3, 32 -; CHECK-NEXT: blr -; CHECK-NEXT: .LBB1_3: # %endblock -; CHECK-NEXT: clrldi 3, 3, 32 ; CHECK-NEXT: blr %call = tail call signext i32 @memcmp(i8* %x, i8* %y, i64 16) %not.tobool = icmp ne i32 %call, 0 @@ -77,13 +73,9 @@ define signext i32 @zeroEqualityTest03(i8* %x, i8* %y) { ; CHECK-NEXT: lbz 4, 6(4) ; CHECK-NEXT: cmplw 3, 4 ; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: beq 0, .LBB2_4 +; CHECK-NEXT: beqlr 0 ; CHECK-NEXT: .LBB2_3: # %res_block ; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: clrldi 3, 3, 32 -; 
CHECK-NEXT: blr -; CHECK-NEXT: .LBB2_4: # %endblock -; CHECK-NEXT: clrldi 3, 3, 32 ; CHECK-NEXT: blr %call = tail call signext i32 @memcmp(i8* %x, i8* %y, i64 7) %not.lnot = icmp ne i32 %call, 0 diff --git a/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll b/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll index f399b2584d0b..3c705bb3e6c1 100644 --- a/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll +++ b/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll @@ -328,7 +328,6 @@ entry: ; CHECK-LABEL: @getuc0 ; CHECK: mfvsrd 3, 34 ; CHECK: rldicl 3, 3, 8, 56 -; CHECK: clrldi 3, 3, 56 ; CHECK-LE-LABEL: @getuc0 ; CHECK-LE: mfvsrd 3, ; CHECK-LE: clrldi 3, 3, 56 @@ -342,11 +341,9 @@ entry: ; CHECK-LABEL: @getuc1 ; CHECK: mfvsrd 3, 34 ; CHECK: rldicl 3, 3, 16, 56 -; CHECK: clrldi 3, 3, 56 ; CHECK-LE-LABEL: @getuc1 ; CHECK-LE: mfvsrd 3, ; CHECK-LE: rldicl 3, 3, 56, 56 -; CHECK-LE: clrldi 3, 3, 56 } ; Function Attrs: norecurse nounwind readnone @@ -357,11 +354,9 @@ entry: ; CHECK-LABEL: @getuc2 ; CHECK: mfvsrd 3, 34 ; CHECK: rldicl 3, 3, 24, 56 -; CHECK: clrldi 3, 3, 56 ; CHECK-LE-LABEL: @getuc2 ; CHECK-LE: mfvsrd 3, ; CHECK-LE: rldicl 3, 3, 48, 56 -; CHECK-LE: clrldi 3, 3, 56 } ; Function Attrs: norecurse nounwind readnone @@ -372,11 +367,9 @@ entry: ; CHECK-LABEL: @getuc3 ; CHECK: mfvsrd 3, 34 ; CHECK: rldicl 3, 3, 32, 56 -; CHECK: clrldi 3, 3, 56 ; CHECK-LE-LABEL: @getuc3 ; CHECK-LE: mfvsrd 3, ; CHECK-LE: rldicl 3, 3, 40, 56 -; CHECK-LE: clrldi 3, 3, 56 } ; Function Attrs: norecurse nounwind readnone @@ -387,11 +380,9 @@ entry: ; CHECK-LABEL: @getuc4 ; CHECK: mfvsrd 3, 34 ; CHECK: rldicl 3, 3, 40, 56 -; CHECK: clrldi 3, 3, 56 ; CHECK-LE-LABEL: @getuc4 ; CHECK-LE: mfvsrd 3, ; CHECK-LE: rldicl 3, 3, 32, 56 -; CHECK-LE: clrldi 3, 3, 56 } ; Function Attrs: norecurse nounwind readnone @@ -402,11 +393,9 @@ entry: ; CHECK-LABEL: @getuc5 ; CHECK: mfvsrd 3, 34 ; CHECK: rldicl 3, 3, 48, 56 -; CHECK: clrldi 3, 3, 56 ; CHECK-LE-LABEL: @getuc5 ; 
CHECK-LE: mfvsrd 3, ; CHECK-LE: rldicl 3, 3, 24, 56 -; CHECK-LE: clrldi 3, 3, 56 } ; Function Attrs: norecurse nounwind readnone @@ -417,11 +406,9 @@ entry: ; CHECK-LABEL: @getuc6 ; CHECK: mfvsrd 3, 34 ; CHECK: rldicl 3, 3, 56, 56 -; CHECK: clrldi 3, 3, 56 ; CHECK-LE-LABEL: @getuc6 ; CHECK-LE: mfvsrd 3, ; CHECK-LE: rldicl 3, 3, 16, 56 -; CHECK-LE: clrldi 3, 3, 56 } ; Function Attrs: norecurse nounwind readnone @@ -435,7 +422,6 @@ entry: ; CHECK-LE-LABEL: @getuc7 ; CHECK-LE: mfvsrd 3, ; CHECK-LE: rldicl 3, 3, 8, 56 -; CHECK-LE: clrldi 3, 3, 56 } ; Function Attrs: norecurse nounwind readnone @@ -446,7 +432,6 @@ entry: ; CHECK-LABEL: @getuc8 ; CHECK: mfvsrd 3, ; CHECK: rldicl 3, 3, 8, 56 -; CHECK: clrldi 3, 3, 56 ; CHECK-LE-LABEL: @getuc8 ; CHECK-LE: mfvsrd 3, 34 ; CHECK-LE: clrldi 3, 3, 56 @@ -460,11 +445,9 @@ entry: ; CHECK-LABEL: @getuc9 ; CHECK: mfvsrd 3, ; CHECK: rldicl 3, 3, 16, 56 -; CHECK: clrldi 3, 3, 56 ; CHECK-LE-LABEL: @getuc9 ; CHECK-LE: mfvsrd 3, 34 ; CHECK-LE: rldicl 3, 3, 56, 56 -; CHECK-LE: clrldi 3, 3, 56 } ; Function Attrs: norecurse nounwind readnone @@ -475,11 +458,9 @@ entry: ; CHECK-LABEL: @getuc10 ; CHECK: mfvsrd 3, ; CHECK: rldicl 3, 3, 24, 56 -; CHECK: clrldi 3, 3, 56 ; CHECK-LE-LABEL: @getuc10 ; CHECK-LE: mfvsrd 3, 34 ; CHECK-LE: rldicl 3, 3, 48, 56 -; CHECK-LE: clrldi 3, 3, 56 } ; Function Attrs: norecurse nounwind readnone @@ -490,11 +471,9 @@ entry: ; CHECK-LABEL: @getuc11 ; CHECK: mfvsrd 3, ; CHECK: rldicl 3, 3, 32, 56 -; CHECK: clrldi 3, 3, 56 ; CHECK-LE-LABEL: @getuc11 ; CHECK-LE: mfvsrd 3, 34 ; CHECK-LE: rldicl 3, 3, 40, 56 -; CHECK-LE: clrldi 3, 3, 56 } ; Function Attrs: norecurse nounwind readnone @@ -505,11 +484,9 @@ entry: ; CHECK-LABEL: @getuc12 ; CHECK: mfvsrd 3, ; CHECK: rldicl 3, 3, 40, 56 -; CHECK: clrldi 3, 3, 56 ; CHECK-LE-LABEL: @getuc12 ; CHECK-LE: mfvsrd 3, 34 ; CHECK-LE: rldicl 3, 3, 32, 56 -; CHECK-LE: clrldi 3, 3, 56 } ; Function Attrs: norecurse nounwind readnone @@ -520,11 +497,9 @@ entry: ; CHECK-LABEL: @getuc13 ; 
CHECK: mfvsrd 3, ; CHECK: rldicl 3, 3, 48, 56 -; CHECK: clrldi 3, 3, 56 ; CHECK-LE-LABEL: @getuc13 ; CHECK-LE: mfvsrd 3, 34 ; CHECK-LE: rldicl 3, 3, 24, 56 -; CHECK-LE: clrldi 3, 3, 56 } ; Function Attrs: norecurse nounwind readnone @@ -535,11 +510,9 @@ entry: ; CHECK-LABEL: @getuc14 ; CHECK: mfvsrd 3, ; CHECK: rldicl 3, 3, 56, 56 -; CHECK: clrldi 3, 3, 56 ; CHECK-LE-LABEL: @getuc14 ; CHECK-LE: mfvsrd 3, 34 ; CHECK-LE: rldicl 3, 3, 16, 56 -; CHECK-LE: clrldi 3, 3, 56 } ; Function Attrs: norecurse nounwind readnone @@ -553,7 +526,6 @@ entry: ; CHECK-LE-LABEL: @getuc15 ; CHECK-LE: mfvsrd 3, 34 ; CHECK-LE: rldicl 3, 3, 8, 56 -; CHECK-LE: clrldi 3, 3, 56 } ; Function Attrs: norecurse nounwind readnone @@ -739,7 +711,6 @@ entry: ; CHECK-LABEL: @getus0 ; CHECK: mfvsrd 3, 34 ; CHECK: rldicl 3, 3, 16, 48 -; CHECK: clrldi 3, 3, 48 ; CHECK-LE-LABEL: @getus0 ; CHECK-LE: mfvsrd 3, ; CHECK-LE: clrldi 3, 3, 48 @@ -753,11 +724,9 @@ entry: ; CHECK-LABEL: @getus1 ; CHECK: mfvsrd 3, 34 ; CHECK: rldicl 3, 3, 32, 48 -; CHECK: clrldi 3, 3, 48 ; CHECK-LE-LABEL: @getus1 ; CHECK-LE: mfvsrd 3, ; CHECK-LE: rldicl 3, 3, 48, 48 -; CHECK-LE: clrldi 3, 3, 48 } ; Function Attrs: norecurse nounwind readnone @@ -768,11 +737,9 @@ entry: ; CHECK-LABEL: @getus2 ; CHECK: mfvsrd 3, 34 ; CHECK: rldicl 3, 3, 48, 48 -; CHECK: clrldi 3, 3, 48 ; CHECK-LE-LABEL: @getus2 ; CHECK-LE: mfvsrd 3, ; CHECK-LE: rldicl 3, 3, 32, 48 -; CHECK-LE: clrldi 3, 3, 48 } ; Function Attrs: norecurse nounwind readnone @@ -786,7 +753,6 @@ entry: ; CHECK-LE-LABEL: @getus3 ; CHECK-LE: mfvsrd 3, ; CHECK-LE: rldicl 3, 3, 16, 48 -; CHECK-LE: clrldi 3, 3, 48 } ; Function Attrs: norecurse nounwind readnone @@ -797,7 +763,6 @@ entry: ; CHECK-LABEL: @getus4 ; CHECK: mfvsrd 3, ; CHECK: rldicl 3, 3, 16, 48 -; CHECK: clrldi 3, 3, 48 ; CHECK-LE-LABEL: @getus4 ; CHECK-LE: mfvsrd 3, 34 ; CHECK-LE: clrldi 3, 3, 48 @@ -811,11 +776,9 @@ entry: ; CHECK-LABEL: @getus5 ; CHECK: mfvsrd 3, ; CHECK: rldicl 3, 3, 32, 48 -; CHECK: clrldi 3, 3, 48 ; 
CHECK-LE-LABEL: @getus5 ; CHECK-LE: mfvsrd 3, 34 ; CHECK-LE: rldicl 3, 3, 48, 48 -; CHECK-LE: clrldi 3, 3, 48 } ; Function Attrs: norecurse nounwind readnone @@ -826,11 +789,9 @@ entry: ; CHECK-LABEL: @getus6 ; CHECK: mfvsrd 3, ; CHECK: rldicl 3, 3, 48, 48 -; CHECK: clrldi 3, 3, 48 ; CHECK-LE-LABEL: @getus6 ; CHECK-LE: mfvsrd 3, 34 ; CHECK-LE: rldicl 3, 3, 32, 48 -; CHECK-LE: clrldi 3, 3, 48 } ; Function Attrs: norecurse nounwind readnone @@ -844,7 +805,6 @@ entry: ; CHECK-LE-LABEL: @getus7 ; CHECK-LE: mfvsrd 3, 34 ; CHECK-LE: rldicl 3, 3, 16, 48 -; CHECK-LE: clrldi 3, 3, 48 } ; Function Attrs: norecurse nounwind readnone @@ -973,11 +933,9 @@ entry: ; CHECK-LABEL: @getui0 ; CHECK: xxsldwi [[SHL:[0-9]+]], 34, 34, 3 ; CHECK: mfvsrwz 3, [[SHL]] -; CHECK: clrldi 3, 3, 32 ; CHECK-LE-LABEL: @getui0 ; CHECK-LE: xxswapd [[SHL:[0-9]+]], 34 ; CHECK-LE: mfvsrwz 3, [[SHL]] -; CHECK-LE: clrldi 3, 3, 32 } ; Function Attrs: norecurse nounwind readnone @@ -987,11 +945,9 @@ entry: ret i32 %vecext ; CHECK-LABEL: @getui1 ; CHECK: mfvsrwz 3, 34 -; CHECK: clrldi 3, 3, 32 ; CHECK-LE-LABEL: @getui1 ; CHECK-LE: xxsldwi [[SHL:[0-9]+]], 34, 34, 1 ; CHECK-LE: mfvsrwz 3, [[SHL]] -; CHECK-LE: clrldi 3, 3, 32 } ; Function Attrs: norecurse nounwind readnone @@ -1002,10 +958,8 @@ entry: ; CHECK-LABEL: @getui2 ; CHECK: xxsldwi [[SHL:[0-9]+]], 34, 34, 1 ; CHECK: mfvsrwz 3, [[SHL]] -; CHECK: clrldi 3, 3, 32 ; CHECK-LE-LABEL: @getui2 ; CHECK-LE: mfvsrwz 3, 34 -; CHECK-LE: clrldi 3, 3, 32 } ; Function Attrs: norecurse nounwind readnone @@ -1016,11 +970,9 @@ entry: ; CHECK-LABEL: @getui3 ; CHECK: xxswapd [[SHL:[0-9]+]], 34 ; CHECK: mfvsrwz 3, [[SHL]] -; CHECK: clrldi 3, 3, 32 ; CHECK-LE-LABEL: @getui3 ; CHECK-LE: xxsldwi [[SHL:[0-9]+]], 34, 34, 3 ; CHECK-LE: mfvsrwz 3, [[SHL]] -; CHECK-LE: clrldi 3, 3, 32 } ; Function Attrs: norecurse nounwind readnone diff --git a/llvm/test/CodeGen/PowerPC/ppc-ctr-dead-code.ll b/llvm/test/CodeGen/PowerPC/ppc-ctr-dead-code.ll index 71755f722cb2..2cfbb2d00df7 100644 --- 
a/llvm/test/CodeGen/PowerPC/ppc-ctr-dead-code.ll +++ b/llvm/test/CodeGen/PowerPC/ppc-ctr-dead-code.ll @@ -31,7 +31,7 @@ cleanup: ; preds = %for.body, %for.cond ; CHECK-LABEL: limit_loop ; CHECK: mtctr ; CHECK-NOT: addi {{[0-9]+}}, {{[0-9]+}}, 1 -; CHECK: bdnz +; CHECK: bdzlr ; CHECK: blr }