Sparc: add an optional leaf-procedure optimization.

Summary of the changes in this patch:

  * DelaySlotFiller.cpp
      - findDelayInstr() returns MBB.end() when the slot is the first
        instruction of the block.
      - For RETL, a preceding RESTORErr or RESTOREri is moved into the delay
        slot (and RETL becomes RET); any other predecessor now falls through
        to the generic backwards search instead of returning MBB.end().
      - insertDefsUses() skips the implicit register uses of RETL.

  * SparcFrameLowering.{h,cpp}
      - New hidden option -disable-sparc-leaf-proc (initial value: true).
      - emitPrologue()/emitEpilogue() return early for leaf procedures.
      - isLeafProc(): no calls, %l0 and %o6 not used, no frame pointer needed.
      - remapRegsForLeafProc(): replaces each used %i register with the %o
        register of the same index and marks the %i register unused.
      - processFunctionBeforeCalleeSavedScan() sets the leaf-procedure flag
        and performs the remapping when the option allows it.

  * SparcMachineFunctionInfo.h
      - New IsLeafProc flag (initialised to false) with setLeafProc() and
        isLeafProc() accessors.

  * SparcRegisterInfo.cpp
      - eliminateFrameIndex() uses SP::O6 instead of SP::I6 as the base
        register for leaf procedures.

  * SparcRegisterInfo.td
      - IntRegs is now written as four (sequence ...) ranges: I0-I7, G0-G7,
        L0-L7, O0-O7.

  * test/CodeGen/SPARC/leafproc.ll
      - New FileCheck test, run with -disable-sparc-leaf-proc=0.

diff --git a/llvm/lib/Target/Sparc/DelaySlotFiller.cpp b/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
index db3f15900bbc..9961232cbcc3 100644
--- a/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
+++ b/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -135,18 +135,22 @@ Filler::findDelayInstr(MachineBasicBlock &MBB,
   bool sawLoad = false;
   bool sawStore = false;
 
-  MachineBasicBlock::iterator I = slot;
+  if (slot == MBB.begin())
+    return MBB.end();
 
   if (slot->getOpcode() == SP::RET)
     return MBB.end();
 
   if (slot->getOpcode() == SP::RETL) {
-    --I;
-    if (I->getOpcode() != SP::RESTORErr)
-      return MBB.end();
-    //change retl to ret
-    slot->setDesc(TII->get(SP::RET));
-    return I;
+    MachineBasicBlock::iterator J = slot;
+    --J;
+
+    if (J->getOpcode() == SP::RESTORErr
+        || J->getOpcode() == SP::RESTOREri) {
+      //change retl to ret
+      slot->setDesc(TII->get(SP::RET));
+      return J;
+    }
   }
 
   //Call's delay filler can def some of call's uses.
@@ -157,6 +161,8 @@ Filler::findDelayInstr(MachineBasicBlock &MBB,
 
   bool done = false;
 
+  MachineBasicBlock::iterator I = slot;
+
   while (!done) {
     done = (I == MBB.begin());
 
@@ -274,9 +280,13 @@ void Filler::insertDefsUses(MachineBasicBlock::iterator MI,
       continue;
     if (MO.isDef())
       RegDefs.insert(Reg);
-    if (MO.isUse())
+    if (MO.isUse()) {
+      //Implicit register uses of retl are return values and
+      //retl does not use them.
+      if (MO.isImplicit() && MI->getOpcode() == SP::RETL)
+        continue;
       RegUses.insert(Reg);
-
+    }
   }
 }
 
diff --git a/llvm/lib/Target/Sparc/SparcFrameLowering.cpp b/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
index afa8411802d3..3832c103885d 100644
--- a/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
+++ b/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
@@ -26,7 +26,18 @@
 
 using namespace llvm;
 
+static cl::opt<bool>
+DisableLeafProc("disable-sparc-leaf-proc",
+                cl::init(true),
+                cl::desc("Disable Sparc leaf procedure optimization."),
+                cl::Hidden);
+
+
 void SparcFrameLowering::emitPrologue(MachineFunction &MF) const {
+  SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>();
+  if (FuncInfo->isLeafProc())
+    return;
+
   MachineBasicBlock &MBB = MF.front();
   MachineFrameInfo *MFI = MF.getFrameInfo();
   const SparcInstrInfo &TII =
@@ -97,6 +108,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
 
 void SparcFrameLowering::emitEpilogue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
+  SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>();
+  if (FuncInfo->isLeafProc())
+    return;
   MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
   const SparcInstrInfo &TII =
     *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo());
@@ -121,3 +135,65 @@ bool SparcFrameLowering::hasFP(const MachineFunction &MF) const {
     MFI->hasVarSizedObjects() ||
     MFI->isFrameAddressTaken();
 }
+
+static bool verifyLeafProcRegUse(MachineRegisterInfo *MRI)
+{
+
+  for (unsigned reg = SP::I0; reg <= SP::I7; ++reg)
+    if (MRI->isPhysRegUsed(reg))
+      return false;
+
+  for (unsigned reg = SP::L0; reg <= SP::L7; ++reg)
+    if (MRI->isPhysRegUsed(reg))
+      return false;
+
+  return true;
+}
+
+bool SparcFrameLowering::isLeafProc(MachineFunction &MF) const
+{
+
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+
+  return !(MFI->hasCalls()              // has calls
+           || MRI.isPhysRegUsed(SP::L0) // Too many registers needed
+           || MRI.isPhysRegUsed(SP::O6) // %SP is used
+           || hasFP(MF));               // need %FP
+}
+
+void SparcFrameLowering::remapRegsForLeafProc(MachineFunction &MF) const {
+
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+
+  //remap %i[0-7] to %o[0-7]
+  for (unsigned reg = SP::I0; reg <= SP::I7; ++reg) {
+    if (!MRI.isPhysRegUsed(reg))
+      continue;
+    unsigned mapped_reg = (reg - SP::I0 + SP::O0);
+    assert(!MRI.isPhysRegUsed(mapped_reg));
+
+    //Replace I register with O register
+    MRI.replaceRegWith(reg, mapped_reg);
+
+    //mark the reg unused.
+    MRI.setPhysRegUnused(reg);
+  }
+
+  assert(verifyLeafProcRegUse(&MRI));
+#ifdef XDEBUG
+  MF.verify(0, "After LeafProc Remapping");
+#endif
+}
+
+void SparcFrameLowering::processFunctionBeforeCalleeSavedScan
+                  (MachineFunction &MF, RegScavenger *RS) const {
+
+  if (!DisableLeafProc && isLeafProc(MF)) {
+    SparcMachineFunctionInfo *MFI = MF.getInfo<SparcMachineFunctionInfo>();
+    MFI->setLeafProc(true);
+
+    remapRegsForLeafProc(MF);
+  }
+
+}
diff --git a/llvm/lib/Target/Sparc/SparcFrameLowering.h b/llvm/lib/Target/Sparc/SparcFrameLowering.h
index 4a3cef703ff4..731b1fb41c41 100644
--- a/llvm/lib/Target/Sparc/SparcFrameLowering.h
+++ b/llvm/lib/Target/Sparc/SparcFrameLowering.h
@@ -40,6 +40,15 @@ public:
 
   bool hasReservedCallFrame(const MachineFunction &MF) const;
   bool hasFP(const MachineFunction &MF) const;
+  void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+                                            RegScavenger *RS = NULL) const;
+
+private:
+  //Remap input registers to output registers for leaf procedure.
+  void remapRegsForLeafProc(MachineFunction &MF) const;
+
+  //Returns true if MF is a leaf procedure.
+  bool isLeafProc(MachineFunction &MF) const;
 };
 
 } // End llvm namespace
diff --git a/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.h b/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.h
index 90c27a4459a1..3783c16d9922 100644
--- a/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.h
+++ b/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.h
@@ -28,11 +28,16 @@ namespace llvm {
     /// SRetReturnReg - Holds the virtual register into which the sret
     /// argument is passed.
     unsigned SRetReturnReg;
+
+    /// IsLeafProc - True if the function is a leaf procedure.
+    bool IsLeafProc;
   public:
     SparcMachineFunctionInfo()
-      : GlobalBaseReg(0), VarArgsFrameOffset(0), SRetReturnReg(0) {}
+      : GlobalBaseReg(0), VarArgsFrameOffset(0), SRetReturnReg(0),
+        IsLeafProc(false) {}
     explicit SparcMachineFunctionInfo(MachineFunction &MF)
-      : GlobalBaseReg(0), VarArgsFrameOffset(0), SRetReturnReg(0) {}
+      : GlobalBaseReg(0), VarArgsFrameOffset(0), SRetReturnReg(0),
+        IsLeafProc(false) {}
 
     unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
     void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
@@ -42,6 +47,9 @@ namespace llvm {
 
     unsigned getSRetReturnReg() const { return SRetReturnReg; }
     void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
+
+    void setLeafProc(bool rhs) { IsLeafProc = rhs; }
+    bool isLeafProc() const { return IsLeafProc; }
   };
 }
 
diff --git a/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp b/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
index a5297dcae709..e860cbbaf410 100644
--- a/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -13,6 +13,7 @@
 
 #include "SparcRegisterInfo.h"
 #include "Sparc.h"
+#include "SparcMachineFunctionInfo.h"
 #include "SparcSubtarget.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/STLExtras.h"
@@ -89,12 +90,13 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   int64_t Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
     MI.getOperand(FIOperandNum + 1).getImm() +
     Subtarget.getStackPointerBias();
-
+  SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>();
+  unsigned FramePtr = (FuncInfo->isLeafProc()) ? SP::O6 : SP::I6;
   // Replace frame index with a frame pointer reference.
   if (Offset >= -4096 && Offset <= 4095) {
     // If the offset is small enough to fit in the immediate field, directly
     // encode it.
-    MI.getOperand(FIOperandNum).ChangeToRegister(SP::I6, false);
+    MI.getOperand(FIOperandNum).ChangeToRegister(FramePtr, false);
     MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
   } else {
     // Otherwise, emit a G1 = SETHI %hi(offset).  FIXME: it would be better to
@@ -103,7 +105,7 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
     BuildMI(*MI.getParent(), II, dl, TII.get(SP::SETHIi), SP::G1).addImm(OffHi);
     // Emit G1 = G1 + I6
     BuildMI(*MI.getParent(), II, dl, TII.get(SP::ADDrr), SP::G1).addReg(SP::G1)
-      .addReg(SP::I6);
+      .addReg(FramePtr);
     // Insert: G1+%lo(offset) into the user.
     MI.getOperand(FIOperandNum).ChangeToRegister(SP::G1, false);
     MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset & ((1 << 10)-1));
diff --git a/llvm/lib/Target/Sparc/SparcRegisterInfo.td b/llvm/lib/Target/Sparc/SparcRegisterInfo.td
index 04e92e747e0d..d1edcb6de1f4 100644
--- a/llvm/lib/Target/Sparc/SparcRegisterInfo.td
+++ b/llvm/lib/Target/Sparc/SparcRegisterInfo.td
@@ -144,20 +144,10 @@ def D15 : Rd<30, "F30", [F30, F31]>, DwarfRegNum<[87]>;
 // register class for that. The i64 type is included here to allow i64 patterns
 // using the integer instructions.
 def IntRegs : RegisterClass<"SP", [i32, i64], 32,
-                            (add I0, I1, I2, I3, I4, I5,
-                                 G1,
-                                 G2, G3, G4, // OK for use only in
-                                             // applications, not libraries.
-                                 G5, // OK for use in 64 bit mode.
-                                 L0, L1, L2, L3, L4, L5, L6, L7,
-                                 O0, O1, O2, O3, O4, O5, O7,
-                                 // Non-allocatable regs:
-                                 O6, // stack ptr
-                                 I6, // frame ptr
-                                 I7, // return address
-                                 G0, // constant zero
-                                 G6, G7 // reserved for kernel
-                                 )>;
+                            (add (sequence "I%u", 0, 7),
+                                 (sequence "G%u", 0, 7),
+                                 (sequence "L%u", 0, 7),
+                                 (sequence "O%u", 0, 7))>;
 
 // Register class for 64-bit mode, with a 64-bit spill slot size.
 // These are the same as the 32-bit registers, so TableGen will consider this
diff --git a/llvm/test/CodeGen/SPARC/leafproc.ll b/llvm/test/CodeGen/SPARC/leafproc.ll
new file mode 100644
index 000000000000..5abdfba3e71d
--- /dev/null
+++ b/llvm/test/CodeGen/SPARC/leafproc.ll
@@ -0,0 +1,57 @@
+; RUN: llc -march=sparc -disable-sparc-leaf-proc=0 < %s | FileCheck %s
+
+; CHECK:      func_nobody:
+; CHECK:      jmp %o7+8
+; CHECK-NEXT: nop
+define void @func_nobody() {
+entry:
+  ret void
+}
+
+
+; CHECK:      return_int_const:
+; CHECK:      jmp %o7+8
+; CHECK-NEXT: or %g0, 1729, %o0
+define i32 @return_int_const() {
+entry:
+  ret i32 1729
+}
+
+; CHECK:      return_double_const:
+; CHECK:      sethi
+; CHECK:      jmp %o7+8
+; CHECK-NEXT: ldd {{.*}}, %f0
+
+define double @return_double_const() {
+entry:
+  ret double 0.000000e+00
+}
+
+; CHECK:      leaf_proc_with_args:
+; CHECK:      add {{%o[0-1]}}, {{%o[0-1]}}, [[R:%[go][0-7]]]
+; CHECK:      jmp %o7+8
+; CHECK-NEXT: add [[R]], %o2, %o0
+
+define i32 @leaf_proc_with_args(i32 %a, i32 %b, i32 %c) {
+entry:
+  %0 = add nsw i32 %b, %a
+  %1 = add nsw i32 %0, %c
+  ret i32 %1
+}
+
+; CHECK:     leaf_proc_with_args_in_stack:
+; CHECK-DAG: ld [%sp+92], {{%[go][0-7]}}
+; CHECK-DAG: ld [%sp+96], {{%[go][0-7]}}
+; CHECK:     jmp %o7+8
+; CHECK-NEXT: add {{.*}}, %o0
+define i32 @leaf_proc_with_args_in_stack(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h) {
+entry:
+  %0 = add nsw i32 %b, %a
+  %1 = add nsw i32 %0, %c
+  %2 = add nsw i32 %1, %d
+  %3 = add nsw i32 %2, %e
+  %4 = add nsw i32 %3, %f
+  %5 = add nsw i32 %4, %g
+  %6 = add nsw i32 %5, %h
+  ret i32 %6
+}