From 8695a30066181caca84f6e4b4773925d6077e5de Mon Sep 17 00:00:00 2001 From: Anton Korobeynikov Date: Thu, 16 Jul 2009 13:51:12 +0000 Subject: [PATCH] Emit callee-saved regs spills / restores llvm-svn: 75943 --- llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp | 103 +++++++++++++++++- llvm/lib/Target/SystemZ/SystemZInstrInfo.h | 4 +- llvm/lib/Target/SystemZ/SystemZInstrInfo.td | 34 +++--- .../SystemZ/SystemZMachineFunctionInfo.h | 11 ++ .../Target/SystemZ/SystemZRegisterInfo.cpp | 66 ++++++++--- llvm/lib/Target/SystemZ/SystemZRegisterInfo.h | 4 + llvm/test/CodeGen/SystemZ/06-CallViaStack.ll | 2 +- llvm/test/CodeGen/SystemZ/06-LocalFrame.ll | 2 +- 8 files changed, 189 insertions(+), 37 deletions(-) diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp index 53f8d29d6ca9..335d460226b0 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -26,7 +26,30 @@ using namespace llvm; SystemZInstrInfo::SystemZInstrInfo(SystemZTargetMachine &tm) : TargetInstrInfoImpl(SystemZInsts, array_lengthof(SystemZInsts)), - RI(tm, *this), TM(tm) {} + RI(tm, *this), TM(tm) { + // Fill the spill offsets map + static const unsigned SpillOffsTab[][2] = { + { SystemZ::R2D, 0x10 }, + { SystemZ::R3D, 0x18 }, + { SystemZ::R4D, 0x20 }, + { SystemZ::R5D, 0x28 }, + { SystemZ::R6D, 0x30 }, + { SystemZ::R7D, 0x38 }, + { SystemZ::R8D, 0x40 }, + { SystemZ::R9D, 0x48 }, + { SystemZ::R10D, 0x50 }, + { SystemZ::R11D, 0x58 }, + { SystemZ::R12D, 0x60 }, + { SystemZ::R13D, 0x68 }, + { SystemZ::R14D, 0x70 }, + { SystemZ::R15D, 0x78 } + }; + + RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS); + + for (unsigned i = 0, e = array_lengthof(SpillOffsTab); i != e; ++i) + RegSpillOffsets[SpillOffsTab[i][0]] = SpillOffsTab[i][1]; +} void SystemZInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, @@ -117,13 +140,52 @@ bool SystemZInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock 
&MBB, MachineBasicBlock::iterator MI, const std::vector &CSI) const { - if (CSI.empty()) - return false; + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (MI != MBB.end()) DL = MI->getDebugLoc(); MachineFunction &MF = *MBB.getParent(); SystemZMachineFunctionInfo *MFI = MF.getInfo(); MFI->setCalleeSavedFrameSize(CSI.size() * 8); + // Scan the callee-saved and find the bounds of register spill area. + unsigned LowReg = 0, HighReg = 0, StartOffset = -1U, EndOffset = 0; + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned Reg = CSI[i].getReg(); + unsigned Offset = RegSpillOffsets[Reg]; + if (StartOffset > Offset) { + LowReg = Reg; StartOffset = Offset; + } + if (EndOffset < Offset) { + HighReg = Reg; EndOffset = RegSpillOffsets[Reg]; + } + } + + // Save information for epilogue inserter. + MFI->setLowReg(LowReg); MFI->setHighReg(HighReg); + + // Build a store instruction. Use STORE MULTIPLE instruction if there are many + // registers to store, otherwise - just STORE. + MachineInstrBuilder MIB = + BuildMI(MBB, MI, DL, get((LowReg == HighReg ? + SystemZ::MOV64mr : SystemZ::MOV64mrm))); + + // Add store operands. + MIB.addReg(SystemZ::R15D).addImm(StartOffset); + if (LowReg == HighReg) + MIB.addReg(0); + MIB.addReg(LowReg, RegState::Kill); + if (LowReg != HighReg) + MIB.addReg(HighReg, RegState::Kill); + + // Do a second scan adding regs as being killed by instruction + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned Reg = CSI[i].getReg(); + // Add the callee-saved register as live-in. It's killed at the spill. 
+ MBB.addLiveIn(Reg); + if (Reg != LowReg && Reg != HighReg) + MIB.addReg(Reg, RegState::ImplicitKill); + } + return true; } @@ -131,6 +193,41 @@ bool SystemZInstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI) const { + if (CSI.empty()) + return false; + + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (MI != MBB.end()) DL = MI->getDebugLoc(); + + MachineFunction &MF = *MBB.getParent(); + const TargetRegisterInfo *RegInfo= MF.getTarget().getRegisterInfo(); + SystemZMachineFunctionInfo *MFI = MF.getInfo(); + + unsigned LowReg = MFI->getLowReg(), HighReg = MFI->getHighReg(); + unsigned StartOffset = RegSpillOffsets[LowReg]; + + // Build a load instruction. Use LOAD MULTIPLE instruction if there are many + // registers to load, otherwise - just LOAD. + MachineInstrBuilder MIB = + BuildMI(MBB, MI, DL, get((LowReg == HighReg ? + SystemZ::MOV64rm : SystemZ::MOV64rmm))); + // Add load operands. + MIB.addReg(LowReg, RegState::Define); + if (LowReg != HighReg) + MIB.addReg(HighReg, RegState::Define); + + MIB.addReg((RegInfo->hasFP(MF) ? 
SystemZ::R11D : SystemZ::R15D)); + MIB.addImm(StartOffset); + if (LowReg == HighReg) + MIB.addReg(0); + + // Do a second scan adding regs as being defined by instruction + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned Reg = CSI[i].getReg(); + if (Reg != LowReg && Reg != HighReg) + MIB.addReg(Reg, RegState::ImplicitDefine); + } + return true; } diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h index 9f4613531905..1e0cc82c69b4 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h @@ -14,8 +14,9 @@ #ifndef LLVM_TARGET_SYSTEMZINSTRINFO_H #define LLVM_TARGET_SYSTEMZINSTRINFO_H -#include "llvm/Target/TargetInstrInfo.h" #include "SystemZRegisterInfo.h" +#include "llvm/ADT/IndexedMap.h" +#include "llvm/Target/TargetInstrInfo.h" namespace llvm { @@ -24,6 +25,7 @@ class SystemZTargetMachine; class SystemZInstrInfo : public TargetInstrInfoImpl { const SystemZRegisterInfo RI; SystemZTargetMachine &TM; + IndexedMap RegSpillOffsets; public: explicit SystemZInstrInfo(SystemZTargetMachine &TM); diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td index decc9268ac33..007df017ff27 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td @@ -195,19 +195,12 @@ def laaddr : Operand, //===----------------------------------------------------------------------===// // Instruction list.. -// ADJCALLSTACKDOWN/UP implicitly use/def SP because they may be expanded into -// a stack adjustment and the codegen must know that they may modify the stack -// pointer before prolog-epilog rewriting occurs. -// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become -// sub / add which can clobber R15D. 
-let Defs = [R15D], Uses = [R15D] in { def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i64imm:$amt), "#ADJCALLSTACKDOWN", [(SystemZcallseq_start timm:$amt)]>; def ADJCALLSTACKUP : Pseudo<(outs), (ins i64imm:$amt1, i64imm:$amt2), "#ADJCALLSTACKUP", [(SystemZcallseq_end timm:$amt1, timm:$amt2)]>; -} //===----------------------------------------------------------------------===// @@ -215,7 +208,7 @@ def ADJCALLSTACKUP : Pseudo<(outs), (ins i64imm:$amt1, i64imm:$amt2), // // FIXME: Provide proper encoding! -let isReturn = 1, isTerminator = 1, Uses = [R14D] in { +let isReturn = 1, isTerminator = 1 in { def RET : Pseudo<(outs), (ins), "br\t%r14", [(SystemZretflag)]>; } @@ -224,12 +217,9 @@ let isReturn = 1, isTerminator = 1, Uses = [R14D] in { // let isCall = 1 in - // All calls clobber the non-callee saved registers. R15 is marked as - // a use to prevent stack-pointer assignments that appear immediately - // before calls from potentially appearing dead. Uses for argument - // registers are added manually. - let Defs = [R0D, R1D, R3D, R4D, R5D, R14D, R15D], - Uses = [R15D] in { + // All calls clobber the non-callee saved registers (except R14 which we + // handle separately). Uses for argument registers are added manually. + let Defs = [R0D, R1D, R3D, R4D, R5D] in { def CALLi : Pseudo<(outs), (ins i64imm:$dst, variable_ops), "brasl\t%r14, $dst", [(SystemZcall imm:$dst)]>; def CALLr : Pseudo<(outs), (ins ADDR64:$dst, variable_ops), @@ -370,6 +360,22 @@ def MOV64m32r : Pseudo<(outs), (ins rriaddr:$dst, GR64:$src), "sty\t{$src, $dst}", [(truncstorei32 GR64:$src, rriaddr:$dst)]>; +// multiple regs moves +// FIXME: should we use multiple arg nodes? 
+def MOV32mrm : Pseudo<(outs), (ins riaddr:$dst, GR32:$from, GR32:$to), + "stmy\t{$from, $to, $dst}", + []>; +def MOV64mrm : Pseudo<(outs), (ins riaddr:$dst, GR64:$from, GR64:$to), + "stmg\t{$from, $to, $dst}", + []>; +def MOV32rmm : Pseudo<(outs GR32:$from, GR32:$to), (ins riaddr:$dst), + "lmy\t{$from, $to, $dst}", + []>; +def MOV64rmm : Pseudo<(outs GR64:$from, GR64:$to), (ins riaddr:$dst), + "lmg\t{$from, $to, $dst}", + []>; + + //===----------------------------------------------------------------------===// // Arithmetic Instructions diff --git a/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h index 9b45b0364ce1..e47d41962ea8 100644 --- a/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h @@ -25,6 +25,11 @@ class SystemZMachineFunctionInfo : public MachineFunctionInfo { /// stack frame in bytes. unsigned CalleeSavedFrameSize; + /// LowReg - Low register of range of callee-saved registers to store. + unsigned LowReg; + + /// HighReg - High register of range of callee-saved registers to store. 
+ unsigned HighReg; public: SystemZMachineFunctionInfo() : CalleeSavedFrameSize(0) {} @@ -32,6 +37,12 @@ public: unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; } void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; } + + unsigned getLowReg() const { return LowReg; } + void setLowReg(unsigned Reg) { LowReg = Reg; } + + unsigned getHighReg() const { return HighReg; } + void setHighReg(unsigned Reg) { HighReg = Reg; } }; } // End llvm namespace diff --git a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp index 0411569f7a83..087da0b0c4d2 100644 --- a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetFrameInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" @@ -35,7 +36,8 @@ const unsigned* SystemZRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { static const unsigned CalleeSavedRegs[] = { SystemZ::R6D, SystemZ::R7D, SystemZ::R8D, SystemZ::R9D, - SystemZ::R10D, SystemZ::R11D, SystemZ::R12D, SystemZ::R13D, SystemZ::R14D, + SystemZ::R10D, SystemZ::R11D, SystemZ::R12D, SystemZ::R13D, + SystemZ::R14D, SystemZ::R15D, SystemZ::F1, SystemZ::F3, SystemZ::F5, SystemZ::F7, 0 }; @@ -50,7 +52,7 @@ SystemZRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { &SystemZ::GR64RegClass, &SystemZ::GR64RegClass, &SystemZ::GR64RegClass, &SystemZ::GR64RegClass, &SystemZ::GR64RegClass, &SystemZ::GR64RegClass, - &SystemZ::GR64RegClass, + &SystemZ::GR64RegClass, &SystemZ::GR64RegClass, &SystemZ::FP64RegClass, &SystemZ::FP64RegClass, &SystemZ::FP64RegClass, &SystemZ::FP64RegClass, 0 }; @@ -66,16 +68,16 @@ BitVector 
SystemZRegisterInfo::getReservedRegs(const MachineFunction &MF) const return Reserved; } -// needsFP - Return true if the specified function should have a dedicated frame -// pointer register. This is true if the function has variable sized allocas or -// if frame pointer elimination is disabled. -// +/// hasFP - Return true if the specified function should have a dedicated +/// frame pointer register. This is true if the function has variable sized +/// allocas or if frame pointer elimination is disabled. bool SystemZRegisterInfo::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); return NoFramePointerElim || MFI->hasVarSizedObjects(); } bool SystemZRegisterInfo::hasReservedCallFrame(MachineFunction &MF) const { + // FIXME: Should we always have reserved call frame? return !MF.getFrameInfo()->hasVarSizedObjects(); } @@ -137,6 +139,25 @@ void SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.getOperand(i+1).ChangeToImmediate(Offset); } +void +SystemZRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS) const { + // Determine whether R15/R14 will ever be clobbered inside the function. And + // if yes - mark it as 'callee' saved. + MachineFrameInfo *FFI = MF.getFrameInfo(); + + if (FFI->hasCalls() + /* FIXME: function is varargs */ + /* FIXME: function grabs RA */ + /* FIXME: function calls eh_return */) + MF.getRegInfo().setPhysRegUsed(SystemZ::R14D); + + if (FFI->getObjectIndexEnd() != 0 || // Contains automatic variables + FFI->hasVarSizedObjects() // Function calls dynamic alloca's + /* FIXME: function is varargs */) + MF.getRegInfo().setPhysRegUsed(SystemZ::R15D); +} + /// emitSPUpdate - Emit a series of instructions to increment / decrement the /// stack pointer by a constant value. 
static @@ -177,7 +198,11 @@ void SystemZRegisterInfo::emitPrologue(MachineFunction &MF) const { uint64_t StackSize = MFI->getStackSize() - SystemZMFI->getCalleeSavedFrameSize(); - // FIXME: Skip the callee-saved push instructions. + // Skip the callee-saved push instructions. + while (MBBI != MBB.end() && + (MBBI->getOpcode() == SystemZ::MOV64mr || + MBBI->getOpcode() == SystemZ::MOV64mrm)) + ++MBBI; if (MBBI != MBB.end()) DL = MBBI->getDebugLoc(); @@ -223,23 +248,30 @@ void SystemZRegisterInfo::emitEpilogue(MachineFunction &MF, MFI->getStackSize() - SystemZMFI->getCalleeSavedFrameSize(); uint64_t NumBytes = StackSize - TFI.getOffsetOfLocalArea(); - // Skip the callee-saved regs load instructions. - MachineBasicBlock::iterator LastCSPop = MBBI; + // Skip the final terminator instruction. while (MBBI != MBB.begin()) { MachineBasicBlock::iterator PI = prior(MBBI); + --MBBI; if (!PI->getDesc().isTerminator()) break; - --MBBI; } - DL = MBBI->getDebugLoc(); + // During callee-saved restores emission stack frame was not yet finalized + // (and thus - the stack size was unknown). Tune the offset having full stack + // size in hands. 
+ if (SystemZMFI->getCalleeSavedFrameSize()) { + assert((MBBI->getOpcode() == SystemZ::MOV64rmm || + MBBI->getOpcode() == SystemZ::MOV64rm) && + "Expected to see callee-save register restore code"); - if (MFI->hasVarSizedObjects()) { - assert(0 && "Not implemented yet!"); - } else { - // adjust stack pointer back: R15 += numbytes - if (StackSize) - emitSPUpdate(MBB, MBBI, NumBytes, TII); + unsigned i = 0; + MachineInstr &MI = *MBBI; + while (!MI.getOperand(i).isImm()) { + ++i; + assert(i < MI.getNumOperands() && "Unexpected restore code!"); + } + + MI.getOperand(i).ChangeToImmediate(NumBytes + MI.getOperand(i).getImm()); } } diff --git a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h index 74db3d5a88b2..d800f2939893 100644 --- a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h @@ -49,6 +49,10 @@ struct SystemZRegisterInfo : public SystemZGenRegisterInfo { void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, RegScavenger *RS = NULL) const; + + void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS) const; + void emitPrologue(MachineFunction &MF) const; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; diff --git a/llvm/test/CodeGen/SystemZ/06-CallViaStack.ll b/llvm/test/CodeGen/SystemZ/06-CallViaStack.ll index 7b222d991a28..03cb164f7e67 100644 --- a/llvm/test/CodeGen/SystemZ/06-CallViaStack.ll +++ b/llvm/test/CodeGen/SystemZ/06-CallViaStack.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc | grep 168 | count 2 +; RUN: llvm-as < %s | llc | grep 168 | count 1 ; RUN: llvm-as < %s | llc | grep 160 | count 3 ; RUN: llvm-as < %s | llc | grep 328 | count 1 diff --git a/llvm/test/CodeGen/SystemZ/06-LocalFrame.ll b/llvm/test/CodeGen/SystemZ/06-LocalFrame.ll index 588c46d645cd..4f7535df7ad4 100644 --- a/llvm/test/CodeGen/SystemZ/06-LocalFrame.ll +++ b/llvm/test/CodeGen/SystemZ/06-LocalFrame.ll @@ -1,6 +1,6 @@ ; RUN: llvm-as 
< %s | llc | grep 160 | count 1 ; RUN: llvm-as < %s | llc | grep 328 | count 1 -; RUN: llvm-as < %s | llc | grep 168 | count 2 +; RUN: llvm-as < %s | llc | grep 168 | count 1 target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128" target triple = "s390x-unknown-linux-gnu"