[Sparc] Emit large negative adjustments to SP/FP with sethi+xor instead of sethi+or. This generates correct code for both sparc32 and sparc64.

llvm-svn: 195576
This commit is contained in:
Venkatraman Govindaraju 2013-11-24 20:23:25 +00:00
parent 9c338504e5
commit 1116868a0d
5 changed files with 129 additions and 40 deletions

View File

@ -105,5 +105,22 @@ namespace llvm {
} }
llvm_unreachable("Invalid cond code"); llvm_unreachable("Invalid cond code");
} }
// Returns bits 10..31 of imm as a 22-bit field: the value is shifted right by
// ten and masked to 22 bits. This is the immediate handed to SETHI.
inline static unsigned HI22(int64_t imm) {
  const int64_t Mask22 = 0x3FFFFF;
  const int64_t Upper = imm >> 10;
  return static_cast<unsigned>(Upper & Mask22);
}
// Returns the low ten bits of imm (bits 0..9); all higher bits are cleared.
inline static unsigned LO10(int64_t imm) {
  const int64_t Mask10 = (1 << 10) - 1;
  return static_cast<unsigned>(imm & Mask10);
}
// Returns the 22-bit high field (see HI22) of the bitwise complement of imm.
// Used as the SETHI immediate when a negative value is built with
// sethi + xor.
inline static unsigned HIX22(int64_t imm) {
  const int64_t Complement = ~imm;
  return HI22(Complement);
}
// Returns the complement of the low ten bits of ~imm. Because LO10 yields a
// value no larger than 0x3FF, every bit above bit 9 of the result is set; the
// low ten bits equal the low ten bits of imm. Used as the XOR immediate that
// pairs with HIX22.
inline static unsigned LOX10(int64_t imm) {
  const unsigned LowOfComplement = LO10(~imm);
  return ~LowOfComplement;
}
} // end namespace llvm } // end namespace llvm
#endif #endif

View File

@ -33,6 +33,51 @@ DisableLeafProc("disable-sparc-leaf-proc",
cl::Hidden); cl::Hidden);
// Inserts, before MBBI in MBB, the instructions that add NumBytes to the
// stack pointer (O6). ADDri is the opcode used when NumBytes fits in the
// immediate field, ADDrr the opcode used when the amount has to be
// materialized in a register first.
void SparcFrameLowering::emitSPAdjustment(MachineFunction &MF,
                                          MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator MBBI,
                                          int NumBytes,
                                          unsigned ADDrr,
                                          unsigned ADDri) const {
  // Borrow the debug location of the insertion point when there is one.
  DebugLoc dl;
  if (MBBI != MBB.end())
    dl = MBBI->getDebugLoc();

  const SparcInstrInfo &TII =
    *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo());

  // Small adjustments are encoded directly as an immediate operand.
  const bool FitsInImmediate = NumBytes >= -4096 && NumBytes < 4096;
  if (FitsInImmediate) {
    BuildMI(MBB, MBBI, dl, TII.get(ADDri), SP::O6)
      .addReg(SP::O6)
      .addImm(NumBytes);
    return;
  }

  // Emit this the hard way. This clobbers G1 which we always know is
  // available here.
  //
  // Nonnegative amounts are built with sethi + or:
  //   sethi %hi(NumBytes), %g1
  //   or    %g1, %lo(NumBytes), %g1
  // Negative amounts are built with sethi + xor:
  //   sethi %hix(NumBytes), %g1
  //   xor   %g1, %lox(NumBytes), %g1
  // Either way the sequence finishes with:
  //   add   %sp, %g1, %sp
  const bool IsNegative = NumBytes < 0;
  const unsigned HighPart = IsNegative ? HIX22(NumBytes) : HI22(NumBytes);
  const unsigned LowPart = IsNegative ? LOX10(NumBytes) : LO10(NumBytes);
  const unsigned CombineOpc = IsNegative ? SP::XORri : SP::ORri;

  BuildMI(MBB, MBBI, dl, TII.get(SP::SETHIi), SP::G1)
    .addImm(HighPart);
  BuildMI(MBB, MBBI, dl, TII.get(CombineOpc), SP::G1)
    .addReg(SP::G1)
    .addImm(LowPart);
  BuildMI(MBB, MBBI, dl, TII.get(ADDrr), SP::O6)
    .addReg(SP::O6)
    .addReg(SP::G1);
}
void SparcFrameLowering::emitPrologue(MachineFunction &MF) const { void SparcFrameLowering::emitPrologue(MachineFunction &MF) const {
SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>(); SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>();
@ -55,21 +100,8 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF) const {
SAVErr = SP::ADDrr; SAVErr = SP::ADDrr;
} }
NumBytes = - SubTarget.getAdjustedFrameSize(NumBytes); NumBytes = - SubTarget.getAdjustedFrameSize(NumBytes);
emitSPAdjustment(MF, MBB, MBBI, NumBytes, SAVErr, SAVEri);
if (NumBytes >= -4096) {
BuildMI(MBB, MBBI, dl, TII.get(SAVEri), SP::O6)
.addReg(SP::O6).addImm(NumBytes);
} else {
// Emit this the hard way. This clobbers G1 which we always know is
// available here.
unsigned OffHi = (unsigned)NumBytes >> 10U;
BuildMI(MBB, MBBI, dl, TII.get(SP::SETHIi), SP::G1).addImm(OffHi);
// Emit G1 = G1 + I6
BuildMI(MBB, MBBI, dl, TII.get(SP::ORri), SP::G1)
.addReg(SP::G1).addImm(NumBytes & ((1 << 10)-1));
BuildMI(MBB, MBBI, dl, TII.get(SAVErr), SP::O6)
.addReg(SP::O6).addReg(SP::G1);
}
MachineModuleInfo &MMI = MF.getMMI(); MachineModuleInfo &MMI = MF.getMMI();
const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol(); MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
@ -100,11 +132,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
int Size = MI.getOperand(0).getImm(); int Size = MI.getOperand(0).getImm();
if (MI.getOpcode() == SP::ADJCALLSTACKDOWN) if (MI.getOpcode() == SP::ADJCALLSTACKDOWN)
Size = -Size; Size = -Size;
const SparcInstrInfo &TII =
*static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo());
if (Size) if (Size)
BuildMI(MBB, I, DL, TII.get(SP::ADDri), SP::O6).addReg(SP::O6) emitSPAdjustment(MF, MBB, I, Size, SP::ADDrr, SP::ADDri);
.addImm(Size);
} }
MBB.erase(I); MBB.erase(I);
} }
@ -131,21 +161,7 @@ void SparcFrameLowering::emitEpilogue(MachineFunction &MF,
return; return;
NumBytes = SubTarget.getAdjustedFrameSize(NumBytes); NumBytes = SubTarget.getAdjustedFrameSize(NumBytes);
emitSPAdjustment(MF, MBB, MBBI, NumBytes, SP::ADDrr, SP::ADDri);
if (NumBytes < 4096) {
BuildMI(MBB, MBBI, dl, TII.get(SP::ADDri), SP::O6)
.addReg(SP::O6).addImm(NumBytes);
} else {
// Emit this the hard way. This clobbers G1 which we always know is
// available here.
unsigned OffHi = (unsigned)NumBytes >> 10U;
BuildMI(MBB, MBBI, dl, TII.get(SP::SETHIi), SP::G1).addImm(OffHi);
// Emit G1 = G1 + I6
BuildMI(MBB, MBBI, dl, TII.get(SP::ORri), SP::G1)
.addReg(SP::G1).addImm(NumBytes & ((1 << 10)-1));
BuildMI(MBB, MBBI, dl, TII.get(SP::ADDrr), SP::O6)
.addReg(SP::O6).addReg(SP::G1);
}
} }
bool SparcFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { bool SparcFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {

View File

@ -49,6 +49,14 @@ private:
// Returns true if MF is a leaf procedure. // Returns true if MF is a leaf procedure.
bool isLeafProc(MachineFunction &MF) const; bool isLeafProc(MachineFunction &MF) const;
// Emits code that adjusts SP by NumBytes, inserted before MBBI in MBB.
// Called from the function prologue and epilogue and when eliminating
// call-frame pseudo instructions.
// ADDri is the opcode used when NumBytes fits in an immediate operand;
// ADDrr is the opcode used when the amount is first materialized in G1.
void emitSPAdjustment(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
int NumBytes, unsigned ADDrr, unsigned ADDri) const;
}; };
} // End llvm namespace } // End llvm namespace

View File

@ -105,19 +105,46 @@ static void replaceFI(MachineFunction &MF,
// encode it. // encode it.
MI.getOperand(FIOperandNum).ChangeToRegister(FramePtr, false); MI.getOperand(FIOperandNum).ChangeToRegister(FramePtr, false);
MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
} else { return;
// Otherwise, emit a G1 = SETHI %hi(offset). FIXME: it would be better to }
// scavenge a register here instead of reserving G1 all of the time.
const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
unsigned OffHi = (unsigned)Offset >> 10U;
BuildMI(*MI.getParent(), II, dl, TII.get(SP::SETHIi), SP::G1).addImm(OffHi); // FIXME: it would be better to scavenge a register here instead of
// reserving G1 all of the time.
if (Offset >= 0) {
// Emit nonnegative immediates with sethi + or.
// sethi %hi(Offset), %g1
// add %g1, %fp, %g1
// Insert G1+%lo(offset) into the user.
BuildMI(*MI.getParent(), II, dl, TII.get(SP::SETHIi), SP::G1)
.addImm(HI22(Offset));
// Emit G1 = G1 + I6 // Emit G1 = G1 + I6
BuildMI(*MI.getParent(), II, dl, TII.get(SP::ADDrr), SP::G1).addReg(SP::G1) BuildMI(*MI.getParent(), II, dl, TII.get(SP::ADDrr), SP::G1).addReg(SP::G1)
.addReg(FramePtr); .addReg(FramePtr);
// Insert: G1+%lo(offset) into the user. // Insert: G1+%lo(offset) into the user.
MI.getOperand(FIOperandNum).ChangeToRegister(SP::G1, false); MI.getOperand(FIOperandNum).ChangeToRegister(SP::G1, false);
MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset & ((1 << 10)-1)); MI.getOperand(FIOperandNum + 1).ChangeToImmediate(LO10(Offset));
return;
} }
// Emit Negative numbers with sethi + xor
// sethi %hix(Offset), %g1
// xor %g1, %lox(offset), %g1
// add %g1, %fp, %g1
// Insert: G1 + 0 into the user.
BuildMI(*MI.getParent(), II, dl, TII.get(SP::SETHIi), SP::G1)
.addImm(HIX22(Offset));
BuildMI(*MI.getParent(), II, dl, TII.get(SP::XORri), SP::G1)
.addReg(SP::G1).addImm(LOX10(Offset));
BuildMI(*MI.getParent(), II, dl, TII.get(SP::ADDrr), SP::G1).addReg(SP::G1)
.addReg(FramePtr);
// Insert: G1 + 0 into the user.
MI.getOperand(FIOperandNum).ChangeToRegister(SP::G1, false);
MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0);
} }

View File

@ -390,3 +390,24 @@ entry:
%2 = add nsw i32 %0, %1 %2 = add nsw i32 %0, %1
ret i32 %2 ret i32 %2
} }
; The 16384-byte buffer makes the stack adjustment and the frame offset too
; large for an immediate operand, so both negative values are expected to be
; materialized in %g1 with sethi + xor.
; CHECK-LABEL: test_large_stack
; Stack pointer adjustment performed by the save instruction:
; CHECK: sethi 16, %g1
; CHECK: xor %g1, -176, %g1
; CHECK: save %sp, %g1, %sp
; Frame-pointer-relative address computed before the call:
; CHECK: sethi 14, %g1
; CHECK: xor %g1, -1, %g1
; CHECK: add %g1, %fp, %g1
; CHECK: call use_buf
define i32 @test_large_stack() {
entry:
%buffer1 = alloca [16384 x i8], align 8
%buffer1.sub = getelementptr inbounds [16384 x i8]* %buffer1, i32 0, i32 0
%0 = call i32 @use_buf(i32 16384, i8* %buffer1.sub)
ret i32 %0
}
declare i32 @use_buf(i32, i8*)