forked from OSchip/llvm-project
[PPC] Set SP after loading data from stack frame, if no red zone is present
Follow-up to r280705: Make sure that the SP is only restored after all data is loaded from the stack frame, if there is no red zone. This completes the fix for https://llvm.org/bugs/show_bug.cgi?id=26519. Differential Revision: https://reviews.llvm.org/D24466 llvm-svn: 282174
This commit is contained in:
parent
0d4b6d5c98
commit
b66efb855c
|
@ -926,7 +926,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
|
|||
}
|
||||
|
||||
// Have we generated a STUX instruction to claim stack frame? If so,
|
||||
// the frame size will be placed in ScratchReg.
|
||||
// the negated frame size will be placed in ScratchReg.
|
||||
bool HasSTUX = false;
|
||||
|
||||
// This condition must be kept in sync with canUseAsPrologue.
|
||||
|
@ -986,33 +986,88 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
|
|||
if (!HasRedZone) {
|
||||
assert(!isPPC64 && "A red zone is always available on PPC64");
|
||||
if (HasSTUX) {
|
||||
// The frame size is in ScratchReg, and the SPReg has been advanced
|
||||
// (downwards) by the frame size: SPReg = old SPReg + ScratchReg.
|
||||
// Set ScratchReg to the original SPReg: ScratchReg = SPReg - ScratchReg.
|
||||
// The negated frame size is in ScratchReg, and the SPReg has been
|
||||
// decremented by the frame size: SPReg = old SPReg + ScratchReg.
|
||||
// Since FPOffset, PBPOffset, etc. are relative to the beginning of
|
||||
// the stack frame (i.e. the old SP), ideally, we would put the old
|
||||
// SP into a register and use it as the base for the stores. The
|
||||
// problem is that the only available register may be ScratchReg,
|
||||
// which could be R0, and R0 cannot be used as a base address.
|
||||
|
||||
// First, set ScratchReg to the old SP. This may need to be modified
|
||||
// later.
|
||||
BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
|
||||
.addReg(ScratchReg, RegState::Kill)
|
||||
.addReg(SPReg);
|
||||
|
||||
// Now that the stack frame has been allocated, save all the necessary
|
||||
// registers using ScratchReg as the base address.
|
||||
if (HasFP)
|
||||
BuildMI(MBB, MBBI, dl, StoreInst)
|
||||
.addReg(FPReg)
|
||||
.addImm(FPOffset)
|
||||
.addReg(ScratchReg);
|
||||
if (FI->usesPICBase())
|
||||
BuildMI(MBB, MBBI, dl, StoreInst)
|
||||
.addReg(PPC::R30)
|
||||
.addImm(PBPOffset)
|
||||
.addReg(ScratchReg);
|
||||
if (HasBP) {
|
||||
BuildMI(MBB, MBBI, dl, StoreInst)
|
||||
.addReg(BPReg)
|
||||
.addImm(BPOffset)
|
||||
.addReg(ScratchReg);
|
||||
BuildMI(MBB, MBBI, dl, OrInst, BPReg)
|
||||
.addReg(ScratchReg, RegState::Kill)
|
||||
.addReg(ScratchReg);
|
||||
if (ScratchReg == PPC::R0) {
|
||||
// R0 cannot be used as a base register, but it can be used as an
|
||||
// index in a store-indexed.
|
||||
int LastOffset = 0;
|
||||
if (HasFP) {
|
||||
// R0 += (FPOffset-LastOffset).
|
||||
// Need addic, since addi treats R0 as 0.
|
||||
BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
|
||||
.addReg(ScratchReg)
|
||||
.addImm(FPOffset-LastOffset);
|
||||
LastOffset = FPOffset;
|
||||
// Store FP into *R0.
|
||||
BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
|
||||
.addReg(FPReg, RegState::Kill) // Save FP.
|
||||
.addReg(PPC::ZERO)
|
||||
.addReg(ScratchReg); // This will be the index (R0 is ok here).
|
||||
}
|
||||
if (FI->usesPICBase()) {
|
||||
// R0 += (PBPOffset-LastOffset).
|
||||
BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
|
||||
.addReg(ScratchReg)
|
||||
.addImm(PBPOffset-LastOffset);
|
||||
LastOffset = PBPOffset;
|
||||
BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
|
||||
.addReg(PPC::R30, RegState::Kill) // Save PIC base pointer.
|
||||
.addReg(PPC::ZERO)
|
||||
.addReg(ScratchReg); // This will be the index (R0 is ok here).
|
||||
}
|
||||
if (HasBP) {
|
||||
// R0 += (BPOffset-LastOffset).
|
||||
BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
|
||||
.addReg(ScratchReg)
|
||||
.addImm(BPOffset-LastOffset);
|
||||
LastOffset = BPOffset;
|
||||
BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
|
||||
.addReg(BPReg, RegState::Kill) // Save BP.
|
||||
.addReg(PPC::ZERO)
|
||||
.addReg(ScratchReg); // This will be the index (R0 is ok here).
|
||||
// BP = R0-LastOffset
|
||||
BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg)
|
||||
.addReg(ScratchReg, RegState::Kill)
|
||||
.addImm(-LastOffset);
|
||||
}
|
||||
} else {
|
||||
// ScratchReg is not R0, so use it as the base register. It is
|
||||
// already set to the old SP, so we can use the offsets directly.
|
||||
|
||||
// Now that the stack frame has been allocated, save all the necessary
|
||||
// registers using ScratchReg as the base address.
|
||||
if (HasFP)
|
||||
BuildMI(MBB, MBBI, dl, StoreInst)
|
||||
.addReg(FPReg)
|
||||
.addImm(FPOffset)
|
||||
.addReg(ScratchReg);
|
||||
if (FI->usesPICBase())
|
||||
BuildMI(MBB, MBBI, dl, StoreInst)
|
||||
.addReg(PPC::R30)
|
||||
.addImm(PBPOffset)
|
||||
.addReg(ScratchReg);
|
||||
if (HasBP) {
|
||||
BuildMI(MBB, MBBI, dl, StoreInst)
|
||||
.addReg(BPReg)
|
||||
.addImm(BPOffset)
|
||||
.addReg(ScratchReg);
|
||||
BuildMI(MBB, MBBI, dl, OrInst, BPReg)
|
||||
.addReg(ScratchReg, RegState::Kill)
|
||||
.addReg(ScratchReg);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// The frame size is a known 16-bit constant (fitting in the immediate
|
||||
|
@ -1190,6 +1245,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
|
|||
// Do we have a frame pointer and/or base pointer for this function?
|
||||
bool HasFP = hasFP(MF);
|
||||
bool HasBP = RegInfo->hasBasePointer(MF);
|
||||
bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
|
||||
|
||||
unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1;
|
||||
unsigned BPReg = RegInfo->getBaseRegister(MF);
|
||||
|
@ -1202,6 +1258,8 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
|
|||
: PPC::LWZ );
|
||||
const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8
|
||||
: PPC::LIS );
|
||||
const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
|
||||
: PPC::OR );
|
||||
const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8
|
||||
: PPC::ORI );
|
||||
const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8
|
||||
|
@ -1223,7 +1281,6 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
|
|||
|
||||
if (HasFP) {
|
||||
if (isSVR4ABI) {
|
||||
MachineFrameInfo &MFI = MF.getFrameInfo();
|
||||
int FPIndex = FI->getFramePointerSaveIndex();
|
||||
assert(FPIndex && "No Frame Pointer Save Slot!");
|
||||
FPOffset = MFI.getObjectOffset(FPIndex);
|
||||
|
@ -1235,7 +1292,6 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
|
|||
int BPOffset = 0;
|
||||
if (HasBP) {
|
||||
if (isSVR4ABI) {
|
||||
MachineFrameInfo &MFI = MF.getFrameInfo();
|
||||
int BPIndex = FI->getBasePointerSaveIndex();
|
||||
assert(BPIndex && "No Base Pointer Save Slot!");
|
||||
BPOffset = MFI.getObjectOffset(BPIndex);
|
||||
|
@ -1246,7 +1302,6 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
|
|||
|
||||
int PBPOffset = 0;
|
||||
if (FI->usesPICBase()) {
|
||||
MachineFrameInfo &MFI = MF.getFrameInfo();
|
||||
int PBPIndex = FI->getPICBasePointerSaveIndex();
|
||||
assert(PBPIndex && "No PIC Base Pointer Save Slot!");
|
||||
PBPOffset = MFI.getObjectOffset(PBPIndex);
|
||||
|
@ -1282,9 +1337,25 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
|
|||
// indexed into with a simple LD/LWZ immediate offset operand.
|
||||
bool isLargeFrame = !isInt<16>(FrameSize);
|
||||
|
||||
// On targets without a red zone, the SP needs to be restored last, so that
|
||||
// all live contents of the stack frame are upwards of the SP. This means
|
||||
// that we cannot restore SP just now, since there may be more registers
|
||||
// to restore from the stack frame (e.g. R31). If the frame size is not
|
||||
// a simple immediate value, we will need a spare register to hold the
|
||||
// restored SP. If the frame size is known and small, we can simply adjust
|
||||
// the offsets of the registers to be restored, and still use SP to restore
|
||||
// them. In such case, the final update of SP will be to add the frame
|
||||
// size to it.
|
||||
// To simplify the code, set RBReg to the base register used to restore
|
||||
// values from the stack, and set SPAdd to the value that needs to be added
|
||||
// to the SP at the end. The default values are as if a red zone were present.
|
||||
unsigned RBReg = SPReg;
|
||||
unsigned SPAdd = 0;
|
||||
|
||||
if (FrameSize) {
|
||||
// In the prologue, the loaded (or persistent) stack pointer value is offset
|
||||
// by the STDU/STDUX/STWU/STWUX instruction. Add this offset back now.
|
||||
// In the prologue, the loaded (or persistent) stack pointer value is
|
||||
// offset by the STDU/STDUX/STWU/STWUX instruction. For targets with a red
|
||||
// zone, add this offset back now.
|
||||
|
||||
// If this function contained a fastcc call and GuaranteedTailCallOpt is
|
||||
// enabled (=> hasFastCall()==true) the fastcc call might contain a tail
|
||||
|
@ -1292,8 +1363,10 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
|
|||
// value of R31 in this case.
|
||||
if (FI->hasFastCall()) {
|
||||
assert(HasFP && "Expecting a valid frame pointer.");
|
||||
if (!HasRedZone)
|
||||
RBReg = FPReg;
|
||||
if (!isLargeFrame) {
|
||||
BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
|
||||
BuildMI(MBB, MBBI, dl, AddImmInst, RBReg)
|
||||
.addReg(FPReg).addImm(FrameSize);
|
||||
} else {
|
||||
BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
|
||||
|
@ -1302,27 +1375,55 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
|
|||
.addReg(ScratchReg, RegState::Kill)
|
||||
.addImm(FrameSize & 0xFFFF);
|
||||
BuildMI(MBB, MBBI, dl, AddInst)
|
||||
.addReg(SPReg)
|
||||
.addReg(RBReg)
|
||||
.addReg(FPReg)
|
||||
.addReg(ScratchReg);
|
||||
}
|
||||
} else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
|
||||
BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
|
||||
.addReg(SPReg)
|
||||
.addImm(FrameSize);
|
||||
if (HasRedZone) {
|
||||
BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
|
||||
.addReg(SPReg)
|
||||
.addImm(FrameSize);
|
||||
} else {
|
||||
// Make sure that adding FrameSize will not overflow the max offset
|
||||
// size.
|
||||
assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 &&
|
||||
"Local offsets should be negative");
|
||||
SPAdd = FrameSize;
|
||||
FPOffset += FrameSize;
|
||||
BPOffset += FrameSize;
|
||||
PBPOffset += FrameSize;
|
||||
}
|
||||
} else {
|
||||
BuildMI(MBB, MBBI, dl, LoadInst, SPReg)
|
||||
// We don't want to use ScratchReg as a base register, because it
|
||||
// could happen to be R0. Use FP instead, but make sure to preserve it.
|
||||
if (!HasRedZone) {
|
||||
// If FP is not saved, copy it to ScratchReg.
|
||||
if (!HasFP)
|
||||
BuildMI(MBB, MBBI, dl, OrInst, ScratchReg)
|
||||
.addReg(FPReg)
|
||||
.addReg(FPReg);
|
||||
RBReg = FPReg;
|
||||
}
|
||||
BuildMI(MBB, MBBI, dl, LoadInst, RBReg)
|
||||
.addImm(0)
|
||||
.addReg(SPReg);
|
||||
}
|
||||
}
|
||||
assert(RBReg != ScratchReg && "Should have avoided ScratchReg");
|
||||
// If there is no red zone, ScratchReg may be needed for holding a useful
|
||||
// value (although not the base register). Make sure it is not overwritten
|
||||
// too early.
|
||||
|
||||
assert((isPPC64 || !MustSaveCR) &&
|
||||
"Epilogue CR restoring supported only in 64-bit mode");
|
||||
|
||||
// If we need to save both the LR and the CR and we only have one available
|
||||
// scratch register, we must do them one at a time.
|
||||
// If we need to restore both the LR and the CR and we only have one
|
||||
// available scratch register, we must do them one at a time.
|
||||
if (MustSaveCR && SingleScratchReg && MustSaveLR) {
|
||||
// Here TempReg == ScratchReg, and in the absence of red zone ScratchReg
|
||||
// is live here.
|
||||
assert(HasRedZone && "Expecting red zone");
|
||||
BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg)
|
||||
.addImm(8)
|
||||
.addReg(SPReg);
|
||||
|
@ -1331,33 +1432,77 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
|
|||
.addReg(TempReg, getKillRegState(i == e-1));
|
||||
}
|
||||
|
||||
if (MustSaveLR)
|
||||
// Delay restoring of the LR if ScratchReg is needed. This is ok, since
|
||||
// LR is stored in the caller's stack frame. ScratchReg will be needed
|
||||
// if RBReg is anything other than SP. We shouldn't use ScratchReg as
|
||||
// a base register anyway, because it may happen to be R0.
|
||||
bool LoadedLR = false;
|
||||
if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) {
|
||||
BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
|
||||
.addImm(LROffset)
|
||||
.addReg(SPReg);
|
||||
.addImm(LROffset+SPAdd)
|
||||
.addReg(RBReg);
|
||||
LoadedLR = true;
|
||||
}
|
||||
|
||||
if (MustSaveCR &&
|
||||
!(SingleScratchReg && MustSaveLR)) // will only occur for PPC64
|
||||
if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) {
|
||||
// This will only occur for PPC64.
|
||||
assert(isPPC64 && "Expecting 64-bit mode");
|
||||
assert(RBReg == SPReg && "Should be using SP as a base register");
|
||||
BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg)
|
||||
.addImm(8)
|
||||
.addReg(SPReg);
|
||||
.addReg(RBReg);
|
||||
}
|
||||
|
||||
if (HasFP)
|
||||
BuildMI(MBB, MBBI, dl, LoadInst, FPReg)
|
||||
.addImm(FPOffset)
|
||||
.addReg(SPReg);
|
||||
if (HasFP) {
|
||||
// If there is a red zone, restore FP directly, since SP has already been
|
||||
// restored. Otherwise, restore the value of FP into ScratchReg.
|
||||
if (HasRedZone || RBReg == SPReg)
|
||||
BuildMI(MBB, MBBI, dl, LoadInst, FPReg)
|
||||
.addImm(FPOffset)
|
||||
.addReg(SPReg);
|
||||
else
|
||||
BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
|
||||
.addImm(FPOffset)
|
||||
.addReg(RBReg);
|
||||
}
|
||||
|
||||
if (FI->usesPICBase())
|
||||
// FIXME: On PPC32 SVR4, we must not spill before claiming the stackframe.
|
||||
BuildMI(MBB, MBBI, dl, LoadInst)
|
||||
.addReg(PPC::R30)
|
||||
.addImm(PBPOffset)
|
||||
.addReg(SPReg);
|
||||
.addReg(RBReg);
|
||||
|
||||
if (HasBP)
|
||||
BuildMI(MBB, MBBI, dl, LoadInst, BPReg)
|
||||
.addImm(BPOffset)
|
||||
.addReg(SPReg);
|
||||
.addReg(RBReg);
|
||||
|
||||
// There is nothing more to be loaded from the stack, so now we can
|
||||
// restore SP: SP = RBReg + SPAdd.
|
||||
if (RBReg != SPReg || SPAdd != 0) {
|
||||
assert(!HasRedZone && "This should not happen with red zone");
|
||||
// If SPAdd is 0, generate a copy.
|
||||
if (SPAdd == 0)
|
||||
BuildMI(MBB, MBBI, dl, OrInst, SPReg)
|
||||
.addReg(RBReg)
|
||||
.addReg(RBReg);
|
||||
else
|
||||
BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
|
||||
.addReg(RBReg)
|
||||
.addImm(SPAdd);
|
||||
|
||||
assert(RBReg != ScratchReg && "Should be using FP or SP as base register");
|
||||
if (RBReg == FPReg)
|
||||
BuildMI(MBB, MBBI, dl, OrInst, FPReg)
|
||||
.addReg(ScratchReg)
|
||||
.addReg(ScratchReg);
|
||||
|
||||
// Now load the LR from the caller's stack frame.
|
||||
if (MustSaveLR && !LoadedLR)
|
||||
BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
|
||||
.addImm(LROffset)
|
||||
.addReg(SPReg);
|
||||
}
|
||||
|
||||
if (MustSaveCR &&
|
||||
!(SingleScratchReg && MustSaveLR)) // will only occur for PPC64
|
||||
|
|
|
@ -25,7 +25,7 @@ entry:
|
|||
; LARGE-BSS-DAG: lwz [[VREG:[0-9]+]], [[VREF:\.LC[0-9]+]]-.LTOC(30)
|
||||
; LARGE-BSS-DAG: lwz {{[0-9]+}}, 0([[VREG]])
|
||||
; LARGE-BSS-DAG: stw {{[0-9]+}}, 8(1)
|
||||
; LARGE-BSS: lwz 30, -8(1)
|
||||
; LARGE-BSS: lwz 30, 24(1)
|
||||
; LARGE-BSS: [[VREF]]:
|
||||
; LARGE-BSS-NEXT: .p2align 2
|
||||
; LARGE-BSS-NEXT: .long bar
|
||||
|
|
|
@ -21,4 +21,4 @@ entry:
|
|||
; SMALL-BSS-DAG: lwz [[VREG:[0-9]+]], bar@GOT(30)
|
||||
; SMALL-BSS-DAG: lwz {{[0-9]+}}, 0([[VREG]])
|
||||
; SMALL-BSS: bl call_foo@PLT
|
||||
; SMALL-BSS: lwz 30, -8(1)
|
||||
; SMALL-BSS: lwz 30, 24(1)
|
||||
|
|
|
@ -0,0 +1,146 @@
|
|||
; Test that accesses of the stack remain within the range defined by R1,
|
||||
; i.e. that loads and stores only access the allocated stack. This does not
|
||||
; have to be the case when a red zone is present.
|
||||
|
||||
; Make sure that there is no red zone, i.e. ppc32 and SVR4 ABI.
|
||||
; RUN: llc -mtriple=powerpc--freebsd-elf < %s | FileCheck %s
|
||||
|
||||
; There are two ways that the stack pointer can be adjusted in the prologue:
|
||||
; - by adding an immediate value:
|
||||
; stwu r1, -imm(r1)
|
||||
; - by adding another register:
|
||||
; stwux r1, rx, r1
|
||||
;
|
||||
; The restoring of the stack pointer can be done:
|
||||
; - by adding an immediate value to it:
|
||||
; addi r1, r1, imm
|
||||
; - by copying the value from another register:
|
||||
; mr r1, rx
|
||||
|
||||
|
||||
; Nothing (no special features).
|
||||
;
|
||||
; CHECK-LABEL: test_n:
|
||||
; CHECK-NOT: stw {{[0-9]+}}, -{{[0-9]+}}(1)
|
||||
; CHECK: stwu 1, -[[SIZE:[0-9]+]](1)
|
||||
; CHECK: addi 1, 1, [[SIZE]]
|
||||
; CHECK-NOT: lwz {{[0-9]+}}, -{{[0-9]+}}(1)
|
||||
define i32 @test_n() local_unnamed_addr #0 {
|
||||
entry:
|
||||
%t0 = tail call i32 bitcast (i32 (...)* @bar0 to i32 ()*)() #0
|
||||
ret i32 %t0
|
||||
}
|
||||
|
||||
; Aligned object on the stack.
|
||||
;
|
||||
; CHECK-LABEL: test_a:
|
||||
; CHECK-NOT: stw {{[0-9]+}}, -{{[0-9]+}}(1)
|
||||
; CHECK: stwux 1, 1, {{[0-9]+}}
|
||||
; CHECK: mr 1, {{[0-9]+}}
|
||||
; CHECK-NOT: lwz {{[0-9]+}}, -{{[0-9]+}}(1)
|
||||
|
||||
define i32 @test_a() local_unnamed_addr #0 {
|
||||
entry:
|
||||
%t0 = alloca i32, align 128
|
||||
%t1 = tail call i32 bitcast (i32 (...)* @bar1 to i32 (i32*)*)(i32* %t0) #0
|
||||
ret i32 %t1
|
||||
}
|
||||
|
||||
; Dynamic allocation on the stack.
|
||||
;
|
||||
; CHECK-LABEL: test_d:
|
||||
; CHECK-NOT: stw {{[0-9]+}}, -{{[0-9]+}}(1)
|
||||
; CHECK: stwu 1, -[[SIZE:[0-9]+]](1)
|
||||
; CHECK: mr 1, {{[0-9]+}}
|
||||
; CHECK-NOT: lwz {{[0-9]+}}, -{{[0-9]+}}(1)
|
||||
define i32 @test_d(i32 %p0) local_unnamed_addr #0 {
|
||||
%t0 = alloca i32, i32 %p0, align 4
|
||||
%t1 = tail call i32 bitcast (i32 (...)* @bar1 to i32 (i32*)*)(i32* %t0) #0
|
||||
ret i32 %t1
|
||||
}
|
||||
|
||||
; Large stack (exceeds size of D-field).
|
||||
; CHECK-LABEL: test_s:
|
||||
; CHECK-NOT: stw {{[0-9]+}}, -{{[0-9]+}}(1)
|
||||
; CHECK: stwux 1, 1, {{[0-9]+}}
|
||||
; CHECK: mr 1, {{[0-9]+}}
|
||||
; CHECK-NOT: lwz {{[0-9]+}}, -{{[0-9]+}}(1)
|
||||
define i32 @test_s(i32 %p0) local_unnamed_addr #0 {
|
||||
entry:
|
||||
%t0 = alloca [16384 x i32]
|
||||
%t1 = getelementptr [16384 x i32], [16384 x i32]* %t0, i32 0, i32 0
|
||||
%t2 = tail call i32 bitcast (i32 (...)* @bar1 to i32 (i32*)*)(i32* %t1) #0
|
||||
ret i32 %t2
|
||||
}
|
||||
|
||||
; Combinations.
|
||||
|
||||
; CHECK-LABEL: test_ad:
|
||||
; CHECK-NOT: stw {{[0-9]+}}, -{{[0-9]+}}(1)
|
||||
; CHECK: stwux 1, 1, {{[0-9]+}}
|
||||
; CHECK: mr 1, {{[0-9]+}}
|
||||
; CHECK-NOT: lwz {{[0-9]+}}, -{{[0-9]+}}(1)
|
||||
define i32 @test_ad(i32 %p0) local_unnamed_addr #0 {
|
||||
%t0 = alloca i32, align 128
|
||||
%t1 = alloca i32, i32 %p0, align 4
|
||||
%t2 = tail call i32 bitcast (i32 (...)* @bar1 to i32 (i32*)*)(i32* %t0) #0
|
||||
%t3 = tail call i32 bitcast (i32 (...)* @bar1 to i32 (i32*)*)(i32* %t1) #0
|
||||
%t4 = add i32 %t2, %t3
|
||||
ret i32 %t4
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test_as:
|
||||
; CHECK-NOT: stw {{[0-9]+}}, -{{[0-9]+}}(1)
|
||||
; CHECK: stwux 1, 1, {{[0-9]+}}
|
||||
; CHECK: mr 1, {{[0-9]+}}
|
||||
; CHECK-NOT: lwz {{[0-9]+}}, -{{[0-9]+}}(1)
|
||||
define i32 @test_as() local_unnamed_addr #0 {
|
||||
%t0 = alloca i32, align 128
|
||||
%t1 = alloca [16384 x i32]
|
||||
%t2 = tail call i32 bitcast (i32 (...)* @bar1 to i32 (i32*)*)(i32* %t0) #0
|
||||
%t3 = getelementptr [16384 x i32], [16384 x i32]* %t1, i32 0, i32 0
|
||||
%t4 = tail call i32 bitcast (i32 (...)* @bar1 to i32 (i32*)*)(i32* %t3) #0
|
||||
%t5 = add i32 %t2, %t4
|
||||
ret i32 %t5
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test_ds:
|
||||
; CHECK-NOT: stw {{[0-9]+}}, -{{[0-9]+}}(1)
|
||||
; CHECK: stwux 1, 1, {{[0-9]+}}
|
||||
; CHECK: mr 1, {{[0-9]+}}
|
||||
; CHECK-NOT: lwz {{[0-9]+}}, -{{[0-9]+}}(1)
|
||||
define i32 @test_ds(i32 %p0) local_unnamed_addr #0 {
|
||||
%t0 = alloca i32, i32 %p0, align 4
|
||||
%t1 = alloca [16384 x i32]
|
||||
%t2 = tail call i32 bitcast (i32 (...)* @bar1 to i32 (i32*)*)(i32* %t0) #0
|
||||
%t3 = getelementptr [16384 x i32], [16384 x i32]* %t1, i32 0, i32 0
|
||||
%t4 = tail call i32 bitcast (i32 (...)* @bar1 to i32 (i32*)*)(i32* %t3) #0
|
||||
%t5 = add i32 %t2, %t4
|
||||
ret i32 %t5
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test_ads:
|
||||
; CHECK-NOT: stw {{[0-9]+}}, -{{[0-9]+}}(1)
|
||||
; CHECK: stwux 1, 1, {{[0-9]+}}
|
||||
; CHECK: mr 1, {{[0-9]+}}
|
||||
; CHECK-NOT: lwz {{[0-9]+}}, -{{[0-9]+}}(1)
|
||||
define i32 @test_ads(i32 %p0) local_unnamed_addr #0 {
|
||||
%t0 = alloca i32, align 128
|
||||
%t1 = alloca i32, i32 %p0, align 4
|
||||
%t2 = alloca [16384 x i32]
|
||||
|
||||
%t3 = tail call i32 bitcast (i32 (...)* @bar1 to i32 (i32*)*)(i32* %t0) #0
|
||||
%t4 = tail call i32 bitcast (i32 (...)* @bar1 to i32 (i32*)*)(i32* %t1) #0
|
||||
%t5 = add i32 %t3, %t4
|
||||
|
||||
%t6 = getelementptr [16384 x i32], [16384 x i32]* %t2, i32 0, i32 0
|
||||
%t7 = tail call i32 bitcast (i32 (...)* @bar1 to i32 (i32*)*)(i32* %t6) #0
|
||||
%t8 = add i32 %t5, %t7
|
||||
ret i32 %t7
|
||||
}
|
||||
|
||||
|
||||
declare i32 @bar0(...) local_unnamed_addr #0
|
||||
declare i32 @bar1(...) local_unnamed_addr #0
|
||||
|
||||
attributes #0 = { nounwind }
|
|
@ -83,18 +83,26 @@ entry:
|
|||
; CHECK-32-DAG: subfic 0, [[REG]], -64
|
||||
; CHECK-32: stwux 1, 1, 0
|
||||
; CHECK-32: subf 0, 0, 1
|
||||
; CHECK-32: stw 30, -8(0)
|
||||
; CHECK-32: mr 30, 0
|
||||
; CHECK-32: addic 0, 0, -4
|
||||
; CHECK-32: stwx 31, 0, 0
|
||||
; CHECK-32: addic 0, 0, -4
|
||||
; CHECK-32: stwx 30, 0, 0
|
||||
; CHECK-32: addic 30, 0, 8
|
||||
|
||||
; CHECK-32-PIC-LABEL: @goo
|
||||
; CHECK-32-PIC-DAG: mflr [[LR:[0-9]+]]
|
||||
; CHECK-32-PIC-DAG: clrlwi [[REG:[0-9]+]], 1, 27
|
||||
; CHECK-32-PIC-DAG: stw [[LR]], 4(1)
|
||||
; CHECK-32-PIC-DAG: subfic 0, [[REG]], -64
|
||||
; CHECK-32-PIC: stwux 1, 1, 0
|
||||
; CHECK-32-PIC: subf 0, 0, 1
|
||||
; CHECK-32-PIC: stw 29, -12(0)
|
||||
; CHECK-32-PIC-DAG: mr 29, 0
|
||||
; CHECK-32-PIC: stwux 1, 1, 0
|
||||
; CHECK-32-PIC: subf 0, 0, 1
|
||||
; CHECK-32-PIC: addic 0, 0, -4
|
||||
; CHECK-32-PIC: stwx 31, 0, 0
|
||||
; CHECK-32-PIC: addic 0, 0, -4
|
||||
; CHECK-32-PIC: stwx 30, 0, 0
|
||||
; CHECK-32-PIC: addic 0, 0, -4
|
||||
; CHECK-32-PIC: stwx 29, 0, 0
|
||||
; CHECK-32-PIC: addic 29, 0, 12
|
||||
|
||||
; The large-frame-size case.
|
||||
define void @hoo(%struct.s* byval nocapture readonly %a) {
|
||||
|
@ -138,9 +146,11 @@ entry:
|
|||
; CHECK-32-DAG: subfc 0, [[REG3]], [[REG2]]
|
||||
; CHECK-32: stwux 1, 1, 0
|
||||
; CHECK-32: subf 0, 0, 1
|
||||
; CHECK-32-DAG: stw 31, -4(0)
|
||||
; CHECK-32-DAG: stw 30, -8(0)
|
||||
; CHECK-32: mr 30, 0
|
||||
; CHECK-32: addic 0, 0, -4
|
||||
; CHECK-32: stwx 31, 0, 0
|
||||
; CHECK-32: addic 0, 0, -4
|
||||
; CHECK-32: stwx 30, 0, 0
|
||||
; CHECK-32: addic 30, 0, 8
|
||||
|
||||
; CHECK-32: blr
|
||||
|
||||
|
@ -152,10 +162,13 @@ entry:
|
|||
; CHECK-32-PIC-DAG: ori [[REG2:[0-9]+]], [[REG1]], 51904
|
||||
; CHECK-32-PIC-DAG: stw 0, 4(1)
|
||||
; CHECK-32-PIC-DAG: subfc 0, [[REG3]], [[REG2]]
|
||||
; CHECK-32-PIC: stwux 1, 1, 0
|
||||
; CHECK-32-PIC: stw 29, -12(0)
|
||||
; CHECK-32-PIC: subf 0, 0, 1
|
||||
; CHECK-32-PIC: mr 29, 0
|
||||
; CHECK-32-PIC: stwux 1, 1, 0
|
||||
; CHECK-32-PIC: subf 0, 0, 1
|
||||
; CHECK-32-PIC: addic 0, 0, -4
|
||||
; CHECK-32-PIC: stwx 31, 0, 0
|
||||
; CHECK-32-PIC: addic 0, 0, -8
|
||||
; CHECK-32-PIC: stwx 29, 0, 0
|
||||
; CHECK-32-PIC: addic 29, 0, 12
|
||||
|
||||
; CHECK-32: blr
|
||||
|
||||
|
|
Loading…
Reference in New Issue