[SystemZ] Implement -fstack-clash-protection

Probing of allocated stack space is now done when this option is passed. The
purpose is to protect against the stack clash attack (see
https://www.qualys.com/2017/06/19/stack-clash/stack-clash.txt).

Review: Ulrich Weigand

Differential Revision: https://reviews.llvm.org/D78717
This commit is contained in:
Jonas Paulsson 2020-04-21 18:16:29 +02:00
parent 92cb0ce8f8
commit 515bfc66ea
16 changed files with 750 additions and 69 deletions

View File

@ -94,8 +94,8 @@ New Compiler Flags
------------------
- -fstack-clash-protection will provide a protection against the stack clash
attack for x86 architecture through automatic probing of each page of
allocated stack.
attack for x86 and s390x architectures through automatic probing of each page
of allocated stack.
- -ffp-exception-behavior={ignore,maytrap,strict} allows the user to specify
the floating-point exception behavior. The default setting is ``ignore``.

View File

@ -64,6 +64,10 @@ public:
ArrayRef<TargetInfo::AddlRegName> getGCCAddlRegNames() const override;
bool isSPRegName(StringRef RegName) const override {
return RegName.equals("r15");
}
bool validateAsmConstraint(const char *&Name,
TargetInfo::ConstraintInfo &info) const override;

View File

@ -2997,7 +2997,7 @@ static void RenderSCPOptions(const ToolChain &TC, const ArgList &Args,
if (!EffectiveTriple.isOSLinux())
return;
if (!EffectiveTriple.isX86())
if (!EffectiveTriple.isX86() && !EffectiveTriple.isSystemZ())
return;
if (Args.hasFlag(options::OPT_fstack_clash_protection,

View File

@ -1,5 +1,6 @@
// Check the correct function attributes are generated
// RUN: %clang_cc1 -triple x86_64-linux -O0 -S -emit-llvm -o- %s -fstack-clash-protection | FileCheck %s
// RUN: %clang_cc1 -triple s390x-linux-gnu -O0 -S -emit-llvm -o- %s -fstack-clash-protection | FileCheck %s
// CHECK: define void @large_stack() #[[A:.*]] {
void large_stack() {

View File

@ -0,0 +1,13 @@
// RUN: %clang -target s390x-linux-gnu -fstack-clash-protection -### %s 2>&1 | FileCheck %s -check-prefix=SystemZ
// SystemZ: "-fstack-clash-protection"
// RUN: %clang -target s390x-linux-gnu -fstack-clash-protection -S -emit-llvm -o %t.ll %s 2>&1 | FileCheck %s -check-prefix=SystemZ-warn
// SystemZ-warn: warning: Unable to protect inline asm that clobbers stack pointer against stack clash
int foo(int c) {
int r;
__asm__("ag %%r15, %0"
:
: "rm"(c)
: "r15");
return r;
}

View File

@ -739,6 +739,11 @@ public:
return getArch() == Triple::riscv32 || getArch() == Triple::riscv64;
}
/// Tests whether the target is SystemZ (s390x).
bool isSystemZ() const {
  return getArch() == Triple::systemz;
}
/// Tests whether the target is x86 (32- or 64-bit).
bool isX86() const {
return getArch() == Triple::x86 || getArch() == Triple::x86_64;

View File

@ -374,12 +374,39 @@ static void emitIncrement(MachineBasicBlock &MBB,
}
}
// Emit a CFI instruction recording the new CFA offset. Offset is the
// (negative) SP displacement from the CFA, so the recorded DefCfaOffset
// is its negation.
static void buildCFAOffs(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MBBI,
                         const DebugLoc &DL, int Offset,
                         const SystemZInstrInfo *ZII) {
  MachineFunction &MF = *MBB.getParent();
  unsigned CFIIndex =
      MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, -Offset));
  BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex);
}
// Emit a CFI instruction switching the CFA base register to Reg.
static void buildDefCFAReg(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI,
                           const DebugLoc &DL, unsigned Reg,
                           const SystemZInstrInfo *ZII) {
  MachineFunction &MF = *MBB.getParent();
  const MCRegisterInfo *MRI = MF.getMMI().getContext().getRegisterInfo();
  // CFI identifies registers by DWARF number, not by LLVM register enum.
  unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(
      nullptr, MRI->getDwarfRegNum(Reg, true)));
  BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex);
}
void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
const SystemZSubtarget &STI = MF.getSubtarget<SystemZSubtarget>();
const SystemZTargetLowering &TLI = *STI.getTargetLowering();
MachineFrameInfo &MFFrame = MF.getFrameInfo();
auto *ZII =
static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
auto *ZII = static_cast<const SystemZInstrInfo *>(STI.getInstrInfo());
SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
MachineBasicBlock::iterator MBBI = MBB.begin();
MachineModuleInfo &MMI = MF.getMMI();
@ -462,13 +489,22 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
// Allocate StackSize bytes.
int64_t Delta = -int64_t(StackSize);
emitIncrement(MBB, MBBI, DL, SystemZ::R15D, Delta, ZII);
// Add CFI for the allocation.
unsigned CFIIndex = MF.addFrameInst(
MCCFIInstruction::cfiDefCfaOffset(nullptr, -SPOffsetFromCFA - Delta));
BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
const unsigned ProbeSize = TLI.getStackProbeSize(MF);
bool FreeProbe = (ZFI->getSpillGPRRegs().GPROffset &&
(ZFI->getSpillGPRRegs().GPROffset + StackSize) < ProbeSize);
if (!FreeProbe &&
MF.getSubtarget().getTargetLowering()->hasInlineStackProbe(MF)) {
// Stack probing may involve looping, but splitting the prologue block
// is not possible at this point since it would invalidate the
// SaveBlocks / RestoreBlocks sets of PEI in the single block function
// case. Build a pseudo to be handled later by inlineStackProbe().
BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::PROBED_STACKALLOC))
.addImm(StackSize);
}
else {
emitIncrement(MBB, MBBI, DL, SystemZ::R15D, Delta, ZII);
buildCFAOffs(MBB, MBBI, DL, SPOffsetFromCFA + Delta, ZII);
}
SPOffsetFromCFA += Delta;
if (StoreBackchain) {
@ -486,11 +522,7 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
.addReg(SystemZ::R15D);
// Add CFI for the new frame location.
unsigned HardFP = MRI->getDwarfRegNum(SystemZ::R11D, true);
unsigned CFIIndex = MF.addFrameInst(
MCCFIInstruction::createDefCfaRegister(nullptr, HardFP));
BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
buildDefCFAReg(MBB, MBBI, DL, SystemZ::R11D, ZII);
// Mark the FramePtr as live at the beginning of every block except
// the entry block. (We'll have marked R11 as live on entry when
@ -583,6 +615,91 @@ void SystemZFrameLowering::emitEpilogue(MachineFunction &MF,
}
}
// Expand the PROBED_STACKALLOC pseudo (emitted by emitPrologue when stack
// clash protection is enabled) into real stack allocation plus probing.
// The frame is allocated in steps of at most the stack probe size, and each
// step is probed with a volatile load so that any guard page is touched.
// A few steps are emitted unrolled; larger frames use an allocation loop.
void SystemZFrameLowering::inlineStackProbe(MachineFunction &MF,
                                            MachineBasicBlock &PrologMBB) const {
  auto *ZII =
      static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
  const SystemZSubtarget &STI = MF.getSubtarget<SystemZSubtarget>();
  const SystemZTargetLowering &TLI = *STI.getTargetLowering();

  // Find the PROBED_STACKALLOC pseudo in the prologue block, if any.
  MachineInstr *StackAllocMI = nullptr;
  for (MachineInstr &MI : PrologMBB)
    if (MI.getOpcode() == SystemZ::PROBED_STACKALLOC) {
      StackAllocMI = &MI;
      break;
    }
  if (StackAllocMI == nullptr)
    return;  // Nothing to probe in this function.
  uint64_t StackSize = StackAllocMI->getOperand(0).getImm();
  const unsigned ProbeSize = TLI.getStackProbeSize(MF);
  // Split the allocation into NumFullBlocks steps of ProbeSize bytes plus
  // one final step of Residual bytes.
  uint64_t NumFullBlocks = StackSize / ProbeSize;
  uint64_t Residual = StackSize % ProbeSize;
  int64_t SPOffsetFromCFA = -SystemZMC::CFAOffsetFromInitialSP;
  MachineBasicBlock *MBB = &PrologMBB;
  MachineBasicBlock::iterator MBBI = StackAllocMI;
  const DebugLoc DL = StackAllocMI->getDebugLoc();

  // Allocate a block of Size bytes on the stack and probe it.
  auto allocateAndProbe = [&](MachineBasicBlock &InsMBB,
                              MachineBasicBlock::iterator InsPt, unsigned Size,
                              bool EmitCFI) -> void {
    emitIncrement(InsMBB, InsPt, DL, SystemZ::R15D, -int64_t(Size), ZII);
    if (EmitCFI) {
      SPOffsetFromCFA -= Size;
      buildCFAOffs(InsMBB, InsPt, DL, SPOffsetFromCFA, ZII);
    }
    // Probe by means of a volatile compare: an 8-byte load near the low end
    // of the newly allocated block (offset Size - 8 from the new SP).
    MachineMemOperand *MMO = MF.getMachineMemOperand(MachinePointerInfo(),
      MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad, 8, Align(1));
    BuildMI(InsMBB, InsPt, DL, ZII->get(SystemZ::CG))
      .addReg(SystemZ::R0D, RegState::Undef)
      .addReg(SystemZ::R15D).addImm(Size - 8).addReg(0)
      .addMemOperand(MMO);
  };

  if (NumFullBlocks < 3) {
    // Emit unrolled probe statements.
    for (unsigned int i = 0; i < NumFullBlocks; i++)
      allocateAndProbe(*MBB, MBBI, ProbeSize, true/*EmitCFI*/);
  } else {
    // Emit a loop probing the pages. R1 holds the target SP value; the loop
    // decrements R15 by ProbeSize and probes until R15 reaches R1. While the
    // loop runs, the CFA is expressed relative to R1 (which already holds the
    // final SP), since R15 changes on every iteration.
    uint64_t LoopAlloc = ProbeSize * NumFullBlocks;
    SPOffsetFromCFA -= LoopAlloc;

    BuildMI(*MBB, MBBI, DL, ZII->get(SystemZ::LGR), SystemZ::R1D)
      .addReg(SystemZ::R15D);
    buildDefCFAReg(*MBB, MBBI, DL, SystemZ::R1D, ZII);
    emitIncrement(*MBB, MBBI, DL, SystemZ::R1D, -int64_t(LoopAlloc), ZII);
    buildCFAOffs(*MBB, MBBI, DL, -int64_t(SystemZMC::CallFrameSize + LoopAlloc),
                 ZII);

    MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MBBI, MBB);
    MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(MBB);
    MBB->addSuccessor(LoopMBB);
    LoopMBB->addSuccessor(LoopMBB);   // back edge
    LoopMBB->addSuccessor(DoneMBB);

    MBB = LoopMBB;
    // Loop body: allocate/probe one block, then loop while R15 > R1.
    // No per-iteration CFI: the CFA is based on R1 here.
    allocateAndProbe(*MBB, MBB->end(), ProbeSize, false/*EmitCFI*/);
    BuildMI(*MBB, MBB->end(), DL, ZII->get(SystemZ::CLGR))
      .addReg(SystemZ::R15D).addReg(SystemZ::R1D);
    BuildMI(*MBB, MBB->end(), DL, ZII->get(SystemZ::BRC))
      .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_GT).addMBB(MBB);

    MBB = DoneMBB;
    MBBI = DoneMBB->begin();
    // After the loop the CFA is based on R15 again.
    buildDefCFAReg(*MBB, MBBI, DL, SystemZ::R15D, ZII);

    recomputeLiveIns(*DoneMBB);
    recomputeLiveIns(*LoopMBB);
  }

  // Allocate and probe the remaining (sub-ProbeSize) part of the frame.
  if (Residual)
    allocateAndProbe(*MBB, MBBI, Residual, true/*EmitCFI*/);

  StackAllocMI->eraseFromParent();
}
bool SystemZFrameLowering::hasFP(const MachineFunction &MF) const {
return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
MF.getFrameInfo().hasVarSizedObjects() ||

View File

@ -43,6 +43,8 @@ public:
RegScavenger *RS) const override;
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void inlineStackProbe(MachineFunction &MF,
MachineBasicBlock &PrologMBB) const override;
bool hasFP(const MachineFunction &MF) const override;
bool hasReservedCallFrame(const MachineFunction &MF) const override;
int getFrameIndexReference(const MachineFunction &MF, int FI,

View File

@ -826,6 +826,15 @@ bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
return SystemZVectorConstantInfo(Imm).isVectorConstantLegal(Subtarget);
}
/// Returns true if stack probing through inline assembly is requested,
/// i.e. the function carries a "probe-stack"="inline-asm" attribute.
bool SystemZTargetLowering::hasInlineStackProbe(MachineFunction &MF) const {
  const Function &F = MF.getFunction();
  if (!F.hasFnAttribute("probe-stack"))
    return false;
  return F.getFnAttribute("probe-stack").getValueAsString() == "inline-asm";
}
bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
// We can use CGFI or CLGFI.
return isInt<32>(Imm) || isUInt<32>(Imm);
@ -3428,10 +3437,17 @@ lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
// Get the new stack pointer value.
SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
// Copy the new stack pointer back.
Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
SDValue NewSP;
if (hasInlineStackProbe(MF)) {
NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL,
DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace);
Chain = NewSP.getValue(1);
}
else {
NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
// Copy the new stack pointer back.
Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
}
// The allocated data lives above the 160 bytes allocated for the standard
// frame, plus any outgoing stack arguments. We don't know how much that
@ -5400,6 +5416,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(BR_CCMASK);
OPCODE(SELECT_CCMASK);
OPCODE(ADJDYNALLOC);
OPCODE(PROBED_ALLOCA);
OPCODE(POPCNT);
OPCODE(SMUL_LOHI);
OPCODE(UMUL_LOHI);
@ -6825,38 +6842,29 @@ SystemZTargetLowering::ComputeNumSignBitsForTargetNode(
return 1;
}
// Return the stack probe size for MF: the value of the "stack-probe-size"
// function attribute (default 4096), rounded down to a multiple of the
// stack alignment but never below the alignment itself.
unsigned
SystemZTargetLowering::getStackProbeSize(MachineFunction &MF) const {
  const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
  unsigned StackAlign = TFI->getStackAlignment();
  assert(StackAlign >=1 && isPowerOf2_32(StackAlign) &&
         "Unexpected stack alignment");
  // The default stack probe size is 4096 if the function has no
  // stack-probe-size attribute.
  const Function &Fn = MF.getFunction();
  unsigned StackProbeSize = 4096;
  if (Fn.hasFnAttribute("stack-probe-size"))
    Fn.getFnAttribute("stack-probe-size")
        .getValueAsString()
        .getAsInteger(0, StackProbeSize);
  // Round down to the stack alignment (a power of two).
  StackProbeSize &= ~(StackAlign - 1);
  if (!StackProbeSize)
    StackProbeSize = StackAlign;
  return StackProbeSize;
}
//===----------------------------------------------------------------------===//
// Custom insertion
//===----------------------------------------------------------------------===//
// Create a new basic block after MBB.
static MachineBasicBlock *emitBlockAfter(MachineBasicBlock *MBB) {
MachineFunction &MF = *MBB->getParent();
MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock());
MF.insert(std::next(MachineFunction::iterator(MBB)), NewMBB);
return NewMBB;
}
// Split MBB after MI and return the new block (the one that contains
// instructions after MI).
static MachineBasicBlock *splitBlockAfter(MachineBasicBlock::iterator MI,
MachineBasicBlock *MBB) {
MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
NewMBB->splice(NewMBB->begin(), MBB,
std::next(MachineBasicBlock::iterator(MI)), MBB->end());
NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
return NewMBB;
}
// Split MBB before MI and return the new block (the one that contains MI).
static MachineBasicBlock *splitBlockBefore(MachineBasicBlock::iterator MI,
MachineBasicBlock *MBB) {
MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
NewMBB->splice(NewMBB->begin(), MBB, MI, MBB->end());
NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
return NewMBB;
}
// Force base value Base into a register before MI. Return the register.
static Register forceReg(MachineInstr &MI, MachineOperand &Base,
const SystemZInstrInfo *TII) {
@ -7027,8 +7035,8 @@ SystemZTargetLowering::emitSelect(MachineInstr &MI,
bool CCKilled =
(LastMI->killsRegister(SystemZ::CC) || checkCCKill(*LastMI, MBB));
MachineBasicBlock *StartMBB = MBB;
MachineBasicBlock *JoinMBB = splitBlockAfter(LastMI, MBB);
MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);
MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(LastMI, MBB);
MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
// Unless CC was killed in the last Select instruction, mark it as
// live-in to both FalseMBB and JoinMBB.
@ -7121,8 +7129,8 @@ MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
CCMask ^= CCValid;
MachineBasicBlock *StartMBB = MBB;
MachineBasicBlock *JoinMBB = splitBlockBefore(MI, MBB);
MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);
MachineBasicBlock *JoinMBB = SystemZ::splitBlockBefore(MI, MBB);
MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
// Unless CC was killed in the CondStore instruction, mark it as
// live-in to both FalseMBB and JoinMBB.
@ -7205,8 +7213,8 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
// Insert a basic block for the main loop.
MachineBasicBlock *StartMBB = MBB;
MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
// StartMBB:
// ...
@ -7323,10 +7331,10 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
// Insert 3 basic blocks for the loop.
MachineBasicBlock *StartMBB = MBB;
MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
MachineBasicBlock *UseAltMBB = emitBlockAfter(LoopMBB);
MachineBasicBlock *UpdateMBB = emitBlockAfter(UseAltMBB);
MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
MachineBasicBlock *UseAltMBB = SystemZ::emitBlockAfter(LoopMBB);
MachineBasicBlock *UpdateMBB = SystemZ::emitBlockAfter(UseAltMBB);
// StartMBB:
// ...
@ -7434,9 +7442,9 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
// Insert 2 basic blocks for the loop.
MachineBasicBlock *StartMBB = MBB;
MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
MachineBasicBlock *SetMBB = emitBlockAfter(LoopMBB);
MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
MachineBasicBlock *SetMBB = SystemZ::emitBlockAfter(LoopMBB);
// StartMBB:
// ...
@ -7596,7 +7604,7 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
// When generating more than one CLC, all but the last will need to
// branch to the end when a difference is found.
MachineBasicBlock *EndMBB = (Length > 256 && Opcode == SystemZ::CLC ?
splitBlockAfter(MI, MBB) : nullptr);
SystemZ::splitBlockAfter(MI, MBB) : nullptr);
// Check for the loop form, in which operand 5 is the trip count.
if (MI.getNumExplicitOperands() > 5) {
@ -7620,9 +7628,10 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
Register NextCountReg = MRI.createVirtualRegister(RC);
MachineBasicBlock *StartMBB = MBB;
MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
MachineBasicBlock *NextMBB = (EndMBB ? emitBlockAfter(LoopMBB) : LoopMBB);
MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
MachineBasicBlock *NextMBB =
(EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
// StartMBB:
// # fall through to LoopMMB
@ -7738,7 +7747,7 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
// If there's another CLC to go, branch to the end if a difference
// was found.
if (EndMBB && Length > 0) {
MachineBasicBlock *NextMBB = splitBlockBefore(MI, MBB);
MachineBasicBlock *NextMBB = SystemZ::splitBlockBefore(MI, MBB);
BuildMI(MBB, DL, TII->get(SystemZ::BRC))
.addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
.addMBB(EndMBB);
@ -7778,8 +7787,8 @@ MachineBasicBlock *SystemZTargetLowering::emitStringWrapper(
uint64_t End2Reg = MRI.createVirtualRegister(RC);
MachineBasicBlock *StartMBB = MBB;
MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
// StartMBB:
// # fall through to LoopMMB
@ -7890,6 +7899,97 @@ MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
return MBB;
}
// Custom-insert a PROBED_ALLOCA pseudo: dynamically allocate SizeReg bytes
// of stack in steps of at most ProbeSize bytes, probing each step with a
// volatile load so any guard page is touched. DstReg receives the new stack
// pointer value.
MachineBasicBlock *SystemZTargetLowering::emitProbedAlloca(
    MachineInstr &MI, MachineBasicBlock *MBB) const {
  MachineFunction &MF = *MBB->getParent();
  MachineRegisterInfo *MRI = &MF.getRegInfo();
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
  DebugLoc DL = MI.getDebugLoc();

  const unsigned ProbeSize = getStackProbeSize(MF);
  Register DstReg = MI.getOperand(0).getReg();   // $dst: resulting SP value
  Register SizeReg = MI.getOperand(2).getReg();  // $space: bytes to allocate

  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *DoneMBB = SystemZ::splitBlockAfter(MI, MBB);
  MachineBasicBlock *LoopTestMBB = SystemZ::emitBlockAfter(StartMBB);
  MachineBasicBlock *LoopBodyMBB = SystemZ::emitBlockAfter(LoopTestMBB);
  MachineBasicBlock *TailTestMBB = SystemZ::emitBlockAfter(LoopBodyMBB);
  MachineBasicBlock *TailMBB = SystemZ::emitBlockAfter(TailTestMBB);

  // Memory operand for the probing loads: volatile so they are not removed.
  MachineMemOperand *VolLdMMO = MF.getMachineMemOperand(MachinePointerInfo(),
    MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad, 8, Align(1));

  // PHIReg tracks the remaining number of bytes to allocate; IncReg is its
  // value after one full ProbeSize step.
  Register PHIReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
  Register IncReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);

  //  LoopTestMBB: while remaining >= ProbeSize, fall through to the body.
  //  BRC TailTestMBB
  //  # fallthrough to LoopBodyMBB
  StartMBB->addSuccessor(LoopTestMBB);
  MBB = LoopTestMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), PHIReg)
    .addReg(SizeReg)
    .addMBB(StartMBB)
    .addReg(IncReg)
    .addMBB(LoopBodyMBB);
  // Unsigned compare of the remaining size against ProbeSize.
  BuildMI(MBB, DL, TII->get(SystemZ::CLGFI))
    .addReg(PHIReg)
    .addImm(ProbeSize);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_LT)
    .addMBB(TailTestMBB);
  MBB->addSuccessor(LoopBodyMBB);
  MBB->addSuccessor(TailTestMBB);

  //  LoopBodyMBB: Allocate and probe by means of a volatile compare.
  //  J LoopTestMBB
  MBB = LoopBodyMBB;
  // remaining -= ProbeSize
  BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), IncReg)
    .addReg(PHIReg)
    .addImm(ProbeSize);
  // SP -= ProbeSize
  BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), SystemZ::R15D)
    .addReg(SystemZ::R15D)
    .addImm(ProbeSize);
  // Probe the low end of the newly allocated block (offset ProbeSize - 8).
  BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
    .addReg(SystemZ::R15D).addImm(ProbeSize - 8).addReg(0)
    .setMemRefs(VolLdMMO);
  BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(LoopTestMBB);
  MBB->addSuccessor(LoopTestMBB);

  //  TailTestMBB: skip the tail allocation if nothing remains.
  //  BRC DoneMBB
  //  # fallthrough to TailMBB
  MBB = TailTestMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
    .addReg(PHIReg)
    .addImm(0);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
    .addMBB(DoneMBB);
  MBB->addSuccessor(TailMBB);
  MBB->addSuccessor(DoneMBB);

  //  TailMBB: allocate and probe the residual (< ProbeSize) part.
  //  # fallthrough to DoneMBB
  MBB = TailMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::SLGR), SystemZ::R15D)
    .addReg(SystemZ::R15D)
    .addReg(PHIReg);
  // Probe at -8(remaining, SP): the low end of the tail allocation.
  BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
    .addReg(SystemZ::R15D).addImm(-8).addReg(PHIReg)
    .setMemRefs(VolLdMMO);
  MBB->addSuccessor(DoneMBB);

  //  DoneMBB: hand the final SP value back as the pseudo's result.
  MBB = DoneMBB;
  BuildMI(*MBB, MBB->begin(), DL, TII->get(TargetOpcode::COPY), DstReg)
    .addReg(SystemZ::R15D);

  MI.eraseFromParent();
  return DoneMBB;
}
MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
MachineInstr &MI, MachineBasicBlock *MBB) const {
switch (MI.getOpcode()) {
@ -8150,6 +8250,9 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
case SystemZ::LTXBRCompare_VecPseudo:
return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);
case SystemZ::PROBED_ALLOCA:
return emitProbedAlloca(MI, MBB);
case TargetOpcode::STACKMAP:
case TargetOpcode::PATCHPOINT:
return emitPatchPoint(MI, MBB);

View File

@ -83,6 +83,10 @@ enum NodeType : unsigned {
// base of the dynamically-allocatable area.
ADJDYNALLOC,
// For allocating stack space when using stack clash protector.
// Allocation is performed by block, and each block is probed.
PROBED_ALLOCA,
// Count number of bits set in operand 0 per byte.
POPCNT,
@ -428,6 +432,7 @@ public:
EVT VT) const override;
bool isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const override;
bool hasInlineStackProbe(MachineFunction &MF) const override;
bool isLegalICmpImmediate(int64_t Imm) const override;
bool isLegalAddImmediate(int64_t Imm) const override;
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
@ -556,6 +561,8 @@ public:
return true;
}
unsigned getStackProbeSize(MachineFunction &MF) const;
private:
const SystemZSubtarget &Subtarget;
@ -691,6 +698,8 @@ private:
MachineBasicBlock *emitLoadAndTestCmp0(MachineInstr &MI,
MachineBasicBlock *MBB,
unsigned Opcode) const;
MachineBasicBlock *emitProbedAlloca(MachineInstr &MI,
MachineBasicBlock *MBB) const;
MachineMemOperand::Flags
getTargetMMOFlags(const Instruction &I) const override;

View File

@ -1872,6 +1872,30 @@ unsigned SystemZ::reverseCCMask(unsigned CCMask) {
(CCMask & SystemZ::CCMASK_CMP_UO));
}
// Create a new basic block (attached to the same IR-level block as MBB)
// and insert it into the function immediately after MBB.
MachineBasicBlock *SystemZ::emitBlockAfter(MachineBasicBlock *MBB) {
  MachineFunction &MF = *MBB->getParent();
  MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock());
  MF.insert(std::next(MachineFunction::iterator(MBB)), NewMBB);
  return NewMBB;
}
// Split MBB after MI and return the new block (the one that contains
// instructions after MI).
MachineBasicBlock *SystemZ::splitBlockAfter(MachineBasicBlock::iterator MI,
                                            MachineBasicBlock *MBB) {
  MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
  // Move everything following MI into the new block, and transfer MBB's
  // successors (updating any PHI references) along with it.
  NewMBB->splice(NewMBB->begin(), MBB,
                 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
  NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
  return NewMBB;
}
// Split MBB before MI and return the new block (the one that contains MI
// and everything after it).
MachineBasicBlock *SystemZ::splitBlockBefore(MachineBasicBlock::iterator MI,
                                             MachineBasicBlock *MBB) {
  MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
  NewMBB->splice(NewMBB->begin(), MBB, MI, MBB->end());
  NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
  return NewMBB;
}
unsigned SystemZInstrInfo::getLoadAndTrap(unsigned Opcode) const {
if (!STI.hasLoadAndTrap())
return 0;

View File

@ -159,6 +159,16 @@ int getTargetMemOpcode(uint16_t Opcode);
// Return a version of comparison CC mask CCMask in which the LT and GT
// actions are swapped.
unsigned reverseCCMask(unsigned CCMask);
// Create a new basic block after MBB.
MachineBasicBlock *emitBlockAfter(MachineBasicBlock *MBB);
// Split MBB after MI and return the new block (the one that contains
// instructions after MI).
MachineBasicBlock *splitBlockAfter(MachineBasicBlock::iterator MI,
MachineBasicBlock *MBB);
// Split MBB before MI and return the new block (the one that contains MI).
MachineBasicBlock *splitBlockBefore(MachineBasicBlock::iterator MI,
MachineBasicBlock *MBB);
}
class SystemZInstrInfo : public SystemZGenInstrInfo {

View File

@ -29,6 +29,15 @@ let hasNoSchedulingInfo = 1, hasSideEffects = 1 in {
def ADJDYNALLOC : Pseudo<(outs GR64:$dst), (ins dynalloc12only:$src),
[(set GR64:$dst, dynalloc12only:$src)]>;
let Defs = [R15D, CC], Uses = [R15D], hasNoSchedulingInfo = 1,
usesCustomInserter = 1 in
def PROBED_ALLOCA : Pseudo<(outs GR64:$dst),
(ins GR64:$oldSP, GR64:$space),
[(set GR64:$dst, (z_probed_alloca GR64:$oldSP, GR64:$space))]>;
let Defs = [R1D, R15D, CC], Uses = [R15D], hasNoSchedulingInfo = 1,
hasSideEffects = 1 in
def PROBED_STACKALLOC : Pseudo<(outs), (ins i64imm:$stacksize), []>;
//===----------------------------------------------------------------------===//
// Branch instructions

View File

@ -40,6 +40,10 @@ def SDT_ZWrapOffset : SDTypeProfile<1, 2,
SDTCisSameAs<0, 2>,
SDTCisPtrTy<0>]>;
def SDT_ZAdjDynAlloc : SDTypeProfile<1, 0, [SDTCisVT<0, i64>]>;
def SDT_ZProbedAlloca : SDTypeProfile<1, 2,
[SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>,
SDTCisPtrTy<0>]>;
def SDT_ZGR128Binary : SDTypeProfile<1, 2,
[SDTCisVT<0, untyped>,
SDTCisInt<1>,
@ -269,6 +273,8 @@ def z_select_ccmask_1 : SDNode<"SystemZISD::SELECT_CCMASK",
SDT_ZSelectCCMask>;
def z_ipm_1 : SDNode<"SystemZISD::IPM", SDT_ZIPM>;
def z_adjdynalloc : SDNode<"SystemZISD::ADJDYNALLOC", SDT_ZAdjDynAlloc>;
def z_probed_alloca : SDNode<"SystemZISD::PROBED_ALLOCA", SDT_ZProbedAlloca,
[SDNPHasChain]>;
def z_popcnt : SDNode<"SystemZISD::POPCNT", SDTIntUnaryOp>;
def z_smul_lohi : SDNode<"SystemZISD::SMUL_LOHI", SDT_ZGR128Binary>;
def z_umul_lohi : SDNode<"SystemZISD::UMUL_LOHI", SDT_ZGR128Binary>;

View File

@ -0,0 +1,136 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
define i32 @fun0(i32 %n) #0 {
; CHECK-LABEL: fun0:
; CHECK: # %bb.0:
; CHECK-NEXT: stmg %r11, %r15, 88(%r15)
; CHECK-NEXT: .cfi_offset %r11, -72
; CHECK-NEXT: .cfi_offset %r15, -40
; CHECK-NEXT: aghi %r15, -160
; CHECK-NEXT: .cfi_def_cfa_offset 320
; CHECK-NEXT: lgr %r11, %r15
; CHECK-NEXT: .cfi_def_cfa_register %r11
; CHECK-NEXT: # kill: def $r2l killed $r2l def $r2d
; CHECK-NEXT: risbgn %r1, %r2, 30, 189, 2
; CHECK-NEXT: la %r0, 7(%r1)
; CHECK-NEXT: risbgn %r1, %r0, 29, 188, 0
; CHECK-NEXT: clgfi %r1, 4096
; CHECK-NEXT: jl .LBB0_2
; CHECK-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: slgfi %r1, 4096
; CHECK-NEXT: slgfi %r15, 4096
; CHECK-NEXT: cg %r15, 4088(%r15)
; CHECK-NEXT: clgfi %r1, 4096
; CHECK-NEXT: jhe .LBB0_1
; CHECK-NEXT: .LBB0_2:
; CHECK-NEXT: cgije %r1, 0, .LBB0_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: slgr %r15, %r1
; CHECK-NEXT: cg %r15, -8(%r1,%r15)
; CHECK-NEXT: .LBB0_4:
; CHECK-NEXT: la %r1, 160(%r15)
; CHECK-NEXT: lhi %r0, 1
; CHECK-NEXT: sty %r0, 4792(%r1)
; CHECK-NEXT: l %r2, 0(%r1)
; CHECK-NEXT: lmg %r11, %r15, 248(%r11)
; CHECK-NEXT: br %r14
%a = alloca i32, i32 %n
%b = getelementptr inbounds i32, i32* %a, i64 1198
store volatile i32 1, i32* %b
%c = load volatile i32, i32* %a
ret i32 %c
}
; The probe size should be rounded down to a multiple of the stack alignment.
define i32 @fun1(i32 %n) #0 "stack-probe-size"="1250" {
; CHECK-LABEL: fun1:
; CHECK: # %bb.0:
; CHECK-NEXT: stmg %r11, %r15, 88(%r15)
; CHECK-NEXT: .cfi_offset %r11, -72
; CHECK-NEXT: .cfi_offset %r15, -40
; CHECK-NEXT: aghi %r15, -160
; CHECK-NEXT: .cfi_def_cfa_offset 320
; CHECK-NEXT: lgr %r11, %r15
; CHECK-NEXT: .cfi_def_cfa_register %r11
; CHECK-NEXT: # kill: def $r2l killed $r2l def $r2d
; CHECK-NEXT: risbgn %r1, %r2, 30, 189, 2
; CHECK-NEXT: la %r0, 7(%r1)
; CHECK-NEXT: risbgn %r1, %r0, 29, 188, 0
; CHECK-NEXT: clgfi %r1, 1248
; CHECK-NEXT: jl .LBB1_2
; CHECK-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: slgfi %r1, 1248
; CHECK-NEXT: slgfi %r15, 1248
; CHECK-NEXT: cg %r15, 1240(%r15)
; CHECK-NEXT: clgfi %r1, 1248
; CHECK-NEXT: jhe .LBB1_1
; CHECK-NEXT: .LBB1_2:
; CHECK-NEXT: cgije %r1, 0, .LBB1_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: slgr %r15, %r1
; CHECK-NEXT: cg %r15, -8(%r1,%r15)
; CHECK-NEXT: .LBB1_4:
; CHECK-NEXT: la %r1, 160(%r15)
; CHECK-NEXT: lhi %r0, 1
; CHECK-NEXT: sty %r0, 4792(%r1)
; CHECK-NEXT: l %r2, 0(%r1)
; CHECK-NEXT: lmg %r11, %r15, 248(%r11)
; CHECK-NEXT: br %r14
%a = alloca i32, i32 %n
%b = getelementptr inbounds i32, i32* %a, i64 1198
store volatile i32 1, i32* %b
%c = load volatile i32, i32* %a
ret i32 %c
}
; The minimum probe size is the stack alignment.
define i32 @fun2(i32 %n) #0 "stack-probe-size"="4" {
; CHECK-LABEL: fun2:
; CHECK: # %bb.0:
; CHECK-NEXT: stmg %r11, %r15, 88(%r15)
; CHECK-NEXT: .cfi_offset %r11, -72
; CHECK-NEXT: .cfi_offset %r15, -40
; CHECK-NEXT: lgr %r1, %r15
; CHECK-NEXT: .cfi_def_cfa_register %r1
; CHECK-NEXT: aghi %r1, -160
; CHECK-NEXT: .cfi_def_cfa_offset 320
; CHECK-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: aghi %r15, -8
; CHECK-NEXT: cg %r0, 0(%r15)
; CHECK-NEXT: clgrjh %r15, %r1, .LBB2_1
; CHECK-NEXT: # %bb.2:
; CHECK-NEXT: .cfi_def_cfa_register %r15
; CHECK-NEXT: lgr %r11, %r15
; CHECK-NEXT: .cfi_def_cfa_register %r11
; CHECK-NEXT: # kill: def $r2l killed $r2l def $r2d
; CHECK-NEXT: risbgn %r1, %r2, 30, 189, 2
; CHECK-NEXT: la %r0, 7(%r1)
; CHECK-NEXT: risbgn %r1, %r0, 29, 188, 0
; CHECK-NEXT: clgijl %r1, 8, .LBB2_4
; CHECK-NEXT: .LBB2_3: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: slgfi %r1, 8
; CHECK-NEXT: slgfi %r15, 8
; CHECK-NEXT: cg %r15, 0(%r15)
; CHECK-NEXT: clgijhe %r1, 8, .LBB2_3
; CHECK-NEXT: .LBB2_4:
; CHECK-NEXT: cgije %r1, 0, .LBB2_6
; CHECK-NEXT: # %bb.5:
; CHECK-NEXT: slgr %r15, %r1
; CHECK-NEXT: cg %r15, -8(%r1,%r15)
; CHECK-NEXT: .LBB2_6:
; CHECK-NEXT: la %r1, 160(%r15)
; CHECK-NEXT: lhi %r0, 1
; CHECK-NEXT: sty %r0, 4792(%r1)
; CHECK-NEXT: l %r2, 0(%r1)
; CHECK-NEXT: lmg %r11, %r15, 248(%r11)
; CHECK-NEXT: br %r14
%a = alloca i32, i32 %n
%b = getelementptr inbounds i32, i32* %a, i64 1198
store volatile i32 1, i32* %b
%c = load volatile i32, i32* %a
ret i32 %c
}
attributes #0 = {"probe-stack"="inline-asm"}

View File

@ -0,0 +1,242 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 -O3 | FileCheck %s
;
; Test stack clash protection probing for static allocas.
; Small: one probe.
; Small static alloca (100 x i32): the whole 560-byte frame is allocated in a
; single step and covered by a single probe (the cg at offset 552).
define i32 @fun0() #0 {
; CHECK-LABEL: fun0:
; CHECK: # %bb.0:
; CHECK-NEXT: aghi %r15, -560
; CHECK-NEXT: .cfi_def_cfa_offset 720
; CHECK-NEXT: cg %r0, 552(%r15)
; CHECK-NEXT: mvhi 552(%r15), 1
; CHECK-NEXT: l %r2, 160(%r15)
; CHECK-NEXT: aghi %r15, 560
; CHECK-NEXT: br %r14
; Volatile store/load keep the allocation live through optimization.
%a = alloca i32, i64 100
%b = getelementptr inbounds i32, i32* %a, i64 98
store volatile i32 1, i32* %b
%c = load volatile i32, i32* %a
ret i32 %c
}
; Medium: two probes.
; Medium static alloca (2000 x i32): the frame is allocated in two steps
; (-4096 then -4080), each followed by its own probe (two cg instructions).
define i32 @fun1() #0 {
; CHECK-LABEL: fun1:
; CHECK: # %bb.0:
; CHECK-NEXT: aghi %r15, -4096
; CHECK-NEXT: .cfi_def_cfa_offset 4256
; CHECK-NEXT: cg %r0, 4088(%r15)
; CHECK-NEXT: aghi %r15, -4080
; CHECK-NEXT: .cfi_def_cfa_offset 8336
; CHECK-NEXT: cg %r0, 4072(%r15)
; CHECK-NEXT: mvhi 976(%r15), 1
; CHECK-NEXT: l %r2, 176(%r15)
; CHECK-NEXT: aghi %r15, 8176
; CHECK-NEXT: br %r14
; Volatile store/load keep the allocation live through optimization.
%a = alloca i32, i64 2000
%b = getelementptr inbounds i32, i32* %a, i64 200
store volatile i32 1, i32* %b
%c = load volatile i32, i32* %a
ret i32 %c
}
; Large: Use a loop to allocate and probe in steps.
; Large static alloca (18000 x i32): a probe loop (.LBB2_1) allocates and
; probes 4096 bytes per iteration, followed by a final 2544-byte step with
; its own probe for the remainder.
define i32 @fun2() #0 {
; CHECK-LABEL: fun2:
; CHECK: # %bb.0:
; CHECK-NEXT: lgr %r1, %r15
; CHECK-NEXT: .cfi_def_cfa_register %r1
; CHECK-NEXT: agfi %r1, -69632
; CHECK-NEXT: .cfi_def_cfa_offset 69792
; CHECK-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: aghi %r15, -4096
; CHECK-NEXT: cg %r0, 4088(%r15)
; CHECK-NEXT: clgrjh %r15, %r1, .LBB2_1
; CHECK-NEXT: # %bb.2:
; CHECK-NEXT: .cfi_def_cfa_register %r15
; CHECK-NEXT: aghi %r15, -2544
; CHECK-NEXT: .cfi_def_cfa_offset 72336
; CHECK-NEXT: cg %r0, 2536(%r15)
; CHECK-NEXT: lhi %r0, 1
; CHECK-NEXT: mvhi 568(%r15), 1
; CHECK-NEXT: sty %r0, 28968(%r15)
; CHECK-NEXT: l %r2, 176(%r15)
; CHECK-NEXT: agfi %r15, 72176
; CHECK-NEXT: br %r14
; Two volatile stores at different offsets plus a volatile load keep the
; large allocation live.
%a = alloca i32, i64 18000
%b0 = getelementptr inbounds i32, i32* %a, i64 98
%b1 = getelementptr inbounds i32, i32* %a, i64 7198
store volatile i32 1, i32* %b0
store volatile i32 1, i32* %b1
%c = load volatile i32, i32* %a
ret i32 %c
}
; Ends evenly on the step so no remainder needed.
; The allocation (28672 bytes) is an exact multiple of the 4096-byte probe
; step, so the loop covers everything and no residual allocation/probe is
; emitted after it.
define void @fun3() #0 {
; CHECK-LABEL: fun3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lgr %r1, %r15
; CHECK-NEXT: .cfi_def_cfa_register %r1
; CHECK-NEXT: aghi %r1, -28672
; CHECK-NEXT: .cfi_def_cfa_offset 28832
; CHECK-NEXT: .LBB3_1: # %entry
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: aghi %r15, -4096
; CHECK-NEXT: cg %r0, 4088(%r15)
; CHECK-NEXT: clgrjh %r15, %r1, .LBB3_1
; CHECK-NEXT: # %bb.2: # %entry
; CHECK-NEXT: .cfi_def_cfa_register %r15
; CHECK-NEXT: mvhi 180(%r15), 0
; CHECK-NEXT: l %r0, 180(%r15)
; CHECK-NEXT: aghi %r15, 28672
; CHECK-NEXT: br %r14
entry:
; Volatile store/load of %i keep the frame objects live.
%stack = alloca [7122 x i32], align 4
%i = alloca i32, align 4
%0 = bitcast [7122 x i32]* %stack to i8*
%i.0.i.0..sroa_cast = bitcast i32* %i to i8*
store volatile i32 0, i32* %i, align 4
%i.0.i.0.6 = load volatile i32, i32* %i, align 4
ret void
}
; Loop with bigger step.
; A larger "stack-probe-size" of 8192 from the function attribute: the probe
; loop steps by 8192 bytes, with a 7608-byte remainder allocated and probed
; separately after the loop.
define void @fun4() #0 "stack-probe-size"="8192" {
; CHECK-LABEL: fun4:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lgr %r1, %r15
; CHECK-NEXT: .cfi_def_cfa_register %r1
; CHECK-NEXT: aghi %r1, -24576
; CHECK-NEXT: .cfi_def_cfa_offset 24736
; CHECK-NEXT: .LBB4_1: # %entry
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: aghi %r15, -8192
; CHECK-NEXT: cg %r0, 8184(%r15)
; CHECK-NEXT: clgrjh %r15, %r1, .LBB4_1
; CHECK-NEXT: # %bb.2: # %entry
; CHECK-NEXT: .cfi_def_cfa_register %r15
; CHECK-NEXT: aghi %r15, -7608
; CHECK-NEXT: .cfi_def_cfa_offset 32344
; CHECK-NEXT: cg %r0, 7600(%r15)
; CHECK-NEXT: mvhi 180(%r15), 0
; CHECK-NEXT: l %r0, 180(%r15)
; CHECK-NEXT: aghi %r15, 32184
; CHECK-NEXT: br %r14
entry:
; Volatile store/load of %i keep the frame objects live.
%stack = alloca [8000 x i32], align 4
%i = alloca i32, align 4
%0 = bitcast [8000 x i32]* %stack to i8*
%i.0.i.0..sroa_cast = bitcast i32* %i to i8*
store volatile i32 0, i32* %i, align 4
%i.0.i.0.6 = load volatile i32, i32* %i, align 4
ret void
}
; Probe size should be modulo stack alignment.
; A "stack-probe-size" of 4100 is not a multiple of the 8-byte stack
; alignment: the effective probe size is rounded down to 4096, giving a
; 4096-byte probed step plus an 88-byte probed remainder.
define void @fun5() #0 "stack-probe-size"="4100" {
; CHECK-LABEL: fun5:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: aghi %r15, -4096
; CHECK-NEXT: .cfi_def_cfa_offset 4256
; CHECK-NEXT: cg %r0, 4088(%r15)
; CHECK-NEXT: aghi %r15, -88
; CHECK-NEXT: .cfi_def_cfa_offset 4344
; CHECK-NEXT: cg %r0, 80(%r15)
; CHECK-NEXT: mvhi 180(%r15), 0
; CHECK-NEXT: l %r0, 180(%r15)
; CHECK-NEXT: aghi %r15, 4184
; CHECK-NEXT: br %r14
entry:
; Volatile store/load of %i keep the frame objects live.
%stack = alloca [1000 x i32], align 4
%i = alloca i32, align 4
%0 = bitcast [1000 x i32]* %stack to i8*
%i.0.i.0..sroa_cast = bitcast i32* %i to i8*
store volatile i32 0, i32* %i, align 4
%i.0.i.0.6 = load volatile i32, i32* %i, align 4
ret void
}
; The minimum probe size is the stack alignment.
; A "stack-probe-size" of 5 is below the 8-byte stack alignment, so the
; effective minimum probe size is the alignment: the probe loop steps by 8
; bytes (aghi %r15, -8 / cg %r0, 0(%r15)).
define void @fun6() #0 "stack-probe-size"="5" {
; CHECK-LABEL: fun6:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lgr %r1, %r15
; CHECK-NEXT: .cfi_def_cfa_register %r1
; CHECK-NEXT: aghi %r1, -4184
; CHECK-NEXT: .cfi_def_cfa_offset 4344
; CHECK-NEXT: .LBB6_1: # %entry
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: aghi %r15, -8
; CHECK-NEXT: cg %r0, 0(%r15)
; CHECK-NEXT: clgrjh %r15, %r1, .LBB6_1
; CHECK-NEXT: # %bb.2: # %entry
; CHECK-NEXT: .cfi_def_cfa_register %r15
; CHECK-NEXT: mvhi 180(%r15), 0
; CHECK-NEXT: l %r0, 180(%r15)
; CHECK-NEXT: aghi %r15, 4184
; CHECK-NEXT: br %r14
entry:
; Volatile store/load of %i keep the frame objects live.
%stack = alloca [1000 x i32], align 4
%i = alloca i32, align 4
%0 = bitcast [1000 x i32]* %stack to i8*
%i.0.i.0..sroa_cast = bitcast i32* %i to i8*
store volatile i32 0, i32* %i, align 4
%i.0.i.0.6 = load volatile i32, i32* %i, align 4
ret void
}
; Small with a natural probe (STMG) - needs no extra probe.
; Small frame with a register save (STMG): the expected output contains no
; explicit cg probe -- presumably the STMG store itself serves as a natural
; probe for a frame of this size (TODO(review): confirm against the
; SystemZ frame-lowering probing logic).
define i32 @fun7() #0 {
; CHECK-LABEL: fun7:
; CHECK: # %bb.0:
; CHECK-NEXT: stmg %r14, %r15, 112(%r15)
; CHECK-NEXT: .cfi_offset %r14, -48
; CHECK-NEXT: .cfi_offset %r15, -40
; CHECK-NEXT: aghi %r15, -3976
; CHECK-NEXT: .cfi_def_cfa_offset 4136
; CHECK-NEXT: brasl %r14, foo@PLT
; CHECK-NEXT: st %r2, 568(%r15)
; CHECK-NEXT: l %r2, 176(%r15)
; CHECK-NEXT: lmg %r14, %r15, 4088(%r15)
; CHECK-NEXT: br %r14
; The call forces %r14 to be saved/restored (hence the STMG/LMG pair).
%v = call i32 @foo()
%a = alloca i32, i64 950
%b = getelementptr inbounds i32, i32* %a, i64 98
store volatile i32 %v, i32* %b
%c = load volatile i32, i32* %a
ret i32 %c
}
; Medium with an STMG - still needs probing.
; Slightly larger frame than fun7 (952 vs 950 elements, -3984 vs -3976):
; here an explicit probe (cg %r0, 3976(%r15)) is still expected despite the
; STMG register save.
define i32 @fun8() #0 {
; CHECK-LABEL: fun8:
; CHECK: # %bb.0:
; CHECK-NEXT: stmg %r14, %r15, 112(%r15)
; CHECK-NEXT: .cfi_offset %r14, -48
; CHECK-NEXT: .cfi_offset %r15, -40
; CHECK-NEXT: aghi %r15, -3984
; CHECK-NEXT: .cfi_def_cfa_offset 4144
; CHECK-NEXT: cg %r0, 3976(%r15)
; CHECK-NEXT: brasl %r14, foo@PLT
; CHECK-NEXT: st %r2, 976(%r15)
; CHECK-NEXT: l %r2, 176(%r15)
; CHECK-NEXT: lmg %r14, %r15, 4096(%r15)
; CHECK-NEXT: br %r14
; The call forces %r14 to be saved/restored (hence the STMG/LMG pair).
%v = call i32 @foo()
%a = alloca i32, i64 952
%b = getelementptr inbounds i32, i32* %a, i64 200
store volatile i32 %v, i32* %b
%c = load volatile i32, i32* %a
ret i32 %c
}
; External callee used by fun7/fun8 to force a register-saving prologue.
declare i32 @foo()
; All test functions request inline (compiler-emitted) stack probing rather
; than a runtime probe-stack call.
attributes #0 = { "probe-stack"="inline-asm" }