[SystemZ] Implement -fstack-clash-protection
Probing of allocated stack space is now done when this option is passed. The purpose is to protect against the stack clash attack (see https://www.qualys.com/2017/06/19/stack-clash/stack-clash.txt).

Review: Ulrich Weigand

Differential Revision: https://reviews.llvm.org/D78717
parent 92cb0ce8f8
commit 515bfc66ea
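Editor's note: as a usage illustration (not part of the commit), a function with a large stack frame compiled with the new flag gets each page of its frame probed as it is allocated. The file and function names below are made up:

/* Hypothetical example; build with:
 *   clang --target=s390x-linux-gnu -fstack-clash-protection -O2 -c probe_demo.c
 * The frontend then attaches "probe-stack"="inline-asm" to the function, and the
 * SystemZ backend probes each probe-size block of the frame in the prologue. */
#include <string.h>

int probe_demo(const char *src, unsigned n) {
  char buf[64 * 1024];              /* large frame -> several probes or a probe loop */
  memset(buf, 0, sizeof(buf));
  if (n < sizeof(buf))
    memcpy(buf, src, n);
  return buf[0];
}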
@@ -94,8 +94,8 @@ New Compiler Flags
 ------------------

 - -fstack-clash-protection will provide a protection against the stack clash
-  attack for x86 architecture through automatic probing of each page of
-  allocated stack.
+  attack for x86 and s390x architectures through automatic probing of each page
+  of allocated stack.

 - -ffp-exception-behavior={ignore,maytrap,strict} allows the user to specify
   the floating-point exception behavior. The default setting is ``ignore``.
@@ -64,6 +64,10 @@ public:

   ArrayRef<TargetInfo::AddlRegName> getGCCAddlRegNames() const override;

+  bool isSPRegName(StringRef RegName) const override {
+    return RegName.equals("r15");
+  }
+
   bool validateAsmConstraint(const char *&Name,
                              TargetInfo::ConstraintInfo &info) const override;

@@ -2997,7 +2997,7 @@ static void RenderSCPOptions(const ToolChain &TC, const ArgList &Args,
   if (!EffectiveTriple.isOSLinux())
     return;

-  if (!EffectiveTriple.isX86())
+  if (!EffectiveTriple.isX86() && !EffectiveTriple.isSystemZ())
     return;

   if (Args.hasFlag(options::OPT_fstack_clash_protection,
@@ -1,5 +1,6 @@
 // Check the correct function attributes are generated
 // RUN: %clang_cc1 -triple x86_64-linux -O0 -S -emit-llvm -o- %s -fstack-clash-protection | FileCheck %s
+// RUN: %clang_cc1 -triple s390x-linux-gnu -O0 -S -emit-llvm -o- %s -fstack-clash-protection | FileCheck %s

 // CHECK: define void @large_stack() #[[A:.*]] {
 void large_stack() {
@@ -0,0 +1,13 @@
// RUN: %clang -target s390x-linux-gnu -fstack-clash-protection -### %s 2>&1 | FileCheck %s -check-prefix=SystemZ
// SystemZ: "-fstack-clash-protection"
// RUN: %clang -target s390x-linux-gnu -fstack-clash-protection -S -emit-llvm -o %t.ll %s 2>&1 | FileCheck %s -check-prefix=SystemZ-warn
// SystemZ-warn: warning: Unable to protect inline asm that clobbers stack pointer against stack clash

int foo(int c) {
  int r;
  __asm__("ag %%r15, %0"
          :
          : "rm"(c)
          : "r15");
  return r;
}
@@ -739,6 +739,11 @@ public:
     return getArch() == Triple::riscv32 || getArch() == Triple::riscv64;
   }

+  /// Tests whether the target is SystemZ.
+  bool isSystemZ() const {
+    return getArch() == Triple::systemz;
+  }
+
   /// Tests whether the target is x86 (32- or 64-bit).
   bool isX86() const {
     return getArch() == Triple::x86 || getArch() == Triple::x86_64;
@@ -374,12 +374,39 @@ static void emitIncrement(MachineBasicBlock &MBB,
   }
 }

+// Add CFI for the new CFA offset.
+static void buildCFAOffs(MachineBasicBlock &MBB,
+                         MachineBasicBlock::iterator MBBI,
+                         const DebugLoc &DL, int Offset,
+                         const SystemZInstrInfo *ZII) {
+  unsigned CFIIndex = MBB.getParent()->addFrameInst(
+      MCCFIInstruction::cfiDefCfaOffset(nullptr, -Offset));
+  BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION))
+      .addCFIIndex(CFIIndex);
+}
+
+// Add CFI for the new frame location.
+static void buildDefCFAReg(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator MBBI,
+                           const DebugLoc &DL, unsigned Reg,
+                           const SystemZInstrInfo *ZII) {
+  MachineFunction &MF = *MBB.getParent();
+  MachineModuleInfo &MMI = MF.getMMI();
+  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
+  unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
+  unsigned CFIIndex = MF.addFrameInst(
+      MCCFIInstruction::createDefCfaRegister(nullptr, RegNum));
+  BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION))
+      .addCFIIndex(CFIIndex);
+}
+
 void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
                                         MachineBasicBlock &MBB) const {
   assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
+  const SystemZSubtarget &STI = MF.getSubtarget<SystemZSubtarget>();
+  const SystemZTargetLowering &TLI = *STI.getTargetLowering();
   MachineFrameInfo &MFFrame = MF.getFrameInfo();
-  auto *ZII =
-      static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
+  auto *ZII = static_cast<const SystemZInstrInfo *>(STI.getInstrInfo());
   SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
   MachineBasicBlock::iterator MBBI = MBB.begin();
   MachineModuleInfo &MMI = MF.getMMI();
@@ -462,13 +489,22 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF,

   // Allocate StackSize bytes.
   int64_t Delta = -int64_t(StackSize);
-  emitIncrement(MBB, MBBI, DL, SystemZ::R15D, Delta, ZII);
-
-  // Add CFI for the allocation.
-  unsigned CFIIndex = MF.addFrameInst(
-      MCCFIInstruction::cfiDefCfaOffset(nullptr, -SPOffsetFromCFA - Delta));
-  BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION))
-      .addCFIIndex(CFIIndex);
+  const unsigned ProbeSize = TLI.getStackProbeSize(MF);
+  bool FreeProbe = (ZFI->getSpillGPRRegs().GPROffset &&
+                    (ZFI->getSpillGPRRegs().GPROffset + StackSize) < ProbeSize);
+  if (!FreeProbe &&
+      MF.getSubtarget().getTargetLowering()->hasInlineStackProbe(MF)) {
+    // Stack probing may involve looping, but splitting the prologue block
+    // is not possible at this point since it would invalidate the
+    // SaveBlocks / RestoreBlocks sets of PEI in the single block function
+    // case. Build a pseudo to be handled later by inlineStackProbe().
+    BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::PROBED_STACKALLOC))
+      .addImm(StackSize);
+  }
+  else {
+    emitIncrement(MBB, MBBI, DL, SystemZ::R15D, Delta, ZII);
+    buildCFAOffs(MBB, MBBI, DL, SPOffsetFromCFA + Delta, ZII);
+  }
   SPOffsetFromCFA += Delta;

   if (StoreBackchain) {
@@ -486,11 +522,7 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
       .addReg(SystemZ::R15D);

     // Add CFI for the new frame location.
-    unsigned HardFP = MRI->getDwarfRegNum(SystemZ::R11D, true);
-    unsigned CFIIndex = MF.addFrameInst(
-        MCCFIInstruction::createDefCfaRegister(nullptr, HardFP));
-    BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION))
-        .addCFIIndex(CFIIndex);
+    buildDefCFAReg(MBB, MBBI, DL, SystemZ::R11D, ZII);

     // Mark the FramePtr as live at the beginning of every block except
     // the entry block. (We'll have marked R11 as live on entry when
@@ -583,6 +615,91 @@ void SystemZFrameLowering::emitEpilogue(MachineFunction &MF,
   }
 }

+void SystemZFrameLowering::inlineStackProbe(MachineFunction &MF,
+                                            MachineBasicBlock &PrologMBB) const {
+  auto *ZII =
+      static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
+  const SystemZSubtarget &STI = MF.getSubtarget<SystemZSubtarget>();
+  const SystemZTargetLowering &TLI = *STI.getTargetLowering();
+
+  MachineInstr *StackAllocMI = nullptr;
+  for (MachineInstr &MI : PrologMBB)
+    if (MI.getOpcode() == SystemZ::PROBED_STACKALLOC) {
+      StackAllocMI = &MI;
+      break;
+    }
+  if (StackAllocMI == nullptr)
+    return;
+  uint64_t StackSize = StackAllocMI->getOperand(0).getImm();
+  const unsigned ProbeSize = TLI.getStackProbeSize(MF);
+  uint64_t NumFullBlocks = StackSize / ProbeSize;
+  uint64_t Residual = StackSize % ProbeSize;
+  int64_t SPOffsetFromCFA = -SystemZMC::CFAOffsetFromInitialSP;
+  MachineBasicBlock *MBB = &PrologMBB;
+  MachineBasicBlock::iterator MBBI = StackAllocMI;
+  const DebugLoc DL = StackAllocMI->getDebugLoc();
+
+  // Allocate a block of Size bytes on the stack and probe it.
+  auto allocateAndProbe = [&](MachineBasicBlock &InsMBB,
+                              MachineBasicBlock::iterator InsPt, unsigned Size,
+                              bool EmitCFI) -> void {
+    emitIncrement(InsMBB, InsPt, DL, SystemZ::R15D, -int64_t(Size), ZII);
+    if (EmitCFI) {
+      SPOffsetFromCFA -= Size;
+      buildCFAOffs(InsMBB, InsPt, DL, SPOffsetFromCFA, ZII);
+    }
+    // Probe by means of a volatile compare.
+    MachineMemOperand *MMO = MF.getMachineMemOperand(MachinePointerInfo(),
+        MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad, 8, Align(1));
+    BuildMI(InsMBB, InsPt, DL, ZII->get(SystemZ::CG))
+        .addReg(SystemZ::R0D, RegState::Undef)
+        .addReg(SystemZ::R15D).addImm(Size - 8).addReg(0)
+        .addMemOperand(MMO);
+  };
+
+  if (NumFullBlocks < 3) {
+    // Emit unrolled probe statements.
+    for (unsigned int i = 0; i < NumFullBlocks; i++)
+      allocateAndProbe(*MBB, MBBI, ProbeSize, true/*EmitCFI*/);
+  } else {
+    // Emit a loop probing the pages.
+    uint64_t LoopAlloc = ProbeSize * NumFullBlocks;
+    SPOffsetFromCFA -= LoopAlloc;
+
+    BuildMI(*MBB, MBBI, DL, ZII->get(SystemZ::LGR), SystemZ::R1D)
+        .addReg(SystemZ::R15D);
+    buildDefCFAReg(*MBB, MBBI, DL, SystemZ::R1D, ZII);
+    emitIncrement(*MBB, MBBI, DL, SystemZ::R1D, -int64_t(LoopAlloc), ZII);
+    buildCFAOffs(*MBB, MBBI, DL, -int64_t(SystemZMC::CallFrameSize + LoopAlloc),
+                 ZII);
+
+    MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MBBI, MBB);
+    MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(MBB);
+    MBB->addSuccessor(LoopMBB);
+    LoopMBB->addSuccessor(LoopMBB);
+    LoopMBB->addSuccessor(DoneMBB);
+
+    MBB = LoopMBB;
+    allocateAndProbe(*MBB, MBB->end(), ProbeSize, false/*EmitCFI*/);
+    BuildMI(*MBB, MBB->end(), DL, ZII->get(SystemZ::CLGR))
+        .addReg(SystemZ::R15D).addReg(SystemZ::R1D);
+    BuildMI(*MBB, MBB->end(), DL, ZII->get(SystemZ::BRC))
+        .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_GT).addMBB(MBB);
+
+    MBB = DoneMBB;
+    MBBI = DoneMBB->begin();
+    buildDefCFAReg(*MBB, MBBI, DL, SystemZ::R15D, ZII);
+
+    recomputeLiveIns(*DoneMBB);
+    recomputeLiveIns(*LoopMBB);
+  }
+
+  if (Residual)
+    allocateAndProbe(*MBB, MBBI, Residual, true/*EmitCFI*/);
+
+  StackAllocMI->eraseFromParent();
+}
+
 bool SystemZFrameLowering::hasFP(const MachineFunction &MF) const {
   return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
           MF.getFrameInfo().hasVarSizedObjects() ||
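Editor's note: a rough sketch of the probing arithmetic used by inlineStackProbe() above, with made-up numbers (the real values come from the frame size and getStackProbeSize()):

#include <stdint.h>

/* For StackSize = 17000 and ProbeSize = 4096:
 *   NumFullBlocks = 17000 / 4096 = 4   -> at least 3, so a probing loop is emitted
 *   Residual      = 17000 % 4096 = 616 -> one final allocate-and-probe
 * With fewer than 3 full blocks the probes are simply unrolled. */
void probe_plan(uint64_t StackSize, uint64_t ProbeSize,
                uint64_t *NumFullBlocks, uint64_t *Residual, int *UseLoop) {
  *NumFullBlocks = StackSize / ProbeSize; /* whole ProbeSize-sized blocks */
  *Residual = StackSize % ProbeSize;      /* leftover partial block */
  *UseLoop = (*NumFullBlocks >= 3);       /* loop rather than unrolled probes */
}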
@@ -43,6 +43,8 @@ public:
                             RegScavenger *RS) const override;
   void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
   void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+  void inlineStackProbe(MachineFunction &MF,
+                        MachineBasicBlock &PrologMBB) const override;
   bool hasFP(const MachineFunction &MF) const override;
   bool hasReservedCallFrame(const MachineFunction &MF) const override;
   int getFrameIndexReference(const MachineFunction &MF, int FI,
@@ -826,6 +826,15 @@ bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
   return SystemZVectorConstantInfo(Imm).isVectorConstantLegal(Subtarget);
 }

+/// Returns true if stack probing through inline assembly is requested.
+bool SystemZTargetLowering::hasInlineStackProbe(MachineFunction &MF) const {
+  // If the function specifically requests inline stack probes, emit them.
+  if (MF.getFunction().hasFnAttribute("probe-stack"))
+    return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
+           "inline-asm";
+  return false;
+}
+
 bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
   // We can use CGFI or CLGFI.
   return isInt<32>(Imm) || isUInt<32>(Imm);
@@ -3428,10 +3437,17 @@ lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
                               DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));

   // Get the new stack pointer value.
-  SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
-
-  // Copy the new stack pointer back.
-  Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
+  SDValue NewSP;
+  if (hasInlineStackProbe(MF)) {
+    NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL,
+                        DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace);
+    Chain = NewSP.getValue(1);
+  }
+  else {
+    NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
+    // Copy the new stack pointer back.
+    Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
+  }

   // The allocated data lives above the 160 bytes allocated for the standard
   // frame, plus any outgoing stack arguments. We don't know how much that
@@ -5400,6 +5416,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
     OPCODE(BR_CCMASK);
     OPCODE(SELECT_CCMASK);
     OPCODE(ADJDYNALLOC);
+    OPCODE(PROBED_ALLOCA);
     OPCODE(POPCNT);
     OPCODE(SMUL_LOHI);
     OPCODE(UMUL_LOHI);
@@ -6825,38 +6842,29 @@ SystemZTargetLowering::ComputeNumSignBitsForTargetNode(
   return 1;
 }

+unsigned
+SystemZTargetLowering::getStackProbeSize(MachineFunction &MF) const {
+  const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
+  unsigned StackAlign = TFI->getStackAlignment();
+  assert(StackAlign >=1 && isPowerOf2_32(StackAlign) &&
+         "Unexpected stack alignment");
+  // The default stack probe size is 4096 if the function has no
+  // stack-probe-size attribute.
+  unsigned StackProbeSize = 4096;
+  const Function &Fn = MF.getFunction();
+  if (Fn.hasFnAttribute("stack-probe-size"))
+    Fn.getFnAttribute("stack-probe-size")
+        .getValueAsString()
+        .getAsInteger(0, StackProbeSize);
+  // Round down to the stack alignment.
+  StackProbeSize &= ~(StackAlign - 1);
+  return StackProbeSize ? StackProbeSize : StackAlign;
+}
+
 //===----------------------------------------------------------------------===//
 // Custom insertion
 //===----------------------------------------------------------------------===//

-// Create a new basic block after MBB.
-static MachineBasicBlock *emitBlockAfter(MachineBasicBlock *MBB) {
-  MachineFunction &MF = *MBB->getParent();
-  MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock());
-  MF.insert(std::next(MachineFunction::iterator(MBB)), NewMBB);
-  return NewMBB;
-}
-
-// Split MBB after MI and return the new block (the one that contains
-// instructions after MI).
-static MachineBasicBlock *splitBlockAfter(MachineBasicBlock::iterator MI,
-                                          MachineBasicBlock *MBB) {
-  MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
-  NewMBB->splice(NewMBB->begin(), MBB,
-                 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
-  NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
-  return NewMBB;
-}
-
-// Split MBB before MI and return the new block (the one that contains MI).
-static MachineBasicBlock *splitBlockBefore(MachineBasicBlock::iterator MI,
-                                           MachineBasicBlock *MBB) {
-  MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
-  NewMBB->splice(NewMBB->begin(), MBB, MI, MBB->end());
-  NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
-  return NewMBB;
-}
-
 // Force base value Base into a register before MI. Return the register.
 static Register forceReg(MachineInstr &MI, MachineOperand &Base,
                          const SystemZInstrInfo *TII) {
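Editor's note: the probe-size derivation in getStackProbeSize() above can be summarized by this small sketch (assumed behavior mirroring the hunk; the numbers in the comments match the tests further down):

/* "stack-probe-size"="4100" with 8-byte stack alignment -> 4100 & ~7 = 4096;
 * "stack-probe-size"="5" -> 5 & ~7 = 0, so fall back to the alignment (8). */
unsigned probe_size_for(unsigned Requested /* 0 if no attribute */,
                        unsigned StackAlign /* power of two */) {
  unsigned Size = Requested ? Requested : 4096; /* default probe size */
  Size &= ~(StackAlign - 1);                    /* round down to the alignment */
  return Size ? Size : StackAlign;              /* never below the alignment */
}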
@@ -7027,8 +7035,8 @@ SystemZTargetLowering::emitSelect(MachineInstr &MI,
   bool CCKilled =
       (LastMI->killsRegister(SystemZ::CC) || checkCCKill(*LastMI, MBB));
   MachineBasicBlock *StartMBB = MBB;
-  MachineBasicBlock *JoinMBB = splitBlockAfter(LastMI, MBB);
-  MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);
+  MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(LastMI, MBB);
+  MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);

   // Unless CC was killed in the last Select instruction, mark it as
   // live-in to both FalseMBB and JoinMBB.
@@ -7121,8 +7129,8 @@ MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
     CCMask ^= CCValid;

   MachineBasicBlock *StartMBB = MBB;
-  MachineBasicBlock *JoinMBB = splitBlockBefore(MI, MBB);
-  MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);
+  MachineBasicBlock *JoinMBB = SystemZ::splitBlockBefore(MI, MBB);
+  MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);

   // Unless CC was killed in the CondStore instruction, mark it as
   // live-in to both FalseMBB and JoinMBB.
@@ -7205,8 +7213,8 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(

   // Insert a basic block for the main loop.
   MachineBasicBlock *StartMBB = MBB;
-  MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
-  MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
+  MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
+  MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);

   // StartMBB:
   //   ...
@@ -7323,10 +7331,10 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(

   // Insert 3 basic blocks for the loop.
   MachineBasicBlock *StartMBB = MBB;
-  MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
-  MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
-  MachineBasicBlock *UseAltMBB = emitBlockAfter(LoopMBB);
-  MachineBasicBlock *UpdateMBB = emitBlockAfter(UseAltMBB);
+  MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
+  MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
+  MachineBasicBlock *UseAltMBB = SystemZ::emitBlockAfter(LoopMBB);
+  MachineBasicBlock *UpdateMBB = SystemZ::emitBlockAfter(UseAltMBB);

   // StartMBB:
   //   ...
@@ -7434,9 +7442,9 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,

   // Insert 2 basic blocks for the loop.
   MachineBasicBlock *StartMBB = MBB;
-  MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
-  MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
-  MachineBasicBlock *SetMBB = emitBlockAfter(LoopMBB);
+  MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
+  MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
+  MachineBasicBlock *SetMBB = SystemZ::emitBlockAfter(LoopMBB);

   // StartMBB:
   //   ...
@@ -7596,7 +7604,7 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
   // When generating more than one CLC, all but the last will need to
   // branch to the end when a difference is found.
   MachineBasicBlock *EndMBB = (Length > 256 && Opcode == SystemZ::CLC ?
-                               splitBlockAfter(MI, MBB) : nullptr);
+                               SystemZ::splitBlockAfter(MI, MBB) : nullptr);

   // Check for the loop form, in which operand 5 is the trip count.
   if (MI.getNumExplicitOperands() > 5) {
@@ -7620,9 +7628,10 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
   Register NextCountReg = MRI.createVirtualRegister(RC);

   MachineBasicBlock *StartMBB = MBB;
-  MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
-  MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
-  MachineBasicBlock *NextMBB = (EndMBB ? emitBlockAfter(LoopMBB) : LoopMBB);
+  MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
+  MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
+  MachineBasicBlock *NextMBB =
+      (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);

   // StartMBB:
   //   # fall through to LoopMMB
@@ -7738,7 +7747,7 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
     // If there's another CLC to go, branch to the end if a difference
     // was found.
     if (EndMBB && Length > 0) {
-      MachineBasicBlock *NextMBB = splitBlockBefore(MI, MBB);
+      MachineBasicBlock *NextMBB = SystemZ::splitBlockBefore(MI, MBB);
       BuildMI(MBB, DL, TII->get(SystemZ::BRC))
         .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
         .addMBB(EndMBB);
@@ -7778,8 +7787,8 @@ MachineBasicBlock *SystemZTargetLowering::emitStringWrapper(
   uint64_t End2Reg = MRI.createVirtualRegister(RC);

   MachineBasicBlock *StartMBB = MBB;
-  MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
-  MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
+  MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
+  MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);

   // StartMBB:
   //   # fall through to LoopMMB
@@ -7890,6 +7899,97 @@ MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
   return MBB;
 }

+MachineBasicBlock *SystemZTargetLowering::emitProbedAlloca(
+    MachineInstr &MI, MachineBasicBlock *MBB) const {
+  MachineFunction &MF = *MBB->getParent();
+  MachineRegisterInfo *MRI = &MF.getRegInfo();
+  const SystemZInstrInfo *TII =
+      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
+  DebugLoc DL = MI.getDebugLoc();
+  const unsigned ProbeSize = getStackProbeSize(MF);
+  Register DstReg = MI.getOperand(0).getReg();
+  Register SizeReg = MI.getOperand(2).getReg();
+
+  MachineBasicBlock *StartMBB = MBB;
+  MachineBasicBlock *DoneMBB = SystemZ::splitBlockAfter(MI, MBB);
+  MachineBasicBlock *LoopTestMBB = SystemZ::emitBlockAfter(StartMBB);
+  MachineBasicBlock *LoopBodyMBB = SystemZ::emitBlockAfter(LoopTestMBB);
+  MachineBasicBlock *TailTestMBB = SystemZ::emitBlockAfter(LoopBodyMBB);
+  MachineBasicBlock *TailMBB = SystemZ::emitBlockAfter(TailTestMBB);
+
+  MachineMemOperand *VolLdMMO = MF.getMachineMemOperand(MachinePointerInfo(),
+      MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad, 8, Align(1));
+
+  Register PHIReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
+  Register IncReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
+
+  // LoopTestMBB
+  // BRC TailTestMBB
+  // # fallthrough to LoopBodyMBB
+  StartMBB->addSuccessor(LoopTestMBB);
+  MBB = LoopTestMBB;
+  BuildMI(MBB, DL, TII->get(SystemZ::PHI), PHIReg)
+      .addReg(SizeReg)
+      .addMBB(StartMBB)
+      .addReg(IncReg)
+      .addMBB(LoopBodyMBB);
+  BuildMI(MBB, DL, TII->get(SystemZ::CLGFI))
+      .addReg(PHIReg)
+      .addImm(ProbeSize);
+  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
+      .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_LT)
+      .addMBB(TailTestMBB);
+  MBB->addSuccessor(LoopBodyMBB);
+  MBB->addSuccessor(TailTestMBB);
+
+  // LoopBodyMBB: Allocate and probe by means of a volatile compare.
+  // J LoopTestMBB
+  MBB = LoopBodyMBB;
+  BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), IncReg)
+      .addReg(PHIReg)
+      .addImm(ProbeSize);
+  BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), SystemZ::R15D)
+      .addReg(SystemZ::R15D)
+      .addImm(ProbeSize);
+  BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
+      .addReg(SystemZ::R15D).addImm(ProbeSize - 8).addReg(0)
+      .setMemRefs(VolLdMMO);
+  BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(LoopTestMBB);
+  MBB->addSuccessor(LoopTestMBB);
+
+  // TailTestMBB
+  // BRC DoneMBB
+  // # fallthrough to TailMBB
+  MBB = TailTestMBB;
+  BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
+      .addReg(PHIReg)
+      .addImm(0);
+  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
+      .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
+      .addMBB(DoneMBB);
+  MBB->addSuccessor(TailMBB);
+  MBB->addSuccessor(DoneMBB);
+
+  // TailMBB
+  // # fallthrough to DoneMBB
+  MBB = TailMBB;
+  BuildMI(MBB, DL, TII->get(SystemZ::SLGR), SystemZ::R15D)
+      .addReg(SystemZ::R15D)
+      .addReg(PHIReg);
+  BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
+      .addReg(SystemZ::R15D).addImm(-8).addReg(PHIReg)
+      .setMemRefs(VolLdMMO);
+  MBB->addSuccessor(DoneMBB);
+
+  // DoneMBB
+  MBB = DoneMBB;
+  BuildMI(*MBB, MBB->begin(), DL, TII->get(TargetOpcode::COPY), DstReg)
+      .addReg(SystemZ::R15D);
+
+  MI.eraseFromParent();
+  return DoneMBB;
+}
+
 MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
     MachineInstr &MI, MachineBasicBlock *MBB) const {
   switch (MI.getOpcode()) {
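Editor's note: a C-level model of the control flow that emitProbedAlloca() above expands PROBED_ALLOCA into (illustrative only; the real lowering works on machine basic blocks and s390x instructions such as CLGFI, SLGFI and the volatile CG probe):

void probed_dynamic_alloca_model(unsigned long Size, unsigned long ProbeSize) {
  unsigned long Left = Size;
  while (Left >= ProbeSize) {   /* LoopTestMBB -> LoopBodyMBB */
    Left -= ProbeSize;
    /* sp -= ProbeSize; probe the newly exposed block */
  }
  if (Left != 0) {              /* TailTestMBB -> TailMBB */
    /* sp -= Left; probe the final partial block */
  }
  /* DoneMBB: the result register receives the new stack pointer */
}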
@@ -8150,6 +8250,9 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
   case SystemZ::LTXBRCompare_VecPseudo:
     return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);

+  case SystemZ::PROBED_ALLOCA:
+    return emitProbedAlloca(MI, MBB);
+
   case TargetOpcode::STACKMAP:
   case TargetOpcode::PATCHPOINT:
     return emitPatchPoint(MI, MBB);
@@ -83,6 +83,10 @@ enum NodeType : unsigned {
   // base of the dynamically-allocatable area.
   ADJDYNALLOC,

+  // For allocating stack space when using stack clash protector.
+  // Allocation is performed by block, and each block is probed.
+  PROBED_ALLOCA,
+
   // Count number of bits set in operand 0 per byte.
   POPCNT,
@@ -428,6 +432,7 @@ public:
                                  EVT VT) const override;
   bool isFPImmLegal(const APFloat &Imm, EVT VT,
                     bool ForCodeSize) const override;
+  bool hasInlineStackProbe(MachineFunction &MF) const override;
   bool isLegalICmpImmediate(int64_t Imm) const override;
   bool isLegalAddImmediate(int64_t Imm) const override;
   bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
@@ -556,6 +561,8 @@ public:
     return true;
   }

+  unsigned getStackProbeSize(MachineFunction &MF) const;
+
 private:
   const SystemZSubtarget &Subtarget;
@@ -691,6 +698,8 @@ private:
   MachineBasicBlock *emitLoadAndTestCmp0(MachineInstr &MI,
                                          MachineBasicBlock *MBB,
                                          unsigned Opcode) const;
+  MachineBasicBlock *emitProbedAlloca(MachineInstr &MI,
+                                      MachineBasicBlock *MBB) const;

   MachineMemOperand::Flags
   getTargetMMOFlags(const Instruction &I) const override;
@@ -1872,6 +1872,30 @@ unsigned SystemZ::reverseCCMask(unsigned CCMask) {
           (CCMask & SystemZ::CCMASK_CMP_UO));
 }

+MachineBasicBlock *SystemZ::emitBlockAfter(MachineBasicBlock *MBB) {
+  MachineFunction &MF = *MBB->getParent();
+  MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock());
+  MF.insert(std::next(MachineFunction::iterator(MBB)), NewMBB);
+  return NewMBB;
+}
+
+MachineBasicBlock *SystemZ::splitBlockAfter(MachineBasicBlock::iterator MI,
+                                            MachineBasicBlock *MBB) {
+  MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
+  NewMBB->splice(NewMBB->begin(), MBB,
+                 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
+  NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
+  return NewMBB;
+}
+
+MachineBasicBlock *SystemZ::splitBlockBefore(MachineBasicBlock::iterator MI,
+                                             MachineBasicBlock *MBB) {
+  MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
+  NewMBB->splice(NewMBB->begin(), MBB, MI, MBB->end());
+  NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
+  return NewMBB;
+}
+
 unsigned SystemZInstrInfo::getLoadAndTrap(unsigned Opcode) const {
   if (!STI.hasLoadAndTrap())
     return 0;
@@ -159,6 +159,16 @@ int getTargetMemOpcode(uint16_t Opcode);
 // Return a version of comparison CC mask CCMask in which the LT and GT
 // actions are swapped.
 unsigned reverseCCMask(unsigned CCMask);
+
+// Create a new basic block after MBB.
+MachineBasicBlock *emitBlockAfter(MachineBasicBlock *MBB);
+// Split MBB after MI and return the new block (the one that contains
+// instructions after MI).
+MachineBasicBlock *splitBlockAfter(MachineBasicBlock::iterator MI,
+                                   MachineBasicBlock *MBB);
+// Split MBB before MI and return the new block (the one that contains MI).
+MachineBasicBlock *splitBlockBefore(MachineBasicBlock::iterator MI,
+                                    MachineBasicBlock *MBB);
 }

 class SystemZInstrInfo : public SystemZGenInstrInfo {
@@ -29,6 +29,15 @@ let hasNoSchedulingInfo = 1, hasSideEffects = 1 in {
   def ADJDYNALLOC : Pseudo<(outs GR64:$dst), (ins dynalloc12only:$src),
                            [(set GR64:$dst, dynalloc12only:$src)]>;

+let Defs = [R15D, CC], Uses = [R15D], hasNoSchedulingInfo = 1,
+    usesCustomInserter = 1 in
+  def PROBED_ALLOCA : Pseudo<(outs GR64:$dst),
+                             (ins GR64:$oldSP, GR64:$space),
+                             [(set GR64:$dst, (z_probed_alloca GR64:$oldSP, GR64:$space))]>;
+
+let Defs = [R1D, R15D, CC], Uses = [R15D], hasNoSchedulingInfo = 1,
+    hasSideEffects = 1 in
+  def PROBED_STACKALLOC : Pseudo<(outs), (ins i64imm:$stacksize), []>;
+
 //===----------------------------------------------------------------------===//
 // Branch instructions
@@ -40,6 +40,10 @@ def SDT_ZWrapOffset : SDTypeProfile<1, 2,
                                      SDTCisSameAs<0, 2>,
                                      SDTCisPtrTy<0>]>;
 def SDT_ZAdjDynAlloc : SDTypeProfile<1, 0, [SDTCisVT<0, i64>]>;
+def SDT_ZProbedAlloca : SDTypeProfile<1, 2,
+                                      [SDTCisSameAs<0, 1>,
+                                       SDTCisSameAs<0, 2>,
+                                       SDTCisPtrTy<0>]>;
 def SDT_ZGR128Binary : SDTypeProfile<1, 2,
                                      [SDTCisVT<0, untyped>,
                                       SDTCisInt<1>,
@@ -269,6 +273,8 @@ def z_select_ccmask_1 : SDNode<"SystemZISD::SELECT_CCMASK",
                                SDT_ZSelectCCMask>;
 def z_ipm_1 : SDNode<"SystemZISD::IPM", SDT_ZIPM>;
 def z_adjdynalloc : SDNode<"SystemZISD::ADJDYNALLOC", SDT_ZAdjDynAlloc>;
+def z_probed_alloca : SDNode<"SystemZISD::PROBED_ALLOCA", SDT_ZProbedAlloca,
+                             [SDNPHasChain]>;
 def z_popcnt : SDNode<"SystemZISD::POPCNT", SDTIntUnaryOp>;
 def z_smul_lohi : SDNode<"SystemZISD::SMUL_LOHI", SDT_ZGR128Binary>;
 def z_umul_lohi : SDNode<"SystemZISD::UMUL_LOHI", SDT_ZGR128Binary>;
@@ -0,0 +1,136 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s

define i32 @fun0(i32 %n) #0 {
; CHECK-LABEL: fun0:
; CHECK: # %bb.0:
; CHECK-NEXT: stmg %r11, %r15, 88(%r15)
; CHECK-NEXT: .cfi_offset %r11, -72
; CHECK-NEXT: .cfi_offset %r15, -40
; CHECK-NEXT: aghi %r15, -160
; CHECK-NEXT: .cfi_def_cfa_offset 320
; CHECK-NEXT: lgr %r11, %r15
; CHECK-NEXT: .cfi_def_cfa_register %r11
; CHECK-NEXT: # kill: def $r2l killed $r2l def $r2d
; CHECK-NEXT: risbgn %r1, %r2, 30, 189, 2
; CHECK-NEXT: la %r0, 7(%r1)
; CHECK-NEXT: risbgn %r1, %r0, 29, 188, 0
; CHECK-NEXT: clgfi %r1, 4096
; CHECK-NEXT: jl .LBB0_2
; CHECK-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: slgfi %r1, 4096
; CHECK-NEXT: slgfi %r15, 4096
; CHECK-NEXT: cg %r15, 4088(%r15)
; CHECK-NEXT: clgfi %r1, 4096
; CHECK-NEXT: jhe .LBB0_1
; CHECK-NEXT: .LBB0_2:
; CHECK-NEXT: cgije %r1, 0, .LBB0_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: slgr %r15, %r1
; CHECK-NEXT: cg %r15, -8(%r1,%r15)
; CHECK-NEXT: .LBB0_4:
; CHECK-NEXT: la %r1, 160(%r15)
; CHECK-NEXT: lhi %r0, 1
; CHECK-NEXT: sty %r0, 4792(%r1)
; CHECK-NEXT: l %r2, 0(%r1)
; CHECK-NEXT: lmg %r11, %r15, 248(%r11)
; CHECK-NEXT: br %r14

  %a = alloca i32, i32 %n
  %b = getelementptr inbounds i32, i32* %a, i64 1198
  store volatile i32 1, i32* %b
  %c = load volatile i32, i32* %a
  ret i32 %c
}

; Probe size should be modulo stack alignment.
define i32 @fun1(i32 %n) #0 "stack-probe-size"="1250" {
; CHECK-LABEL: fun1:
; CHECK: # %bb.0:
; CHECK-NEXT: stmg %r11, %r15, 88(%r15)
; CHECK-NEXT: .cfi_offset %r11, -72
; CHECK-NEXT: .cfi_offset %r15, -40
; CHECK-NEXT: aghi %r15, -160
; CHECK-NEXT: .cfi_def_cfa_offset 320
; CHECK-NEXT: lgr %r11, %r15
; CHECK-NEXT: .cfi_def_cfa_register %r11
; CHECK-NEXT: # kill: def $r2l killed $r2l def $r2d
; CHECK-NEXT: risbgn %r1, %r2, 30, 189, 2
; CHECK-NEXT: la %r0, 7(%r1)
; CHECK-NEXT: risbgn %r1, %r0, 29, 188, 0
; CHECK-NEXT: clgfi %r1, 1248
; CHECK-NEXT: jl .LBB1_2
; CHECK-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: slgfi %r1, 1248
; CHECK-NEXT: slgfi %r15, 1248
; CHECK-NEXT: cg %r15, 1240(%r15)
; CHECK-NEXT: clgfi %r1, 1248
; CHECK-NEXT: jhe .LBB1_1
; CHECK-NEXT: .LBB1_2:
; CHECK-NEXT: cgije %r1, 0, .LBB1_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: slgr %r15, %r1
; CHECK-NEXT: cg %r15, -8(%r1,%r15)
; CHECK-NEXT: .LBB1_4:
; CHECK-NEXT: la %r1, 160(%r15)
; CHECK-NEXT: lhi %r0, 1
; CHECK-NEXT: sty %r0, 4792(%r1)
; CHECK-NEXT: l %r2, 0(%r1)
; CHECK-NEXT: lmg %r11, %r15, 248(%r11)
; CHECK-NEXT: br %r14
  %a = alloca i32, i32 %n
  %b = getelementptr inbounds i32, i32* %a, i64 1198
  store volatile i32 1, i32* %b
  %c = load volatile i32, i32* %a
  ret i32 %c
}

; The minimum probe size is the stack alignment.
define i32 @fun2(i32 %n) #0 "stack-probe-size"="4" {
; CHECK-LABEL: fun2:
; CHECK: # %bb.0:
; CHECK-NEXT: stmg %r11, %r15, 88(%r15)
; CHECK-NEXT: .cfi_offset %r11, -72
; CHECK-NEXT: .cfi_offset %r15, -40
; CHECK-NEXT: lgr %r1, %r15
; CHECK-NEXT: .cfi_def_cfa_register %r1
; CHECK-NEXT: aghi %r1, -160
; CHECK-NEXT: .cfi_def_cfa_offset 320
; CHECK-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: aghi %r15, -8
; CHECK-NEXT: cg %r0, 0(%r15)
; CHECK-NEXT: clgrjh %r15, %r1, .LBB2_1
; CHECK-NEXT: # %bb.2:
; CHECK-NEXT: .cfi_def_cfa_register %r15
; CHECK-NEXT: lgr %r11, %r15
; CHECK-NEXT: .cfi_def_cfa_register %r11
; CHECK-NEXT: # kill: def $r2l killed $r2l def $r2d
; CHECK-NEXT: risbgn %r1, %r2, 30, 189, 2
; CHECK-NEXT: la %r0, 7(%r1)
; CHECK-NEXT: risbgn %r1, %r0, 29, 188, 0
; CHECK-NEXT: clgijl %r1, 8, .LBB2_4
; CHECK-NEXT: .LBB2_3: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: slgfi %r1, 8
; CHECK-NEXT: slgfi %r15, 8
; CHECK-NEXT: cg %r15, 0(%r15)
; CHECK-NEXT: clgijhe %r1, 8, .LBB2_3
; CHECK-NEXT: .LBB2_4:
; CHECK-NEXT: cgije %r1, 0, .LBB2_6
; CHECK-NEXT: # %bb.5:
; CHECK-NEXT: slgr %r15, %r1
; CHECK-NEXT: cg %r15, -8(%r1,%r15)
; CHECK-NEXT: .LBB2_6:
; CHECK-NEXT: la %r1, 160(%r15)
; CHECK-NEXT: lhi %r0, 1
; CHECK-NEXT: sty %r0, 4792(%r1)
; CHECK-NEXT: l %r2, 0(%r1)
; CHECK-NEXT: lmg %r11, %r15, 248(%r11)
; CHECK-NEXT: br %r14
  %a = alloca i32, i32 %n
  %b = getelementptr inbounds i32, i32* %a, i64 1198
  store volatile i32 1, i32* %b
  %c = load volatile i32, i32* %a
  ret i32 %c
}

attributes #0 = {"probe-stack"="inline-asm"}
@@ -0,0 +1,242 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 -O3 | FileCheck %s
;
; Test stack clash protection probing for static allocas.

; Small: one probe.
define i32 @fun0() #0 {
; CHECK-LABEL: fun0:
; CHECK: # %bb.0:
; CHECK-NEXT: aghi %r15, -560
; CHECK-NEXT: .cfi_def_cfa_offset 720
; CHECK-NEXT: cg %r0, 552(%r15)
; CHECK-NEXT: mvhi 552(%r15), 1
; CHECK-NEXT: l %r2, 160(%r15)
; CHECK-NEXT: aghi %r15, 560
; CHECK-NEXT: br %r14

  %a = alloca i32, i64 100
  %b = getelementptr inbounds i32, i32* %a, i64 98
  store volatile i32 1, i32* %b
  %c = load volatile i32, i32* %a
  ret i32 %c
}

; Medium: two probes.
define i32 @fun1() #0 {
; CHECK-LABEL: fun1:
; CHECK: # %bb.0:
; CHECK-NEXT: aghi %r15, -4096
; CHECK-NEXT: .cfi_def_cfa_offset 4256
; CHECK-NEXT: cg %r0, 4088(%r15)
; CHECK-NEXT: aghi %r15, -4080
; CHECK-NEXT: .cfi_def_cfa_offset 8336
; CHECK-NEXT: cg %r0, 4072(%r15)
; CHECK-NEXT: mvhi 976(%r15), 1
; CHECK-NEXT: l %r2, 176(%r15)
; CHECK-NEXT: aghi %r15, 8176
; CHECK-NEXT: br %r14

  %a = alloca i32, i64 2000
  %b = getelementptr inbounds i32, i32* %a, i64 200
  store volatile i32 1, i32* %b
  %c = load volatile i32, i32* %a
  ret i32 %c
}

; Large: Use a loop to allocate and probe in steps.
define i32 @fun2() #0 {
; CHECK-LABEL: fun2:
; CHECK: # %bb.0:
; CHECK-NEXT: lgr %r1, %r15
; CHECK-NEXT: .cfi_def_cfa_register %r1
; CHECK-NEXT: agfi %r1, -69632
; CHECK-NEXT: .cfi_def_cfa_offset 69792
; CHECK-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: aghi %r15, -4096
; CHECK-NEXT: cg %r0, 4088(%r15)
; CHECK-NEXT: clgrjh %r15, %r1, .LBB2_1
; CHECK-NEXT: # %bb.2:
; CHECK-NEXT: .cfi_def_cfa_register %r15
; CHECK-NEXT: aghi %r15, -2544
; CHECK-NEXT: .cfi_def_cfa_offset 72336
; CHECK-NEXT: cg %r0, 2536(%r15)
; CHECK-NEXT: lhi %r0, 1
; CHECK-NEXT: mvhi 568(%r15), 1
; CHECK-NEXT: sty %r0, 28968(%r15)
; CHECK-NEXT: l %r2, 176(%r15)
; CHECK-NEXT: agfi %r15, 72176
; CHECK-NEXT: br %r14

  %a = alloca i32, i64 18000
  %b0 = getelementptr inbounds i32, i32* %a, i64 98
  %b1 = getelementptr inbounds i32, i32* %a, i64 7198
  store volatile i32 1, i32* %b0
  store volatile i32 1, i32* %b1
  %c = load volatile i32, i32* %a
  ret i32 %c
}

; Ends evenly on the step so no remainder needed.
define void @fun3() #0 {
; CHECK-LABEL: fun3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lgr %r1, %r15
; CHECK-NEXT: .cfi_def_cfa_register %r1
; CHECK-NEXT: aghi %r1, -28672
; CHECK-NEXT: .cfi_def_cfa_offset 28832
; CHECK-NEXT: .LBB3_1: # %entry
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: aghi %r15, -4096
; CHECK-NEXT: cg %r0, 4088(%r15)
; CHECK-NEXT: clgrjh %r15, %r1, .LBB3_1
; CHECK-NEXT: # %bb.2: # %entry
; CHECK-NEXT: .cfi_def_cfa_register %r15
; CHECK-NEXT: mvhi 180(%r15), 0
; CHECK-NEXT: l %r0, 180(%r15)
; CHECK-NEXT: aghi %r15, 28672
; CHECK-NEXT: br %r14
entry:
  %stack = alloca [7122 x i32], align 4
  %i = alloca i32, align 4
  %0 = bitcast [7122 x i32]* %stack to i8*
  %i.0.i.0..sroa_cast = bitcast i32* %i to i8*
  store volatile i32 0, i32* %i, align 4
  %i.0.i.0.6 = load volatile i32, i32* %i, align 4
  ret void
}

; Loop with bigger step.
define void @fun4() #0 "stack-probe-size"="8192" {
; CHECK-LABEL: fun4:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lgr %r1, %r15
; CHECK-NEXT: .cfi_def_cfa_register %r1
; CHECK-NEXT: aghi %r1, -24576
; CHECK-NEXT: .cfi_def_cfa_offset 24736
; CHECK-NEXT: .LBB4_1: # %entry
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: aghi %r15, -8192
; CHECK-NEXT: cg %r0, 8184(%r15)
; CHECK-NEXT: clgrjh %r15, %r1, .LBB4_1
; CHECK-NEXT: # %bb.2: # %entry
; CHECK-NEXT: .cfi_def_cfa_register %r15
; CHECK-NEXT: aghi %r15, -7608
; CHECK-NEXT: .cfi_def_cfa_offset 32344
; CHECK-NEXT: cg %r0, 7600(%r15)
; CHECK-NEXT: mvhi 180(%r15), 0
; CHECK-NEXT: l %r0, 180(%r15)
; CHECK-NEXT: aghi %r15, 32184
; CHECK-NEXT: br %r14
entry:
  %stack = alloca [8000 x i32], align 4
  %i = alloca i32, align 4
  %0 = bitcast [8000 x i32]* %stack to i8*
  %i.0.i.0..sroa_cast = bitcast i32* %i to i8*
  store volatile i32 0, i32* %i, align 4
  %i.0.i.0.6 = load volatile i32, i32* %i, align 4
  ret void
}

; Probe size should be modulo stack alignment.
define void @fun5() #0 "stack-probe-size"="4100" {
; CHECK-LABEL: fun5:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: aghi %r15, -4096
; CHECK-NEXT: .cfi_def_cfa_offset 4256
; CHECK-NEXT: cg %r0, 4088(%r15)
; CHECK-NEXT: aghi %r15, -88
; CHECK-NEXT: .cfi_def_cfa_offset 4344
; CHECK-NEXT: cg %r0, 80(%r15)
; CHECK-NEXT: mvhi 180(%r15), 0
; CHECK-NEXT: l %r0, 180(%r15)
; CHECK-NEXT: aghi %r15, 4184
; CHECK-NEXT: br %r14
entry:
  %stack = alloca [1000 x i32], align 4
  %i = alloca i32, align 4
  %0 = bitcast [1000 x i32]* %stack to i8*
  %i.0.i.0..sroa_cast = bitcast i32* %i to i8*
  store volatile i32 0, i32* %i, align 4
  %i.0.i.0.6 = load volatile i32, i32* %i, align 4
  ret void
}

; The minimum probe size is the stack alignment.
define void @fun6() #0 "stack-probe-size"="5" {
; CHECK-LABEL: fun6:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lgr %r1, %r15
; CHECK-NEXT: .cfi_def_cfa_register %r1
; CHECK-NEXT: aghi %r1, -4184
; CHECK-NEXT: .cfi_def_cfa_offset 4344
; CHECK-NEXT: .LBB6_1: # %entry
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: aghi %r15, -8
; CHECK-NEXT: cg %r0, 0(%r15)
; CHECK-NEXT: clgrjh %r15, %r1, .LBB6_1
; CHECK-NEXT: # %bb.2: # %entry
; CHECK-NEXT: .cfi_def_cfa_register %r15
; CHECK-NEXT: mvhi 180(%r15), 0
; CHECK-NEXT: l %r0, 180(%r15)
; CHECK-NEXT: aghi %r15, 4184
; CHECK-NEXT: br %r14
entry:
  %stack = alloca [1000 x i32], align 4
  %i = alloca i32, align 4
  %0 = bitcast [1000 x i32]* %stack to i8*
  %i.0.i.0..sroa_cast = bitcast i32* %i to i8*
  store volatile i32 0, i32* %i, align 4
  %i.0.i.0.6 = load volatile i32, i32* %i, align 4
  ret void
}

; Small with a natural probe (STMG) - needs no extra probe.
define i32 @fun7() #0 {
; CHECK-LABEL: fun7:
; CHECK: # %bb.0:
; CHECK-NEXT: stmg %r14, %r15, 112(%r15)
; CHECK-NEXT: .cfi_offset %r14, -48
; CHECK-NEXT: .cfi_offset %r15, -40
; CHECK-NEXT: aghi %r15, -3976
; CHECK-NEXT: .cfi_def_cfa_offset 4136
; CHECK-NEXT: brasl %r14, foo@PLT
; CHECK-NEXT: st %r2, 568(%r15)
; CHECK-NEXT: l %r2, 176(%r15)
; CHECK-NEXT: lmg %r14, %r15, 4088(%r15)
; CHECK-NEXT: br %r14
  %v = call i32 @foo()
  %a = alloca i32, i64 950
  %b = getelementptr inbounds i32, i32* %a, i64 98
  store volatile i32 %v, i32* %b
  %c = load volatile i32, i32* %a
  ret i32 %c
}

; Medium with an STMG - still needs probing.
define i32 @fun8() #0 {
; CHECK-LABEL: fun8:
; CHECK: # %bb.0:
; CHECK-NEXT: stmg %r14, %r15, 112(%r15)
; CHECK-NEXT: .cfi_offset %r14, -48
; CHECK-NEXT: .cfi_offset %r15, -40
; CHECK-NEXT: aghi %r15, -3984
; CHECK-NEXT: .cfi_def_cfa_offset 4144
; CHECK-NEXT: cg %r0, 3976(%r15)
; CHECK-NEXT: brasl %r14, foo@PLT
; CHECK-NEXT: st %r2, 976(%r15)
; CHECK-NEXT: l %r2, 176(%r15)
; CHECK-NEXT: lmg %r14, %r15, 4096(%r15)
; CHECK-NEXT: br %r14

  %v = call i32 @foo()
  %a = alloca i32, i64 952
  %b = getelementptr inbounds i32, i32* %a, i64 200
  store volatile i32 %v, i32* %b
  %c = load volatile i32, i32* %a
  ret i32 %c
}

declare i32 @foo()
attributes #0 = { "probe-stack"="inline-asm" }