forked from OSchip/llvm-project
Support -fstack-clash-protection for x86
Implement protection against the stack clash attack [0] through inline stack
probing.
Probe stack allocation every PAGE_SIZE during frame lowering or dynamic
allocation to make sure the page guard, if any, is touched when touching the
stack, in a similar manner to GCC[1].
This extends the existing `probe-stack' mechanism with a special value `inline-asm'.
Technically the former uses a function call before stack allocation while this
patch provides inlined stack probes and chunk allocation.
Only implemented for x86.
[0] https://www.qualys.com/2017/06/19/stack-clash/stack-clash.txt
[1] https://gcc.gnu.org/ml/gcc-patches/2017-07/msg00556.html
This is a recommit of 39f50da2a3
with better option
handling and more portable testing
Differential Revision: https://reviews.llvm.org/D68720
This commit is contained in:
parent
ef83d46b6b
commit
e229017732
|
@ -1917,6 +1917,10 @@ Use a strong heuristic to apply stack protectors to functions
|
|||
|
||||
Emit section containing metadata on function stack sizes
|
||||
|
||||
.. option:: -fstack-clash-protection, -fno-stack-clash-protection
|
||||
|
||||
Instrument stack allocation to prevent stack clash attacks (x86, Linux only).
|
||||
|
||||
.. option:: -fstandalone-debug, -fno-limit-debug-info, -fno-standalone-debug
|
||||
|
||||
Emit full debug info for all types used by the program
|
||||
|
|
|
@ -61,6 +61,10 @@ New Compiler Flags
|
|||
------------------
|
||||
|
||||
|
||||
- -fstack-clash-protection will provide protection against the stack clash
|
||||
attack for x86 architecture through automatic probing of each page of
|
||||
allocated stack.
|
||||
|
||||
Deprecated Compiler Flags
|
||||
-------------------------
|
||||
|
||||
|
|
|
@ -150,6 +150,7 @@ CODEGENOPT(NoWarn , 1, 0) ///< Set when -Wa,--no-warn is enabled.
|
|||
CODEGENOPT(EnableSegmentedStacks , 1, 0) ///< Set when -fsplit-stack is enabled.
|
||||
CODEGENOPT(NoInlineLineTables, 1, 0) ///< Whether debug info should contain
|
||||
///< inline line tables.
|
||||
CODEGENOPT(StackClashProtector, 1, 0) ///< Set when -fstack-clash-protection is enabled.
|
||||
CODEGENOPT(NoImplicitFloat , 1, 0) ///< Set when -mno-implicit-float is enabled.
|
||||
CODEGENOPT(NoInfsFPMath , 1, 0) ///< Assume FP arguments, results not +-Inf.
|
||||
CODEGENOPT(NoSignedZeros , 1, 0) ///< Allow ignoring the signedness of FP zero
|
||||
|
|
|
@ -239,6 +239,10 @@ def note_invalid_subexpr_in_const_expr : Note<
|
|||
let CategoryName = "Inline Assembly Issue" in {
|
||||
def err_asm_invalid_type_in_input : Error<
|
||||
"invalid type %0 in asm input for constraint '%1'">;
|
||||
|
||||
def warn_stack_clash_protection_inline_asm : Warning<
|
||||
"Unable to protect inline asm that clobbers stack pointer against stack clash">,
|
||||
InGroup<DiagGroup<"stack-protector">>;
|
||||
}
|
||||
|
||||
// Sema && Serialization
|
||||
|
|
|
@ -816,6 +816,8 @@ public:
|
|||
StringRef getNormalizedGCCRegisterName(StringRef Name,
|
||||
bool ReturnCanonical = false) const;
|
||||
|
||||
/// Returns true if the given register name designates the target's stack
/// pointer. This default implementation returns false for every name; targets
/// that want stack-pointer clobbers in inline asm to be recognized override it.
virtual bool isSPRegName(StringRef) const { return false; }
|
||||
|
||||
/// Extracts a register from the passed constraint (if it is a
|
||||
/// single-register constraint) and the asm label expression related to a
|
||||
/// variable in the input or output list of an inline asm statement.
|
||||
|
|
|
@ -1773,6 +1773,10 @@ def fno_signed_char : Flag<["-"], "fno-signed-char">, Group<f_Group>,
|
|||
def fsplit_stack : Flag<["-"], "fsplit-stack">, Group<f_Group>;
|
||||
def fstack_protector_all : Flag<["-"], "fstack-protector-all">, Group<f_Group>,
|
||||
HelpText<"Enable stack protectors for all functions">;
|
||||
def fstack_clash_protection : Flag<["-"], "fstack-clash-protection">, Group<f_Group>, Flags<[CC1Option]>,
|
||||
HelpText<"Enable stack clash protection">;
|
||||
def fnostack_clash_protection : Flag<["-"], "fnostack-clash-protection">, Group<f_Group>,
|
||||
HelpText<"Disable stack clash protection">;
|
||||
def fstack_protector_strong : Flag<["-"], "fstack-protector-strong">, Group<f_Group>,
|
||||
HelpText<"Enable stack protectors for some functions vulnerable to stack smashing. "
|
||||
"Compared to -fstack-protector, this uses a stronger heuristic "
|
||||
|
|
|
@ -166,6 +166,10 @@ public:
|
|||
|
||||
ArrayRef<TargetInfo::AddlRegName> getGCCAddlRegNames() const override;
|
||||
|
||||
/// Returns true when \p RegName is exactly "esp" or "rsp", the names this
/// target treats as the stack pointer.
bool isSPRegName(StringRef RegName) const override {
  if (RegName == "esp")
    return true;
  return RegName == "rsp";
}
|
||||
|
||||
bool validateCpuSupports(StringRef Name) const override;
|
||||
|
||||
bool validateCpuIs(StringRef Name) const override;
|
||||
|
|
|
@ -2247,8 +2247,14 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
|
|||
|
||||
if (Clobber == "memory")
|
||||
ReadOnly = ReadNone = false;
|
||||
else if (Clobber != "cc")
|
||||
else if (Clobber != "cc") {
|
||||
Clobber = getTarget().getNormalizedGCCRegisterName(Clobber);
|
||||
if (CGM.getCodeGenOpts().StackClashProtector &&
|
||||
getTarget().isSPRegName(Clobber)) {
|
||||
CGM.getDiags().Report(S.getAsmLoc(),
|
||||
diag::warn_stack_clash_protection_inline_asm);
|
||||
}
|
||||
}
|
||||
|
||||
if (!Constraints.empty())
|
||||
Constraints += ',';
|
||||
|
|
|
@ -1514,6 +1514,9 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
|
|||
if (CodeGenOpts.UnwindTables)
|
||||
B.addAttribute(llvm::Attribute::UWTable);
|
||||
|
||||
if (CodeGenOpts.StackClashProtector)
|
||||
B.addAttribute("probe-stack", "inline-asm");
|
||||
|
||||
if (!hasUnwindExceptions(LangOpts))
|
||||
B.addAttribute(llvm::Attribute::NoUnwind);
|
||||
|
||||
|
|
|
@ -3002,6 +3002,21 @@ static void RenderSSPOptions(const ToolChain &TC, const ArgList &Args,
|
|||
}
|
||||
}
|
||||
|
||||
static void RenderSCPOptions(const ToolChain &TC, const ArgList &Args,
|
||||
ArgStringList &CmdArgs) {
|
||||
const llvm::Triple &EffectiveTriple = TC.getEffectiveTriple();
|
||||
|
||||
if (!EffectiveTriple.isOSLinux())
|
||||
return;
|
||||
|
||||
if (!EffectiveTriple.isX86())
|
||||
return;
|
||||
|
||||
if (Args.hasFlag(options::OPT_fstack_clash_protection,
|
||||
options::OPT_fnostack_clash_protection, false))
|
||||
CmdArgs.push_back("-fstack-clash-protection");
|
||||
}
|
||||
|
||||
static void RenderTrivialAutoVarInitOptions(const Driver &D,
|
||||
const ToolChain &TC,
|
||||
const ArgList &Args,
|
||||
|
@ -5248,6 +5263,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
|
|||
CmdArgs.push_back(Args.MakeArgString("-mspeculative-load-hardening"));
|
||||
|
||||
RenderSSPOptions(TC, Args, CmdArgs, KernelOrKext);
|
||||
RenderSCPOptions(TC, Args, CmdArgs);
|
||||
RenderTrivialAutoVarInitOptions(D, TC, Args, CmdArgs);
|
||||
|
||||
// Translate -mstackrealign
|
||||
|
|
|
@ -1238,6 +1238,8 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
|
|||
|
||||
Opts.NoStackArgProbe = Args.hasArg(OPT_mno_stack_arg_probe);
|
||||
|
||||
Opts.StackClashProtector = Args.hasArg(OPT_fstack_clash_protection);
|
||||
|
||||
if (Arg *A = Args.getLastArg(OPT_fobjc_dispatch_method_EQ)) {
|
||||
StringRef Name = A->getValue();
|
||||
unsigned Method = llvm::StringSwitch<unsigned>(Name)
|
||||
|
|
|
@ -0,0 +1,22 @@
|
|||
// Check the correct function attributes are generated
|
||||
// RUN: %clang_cc1 -triple x86_64-linux -O0 -S -emit-llvm -o- %s -fstack-clash-protection | FileCheck %s
|
||||
|
||||
// CHECK: define void @large_stack() #[[A:.*]] {
|
||||
// Fixture with a large fixed-size frame: a 20000-element volatile int array
// that is written in full so the storage cannot be dropped.
void large_stack() {
|
||||
volatile int stack[20000], i;
|
||||
for (i = 0; i < sizeof(stack) / sizeof(int); ++i)
|
||||
stack[i] = i;
|
||||
}
|
||||
|
||||
// CHECK: define void @vla({{.*}}) #[[A:.*]] {
|
||||
// Fixture with a dynamically sized frame: a variable-length array of n ints
// whose first byte is cleared.
void vla(int n) {
|
||||
volatile int vla[n];
|
||||
__builtin_memset(&vla[0], 0, 1);
|
||||
}
|
||||
|
||||
// CHECK: define void @builtin_alloca({{.*}}) #[[A:.*]] {
|
||||
// Fixture with a dynamic allocation of n bytes made through __builtin_alloca.
void builtin_alloca(int n) {
|
||||
volatile void *mem = __builtin_alloca(n);
|
||||
}
|
||||
|
||||
// CHECK: attributes #[[A]] = {{.*}} "probe-stack"="inline-asm"
|
|
@ -0,0 +1,33 @@
|
|||
// RUN: %clang -target i386-unknown-linux -fstack-clash-protection -### %s 2>&1 | FileCheck %s -check-prefix=SCP-i386
|
||||
// RUN: %clang -target i386-unknown-linux -fnostack-clash-protection -fstack-clash-protection -### %s 2>&1 | FileCheck %s -check-prefix=SCP-i386
|
||||
// RUN: %clang -target i386-unknown-linux -fstack-clash-protection -fnostack-clash-protection -### %s 2>&1 | FileCheck %s -check-prefix=SCP-i386-NO
|
||||
// SCP-i386: "-fstack-clash-protection"
|
||||
// SCP-i386-NO-NOT: "-fstack-clash-protection"
|
||||
|
||||
// RUN: %clang -target x86_64-scei-linux -fstack-clash-protection -### %s 2>&1 | FileCheck %s -check-prefix=SCP-x86
|
||||
// SCP-x86: "-fstack-clash-protection"
|
||||
|
||||
// RUN: %clang -target armv7k-apple-linux -fstack-clash-protection -### %s 2>&1 | FileCheck %s -check-prefix=SCP-armv7
|
||||
// SCP-armv7-NOT: "-fstack-clash-protection"
|
||||
// SCP-armv7: argument unused during compilation: '-fstack-clash-protection'
|
||||
|
||||
// RUN: %clang -target x86_64-unknown-linux -fstack-clash-protection -c %s 2>&1 | FileCheck %s -check-prefix=SCP-warn
|
||||
// SCP-warn: warning: Unable to protect inline asm that clobbers stack pointer against stack clash
|
||||
|
||||
// RUN: %clang -target x86_64-pc-unknown-linux -fstack-clash-protection -S -emit-llvm -o- %s | FileCheck %s -check-prefix=SCP-ll-linux64
|
||||
// SCP-ll-linux64: attributes {{.*}} "probe-stack"="inline-asm"
|
||||
|
||||
// RUN: %clang -target x86_64-pc-windows-msvc -fstack-clash-protection -S -emit-llvm -o- %s 2>&1 | FileCheck %s -check-prefix=SCP-ll-win64
|
||||
// SCP-ll-win64-NOT: attributes {{.*}} "probe-stack"="inline-asm"
|
||||
// SCP-ll-win64: argument unused during compilation: '-fstack-clash-protection'
|
||||
|
||||
// Fixture for the inline-asm diagnostic: returns the stack pointer value read
// back after it has been lowered by hand.
int foo(int c) {
|
||||
int r;
|
||||
// Subtracts c from rsp and lists "rsp" as a clobber; this is the statement
// the stack-clash inline-asm warning is expected on.
__asm__("sub %0, %%rsp"
|
||||
:
|
||||
: "rm"(c)
|
||||
: "rsp");
|
||||
// Only reads rsp into r; no clobber list is given here.
__asm__("mov %%rsp, %0"
|
||||
: "=rm"(r)::);
|
||||
return r;
|
||||
}
|
|
@ -84,6 +84,10 @@ Changes to the X86 Target
|
|||
During this release ...
|
||||
|
||||
|
||||
* Functions with the probe-stack attribute set to "inline-asm" are now protected
|
||||
against stack clash without the need of a third-party probing function and
|
||||
with limited impact on performance.
|
||||
|
||||
Changes to the AMDGPU Target
|
||||
-----------------------------
|
||||
|
||||
|
|
|
@ -1727,6 +1727,10 @@ public:
|
|||
|
||||
/// Returns true if stack probing through a function call is requested.
|
||||
/// This default implementation always returns false.
|
||||
virtual bool hasStackProbeSymbol(MachineFunction &MF) const { return false; }
|
||||

||||
/// Returns true if stack probing through inline assembly is requested.
/// This default implementation always returns false.
virtual bool hasInlineStackProbe(MachineFunction &MF) const { return false; }
|
||||

||||
/// Returns the name of the symbol used to emit stack probes or the empty
/// string if not applicable. This default implementation returns the empty
/// string.
virtual StringRef getStackProbeSymbolName(MachineFunction &MF) const {
|
||||
return "";
|
||||
}
|
||||
|
|
|
@ -162,14 +162,13 @@ bool X86CallFrameOptimization::isLegal(MachineFunction &MF) {
|
|||
// memory for arguments.
|
||||
unsigned FrameSetupOpcode = TII->getCallFrameSetupOpcode();
|
||||
unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode();
|
||||
bool UseStackProbe =
|
||||
!STI->getTargetLowering()->getStackProbeSymbolName(MF).empty();
|
||||
bool EmitStackProbeCall = STI->getTargetLowering()->hasStackProbeSymbol(MF);
|
||||
unsigned StackProbeSize = STI->getTargetLowering()->getStackProbeSize(MF);
|
||||
for (MachineBasicBlock &BB : MF) {
|
||||
bool InsideFrameSequence = false;
|
||||
for (MachineInstr &MI : BB) {
|
||||
if (MI.getOpcode() == FrameSetupOpcode) {
|
||||
if (TII->getFrameSize(MI) >= StackProbeSize && UseStackProbe)
|
||||
if (TII->getFrameSize(MI) >= StackProbeSize && EmitStackProbeCall)
|
||||
return false;
|
||||
if (InsideFrameSequence)
|
||||
return false;
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
#include "X86Subtarget.h"
|
||||
#include "X86TargetMachine.h"
|
||||
#include "llvm/ADT/SmallSet.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/Analysis/EHPersonalities.h"
|
||||
#include "llvm/CodeGen/MachineFrameInfo.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
|
@ -32,6 +33,12 @@
|
|||
#include "llvm/Target/TargetOptions.h"
|
||||
#include <cstdlib>
|
||||
|
||||
#define DEBUG_TYPE "x86-fl"
|
||||
|
||||
STATISTIC(NumFrameLoopProbe, "Number of loop stack probes used in prologue");
|
||||
STATISTIC(NumFrameExtraProbe,
|
||||
"Number of extra stack probes generated in prologue");
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
X86FrameLowering::X86FrameLowering(const X86Subtarget &STI,
|
||||
|
@ -257,7 +264,27 @@ void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
|
|||
|
||||
uint64_t Chunk = (1LL << 31) - 1;
|
||||
|
||||
if (Offset > Chunk) {
|
||||
MachineFunction &MF = *MBB.getParent();
|
||||
const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
|
||||
const X86TargetLowering &TLI = *STI.getTargetLowering();
|
||||
const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);
|
||||
|
||||
// It's ok to not take into account large chunks when probing, as the
|
||||
// allocation is split in smaller chunks anyway.
|
||||
if (EmitInlineStackProbe && !InEpilogue) {
|
||||
|
||||
// Stack probing may involve looping, and control flow generation is
|
||||
// disallowed at this point. Rely on later processing through
|
||||
// `inlineStackProbe`.
|
||||
MachineInstr *Stub = emitStackProbeInlineStub(MF, MBB, MBBI, DL, true);
|
||||
|
||||
// Encode the static offset as a metadata attached to the stub.
|
||||
LLVMContext &Context = MF.getFunction().getContext();
|
||||
MachineInstrBuilder(MF, Stub).addMetadata(
|
||||
MDTuple::get(Context, {ConstantAsMetadata::get(ConstantInt::get(
|
||||
IntegerType::get(Context, 64), Offset))}));
|
||||
return;
|
||||
} else if (Offset > Chunk) {
|
||||
// Rather than emit a long series of instructions for large offsets,
|
||||
// load the offset into a register and do one sub/add
|
||||
unsigned Reg = 0;
|
||||
|
@ -381,8 +408,8 @@ MachineInstrBuilder X86FrameLowering::BuildStackAdjustment(
|
|||
} else {
|
||||
bool IsSub = Offset < 0;
|
||||
uint64_t AbsOffset = IsSub ? -Offset : Offset;
|
||||
unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr, AbsOffset)
|
||||
: getADDriOpcode(Uses64BitFramePtr, AbsOffset);
|
||||
const unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr, AbsOffset)
|
||||
: getADDriOpcode(Uses64BitFramePtr, AbsOffset);
|
||||
MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
|
||||
.addReg(StackPtr)
|
||||
.addImm(AbsOffset);
|
||||
|
@ -528,6 +555,169 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
|
|||
const DebugLoc &DL,
|
||||
bool InProlog) const {
|
||||
const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
|
||||
if (STI.isTargetWindowsCoreCLR() && STI.is64Bit())
|
||||
emitStackProbeInlineWindowsCoreCLR64(MF, MBB, MBBI, DL, InProlog);
|
||||
else
|
||||
emitStackProbeInlineGeneric(MF, MBB, MBBI, DL, InProlog);
|
||||
}
|
||||
|
||||
/// Expand an inline stack probe for every target except 64-bit CoreCLR.
///
/// The allocation size is read from the metadata operand of the instruction
/// immediately preceding \p MBBI. Sizes of at most eight probe intervals are
/// expanded as straight-line code; larger sizes are expanded as a loop.
void X86FrameLowering::emitStackProbeInlineGeneric(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
  // The instruction before the insertion point carries the size to allocate.
  MachineInstr &ProbeStub = *std::prev(MBBI);
  const auto &SizeOperand = ProbeStub.getOperand(1);
  assert(SizeOperand.isMetadata() && "no metadata attached to that probe");

  // Unwrap MDTuple -> ConstantAsMetadata -> ConstantInt one step at a time.
  const auto *SizeTuple = cast<MDTuple>(SizeOperand.getMetadata());
  const auto *SizeMD = cast<ConstantAsMetadata>(SizeTuple->getOperand(0));
  const uint64_t Offset = cast<ConstantInt>(SizeMD->getValue())->getZExtValue();

  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  assert(!(STI.is64Bit() && STI.isTargetWindowsCoreCLR()) &&
         "different expansion expected for CoreCLR 64 bit");

  // Widen to 64 bits before scaling so the threshold is computed in uint64_t.
  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
  const uint64_t UnrollLimit = StackProbeSize * 8;

  // Few iterations: unroll them. Many iterations: synthesize a loop.
  if (Offset <= UnrollLimit) {
    emitStackProbeInlineGenericBlock(MF, MBB, MBBI, DL, Offset);
    return;
  }
  emitStackProbeInlineGenericLoop(MF, MBB, MBBI, DL, Offset);
}
|
||||
|
||||
/// Allocate \p Offset bytes of stack with a fully unrolled sequence.
///
/// The stack pointer is lowered one probe interval at a time, and a zero is
/// stored at the new top of stack after each step. Whatever remains once less
/// than one full interval is left is allocated with a single final adjustment
/// that is not followed by a store. All emitted instructions are flagged
/// FrameSetup and are inserted before \p MBBI.
void X86FrameLowering::emitStackProbeInlineGenericBlock(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
    uint64_t Offset) const {

  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  // The SUB opcode is chosen from the total size; every immediate used below
  // is no larger than Offset, so it fits the selected encoding.
  const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, Offset);
  const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);

  // Bytes allocated so far. No probe is emitted before the first step; the
  // original author's note is that the return address saved on the stack
  // already plays that role.
  uint64_t CurrentOffset = 0;

  // For the first N - 1 pages, just probe. Taking advantage of natural probes
  // was tried, but it requires much more logic and there were very few
  // interesting natural probes to interleave.
  while (CurrentOffset + StackProbeSize < Offset) {
    MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
                           .addReg(StackPtr)
                           .addImm(StackProbeSize)
                           .setMIFlag(MachineInstr::FrameSetup);
    MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.

    // Touch the freshly allocated chunk by storing 0 at the stack pointer.
    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
                     .setMIFlag(MachineInstr::FrameSetup),
                 StackPtr, false, 0)
        .addImm(0)
        .setMIFlag(MachineInstr::FrameSetup);
    NumFrameExtraProbe++;
    CurrentOffset += StackProbeSize;
  }

  // Allocate the remainder in one step.
  uint64_t ChunkSize = Offset - CurrentOffset;
  MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
                         .addReg(StackPtr)
                         .addImm(ChunkSize)
                         .setMIFlag(MachineInstr::FrameSetup);
  MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
}
|
||||
|
||||
/// Allocate \p Offset bytes of stack with a probing loop.
///
/// Two blocks are inserted after \p MBB. testMBB lowers the stack pointer by
/// one probe interval, stores a zero at the new top of stack, and repeats
/// until the stack pointer equals a bound precomputed in a scratch register.
/// tailMBB receives the rest of \p MBB (from \p MBBI on) and its successors,
/// preceded by one adjustment for the remainder that is not a multiple of the
/// probe interval.
void X86FrameLowering::emitStackProbeInlineGenericLoop(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
    uint64_t Offset) const {

  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);

  // Synthesize a loop
  NumFrameLoopProbe++;
  const BasicBlock *LLVM_BB = MBB.getBasicBlock();

  MachineBasicBlock *testMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *tailMBB = MF.CreateMachineBasicBlock(LLVM_BB);

  MachineFunction::iterator MBBIter = ++MBB.getIterator();
  MF.insert(MBBIter, testMBB);
  MF.insert(MBBIter, tailMBB);

  // Scratch register holding the loop bound. Its width must match StackPtr,
  // and the copy and compare opcodes below are keyed on the same predicate so
  // register width and opcode width always agree.
  // NOTE(review): R11/R11D only exist in 64-bit mode, so EAX is used when
  // !Is64Bit. Confirm EAX cannot be live-in here (e.g. register-passed
  // arguments) before relying on this for 32-bit targets.
  const unsigned FinalStackPtr =
      Uses64BitFramePtr ? X86::R11 : (Is64Bit ? X86::R11D : X86::EAX);
  const unsigned CopyOpc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr;
  const unsigned CmpOpc = Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr;

  BuildMI(MBB, MBBI, DL, TII.get(CopyOpc), FinalStackPtr)
      .addReg(StackPtr)
      .setMIFlag(MachineInstr::FrameSetup);

  // save loop bound
  {
    const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, Offset);
    BuildMI(MBB, MBBI, DL, TII.get(Opc), FinalStackPtr)
        .addReg(FinalStackPtr)
        .addImm(Offset / StackProbeSize * StackProbeSize)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // allocate a page
  {
    const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, StackProbeSize);
    BuildMI(testMBB, DL, TII.get(Opc), StackPtr)
        .addReg(StackPtr)
        .addImm(StackProbeSize)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // touch the page
  addRegOffset(BuildMI(testMBB, DL, TII.get(MovMIOpc))
                   .setMIFlag(MachineInstr::FrameSetup),
               StackPtr, false, 0)
      .addImm(0)
      .setMIFlag(MachineInstr::FrameSetup);

  // cmp with stack pointer bound
  BuildMI(testMBB, DL, TII.get(CmpOpc))
      .addReg(StackPtr)
      .addReg(FinalStackPtr)
      .setMIFlag(MachineInstr::FrameSetup);

  // jump back while the bound has not been reached
  BuildMI(testMBB, DL, TII.get(X86::JCC_1))
      .addMBB(testMBB)
      .addImm(X86::COND_NE)
      .setMIFlag(MachineInstr::FrameSetup);
  testMBB->addSuccessor(testMBB);
  testMBB->addSuccessor(tailMBB);

  // Move the remainder of the original block into the tail block and rewire
  // the CFG: MBB -> testMBB -> tailMBB -> former successors of MBB.
  tailMBB->splice(tailMBB->end(), &MBB, MBBI, MBB.end());
  tailMBB->transferSuccessorsAndUpdatePHIs(&MBB);
  MBB.addSuccessor(testMBB);

  // Allocate what is left once all whole probe intervals are done.
  if (Offset % StackProbeSize) {
    const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, Offset);
    BuildMI(*tailMBB, tailMBB->begin(), DL, TII.get(Opc), StackPtr)
        .addReg(StackPtr)
        .addImm(Offset % StackProbeSize)
        .setMIFlag(MachineInstr::FrameSetup);
  }
}
|
||||
|
||||
void X86FrameLowering::emitStackProbeInlineWindowsCoreCLR64(
|
||||
MachineFunction &MF, MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
|
||||
const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
|
||||
assert(STI.is64Bit() && "different expansion needed for 32 bit");
|
||||
assert(STI.isTargetWindowsCoreCLR() && "custom expansion expects CoreCLR");
|
||||
const TargetInstrInfo &TII = *STI.getInstrInfo();
|
||||
|
@ -821,13 +1011,13 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
|
|||
}
|
||||
}
|
||||
|
||||
void X86FrameLowering::emitStackProbeInlineStub(
|
||||
MachineInstr *X86FrameLowering::emitStackProbeInlineStub(
|
||||
MachineFunction &MF, MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
|
||||
|
||||
assert(InProlog && "ChkStkStub called outside prolog!");
|
||||
|
||||
BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
|
||||
return BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
|
||||
.addExternalSymbol("__chkstk_stub");
|
||||
}
|
||||
|
||||
|
@ -1014,7 +1204,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
|
|||
X86FI->setCalleeSavedFrameSize(
|
||||
X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);
|
||||
|
||||
bool UseStackProbe = !STI.getTargetLowering()->getStackProbeSymbolName(MF).empty();
|
||||
const bool EmitStackProbeCall =
|
||||
STI.getTargetLowering()->hasStackProbeSymbol(MF);
|
||||
unsigned StackProbeSize = STI.getTargetLowering()->getStackProbeSize(MF);
|
||||
|
||||
// Re-align the stack on 64-bit if the x86-interrupt calling convention is
|
||||
|
@ -1032,11 +1223,10 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
|
|||
// pointer, calls, or dynamic alloca then we do not need to adjust the
|
||||
// stack pointer (we fit in the Red Zone). We also check that we don't
|
||||
// push and pop from the stack.
|
||||
if (has128ByteRedZone(MF) &&
|
||||
!TRI->needsStackRealignment(MF) &&
|
||||
if (has128ByteRedZone(MF) && !TRI->needsStackRealignment(MF) &&
|
||||
!MFI.hasVarSizedObjects() && // No dynamic alloca.
|
||||
!MFI.adjustsStack() && // No calls.
|
||||
!UseStackProbe && // No stack probes.
|
||||
!EmitStackProbeCall && // No stack probes.
|
||||
!MFI.hasCopyImplyingStackAdjustment() && // Don't push and pop.
|
||||
!MF.shouldSplitStack()) { // Regular stack
|
||||
uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
|
||||
|
@ -1237,7 +1427,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
|
|||
uint64_t AlignedNumBytes = NumBytes;
|
||||
if (IsWin64Prologue && !IsFunclet && TRI->needsStackRealignment(MF))
|
||||
AlignedNumBytes = alignTo(AlignedNumBytes, MaxAlign);
|
||||
if (AlignedNumBytes >= StackProbeSize && UseStackProbe) {
|
||||
if (AlignedNumBytes >= StackProbeSize && EmitStackProbeCall) {
|
||||
assert(!X86FI->getUsesRedZone() &&
|
||||
"The Red Zone is not accounted for in stack probes");
|
||||
|
||||
|
|
|
@ -195,11 +195,33 @@ private:
|
|||
void emitStackProbeInline(MachineFunction &MF, MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI,
|
||||
const DebugLoc &DL, bool InProlog) const;
|
||||
void emitStackProbeInlineWindowsCoreCLR64(MachineFunction &MF,
|
||||
MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI,
|
||||
const DebugLoc &DL,
|
||||
bool InProlog) const;
|
||||
void emitStackProbeInlineGeneric(MachineFunction &MF, MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI,
|
||||
const DebugLoc &DL, bool InProlog) const;
|
||||
|
||||
void emitStackProbeInlineGenericBlock(MachineFunction &MF,
|
||||
MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI,
|
||||
const DebugLoc &DL,
|
||||
uint64_t Offset) const;
|
||||
|
||||
void emitStackProbeInlineGenericLoop(MachineFunction &MF,
|
||||
MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI,
|
||||
const DebugLoc &DL,
|
||||
uint64_t Offset) const;
|
||||
|
||||
/// Emit a stub to later inline the target stack probe.
|
||||
void emitStackProbeInlineStub(MachineFunction &MF, MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI,
|
||||
const DebugLoc &DL, bool InProlog) const;
|
||||
MachineInstr *emitStackProbeInlineStub(MachineFunction &MF,
|
||||
MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI,
|
||||
const DebugLoc &DL,
|
||||
bool InProlog) const;
|
||||
|
||||
/// Aligns the stack pointer by ANDing it with -MaxAlign.
|
||||
void BuildStackAlignAND(MachineBasicBlock &MBB,
|
||||
|
|
|
@ -23111,9 +23111,9 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
|
|||
SelectionDAG &DAG) const {
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
bool SplitStack = MF.shouldSplitStack();
|
||||
bool EmitStackProbe = !getStackProbeSymbolName(MF).empty();
|
||||
bool EmitStackProbeCall = hasStackProbeSymbol(MF);
|
||||
bool Lower = (Subtarget.isOSWindows() && !Subtarget.isTargetMachO()) ||
|
||||
SplitStack || EmitStackProbe;
|
||||
SplitStack || EmitStackProbeCall;
|
||||
SDLoc dl(Op);
|
||||
|
||||
// Get the inputs.
|
||||
|
@ -23137,11 +23137,21 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
|
|||
assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and"
|
||||
" not tell us which reg is the stack pointer!");
|
||||
|
||||
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
|
||||
Chain = SP.getValue(1);
|
||||
const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
|
||||
const Align StackAlign(TFI.getStackAlignment());
|
||||
Result = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
|
||||
if (hasInlineStackProbe(MF)) {
|
||||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
|
||||
const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy);
|
||||
Register Vreg = MRI.createVirtualRegister(AddrRegClass);
|
||||
Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size);
|
||||
Result = DAG.getNode(X86ISD::PROBED_ALLOCA, dl, SPTy, Chain,
|
||||
DAG.getRegister(Vreg, SPTy));
|
||||
} else {
|
||||
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
|
||||
Chain = SP.getValue(1);
|
||||
Result = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
|
||||
}
|
||||
if (Alignment && Alignment > StackAlign)
|
||||
Result =
|
||||
DAG.getNode(ISD::AND, dl, VT, Result,
|
||||
|
@ -29874,6 +29884,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||
NODE_NAME_CASE(MEMBARRIER)
|
||||
NODE_NAME_CASE(MFENCE)
|
||||
NODE_NAME_CASE(SEG_ALLOCA)
|
||||
NODE_NAME_CASE(PROBED_ALLOCA)
|
||||
NODE_NAME_CASE(RDRAND)
|
||||
NODE_NAME_CASE(RDSEED)
|
||||
NODE_NAME_CASE(RDPKRU)
|
||||
|
@ -31141,6 +31152,97 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr &MI,
|
|||
return SinkMBB;
|
||||
}
|
||||
|
||||
/// Expand a PROBED_ALLOCA pseudo into a probing allocation loop.
///
/// Operand 1 of \p MI is the size to allocate and operand 0 receives the
/// resulting stack pointer. Three blocks are inserted after \p BB:
///  - testMBB: compares the size still to allocate with the probe size and
///    branches to tailMBB once less than one probe interval remains;
///  - blockMBB: lowers the stack pointer by one probe interval, stores a zero
///    at the new top of stack, decrements the remaining size, and loops back;
///  - tailMBB: allocates the remainder, copies the stack pointer into the
///    result register, and receives the rest of \p BB and its successors.
///
/// \returns tailMBB, the block in which lowering continues.
MachineBasicBlock *
X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI,
                                           MachineBasicBlock *BB) const {
  MachineFunction *MF = BB->getParent();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const X86FrameLowering &TFI = *Subtarget.getFrameLowering();
  DebugLoc DL = MI.getDebugLoc();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();

  const unsigned ProbeSize = getStackProbeSize(*MF);

  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock *testMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *tailMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *blockMBB = MF->CreateMachineBasicBlock(LLVM_BB);

  MachineFunction::iterator MBBIter = ++BB->getIterator();
  MF->insert(MBBIter, testMBB);
  MF->insert(MBBIter, blockMBB);
  MF->insert(MBBIter, tailMBB);

  Register sizeVReg = MI.getOperand(1).getReg();

  const TargetRegisterClass *SizeRegClass = MRI.getRegClass(sizeVReg);

  // Remaining size on entry to testMBB, and after one interval is allocated.
  Register tmpSizeVReg = MRI.createVirtualRegister(SizeRegClass);
  Register tmpSizeVReg2 = MRI.createVirtualRegister(SizeRegClass);

  const unsigned physSPReg = TFI.Uses64BitFramePtr ? X86::RSP : X86::ESP;

  // test rsp size
  BuildMI(testMBB, DL, TII->get(X86::PHI), tmpSizeVReg)
      .addReg(sizeVReg)
      .addMBB(BB)
      .addReg(tmpSizeVReg2)
      .addMBB(blockMBB);

  BuildMI(testMBB, DL,
          TII->get(TFI.Uses64BitFramePtr ? X86::CMP64ri32 : X86::CMP32ri))
      .addReg(tmpSizeVReg)
      .addImm(ProbeSize);

  // The size is an unsigned quantity, so the exit test must be unsigned
  // (below). With a signed test, a size whose top bit is set compares as
  // negative and jumps straight to the tail, which skips every probe.
  BuildMI(testMBB, DL, TII->get(X86::JCC_1))
      .addMBB(tailMBB)
      .addImm(X86::COND_B);
  testMBB->addSuccessor(blockMBB);
  testMBB->addSuccessor(tailMBB);

  // allocate a block and touch it

  BuildMI(blockMBB, DL,
          TII->get(TFI.Uses64BitFramePtr ? X86::SUB64ri32 : X86::SUB32ri),
          tmpSizeVReg2)
      .addReg(tmpSizeVReg)
      .addImm(ProbeSize);

  BuildMI(blockMBB, DL,
          TII->get(TFI.Uses64BitFramePtr ? X86::SUB64ri32 : X86::SUB32ri),
          physSPReg)
      .addReg(physSPReg)
      .addImm(ProbeSize);

  const unsigned MovMIOpc =
      TFI.Uses64BitFramePtr ? X86::MOV64mi32 : X86::MOV32mi;
  addRegOffset(BuildMI(blockMBB, DL, TII->get(MovMIOpc)), physSPReg, false, 0)
      .addImm(0);

  BuildMI(blockMBB, DL, TII->get(X86::JMP_1)).addMBB(testMBB);
  blockMBB->addSuccessor(testMBB);

  // allocate the tail and continue
  BuildMI(tailMBB, DL,
          TII->get(TFI.Uses64BitFramePtr ? X86::SUB64rr : X86::SUB32rr),
          physSPReg)
      .addReg(physSPReg)
      .addReg(tmpSizeVReg);
  BuildMI(tailMBB, DL, TII->get(TargetOpcode::COPY), MI.getOperand(0).getReg())
      .addReg(physSPReg);

  // Everything after the pseudo moves to the tail block, which also inherits
  // the successors of the original block.
  tailMBB->splice(tailMBB->end(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  tailMBB->transferSuccessorsAndUpdatePHIs(BB);
  BB->addSuccessor(testMBB);

  // Delete the original pseudo instruction.
  MI.eraseFromParent();

  // And we're done.
  return tailMBB;
}
|
||||
|
||||
MachineBasicBlock *
|
||||
X86TargetLowering::EmitLoweredSegAlloca(MachineInstr &MI,
|
||||
MachineBasicBlock *BB) const {
|
||||
|
@ -32301,6 +32403,9 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
|
|||
case X86::SEG_ALLOCA_32:
|
||||
case X86::SEG_ALLOCA_64:
|
||||
return EmitLoweredSegAlloca(MI, BB);
|
||||
case X86::PROBED_ALLOCA_32:
|
||||
case X86::PROBED_ALLOCA_64:
|
||||
return EmitLoweredProbedAlloca(MI, BB);
|
||||
case X86::TLSCall_32:
|
||||
case X86::TLSCall_64:
|
||||
return EmitLoweredTLSCall(MI, BB);
|
||||
|
@ -47560,10 +47665,35 @@ bool X86TargetLowering::supportSwiftError() const {
|
|||
return Subtarget.is64Bit();
|
||||
}
|
||||
|
||||
/// Returns true if stack probing through a function call is requested.
|
||||
bool X86TargetLowering::hasStackProbeSymbol(MachineFunction &MF) const {
|
||||
return !getStackProbeSymbolName(MF).empty();
|
||||
}
|
||||
|
||||
/// Returns true if stack probing through inline assembly is requested.
|
||||
bool X86TargetLowering::hasInlineStackProbe(MachineFunction &MF) const {
|
||||
|
||||
// No inline stack probe for Windows, they have their own mechanism.
|
||||
if (Subtarget.isOSWindows() ||
|
||||
MF.getFunction().hasFnAttribute("no-stack-arg-probe"))
|
||||
return false;
|
||||
|
||||
// If the function specifically requests inline stack probes, emit them.
|
||||
if (MF.getFunction().hasFnAttribute("probe-stack"))
|
||||
return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
|
||||
"inline-asm";
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Returns the name of the symbol used to emit stack probes or the empty
|
||||
/// string if not applicable.
|
||||
StringRef
|
||||
X86TargetLowering::getStackProbeSymbolName(MachineFunction &MF) const {
|
||||
// Inline Stack probes disable stack probe call
|
||||
if (hasInlineStackProbe(MF))
|
||||
return "";
|
||||
|
||||
// If the function specifically requests stack probes, emit them.
|
||||
if (MF.getFunction().hasFnAttribute("probe-stack"))
|
||||
return MF.getFunction().getFnAttribute("probe-stack").getValueAsString();
|
||||
|
|
|
@ -537,6 +537,10 @@ namespace llvm {
|
|||
// falls back to heap allocation if not.
|
||||
SEG_ALLOCA,
|
||||
|
||||
// For allocating stack space when using stack clash protector.
|
||||
// Allocation is performed by block, and each block is probed.
|
||||
PROBED_ALLOCA,
|
||||
|
||||
// Memory barriers.
|
||||
MEMBARRIER,
|
||||
MFENCE,
|
||||
|
@ -1224,6 +1228,8 @@ namespace llvm {
|
|||
|
||||
bool supportSwiftError() const override;
|
||||
|
||||
bool hasStackProbeSymbol(MachineFunction &MF) const override;
|
||||
bool hasInlineStackProbe(MachineFunction &MF) const override;
|
||||
StringRef getStackProbeSymbolName(MachineFunction &MF) const override;
|
||||
|
||||
unsigned getStackProbeSize(MachineFunction &MF) const;
|
||||
|
@ -1448,6 +1454,9 @@ namespace llvm {
|
|||
MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
|
||||
MachineBasicBlock *BB) const;
|
||||
|
||||
MachineBasicBlock *EmitLoweredProbedAlloca(MachineInstr &MI,
|
||||
MachineBasicBlock *BB) const;
|
||||
|
||||
MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
|
||||
MachineBasicBlock *BB) const;
|
||||
|
||||
|
|
|
@ -111,6 +111,23 @@ def SEG_ALLOCA_64 : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$size),
|
|||
[(set GR64:$dst,
|
||||
(X86SegAlloca GR64:$size))]>,
|
||||
Requires<[In64BitMode]>;
|
||||
|
||||
// To protect against stack clash, dynamic allocation should perform a memory
|
||||
// probe at each page.
|
||||
|
||||
// 32-bit form of the probed dynamic stack allocation pseudo-instruction.
// It is selected for (X86ProbedAlloca GR32:$size) when NotLP64 holds, and is
// declared as reading ESP and as defining EAX, ESP and EFLAGS.
let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
|
||||
def PROBED_ALLOCA_32 : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$size),
|
||||
"# variable sized alloca with probing",
|
||||
[(set GR32:$dst,
|
||||
(X86ProbedAlloca GR32:$size))]>,
|
||||
Requires<[NotLP64]>;
|
||||
|
||||
// 64-bit form of the probed dynamic stack allocation pseudo-instruction.
// It is selected for (X86ProbedAlloca GR64:$size) when In64BitMode holds, and
// is declared as reading RSP and as defining RAX, RSP and EFLAGS.
let Defs = [RAX, RSP, EFLAGS], Uses = [RSP] in
|
||||
def PROBED_ALLOCA_64 : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$size),
|
||||
"# variable sized alloca with probing",
|
||||
[(set GR64:$dst,
|
||||
(X86ProbedAlloca GR64:$size))]>,
|
||||
Requires<[In64BitMode]>;
|
||||
}
|
||||
|
||||
// Dynamic stack allocation yields a _chkstk or _alloca call for all Windows
|
||||
|
|
|
@ -121,6 +121,8 @@ def SDT_X86WIN_ALLOCA : SDTypeProfile<0, 1, [SDTCisVT<0, iPTR>]>;
|
|||
|
||||
def SDT_X86SEG_ALLOCA : SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>;
|
||||
|
||||
// Type profile of the probed alloca node: one result and one operand, both
// constrained to the pointer type (iPTR). Same shape as SDT_X86SEG_ALLOCA.
def SDT_X86PROBED_ALLOCA : SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>;
|
||||
|
||||
def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
|
||||
|
||||
def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>;
|
||||
|
@ -292,6 +294,9 @@ def X86WinAlloca : SDNode<"X86ISD::WIN_ALLOCA", SDT_X86WIN_ALLOCA,
|
|||
def X86SegAlloca : SDNode<"X86ISD::SEG_ALLOCA", SDT_X86SEG_ALLOCA,
|
||||
[SDNPHasChain]>;
|
||||
|
||||
// Selection DAG node for X86ISD::PROBED_ALLOCA, typed by SDT_X86PROBED_ALLOCA.
// It carries a chain (SDNPHasChain).
def X86ProbedAlloca : SDNode<"X86ISD::PROBED_ALLOCA", SDT_X86PROBED_ALLOCA,
|
||||
[SDNPHasChain]>;
|
||||
|
||||
def X86TLSCall : SDNode<"X86ISD::TLSCALL", SDT_X86TLSCALL,
|
||||
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
|
||||
|
||||
|
|
|
@ -0,0 +1,44 @@
|
|||
; RUN: llc < %s | FileCheck %s
|
||||
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define i32 @foo(i32 %n) local_unnamed_addr #0 {
; Variable-sized allocation of %n i32 elements. The expected code rounds the
; byte size up to a multiple of 16, then loops while at least 4096 bytes
; remain: each iteration lowers %rsp by 4096 and stores to (%rsp). The
; remainder is subtracted from %rsp after the loop.

; CHECK-LABEL: foo:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movq %rsp, %rbp
; CHECK-NEXT: .cfi_def_cfa_register %rbp
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: leaq 15(,%rax,4), %rax
; CHECK-NEXT: andq $-16, %rax
; CHECK-NEXT: cmpq $4096, %rax # imm = 0x1000
; CHECK-NEXT: jl .LBB0_3
; CHECK-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: subq $4096, %rax # imm = 0x1000
; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
; CHECK-NEXT: movq $0, (%rsp)
; CHECK-NEXT: cmpq $4096, %rax # imm = 0x1000
; CHECK-NEXT: jge .LBB0_2
; CHECK-NEXT: .LBB0_3:
; CHECK-NEXT: subq %rax, %rsp
; CHECK-NEXT: movq %rsp, %rax
; CHECK-NEXT: movl $1, 4792(%rax)
; CHECK-NEXT: movl (%rax), %eax
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa %rsp, 8
; CHECK-NEXT: retq

  %buf = alloca i32, i32 %n, align 16
  %slot = getelementptr inbounds i32, i32* %buf, i64 1198
  store volatile i32 1, i32* %slot
  %head = load volatile i32, i32* %buf
  ret i32 %head
}
|
||||
|
||||
attributes #0 = {"probe-stack"="inline-asm"}
|
|
@ -0,0 +1,38 @@
|
|||
; RUN: llc < %s | FileCheck %s
|
||||
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define i32 @foo() local_unnamed_addr #0 {
; Large static frame: 18000 i32 elements (72000 bytes). The expected prologue
; computes a limit in %r11 (%rsp - 69632) and loops, lowering %rsp by 4096 and
; storing to (%rsp) until %rsp reaches the limit. The remaining 2248 bytes are
; allocated after the loop.

; CHECK-LABEL: foo:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rsp, %r11
; CHECK-NEXT: subq $69632, %r11 # imm = 0x11000
; CHECK-NEXT: .LBB0_1:
; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
; CHECK-NEXT: movq $0, (%rsp)
; CHECK-NEXT: cmpq %r11, %rsp
; CHECK-NEXT: jne .LBB0_1
; CHECK-NEXT:# %bb.2:
; CHECK-NEXT: subq $2248, %rsp # imm = 0x8C8
; CHECK-NEXT: .cfi_def_cfa_offset 71888
; CHECK-NEXT: movl $1, 264(%rsp)
; CHECK-NEXT: movl $1, 28664(%rsp)
; CHECK-NEXT: movl -128(%rsp), %eax
; CHECK-NEXT: addq $71880, %rsp # imm = 0x118C8
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq

  %buf = alloca i32, i64 18000, align 16
  %near = getelementptr inbounds i32, i32* %buf, i64 98
  %far = getelementptr inbounds i32, i32* %buf, i64 7198
  store volatile i32 1, i32* %near
  store volatile i32 1, i32* %far
  %head = load volatile i32, i32* %buf
  ret i32 %head
}
|
||||
|
||||
attributes #0 = {"probe-stack"="inline-asm"}
|
|
@ -0,0 +1,32 @@
|
|||
; RUN: llc < %s | FileCheck %s
|
||||
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define i32 @foo() local_unnamed_addr #0 {
; Two stack objects (4000 and 2000 bytes) sharing one 5880-byte frame. The
; frame is larger than one 4096-byte probe interval, so the prologue must
; allocate one probed 4096-byte chunk and then the remaining 1784 bytes, as in
; the other medium-sized tests.
;
; The directives after the first two used to lack their trailing colon, so
; FileCheck silently skipped them. The sequence below is derived from the
; frame layout: %b sits at -128(%rsp) and %a at 1872(%rsp).
; NOTE(review): not generated by running llc; regenerate with
; update_llc_test_checks.py if codegen differs.

; CHECK-LABEL: foo:
; CHECK: # %bb.0:
; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
; CHECK-NEXT: movq $0, (%rsp)
; CHECK-NEXT: subq $1784, %rsp # imm = 0x6F8
; CHECK-NEXT: .cfi_def_cfa_offset 5888
; CHECK-NEXT: movl $1, 3872(%rsp)
; CHECK-NEXT: movl $2, 672(%rsp)
; CHECK-NEXT: movl 1872(%rsp), %eax
; CHECK-NEXT: addq $5880, %rsp # imm = 0x16F8
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq

  %a = alloca i32, i64 1000, align 16
  %b = alloca i32, i64 500, align 16
  %a0 = getelementptr inbounds i32, i32* %a, i64 500
  %b0 = getelementptr inbounds i32, i32* %b, i64 200
  store volatile i32 1, i32* %a0
  store volatile i32 2, i32* %b0
  %c = load volatile i32, i32* %a
  ret i32 %c
}
|
||||
|
||||
attributes #0 = {"probe-stack"="inline-asm"}
|
|
@ -0,0 +1,33 @@
|
|||
; RUN: llc < %s | FileCheck %s
|
||||
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define i32 @foo() local_unnamed_addr #0 {
; Medium static frame: 2000 i32 elements (8000 bytes) written by two
; non-volatile stores. The expected prologue allocates one 4096-byte chunk,
; stores to (%rsp), then allocates the remaining 3784 bytes. Both stores
; appear only after the whole frame has been allocated.

; CHECK-LABEL: foo:
; CHECK: # %bb.0:
; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
; CHECK-NEXT: movq $0, (%rsp)
; CHECK-NEXT: subq $3784, %rsp # imm = 0xEC8
; CHECK-NEXT: .cfi_def_cfa_offset 7888
; CHECK-NEXT: movl $1, 264(%rsp)
; CHECK-NEXT: movl $1, 4664(%rsp)
; CHECK-NEXT: movl -128(%rsp), %eax
; CHECK-NEXT: addq $7880, %rsp # imm = 0x1EC8
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq

  %buf = alloca i32, i64 2000, align 16
  %near = getelementptr inbounds i32, i32* %buf, i64 98
  %far = getelementptr inbounds i32, i32* %buf, i64 1198
  store i32 1, i32* %near
  store i32 1, i32* %far
  %head = load volatile i32, i32* %buf
  ret i32 %head
}
|
||||
|
||||
attributes #0 = {"probe-stack"="inline-asm"}
|
|
@ -0,0 +1,30 @@
|
|||
; RUN: llc < %s | FileCheck %s
|
||||
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define i32 @foo() local_unnamed_addr #0 {
; Medium static frame: 2000 i32 elements (8000 bytes). The expected prologue
; allocates one 4096-byte chunk, stores to (%rsp), then allocates the
; remaining 3784 bytes without a loop.

; CHECK-LABEL: foo:
; CHECK: # %bb.0:
; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
; CHECK-NEXT: movq $0, (%rsp)
; CHECK-NEXT: subq $3784, %rsp # imm = 0xEC8
; CHECK-NEXT: .cfi_def_cfa_offset 7888
; CHECK-NEXT: movl $1, 672(%rsp)
; CHECK-NEXT: movl -128(%rsp), %eax
; CHECK-NEXT: addq $7880, %rsp # imm = 0x1EC8
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq

  %buf = alloca i32, i64 2000, align 16
  %slot = getelementptr inbounds i32, i32* %buf, i64 200
  store volatile i32 1, i32* %slot
  %head = load volatile i32, i32* %buf
  ret i32 %head
}
|
||||
|
||||
attributes #0 = {"probe-stack"="inline-asm"}
|
|
@ -0,0 +1,27 @@
|
|||
; RUN: llc < %s | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define i32 @foo(i64 %i) local_unnamed_addr #0 {
; The only store into the 8000-byte buffer uses the run-time index %i. The
; expected prologue still contains the explicit store to (%rsp) after the
; first 4096-byte chunk, followed by the remaining 3784 bytes.

; CHECK-LABEL: foo:
; CHECK: # %bb.0:
; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
; CHECK-NEXT: movq $0, (%rsp)
; CHECK-NEXT: subq $3784, %rsp # imm = 0xEC8
; CHECK-NEXT: .cfi_def_cfa_offset 7888
; CHECK-NEXT: movl $1, -128(%rsp,%rdi,4)
; CHECK-NEXT: movl -128(%rsp), %eax
; CHECK-NEXT: addq $7880, %rsp # imm = 0x1EC8
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq

  %buf = alloca i32, i32 2000, align 16
  %slot = getelementptr inbounds i32, i32* %buf, i64 %i
  store volatile i32 1, i32* %slot
  %head = load volatile i32, i32* %buf
  ret i32 %head
}
|
||||
|
||||
attributes #0 = {"probe-stack"="inline-asm"}
|
||||
|
|
@ -0,0 +1,25 @@
|
|||
; RUN: llc < %s | FileCheck %s
|
||||
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define i32 @foo() local_unnamed_addr #0 {
; Small static frame: 100 i32 elements (400 bytes). The expected prologue is a
; single 280-byte stack adjustment with no probing store.

; CHECK-LABEL: foo:
; CHECK: # %bb.0:
; CHECK-NEXT: subq $280, %rsp # imm = 0x118
; CHECK-NEXT: .cfi_def_cfa_offset 288
; CHECK-NEXT: movl $1, 264(%rsp)
; CHECK-NEXT: movl -128(%rsp), %eax
; CHECK-NEXT: addq $280, %rsp # imm = 0x118
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq

  %buf = alloca i32, i64 100, align 16
  %slot = getelementptr inbounds i32, i32* %buf, i64 98
  store volatile i32 1, i32* %slot
  %head = load volatile i32, i32* %buf
  ret i32 %head
}
|
||||
|
||||
attributes #0 = {"probe-stack"="inline-asm"}
|
|
@ -0,0 +1,31 @@
|
|||
; RUN: llc < %s | FileCheck %s
|
||||
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg);
|
||||
|
||||
define void @foo() local_unnamed_addr #0 {
; An 8000-byte buffer is handed to memset. The expected prologue allocates one
; 4096-byte chunk, stores to (%rsp), then allocates the remaining 3912 bytes
; before the call is made.

;CHECK-LABEL: foo:
;CHECK: # %bb.0:
;CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
; the call to memset must not be relied upon as the probe here
;CHECK-NEXT: movq $0, (%rsp)
;CHECK-NEXT: subq $3912, %rsp # imm = 0xF48
;CHECK-NEXT: .cfi_def_cfa_offset 8016
;CHECK-NEXT: movq %rsp, %rdi
;CHECK-NEXT: movl $8000, %edx # imm = 0x1F40
;CHECK-NEXT: xorl %esi, %esi
;CHECK-NEXT: callq memset
;CHECK-NEXT: addq $8008, %rsp # imm = 0x1F48
;CHECK-NEXT: .cfi_def_cfa_offset 8
;CHECK-NEXT: retq

  %buf = alloca i8, i64 8000, align 16
  call void @llvm.memset.p0i8.i64(i8* align 16 %buf, i8 0, i64 8000, i1 false)
  ret void
}
|
||||
|
||||
attributes #0 = {"probe-stack"="inline-asm"}
|
Loading…
Reference in New Issue