forked from OSchip/llvm-project
[PowerPC] Implement probing for dynamic stack allocation
This patch is part of supporting `-fstack-clash-protection`. Compared to the existing `lowerDynamicAlloc`, it mainly does the following: - Adds a new pseudo instruction PPC::PREPARE_PROBED_ALLOCA to get the actual frame pointer and the final stack pointer. - Synthesizes a loop to probe by blocks. - Uses DYNAREAOFFSET to get MaxCallFrameSize, which is calculated in prologepilog. Differential Revision: https://reviews.llvm.org/D81358
This commit is contained in:
parent
52855ed099
commit
03828e38c3
|
@ -126,6 +126,7 @@ cl::desc("use absolute jump tables on ppc"), cl::Hidden);
|
|||
STATISTIC(NumTailCalls, "Number of tail calls");
|
||||
STATISTIC(NumSiblingCalls, "Number of sibling calls");
|
||||
STATISTIC(ShufflesHandledWithVPERM, "Number of shuffles lowered to a VPERM");
|
||||
STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
|
||||
|
||||
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
|
||||
|
||||
|
@ -1486,6 +1487,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||
case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
|
||||
case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
|
||||
case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
|
||||
case PPCISD::PROBED_ALLOCA: return "PPCISD::PROBED_ALLOCA";
|
||||
case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
|
||||
case PPCISD::SRL: return "PPCISD::SRL";
|
||||
case PPCISD::SRA: return "PPCISD::SRA";
|
||||
|
@ -7919,6 +7921,7 @@ PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
|
|||
|
||||
SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
// Get the inputs.
|
||||
SDValue Chain = Op.getOperand(0);
|
||||
SDValue Size = Op.getOperand(1);
|
||||
|
@ -7931,9 +7934,10 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
|
|||
DAG.getConstant(0, dl, PtrVT), Size);
|
||||
// Construct a node for the frame pointer save index.
|
||||
SDValue FPSIdx = getFramePointerFrameIndex(DAG);
|
||||
// Build a DYNALLOC node.
|
||||
SDValue Ops[3] = { Chain, NegSize, FPSIdx };
|
||||
SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
|
||||
if (hasInlineStackProbe(MF))
|
||||
return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
|
||||
return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
|
||||
}
|
||||
|
||||
|
@ -11799,6 +11803,184 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
|
|||
return MBB;
|
||||
}
|
||||
|
||||
/// Report whether \p MF asks for inline stack probing.
///
/// \returns true only when the function has a "probe-stack" attribute whose
/// string value is exactly "inline-asm"; false when the attribute is absent
/// or has any other value.
bool PPCTargetLowering::hasInlineStackProbe(MachineFunction &MF) const {
  const Function &Fn = MF.getFunction();

  // No "probe-stack" attribute at all: inline probes were not requested.
  if (!Fn.hasFnAttribute("probe-stack"))
    return false;

  // The attribute is present; only the "inline-asm" flavour counts.
  return Fn.getFnAttribute("probe-stack").getValueAsString() == "inline-asm";
}
|
||||
|
||||
/// Compute the size, in bytes, of one probed stack block for \p MF.
///
/// The size starts at 4096 and may be overridden by the function's
/// "stack-probe-size" attribute. It is then rounded down to a multiple of the
/// stack alignment.
///
/// \returns the rounded probe size, or the stack alignment itself when
/// rounding yields zero, so the result is never zero.
unsigned PPCTargetLowering::getStackProbeSize(MachineFunction &MF) const {
  const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
  unsigned StackAlign = TFI->getStackAlignment();
  assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
         "Unexpected stack alignment");

  // Default used when the function has no stack-probe-size attribute.
  unsigned ProbeSize = 4096;
  const Function &Fn = MF.getFunction();
  if (Fn.hasFnAttribute("stack-probe-size")) {
    // The status returned by getAsInteger is deliberately not inspected.
    // NOTE(review): presumably ProbeSize keeps the default when the string
    // does not parse -- confirm against StringRef::getAsInteger.
    Fn.getFnAttribute("stack-probe-size")
        .getValueAsString()
        .getAsInteger(0, ProbeSize);
  }

  // Clear the low bits so the size is a multiple of the (power-of-two) stack
  // alignment.
  const unsigned AlignedProbeSize = ProbeSize & ~(StackAlign - 1);
  if (AlignedProbeSize == 0)
    return StackAlign;
  return AlignedProbeSize;
}
|
||||
|
||||
// Lower dynamic stack allocation with probing. `emitProbedAlloca` is splitted
|
||||
// into three phases. In the first phase, it uses pseudo instruction
|
||||
// PREPARE_PROBED_ALLOCA to get the future result of actual FramePointer and
|
||||
// FinalStackPtr. In the second phase, it generates a loop for probing blocks.
|
||||
// At last, it uses pseudo instruction DYNAREAOFFSET to get the future result of
|
||||
// MaxCallFrameSize so that it can calculate correct data area pointer.
|
||||
MachineBasicBlock *
|
||||
PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
|
||||
MachineBasicBlock *MBB) const {
|
||||
const bool isPPC64 = Subtarget.isPPC64();
|
||||
MachineFunction *MF = MBB->getParent();
|
||||
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
|
||||
DebugLoc DL = MI.getDebugLoc();
|
||||
const unsigned ProbeSize = getStackProbeSize(*MF);
|
||||
const BasicBlock *ProbedBB = MBB->getBasicBlock();
|
||||
MachineRegisterInfo &MRI = MF->getRegInfo();
|
||||
// The CFG of probing stack looks as
|
||||
// +-----+
|
||||
// | MBB |
|
||||
// +--+--+
|
||||
// |
|
||||
// +----v----+
|
||||
// +--->+ TestMBB +---+
|
||||
// | +----+----+ |
|
||||
// | | |
|
||||
// | +-----v----+ |
|
||||
// +---+ BlockMBB | |
|
||||
// +----------+ |
|
||||
// |
|
||||
// +---------+ |
|
||||
// | TailMBB +<--+
|
||||
// +---------+
|
||||
// In MBB, calculate previous frame pointer and final stack pointer.
|
||||
// In TestMBB, test if sp is equal to final stack pointer, if so, jump to
|
||||
// TailMBB. In BlockMBB, update the sp atomically and jump back to TestMBB.
|
||||
// TailMBB is spliced via \p MI.
|
||||
MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
|
||||
MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
|
||||
MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
|
||||
|
||||
MachineFunction::iterator MBBIter = ++MBB->getIterator();
|
||||
MF->insert(MBBIter, TestMBB);
|
||||
MF->insert(MBBIter, BlockMBB);
|
||||
MF->insert(MBBIter, TailMBB);
|
||||
|
||||
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
|
||||
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
|
||||
|
||||
Register DstReg = MI.getOperand(0).getReg();
|
||||
Register NegSizeReg = MI.getOperand(1).getReg();
|
||||
Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
|
||||
Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
|
||||
Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
|
||||
|
||||
// Get the canonical FinalStackPtr like what
|
||||
// PPCRegisterInfo::lowerDynamicAlloc does.
|
||||
BuildMI(*MBB, {MI}, DL,
|
||||
TII->get(isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64
|
||||
: PPC::PREPARE_PROBED_ALLOCA_32),
|
||||
FramePointer)
|
||||
.addDef(FinalStackPtr)
|
||||
.addReg(NegSizeReg)
|
||||
.add(MI.getOperand(2))
|
||||
.add(MI.getOperand(3));
|
||||
|
||||
// Materialize a scratch register for update.
|
||||
int64_t NegProbeSize = -(int64_t)ProbeSize;
|
||||
assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
|
||||
Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
|
||||
if (!isInt<16>(NegProbeSize)) {
|
||||
Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
|
||||
BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
|
||||
.addImm(NegProbeSize >> 16);
|
||||
BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
|
||||
ScratchReg)
|
||||
.addReg(TempReg)
|
||||
.addImm(NegProbeSize & 0xFFFF);
|
||||
} else
|
||||
BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
|
||||
.addImm(NegProbeSize);
|
||||
|
||||
{
|
||||
// Probing leading residual part.
|
||||
Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
|
||||
BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
|
||||
.addReg(NegSizeReg)
|
||||
.addReg(ScratchReg);
|
||||
Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
|
||||
BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
|
||||
.addReg(Div)
|
||||
.addReg(ScratchReg);
|
||||
Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
|
||||
BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
|
||||
.addReg(Mul)
|
||||
.addReg(NegSizeReg);
|
||||
BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
|
||||
.addReg(FramePointer)
|
||||
.addReg(SPReg)
|
||||
.addReg(NegMod);
|
||||
}
|
||||
|
||||
{
|
||||
// Remaining part should be multiple of ProbeSize.
|
||||
Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
|
||||
BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
|
||||
.addReg(SPReg)
|
||||
.addReg(FinalStackPtr);
|
||||
BuildMI(TestMBB, DL, TII->get(PPC::BCC))
|
||||
.addImm(PPC::PRED_EQ)
|
||||
.addReg(CmpResult)
|
||||
.addMBB(TailMBB);
|
||||
TestMBB->addSuccessor(BlockMBB);
|
||||
TestMBB->addSuccessor(TailMBB);
|
||||
}
|
||||
|
||||
{
|
||||
// Touch the block.
|
||||
// |P...|P...|P...
|
||||
BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
|
||||
.addReg(FramePointer)
|
||||
.addReg(SPReg)
|
||||
.addReg(ScratchReg);
|
||||
BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
|
||||
BlockMBB->addSuccessor(TestMBB);
|
||||
}
|
||||
|
||||
// Calculation of MaxCallFrameSize is deferred to prologepilog, use
|
||||
// DYNAREAOFFSET pseudo instruction to get the future result.
|
||||
Register MaxCallFrameSizeReg =
|
||||
MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
|
||||
BuildMI(TailMBB, DL,
|
||||
TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
|
||||
MaxCallFrameSizeReg)
|
||||
.add(MI.getOperand(2))
|
||||
.add(MI.getOperand(3));
|
||||
BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
|
||||
.addReg(SPReg)
|
||||
.addReg(MaxCallFrameSizeReg);
|
||||
|
||||
// Splice instructions after MI to TailMBB.
|
||||
TailMBB->splice(TailMBB->end(), MBB,
|
||||
std::next(MachineBasicBlock::iterator(MI)), MBB->end());
|
||||
TailMBB->transferSuccessorsAndUpdatePHIs(MBB);
|
||||
MBB->addSuccessor(TestMBB);
|
||||
|
||||
// Delete the pseudo instruction.
|
||||
MI.eraseFromParent();
|
||||
|
||||
++NumDynamicAllocaProbed;
|
||||
return TailMBB;
|
||||
}
|
||||
|
||||
MachineBasicBlock *
|
||||
PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
|
||||
MachineBasicBlock *BB) const {
|
||||
|
@ -12565,6 +12747,9 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
|
|||
.addReg(NewFPSCRReg)
|
||||
.addImm(0)
|
||||
.addImm(0);
|
||||
} else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
|
||||
MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
|
||||
return emitProbedAlloca(MI, BB);
|
||||
} else {
|
||||
llvm_unreachable("Unexpected instr type to insert");
|
||||
}
|
||||
|
|
|
@ -138,6 +138,10 @@ namespace llvm {
|
|||
/// dynamic alloca.
|
||||
DYNAREAOFFSET,
|
||||
|
||||
/// To avoid stack clash, allocation is performed by block and each block is
|
||||
/// probed.
|
||||
PROBED_ALLOCA,
|
||||
|
||||
/// GlobalBaseReg - On Darwin, this node represents the result of the mflr
|
||||
/// at function entry, used for PIC code.
|
||||
GlobalBaseReg,
|
||||
|
@ -804,6 +808,13 @@ namespace llvm {
|
|||
MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
|
||||
MachineBasicBlock *MBB) const;
|
||||
|
||||
MachineBasicBlock *emitProbedAlloca(MachineInstr &MI,
|
||||
MachineBasicBlock *MBB) const;
|
||||
|
||||
bool hasInlineStackProbe(MachineFunction &MF) const override;
|
||||
|
||||
unsigned getStackProbeSize(MachineFunction &MF) const;
|
||||
|
||||
ConstraintType getConstraintType(StringRef Constraint) const override;
|
||||
|
||||
/// Examine constraint string and operand type and determine a weight value.
|
||||
|
|
|
@ -425,6 +425,16 @@ def DYNALLOC8 : PPCEmitTimePseudo<(outs g8rc:$result), (ins g8rc:$negsize, memri
|
|||
(PPCdynalloc i64:$negsize, iaddr:$fpsi))]>;
|
||||
def DYNAREAOFFSET8 : PPCEmitTimePseudo<(outs i64imm:$result), (ins memri:$fpsi), "#DYNAREAOFFSET8",
|
||||
[(set i64:$result, (PPCdynareaoffset iaddr:$fpsi))]>;
|
||||
// Probed alloca to support stack clash protection.
|
||||
// Both pseudos in this block are declared as defining and using X1 and as
// having no scheduling info.
let Defs = [X1], Uses = [X1], hasNoSchedulingInfo = 1 in {
|
||||
// Custom-inserter pseudo selected from the PPCprobedalloca node: takes the
// negated size and the frame-pointer save slot address, yields the result in
// a g8rc register.
def PROBED_ALLOCA_64 : PPCCustomInserterPseudo<(outs g8rc:$result),
|
||||
(ins g8rc:$negsize, memri:$fpsi), "#PROBED_ALLOCA_64",
|
||||
[(set i64:$result,
|
||||
(PPCprobedalloca i64:$negsize, iaddr:$fpsi))]>;
|
||||
// Emit-time pseudo with no selection pattern (empty pattern list); it has two
// outputs, $fp and $sp, and the same inputs as PROBED_ALLOCA_64.
def PREPARE_PROBED_ALLOCA_64 : PPCEmitTimePseudo<(outs g8rc:$fp,
|
||||
g8rc:$sp),
|
||||
(ins g8rc:$negsize, memri:$fpsi), "#PREPARE_PROBED_ALLOCA_64", []>;
|
||||
}
|
||||
|
||||
let hasSideEffects = 0 in {
|
||||
let Defs = [LR8] in {
|
||||
|
|
|
@ -323,6 +323,7 @@ def SDTDynOp : SDTypeProfile<1, 2, []>;
|
|||
def SDTDynAreaOp : SDTypeProfile<1, 1, []>;
|
||||
def PPCdynalloc : SDNode<"PPCISD::DYNALLOC", SDTDynOp, [SDNPHasChain]>;
|
||||
def PPCdynareaoffset : SDNode<"PPCISD::DYNAREAOFFSET", SDTDynAreaOp, [SDNPHasChain]>;
|
||||
def PPCprobedalloca : SDNode<"PPCISD::PROBED_ALLOCA", SDTDynOp, [SDNPHasChain]>;
|
||||
|
||||
// PC Relative Specific Nodes
|
||||
def PPCmatpcreladdr : SDNode<"PPCISD::MAT_PCREL_ADDR", SDTIntUnaryOp, []>;
|
||||
|
@ -1399,6 +1400,16 @@ def DYNALLOC : PPCEmitTimePseudo<(outs gprc:$result), (ins gprc:$negsize, memri:
|
|||
(PPCdynalloc i32:$negsize, iaddr:$fpsi))]>;
|
||||
def DYNAREAOFFSET : PPCEmitTimePseudo<(outs i32imm:$result), (ins memri:$fpsi), "#DYNAREAOFFSET",
|
||||
[(set i32:$result, (PPCdynareaoffset iaddr:$fpsi))]>;
|
||||
// Probed alloca to support stack clash protection.
|
||||
// Both pseudos in this block are declared as defining and using R1 and as
// having no scheduling info.
let Defs = [R1], Uses = [R1], hasNoSchedulingInfo = 1 in {
|
||||
// Custom-inserter pseudo selected from the PPCprobedalloca node: takes the
// negated size and the frame-pointer save slot address, yields the result in
// a gprc register.
def PROBED_ALLOCA_32 : PPCCustomInserterPseudo<(outs gprc:$result),
|
||||
(ins gprc:$negsize, memri:$fpsi), "#PROBED_ALLOCA_32",
|
||||
[(set i32:$result,
|
||||
(PPCprobedalloca i32:$negsize, iaddr:$fpsi))]>;
|
||||
// Emit-time pseudo with no selection pattern (empty pattern list); it has two
// outputs, $fp and $sp, and the same inputs as PROBED_ALLOCA_32.
def PREPARE_PROBED_ALLOCA_32 : PPCEmitTimePseudo<(outs gprc:$fp,
|
||||
gprc:$sp),
|
||||
(ins gprc:$negsize, memri:$fpsi), "#PREPARE_PROBED_ALLOCA_32", []>;
|
||||
}
|
||||
|
||||
// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
|
||||
// instruction selection into a branch sequence.
|
||||
|
|
|
@ -610,6 +610,38 @@ void PPCRegisterInfo::prepareDynamicAlloca(MachineBasicBlock::iterator II,
|
|||
}
|
||||
}
|
||||
|
||||
/// Expand the PREPARE_PROBED_ALLOCA pseudo at \p II.
///
/// Operand 0 is the frame pointer register to define, operand 1 the final
/// stack pointer register to define, and operand 2 the negated allocation
/// size. After prepareDynamicAlloca has run, the final stack pointer is
/// emitted as SP + NegSize and the pseudo is erased.
void PPCRegisterInfo::lowerPrepareProbedAlloca(
    MachineBasicBlock::iterator II) const {
  MachineInstr &MI = *II;
  // Walk up from the instruction to its block and its function.
  MachineBasicBlock &MBB = *MI.getParent();
  MachineFunction &MF = *MBB.getParent();
  const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
  // 64-bit pointers select the 64-bit add and X1; otherwise the 32-bit add
  // and R1.
  bool LP64 = TM.isPPC64();
  DebugLoc dl = MI.getDebugLoc();

  Register FramePointer = MI.getOperand(0).getReg();
  Register FinalStackPtr = MI.getOperand(1).getReg();
  bool KillNegSizeReg = MI.getOperand(2).isKill();
  Register NegSizeReg = MI.getOperand(2).getReg();

  // NegSizeReg, KillNegSizeReg and FramePointer are passed by reference, so
  // the values used below are whatever prepareDynamicAlloca leaves in them.
  prepareDynamicAlloca(II, NegSizeReg, KillNegSizeReg, FramePointer);

  // FinalStackPtr = SP + NegSize, inserted before the pseudo.
  Register SPReg = LP64 ? PPC::X1 : PPC::R1;
  BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::ADD8 : PPC::ADD4), FinalStackPtr)
      .addReg(SPReg)
      .addReg(NegSizeReg, getKillRegState(KillNegSizeReg));

  // The pseudo is now fully lowered.
  MBB.erase(II);
}
|
||||
|
||||
void PPCRegisterInfo::lowerDynamicAreaOffset(
|
||||
MachineBasicBlock::iterator II) const {
|
||||
// Get the instruction.
|
||||
|
@ -1050,6 +1082,13 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
|
|||
return;
|
||||
}
|
||||
|
||||
if (FPSI && FrameIndex == FPSI &&
|
||||
(OpC == PPC::PREPARE_PROBED_ALLOCA_64 ||
|
||||
OpC == PPC::PREPARE_PROBED_ALLOCA_32)) {
|
||||
lowerPrepareProbedAlloca(II);
|
||||
return;
|
||||
}
|
||||
|
||||
// Special case for pseudo-ops SPILL_CR and RESTORE_CR, etc.
|
||||
if (OpC == PPC::SPILL_CR) {
|
||||
lowerCRSpilling(II, FrameIndex);
|
||||
|
|
|
@ -110,6 +110,7 @@ public:
|
|||
void prepareDynamicAlloca(MachineBasicBlock::iterator II,
|
||||
Register &NegSizeReg, bool &KillNegSizeReg,
|
||||
Register &FramePointer) const;
|
||||
void lowerPrepareProbedAlloca(MachineBasicBlock::iterator II) const;
|
||||
void lowerCRSpilling(MachineBasicBlock::iterator II,
|
||||
unsigned FrameIndex) const;
|
||||
void lowerCRRestore(MachineBasicBlock::iterator II,
|
||||
|
|
|
@ -0,0 +1,437 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -ppc-asm-full-reg-names -verify-machineinstrs \
|
||||
; RUN: -mtriple=powerpc64le-linux-gnu < %s | FileCheck \
|
||||
; RUN: -check-prefix=CHECK-LE %s
|
||||
; RUN: llc -ppc-asm-full-reg-names -verify-machineinstrs \
|
||||
; RUN: -mtriple=powerpc64le-linux-gnu -mcpu=pwr9 < %s | FileCheck \
|
||||
; RUN: -check-prefix=CHECK-P9-LE %s
|
||||
; RUN: llc -ppc-asm-full-reg-names -verify-machineinstrs \
|
||||
; RUN: -mtriple=powerpc64-linux-gnu < %s | FileCheck \
|
||||
; RUN: -check-prefix=CHECK-BE %s
|
||||
; RUN: llc -ppc-asm-full-reg-names -verify-machineinstrs \
|
||||
; RUN: -mtriple=powerpc-linux-gnu < %s | FileCheck \
|
||||
; RUN: -check-prefix=CHECK-32 %s
|
||||
|
||||
define i32 @foo(i32 %n) local_unnamed_addr #0 "stack-probe-size"="32768" nounwind {
|
||||
; CHECK-LE-LABEL: foo:
|
||||
; CHECK-LE: # %bb.0:
|
||||
; CHECK-LE-NEXT: std r31, -8(r1)
|
||||
; CHECK-LE-NEXT: stdu r1, -48(r1)
|
||||
; CHECK-LE-NEXT: rldic r3, r3, 2, 30
|
||||
; CHECK-LE-NEXT: li r6, -32768
|
||||
; CHECK-LE-NEXT: mr r31, r1
|
||||
; CHECK-LE-NEXT: addi r3, r3, 15
|
||||
; CHECK-LE-NEXT: addi r4, r31, 48
|
||||
; CHECK-LE-NEXT: rldicl r3, r3, 60, 4
|
||||
; CHECK-LE-NEXT: rldicl r3, r3, 4, 29
|
||||
; CHECK-LE-NEXT: neg r5, r3
|
||||
; CHECK-LE-NEXT: divd r7, r5, r6
|
||||
; CHECK-LE-NEXT: add r3, r1, r5
|
||||
; CHECK-LE-NEXT: mulld r6, r7, r6
|
||||
; CHECK-LE-NEXT: sub r5, r5, r6
|
||||
; CHECK-LE-NEXT: stdux r4, r1, r5
|
||||
; CHECK-LE-NEXT: cmpd r1, r3
|
||||
; CHECK-LE-NEXT: beq cr0, .LBB0_2
|
||||
; CHECK-LE-NEXT: .LBB0_1:
|
||||
; CHECK-LE-NEXT: stdu r4, -32768(r1)
|
||||
; CHECK-LE-NEXT: cmpd r1, r3
|
||||
; CHECK-LE-NEXT: bne cr0, .LBB0_1
|
||||
; CHECK-LE-NEXT: .LBB0_2:
|
||||
; CHECK-LE-NEXT: li r4, 1
|
||||
; CHECK-LE-NEXT: addi r3, r1, 32
|
||||
; CHECK-LE-NEXT: stw r4, 4792(r3)
|
||||
; CHECK-LE-NEXT: lwz r3, 0(r3)
|
||||
; CHECK-LE-NEXT: ld r1, 0(r1)
|
||||
; CHECK-LE-NEXT: ld r31, -8(r1)
|
||||
; CHECK-LE-NEXT: blr
|
||||
;
|
||||
; CHECK-P9-LE-LABEL: foo:
|
||||
; CHECK-P9-LE: # %bb.0:
|
||||
; CHECK-P9-LE-NEXT: std r31, -8(r1)
|
||||
; CHECK-P9-LE-NEXT: stdu r1, -48(r1)
|
||||
; CHECK-P9-LE-NEXT: rldic r3, r3, 2, 30
|
||||
; CHECK-P9-LE-NEXT: addi r3, r3, 15
|
||||
; CHECK-P9-LE-NEXT: rldicl r3, r3, 60, 4
|
||||
; CHECK-P9-LE-NEXT: rldicl r3, r3, 4, 29
|
||||
; CHECK-P9-LE-NEXT: neg r5, r3
|
||||
; CHECK-P9-LE-NEXT: li r6, -32768
|
||||
; CHECK-P9-LE-NEXT: divd r7, r5, r6
|
||||
; CHECK-P9-LE-NEXT: mulld r6, r7, r6
|
||||
; CHECK-P9-LE-NEXT: mr r31, r1
|
||||
; CHECK-P9-LE-NEXT: addi r4, r31, 48
|
||||
; CHECK-P9-LE-NEXT: add r3, r1, r5
|
||||
; CHECK-P9-LE-NEXT: sub r5, r5, r6
|
||||
; CHECK-P9-LE-NEXT: stdux r4, r1, r5
|
||||
; CHECK-P9-LE-NEXT: cmpd r1, r3
|
||||
; CHECK-P9-LE-NEXT: beq cr0, .LBB0_2
|
||||
; CHECK-P9-LE-NEXT: .LBB0_1:
|
||||
; CHECK-P9-LE-NEXT: stdu r4, -32768(r1)
|
||||
; CHECK-P9-LE-NEXT: cmpd r1, r3
|
||||
; CHECK-P9-LE-NEXT: bne cr0, .LBB0_1
|
||||
; CHECK-P9-LE-NEXT: .LBB0_2:
|
||||
; CHECK-P9-LE-NEXT: addi r3, r1, 32
|
||||
; CHECK-P9-LE-NEXT: li r4, 1
|
||||
; CHECK-P9-LE-NEXT: stw r4, 4792(r3)
|
||||
; CHECK-P9-LE-NEXT: lwz r3, 0(r3)
|
||||
; CHECK-P9-LE-NEXT: ld r1, 0(r1)
|
||||
; CHECK-P9-LE-NEXT: ld r31, -8(r1)
|
||||
; CHECK-P9-LE-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-LABEL: foo:
|
||||
; CHECK-BE: # %bb.0:
|
||||
; CHECK-BE-NEXT: std r31, -8(r1)
|
||||
; CHECK-BE-NEXT: stdu r1, -64(r1)
|
||||
; CHECK-BE-NEXT: rldic r3, r3, 2, 30
|
||||
; CHECK-BE-NEXT: li r6, -32768
|
||||
; CHECK-BE-NEXT: addi r3, r3, 15
|
||||
; CHECK-BE-NEXT: rldicl r3, r3, 60, 4
|
||||
; CHECK-BE-NEXT: mr r31, r1
|
||||
; CHECK-BE-NEXT: rldicl r3, r3, 4, 29
|
||||
; CHECK-BE-NEXT: addi r4, r31, 64
|
||||
; CHECK-BE-NEXT: neg r5, r3
|
||||
; CHECK-BE-NEXT: divd r7, r5, r6
|
||||
; CHECK-BE-NEXT: add r3, r1, r5
|
||||
; CHECK-BE-NEXT: mulld r6, r7, r6
|
||||
; CHECK-BE-NEXT: sub r5, r5, r6
|
||||
; CHECK-BE-NEXT: stdux r4, r1, r5
|
||||
; CHECK-BE-NEXT: cmpd r1, r3
|
||||
; CHECK-BE-NEXT: beq cr0, .LBB0_2
|
||||
; CHECK-BE-NEXT: .LBB0_1:
|
||||
; CHECK-BE-NEXT: stdu r4, -32768(r1)
|
||||
; CHECK-BE-NEXT: cmpd r1, r3
|
||||
; CHECK-BE-NEXT: bne cr0, .LBB0_1
|
||||
; CHECK-BE-NEXT: .LBB0_2:
|
||||
; CHECK-BE-NEXT: li r4, 1
|
||||
; CHECK-BE-NEXT: addi r3, r1, 48
|
||||
; CHECK-BE-NEXT: stw r4, 4792(r3)
|
||||
; CHECK-BE-NEXT: lwz r3, 0(r3)
|
||||
; CHECK-BE-NEXT: ld r1, 0(r1)
|
||||
; CHECK-BE-NEXT: ld r31, -8(r1)
|
||||
; CHECK-BE-NEXT: blr
|
||||
;
|
||||
; CHECK-32-LABEL: foo:
|
||||
; CHECK-32: # %bb.0:
|
||||
; CHECK-32-NEXT: stwu r1, -32(r1)
|
||||
; CHECK-32-NEXT: slwi r3, r3, 2
|
||||
; CHECK-32-NEXT: addi r3, r3, 15
|
||||
; CHECK-32-NEXT: rlwinm r3, r3, 0, 0, 27
|
||||
; CHECK-32-NEXT: neg r5, r3
|
||||
; CHECK-32-NEXT: li r6, -32768
|
||||
; CHECK-32-NEXT: divw r7, r5, r6
|
||||
; CHECK-32-NEXT: stw r31, 28(r1)
|
||||
; CHECK-32-NEXT: mr r31, r1
|
||||
; CHECK-32-NEXT: addi r4, r31, 32
|
||||
; CHECK-32-NEXT: add r3, r1, r5
|
||||
; CHECK-32-NEXT: mullw r6, r7, r6
|
||||
; CHECK-32-NEXT: sub r5, r5, r6
|
||||
; CHECK-32-NEXT: stwux r4, r1, r5
|
||||
; CHECK-32-NEXT: cmpw r1, r3
|
||||
; CHECK-32-NEXT: beq cr0, .LBB0_2
|
||||
; CHECK-32-NEXT: .LBB0_1:
|
||||
; CHECK-32-NEXT: stwu r4, -32768(r1)
|
||||
; CHECK-32-NEXT: cmpw r1, r3
|
||||
; CHECK-32-NEXT: bne cr0, .LBB0_1
|
||||
; CHECK-32-NEXT: .LBB0_2:
|
||||
; CHECK-32-NEXT: li r4, 1
|
||||
; CHECK-32-NEXT: addi r3, r1, 16
|
||||
; CHECK-32-NEXT: stw r4, 4792(r3)
|
||||
; CHECK-32-NEXT: lwz r3, 0(r3)
|
||||
; CHECK-32-NEXT: lwz r31, 0(r1)
|
||||
; CHECK-32-NEXT: lwz r0, -4(r31)
|
||||
; CHECK-32-NEXT: mr r1, r31
|
||||
; CHECK-32-NEXT: mr r31, r0
|
||||
; CHECK-32-NEXT: blr
|
||||
%a = alloca i32, i32 %n, align 16
|
||||
%b = getelementptr inbounds i32, i32* %a, i64 1198
|
||||
store volatile i32 1, i32* %b
|
||||
%c = load volatile i32, i32* %a
|
||||
ret i32 %c
|
||||
}
|
||||
|
||||
define i32 @bar(i32 %n) local_unnamed_addr #0 nounwind {
|
||||
; CHECK-LE-LABEL: bar:
|
||||
; CHECK-LE: # %bb.0:
|
||||
; CHECK-LE-NEXT: std r31, -8(r1)
|
||||
; CHECK-LE-NEXT: stdu r1, -48(r1)
|
||||
; CHECK-LE-NEXT: rldic r4, r3, 2, 30
|
||||
; CHECK-LE-NEXT: li r7, -4096
|
||||
; CHECK-LE-NEXT: mr r31, r1
|
||||
; CHECK-LE-NEXT: addi r4, r4, 15
|
||||
; CHECK-LE-NEXT: addi r5, r31, 48
|
||||
; CHECK-LE-NEXT: rldicl r4, r4, 60, 4
|
||||
; CHECK-LE-NEXT: rldicl r4, r4, 4, 29
|
||||
; CHECK-LE-NEXT: neg r6, r4
|
||||
; CHECK-LE-NEXT: divd r8, r6, r7
|
||||
; CHECK-LE-NEXT: add r4, r1, r6
|
||||
; CHECK-LE-NEXT: mulld r7, r8, r7
|
||||
; CHECK-LE-NEXT: sub r6, r6, r7
|
||||
; CHECK-LE-NEXT: stdux r5, r1, r6
|
||||
; CHECK-LE-NEXT: cmpd r1, r4
|
||||
; CHECK-LE-NEXT: beq cr0, .LBB1_2
|
||||
; CHECK-LE-NEXT: .LBB1_1:
|
||||
; CHECK-LE-NEXT: stdu r5, -4096(r1)
|
||||
; CHECK-LE-NEXT: cmpd r1, r4
|
||||
; CHECK-LE-NEXT: bne cr0, .LBB1_1
|
||||
; CHECK-LE-NEXT: .LBB1_2:
|
||||
; CHECK-LE-NEXT: extsw r3, r3
|
||||
; CHECK-LE-NEXT: li r5, 1
|
||||
; CHECK-LE-NEXT: addi r4, r1, 32
|
||||
; CHECK-LE-NEXT: sldi r3, r3, 2
|
||||
; CHECK-LE-NEXT: add r3, r4, r3
|
||||
; CHECK-LE-NEXT: stw r5, 4096(r3)
|
||||
; CHECK-LE-NEXT: lwz r3, 0(r4)
|
||||
; CHECK-LE-NEXT: ld r1, 0(r1)
|
||||
; CHECK-LE-NEXT: ld r31, -8(r1)
|
||||
; CHECK-LE-NEXT: blr
|
||||
;
|
||||
; CHECK-P9-LE-LABEL: bar:
|
||||
; CHECK-P9-LE: # %bb.0:
|
||||
; CHECK-P9-LE-NEXT: std r31, -8(r1)
|
||||
; CHECK-P9-LE-NEXT: stdu r1, -48(r1)
|
||||
; CHECK-P9-LE-NEXT: rldic r4, r3, 2, 30
|
||||
; CHECK-P9-LE-NEXT: addi r4, r4, 15
|
||||
; CHECK-P9-LE-NEXT: rldicl r4, r4, 60, 4
|
||||
; CHECK-P9-LE-NEXT: rldicl r4, r4, 4, 29
|
||||
; CHECK-P9-LE-NEXT: neg r6, r4
|
||||
; CHECK-P9-LE-NEXT: li r7, -4096
|
||||
; CHECK-P9-LE-NEXT: divd r8, r6, r7
|
||||
; CHECK-P9-LE-NEXT: mulld r7, r8, r7
|
||||
; CHECK-P9-LE-NEXT: mr r31, r1
|
||||
; CHECK-P9-LE-NEXT: addi r5, r31, 48
|
||||
; CHECK-P9-LE-NEXT: add r4, r1, r6
|
||||
; CHECK-P9-LE-NEXT: sub r6, r6, r7
|
||||
; CHECK-P9-LE-NEXT: stdux r5, r1, r6
|
||||
; CHECK-P9-LE-NEXT: cmpd r1, r4
|
||||
; CHECK-P9-LE-NEXT: beq cr0, .LBB1_2
|
||||
; CHECK-P9-LE-NEXT: .LBB1_1:
|
||||
; CHECK-P9-LE-NEXT: stdu r5, -4096(r1)
|
||||
; CHECK-P9-LE-NEXT: cmpd r1, r4
|
||||
; CHECK-P9-LE-NEXT: bne cr0, .LBB1_1
|
||||
; CHECK-P9-LE-NEXT: .LBB1_2:
|
||||
; CHECK-P9-LE-NEXT: addi r4, r1, 32
|
||||
; CHECK-P9-LE-NEXT: extswsli r3, r3, 2
|
||||
; CHECK-P9-LE-NEXT: add r3, r4, r3
|
||||
; CHECK-P9-LE-NEXT: li r5, 1
|
||||
; CHECK-P9-LE-NEXT: stw r5, 4096(r3)
|
||||
; CHECK-P9-LE-NEXT: lwz r3, 0(r4)
|
||||
; CHECK-P9-LE-NEXT: ld r1, 0(r1)
|
||||
; CHECK-P9-LE-NEXT: ld r31, -8(r1)
|
||||
; CHECK-P9-LE-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-LABEL: bar:
|
||||
; CHECK-BE: # %bb.0:
|
||||
; CHECK-BE-NEXT: std r31, -8(r1)
|
||||
; CHECK-BE-NEXT: stdu r1, -64(r1)
|
||||
; CHECK-BE-NEXT: rldic r4, r3, 2, 30
|
||||
; CHECK-BE-NEXT: li r7, -4096
|
||||
; CHECK-BE-NEXT: addi r4, r4, 15
|
||||
; CHECK-BE-NEXT: rldicl r4, r4, 60, 4
|
||||
; CHECK-BE-NEXT: mr r31, r1
|
||||
; CHECK-BE-NEXT: rldicl r4, r4, 4, 29
|
||||
; CHECK-BE-NEXT: addi r5, r31, 64
|
||||
; CHECK-BE-NEXT: neg r6, r4
|
||||
; CHECK-BE-NEXT: divd r8, r6, r7
|
||||
; CHECK-BE-NEXT: add r4, r1, r6
|
||||
; CHECK-BE-NEXT: mulld r7, r8, r7
|
||||
; CHECK-BE-NEXT: sub r6, r6, r7
|
||||
; CHECK-BE-NEXT: stdux r5, r1, r6
|
||||
; CHECK-BE-NEXT: cmpd r1, r4
|
||||
; CHECK-BE-NEXT: beq cr0, .LBB1_2
|
||||
; CHECK-BE-NEXT: .LBB1_1:
|
||||
; CHECK-BE-NEXT: stdu r5, -4096(r1)
|
||||
; CHECK-BE-NEXT: cmpd r1, r4
|
||||
; CHECK-BE-NEXT: bne cr0, .LBB1_1
|
||||
; CHECK-BE-NEXT: .LBB1_2:
|
||||
; CHECK-BE-NEXT: extsw r3, r3
|
||||
; CHECK-BE-NEXT: addi r4, r1, 48
|
||||
; CHECK-BE-NEXT: sldi r3, r3, 2
|
||||
; CHECK-BE-NEXT: li r5, 1
|
||||
; CHECK-BE-NEXT: add r3, r4, r3
|
||||
; CHECK-BE-NEXT: stw r5, 4096(r3)
|
||||
; CHECK-BE-NEXT: lwz r3, 0(r4)
|
||||
; CHECK-BE-NEXT: ld r1, 0(r1)
|
||||
; CHECK-BE-NEXT: ld r31, -8(r1)
|
||||
; CHECK-BE-NEXT: blr
|
||||
;
|
||||
; CHECK-32-LABEL: bar:
|
||||
; CHECK-32: # %bb.0:
|
||||
; CHECK-32-NEXT: stwu r1, -32(r1)
|
||||
; CHECK-32-NEXT: slwi r3, r3, 2
|
||||
; CHECK-32-NEXT: addi r4, r3, 15
|
||||
; CHECK-32-NEXT: rlwinm r4, r4, 0, 0, 27
|
||||
; CHECK-32-NEXT: neg r6, r4
|
||||
; CHECK-32-NEXT: li r7, -4096
|
||||
; CHECK-32-NEXT: divw r8, r6, r7
|
||||
; CHECK-32-NEXT: stw r31, 28(r1)
|
||||
; CHECK-32-NEXT: mr r31, r1
|
||||
; CHECK-32-NEXT: addi r5, r31, 32
|
||||
; CHECK-32-NEXT: add r4, r1, r6
|
||||
; CHECK-32-NEXT: mullw r7, r8, r7
|
||||
; CHECK-32-NEXT: sub r6, r6, r7
|
||||
; CHECK-32-NEXT: stwux r5, r1, r6
|
||||
; CHECK-32-NEXT: cmpw r1, r4
|
||||
; CHECK-32-NEXT: beq cr0, .LBB1_2
|
||||
; CHECK-32-NEXT: .LBB1_1:
|
||||
; CHECK-32-NEXT: stwu r5, -4096(r1)
|
||||
; CHECK-32-NEXT: cmpw r1, r4
|
||||
; CHECK-32-NEXT: bne cr0, .LBB1_1
|
||||
; CHECK-32-NEXT: .LBB1_2:
|
||||
; CHECK-32-NEXT: addi r4, r1, 16
|
||||
; CHECK-32-NEXT: li r5, 1
|
||||
; CHECK-32-NEXT: add r3, r4, r3
|
||||
; CHECK-32-NEXT: stw r5, 4096(r3)
|
||||
; CHECK-32-NEXT: lwz r3, 0(r4)
|
||||
; CHECK-32-NEXT: lwz r31, 0(r1)
|
||||
; CHECK-32-NEXT: lwz r0, -4(r31)
|
||||
; CHECK-32-NEXT: mr r1, r31
|
||||
; CHECK-32-NEXT: mr r31, r0
|
||||
; CHECK-32-NEXT: blr
|
||||
%a = alloca i32, i32 %n, align 16
|
||||
%i = add i32 %n, 1024
|
||||
%b = getelementptr inbounds i32, i32* %a, i32 %i
|
||||
store volatile i32 1, i32* %b
|
||||
%c = load volatile i32, i32* %a
|
||||
ret i32 %c
|
||||
}
|
||||
|
||||
define i32 @f(i32 %n) local_unnamed_addr #0 "stack-probe-size"="65536" nounwind {
|
||||
; CHECK-LE-LABEL: f:
|
||||
; CHECK-LE: # %bb.0:
|
||||
; CHECK-LE-NEXT: std r31, -8(r1)
|
||||
; CHECK-LE-NEXT: stdu r1, -48(r1)
|
||||
; CHECK-LE-NEXT: rldic r3, r3, 2, 30
|
||||
; CHECK-LE-NEXT: lis r5, -1
|
||||
; CHECK-LE-NEXT: mr r31, r1
|
||||
; CHECK-LE-NEXT: addi r3, r3, 15
|
||||
; CHECK-LE-NEXT: ori r5, r5, 0
|
||||
; CHECK-LE-NEXT: addi r4, r31, 48
|
||||
; CHECK-LE-NEXT: rldicl r3, r3, 60, 4
|
||||
; CHECK-LE-NEXT: rldicl r3, r3, 4, 29
|
||||
; CHECK-LE-NEXT: neg r6, r3
|
||||
; CHECK-LE-NEXT: divd r7, r6, r5
|
||||
; CHECK-LE-NEXT: add r3, r1, r6
|
||||
; CHECK-LE-NEXT: mulld r7, r7, r5
|
||||
; CHECK-LE-NEXT: sub r6, r6, r7
|
||||
; CHECK-LE-NEXT: stdux r4, r1, r6
|
||||
; CHECK-LE-NEXT: cmpd r1, r3
|
||||
; CHECK-LE-NEXT: beq cr0, .LBB2_2
|
||||
; CHECK-LE-NEXT: .LBB2_1:
|
||||
; CHECK-LE-NEXT: stdux r4, r1, r5
|
||||
; CHECK-LE-NEXT: cmpd r1, r3
|
||||
; CHECK-LE-NEXT: bne cr0, .LBB2_1
|
||||
; CHECK-LE-NEXT: .LBB2_2:
|
||||
; CHECK-LE-NEXT: li r4, 1
|
||||
; CHECK-LE-NEXT: addi r3, r1, 32
|
||||
; CHECK-LE-NEXT: stw r4, 4792(r3)
|
||||
; CHECK-LE-NEXT: lwz r3, 0(r3)
|
||||
; CHECK-LE-NEXT: ld r1, 0(r1)
|
||||
; CHECK-LE-NEXT: ld r31, -8(r1)
|
||||
; CHECK-LE-NEXT: blr
|
||||
;
|
||||
; CHECK-P9-LE-LABEL: f:
|
||||
; CHECK-P9-LE: # %bb.0:
|
||||
; CHECK-P9-LE-NEXT: std r31, -8(r1)
|
||||
; CHECK-P9-LE-NEXT: stdu r1, -48(r1)
|
||||
; CHECK-P9-LE-NEXT: rldic r3, r3, 2, 30
|
||||
; CHECK-P9-LE-NEXT: addi r3, r3, 15
|
||||
; CHECK-P9-LE-NEXT: rldicl r3, r3, 60, 4
|
||||
; CHECK-P9-LE-NEXT: rldicl r3, r3, 4, 29
|
||||
; CHECK-P9-LE-NEXT: lis r5, -1
|
||||
; CHECK-P9-LE-NEXT: ori r5, r5, 0
|
||||
; CHECK-P9-LE-NEXT: neg r6, r3
|
||||
; CHECK-P9-LE-NEXT: divd r7, r6, r5
|
||||
; CHECK-P9-LE-NEXT: mulld r7, r7, r5
|
||||
; CHECK-P9-LE-NEXT: mr r31, r1
|
||||
; CHECK-P9-LE-NEXT: addi r4, r31, 48
|
||||
; CHECK-P9-LE-NEXT: add r3, r1, r6
|
||||
; CHECK-P9-LE-NEXT: sub r6, r6, r7
|
||||
; CHECK-P9-LE-NEXT: stdux r4, r1, r6
|
||||
; CHECK-P9-LE-NEXT: cmpd r1, r3
|
||||
; CHECK-P9-LE-NEXT: beq cr0, .LBB2_2
|
||||
; CHECK-P9-LE-NEXT: .LBB2_1:
|
||||
; CHECK-P9-LE-NEXT: stdux r4, r1, r5
|
||||
; CHECK-P9-LE-NEXT: cmpd r1, r3
|
||||
; CHECK-P9-LE-NEXT: bne cr0, .LBB2_1
|
||||
; CHECK-P9-LE-NEXT: .LBB2_2:
|
||||
; CHECK-P9-LE-NEXT: addi r3, r1, 32
|
||||
; CHECK-P9-LE-NEXT: li r4, 1
|
||||
; CHECK-P9-LE-NEXT: stw r4, 4792(r3)
|
||||
; CHECK-P9-LE-NEXT: lwz r3, 0(r3)
|
||||
; CHECK-P9-LE-NEXT: ld r1, 0(r1)
|
||||
; CHECK-P9-LE-NEXT: ld r31, -8(r1)
|
||||
; CHECK-P9-LE-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-LABEL: f:
|
||||
; CHECK-BE: # %bb.0:
|
||||
; CHECK-BE-NEXT: std r31, -8(r1)
|
||||
; CHECK-BE-NEXT: stdu r1, -64(r1)
|
||||
; CHECK-BE-NEXT: rldic r3, r3, 2, 30
|
||||
; CHECK-BE-NEXT: lis r5, -1
|
||||
; CHECK-BE-NEXT: addi r3, r3, 15
|
||||
; CHECK-BE-NEXT: rldicl r3, r3, 60, 4
|
||||
; CHECK-BE-NEXT: ori r5, r5, 0
|
||||
; CHECK-BE-NEXT: rldicl r3, r3, 4, 29
|
||||
; CHECK-BE-NEXT: mr r31, r1
|
||||
; CHECK-BE-NEXT: neg r6, r3
|
||||
; CHECK-BE-NEXT: divd r7, r6, r5
|
||||
; CHECK-BE-NEXT: addi r4, r31, 64
|
||||
; CHECK-BE-NEXT: mulld r7, r7, r5
|
||||
; CHECK-BE-NEXT: add r3, r1, r6
|
||||
; CHECK-BE-NEXT: sub r6, r6, r7
|
||||
; CHECK-BE-NEXT: stdux r4, r1, r6
|
||||
; CHECK-BE-NEXT: cmpd r1, r3
|
||||
; CHECK-BE-NEXT: beq cr0, .LBB2_2
|
||||
; CHECK-BE-NEXT: .LBB2_1:
|
||||
; CHECK-BE-NEXT: stdux r4, r1, r5
|
||||
; CHECK-BE-NEXT: cmpd r1, r3
|
||||
; CHECK-BE-NEXT: bne cr0, .LBB2_1
|
||||
; CHECK-BE-NEXT: .LBB2_2:
|
||||
; CHECK-BE-NEXT: li r4, 1
|
||||
; CHECK-BE-NEXT: addi r3, r1, 48
|
||||
; CHECK-BE-NEXT: stw r4, 4792(r3)
|
||||
; CHECK-BE-NEXT: lwz r3, 0(r3)
|
||||
; CHECK-BE-NEXT: ld r1, 0(r1)
|
||||
; CHECK-BE-NEXT: ld r31, -8(r1)
|
||||
; CHECK-BE-NEXT: blr
|
||||
;
|
||||
; CHECK-32-LABEL: f:
|
||||
; CHECK-32: # %bb.0:
|
||||
; CHECK-32-NEXT: stwu r1, -32(r1)
|
||||
; CHECK-32-NEXT: slwi r3, r3, 2
|
||||
; CHECK-32-NEXT: addi r3, r3, 15
|
||||
; CHECK-32-NEXT: rlwinm r3, r3, 0, 0, 27
|
||||
; CHECK-32-NEXT: lis r5, -1
|
||||
; CHECK-32-NEXT: neg r6, r3
|
||||
; CHECK-32-NEXT: ori r5, r5, 0
|
||||
; CHECK-32-NEXT: divw r7, r6, r5
|
||||
; CHECK-32-NEXT: stw r31, 28(r1)
|
||||
; CHECK-32-NEXT: mr r31, r1
|
||||
; CHECK-32-NEXT: addi r4, r31, 32
|
||||
; CHECK-32-NEXT: add r3, r1, r6
|
||||
; CHECK-32-NEXT: mullw r7, r7, r5
|
||||
; CHECK-32-NEXT: sub r6, r6, r7
|
||||
; CHECK-32-NEXT: stwux r4, r1, r6
|
||||
; CHECK-32-NEXT: cmpw r1, r3
|
||||
; CHECK-32-NEXT: beq cr0, .LBB2_2
|
||||
; CHECK-32-NEXT: .LBB2_1:
|
||||
; CHECK-32-NEXT: stwux r4, r1, r5
|
||||
; CHECK-32-NEXT: cmpw r1, r3
|
||||
; CHECK-32-NEXT: bne cr0, .LBB2_1
|
||||
; CHECK-32-NEXT: .LBB2_2:
|
||||
; CHECK-32-NEXT: li r4, 1
|
||||
; CHECK-32-NEXT: addi r3, r1, 16
|
||||
; CHECK-32-NEXT: stw r4, 4792(r3)
|
||||
; CHECK-32-NEXT: lwz r3, 0(r3)
|
||||
; CHECK-32-NEXT: lwz r31, 0(r1)
|
||||
; CHECK-32-NEXT: lwz r0, -4(r31)
|
||||
; CHECK-32-NEXT: mr r1, r31
|
||||
; CHECK-32-NEXT: mr r31, r0
|
||||
; CHECK-32-NEXT: blr
|
||||
%a = alloca i32, i32 %n, align 16
|
||||
%b = getelementptr inbounds i32, i32* %a, i64 1198
|
||||
store volatile i32 1, i32* %b
|
||||
%c = load volatile i32, i32* %a
|
||||
ret i32 %c
|
||||
}
|
||||
|
||||
attributes #0 = {"probe-stack"="inline-asm"}
|
Loading…
Reference in New Issue