forked from OSchip/llvm-project
[PowerPC] Probe the gap between stackptr and realigned stackptr
While reviewing https://reviews.llvm.org/D84419, @efriedma pointed out that the gap between the realigned stack pointer and the original stack pointer should also be probed, whatever the alignment is. This patch fixes the issue for PPC64. Reviewed By: jsji Differential Revision: https://reviews.llvm.org/D88078
This commit is contained in:
parent
1c82d32089
commit
97e7ce3b15
|
@ -525,6 +525,8 @@ PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
|
|||
// register is available, we can adjust for that by not overlapping the spill
|
||||
// code. However, if we need to realign the stack (i.e. have a base pointer)
|
||||
// and the stack frame is large, we need two scratch registers.
|
||||
// Also, stack probe requires two scratch registers, one for old sp, one for
|
||||
// large frame and large probe size.
|
||||
bool
|
||||
PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
|
||||
const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
|
||||
|
@ -536,8 +538,10 @@ PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
|
|||
MachineFrameInfo &MFI = MF.getFrameInfo();
|
||||
Align MaxAlign = MFI.getMaxAlign();
|
||||
bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
|
||||
const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
|
||||
|
||||
return (IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1;
|
||||
return ((IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1) ||
|
||||
TLI.hasInlineStackProbe(MF);
|
||||
}
|
||||
|
||||
bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
|
||||
|
@ -676,12 +680,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
|
|||
"FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
|
||||
|
||||
// Using the same bool variable as below to suppress compiler warnings.
|
||||
// Stack probe requires two scratch registers, one for old sp, one for large
|
||||
// frame and large probe size.
|
||||
bool SingleScratchReg = findScratchRegister(
|
||||
&MBB, false,
|
||||
twoUniqueScratchRegsRequired(&MBB) || TLI.hasInlineStackProbe(MF),
|
||||
&ScratchReg, &TempReg);
|
||||
&MBB, false, twoUniqueScratchRegsRequired(&MBB), &ScratchReg, &TempReg);
|
||||
assert(SingleScratchReg &&
|
||||
"Required number of registers not available in this block");
|
||||
|
||||
|
@ -1202,10 +1202,12 @@ void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
|
|||
if (StackAllocMIPos == PrologMBB.end())
|
||||
return;
|
||||
const BasicBlock *ProbedBB = PrologMBB.getBasicBlock();
|
||||
MachineBasicBlock *CurrentMBB = &PrologMBB;
|
||||
DebugLoc DL = PrologMBB.findDebugLoc(StackAllocMIPos);
|
||||
MachineInstr &MI = *StackAllocMIPos;
|
||||
int64_t NegFrameSize = MI.getOperand(2).getImm();
|
||||
int64_t NegProbeSize = -(int64_t)TLI.getStackProbeSize(MF);
|
||||
unsigned ProbeSize = TLI.getStackProbeSize(MF);
|
||||
int64_t NegProbeSize = -(int64_t)ProbeSize;
|
||||
assert(isInt<32>(NegProbeSize) && "Unhandled probe size");
|
||||
int64_t NumBlocks = NegFrameSize / NegProbeSize;
|
||||
int64_t NegResidualSize = NegFrameSize % NegProbeSize;
|
||||
|
@ -1214,10 +1216,9 @@ void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
|
|||
Register FPReg = MI.getOperand(1).getReg();
|
||||
const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
|
||||
bool HasBP = RegInfo->hasBasePointer(MF);
|
||||
Register BPReg = RegInfo->getBaseRegister(MF);
|
||||
Align MaxAlign = MFI.getMaxAlign();
|
||||
// Initialize current frame pointer.
|
||||
const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR);
|
||||
BuildMI(PrologMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg);
|
||||
// Subroutines to generate .cfi_* directives.
|
||||
auto buildDefCFAReg = [&](MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI, Register Reg) {
|
||||
|
@ -1257,89 +1258,218 @@ void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
|
|||
// Subroutine to store frame pointer and decrease stack pointer by probe size.
|
||||
auto allocateAndProbe = [&](MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI, int64_t NegSize,
|
||||
Register NegSizeReg, bool UseDForm) {
|
||||
Register NegSizeReg, bool UseDForm,
|
||||
Register StoreReg) {
|
||||
if (UseDForm)
|
||||
BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDU : PPC::STWU), SPReg)
|
||||
.addReg(FPReg)
|
||||
.addReg(StoreReg)
|
||||
.addImm(NegSize)
|
||||
.addReg(SPReg);
|
||||
else
|
||||
BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
|
||||
.addReg(FPReg)
|
||||
.addReg(StoreReg)
|
||||
.addReg(SPReg)
|
||||
.addReg(NegSizeReg);
|
||||
};
|
||||
// Use FPReg to calculate CFA.
|
||||
if (needsCFI)
|
||||
buildDefCFA(PrologMBB, {MI}, FPReg, 0);
|
||||
// For case HasBP && MaxAlign > 1, we have to align the SP by performing
|
||||
// SP = SP - SP % MaxAlign.
|
||||
if (HasBP && MaxAlign > 1) {
|
||||
if (isPPC64)
|
||||
BuildMI(PrologMBB, {MI}, DL, TII.get(PPC::RLDICL), ScratchReg)
|
||||
.addReg(FPReg)
|
||||
// Used to probe realignment gap [stackptr - (stackptr % align), stackptr)
|
||||
// when HasBP && isPPC64. In such scenario, normally we have r0, r1, r12, r30
|
||||
// available and r1 is already copied to r30 which is BPReg. So BPReg stores
|
||||
// the value of stackptr.
|
||||
// First we have to probe tail interval whose size is less than probesize,
|
||||
// i.e., [stackptr - (stackptr % align) % probesize, stackptr). At this stage,
|
||||
// ScratchReg stores the value of ((stackptr % align) % probesize). Then we
|
||||
// probe each block sized probesize until stackptr meets
|
||||
// (stackptr - (stackptr % align)). At this stage, ScratchReg is materialized
|
||||
// as negprobesize. At both stages, TempReg stores the value of
|
||||
// (stackptr - (stackptr % align)).
|
||||
auto dynamicProbe = [&](MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI, Register ScratchReg,
|
||||
Register TempReg) {
|
||||
assert(HasBP && isPPC64 && "Probe alignment part not available");
|
||||
assert(isPowerOf2_64(ProbeSize) && "Probe size should be power of 2");
|
||||
// ScratchReg = stackptr % align
|
||||
BuildMI(MBB, MBBI, DL, TII.get(PPC::RLDICL), ScratchReg)
|
||||
.addReg(BPReg)
|
||||
.addImm(0)
|
||||
.addImm(64 - Log2(MaxAlign));
|
||||
else
|
||||
BuildMI(PrologMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg)
|
||||
// TempReg = stackptr - (stackptr % align)
|
||||
BuildMI(MBB, MBBI, DL, TII.get(PPC::SUBFC8), TempReg)
|
||||
.addReg(ScratchReg)
|
||||
.addReg(BPReg);
|
||||
// ScratchReg = (stackptr % align) % probesize
|
||||
BuildMI(MBB, MBBI, DL, TII.get(PPC::RLDICL), ScratchReg)
|
||||
.addReg(ScratchReg)
|
||||
.addImm(0)
|
||||
.addImm(64 - Log2(ProbeSize));
|
||||
Register CRReg = PPC::CR0;
|
||||
// If (stackptr % align) % probesize == 0, we should not generate probe
|
||||
// code. The layout of the output assembly looks roughly like:
|
||||
// bb.0:
|
||||
// ...
|
||||
// cmpldi $scratchreg, 0
|
||||
// beq bb.2
|
||||
// bb.1: # Probe tail interval
|
||||
// neg $scratchreg, $scratchreg
|
||||
// stdux $bpreg, r1, $scratchreg
|
||||
// bb.2:
|
||||
// <materialize negprobesize into $scratchreg>
|
||||
// cmpd r1, $tempreg
|
||||
// beq bb.4
|
||||
// bb.3: # Loop to probe each block
|
||||
// stdux $bpreg, r1, $scratchreg
|
||||
// cmpd r1, $tempreg
|
||||
// bne bb.3
|
||||
// bb.4:
|
||||
// ...
|
||||
MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
|
||||
MachineBasicBlock *ProbeResidualMBB = MF.CreateMachineBasicBlock(ProbedBB);
|
||||
MF.insert(MBBInsertPoint, ProbeResidualMBB);
|
||||
MachineBasicBlock *ProbeLoopPreHeaderMBB =
|
||||
MF.CreateMachineBasicBlock(ProbedBB);
|
||||
MF.insert(MBBInsertPoint, ProbeLoopPreHeaderMBB);
|
||||
MachineBasicBlock *ProbeLoopBodyMBB = MF.CreateMachineBasicBlock(ProbedBB);
|
||||
MF.insert(MBBInsertPoint, ProbeLoopBodyMBB);
|
||||
MachineBasicBlock *ProbeExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
|
||||
MF.insert(MBBInsertPoint, ProbeExitMBB);
|
||||
// bb.4
|
||||
ProbeExitMBB->splice(ProbeExitMBB->end(), &MBB, MBBI, MBB.end());
|
||||
ProbeExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
|
||||
// bb.0
|
||||
BuildMI(&MBB, DL, TII.get(PPC::CMPDI), CRReg).addReg(ScratchReg).addImm(0);
|
||||
BuildMI(&MBB, DL, TII.get(PPC::BCC))
|
||||
.addImm(PPC::PRED_EQ)
|
||||
.addReg(CRReg)
|
||||
.addMBB(ProbeLoopPreHeaderMBB);
|
||||
MBB.addSuccessor(ProbeResidualMBB);
|
||||
MBB.addSuccessor(ProbeLoopPreHeaderMBB);
|
||||
// bb.1
|
||||
BuildMI(ProbeResidualMBB, DL, TII.get(PPC::NEG8), ScratchReg)
|
||||
.addReg(ScratchReg);
|
||||
allocateAndProbe(*ProbeResidualMBB, ProbeResidualMBB->end(), 0, ScratchReg,
|
||||
false, BPReg);
|
||||
ProbeResidualMBB->addSuccessor(ProbeLoopPreHeaderMBB);
|
||||
// bb.2
|
||||
MaterializeImm(*ProbeLoopPreHeaderMBB, ProbeLoopPreHeaderMBB->end(),
|
||||
NegProbeSize, ScratchReg);
|
||||
BuildMI(ProbeLoopPreHeaderMBB, DL, TII.get(PPC::CMPD), CRReg)
|
||||
.addReg(SPReg)
|
||||
.addReg(TempReg);
|
||||
BuildMI(ProbeLoopPreHeaderMBB, DL, TII.get(PPC::BCC))
|
||||
.addImm(PPC::PRED_EQ)
|
||||
.addReg(CRReg)
|
||||
.addMBB(ProbeExitMBB);
|
||||
ProbeLoopPreHeaderMBB->addSuccessor(ProbeLoopBodyMBB);
|
||||
ProbeLoopPreHeaderMBB->addSuccessor(ProbeExitMBB);
|
||||
// bb.3
|
||||
allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), 0, ScratchReg,
|
||||
false, BPReg);
|
||||
BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::CMPD), CRReg)
|
||||
.addReg(SPReg)
|
||||
.addReg(TempReg);
|
||||
BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::BCC))
|
||||
.addImm(PPC::PRED_NE)
|
||||
.addReg(CRReg)
|
||||
.addMBB(ProbeLoopBodyMBB);
|
||||
ProbeLoopBodyMBB->addSuccessor(ProbeExitMBB);
|
||||
ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB);
|
||||
// Update liveins.
|
||||
recomputeLiveIns(*ProbeResidualMBB);
|
||||
recomputeLiveIns(*ProbeLoopPreHeaderMBB);
|
||||
recomputeLiveIns(*ProbeLoopBodyMBB);
|
||||
recomputeLiveIns(*ProbeExitMBB);
|
||||
return ProbeExitMBB;
|
||||
};
|
||||
// For case HasBP && MaxAlign > 1, we have to realign the SP by performing
|
||||
// SP = SP - SP % MaxAlign.
|
||||
if (HasBP && MaxAlign > 1) {
|
||||
// FIXME: Currently only probe the gap [stackptr & alignmask, stackptr) in
|
||||
// 64-bit mode.
|
||||
if (isPPC64) {
|
||||
// Use BPReg to calculate CFA.
|
||||
if (needsCFI)
|
||||
buildDefCFA(*CurrentMBB, {MI}, BPReg, 0);
|
||||
// Since we have SPReg copied to BPReg at the moment, FPReg can be used as
|
||||
// TempReg.
|
||||
Register TempReg = FPReg;
|
||||
CurrentMBB = dynamicProbe(*CurrentMBB, {MI}, ScratchReg, TempReg);
|
||||
// Copy BPReg to FPReg to meet the definition of PROBED_STACKALLOC_64.
|
||||
BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg)
|
||||
.addReg(BPReg)
|
||||
.addReg(BPReg);
|
||||
} else {
|
||||
// Initialize current frame pointer.
|
||||
BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg)
|
||||
.addReg(SPReg)
|
||||
.addReg(SPReg);
|
||||
// Use FPReg to calculate CFA.
|
||||
if (needsCFI)
|
||||
buildDefCFA(*CurrentMBB, {MI}, FPReg, 0);
|
||||
BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg)
|
||||
.addReg(FPReg)
|
||||
.addImm(0)
|
||||
.addImm(32 - Log2(MaxAlign))
|
||||
.addImm(31);
|
||||
BuildMI(PrologMBB, {MI}, DL, TII.get(isPPC64 ? PPC::SUBFC8 : PPC::SUBFC),
|
||||
SPReg)
|
||||
BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::SUBFC), SPReg)
|
||||
.addReg(ScratchReg)
|
||||
.addReg(SPReg);
|
||||
}
|
||||
} else {
|
||||
// Initialize current frame pointer.
|
||||
BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg);
|
||||
// Use FPReg to calculate CFA.
|
||||
if (needsCFI)
|
||||
buildDefCFA(*CurrentMBB, {MI}, FPReg, 0);
|
||||
}
|
||||
// Probe residual part.
|
||||
if (NegResidualSize) {
|
||||
bool ResidualUseDForm = CanUseDForm(NegResidualSize);
|
||||
if (!ResidualUseDForm)
|
||||
MaterializeImm(PrologMBB, {MI}, NegResidualSize, ScratchReg);
|
||||
allocateAndProbe(PrologMBB, {MI}, NegResidualSize, ScratchReg,
|
||||
ResidualUseDForm);
|
||||
MaterializeImm(*CurrentMBB, {MI}, NegResidualSize, ScratchReg);
|
||||
allocateAndProbe(*CurrentMBB, {MI}, NegResidualSize, ScratchReg,
|
||||
ResidualUseDForm, FPReg);
|
||||
}
|
||||
bool UseDForm = CanUseDForm(NegProbeSize);
|
||||
// If number of blocks is small, just probe them directly.
|
||||
if (NumBlocks < 3) {
|
||||
if (!UseDForm)
|
||||
MaterializeImm(PrologMBB, {MI}, NegProbeSize, ScratchReg);
|
||||
MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
|
||||
for (int i = 0; i < NumBlocks; ++i)
|
||||
allocateAndProbe(PrologMBB, {MI}, NegProbeSize, ScratchReg, UseDForm);
|
||||
allocateAndProbe(*CurrentMBB, {MI}, NegProbeSize, ScratchReg, UseDForm,
|
||||
FPReg);
|
||||
if (needsCFI) {
|
||||
// Restore using SPReg to calculate CFA.
|
||||
buildDefCFAReg(PrologMBB, {MI}, SPReg);
|
||||
buildDefCFAReg(*CurrentMBB, {MI}, SPReg);
|
||||
}
|
||||
} else {
|
||||
// Since CTR is a volatile register and current shrinkwrap implementation
|
||||
// won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a
|
||||
// CTR loop to probe.
|
||||
// Calculate the trip count and store it in CTRReg.
|
||||
MaterializeImm(PrologMBB, {MI}, NumBlocks, ScratchReg);
|
||||
BuildMI(PrologMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR))
|
||||
MaterializeImm(*CurrentMBB, {MI}, NumBlocks, ScratchReg);
|
||||
BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR))
|
||||
.addReg(ScratchReg, RegState::Kill);
|
||||
if (!UseDForm)
|
||||
MaterializeImm(PrologMBB, {MI}, NegProbeSize, ScratchReg);
|
||||
MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
|
||||
// Create MBBs of the loop.
|
||||
MachineFunction::iterator MBBInsertPoint =
|
||||
std::next(PrologMBB.getIterator());
|
||||
std::next(CurrentMBB->getIterator());
|
||||
MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB);
|
||||
MF.insert(MBBInsertPoint, LoopMBB);
|
||||
MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
|
||||
MF.insert(MBBInsertPoint, ExitMBB);
|
||||
// Synthesize the loop body.
|
||||
allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg,
|
||||
UseDForm);
|
||||
UseDForm, FPReg);
|
||||
BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ))
|
||||
.addMBB(LoopMBB);
|
||||
LoopMBB->addSuccessor(ExitMBB);
|
||||
LoopMBB->addSuccessor(LoopMBB);
|
||||
// Synthesize the exit MBB.
|
||||
ExitMBB->splice(ExitMBB->end(), &PrologMBB,
|
||||
ExitMBB->splice(ExitMBB->end(), CurrentMBB,
|
||||
std::next(MachineBasicBlock::iterator(MI)),
|
||||
PrologMBB.end());
|
||||
ExitMBB->transferSuccessorsAndUpdatePHIs(&PrologMBB);
|
||||
PrologMBB.addSuccessor(LoopMBB);
|
||||
CurrentMBB->end());
|
||||
ExitMBB->transferSuccessorsAndUpdatePHIs(CurrentMBB);
|
||||
CurrentMBB->addSuccessor(LoopMBB);
|
||||
if (needsCFI) {
|
||||
// Restore using SPReg to calculate CFA.
|
||||
buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg);
|
||||
|
|
|
@ -9,10 +9,26 @@ define void @foo(i32 %vla_size) #0 {
|
|||
; CHECK-LE-NEXT: std r31, -8(r1)
|
||||
; CHECK-LE-NEXT: std r30, -16(r1)
|
||||
; CHECK-LE-NEXT: mr r30, r1
|
||||
; CHECK-LE-NEXT: mr r12, r1
|
||||
; CHECK-LE-NEXT: .cfi_def_cfa r12, 0
|
||||
; CHECK-LE-NEXT: clrldi r0, r12, 53
|
||||
; CHECK-LE-NEXT: subc r1, r1, r0
|
||||
; CHECK-LE-NEXT: .cfi_def_cfa r30, 0
|
||||
; CHECK-LE-NEXT: clrldi r0, r30, 53
|
||||
; CHECK-LE-NEXT: subc r12, r30, r0
|
||||
; CHECK-LE-NEXT: clrldi r0, r0, 52
|
||||
; CHECK-LE-NEXT: cmpdi r0, 0
|
||||
; CHECK-LE-NEXT: beq cr0, .LBB0_2
|
||||
; CHECK-LE-NEXT: # %bb.1: # %entry
|
||||
; CHECK-LE-NEXT: neg r0, r0
|
||||
; CHECK-LE-NEXT: stdux r30, r1, r0
|
||||
; CHECK-LE-NEXT: .LBB0_2: # %entry
|
||||
; CHECK-LE-NEXT: li r0, -4096
|
||||
; CHECK-LE-NEXT: cmpd r1, r12
|
||||
; CHECK-LE-NEXT: beq cr0, .LBB0_4
|
||||
; CHECK-LE-NEXT: .LBB0_3: # %entry
|
||||
; CHECK-LE-NEXT: #
|
||||
; CHECK-LE-NEXT: stdux r30, r1, r0
|
||||
; CHECK-LE-NEXT: cmpd r1, r12
|
||||
; CHECK-LE-NEXT: bne cr0, .LBB0_3
|
||||
; CHECK-LE-NEXT: .LBB0_4: # %entry
|
||||
; CHECK-LE-NEXT: mr r12, r30
|
||||
; CHECK-LE-NEXT: stdu r12, -2048(r1)
|
||||
; CHECK-LE-NEXT: stdu r12, -4096(r1)
|
||||
; CHECK-LE-NEXT: .cfi_def_cfa_register r1
|
||||
|
@ -36,13 +52,13 @@ define void @foo(i32 %vla_size) #0 {
|
|||
; CHECK-LE-NEXT: add r4, r1, r4
|
||||
; CHECK-LE-NEXT: stdux r3, r1, r5
|
||||
; CHECK-LE-NEXT: cmpd r1, r4
|
||||
; CHECK-LE-NEXT: beq cr0, .LBB0_2
|
||||
; CHECK-LE-NEXT: .LBB0_1: # %entry
|
||||
; CHECK-LE-NEXT: beq cr0, .LBB0_6
|
||||
; CHECK-LE-NEXT: .LBB0_5: # %entry
|
||||
; CHECK-LE-NEXT: #
|
||||
; CHECK-LE-NEXT: stdu r3, -4096(r1)
|
||||
; CHECK-LE-NEXT: cmpd r1, r4
|
||||
; CHECK-LE-NEXT: bne cr0, .LBB0_1
|
||||
; CHECK-LE-NEXT: .LBB0_2: # %entry
|
||||
; CHECK-LE-NEXT: bne cr0, .LBB0_5
|
||||
; CHECK-LE-NEXT: .LBB0_6: # %entry
|
||||
; CHECK-LE-NEXT: addi r3, r1, 2048
|
||||
; CHECK-LE-NEXT: lbz r3, 0(r3)
|
||||
; CHECK-LE-NEXT: ld r1, 0(r1)
|
||||
|
|
|
@ -528,4 +528,502 @@ entry:
|
|||
ret i8 %c
|
||||
}
|
||||
|
||||
; alloca + align < probe_size
|
||||
define i32 @f8(i64 %i) local_unnamed_addr #0 {
|
||||
; CHECK-LE-LABEL: f8:
|
||||
; CHECK-LE: # %bb.0:
|
||||
; CHECK-LE-NEXT: clrldi r0, r1, 58
|
||||
; CHECK-LE-NEXT: std r30, -16(r1)
|
||||
; CHECK-LE-NEXT: mr r30, r1
|
||||
; CHECK-LE-NEXT: subfic r0, r0, -896
|
||||
; CHECK-LE-NEXT: stdux r1, r1, r0
|
||||
; CHECK-LE-NEXT: .cfi_def_cfa_register r30
|
||||
; CHECK-LE-NEXT: .cfi_offset r30, -16
|
||||
; CHECK-LE-NEXT: addi r4, r1, 64
|
||||
; CHECK-LE-NEXT: sldi r3, r3, 2
|
||||
; CHECK-LE-NEXT: li r5, 1
|
||||
; CHECK-LE-NEXT: stwx r5, r4, r3
|
||||
; CHECK-LE-NEXT: lwz r3, 64(r1)
|
||||
; CHECK-LE-NEXT: ld r1, 0(r1)
|
||||
; CHECK-LE-NEXT: ld r30, -16(r1)
|
||||
; CHECK-LE-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-LABEL: f8:
|
||||
; CHECK-BE: # %bb.0:
|
||||
; CHECK-BE-NEXT: clrldi r0, r1, 58
|
||||
; CHECK-BE-NEXT: std r30, -16(r1)
|
||||
; CHECK-BE-NEXT: mr r30, r1
|
||||
; CHECK-BE-NEXT: subfic r0, r0, -896
|
||||
; CHECK-BE-NEXT: stdux r1, r1, r0
|
||||
; CHECK-BE-NEXT: .cfi_def_cfa_register r30
|
||||
; CHECK-BE-NEXT: .cfi_offset r30, -16
|
||||
; CHECK-BE-NEXT: addi r4, r1, 64
|
||||
; CHECK-BE-NEXT: li r5, 1
|
||||
; CHECK-BE-NEXT: sldi r3, r3, 2
|
||||
; CHECK-BE-NEXT: stwx r5, r4, r3
|
||||
; CHECK-BE-NEXT: lwz r3, 64(r1)
|
||||
; CHECK-BE-NEXT: ld r1, 0(r1)
|
||||
; CHECK-BE-NEXT: ld r30, -16(r1)
|
||||
; CHECK-BE-NEXT: blr
|
||||
;
|
||||
; CHECK-32-LABEL: f8:
|
||||
; CHECK-32: # %bb.0:
|
||||
; CHECK-32-NEXT: clrlwi r0, r1, 26
|
||||
; CHECK-32-NEXT: subfic r0, r0, -896
|
||||
; CHECK-32-NEXT: stwux r1, r1, r0
|
||||
; CHECK-32-NEXT: sub r0, r1, r0
|
||||
; CHECK-32-NEXT: addic r0, r0, -8
|
||||
; CHECK-32-NEXT: stwx r30, 0, r0
|
||||
; CHECK-32-NEXT: addic r30, r0, 8
|
||||
; CHECK-32-NEXT: .cfi_def_cfa_register r30
|
||||
; CHECK-32-NEXT: .cfi_offset r30, -8
|
||||
; CHECK-32-NEXT: addi r3, r1, 64
|
||||
; CHECK-32-NEXT: li r5, 1
|
||||
; CHECK-32-NEXT: slwi r4, r4, 2
|
||||
; CHECK-32-NEXT: stwx r5, r3, r4
|
||||
; CHECK-32-NEXT: mr r0, r31
|
||||
; CHECK-32-NEXT: lwz r3, 64(r1)
|
||||
; CHECK-32-NEXT: lwz r31, 0(r1)
|
||||
; CHECK-32-NEXT: lwz r30, -8(r31)
|
||||
; CHECK-32-NEXT: mr r1, r31
|
||||
; CHECK-32-NEXT: mr r31, r0
|
||||
; CHECK-32-NEXT: blr
|
||||
%a = alloca i32, i32 200, align 64
|
||||
%b = getelementptr inbounds i32, i32* %a, i64 %i
|
||||
store volatile i32 1, i32* %b
|
||||
%c = load volatile i32, i32* %a
|
||||
ret i32 %c
|
||||
}
|
||||
|
||||
; alloca > probe_size, align > probe_size
|
||||
define i32 @f9(i64 %i) local_unnamed_addr #0 {
|
||||
; CHECK-LE-LABEL: f9:
|
||||
; CHECK-LE: # %bb.0:
|
||||
; CHECK-LE-NEXT: std r30, -16(r1)
|
||||
; CHECK-LE-NEXT: mr r30, r1
|
||||
; CHECK-LE-NEXT: .cfi_def_cfa r30, 0
|
||||
; CHECK-LE-NEXT: clrldi r0, r30, 53
|
||||
; CHECK-LE-NEXT: subc r12, r30, r0
|
||||
; CHECK-LE-NEXT: clrldi r0, r0, 52
|
||||
; CHECK-LE-NEXT: cmpdi r0, 0
|
||||
; CHECK-LE-NEXT: beq cr0, .LBB9_2
|
||||
; CHECK-LE-NEXT: # %bb.1:
|
||||
; CHECK-LE-NEXT: neg r0, r0
|
||||
; CHECK-LE-NEXT: stdux r30, r1, r0
|
||||
; CHECK-LE-NEXT: .LBB9_2:
|
||||
; CHECK-LE-NEXT: li r0, -4096
|
||||
; CHECK-LE-NEXT: cmpd r1, r12
|
||||
; CHECK-LE-NEXT: beq cr0, .LBB9_4
|
||||
; CHECK-LE-NEXT: .LBB9_3:
|
||||
; CHECK-LE-NEXT: stdux r30, r1, r0
|
||||
; CHECK-LE-NEXT: cmpd r1, r12
|
||||
; CHECK-LE-NEXT: bne cr0, .LBB9_3
|
||||
; CHECK-LE-NEXT: .LBB9_4:
|
||||
; CHECK-LE-NEXT: mr r12, r30
|
||||
; CHECK-LE-NEXT: stdu r12, -2048(r1)
|
||||
; CHECK-LE-NEXT: stdu r12, -4096(r1)
|
||||
; CHECK-LE-NEXT: stdu r12, -4096(r1)
|
||||
; CHECK-LE-NEXT: .cfi_def_cfa_register r1
|
||||
; CHECK-LE-NEXT: .cfi_def_cfa_register r30
|
||||
; CHECK-LE-NEXT: .cfi_offset r30, -16
|
||||
; CHECK-LE-NEXT: addi r4, r1, 2048
|
||||
; CHECK-LE-NEXT: sldi r3, r3, 2
|
||||
; CHECK-LE-NEXT: li r5, 1
|
||||
; CHECK-LE-NEXT: stwx r5, r4, r3
|
||||
; CHECK-LE-NEXT: lwz r3, 2048(r1)
|
||||
; CHECK-LE-NEXT: ld r1, 0(r1)
|
||||
; CHECK-LE-NEXT: ld r30, -16(r1)
|
||||
; CHECK-LE-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-LABEL: f9:
|
||||
; CHECK-BE: # %bb.0:
|
||||
; CHECK-BE-NEXT: std r30, -16(r1)
|
||||
; CHECK-BE-NEXT: mr r30, r1
|
||||
; CHECK-BE-NEXT: .cfi_def_cfa r30, 0
|
||||
; CHECK-BE-NEXT: clrldi r0, r30, 53
|
||||
; CHECK-BE-NEXT: subc r12, r30, r0
|
||||
; CHECK-BE-NEXT: clrldi r0, r0, 52
|
||||
; CHECK-BE-NEXT: cmpdi r0, 0
|
||||
; CHECK-BE-NEXT: beq cr0, .LBB9_2
|
||||
; CHECK-BE-NEXT: # %bb.1:
|
||||
; CHECK-BE-NEXT: neg r0, r0
|
||||
; CHECK-BE-NEXT: stdux r30, r1, r0
|
||||
; CHECK-BE-NEXT: .LBB9_2:
|
||||
; CHECK-BE-NEXT: li r0, -4096
|
||||
; CHECK-BE-NEXT: cmpd r1, r12
|
||||
; CHECK-BE-NEXT: beq cr0, .LBB9_4
|
||||
; CHECK-BE-NEXT: .LBB9_3:
|
||||
; CHECK-BE-NEXT: stdux r30, r1, r0
|
||||
; CHECK-BE-NEXT: cmpd r1, r12
|
||||
; CHECK-BE-NEXT: bne cr0, .LBB9_3
|
||||
; CHECK-BE-NEXT: .LBB9_4:
|
||||
; CHECK-BE-NEXT: mr r12, r30
|
||||
; CHECK-BE-NEXT: stdu r12, -2048(r1)
|
||||
; CHECK-BE-NEXT: stdu r12, -4096(r1)
|
||||
; CHECK-BE-NEXT: stdu r12, -4096(r1)
|
||||
; CHECK-BE-NEXT: .cfi_def_cfa_register r1
|
||||
; CHECK-BE-NEXT: .cfi_def_cfa_register r30
|
||||
; CHECK-BE-NEXT: .cfi_offset r30, -16
|
||||
; CHECK-BE-NEXT: addi r4, r1, 2048
|
||||
; CHECK-BE-NEXT: li r5, 1
|
||||
; CHECK-BE-NEXT: sldi r3, r3, 2
|
||||
; CHECK-BE-NEXT: stwx r5, r4, r3
|
||||
; CHECK-BE-NEXT: lwz r3, 2048(r1)
|
||||
; CHECK-BE-NEXT: ld r1, 0(r1)
|
||||
; CHECK-BE-NEXT: ld r30, -16(r1)
|
||||
; CHECK-BE-NEXT: blr
|
||||
;
|
||||
; CHECK-32-LABEL: f9:
|
||||
; CHECK-32: # %bb.0:
|
||||
; CHECK-32-NEXT: mr r12, r1
|
||||
; CHECK-32-NEXT: .cfi_def_cfa r12, 0
|
||||
; CHECK-32-NEXT: clrlwi r0, r12, 21
|
||||
; CHECK-32-NEXT: subc r1, r1, r0
|
||||
; CHECK-32-NEXT: stwu r12, -2048(r1)
|
||||
; CHECK-32-NEXT: stwu r12, -4096(r1)
|
||||
; CHECK-32-NEXT: stwu r12, -4096(r1)
|
||||
; CHECK-32-NEXT: .cfi_def_cfa_register r1
|
||||
; CHECK-32-NEXT: sub r0, r1, r12
|
||||
; CHECK-32-NEXT: sub r0, r1, r0
|
||||
; CHECK-32-NEXT: addic r0, r0, -8
|
||||
; CHECK-32-NEXT: stwx r30, 0, r0
|
||||
; CHECK-32-NEXT: addic r30, r0, 8
|
||||
; CHECK-32-NEXT: .cfi_def_cfa_register r30
|
||||
; CHECK-32-NEXT: .cfi_offset r30, -8
|
||||
; CHECK-32-NEXT: addi r3, r1, 2048
|
||||
; CHECK-32-NEXT: li r5, 1
|
||||
; CHECK-32-NEXT: slwi r4, r4, 2
|
||||
; CHECK-32-NEXT: stwx r5, r3, r4
|
||||
; CHECK-32-NEXT: mr r0, r31
|
||||
; CHECK-32-NEXT: lwz r3, 2048(r1)
|
||||
; CHECK-32-NEXT: lwz r31, 0(r1)
|
||||
; CHECK-32-NEXT: lwz r30, -8(r31)
|
||||
; CHECK-32-NEXT: mr r1, r31
|
||||
; CHECK-32-NEXT: mr r31, r0
|
||||
; CHECK-32-NEXT: blr
|
||||
%a = alloca i32, i32 2000, align 2048
|
||||
%b = getelementptr inbounds i32, i32* %a, i64 %i
|
||||
store volatile i32 1, i32* %b
|
||||
%c = load volatile i32, i32* %a
|
||||
ret i32 %c
|
||||
}
|
||||
|
||||
; alloca < probe_size, align < probe_size, alloca + align > probe_size
|
||||
define i32 @f10(i64 %i) local_unnamed_addr #0 {
|
||||
; CHECK-LE-LABEL: f10:
|
||||
; CHECK-LE: # %bb.0:
|
||||
; CHECK-LE-NEXT: std r30, -16(r1)
|
||||
; CHECK-LE-NEXT: mr r30, r1
|
||||
; CHECK-LE-NEXT: .cfi_def_cfa r30, 0
|
||||
; CHECK-LE-NEXT: clrldi r0, r30, 54
|
||||
; CHECK-LE-NEXT: subc r12, r30, r0
|
||||
; CHECK-LE-NEXT: clrldi r0, r0, 52
|
||||
; CHECK-LE-NEXT: cmpdi r0, 0
|
||||
; CHECK-LE-NEXT: beq cr0, .LBB10_2
|
||||
; CHECK-LE-NEXT: # %bb.1:
|
||||
; CHECK-LE-NEXT: neg r0, r0
|
||||
; CHECK-LE-NEXT: stdux r30, r1, r0
|
||||
; CHECK-LE-NEXT: .LBB10_2:
|
||||
; CHECK-LE-NEXT: li r0, -4096
|
||||
; CHECK-LE-NEXT: cmpd r1, r12
|
||||
; CHECK-LE-NEXT: beq cr0, .LBB10_4
|
||||
; CHECK-LE-NEXT: .LBB10_3:
|
||||
; CHECK-LE-NEXT: stdux r30, r1, r0
|
||||
; CHECK-LE-NEXT: cmpd r1, r12
|
||||
; CHECK-LE-NEXT: bne cr0, .LBB10_3
|
||||
; CHECK-LE-NEXT: .LBB10_4:
|
||||
; CHECK-LE-NEXT: mr r12, r30
|
||||
; CHECK-LE-NEXT: stdu r12, -1024(r1)
|
||||
; CHECK-LE-NEXT: stdu r12, -4096(r1)
|
||||
; CHECK-LE-NEXT: .cfi_def_cfa_register r1
|
||||
; CHECK-LE-NEXT: .cfi_def_cfa_register r30
|
||||
; CHECK-LE-NEXT: .cfi_offset r30, -16
|
||||
; CHECK-LE-NEXT: addi r4, r1, 1024
|
||||
; CHECK-LE-NEXT: sldi r3, r3, 2
|
||||
; CHECK-LE-NEXT: li r5, 1
|
||||
; CHECK-LE-NEXT: stwx r5, r4, r3
|
||||
; CHECK-LE-NEXT: lwz r3, 1024(r1)
|
||||
; CHECK-LE-NEXT: ld r1, 0(r1)
|
||||
; CHECK-LE-NEXT: ld r30, -16(r1)
|
||||
; CHECK-LE-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-LABEL: f10:
|
||||
; CHECK-BE: # %bb.0:
|
||||
; CHECK-BE-NEXT: std r30, -16(r1)
|
||||
; CHECK-BE-NEXT: mr r30, r1
|
||||
; CHECK-BE-NEXT: .cfi_def_cfa r30, 0
|
||||
; CHECK-BE-NEXT: clrldi r0, r30, 54
|
||||
; CHECK-BE-NEXT: subc r12, r30, r0
|
||||
; CHECK-BE-NEXT: clrldi r0, r0, 52
|
||||
; CHECK-BE-NEXT: cmpdi r0, 0
|
||||
; CHECK-BE-NEXT: beq cr0, .LBB10_2
|
||||
; CHECK-BE-NEXT: # %bb.1:
|
||||
; CHECK-BE-NEXT: neg r0, r0
|
||||
; CHECK-BE-NEXT: stdux r30, r1, r0
|
||||
; CHECK-BE-NEXT: .LBB10_2:
|
||||
; CHECK-BE-NEXT: li r0, -4096
|
||||
; CHECK-BE-NEXT: cmpd r1, r12
|
||||
; CHECK-BE-NEXT: beq cr0, .LBB10_4
|
||||
; CHECK-BE-NEXT: .LBB10_3:
|
||||
; CHECK-BE-NEXT: stdux r30, r1, r0
|
||||
; CHECK-BE-NEXT: cmpd r1, r12
|
||||
; CHECK-BE-NEXT: bne cr0, .LBB10_3
|
||||
; CHECK-BE-NEXT: .LBB10_4:
|
||||
; CHECK-BE-NEXT: mr r12, r30
|
||||
; CHECK-BE-NEXT: stdu r12, -1024(r1)
|
||||
; CHECK-BE-NEXT: stdu r12, -4096(r1)
|
||||
; CHECK-BE-NEXT: .cfi_def_cfa_register r1
|
||||
; CHECK-BE-NEXT: .cfi_def_cfa_register r30
|
||||
; CHECK-BE-NEXT: .cfi_offset r30, -16
|
||||
; CHECK-BE-NEXT: addi r4, r1, 1024
|
||||
; CHECK-BE-NEXT: li r5, 1
|
||||
; CHECK-BE-NEXT: sldi r3, r3, 2
|
||||
; CHECK-BE-NEXT: stwx r5, r4, r3
|
||||
; CHECK-BE-NEXT: lwz r3, 1024(r1)
|
||||
; CHECK-BE-NEXT: ld r1, 0(r1)
|
||||
; CHECK-BE-NEXT: ld r30, -16(r1)
|
||||
; CHECK-BE-NEXT: blr
|
||||
;
|
||||
; CHECK-32-LABEL: f10:
|
||||
; CHECK-32: # %bb.0:
|
||||
; CHECK-32-NEXT: mr r12, r1
|
||||
; CHECK-32-NEXT: .cfi_def_cfa r12, 0
|
||||
; CHECK-32-NEXT: clrlwi r0, r12, 22
|
||||
; CHECK-32-NEXT: subc r1, r1, r0
|
||||
; CHECK-32-NEXT: stwu r12, -1024(r1)
|
||||
; CHECK-32-NEXT: stwu r12, -4096(r1)
|
||||
; CHECK-32-NEXT: .cfi_def_cfa_register r1
|
||||
; CHECK-32-NEXT: sub r0, r1, r12
|
||||
; CHECK-32-NEXT: sub r0, r1, r0
|
||||
; CHECK-32-NEXT: addic r0, r0, -8
|
||||
; CHECK-32-NEXT: stwx r30, 0, r0
|
||||
; CHECK-32-NEXT: addic r30, r0, 8
|
||||
; CHECK-32-NEXT: .cfi_def_cfa_register r30
|
||||
; CHECK-32-NEXT: .cfi_offset r30, -8
|
||||
; CHECK-32-NEXT: addi r3, r1, 1024
|
||||
; CHECK-32-NEXT: li r5, 1
|
||||
; CHECK-32-NEXT: slwi r4, r4, 2
|
||||
; CHECK-32-NEXT: stwx r5, r3, r4
|
||||
; CHECK-32-NEXT: mr r0, r31
|
||||
; CHECK-32-NEXT: lwz r3, 1024(r1)
|
||||
; CHECK-32-NEXT: lwz r31, 0(r1)
|
||||
; CHECK-32-NEXT: lwz r30, -8(r31)
|
||||
; CHECK-32-NEXT: mr r1, r31
|
||||
; CHECK-32-NEXT: mr r31, r0
|
||||
; CHECK-32-NEXT: blr
|
||||
%a = alloca i32, i32 1000, align 1024
|
||||
%b = getelementptr inbounds i32, i32* %a, i64 %i
|
||||
store volatile i32 1, i32* %b
|
||||
%c = load volatile i32, i32* %a
|
||||
ret i32 %c
|
||||
}
|
||||
|
||||
define void @f11(i32 %vla_size, i64 %i) #0 {
|
||||
; CHECK-LE-LABEL: f11:
|
||||
; CHECK-LE: # %bb.0:
|
||||
; CHECK-LE-NEXT: std r31, -8(r1)
|
||||
; CHECK-LE-NEXT: std r30, -16(r1)
|
||||
; CHECK-LE-NEXT: mr r30, r1
|
||||
; CHECK-LE-NEXT: .cfi_def_cfa r30, 0
|
||||
; CHECK-LE-NEXT: clrldi r0, r30, 49
|
||||
; CHECK-LE-NEXT: subc r12, r30, r0
|
||||
; CHECK-LE-NEXT: clrldi r0, r0, 52
|
||||
; CHECK-LE-NEXT: cmpdi r0, 0
|
||||
; CHECK-LE-NEXT: beq cr0, .LBB11_2
|
||||
; CHECK-LE-NEXT: # %bb.1:
|
||||
; CHECK-LE-NEXT: neg r0, r0
|
||||
; CHECK-LE-NEXT: stdux r30, r1, r0
|
||||
; CHECK-LE-NEXT: .LBB11_2:
|
||||
; CHECK-LE-NEXT: li r0, -4096
|
||||
; CHECK-LE-NEXT: cmpd r1, r12
|
||||
; CHECK-LE-NEXT: beq cr0, .LBB11_4
|
||||
; CHECK-LE-NEXT: .LBB11_3:
|
||||
; CHECK-LE-NEXT: stdux r30, r1, r0
|
||||
; CHECK-LE-NEXT: cmpd r1, r12
|
||||
; CHECK-LE-NEXT: bne cr0, .LBB11_3
|
||||
; CHECK-LE-NEXT: .LBB11_4:
|
||||
; CHECK-LE-NEXT: mr r12, r30
|
||||
; CHECK-LE-NEXT: li r0, 24
|
||||
; CHECK-LE-NEXT: mtctr r0
|
||||
; CHECK-LE-NEXT: .LBB11_5:
|
||||
; CHECK-LE-NEXT: stdu r12, -4096(r1)
|
||||
; CHECK-LE-NEXT: bdnz .LBB11_5
|
||||
; CHECK-LE-NEXT: # %bb.6:
|
||||
; CHECK-LE-NEXT: .cfi_def_cfa_register r1
|
||||
; CHECK-LE-NEXT: .cfi_def_cfa_register r30
|
||||
; CHECK-LE-NEXT: .cfi_offset r31, -8
|
||||
; CHECK-LE-NEXT: .cfi_offset r30, -16
|
||||
; CHECK-LE-NEXT: clrldi r3, r3, 32
|
||||
; CHECK-LE-NEXT: lis r5, 1
|
||||
; CHECK-LE-NEXT: mr r31, r1
|
||||
; CHECK-LE-NEXT: li r6, 1
|
||||
; CHECK-LE-NEXT: addi r3, r3, 15
|
||||
; CHECK-LE-NEXT: ori r5, r5, 0
|
||||
; CHECK-LE-NEXT: rldicl r3, r3, 60, 4
|
||||
; CHECK-LE-NEXT: sldi r4, r4, 2
|
||||
; CHECK-LE-NEXT: add r5, r31, r5
|
||||
; CHECK-LE-NEXT: rldicl r3, r3, 4, 31
|
||||
; CHECK-LE-NEXT: stwx r6, r5, r4
|
||||
; CHECK-LE-NEXT: li r4, -32768
|
||||
; CHECK-LE-NEXT: neg r7, r3
|
||||
; CHECK-LE-NEXT: ld r3, 0(r1)
|
||||
; CHECK-LE-NEXT: and r4, r7, r4
|
||||
; CHECK-LE-NEXT: mr r7, r4
|
||||
; CHECK-LE-NEXT: li r4, -4096
|
||||
; CHECK-LE-NEXT: divd r5, r7, r4
|
||||
; CHECK-LE-NEXT: mulld r4, r5, r4
|
||||
; CHECK-LE-NEXT: sub r5, r7, r4
|
||||
; CHECK-LE-NEXT: add r4, r1, r7
|
||||
; CHECK-LE-NEXT: stdux r3, r1, r5
|
||||
; CHECK-LE-NEXT: cmpd r1, r4
|
||||
; CHECK-LE-NEXT: beq cr0, .LBB11_8
|
||||
; CHECK-LE-NEXT: .LBB11_7:
|
||||
; CHECK-LE-NEXT: stdu r3, -4096(r1)
|
||||
; CHECK-LE-NEXT: cmpd r1, r4
|
||||
; CHECK-LE-NEXT: bne cr0, .LBB11_7
|
||||
; CHECK-LE-NEXT: .LBB11_8:
|
||||
; CHECK-LE-NEXT: addi r3, r1, -32768
|
||||
; CHECK-LE-NEXT: lbz r3, 0(r3)
|
||||
; CHECK-LE-NEXT: ld r1, 0(r1)
|
||||
; CHECK-LE-NEXT: ld r31, -8(r1)
|
||||
; CHECK-LE-NEXT: ld r30, -16(r1)
|
||||
; CHECK-LE-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-LABEL: f11:
|
||||
; CHECK-BE: # %bb.0:
|
||||
; CHECK-BE-NEXT: std r31, -8(r1)
|
||||
; CHECK-BE-NEXT: std r30, -16(r1)
|
||||
; CHECK-BE-NEXT: mr r30, r1
|
||||
; CHECK-BE-NEXT: .cfi_def_cfa r30, 0
|
||||
; CHECK-BE-NEXT: clrldi r0, r30, 49
|
||||
; CHECK-BE-NEXT: subc r12, r30, r0
|
||||
; CHECK-BE-NEXT: clrldi r0, r0, 52
|
||||
; CHECK-BE-NEXT: cmpdi r0, 0
|
||||
; CHECK-BE-NEXT: beq cr0, .LBB11_2
|
||||
; CHECK-BE-NEXT: # %bb.1:
|
||||
; CHECK-BE-NEXT: neg r0, r0
|
||||
; CHECK-BE-NEXT: stdux r30, r1, r0
|
||||
; CHECK-BE-NEXT: .LBB11_2:
|
||||
; CHECK-BE-NEXT: li r0, -4096
|
||||
; CHECK-BE-NEXT: cmpd r1, r12
|
||||
; CHECK-BE-NEXT: beq cr0, .LBB11_4
|
||||
; CHECK-BE-NEXT: .LBB11_3:
|
||||
; CHECK-BE-NEXT: stdux r30, r1, r0
|
||||
; CHECK-BE-NEXT: cmpd r1, r12
|
||||
; CHECK-BE-NEXT: bne cr0, .LBB11_3
|
||||
; CHECK-BE-NEXT: .LBB11_4:
|
||||
; CHECK-BE-NEXT: mr r12, r30
|
||||
; CHECK-BE-NEXT: li r0, 24
|
||||
; CHECK-BE-NEXT: mtctr r0
|
||||
; CHECK-BE-NEXT: .LBB11_5:
|
||||
; CHECK-BE-NEXT: stdu r12, -4096(r1)
|
||||
; CHECK-BE-NEXT: bdnz .LBB11_5
|
||||
; CHECK-BE-NEXT: # %bb.6:
|
||||
; CHECK-BE-NEXT: .cfi_def_cfa_register r1
|
||||
; CHECK-BE-NEXT: .cfi_def_cfa_register r30
|
||||
; CHECK-BE-NEXT: .cfi_offset r31, -8
|
||||
; CHECK-BE-NEXT: .cfi_offset r30, -16
|
||||
; CHECK-BE-NEXT: clrldi r3, r3, 32
|
||||
; CHECK-BE-NEXT: lis r5, 1
|
||||
; CHECK-BE-NEXT: addi r3, r3, 15
|
||||
; CHECK-BE-NEXT: mr r31, r1
|
||||
; CHECK-BE-NEXT: ori r5, r5, 0
|
||||
; CHECK-BE-NEXT: rldicl r3, r3, 60, 4
|
||||
; CHECK-BE-NEXT: add r5, r31, r5
|
||||
; CHECK-BE-NEXT: sldi r4, r4, 2
|
||||
; CHECK-BE-NEXT: li r6, 1
|
||||
; CHECK-BE-NEXT: rldicl r3, r3, 4, 31
|
||||
; CHECK-BE-NEXT: stwx r6, r5, r4
|
||||
; CHECK-BE-NEXT: neg r7, r3
|
||||
; CHECK-BE-NEXT: li r4, -32768
|
||||
; CHECK-BE-NEXT: and r4, r7, r4
|
||||
; CHECK-BE-NEXT: ld r3, 0(r1)
|
||||
; CHECK-BE-NEXT: mr r7, r4
|
||||
; CHECK-BE-NEXT: li r4, -4096
|
||||
; CHECK-BE-NEXT: divd r5, r7, r4
|
||||
; CHECK-BE-NEXT: mulld r4, r5, r4
|
||||
; CHECK-BE-NEXT: sub r5, r7, r4
|
||||
; CHECK-BE-NEXT: add r4, r1, r7
|
||||
; CHECK-BE-NEXT: stdux r3, r1, r5
|
||||
; CHECK-BE-NEXT: cmpd r1, r4
|
||||
; CHECK-BE-NEXT: beq cr0, .LBB11_8
|
||||
; CHECK-BE-NEXT: .LBB11_7:
|
||||
; CHECK-BE-NEXT: stdu r3, -4096(r1)
|
||||
; CHECK-BE-NEXT: cmpd r1, r4
|
||||
; CHECK-BE-NEXT: bne cr0, .LBB11_7
|
||||
; CHECK-BE-NEXT: .LBB11_8:
|
||||
; CHECK-BE-NEXT: addi r3, r1, -32768
|
||||
; CHECK-BE-NEXT: lbz r3, 0(r3)
|
||||
; CHECK-BE-NEXT: ld r1, 0(r1)
|
||||
; CHECK-BE-NEXT: ld r31, -8(r1)
|
||||
; CHECK-BE-NEXT: ld r30, -16(r1)
|
||||
; CHECK-BE-NEXT: blr
|
||||
;
|
||||
; CHECK-32-LABEL: f11:
|
||||
; CHECK-32: # %bb.0:
|
||||
; CHECK-32-NEXT: mr r12, r1
|
||||
; CHECK-32-NEXT: .cfi_def_cfa r12, 0
|
||||
; CHECK-32-NEXT: clrlwi r0, r12, 17
|
||||
; CHECK-32-NEXT: subc r1, r1, r0
|
||||
; CHECK-32-NEXT: li r0, 24
|
||||
; CHECK-32-NEXT: mtctr r0
|
||||
; CHECK-32-NEXT: .LBB11_1:
|
||||
; CHECK-32-NEXT: stwu r12, -4096(r1)
|
||||
; CHECK-32-NEXT: bdnz .LBB11_1
|
||||
; CHECK-32-NEXT: # %bb.2:
|
||||
; CHECK-32-NEXT: .cfi_def_cfa_register r1
|
||||
; CHECK-32-NEXT: sub r0, r1, r12
|
||||
; CHECK-32-NEXT: sub r0, r1, r0
|
||||
; CHECK-32-NEXT: addic r0, r0, -4
|
||||
; CHECK-32-NEXT: stwx r31, 0, r0
|
||||
; CHECK-32-NEXT: addic r0, r0, -4
|
||||
; CHECK-32-NEXT: stwx r30, 0, r0
|
||||
; CHECK-32-NEXT: addic r30, r0, 8
|
||||
; CHECK-32-NEXT: .cfi_def_cfa_register r30
|
||||
; CHECK-32-NEXT: .cfi_offset r31, -4
|
||||
; CHECK-32-NEXT: .cfi_offset r30, -8
|
||||
; CHECK-32-NEXT: lis r4, 1
|
||||
; CHECK-32-NEXT: mr r31, r1
|
||||
; CHECK-32-NEXT: ori r4, r4, 0
|
||||
; CHECK-32-NEXT: addi r3, r3, 15
|
||||
; CHECK-32-NEXT: add r4, r31, r4
|
||||
; CHECK-32-NEXT: li r5, 1
|
||||
; CHECK-32-NEXT: slwi r6, r6, 2
|
||||
; CHECK-32-NEXT: rlwinm r3, r3, 0, 0, 27
|
||||
; CHECK-32-NEXT: neg r7, r3
|
||||
; CHECK-32-NEXT: stwx r5, r4, r6
|
||||
; CHECK-32-NEXT: li r4, -32768
|
||||
; CHECK-32-NEXT: and r4, r7, r4
|
||||
; CHECK-32-NEXT: lwz r3, 0(r1)
|
||||
; CHECK-32-NEXT: mr r7, r4
|
||||
; CHECK-32-NEXT: li r4, -4096
|
||||
; CHECK-32-NEXT: divw r5, r7, r4
|
||||
; CHECK-32-NEXT: mullw r4, r5, r4
|
||||
; CHECK-32-NEXT: sub r5, r7, r4
|
||||
; CHECK-32-NEXT: add r4, r1, r7
|
||||
; CHECK-32-NEXT: stwux r3, r1, r5
|
||||
; CHECK-32-NEXT: cmpw r1, r4
|
||||
; CHECK-32-NEXT: beq cr0, .LBB11_4
|
||||
; CHECK-32-NEXT: .LBB11_3:
|
||||
; CHECK-32-NEXT: stwu r3, -4096(r1)
|
||||
; CHECK-32-NEXT: cmpw r1, r4
|
||||
; CHECK-32-NEXT: bne cr0, .LBB11_3
|
||||
; CHECK-32-NEXT: .LBB11_4:
|
||||
; CHECK-32-NEXT: addi r3, r1, -32768
|
||||
; CHECK-32-NEXT: lbz r3, 0(r3)
|
||||
; CHECK-32-NEXT: lwz r31, 0(r1)
|
||||
; CHECK-32-NEXT: lwz r0, -4(r31)
|
||||
; CHECK-32-NEXT: lwz r30, -8(r31)
|
||||
; CHECK-32-NEXT: mr r1, r31
|
||||
; CHECK-32-NEXT: mr r31, r0
|
||||
; CHECK-32-NEXT: blr
|
||||
%a = alloca i32, i32 4096, align 32768
|
||||
%b = getelementptr inbounds i32, i32* %a, i64 %i
|
||||
store volatile i32 1, i32* %b
|
||||
%1 = zext i32 %vla_size to i64
|
||||
%vla = alloca i8, i64 %1, align 2048
|
||||
%2 = load volatile i8, i8* %vla, align 2048
|
||||
ret void
|
||||
}
|
||||
|
||||
; Attribute group #0: sets the "probe-stack" function attribute to "inline-asm".
; NOTE(review): presumably attached to the test functions above (e.g. f11) so
; that their stack probes are emitted inline in the prologue -- the `define`
; lines are outside this view; confirm.
attributes #0 = { "probe-stack"="inline-asm" }
|
||||
|
|
Loading…
Reference in New Issue