[PowerPC] guard update form prepare with non-const increment with option

Reviewed By: jsji

Differential Revision: https://reviews.llvm.org/D113471
This commit is contained in:
Chen Zheng 2021-11-09 09:47:48 +00:00
parent 0c660256eb
commit eec9ca622c
4 changed files with 112 additions and 97 deletions

View File

@ -125,6 +125,11 @@ static cl::opt<bool> PreferUpdateForm("ppc-formprep-prefer-update",
cl::init(true), cl::Hidden,
cl::desc("prefer update form when ds form is also a update form"));
// Hidden option, off by default. When it is false, update form preparation is
// skipped for load/store increments that are not constants; pass
// -ppc-formprep-update-nonconst-inc to allow loop invariant non-const
// increments as well.
static cl::opt<bool> EnableUpdateFormForNonConstInc(
"ppc-formprep-update-nonconst-inc", cl::init(false), cl::Hidden,
cl::desc("prepare update form when the load/store increment is a loop "
"invariant non-const value."));
// Hidden option, off by default, that turns on chain commoning in this pass
// (-ppc-formprep-chain-commoning).
static cl::opt<bool> EnableChainCommoning(
"ppc-formprep-chain-commoning", cl::init(false), cl::Hidden,
cl::desc("Enable chain commoning in PPC loop prepare pass."));
@ -212,7 +217,7 @@ namespace {
// load/store with update like ldu/stdu, or Prefetch intrinsic.
// For DS form instructions, their displacements must be multiple of 4.
// For DQ form instructions, their displacements must be multiple of 16.
enum InstrForm { UpdateForm = 1, DSForm = 4, DQForm = 16 };
enum PrepForm { UpdateForm = 1, DSForm = 4, DQForm = 16, ChainCommoning };
class PPCLoopInstrFormPrep : public FunctionPass {
public:
@ -255,7 +260,7 @@ namespace {
/// Check if the required PHI node already exists in Loop \p L.
bool alreadyPrepared(Loop *L, Instruction *MemI,
const SCEV *BasePtrStartSCEV,
const SCEV *BasePtrIncSCEV, InstrForm Form);
const SCEV *BasePtrIncSCEV, PrepForm Form);
/// Get the value which defines the increment SCEV \p BasePtrIncSCEV.
Value *getNodeForInc(Loop *L, Instruction *MemI,
@ -293,8 +298,7 @@ namespace {
/// Prepare all candidates in \p Buckets for displacement form, now for
/// ds/dq.
bool dispFormPrep(Loop *L, SmallVector<Bucket, 16> &Buckets,
InstrForm Form);
bool dispFormPrep(Loop *L, SmallVector<Bucket, 16> &Buckets, PrepForm Form);
/// Prepare for one chain \p BucketChain, find the best base element and
/// update all other elements in \p BucketChain accordingly.
@ -302,8 +306,7 @@ namespace {
/// On success, the best base element must be stored as the first element of
/// \p BucketChain.
/// Return false if no base element is found, otherwise return true.
bool prepareBaseForDispFormChain(Bucket &BucketChain,
InstrForm Form);
bool prepareBaseForDispFormChain(Bucket &BucketChain, PrepForm Form);
/// Prepare for one chain \p BucketChain, find the best base element and
/// update all other elements in \p BucketChain accordingly.
@ -316,12 +319,12 @@ namespace {
/// preparation.
bool rewriteLoadStores(Loop *L, Bucket &BucketChain,
SmallSet<BasicBlock *, 16> &BBChanged,
InstrForm Form);
PrepForm Form);
/// Rewrite for the base load/store of a chain.
std::pair<Instruction *, Instruction *>
rewriteForBase(Loop *L, const SCEVAddRecExpr *BasePtrSCEV,
Instruction *BaseMemI, bool CanPreInc, InstrForm Form,
Instruction *BaseMemI, bool CanPreInc, PrepForm Form,
SCEVExpander &SCEVE, SmallPtrSet<Value *, 16> &DeletedPtrs);
/// Rewrite for the other load/stores of a chain according to the new \p
@ -572,9 +575,9 @@ bool PPCLoopInstrFormPrep::rewriteLoadStoresForCommoningChains(
assert(BasePtrSCEV->isAffine() &&
"Invalid SCEV type for the base ptr for a candidate chain!\n");
std::pair<Instruction *, Instruction *> Base =
rewriteForBase(L, BasePtrSCEV, Bucket.Elements[BaseElemIdx].Instr,
false /* CanPreInc */, UpdateForm, SCEVE, DeletedPtrs);
std::pair<Instruction *, Instruction *> Base = rewriteForBase(
L, BasePtrSCEV, Bucket.Elements[BaseElemIdx].Instr,
false /* CanPreInc */, ChainCommoning, SCEVE, DeletedPtrs);
if (!Base.first || !Base.second)
return MadeChange;
@ -645,7 +648,7 @@ bool PPCLoopInstrFormPrep::rewriteLoadStoresForCommoningChains(
std::pair<Instruction *, Instruction *>
PPCLoopInstrFormPrep::rewriteForBase(Loop *L, const SCEVAddRecExpr *BasePtrSCEV,
Instruction *BaseMemI, bool CanPreInc,
InstrForm Form, SCEVExpander &SCEVE,
PrepForm Form, SCEVExpander &SCEVE,
SmallPtrSet<Value *, 16> &DeletedPtrs) {
LLVM_DEBUG(dbgs() << "PIP: Transforming: " << *BasePtrSCEV << "\n");
@ -675,6 +678,13 @@ PPCLoopInstrFormPrep::rewriteForBase(Loop *L, const SCEVAddRecExpr *BasePtrSCEV,
return std::make_pair(nullptr, nullptr);
}
// Update form preparation with a non-constant increment is opt-in: unless
// -ppc-formprep-update-nonconst-inc is set, give up on this base by returning
// a pair of null pointers. Only UpdateForm is gated here; other forms fall
// through this check.
if (Form == UpdateForm && !IsConstantInc && !EnableUpdateFormForNonConstInc) {
LLVM_DEBUG(
dbgs()
<< "Update form prepare for non-const increment is not enabled!\n");
return std::make_pair(nullptr, nullptr);
}
const SCEV *BasePtrStartSCEV = nullptr;
if (CanPreInc) {
assert(SE->isLoopInvariant(BasePtrIncSCEV, L) &&
@ -884,7 +894,7 @@ SmallVector<Bucket, 16> PPCLoopInstrFormPrep::collectCandidates(
}
bool PPCLoopInstrFormPrep::prepareBaseForDispFormChain(Bucket &BucketChain,
InstrForm Form) {
PrepForm Form) {
// RemainderOffsetInfo details:
// key: value of (Offset urem DispConstraint). For DSForm, it can
// be [0, 4).
@ -1001,7 +1011,7 @@ bool PPCLoopInstrFormPrep::prepareBaseForUpdateFormChain(Bucket &BucketChain) {
bool PPCLoopInstrFormPrep::rewriteLoadStores(
Loop *L, Bucket &BucketChain, SmallSet<BasicBlock *, 16> &BBChanged,
InstrForm Form) {
PrepForm Form) {
bool MadeChange = false;
const SCEVAddRecExpr *BasePtrSCEV =
@ -1098,8 +1108,9 @@ bool PPCLoopInstrFormPrep::updateFormPrep(Loop *L,
return MadeChange;
}
bool PPCLoopInstrFormPrep::dispFormPrep(Loop *L, SmallVector<Bucket, 16> &Buckets,
InstrForm Form) {
bool PPCLoopInstrFormPrep::dispFormPrep(Loop *L,
SmallVector<Bucket, 16> &Buckets,
PrepForm Form) {
bool MadeChange = false;
if (Buckets.empty())
@ -1202,7 +1213,7 @@ Value *PPCLoopInstrFormPrep::getNodeForInc(Loop *L, Instruction *MemI,
bool PPCLoopInstrFormPrep::alreadyPrepared(Loop *L, Instruction *MemI,
const SCEV *BasePtrStartSCEV,
const SCEV *BasePtrIncSCEV,
InstrForm Form) {
PrepForm Form) {
BasicBlock *BB = MemI->getParent();
if (!BB)
return false;
@ -1242,7 +1253,7 @@ bool PPCLoopInstrFormPrep::alreadyPrepared(Loop *L, Instruction *MemI,
if (PHIBasePtrIncSCEV == BasePtrIncSCEV) {
// The existing PHI (CurrentPHINode) has the same start and increment
// as the PHI that we wanted to create.
if (Form == UpdateForm &&
if ((Form == UpdateForm || Form == ChainCommoning ) &&
PHIBasePtrSCEV->getStart() == BasePtrStartSCEV) {
++PHINodeAlreadyExistsUpdate;
return true;

View File

@ -771,9 +771,9 @@ define signext i32 @spill_reduce_succ(double* %input1, double* %input2, double*
; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r31, -8(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r2, -152(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r9, -176(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r8, -168(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r7, -160(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r9, -160(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r8, -176(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r7, -168(r1) # 8-byte Folded Spill
; CHECK-NEXT: blt cr0, .LBB7_7
; CHECK-NEXT: # %bb.1: # %for.body.preheader
; CHECK-NEXT: sldi r6, r6, 2
@ -789,66 +789,71 @@ define signext i32 @spill_reduce_succ(double* %input1, double* %input2, double*
; CHECK-NEXT: rldicl r7, r7, 62, 2
; CHECK-NEXT: sldi r10, r12, 2
; CHECK-NEXT: ld r2, -168(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r31, -160(r1) # 8-byte Folded Reload
; CHECK-NEXT: rldicl r7, r7, 2, 1
; CHECK-NEXT: std r7, -184(r1) # 8-byte Folded Spill
; CHECK-NEXT: ld r7, -160(r1) # 8-byte Folded Reload
; CHECK-NEXT: add r8, r7, r10
; CHECK-NEXT: mr r22, r7
; CHECK-NEXT: mr r7, r4
; CHECK-NEXT: ld r4, -176(r1) # 8-byte Folded Reload
; CHECK-NEXT: add r8, r4, r10
; CHECK-NEXT: mr r4, r3
; CHECK-NEXT: ld r3, -176(r1) # 8-byte Folded Reload
; CHECK-NEXT: sldi r8, r8, 3
; CHECK-NEXT: add r9, r5, r8
; CHECK-NEXT: add r8, r2, r10
; CHECK-NEXT: add r10, r31, r10
; CHECK-NEXT: add r8, r3, r10
; CHECK-NEXT: add r10, r2, r10
; CHECK-NEXT: sldi r10, r10, 3
; CHECK-NEXT: sldi r8, r8, 3
; CHECK-NEXT: add r30, r5, r10
; CHECK-NEXT: add r29, r7, r10
; CHECK-NEXT: add r28, r3, r10
; CHECK-NEXT: add r28, r4, r10
; CHECK-NEXT: sldi r10, r12, 1
; CHECK-NEXT: add r8, r5, r8
; CHECK-NEXT: add r11, r12, r10
; CHECK-NEXT: add r0, r4, r11
; CHECK-NEXT: add r0, r22, r11
; CHECK-NEXT: sldi r0, r0, 3
; CHECK-NEXT: add r27, r5, r0
; CHECK-NEXT: add r0, r2, r11
; CHECK-NEXT: add r11, r31, r11
; CHECK-NEXT: add r0, r3, r11
; CHECK-NEXT: add r11, r2, r11
; CHECK-NEXT: sldi r11, r11, 3
; CHECK-NEXT: sldi r0, r0, 3
; CHECK-NEXT: add r25, r5, r11
; CHECK-NEXT: add r24, r7, r11
; CHECK-NEXT: add r23, r3, r11
; CHECK-NEXT: add r11, r4, r10
; CHECK-NEXT: add r23, r4, r11
; CHECK-NEXT: add r11, r22, r10
; CHECK-NEXT: add r26, r5, r0
; CHECK-NEXT: mr r0, r22
; CHECK-NEXT: sldi r11, r11, 3
; CHECK-NEXT: add r22, r5, r11
; CHECK-NEXT: add r11, r2, r10
; CHECK-NEXT: add r10, r31, r10
; CHECK-NEXT: add r11, r3, r10
; CHECK-NEXT: add r10, r2, r10
; CHECK-NEXT: sldi r10, r10, 3
; CHECK-NEXT: sldi r11, r11, 3
; CHECK-NEXT: add r20, r5, r10
; CHECK-NEXT: add r19, r7, r10
; CHECK-NEXT: add r18, r3, r10
; CHECK-NEXT: add r10, r12, r4
; CHECK-NEXT: add r18, r4, r10
; CHECK-NEXT: add r10, r12, r0
; CHECK-NEXT: add r21, r5, r11
; CHECK-NEXT: sldi r11, r2, 3
; CHECK-NEXT: sldi r10, r10, 3
; CHECK-NEXT: add r17, r5, r10
; CHECK-NEXT: add r10, r12, r2
; CHECK-NEXT: add r10, r12, r3
; CHECK-NEXT: sldi r10, r10, 3
; CHECK-NEXT: add r16, r5, r10
; CHECK-NEXT: add r10, r12, r31
; CHECK-NEXT: sldi r31, r31, 3
; CHECK-NEXT: sub r0, r11, r31
; CHECK-NEXT: sldi r11, r4, 3
; CHECK-NEXT: mr r4, r7
; CHECK-NEXT: ld r7, -184(r1) # 8-byte Folded Reload
; CHECK-NEXT: add r10, r12, r2
; CHECK-NEXT: sldi r10, r10, 3
; CHECK-NEXT: add r15, r5, r10
; CHECK-NEXT: add r14, r3, r10
; CHECK-NEXT: sub r31, r11, r31
; CHECK-NEXT: add r2, r4, r10
; CHECK-NEXT: add r14, r7, r10
; CHECK-NEXT: add r31, r4, r10
; CHECK-NEXT: sldi r10, r3, 3
; CHECK-NEXT: mr r3, r4
; CHECK-NEXT: mr r4, r7
; CHECK-NEXT: ld r7, -160(r1) # 8-byte Folded Reload
; CHECK-NEXT: sub r0, r10, r11
; CHECK-NEXT: sldi r10, r7, 3
; CHECK-NEXT: ld r7, -184(r1) # 8-byte Folded Reload
; CHECK-NEXT: sub r2, r10, r11
; CHECK-NEXT: li r11, 0
; CHECK-NEXT: mr r10, r12
; CHECK-NEXT: rldicl r7, r7, 2, 1
; CHECK-NEXT: addi r7, r7, -4
; CHECK-NEXT: rldicl r7, r7, 62, 2
; CHECK-NEXT: addi r7, r7, 1
@ -857,8 +862,8 @@ define signext i32 @spill_reduce_succ(double* %input1, double* %input2, double*
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB7_3: # %for.body
; CHECK-NEXT: #
; CHECK-NEXT: lfd f0, 0(r14)
; CHECK-NEXT: lfd f1, 0(r2)
; CHECK-NEXT: lfd f0, 0(r31)
; CHECK-NEXT: lfd f1, 0(r14)
; CHECK-NEXT: add r10, r10, r12
; CHECK-NEXT: add r10, r10, r12
; CHECK-NEXT: xsmuldp f0, f0, f1
@ -868,16 +873,16 @@ define signext i32 @spill_reduce_succ(double* %input1, double* %input2, double*
; CHECK-NEXT: xsadddp f0, f1, f0
; CHECK-NEXT: stfd f0, 0(r15)
; CHECK-NEXT: add r15, r15, r7
; CHECK-NEXT: lfdx f0, r14, r0
; CHECK-NEXT: lfdx f1, r2, r0
; CHECK-NEXT: lfdx f0, r31, r0
; CHECK-NEXT: lfdx f1, r14, r0
; CHECK-NEXT: xsmuldp f0, f0, f1
; CHECK-NEXT: lfdx f1, r16, r11
; CHECK-NEXT: xsadddp f0, f1, f0
; CHECK-NEXT: stfdx f0, r16, r11
; CHECK-NEXT: lfdx f0, r14, r31
; CHECK-NEXT: lfdx f1, r2, r31
; CHECK-NEXT: lfdx f0, r31, r2
; CHECK-NEXT: lfdx f1, r14, r2
; CHECK-NEXT: add r31, r31, r7
; CHECK-NEXT: add r14, r14, r7
; CHECK-NEXT: add r2, r2, r7
; CHECK-NEXT: xsmuldp f0, f0, f1
; CHECK-NEXT: lfdx f1, r17, r11
; CHECK-NEXT: xsadddp f0, f1, f0
@ -894,8 +899,8 @@ define signext i32 @spill_reduce_succ(double* %input1, double* %input2, double*
; CHECK-NEXT: lfdx f1, r21, r11
; CHECK-NEXT: xsadddp f0, f1, f0
; CHECK-NEXT: stfdx f0, r21, r11
; CHECK-NEXT: lfdx f0, r18, r31
; CHECK-NEXT: lfdx f1, r19, r31
; CHECK-NEXT: lfdx f0, r18, r2
; CHECK-NEXT: lfdx f1, r19, r2
; CHECK-NEXT: add r18, r18, r7
; CHECK-NEXT: add r19, r19, r7
; CHECK-NEXT: xsmuldp f0, f0, f1
@ -914,8 +919,8 @@ define signext i32 @spill_reduce_succ(double* %input1, double* %input2, double*
; CHECK-NEXT: lfdx f1, r26, r11
; CHECK-NEXT: xsadddp f0, f1, f0
; CHECK-NEXT: stfdx f0, r26, r11
; CHECK-NEXT: lfdx f0, r23, r31
; CHECK-NEXT: lfdx f1, r24, r31
; CHECK-NEXT: lfdx f0, r23, r2
; CHECK-NEXT: lfdx f1, r24, r2
; CHECK-NEXT: add r23, r23, r7
; CHECK-NEXT: add r24, r24, r7
; CHECK-NEXT: xsmuldp f0, f0, f1
@ -934,8 +939,8 @@ define signext i32 @spill_reduce_succ(double* %input1, double* %input2, double*
; CHECK-NEXT: lfdx f1, r8, r11
; CHECK-NEXT: xsadddp f0, f1, f0
; CHECK-NEXT: stfdx f0, r8, r11
; CHECK-NEXT: lfdx f0, r28, r31
; CHECK-NEXT: lfdx f1, r29, r31
; CHECK-NEXT: lfdx f0, r28, r2
; CHECK-NEXT: lfdx f1, r29, r2
; CHECK-NEXT: add r28, r28, r7
; CHECK-NEXT: add r29, r29, r7
; CHECK-NEXT: xsmuldp f0, f0, f1
@ -948,46 +953,44 @@ define signext i32 @spill_reduce_succ(double* %input1, double* %input2, double*
; CHECK-NEXT: cmpldi r6, 0
; CHECK-NEXT: beq cr0, .LBB7_7
; CHECK-NEXT: # %bb.5: # %for.body.epil.preheader
; CHECK-NEXT: ld r0, -168(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r7, -176(r1) # 8-byte Folded Reload
; CHECK-NEXT: sldi r8, r12, 3
; CHECK-NEXT: add r0, r10, r0
; CHECK-NEXT: ld r12, -176(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r7, -160(r1) # 8-byte Folded Reload
; CHECK-NEXT: add r12, r10, r12
; CHECK-NEXT: add r7, r10, r7
; CHECK-NEXT: sldi r0, r0, 3
; CHECK-NEXT: sldi r0, r12, 3
; CHECK-NEXT: sldi r11, r7, 3
; CHECK-NEXT: add r30, r5, r0
; CHECK-NEXT: add r29, r4, r0
; CHECK-NEXT: add r28, r3, r0
; CHECK-NEXT: ld r0, -160(r1) # 8-byte Folded Reload
; CHECK-NEXT: add r12, r5, r0
; CHECK-NEXT: add r30, r4, r0
; CHECK-NEXT: add r29, r3, r0
; CHECK-NEXT: ld r0, -168(r1) # 8-byte Folded Reload
; CHECK-NEXT: add r7, r5, r11
; CHECK-NEXT: add r9, r4, r11
; CHECK-NEXT: add r11, r3, r11
; CHECK-NEXT: add r10, r10, r0
; CHECK-NEXT: sub r12, r10, r12
; CHECK-NEXT: sldi r10, r10, 3
; CHECK-NEXT: sldi r12, r12, 3
; CHECK-NEXT: add r5, r5, r10
; CHECK-NEXT: add r4, r4, r10
; CHECK-NEXT: add r3, r3, r10
; CHECK-NEXT: li r10, 0
; CHECK-NEXT: add r3, r3, r12
; CHECK-NEXT: add r4, r4, r12
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB7_6: # %for.body.epil
; CHECK-NEXT: #
; CHECK-NEXT: lfdux f0, r4, r8
; CHECK-NEXT: lfdux f1, r3, r8
; CHECK-NEXT: lfdx f0, r3, r10
; CHECK-NEXT: lfdx f1, r4, r10
; CHECK-NEXT: addi r6, r6, -1
; CHECK-NEXT: cmpldi r6, 0
; CHECK-NEXT: xsmuldp f0, f1, f0
; CHECK-NEXT: xsmuldp f0, f0, f1
; CHECK-NEXT: lfd f1, 0(r5)
; CHECK-NEXT: xsadddp f0, f1, f0
; CHECK-NEXT: stfd f0, 0(r5)
; CHECK-NEXT: add r5, r5, r8
; CHECK-NEXT: lfdx f0, r28, r10
; CHECK-NEXT: lfdx f1, r29, r10
; CHECK-NEXT: xsmuldp f0, f0, f1
; CHECK-NEXT: lfdx f0, r29, r10
; CHECK-NEXT: lfdx f1, r30, r10
; CHECK-NEXT: xsmuldp f0, f0, f1
; CHECK-NEXT: lfdx f1, r12, r10
; CHECK-NEXT: xsadddp f0, f1, f0
; CHECK-NEXT: stfdx f0, r30, r10
; CHECK-NEXT: stfdx f0, r12, r10
; CHECK-NEXT: lfdx f0, r11, r10
; CHECK-NEXT: lfdx f1, r9, r10
; CHECK-NEXT: xsmuldp f0, f0, f1

View File

@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -disable-lsr -ppc-asm-full-reg-names -verify-machineinstrs \
; RUN: -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 < %s | FileCheck %s
; RUN: -ppc-formprep-update-nonconst-inc -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=pwr9 < %s | FileCheck %s
; long long foo(char *p, int n, int count) {
; int j = 0;

View File

@ -45,22 +45,22 @@ define void @foo(double* readonly %0, double* %1, i64 %2, i64 %3, i64 %4, i64 %5
; CHECK-NEXT: # %bb.4:
; CHECK-NEXT: add 23, 6, 12
; CHECK-NEXT: add 22, 6, 30
; CHECK-NEXT: add 26, 6, 28
; CHECK-NEXT: add 25, 6, 8
; CHECK-NEXT: sldi 24, 6, 3
; CHECK-NEXT: sldi 26, 26, 3
; CHECK-NEXT: add 25, 6, 28
; CHECK-NEXT: add 24, 6, 8
; CHECK-NEXT: sldi 26, 6, 3
; CHECK-NEXT: sldi 25, 25, 3
; CHECK-NEXT: sldi 24, 24, 3
; CHECK-NEXT: sldi 23, 23, 3
; CHECK-NEXT: sldi 22, 22, 3
; CHECK-NEXT: add 24, 4, 24
; CHECK-NEXT: add 26, 29, 26
; CHECK-NEXT: add 26, 4, 26
; CHECK-NEXT: add 25, 29, 25
; CHECK-NEXT: add 24, 29, 24
; CHECK-NEXT: add 23, 3, 23
; CHECK-NEXT: add 22, 3, 22
; CHECK-NEXT: .p2align 5
; CHECK-NEXT: .LBB0_5: # Parent Loop BB0_3 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: lfd 0, 0(24)
; CHECK-NEXT: lfd 0, 0(26)
; CHECK-NEXT: lfd 1, 0(23)
; CHECK-NEXT: add 6, 6, 10
; CHECK-NEXT: cmpd 6, 27
@ -81,6 +81,15 @@ define void @foo(double* readonly %0, double* %1, i64 %2, i64 %3, i64 %4, i64 %5
; CHECK-NEXT: lfd 1, 24(22)
; CHECK-NEXT: add 22, 22, 11
; CHECK-NEXT: xsadddp 0, 0, 1
; CHECK-NEXT: lfd 1, -16(24)
; CHECK-NEXT: xsadddp 0, 0, 1
; CHECK-NEXT: lfd 1, -8(24)
; CHECK-NEXT: xsadddp 0, 0, 1
; CHECK-NEXT: lfd 1, 0(24)
; CHECK-NEXT: xsadddp 0, 0, 1
; CHECK-NEXT: lfd 1, 8(24)
; CHECK-NEXT: add 24, 24, 11
; CHECK-NEXT: xsadddp 0, 0, 1
; CHECK-NEXT: lfd 1, -16(25)
; CHECK-NEXT: xsadddp 0, 0, 1
; CHECK-NEXT: lfd 1, -8(25)
@ -90,17 +99,8 @@ define void @foo(double* readonly %0, double* %1, i64 %2, i64 %3, i64 %4, i64 %5
; CHECK-NEXT: lfd 1, 8(25)
; CHECK-NEXT: add 25, 25, 11
; CHECK-NEXT: xsadddp 0, 0, 1
; CHECK-NEXT: lfd 1, -16(26)
; CHECK-NEXT: xsadddp 0, 0, 1
; CHECK-NEXT: lfd 1, -8(26)
; CHECK-NEXT: xsadddp 0, 0, 1
; CHECK-NEXT: lfd 1, 0(26)
; CHECK-NEXT: xsadddp 0, 0, 1
; CHECK-NEXT: lfd 1, 8(26)
; CHECK-NEXT: stfd 0, 0(26)
; CHECK-NEXT: add 26, 26, 11
; CHECK-NEXT: xsadddp 0, 0, 1
; CHECK-NEXT: stfd 0, 0(24)
; CHECK-NEXT: add 24, 24, 11
; CHECK-NEXT: blt 0, .LBB0_5
; CHECK-NEXT: b .LBB0_2
; CHECK-NEXT: .LBB0_6: