forked from OSchip/llvm-project
[ARM] Disable WLSTP loops
This checks whether the loop is likely to become a tail-predicated loop and, if so, disables WLS loop generation, as the likelihood of reverting is currently too high. These should be fairly rare situations anyway, due to the way iteration and element counts are used during lowering. However, simply not attempting WLS generation can alter how SCEVs are materialized, leading to different codegen. It also adds an option to disable all while low-overhead loops, for debugging. Differential Revision: https://reviews.llvm.org/D91663
This commit is contained in:
parent
4d7df43ffd
commit
f08c37da7b
|
@ -50,6 +50,10 @@ static cl::opt<bool> DisableLowOverheadLoops(
|
|||
"disable-arm-loloops", cl::Hidden, cl::init(false),
|
||||
cl::desc("Disable the generation of low-overhead loops"));
|
||||
|
||||
static cl::opt<bool>
|
||||
AllowWLSLoops("allow-arm-wlsloops", cl::Hidden, cl::init(true),
|
||||
cl::desc("Enable the generation of WLS loops"));
|
||||
|
||||
extern cl::opt<TailPredication::Mode> EnableTailPredication;
|
||||
|
||||
extern cl::opt<bool> EnableMaskedGatherScatters;
|
||||
|
@ -1690,7 +1694,9 @@ bool ARMTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
|
|||
};
|
||||
|
||||
// Scan the instructions to see if there's any that we know will turn into a
|
||||
// call or if this loop is already a low-overhead loop.
|
||||
// call or if this loop is already a low-overhead loop or will become a tail
|
||||
// predicated loop.
|
||||
bool IsTailPredLoop = false;
|
||||
auto ScanLoop = [&](Loop *L) {
|
||||
for (auto *BB : L->getBlocks()) {
|
||||
for (auto &I : *BB) {
|
||||
|
@ -1699,6 +1705,13 @@ bool ARMTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
|
|||
LLVM_DEBUG(dbgs() << "ARMHWLoops: Bad instruction: " << I << "\n");
|
||||
return false;
|
||||
}
|
||||
if (auto *II = dyn_cast<IntrinsicInst>(&I))
|
||||
IsTailPredLoop |=
|
||||
II->getIntrinsicID() == Intrinsic::get_active_lane_mask ||
|
||||
II->getIntrinsicID() == Intrinsic::arm_mve_vctp8 ||
|
||||
II->getIntrinsicID() == Intrinsic::arm_mve_vctp16 ||
|
||||
II->getIntrinsicID() == Intrinsic::arm_mve_vctp32 ||
|
||||
II->getIntrinsicID() == Intrinsic::arm_mve_vctp64;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
|
@ -1719,7 +1732,7 @@ bool ARMTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
|
|||
LLVMContext &C = L->getHeader()->getContext();
|
||||
HWLoopInfo.CounterInReg = true;
|
||||
HWLoopInfo.IsNestingLegal = false;
|
||||
HWLoopInfo.PerformEntryTest = true;
|
||||
HWLoopInfo.PerformEntryTest = AllowWLSLoops && !IsTailPredLoop;
|
||||
HWLoopInfo.CountType = Type::getInt32Ty(C);
|
||||
HWLoopInfo.LoopDecrement = ConstantInt::get(HWLoopInfo.CountType, 1);
|
||||
return true;
|
||||
|
|
|
@ -8,28 +8,27 @@ define arm_aapcs_vfpcc void @fast_float_mul(float* nocapture %a, float* nocaptur
|
|||
; CHECK-NEXT: cmp r3, #0
|
||||
; CHECK-NEXT: beq.w .LBB0_11
|
||||
; CHECK-NEXT: @ %bb.1: @ %vector.memcheck
|
||||
; CHECK-NEXT: add.w r4, r0, r3, lsl #2
|
||||
; CHECK-NEXT: add.w r5, r2, r3, lsl #2
|
||||
; CHECK-NEXT: cmp r4, r2
|
||||
; CHECK-NEXT: mov.w r12, #1
|
||||
; CHECK-NEXT: add.w r5, r0, r3, lsl #2
|
||||
; CHECK-NEXT: add.w r4, r2, r3, lsl #2
|
||||
; CHECK-NEXT: cmp r5, r2
|
||||
; CHECK-NEXT: cset r12, hi
|
||||
; CHECK-NEXT: cmp r4, r0
|
||||
; CHECK-NEXT: cset lr, hi
|
||||
; CHECK-NEXT: cmp r5, r0
|
||||
; CHECK-NEXT: cset r6, hi
|
||||
; CHECK-NEXT: cmp r4, r1
|
||||
; CHECK-NEXT: cmp r5, r1
|
||||
; CHECK-NEXT: add.w r5, r1, r3, lsl #2
|
||||
; CHECK-NEXT: cset r4, hi
|
||||
; CHECK-NEXT: cmp r5, r0
|
||||
; CHECK-NEXT: cset r5, hi
|
||||
; CHECK-NEXT: ands r5, r4
|
||||
; CHECK-NEXT: lsls r5, r5, #31
|
||||
; CHECK-NEXT: ands r4, r5
|
||||
; CHECK-NEXT: lsls r4, r4, #31
|
||||
; CHECK-NEXT: itt eq
|
||||
; CHECK-NEXT: andeq.w r6, r6, lr
|
||||
; CHECK-NEXT: lslseq.w r6, r6, #31
|
||||
; CHECK-NEXT: andeq.w r5, lr, r12
|
||||
; CHECK-NEXT: lslseq.w r5, r5, #31
|
||||
; CHECK-NEXT: beq .LBB0_4
|
||||
; CHECK-NEXT: @ %bb.2: @ %for.body.preheader
|
||||
; CHECK-NEXT: subs r6, r3, #1
|
||||
; CHECK-NEXT: subs r5, r3, #1
|
||||
; CHECK-NEXT: and r7, r3, #3
|
||||
; CHECK-NEXT: cmp r6, #3
|
||||
; CHECK-NEXT: cmp r5, #3
|
||||
; CHECK-NEXT: bhs .LBB0_6
|
||||
; CHECK-NEXT: @ %bb.3:
|
||||
; CHECK-NEXT: mov.w r12, #0
|
||||
|
@ -48,11 +47,12 @@ define arm_aapcs_vfpcc void @fast_float_mul(float* nocapture %a, float* nocaptur
|
|||
; CHECK-NEXT: b .LBB0_11
|
||||
; CHECK-NEXT: .LBB0_6: @ %for.body.preheader.new
|
||||
; CHECK-NEXT: bic r3, r3, #3
|
||||
; CHECK-NEXT: movs r5, #1
|
||||
; CHECK-NEXT: subs r3, #4
|
||||
; CHECK-NEXT: add.w lr, r12, r3, lsr #2
|
||||
; CHECK-NEXT: mov.w r12, #0
|
||||
; CHECK-NEXT: add.w lr, r5, r3, lsr #2
|
||||
; CHECK-NEXT: movs r3, #0
|
||||
; CHECK-NEXT: dls lr, lr
|
||||
; CHECK-NEXT: mov.w r12, #0
|
||||
; CHECK-NEXT: .LBB0_7: @ %for.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: adds r4, r1, r3
|
||||
|
@ -224,11 +224,11 @@ define arm_aapcs_vfpcc float @fast_float_mac(float* nocapture readonly %b, float
|
|||
; CHECK-NEXT: cbz r2, .LBB1_4
|
||||
; CHECK-NEXT: @ %bb.1: @ %vector.ph
|
||||
; CHECK-NEXT: adds r3, r2, #3
|
||||
; CHECK-NEXT: vmov.i32 q0, #0x0
|
||||
; CHECK-NEXT: mov.w r12, #1
|
||||
; CHECK-NEXT: bic r3, r3, #3
|
||||
; CHECK-NEXT: sub.w r12, r3, #4
|
||||
; CHECK-NEXT: movs r3, #1
|
||||
; CHECK-NEXT: add.w lr, r3, r12, lsr #2
|
||||
; CHECK-NEXT: vmov.i32 q0, #0x0
|
||||
; CHECK-NEXT: subs r3, #4
|
||||
; CHECK-NEXT: add.w lr, r12, r3, lsr #2
|
||||
; CHECK-NEXT: movs r3, #0
|
||||
; CHECK-NEXT: dls lr, lr
|
||||
; CHECK-NEXT: .LBB1_2: @ %vector.body
|
||||
|
|
|
@ -4,33 +4,33 @@
|
|||
define arm_aapcs_vfpcc void @arm_var_f32_mve(float* %pSrc, i32 %blockSize, float* nocapture %pResult) {
|
||||
; CHECK-LABEL: arm_var_f32_mve:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, lr}
|
||||
; CHECK-NEXT: push {r4, lr}
|
||||
; CHECK-NEXT: mov r4, r1
|
||||
; CHECK-NEXT: cmp r1, #4
|
||||
; CHECK-NEXT: it ge
|
||||
; CHECK-NEXT: movge r4, #4
|
||||
; CHECK-NEXT: movs r3, #1
|
||||
; CHECK-NEXT: subs r4, r1, r4
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: vmov.i32 q0, #0x0
|
||||
; CHECK-NEXT: adds r4, #3
|
||||
; CHECK-NEXT: add.w r12, r3, r4, lsr #2
|
||||
; CHECK-NEXT: mov r3, r1
|
||||
; CHECK-NEXT: dlstp.32 lr, r1
|
||||
; CHECK-NEXT: mov r4, r0
|
||||
; CHECK-NEXT: mov r12, r0
|
||||
; CHECK-NEXT: .LBB0_1: @ %do.body.i
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r4], #16
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r12], #16
|
||||
; CHECK-NEXT: vadd.f32 q0, q0, q1
|
||||
; CHECK-NEXT: letp lr, .LBB0_1
|
||||
; CHECK-NEXT: @ %bb.2: @ %arm_mean_f32_mve.exit
|
||||
; CHECK-NEXT: vmov s4, r1
|
||||
; CHECK-NEXT: dls lr, r12
|
||||
; CHECK-NEXT: vadd.f32 s0, s3, s3
|
||||
; CHECK-NEXT: mov r3, r1
|
||||
; CHECK-NEXT: vadd.f32 s0, s3, s3
|
||||
; CHECK-NEXT: cmp r1, #4
|
||||
; CHECK-NEXT: vcvt.f32.u32 s4, s4
|
||||
; CHECK-NEXT: it ge
|
||||
; CHECK-NEXT: movge r3, #4
|
||||
; CHECK-NEXT: subs r3, r1, r3
|
||||
; CHECK-NEXT: mov.w lr, #1
|
||||
; CHECK-NEXT: adds r3, #3
|
||||
; CHECK-NEXT: add.w lr, lr, r3, lsr #2
|
||||
; CHECK-NEXT: mov r3, r1
|
||||
; CHECK-NEXT: dls lr, lr
|
||||
; CHECK-NEXT: vdiv.f32 s0, s0, s4
|
||||
; CHECK-NEXT: vmov r4, s0
|
||||
; CHECK-NEXT: vmov r12, s0
|
||||
; CHECK-NEXT: vmov.i32 q0, #0x0
|
||||
; CHECK-NEXT: .LBB0_3: @ %do.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
|
@ -38,7 +38,7 @@ define arm_aapcs_vfpcc void @arm_var_f32_mve(float* %pSrc, i32 %blockSize, float
|
|||
; CHECK-NEXT: subs r3, #4
|
||||
; CHECK-NEXT: vpsttt
|
||||
; CHECK-NEXT: vldrwt.u32 q1, [r0], #16
|
||||
; CHECK-NEXT: vsubt.f32 q1, q1, r4
|
||||
; CHECK-NEXT: vsubt.f32 q1, q1, r12
|
||||
; CHECK-NEXT: vfmat.f32 q0, q1, q1
|
||||
; CHECK-NEXT: le lr, .LBB0_3
|
||||
; CHECK-NEXT: @ %bb.4: @ %do.end
|
||||
|
@ -48,7 +48,7 @@ define arm_aapcs_vfpcc void @arm_var_f32_mve(float* %pSrc, i32 %blockSize, float
|
|||
; CHECK-NEXT: vcvt.f32.u32 s2, s2
|
||||
; CHECK-NEXT: vdiv.f32 s0, s0, s2
|
||||
; CHECK-NEXT: vstr s0, [r2]
|
||||
; CHECK-NEXT: pop {r4, pc}
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
entry:
|
||||
br label %do.body.i
|
||||
|
||||
|
|
|
@ -387,28 +387,27 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_char(i8* nocapture readonly
|
|||
; CHECK-NEXT: cmp.w r12, #0
|
||||
; CHECK-NEXT: beq.w .LBB5_11
|
||||
; CHECK-NEXT: @ %bb.1: @ %for.body.lr.ph
|
||||
; CHECK-NEXT: add.w r6, r3, r12, lsl #2
|
||||
; CHECK-NEXT: add.w r4, r1, r12
|
||||
; CHECK-NEXT: cmp r6, r1
|
||||
; CHECK-NEXT: add.w r5, r0, r12
|
||||
; CHECK-NEXT: add.w r4, r3, r12, lsl #2
|
||||
; CHECK-NEXT: add.w r5, r1, r12
|
||||
; CHECK-NEXT: cmp r4, r1
|
||||
; CHECK-NEXT: add.w r6, r0, r12
|
||||
; CHECK-NEXT: cset lr, hi
|
||||
; CHECK-NEXT: cmp r4, r3
|
||||
; CHECK-NEXT: cset r4, hi
|
||||
; CHECK-NEXT: cmp r6, r0
|
||||
; CHECK-NEXT: cset r6, hi
|
||||
; CHECK-NEXT: cmp r5, r3
|
||||
; CHECK-NEXT: cset r5, hi
|
||||
; CHECK-NEXT: ands r5, r6
|
||||
; CHECK-NEXT: movs r6, #1
|
||||
; CHECK-NEXT: lsls r5, r5, #31
|
||||
; CHECK-NEXT: cmp r4, r0
|
||||
; CHECK-NEXT: cset r4, hi
|
||||
; CHECK-NEXT: cmp r6, r3
|
||||
; CHECK-NEXT: cset r6, hi
|
||||
; CHECK-NEXT: ands r4, r6
|
||||
; CHECK-NEXT: lsls r4, r4, #31
|
||||
; CHECK-NEXT: itt eq
|
||||
; CHECK-NEXT: andeq.w r5, r4, lr
|
||||
; CHECK-NEXT: lslseq.w r5, r5, #31
|
||||
; CHECK-NEXT: andeq.w r6, r5, lr
|
||||
; CHECK-NEXT: lslseq.w r6, r6, #31
|
||||
; CHECK-NEXT: beq .LBB5_4
|
||||
; CHECK-NEXT: @ %bb.2: @ %for.body.preheader
|
||||
; CHECK-NEXT: sub.w r5, r12, #1
|
||||
; CHECK-NEXT: sub.w r6, r12, #1
|
||||
; CHECK-NEXT: and r9, r12, #3
|
||||
; CHECK-NEXT: cmp r5, #3
|
||||
; CHECK-NEXT: cmp r6, #3
|
||||
; CHECK-NEXT: bhs .LBB5_6
|
||||
; CHECK-NEXT: @ %bb.3:
|
||||
; CHECK-NEXT: mov.w r12, #0
|
||||
|
@ -426,11 +425,12 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_char(i8* nocapture readonly
|
|||
; CHECK-NEXT: letp lr, .LBB5_5
|
||||
; CHECK-NEXT: b .LBB5_11
|
||||
; CHECK-NEXT: .LBB5_6: @ %for.body.preheader.new
|
||||
; CHECK-NEXT: bic r5, r12, #3
|
||||
; CHECK-NEXT: bic r6, r12, #3
|
||||
; CHECK-NEXT: movs r5, #1
|
||||
; CHECK-NEXT: subs r6, #4
|
||||
; CHECK-NEXT: add.w r4, r3, #8
|
||||
; CHECK-NEXT: subs r5, #4
|
||||
; CHECK-NEXT: mov.w r12, #0
|
||||
; CHECK-NEXT: add.w lr, r6, r5, lsr #2
|
||||
; CHECK-NEXT: add.w lr, r5, r6, lsr #2
|
||||
; CHECK-NEXT: adds r5, r0, #3
|
||||
; CHECK-NEXT: dls lr, lr
|
||||
; CHECK-NEXT: adds r6, r1, #1
|
||||
|
@ -689,28 +689,27 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_uchar(i8* nocapture readonl
|
|||
; CHECK-NEXT: cmp.w r12, #0
|
||||
; CHECK-NEXT: beq.w .LBB7_11
|
||||
; CHECK-NEXT: @ %bb.1: @ %for.body.lr.ph
|
||||
; CHECK-NEXT: add.w r6, r3, r12, lsl #2
|
||||
; CHECK-NEXT: add.w r4, r1, r12
|
||||
; CHECK-NEXT: cmp r6, r1
|
||||
; CHECK-NEXT: add.w r5, r0, r12
|
||||
; CHECK-NEXT: add.w r4, r3, r12, lsl #2
|
||||
; CHECK-NEXT: add.w r5, r1, r12
|
||||
; CHECK-NEXT: cmp r4, r1
|
||||
; CHECK-NEXT: add.w r6, r0, r12
|
||||
; CHECK-NEXT: cset lr, hi
|
||||
; CHECK-NEXT: cmp r4, r3
|
||||
; CHECK-NEXT: cset r4, hi
|
||||
; CHECK-NEXT: cmp r6, r0
|
||||
; CHECK-NEXT: cset r6, hi
|
||||
; CHECK-NEXT: cmp r5, r3
|
||||
; CHECK-NEXT: cset r5, hi
|
||||
; CHECK-NEXT: ands r5, r6
|
||||
; CHECK-NEXT: movs r6, #1
|
||||
; CHECK-NEXT: lsls r5, r5, #31
|
||||
; CHECK-NEXT: cmp r4, r0
|
||||
; CHECK-NEXT: cset r4, hi
|
||||
; CHECK-NEXT: cmp r6, r3
|
||||
; CHECK-NEXT: cset r6, hi
|
||||
; CHECK-NEXT: ands r4, r6
|
||||
; CHECK-NEXT: lsls r4, r4, #31
|
||||
; CHECK-NEXT: itt eq
|
||||
; CHECK-NEXT: andeq.w r5, r4, lr
|
||||
; CHECK-NEXT: lslseq.w r5, r5, #31
|
||||
; CHECK-NEXT: andeq.w r6, r5, lr
|
||||
; CHECK-NEXT: lslseq.w r6, r6, #31
|
||||
; CHECK-NEXT: beq .LBB7_4
|
||||
; CHECK-NEXT: @ %bb.2: @ %for.body.preheader
|
||||
; CHECK-NEXT: sub.w r5, r12, #1
|
||||
; CHECK-NEXT: sub.w r6, r12, #1
|
||||
; CHECK-NEXT: and r9, r12, #3
|
||||
; CHECK-NEXT: cmp r5, #3
|
||||
; CHECK-NEXT: cmp r6, #3
|
||||
; CHECK-NEXT: bhs .LBB7_6
|
||||
; CHECK-NEXT: @ %bb.3:
|
||||
; CHECK-NEXT: mov.w r12, #0
|
||||
|
@ -728,11 +727,12 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_uchar(i8* nocapture readonl
|
|||
; CHECK-NEXT: letp lr, .LBB7_5
|
||||
; CHECK-NEXT: b .LBB7_11
|
||||
; CHECK-NEXT: .LBB7_6: @ %for.body.preheader.new
|
||||
; CHECK-NEXT: bic r5, r12, #3
|
||||
; CHECK-NEXT: bic r6, r12, #3
|
||||
; CHECK-NEXT: movs r5, #1
|
||||
; CHECK-NEXT: subs r6, #4
|
||||
; CHECK-NEXT: add.w r4, r3, #8
|
||||
; CHECK-NEXT: subs r5, #4
|
||||
; CHECK-NEXT: mov.w r12, #0
|
||||
; CHECK-NEXT: add.w lr, r6, r5, lsr #2
|
||||
; CHECK-NEXT: add.w lr, r5, r6, lsr #2
|
||||
; CHECK-NEXT: adds r5, r0, #3
|
||||
; CHECK-NEXT: dls lr, lr
|
||||
; CHECK-NEXT: adds r6, r1, #1
|
||||
|
@ -991,28 +991,27 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_int(i32* nocapture readonly
|
|||
; CHECK-NEXT: cmp.w r12, #0
|
||||
; CHECK-NEXT: beq.w .LBB9_11
|
||||
; CHECK-NEXT: @ %bb.1: @ %vector.memcheck
|
||||
; CHECK-NEXT: add.w r6, r3, r12, lsl #2
|
||||
; CHECK-NEXT: add.w r4, r1, r12, lsl #2
|
||||
; CHECK-NEXT: cmp r6, r1
|
||||
; CHECK-NEXT: add.w r5, r0, r12, lsl #2
|
||||
; CHECK-NEXT: add.w r4, r3, r12, lsl #2
|
||||
; CHECK-NEXT: add.w r5, r1, r12, lsl #2
|
||||
; CHECK-NEXT: cmp r4, r1
|
||||
; CHECK-NEXT: add.w r6, r0, r12, lsl #2
|
||||
; CHECK-NEXT: cset lr, hi
|
||||
; CHECK-NEXT: cmp r4, r3
|
||||
; CHECK-NEXT: cset r4, hi
|
||||
; CHECK-NEXT: cmp r6, r0
|
||||
; CHECK-NEXT: cset r6, hi
|
||||
; CHECK-NEXT: cmp r5, r3
|
||||
; CHECK-NEXT: cset r5, hi
|
||||
; CHECK-NEXT: ands r5, r6
|
||||
; CHECK-NEXT: movs r6, #1
|
||||
; CHECK-NEXT: lsls r5, r5, #31
|
||||
; CHECK-NEXT: cmp r4, r0
|
||||
; CHECK-NEXT: cset r4, hi
|
||||
; CHECK-NEXT: cmp r6, r3
|
||||
; CHECK-NEXT: cset r6, hi
|
||||
; CHECK-NEXT: ands r4, r6
|
||||
; CHECK-NEXT: lsls r4, r4, #31
|
||||
; CHECK-NEXT: itt eq
|
||||
; CHECK-NEXT: andeq.w r5, r4, lr
|
||||
; CHECK-NEXT: lslseq.w r5, r5, #31
|
||||
; CHECK-NEXT: andeq.w r6, r5, lr
|
||||
; CHECK-NEXT: lslseq.w r6, r6, #31
|
||||
; CHECK-NEXT: beq .LBB9_4
|
||||
; CHECK-NEXT: @ %bb.2: @ %for.body.preheader
|
||||
; CHECK-NEXT: sub.w r5, r12, #1
|
||||
; CHECK-NEXT: sub.w r6, r12, #1
|
||||
; CHECK-NEXT: and r9, r12, #3
|
||||
; CHECK-NEXT: cmp r5, #3
|
||||
; CHECK-NEXT: cmp r6, #3
|
||||
; CHECK-NEXT: bhs .LBB9_6
|
||||
; CHECK-NEXT: @ %bb.3:
|
||||
; CHECK-NEXT: mov.w r12, #0
|
||||
|
@ -1030,11 +1029,12 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_int(i32* nocapture readonly
|
|||
; CHECK-NEXT: letp lr, .LBB9_5
|
||||
; CHECK-NEXT: b .LBB9_11
|
||||
; CHECK-NEXT: .LBB9_6: @ %for.body.preheader.new
|
||||
; CHECK-NEXT: bic r5, r12, #3
|
||||
; CHECK-NEXT: bic r6, r12, #3
|
||||
; CHECK-NEXT: movs r5, #1
|
||||
; CHECK-NEXT: subs r6, #4
|
||||
; CHECK-NEXT: add.w r4, r3, #8
|
||||
; CHECK-NEXT: subs r5, #4
|
||||
; CHECK-NEXT: mov.w r12, #0
|
||||
; CHECK-NEXT: add.w lr, r6, r5, lsr #2
|
||||
; CHECK-NEXT: add.w lr, r5, r6, lsr #2
|
||||
; CHECK-NEXT: add.w r5, r0, #8
|
||||
; CHECK-NEXT: dls lr, lr
|
||||
; CHECK-NEXT: add.w r6, r1, #8
|
||||
|
|
|
@ -448,12 +448,12 @@ define dso_local arm_aapcs_vfpcc i32 @two_loops_mul_add_v4i32(i8* nocapture read
|
|||
; CHECK-NEXT: vaddv.u32 r12, q0
|
||||
; CHECK-NEXT: cbz r2, .LBB6_7
|
||||
; CHECK-NEXT: @ %bb.4: @ %vector.ph47
|
||||
; CHECK-NEXT: movs r3, #0
|
||||
; CHECK-NEXT: vdup.32 q0, r3
|
||||
; CHECK-NEXT: movs r3, #1
|
||||
; CHECK-NEXT: add.w lr, r3, r6, lsr #2
|
||||
; CHECK-NEXT: movs r3, #0
|
||||
; CHECK-NEXT: dls lr, lr
|
||||
; CHECK-NEXT: vdup.32 q0, r3
|
||||
; CHECK-NEXT: vmov.32 q0[0], r12
|
||||
; CHECK-NEXT: dls lr, lr
|
||||
; CHECK-NEXT: .LBB6_5: @ %vector.body46
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vctp.32 r2
|
||||
|
@ -643,41 +643,41 @@ define i32 @wrongop(%struct.date* nocapture readonly %pd) {
|
|||
; CHECK-NEXT: push {r4, lr}
|
||||
; CHECK-NEXT: mov r1, r0
|
||||
; CHECK-NEXT: movw r12, #47184
|
||||
; CHECK-NEXT: movw r3, #23593
|
||||
; CHECK-NEXT: ldrd r2, lr, [r1, #4]
|
||||
; CHECK-NEXT: movw r1, #23593
|
||||
; CHECK-NEXT: movt r12, #1310
|
||||
; CHECK-NEXT: movt r1, #49807
|
||||
; CHECK-NEXT: mla r1, lr, r1, r12
|
||||
; CHECK-NEXT: movw r3, #55051
|
||||
; CHECK-NEXT: movt r3, #49807
|
||||
; CHECK-NEXT: mla r3, lr, r3, r12
|
||||
; CHECK-NEXT: movw r1, #55051
|
||||
; CHECK-NEXT: movw r4, #23593
|
||||
; CHECK-NEXT: movt r3, #163
|
||||
; CHECK-NEXT: movt r1, #163
|
||||
; CHECK-NEXT: ldr r0, [r0]
|
||||
; CHECK-NEXT: movt r4, #655
|
||||
; CHECK-NEXT: ror.w r12, r1, #4
|
||||
; CHECK-NEXT: cmp r12, r3
|
||||
; CHECK-NEXT: cset r3, lo
|
||||
; CHECK-NEXT: ror.w r1, r1, #2
|
||||
; CHECK-NEXT: ror.w r12, r3, #4
|
||||
; CHECK-NEXT: cmp r12, r1
|
||||
; CHECK-NEXT: cset r1, lo
|
||||
; CHECK-NEXT: ror.w r3, r3, #2
|
||||
; CHECK-NEXT: mov.w r12, #1
|
||||
; CHECK-NEXT: cmp r1, r4
|
||||
; CHECK-NEXT: csel r1, r3, r12, lo
|
||||
; CHECK-NEXT: cmp r3, r4
|
||||
; CHECK-NEXT: csel r3, r1, r12, lo
|
||||
; CHECK-NEXT: lsls.w r4, lr, #30
|
||||
; CHECK-NEXT: csel r3, r3, r1, ne
|
||||
; CHECK-NEXT: csel r1, r1, r3, ne
|
||||
; CHECK-NEXT: cmp r2, #1
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: poplt {r4, pc}
|
||||
; CHECK-NEXT: .LBB8_1: @ %vector.ph
|
||||
; CHECK-NEXT: adds r1, r2, #3
|
||||
; CHECK-NEXT: movw r3, :lower16:days
|
||||
; CHECK-NEXT: movs r4, #52
|
||||
; CHECK-NEXT: bic r1, r1, #3
|
||||
; CHECK-NEXT: subs r1, #4
|
||||
; CHECK-NEXT: add.w lr, r12, r1, lsr #2
|
||||
; CHECK-NEXT: movw r1, :lower16:days
|
||||
; CHECK-NEXT: movt r1, :upper16:days
|
||||
; CHECK-NEXT: dls lr, lr
|
||||
; CHECK-NEXT: mla r1, r3, r4, r1
|
||||
; CHECK-NEXT: movt r3, :upper16:days
|
||||
; CHECK-NEXT: mla r1, r1, r4, r3
|
||||
; CHECK-NEXT: movs r3, #0
|
||||
; CHECK-NEXT: vdup.32 q0, r3
|
||||
; CHECK-NEXT: vmov.32 q0[0], r0
|
||||
; CHECK-NEXT: adds r0, r2, #3
|
||||
; CHECK-NEXT: bic r0, r0, #3
|
||||
; CHECK-NEXT: subs r0, #4
|
||||
; CHECK-NEXT: add.w lr, r12, r0, lsr #2
|
||||
; CHECK-NEXT: dls lr, lr
|
||||
; CHECK-NEXT: .LBB8_2: @ %vector.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vctp.32 r2
|
||||
|
|
|
@ -8,13 +8,18 @@ define void @simple(i32* nocapture readonly %x, i32* nocapture readnone %y, i32*
|
|||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: ldr r1, [sp, #8]
|
||||
; CHECK-NEXT: mov r12, r3
|
||||
; CHECK-NEXT: wlstp.32 lr, r1, .LBB0_2
|
||||
; CHECK-NEXT: .LBB0_1: @ %do.body
|
||||
; CHECK-NEXT: movs r3, #0
|
||||
; CHECK-NEXT: add.w lr, r1, #3
|
||||
; CHECK-NEXT: cmp.w r3, lr, lsr #2
|
||||
; CHECK-NEXT: beq .LBB0_3
|
||||
; CHECK-NEXT: @ %bb.1: @ %do.body.preheader
|
||||
; CHECK-NEXT: dlstp.32 lr, r1
|
||||
; CHECK-NEXT: .LBB0_2: @ %do.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0], #16
|
||||
; CHECK-NEXT: vaddva.s32 r12, q0
|
||||
; CHECK-NEXT: letp lr, .LBB0_1
|
||||
; CHECK-NEXT: .LBB0_2: @ %if.end
|
||||
; CHECK-NEXT: letp lr, .LBB0_2
|
||||
; CHECK-NEXT: .LBB0_3: @ %if.end
|
||||
; CHECK-NEXT: str.w r12, [r2]
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
entry:
|
||||
|
@ -48,52 +53,48 @@ if.end: ; preds = %do.body, %entry
|
|||
define void @nested(i32* nocapture readonly %x, i32* nocapture readnone %y, i32* nocapture %z, i32 %m, i32 %n) {
|
||||
; CHECK-LABEL: nested:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, r7, lr}
|
||||
; CHECK-NEXT: cbz r3, .LBB1_7
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr}
|
||||
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
|
||||
; CHECK-NEXT: cbz r3, .LBB1_8
|
||||
; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
|
||||
; CHECK-NEXT: ldr.w r12, [sp, #20]
|
||||
; CHECK-NEXT: ldr r5, [sp, #24]
|
||||
; CHECK-NEXT: mov.w r12, #0
|
||||
; CHECK-NEXT: movs r1, #0
|
||||
; CHECK-NEXT: b .LBB1_3
|
||||
; CHECK-NEXT: .LBB1_2: @ %if.end
|
||||
; CHECK-NEXT: @ in Loop: Header=BB1_3 Depth=1
|
||||
; CHECK-NEXT: b .LBB1_4
|
||||
; CHECK-NEXT: .LBB1_2: @ in Loop: Header=BB1_4 Depth=1
|
||||
; CHECK-NEXT: mov r4, r3
|
||||
; CHECK-NEXT: .LBB1_3: @ %if.end
|
||||
; CHECK-NEXT: @ in Loop: Header=BB1_4 Depth=1
|
||||
; CHECK-NEXT: str.w r4, [r2, r1, lsl #2]
|
||||
; CHECK-NEXT: adds r1, #1
|
||||
; CHECK-NEXT: cmp r1, r3
|
||||
; CHECK-NEXT: beq .LBB1_7
|
||||
; CHECK-NEXT: .LBB1_3: @ %for.body
|
||||
; CHECK-NEXT: beq .LBB1_8
|
||||
; CHECK-NEXT: .LBB1_4: @ %for.body
|
||||
; CHECK-NEXT: @ =>This Loop Header: Depth=1
|
||||
; CHECK-NEXT: @ Child Loop BB1_5 Depth 2
|
||||
; CHECK-NEXT: add.w r5, r12, #3
|
||||
; CHECK-NEXT: mov r4, r3
|
||||
; CHECK-NEXT: lsr.w lr, r5, #2
|
||||
; CHECK-NEXT: cmp.w lr, #0
|
||||
; CHECK-NEXT: @ Child Loop BB1_6 Depth 2
|
||||
; CHECK-NEXT: adds r7, r5, #3
|
||||
; CHECK-NEXT: cmp.w r12, r7, lsr #2
|
||||
; CHECK-NEXT: beq .LBB1_2
|
||||
; CHECK-NEXT: b .LBB1_4
|
||||
; CHECK-NEXT: .LBB1_4: @ %do.body.preheader
|
||||
; CHECK-NEXT: @ in Loop: Header=BB1_3 Depth=1
|
||||
; CHECK-NEXT: bic r6, r5, #3
|
||||
; CHECK-NEXT: mov r7, r12
|
||||
; CHECK-NEXT: @ %bb.5: @ %do.body.preheader
|
||||
; CHECK-NEXT: @ in Loop: Header=BB1_4 Depth=1
|
||||
; CHECK-NEXT: bic r6, r7, #3
|
||||
; CHECK-NEXT: dlstp.32 lr, r5
|
||||
; CHECK-NEXT: mov r7, r5
|
||||
; CHECK-NEXT: add.w r8, r0, r6, lsl #2
|
||||
; CHECK-NEXT: mov r4, r3
|
||||
; CHECK-NEXT: add.w r5, r0, r6, lsl #2
|
||||
; CHECK-NEXT: .LBB1_5: @ %do.body
|
||||
; CHECK-NEXT: @ Parent Loop BB1_3 Depth=1
|
||||
; CHECK-NEXT: .LBB1_6: @ %do.body
|
||||
; CHECK-NEXT: @ Parent Loop BB1_4 Depth=1
|
||||
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
|
||||
; CHECK-NEXT: vctp.32 r7
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0], #16
|
||||
; CHECK-NEXT: subs r7, #4
|
||||
; CHECK-NEXT: subs.w lr, lr, #1
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vaddvat.s32 r4, q0
|
||||
; CHECK-NEXT: bne .LBB1_5
|
||||
; CHECK-NEXT: b .LBB1_6
|
||||
; CHECK-NEXT: .LBB1_6: @ %if.end.loopexit
|
||||
; CHECK-NEXT: @ in Loop: Header=BB1_3 Depth=1
|
||||
; CHECK-NEXT: sub.w r12, r12, r6
|
||||
; CHECK-NEXT: mov r0, r5
|
||||
; CHECK-NEXT: b .LBB1_2
|
||||
; CHECK-NEXT: .LBB1_7: @ %for.cond.cleanup
|
||||
; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
|
||||
; CHECK-NEXT: vaddva.s32 r4, q0
|
||||
; CHECK-NEXT: letp lr, .LBB1_6
|
||||
; CHECK-NEXT: @ %bb.7: @ %if.end.loopexit
|
||||
; CHECK-NEXT: @ in Loop: Header=BB1_4 Depth=1
|
||||
; CHECK-NEXT: subs r5, r5, r6
|
||||
; CHECK-NEXT: mov r0, r8
|
||||
; CHECK-NEXT: b .LBB1_3
|
||||
; CHECK-NEXT: .LBB1_8: @ %for.cond.cleanup
|
||||
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
|
||||
entry:
|
||||
%cmp20.not = icmp eq i32 %m, 0
|
||||
br i1 %cmp20.not, label %for.cond.cleanup, label %for.body
|
||||
|
|
|
@ -264,10 +264,10 @@ define arm_aapcs_vfpcc void @fmss1(float* nocapture readonly %x, float* nocaptur
|
|||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: poplt {r4, pc}
|
||||
; CHECK-NEXT: .LBB4_1: @ %vector.ph
|
||||
; CHECK-NEXT: vmov r4, s0
|
||||
; CHECK-NEXT: dlstp.32 lr, r3
|
||||
; CHECK-NEXT: eor r12, r4, #-2147483648
|
||||
; CHECK-NEXT: vmov r12, s0
|
||||
; CHECK-NEXT: movs r4, #0
|
||||
; CHECK-NEXT: dlstp.32 lr, r3
|
||||
; CHECK-NEXT: eor r12, r12, #-2147483648
|
||||
; CHECK-NEXT: .LBB4_2: @ %vector.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: adds r4, #4
|
||||
|
@ -323,16 +323,15 @@ for.cond.cleanup: ; preds = %vector.body, %entry
|
|||
define arm_aapcs_vfpcc void @fmss2(float* nocapture readonly %x, float* nocapture readonly %y, float* noalias nocapture %z, float %a, i32 %n) {
|
||||
; CHECK-LABEL: fmss2:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, lr}
|
||||
; CHECK-NEXT: push {r4, lr}
|
||||
; CHECK-NEXT: .save {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: cmp r3, #1
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: poplt {r4, pc}
|
||||
; CHECK-NEXT: .LBB5_1: @ %vector.ph
|
||||
; CHECK-NEXT: vmov r4, s0
|
||||
; CHECK-NEXT: vdup.32 q0, r4
|
||||
; CHECK-NEXT: vneg.f32 q0, q0
|
||||
; CHECK-NEXT: blt .LBB5_3
|
||||
; CHECK-NEXT: @ %bb.1: @ %vector.ph
|
||||
; CHECK-NEXT: vmov r6, s0
|
||||
; CHECK-NEXT: vdup.32 q0, r6
|
||||
; CHECK-NEXT: mov.w r12, #0
|
||||
; CHECK-NEXT: vneg.f32 q0, q0
|
||||
; CHECK-NEXT: dlstp.32 lr, r3
|
||||
; CHECK-NEXT: .LBB5_2: @ %vector.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
|
@ -343,8 +342,8 @@ define arm_aapcs_vfpcc void @fmss2(float* nocapture readonly %x, float* nocaptur
|
|||
; CHECK-NEXT: vfma.f32 q3, q2, q1
|
||||
; CHECK-NEXT: vstrw.32 q3, [r2], #16
|
||||
; CHECK-NEXT: letp lr, .LBB5_2
|
||||
; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup
|
||||
; CHECK-NEXT: pop {r4, pc}
|
||||
; CHECK-NEXT: .LBB5_3: @ %for.cond.cleanup
|
||||
; CHECK-NEXT: pop {r4, r5, r6, pc}
|
||||
entry:
|
||||
%cmp8 = icmp sgt i32 %n, 0
|
||||
br i1 %cmp8, label %vector.ph, label %for.cond.cleanup
|
||||
|
@ -390,14 +389,14 @@ for.cond.cleanup: ; preds = %vector.body, %entry
|
|||
define arm_aapcs_vfpcc void @fmss3(float* nocapture readonly %x, float* nocapture readonly %y, float* noalias nocapture %z, float %a, i32 %n) {
|
||||
; CHECK-LABEL: fmss3:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: .save {r4, lr}
|
||||
; CHECK-NEXT: push {r4, lr}
|
||||
; CHECK-NEXT: cmp r3, #1
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: poplt {r7, pc}
|
||||
; CHECK-NEXT: poplt {r4, pc}
|
||||
; CHECK-NEXT: .LBB6_1: @ %vector.ph
|
||||
; CHECK-NEXT: vmov r12, s0
|
||||
; CHECK-NEXT: vdup.32 q0, r12
|
||||
; CHECK-NEXT: vmov r4, s0
|
||||
; CHECK-NEXT: vdup.32 q0, r4
|
||||
; CHECK-NEXT: mov.w r12, #0
|
||||
; CHECK-NEXT: dlstp.32 lr, r3
|
||||
; CHECK-NEXT: .LBB6_2: @ %vector.body
|
||||
|
@ -410,7 +409,7 @@ define arm_aapcs_vfpcc void @fmss3(float* nocapture readonly %x, float* nocaptur
|
|||
; CHECK-NEXT: vstrw.32 q3, [r2], #16
|
||||
; CHECK-NEXT: letp lr, .LBB6_2
|
||||
; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
; CHECK-NEXT: pop {r4, pc}
|
||||
entry:
|
||||
%cmp8 = icmp sgt i32 %n, 0
|
||||
br i1 %cmp8, label %vector.ph, label %for.cond.cleanup
|
||||
|
@ -456,14 +455,14 @@ for.cond.cleanup: ; preds = %vector.body, %entry
|
|||
define arm_aapcs_vfpcc void @fmss4(float* nocapture readonly %x, float* nocapture readonly %y, float* noalias nocapture %z, float %a, i32 %n) {
|
||||
; CHECK-LABEL: fmss4:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: .save {r4, lr}
|
||||
; CHECK-NEXT: push {r4, lr}
|
||||
; CHECK-NEXT: cmp r3, #1
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: poplt {r7, pc}
|
||||
; CHECK-NEXT: poplt {r4, pc}
|
||||
; CHECK-NEXT: .LBB7_1: @ %vector.ph
|
||||
; CHECK-NEXT: vmov r12, s0
|
||||
; CHECK-NEXT: vdup.32 q0, r12
|
||||
; CHECK-NEXT: vmov r4, s0
|
||||
; CHECK-NEXT: vdup.32 q0, r4
|
||||
; CHECK-NEXT: mov.w r12, #0
|
||||
; CHECK-NEXT: dlstp.32 lr, r3
|
||||
; CHECK-NEXT: .LBB7_2: @ %vector.body
|
||||
|
@ -476,7 +475,7 @@ define arm_aapcs_vfpcc void @fmss4(float* nocapture readonly %x, float* nocaptur
|
|||
; CHECK-NEXT: vstrw.32 q3, [r2], #16
|
||||
; CHECK-NEXT: letp lr, .LBB7_2
|
||||
; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
; CHECK-NEXT: pop {r4, pc}
|
||||
entry:
|
||||
%cmp8 = icmp sgt i32 %n, 0
|
||||
br i1 %cmp8, label %vector.ph, label %for.cond.cleanup
|
||||
|
@ -528,10 +527,10 @@ define arm_aapcs_vfpcc void @fms1(float* nocapture readonly %x, float* nocapture
|
|||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: poplt {r4, pc}
|
||||
; CHECK-NEXT: .LBB8_1: @ %vector.ph
|
||||
; CHECK-NEXT: vmov r4, s0
|
||||
; CHECK-NEXT: dlstp.32 lr, r3
|
||||
; CHECK-NEXT: eor r12, r4, #-2147483648
|
||||
; CHECK-NEXT: vmov r12, s0
|
||||
; CHECK-NEXT: movs r4, #0
|
||||
; CHECK-NEXT: dlstp.32 lr, r3
|
||||
; CHECK-NEXT: eor r12, r12, #-2147483648
|
||||
; CHECK-NEXT: .LBB8_2: @ %vector.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: adds r4, #4
|
||||
|
@ -587,14 +586,14 @@ for.cond.cleanup: ; preds = %vector.body, %entry
|
|||
define arm_aapcs_vfpcc void @fms2(float* nocapture readonly %x, float* nocapture readonly %y, float* noalias nocapture %z, float %a, i32 %n) {
|
||||
; CHECK-LABEL: fms2:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: .save {r4, lr}
|
||||
; CHECK-NEXT: push {r4, lr}
|
||||
; CHECK-NEXT: cmp r3, #1
|
||||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: poplt {r7, pc}
|
||||
; CHECK-NEXT: poplt {r4, pc}
|
||||
; CHECK-NEXT: .LBB9_1: @ %vector.ph
|
||||
; CHECK-NEXT: vmov r12, s0
|
||||
; CHECK-NEXT: vdup.32 q0, r12
|
||||
; CHECK-NEXT: vmov r4, s0
|
||||
; CHECK-NEXT: vdup.32 q0, r4
|
||||
; CHECK-NEXT: mov.w r12, #0
|
||||
; CHECK-NEXT: dlstp.32 lr, r3
|
||||
; CHECK-NEXT: .LBB9_2: @ %vector.body
|
||||
|
@ -606,7 +605,7 @@ define arm_aapcs_vfpcc void @fms2(float* nocapture readonly %x, float* nocapture
|
|||
; CHECK-NEXT: vstrw.32 q2, [r2], #16
|
||||
; CHECK-NEXT: letp lr, .LBB9_2
|
||||
; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
; CHECK-NEXT: pop {r4, pc}
|
||||
entry:
|
||||
%cmp8 = icmp sgt i32 %n, 0
|
||||
br i1 %cmp8, label %vector.ph, label %for.cond.cleanup
|
||||
|
|
|
@ -574,27 +574,19 @@ define i32 @arm_nn_mat_mul_core_4x_s8(i32 %row_elements, i32 %offset, i8* %row_b
|
|||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r10, lr}
|
||||
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r10, lr}
|
||||
; CHECK-NEXT: add.w r7, r0, #15
|
||||
; CHECK-NEXT: ldr.w r12, [sp, #32]
|
||||
; CHECK-NEXT: asrs r6, r7, #31
|
||||
; CHECK-NEXT: add.w r7, r7, r6, lsr #28
|
||||
; CHECK-NEXT: movs r6, #1
|
||||
; CHECK-NEXT: asrs r5, r7, #4
|
||||
; CHECK-NEXT: cmp r5, #1
|
||||
; CHECK-NEXT: it gt
|
||||
; CHECK-NEXT: asrgt r6, r7, #4
|
||||
; CHECK-NEXT: cmp r0, #1
|
||||
; CHECK-NEXT: blt .LBB4_3
|
||||
; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
|
||||
; CHECK-NEXT: adds r7, r2, r1
|
||||
; CHECK-NEXT: add.w r5, r2, r1, lsl #1
|
||||
; CHECK-NEXT: add.w r1, r1, r1, lsl #1
|
||||
; CHECK-NEXT: dlstp.8 lr, r0
|
||||
; CHECK-NEXT: add r1, r2
|
||||
; CHECK-NEXT: mov.w r8, #0
|
||||
; CHECK-NEXT: movs r6, #0
|
||||
; CHECK-NEXT: movs r4, #0
|
||||
; CHECK-NEXT: mov.w r10, #0
|
||||
; CHECK-NEXT: movs r6, #0
|
||||
; CHECK-NEXT: adds r7, r2, r1
|
||||
; CHECK-NEXT: add.w r1, r1, r1, lsl #1
|
||||
; CHECK-NEXT: add r1, r2
|
||||
; CHECK-NEXT: dlstp.8 lr, r0
|
||||
; CHECK-NEXT: .LBB4_2: @ %for.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vldrb.u8 q0, [r3], #16
|
||||
|
@ -735,20 +727,20 @@ define i8* @signext(i8* %input_row, i8* %input_col, i16 zeroext %output_ch, i16
|
|||
; CHECK-NEXT: @ =>This Loop Header: Depth=1
|
||||
; CHECK-NEXT: @ Child Loop BB5_7 Depth 2
|
||||
; CHECK-NEXT: ldr r0, [sp, #92]
|
||||
; CHECK-NEXT: cmp r2, r2
|
||||
; CHECK-NEXT: ldr.w r12, [r0, r9, lsl #2]
|
||||
; CHECK-NEXT: subs r0, r2, r2
|
||||
; CHECK-NEXT: ble .LBB5_3
|
||||
; CHECK-NEXT: bge .LBB5_3
|
||||
; CHECK-NEXT: @ %bb.6: @ %for.body24.preheader
|
||||
; CHECK-NEXT: @ in Loop: Header=BB5_5 Depth=1
|
||||
; CHECK-NEXT: ldr.w r11, [sp, #88]
|
||||
; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
|
||||
; CHECK-NEXT: mov r6, r12
|
||||
; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
|
||||
; CHECK-NEXT: mov r8, r12
|
||||
; CHECK-NEXT: dlstp.16 lr, r11
|
||||
; CHECK-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
|
||||
; CHECK-NEXT: mla r3, r9, r11, r1
|
||||
; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload
|
||||
; CHECK-NEXT: mla r3, r9, r11, r0
|
||||
; CHECK-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
|
||||
; CHECK-NEXT: ldrd r7, r5, [sp] @ 8-byte Folded Reload
|
||||
; CHECK-NEXT: mov r8, r12
|
||||
; CHECK-NEXT: mov r10, r12
|
||||
; CHECK-NEXT: .LBB5_7: @ %for.body24
|
||||
; CHECK-NEXT: @ Parent Loop BB5_5 Depth=1
|
||||
|
@ -907,20 +899,20 @@ define i8* @signext_optsize(i8* %input_row, i8* %input_col, i16 zeroext %output_
|
|||
; CHECK-NEXT: @ =>This Loop Header: Depth=1
|
||||
; CHECK-NEXT: @ Child Loop BB6_5 Depth 2
|
||||
; CHECK-NEXT: ldr r0, [sp, #92]
|
||||
; CHECK-NEXT: cmp r2, r2
|
||||
; CHECK-NEXT: ldr.w r12, [r0, r9, lsl #2]
|
||||
; CHECK-NEXT: subs r0, r2, r2
|
||||
; CHECK-NEXT: ble .LBB6_6
|
||||
; CHECK-NEXT: bge .LBB6_6
|
||||
; CHECK-NEXT: @ %bb.4: @ %for.body24.preheader
|
||||
; CHECK-NEXT: @ in Loop: Header=BB6_3 Depth=1
|
||||
; CHECK-NEXT: ldr.w r11, [sp, #88]
|
||||
; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
|
||||
; CHECK-NEXT: mov r6, r12
|
||||
; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
|
||||
; CHECK-NEXT: mov r8, r12
|
||||
; CHECK-NEXT: dlstp.16 lr, r11
|
||||
; CHECK-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
|
||||
; CHECK-NEXT: mla r3, r9, r11, r1
|
||||
; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload
|
||||
; CHECK-NEXT: mla r3, r9, r11, r0
|
||||
; CHECK-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
|
||||
; CHECK-NEXT: ldrd r7, r5, [sp] @ 8-byte Folded Reload
|
||||
; CHECK-NEXT: mov r8, r12
|
||||
; CHECK-NEXT: mov r10, r12
|
||||
; CHECK-NEXT: .LBB6_5: @ %for.body24
|
||||
; CHECK-NEXT: @ Parent Loop BB6_3 Depth=1
|
||||
|
|
Loading…
Reference in New Issue