forked from OSchip/llvm-project
[ARM] Sink splats to MVE intrinsics
The predicated MVE intrinsics are generated as, for example, llvm.arm.mve.add.predicated(x, splat(y), p). We need to sink the splat value back into the loop, like we do for other instructions, so we can re-select qr variants. Differential Revision: https://reviews.llvm.org/D87693
This commit is contained in:
parent
7b2dd58eb0
commit
34b27b9441
|
@ -16446,6 +16446,19 @@ bool ARMTargetLowering::shouldSinkOperands(Instruction *I,
|
|||
switch (II->getIntrinsicID()) {
|
||||
case Intrinsic::fma:
|
||||
return !IsFMS(I);
|
||||
case Intrinsic::arm_mve_add_predicated:
|
||||
case Intrinsic::arm_mve_mul_predicated:
|
||||
case Intrinsic::arm_mve_qadd_predicated:
|
||||
case Intrinsic::arm_mve_hadd_predicated:
|
||||
case Intrinsic::arm_mve_vqdmull_predicated:
|
||||
case Intrinsic::arm_mve_qdmulh_predicated:
|
||||
case Intrinsic::arm_mve_qrdmulh_predicated:
|
||||
case Intrinsic::arm_mve_fma_predicated:
|
||||
return true;
|
||||
case Intrinsic::arm_mve_sub_predicated:
|
||||
case Intrinsic::arm_mve_qsub_predicated:
|
||||
case Intrinsic::arm_mve_hsub_predicated:
|
||||
return Operand == 1;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -17,19 +17,18 @@ define arm_aapcs_vfpcc void @arm_var_f32_mve(float* %pSrc, i32 %blockSize, float
|
|||
; CHECK-NEXT: letp lr, .LBB0_1
|
||||
; CHECK-NEXT: @ %bb.2: @ %arm_mean_f32_mve.exit
|
||||
; CHECK-NEXT: vmov s4, r1
|
||||
; CHECK-NEXT: dlstp.32 lr, r1
|
||||
; CHECK-NEXT: mov r3, r1
|
||||
; CHECK-NEXT: dlstp.32 lr, r3
|
||||
; CHECK-NEXT: vadd.f32 s0, s3, s3
|
||||
; CHECK-NEXT: vcvt.f32.u32 s4, s4
|
||||
; CHECK-NEXT: vdiv.f32 s0, s0, s4
|
||||
; CHECK-NEXT: vmov r3, s0
|
||||
; CHECK-NEXT: vmov r12, s0
|
||||
; CHECK-NEXT: vmov.i32 q0, #0x0
|
||||
; CHECK-NEXT: vdup.32 q1, r3
|
||||
; CHECK-NEXT: mov r3, r1
|
||||
; CHECK-NEXT: .LBB0_3: @ %do.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r0], #16
|
||||
; CHECK-NEXT: vsub.f32 q2, q2, q1
|
||||
; CHECK-NEXT: vfma.f32 q0, q2, q2
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0], #16
|
||||
; CHECK-NEXT: vsub.f32 q1, q1, r12
|
||||
; CHECK-NEXT: vfma.f32 q0, q1, q1
|
||||
; CHECK-NEXT: letp lr, .LBB0_3
|
||||
; CHECK-NEXT: @ %bb.4: @ %do.end
|
||||
; CHECK-NEXT: subs r0, r1, #1
|
||||
|
|
|
@ -683,84 +683,86 @@ define i8* @signext(i8* %input_row, i8* %input_col, i16 zeroext %output_ch, i16
|
|||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
|
||||
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
|
||||
; CHECK-NEXT: .pad #20
|
||||
; CHECK-NEXT: sub sp, #20
|
||||
; CHECK-NEXT: .pad #24
|
||||
; CHECK-NEXT: sub sp, #24
|
||||
; CHECK-NEXT: add.w r12, sp, #12
|
||||
; CHECK-NEXT: cmp r3, #4
|
||||
; CHECK-NEXT: strd r0, r1, [sp, #12] @ 8-byte Folded Spill
|
||||
; CHECK-NEXT: stm.w r12, {r0, r1, r2} @ 12-byte Folded Spill
|
||||
; CHECK-NEXT: bne .LBB5_8
|
||||
; CHECK-NEXT: @ %bb.1: @ %for.cond.preheader
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: beq .LBB5_8
|
||||
; CHECK-NEXT: @ %bb.2: @ %for.body.lr.ph
|
||||
; CHECK-NEXT: ldr r7, [sp, #84]
|
||||
; CHECK-NEXT: mov.w r11, #0
|
||||
; CHECK-NEXT: ldr r3, [sp, #16] @ 4-byte Reload
|
||||
; CHECK-NEXT: ldr r0, [sp, #68]
|
||||
; CHECK-NEXT: add.w r1, r3, r7, lsl #1
|
||||
; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill
|
||||
; CHECK-NEXT: adds r1, r3, r7
|
||||
; CHECK-NEXT: str r1, [sp, #4] @ 4-byte Spill
|
||||
; CHECK-NEXT: add.w r1, r7, r7, lsl #1
|
||||
; CHECK-NEXT: vdup.16 q0, r0
|
||||
; CHECK-NEXT: adds r0, r3, r1
|
||||
; CHECK-NEXT: ldr r2, [sp, #88]
|
||||
; CHECK-NEXT: mov.w r9, #0
|
||||
; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload
|
||||
; CHECK-NEXT: ldr r4, [sp, #72]
|
||||
; CHECK-NEXT: add.w r0, r1, r2, lsl #1
|
||||
; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill
|
||||
; CHECK-NEXT: adds r0, r1, r2
|
||||
; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill
|
||||
; CHECK-NEXT: add.w r0, r2, r2, lsl #1
|
||||
; CHECK-NEXT: add r0, r1
|
||||
; CHECK-NEXT: str r0, [sp] @ 4-byte Spill
|
||||
; CHECK-NEXT: adds r0, r7, #7
|
||||
; CHECK-NEXT: lsr.w r9, r0, #3
|
||||
; CHECK-NEXT: adds r0, r2, #7
|
||||
; CHECK-NEXT: lsrs r2, r0, #3
|
||||
; CHECK-NEXT: b .LBB5_5
|
||||
; CHECK-NEXT: .LBB5_3: @ in Loop: Header=BB5_5 Depth=1
|
||||
; CHECK-NEXT: mov r10, r12
|
||||
; CHECK-NEXT: mov r8, r12
|
||||
; CHECK-NEXT: mov r10, r12
|
||||
; CHECK-NEXT: mov r6, r12
|
||||
; CHECK-NEXT: .LBB5_4: @ %for.cond.cleanup23
|
||||
; CHECK-NEXT: @ in Loop: Header=BB5_5 Depth=1
|
||||
; CHECK-NEXT: ldr r1, [sp, #92]
|
||||
; CHECK-NEXT: add.w r0, r8, r10
|
||||
; CHECK-NEXT: add.w r0, r10, r8
|
||||
; CHECK-NEXT: ldr r1, [sp, #96]
|
||||
; CHECK-NEXT: add r0, r6
|
||||
; CHECK-NEXT: add r0, r12
|
||||
; CHECK-NEXT: strb.w r0, [r1, r11]
|
||||
; CHECK-NEXT: add.w r11, r11, #1
|
||||
; CHECK-NEXT: cmp r11, r2
|
||||
; CHECK-NEXT: strb.w r0, [r1, r9]
|
||||
; CHECK-NEXT: add.w r9, r9, #1
|
||||
; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
|
||||
; CHECK-NEXT: cmp r9, r0
|
||||
; CHECK-NEXT: beq .LBB5_8
|
||||
; CHECK-NEXT: .LBB5_5: @ %for.body
|
||||
; CHECK-NEXT: @ =>This Loop Header: Depth=1
|
||||
; CHECK-NEXT: @ Child Loop BB5_7 Depth 2
|
||||
; CHECK-NEXT: ldr r0, [sp, #88]
|
||||
; CHECK-NEXT: subs.w lr, r9, r9
|
||||
; CHECK-NEXT: ldr.w r12, [r0, r11, lsl #2]
|
||||
; CHECK-NEXT: ldr r0, [sp, #92]
|
||||
; CHECK-NEXT: subs.w lr, r2, r2
|
||||
; CHECK-NEXT: ldr.w r12, [r0, r9, lsl #2]
|
||||
; CHECK-NEXT: ble .LBB5_3
|
||||
; CHECK-NEXT: @ %bb.6: @ %for.body24.preheader
|
||||
; CHECK-NEXT: @ in Loop: Header=BB5_5 Depth=1
|
||||
; CHECK-NEXT: ldr r3, [sp, #84]
|
||||
; CHECK-NEXT: ldr.w r11, [sp, #88]
|
||||
; CHECK-NEXT: mov r6, r12
|
||||
; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
|
||||
; CHECK-NEXT: dlstp.16 lr, r3
|
||||
; CHECK-NEXT: dlstp.16 lr, r11
|
||||
; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload
|
||||
; CHECK-NEXT: mov r8, r12
|
||||
; CHECK-NEXT: mla r5, r11, r3, r0
|
||||
; CHECK-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
|
||||
; CHECK-NEXT: ldrd r4, r7, [sp] @ 8-byte Folded Reload
|
||||
; CHECK-NEXT: mov r10, r12
|
||||
; CHECK-NEXT: mla r3, r9, r11, r0
|
||||
; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload
|
||||
; CHECK-NEXT: ldrd r7, r0, [sp] @ 8-byte Folded Reload
|
||||
; CHECK-NEXT: mov r8, r12
|
||||
; CHECK-NEXT: .LBB5_7: @ %for.body24
|
||||
; CHECK-NEXT: @ Parent Loop BB5_5 Depth=1
|
||||
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
|
||||
; CHECK-NEXT: vldrb.s16 q1, [r4], #8
|
||||
; CHECK-NEXT: vadd.i16 q2, q1, q0
|
||||
; CHECK-NEXT: vldrb.s16 q0, [r7], #8
|
||||
; CHECK-NEXT: vadd.i16 q1, q0, r4
|
||||
; CHECK-NEXT: vldrb.s16 q0, [r3], #8
|
||||
; CHECK-NEXT: vmlava.s16 r12, q0, q1
|
||||
; CHECK-NEXT: vldrb.s16 q1, [r5], #8
|
||||
; CHECK-NEXT: vmlava.s16 r12, q1, q2
|
||||
; CHECK-NEXT: vldrb.s16 q2, [r0], #8
|
||||
; CHECK-NEXT: vadd.i16 q2, q2, q0
|
||||
; CHECK-NEXT: vmlava.s16 r6, q1, q2
|
||||
; CHECK-NEXT: vldrb.s16 q2, [r7], #8
|
||||
; CHECK-NEXT: vadd.i16 q2, q2, q0
|
||||
; CHECK-NEXT: vmlava.s16 r8, q1, q2
|
||||
; CHECK-NEXT: vldrb.s16 q2, [r1], #8
|
||||
; CHECK-NEXT: vadd.i16 q2, q2, q0
|
||||
; CHECK-NEXT: vmlava.s16 r10, q1, q2
|
||||
; CHECK-NEXT: vadd.i16 q1, q1, r4
|
||||
; CHECK-NEXT: vmlava.s16 r6, q0, q1
|
||||
; CHECK-NEXT: vldrb.s16 q1, [r0], #8
|
||||
; CHECK-NEXT: vadd.i16 q1, q1, r4
|
||||
; CHECK-NEXT: vmlava.s16 r10, q0, q1
|
||||
; CHECK-NEXT: vldrb.s16 q1, [r1], #8
|
||||
; CHECK-NEXT: vadd.i16 q1, q1, r4
|
||||
; CHECK-NEXT: vmlava.s16 r8, q0, q1
|
||||
; CHECK-NEXT: letp lr, .LBB5_7
|
||||
; CHECK-NEXT: b .LBB5_4
|
||||
; CHECK-NEXT: .LBB5_8: @ %if.end
|
||||
; CHECK-NEXT: ldr r0, [sp, #92]
|
||||
; CHECK-NEXT: add sp, #20
|
||||
; CHECK-NEXT: ldr r0, [sp, #96]
|
||||
; CHECK-NEXT: add sp, #24
|
||||
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
|
||||
entry:
|
||||
%cmp = icmp eq i16 %num_cols, 4
|
||||
|
@ -869,83 +871,85 @@ define i8* @signext_optsize(i8* %input_row, i8* %input_col, i16 zeroext %output_
|
|||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
|
||||
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
|
||||
; CHECK-NEXT: .pad #20
|
||||
; CHECK-NEXT: sub sp, #20
|
||||
; CHECK-NEXT: .pad #24
|
||||
; CHECK-NEXT: sub sp, #24
|
||||
; CHECK-NEXT: add.w r12, sp, #12
|
||||
; CHECK-NEXT: cmp r3, #4
|
||||
; CHECK-NEXT: strd r0, r1, [sp, #12] @ 8-byte Folded Spill
|
||||
; CHECK-NEXT: stm.w r12, {r0, r1, r2} @ 12-byte Folded Spill
|
||||
; CHECK-NEXT: bne .LBB6_8
|
||||
; CHECK-NEXT: @ %bb.1: @ %for.cond.preheader
|
||||
; CHECK-NEXT: cmp r2, #0
|
||||
; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: beq .LBB6_8
|
||||
; CHECK-NEXT: @ %bb.2: @ %for.body.lr.ph
|
||||
; CHECK-NEXT: ldr r7, [sp, #84]
|
||||
; CHECK-NEXT: mov.w r11, #0
|
||||
; CHECK-NEXT: ldr r3, [sp, #16] @ 4-byte Reload
|
||||
; CHECK-NEXT: ldr r0, [sp, #68]
|
||||
; CHECK-NEXT: add.w r1, r3, r7, lsl #1
|
||||
; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill
|
||||
; CHECK-NEXT: adds r1, r3, r7
|
||||
; CHECK-NEXT: str r1, [sp, #4] @ 4-byte Spill
|
||||
; CHECK-NEXT: add.w r1, r7, r7, lsl #1
|
||||
; CHECK-NEXT: vdup.16 q0, r0
|
||||
; CHECK-NEXT: adds r0, r3, r1
|
||||
; CHECK-NEXT: ldr r2, [sp, #88]
|
||||
; CHECK-NEXT: mov.w r9, #0
|
||||
; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload
|
||||
; CHECK-NEXT: ldr r4, [sp, #72]
|
||||
; CHECK-NEXT: add.w r0, r1, r2, lsl #1
|
||||
; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill
|
||||
; CHECK-NEXT: adds r0, r1, r2
|
||||
; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill
|
||||
; CHECK-NEXT: add.w r0, r2, r2, lsl #1
|
||||
; CHECK-NEXT: add r0, r1
|
||||
; CHECK-NEXT: str r0, [sp] @ 4-byte Spill
|
||||
; CHECK-NEXT: adds r0, r7, #7
|
||||
; CHECK-NEXT: lsr.w r9, r0, #3
|
||||
; CHECK-NEXT: adds r0, r2, #7
|
||||
; CHECK-NEXT: lsrs r2, r0, #3
|
||||
; CHECK-NEXT: .LBB6_3: @ %for.body
|
||||
; CHECK-NEXT: @ =>This Loop Header: Depth=1
|
||||
; CHECK-NEXT: @ Child Loop BB6_5 Depth 2
|
||||
; CHECK-NEXT: ldr r0, [sp, #88]
|
||||
; CHECK-NEXT: subs.w lr, r9, r9
|
||||
; CHECK-NEXT: ldr.w r12, [r0, r11, lsl #2]
|
||||
; CHECK-NEXT: ldr r0, [sp, #92]
|
||||
; CHECK-NEXT: subs.w lr, r2, r2
|
||||
; CHECK-NEXT: ldr.w r12, [r0, r9, lsl #2]
|
||||
; CHECK-NEXT: ble .LBB6_6
|
||||
; CHECK-NEXT: @ %bb.4: @ %for.body24.preheader
|
||||
; CHECK-NEXT: @ in Loop: Header=BB6_3 Depth=1
|
||||
; CHECK-NEXT: ldr r3, [sp, #84]
|
||||
; CHECK-NEXT: ldr.w r11, [sp, #88]
|
||||
; CHECK-NEXT: mov r6, r12
|
||||
; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
|
||||
; CHECK-NEXT: dlstp.16 lr, r3
|
||||
; CHECK-NEXT: dlstp.16 lr, r11
|
||||
; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload
|
||||
; CHECK-NEXT: mov r8, r12
|
||||
; CHECK-NEXT: mla r5, r11, r3, r0
|
||||
; CHECK-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
|
||||
; CHECK-NEXT: ldrd r4, r7, [sp] @ 8-byte Folded Reload
|
||||
; CHECK-NEXT: mov r10, r12
|
||||
; CHECK-NEXT: mla r3, r9, r11, r0
|
||||
; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload
|
||||
; CHECK-NEXT: ldrd r7, r0, [sp] @ 8-byte Folded Reload
|
||||
; CHECK-NEXT: mov r8, r12
|
||||
; CHECK-NEXT: .LBB6_5: @ %for.body24
|
||||
; CHECK-NEXT: @ Parent Loop BB6_3 Depth=1
|
||||
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
|
||||
; CHECK-NEXT: vldrb.s16 q1, [r4], #8
|
||||
; CHECK-NEXT: vadd.i16 q2, q1, q0
|
||||
; CHECK-NEXT: vldrb.s16 q0, [r7], #8
|
||||
; CHECK-NEXT: vadd.i16 q1, q0, r4
|
||||
; CHECK-NEXT: vldrb.s16 q0, [r3], #8
|
||||
; CHECK-NEXT: vmlava.s16 r12, q0, q1
|
||||
; CHECK-NEXT: vldrb.s16 q1, [r5], #8
|
||||
; CHECK-NEXT: vmlava.s16 r12, q1, q2
|
||||
; CHECK-NEXT: vldrb.s16 q2, [r0], #8
|
||||
; CHECK-NEXT: vadd.i16 q2, q2, q0
|
||||
; CHECK-NEXT: vmlava.s16 r6, q1, q2
|
||||
; CHECK-NEXT: vldrb.s16 q2, [r7], #8
|
||||
; CHECK-NEXT: vadd.i16 q2, q2, q0
|
||||
; CHECK-NEXT: vmlava.s16 r8, q1, q2
|
||||
; CHECK-NEXT: vldrb.s16 q2, [r1], #8
|
||||
; CHECK-NEXT: vadd.i16 q2, q2, q0
|
||||
; CHECK-NEXT: vmlava.s16 r10, q1, q2
|
||||
; CHECK-NEXT: vadd.i16 q1, q1, r4
|
||||
; CHECK-NEXT: vmlava.s16 r6, q0, q1
|
||||
; CHECK-NEXT: vldrb.s16 q1, [r0], #8
|
||||
; CHECK-NEXT: vadd.i16 q1, q1, r4
|
||||
; CHECK-NEXT: vmlava.s16 r10, q0, q1
|
||||
; CHECK-NEXT: vldrb.s16 q1, [r1], #8
|
||||
; CHECK-NEXT: vadd.i16 q1, q1, r4
|
||||
; CHECK-NEXT: vmlava.s16 r8, q0, q1
|
||||
; CHECK-NEXT: letp lr, .LBB6_5
|
||||
; CHECK-NEXT: b .LBB6_7
|
||||
; CHECK-NEXT: .LBB6_6: @ in Loop: Header=BB6_3 Depth=1
|
||||
; CHECK-NEXT: mov r10, r12
|
||||
; CHECK-NEXT: mov r8, r12
|
||||
; CHECK-NEXT: mov r10, r12
|
||||
; CHECK-NEXT: mov r6, r12
|
||||
; CHECK-NEXT: .LBB6_7: @ %for.cond.cleanup23
|
||||
; CHECK-NEXT: @ in Loop: Header=BB6_3 Depth=1
|
||||
; CHECK-NEXT: ldr r1, [sp, #92]
|
||||
; CHECK-NEXT: add.w r0, r8, r10
|
||||
; CHECK-NEXT: add.w r0, r10, r8
|
||||
; CHECK-NEXT: ldr r1, [sp, #96]
|
||||
; CHECK-NEXT: add r0, r6
|
||||
; CHECK-NEXT: add r0, r12
|
||||
; CHECK-NEXT: strb.w r0, [r1, r11]
|
||||
; CHECK-NEXT: add.w r11, r11, #1
|
||||
; CHECK-NEXT: cmp r11, r2
|
||||
; CHECK-NEXT: strb.w r0, [r1, r9]
|
||||
; CHECK-NEXT: add.w r9, r9, #1
|
||||
; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
|
||||
; CHECK-NEXT: cmp r9, r0
|
||||
; CHECK-NEXT: bne .LBB6_3
|
||||
; CHECK-NEXT: .LBB6_8: @ %if.end
|
||||
; CHECK-NEXT: ldr r0, [sp, #92]
|
||||
; CHECK-NEXT: add sp, #20
|
||||
; CHECK-NEXT: ldr r0, [sp, #96]
|
||||
; CHECK-NEXT: add sp, #24
|
||||
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
|
||||
entry:
|
||||
%cmp = icmp eq i16 %num_cols, 4
|
||||
|
|
|
@ -10,13 +10,12 @@ define void @vadd(i32* %s1, i32 %c0, i32 %N) {
|
|||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: poplt {r7, pc}
|
||||
; CHECK-NEXT: .LBB0_1: @ %while.body.lr.ph
|
||||
; CHECK-NEXT: vdup.32 q0, r1
|
||||
; CHECK-NEXT: dlstp.32 lr, r2
|
||||
; CHECK-NEXT: .LBB0_2: @ %while.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
||||
; CHECK-NEXT: vadd.i32 q1, q1, q0
|
||||
; CHECK-NEXT: vstrw.32 q1, [r0], #16
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-NEXT: vadd.i32 q0, q0, r1
|
||||
; CHECK-NEXT: vstrw.32 q0, [r0], #16
|
||||
; CHECK-NEXT: letp lr, .LBB0_2
|
||||
; CHECK-NEXT: @ %bb.3: @ %while.end
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
|
@ -55,13 +54,12 @@ define void @vsub(i32* %s1, i32 %c0, i32 %N) {
|
|||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: poplt {r7, pc}
|
||||
; CHECK-NEXT: .LBB1_1: @ %while.body.lr.ph
|
||||
; CHECK-NEXT: vdup.32 q0, r1
|
||||
; CHECK-NEXT: dlstp.32 lr, r2
|
||||
; CHECK-NEXT: .LBB1_2: @ %while.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
||||
; CHECK-NEXT: vsub.i32 q1, q1, q0
|
||||
; CHECK-NEXT: vstrw.32 q1, [r0], #16
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-NEXT: vsub.i32 q0, q0, r1
|
||||
; CHECK-NEXT: vstrw.32 q0, [r0], #16
|
||||
; CHECK-NEXT: letp lr, .LBB1_2
|
||||
; CHECK-NEXT: @ %bb.3: @ %while.end
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
|
@ -100,13 +98,12 @@ define void @vmul(i32* %s1, i32 %c0, i32 %N) {
|
|||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: poplt {r7, pc}
|
||||
; CHECK-NEXT: .LBB2_1: @ %while.body.lr.ph
|
||||
; CHECK-NEXT: vdup.32 q0, r1
|
||||
; CHECK-NEXT: dlstp.32 lr, r2
|
||||
; CHECK-NEXT: .LBB2_2: @ %while.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
||||
; CHECK-NEXT: vmul.i32 q1, q1, q0
|
||||
; CHECK-NEXT: vstrw.32 q1, [r0], #16
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-NEXT: vmul.i32 q0, q0, r1
|
||||
; CHECK-NEXT: vstrw.32 q0, [r0], #16
|
||||
; CHECK-NEXT: letp lr, .LBB2_2
|
||||
; CHECK-NEXT: @ %bb.3: @ %while.end
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
|
@ -145,13 +142,12 @@ define void @vqadd(i32* %s1, i32 %c0, i32 %N) {
|
|||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: poplt {r7, pc}
|
||||
; CHECK-NEXT: .LBB3_1: @ %while.body.lr.ph
|
||||
; CHECK-NEXT: vdup.32 q0, r1
|
||||
; CHECK-NEXT: dlstp.32 lr, r2
|
||||
; CHECK-NEXT: .LBB3_2: @ %while.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
||||
; CHECK-NEXT: vqadd.s32 q1, q1, q0
|
||||
; CHECK-NEXT: vstrw.32 q1, [r0], #16
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-NEXT: vqadd.s32 q0, q0, r1
|
||||
; CHECK-NEXT: vstrw.32 q0, [r0], #16
|
||||
; CHECK-NEXT: letp lr, .LBB3_2
|
||||
; CHECK-NEXT: @ %bb.3: @ %while.end
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
|
@ -190,13 +186,12 @@ define void @vqsub(i32* %s1, i32 %c0, i32 %N) {
|
|||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: poplt {r7, pc}
|
||||
; CHECK-NEXT: .LBB4_1: @ %while.body.lr.ph
|
||||
; CHECK-NEXT: vdup.32 q0, r1
|
||||
; CHECK-NEXT: dlstp.32 lr, r2
|
||||
; CHECK-NEXT: .LBB4_2: @ %while.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
||||
; CHECK-NEXT: vqsub.s32 q1, q1, q0
|
||||
; CHECK-NEXT: vstrw.32 q1, [r0], #16
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-NEXT: vqsub.s32 q0, q0, r1
|
||||
; CHECK-NEXT: vstrw.32 q0, [r0], #16
|
||||
; CHECK-NEXT: letp lr, .LBB4_2
|
||||
; CHECK-NEXT: @ %bb.3: @ %while.end
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
|
@ -235,13 +230,12 @@ define void @vhadd(i32* %s1, i32 %c0, i32 %N) {
|
|||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: poplt {r7, pc}
|
||||
; CHECK-NEXT: .LBB5_1: @ %while.body.lr.ph
|
||||
; CHECK-NEXT: vdup.32 q0, r1
|
||||
; CHECK-NEXT: dlstp.32 lr, r2
|
||||
; CHECK-NEXT: .LBB5_2: @ %while.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
||||
; CHECK-NEXT: vhadd.s32 q1, q1, q0
|
||||
; CHECK-NEXT: vstrw.32 q1, [r0], #16
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-NEXT: vhadd.s32 q0, q0, r1
|
||||
; CHECK-NEXT: vstrw.32 q0, [r0], #16
|
||||
; CHECK-NEXT: letp lr, .LBB5_2
|
||||
; CHECK-NEXT: @ %bb.3: @ %while.end
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
|
@ -280,13 +274,12 @@ define void @vhsub(i32* %s1, i32 %c0, i32 %N) {
|
|||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: poplt {r7, pc}
|
||||
; CHECK-NEXT: .LBB6_1: @ %while.body.lr.ph
|
||||
; CHECK-NEXT: vdup.32 q0, r1
|
||||
; CHECK-NEXT: dlstp.32 lr, r2
|
||||
; CHECK-NEXT: .LBB6_2: @ %while.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
||||
; CHECK-NEXT: vhsub.s32 q1, q1, q0
|
||||
; CHECK-NEXT: vstrw.32 q1, [r0], #16
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-NEXT: vhsub.s32 q0, q0, r1
|
||||
; CHECK-NEXT: vstrw.32 q0, [r0], #16
|
||||
; CHECK-NEXT: letp lr, .LBB6_2
|
||||
; CHECK-NEXT: @ %bb.3: @ %while.end
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
|
@ -325,13 +318,12 @@ define void @vqdmull(i32* %s1, i32 %c0, i32 %N) {
|
|||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: poplt {r7, pc}
|
||||
; CHECK-NEXT: .LBB7_1: @ %while.body.lr.ph
|
||||
; CHECK-NEXT: vdup.16 q0, r1
|
||||
; CHECK-NEXT: dlstp.32 lr, r2
|
||||
; CHECK-NEXT: .LBB7_2: @ %while.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vldrh.s32 q1, [r0]
|
||||
; CHECK-NEXT: vqdmullb.s16 q1, q1, q0
|
||||
; CHECK-NEXT: vstrw.32 q1, [r0], #16
|
||||
; CHECK-NEXT: vldrh.s32 q0, [r0]
|
||||
; CHECK-NEXT: vqdmullb.s16 q0, q0, r1
|
||||
; CHECK-NEXT: vstrw.32 q0, [r0], #16
|
||||
; CHECK-NEXT: letp lr, .LBB7_2
|
||||
; CHECK-NEXT: @ %bb.3: @ %while.end
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
|
@ -374,13 +366,12 @@ define void @vqdmulh(i32* %s1, i32 %c0, i32 %N) {
|
|||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: poplt {r7, pc}
|
||||
; CHECK-NEXT: .LBB8_1: @ %while.body.lr.ph
|
||||
; CHECK-NEXT: vdup.32 q0, r1
|
||||
; CHECK-NEXT: dlstp.32 lr, r2
|
||||
; CHECK-NEXT: .LBB8_2: @ %while.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
||||
; CHECK-NEXT: vqdmulh.s32 q1, q1, q0
|
||||
; CHECK-NEXT: vstrw.32 q1, [r0], #16
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-NEXT: vqdmulh.s32 q0, q0, r1
|
||||
; CHECK-NEXT: vstrw.32 q0, [r0], #16
|
||||
; CHECK-NEXT: letp lr, .LBB8_2
|
||||
; CHECK-NEXT: @ %bb.3: @ %while.end
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
|
@ -419,13 +410,12 @@ define void @vqrdmulh(i32* %s1, i32 %c0, i32 %N) {
|
|||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: poplt {r7, pc}
|
||||
; CHECK-NEXT: .LBB9_1: @ %while.body.lr.ph
|
||||
; CHECK-NEXT: vdup.32 q0, r1
|
||||
; CHECK-NEXT: dlstp.32 lr, r2
|
||||
; CHECK-NEXT: .LBB9_2: @ %while.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
||||
; CHECK-NEXT: vqrdmulh.s32 q1, q1, q0
|
||||
; CHECK-NEXT: vstrw.32 q1, [r0], #16
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-NEXT: vqrdmulh.s32 q0, q0, r1
|
||||
; CHECK-NEXT: vstrw.32 q0, [r0], #16
|
||||
; CHECK-NEXT: letp lr, .LBB9_2
|
||||
; CHECK-NEXT: @ %bb.3: @ %while.end
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
|
@ -464,13 +454,12 @@ define void @vaddf(float* %s1, float %c0, i32 %N) {
|
|||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: poplt {r7, pc}
|
||||
; CHECK-NEXT: .LBB10_1: @ %while.body.lr.ph
|
||||
; CHECK-NEXT: vdup.32 q0, r1
|
||||
; CHECK-NEXT: dlstp.32 lr, r2
|
||||
; CHECK-NEXT: .LBB10_2: @ %while.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
||||
; CHECK-NEXT: vadd.f32 q1, q1, q0
|
||||
; CHECK-NEXT: vstrw.32 q1, [r0], #16
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-NEXT: vadd.f32 q0, q0, r1
|
||||
; CHECK-NEXT: vstrw.32 q0, [r0], #16
|
||||
; CHECK-NEXT: letp lr, .LBB10_2
|
||||
; CHECK-NEXT: @ %bb.3: @ %while.end
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
|
@ -509,13 +498,12 @@ define void @vsubf(float* %s1, float %c0, i32 %N) {
|
|||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: poplt {r7, pc}
|
||||
; CHECK-NEXT: .LBB11_1: @ %while.body.lr.ph
|
||||
; CHECK-NEXT: vdup.32 q0, r1
|
||||
; CHECK-NEXT: dlstp.32 lr, r2
|
||||
; CHECK-NEXT: .LBB11_2: @ %while.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
||||
; CHECK-NEXT: vsub.f32 q1, q1, q0
|
||||
; CHECK-NEXT: vstrw.32 q1, [r0], #16
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-NEXT: vsub.f32 q0, q0, r1
|
||||
; CHECK-NEXT: vstrw.32 q0, [r0], #16
|
||||
; CHECK-NEXT: letp lr, .LBB11_2
|
||||
; CHECK-NEXT: @ %bb.3: @ %while.end
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
|
@ -554,13 +542,12 @@ define void @vmulf(float* %s1, float %c0, i32 %N) {
|
|||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: poplt {r7, pc}
|
||||
; CHECK-NEXT: .LBB12_1: @ %while.body.lr.ph
|
||||
; CHECK-NEXT: vdup.32 q0, r1
|
||||
; CHECK-NEXT: dlstp.32 lr, r2
|
||||
; CHECK-NEXT: .LBB12_2: @ %while.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
||||
; CHECK-NEXT: vmul.f32 q1, q1, q0
|
||||
; CHECK-NEXT: vstrw.32 q1, [r0], #16
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
||||
; CHECK-NEXT: vmul.f32 q0, q0, r1
|
||||
; CHECK-NEXT: vstrw.32 q0, [r0], #16
|
||||
; CHECK-NEXT: letp lr, .LBB12_2
|
||||
; CHECK-NEXT: @ %bb.3: @ %while.end
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
|
@ -599,14 +586,13 @@ define void @vfma(float* %s1, float* %s2, float %c0, i32 %N) {
|
|||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: poplt {r7, pc}
|
||||
; CHECK-NEXT: .LBB13_1: @ %while.body.lr.ph
|
||||
; CHECK-NEXT: vdup.32 q0, r2
|
||||
; CHECK-NEXT: dlstp.32 lr, r3
|
||||
; CHECK-NEXT: .LBB13_2: @ %while.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r1]
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r0]
|
||||
; CHECK-NEXT: vfma.f32 q2, q1, q0
|
||||
; CHECK-NEXT: vstrw.32 q2, [r0], #16
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r1]
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
||||
; CHECK-NEXT: vfma.f32 q1, q0, r2
|
||||
; CHECK-NEXT: vstrw.32 q1, [r0], #16
|
||||
; CHECK-NEXT: letp lr, .LBB13_2
|
||||
; CHECK-NEXT: @ %bb.3: @ %while.end
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
|
@ -647,15 +633,13 @@ define void @vfmas(float* %s1, float* %s2, float %c0, i32 %N) {
|
|||
; CHECK-NEXT: it lt
|
||||
; CHECK-NEXT: poplt {r7, pc}
|
||||
; CHECK-NEXT: .LBB14_1: @ %while.body.lr.ph
|
||||
; CHECK-NEXT: vdup.32 q0, r2
|
||||
; CHECK-NEXT: dlstp.32 lr, r3
|
||||
; CHECK-NEXT: .LBB14_2: @ %while.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vmov q3, q0
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r1]
|
||||
; CHECK-NEXT: vldrw.u32 q2, [r0]
|
||||
; CHECK-NEXT: vfma.f32 q3, q2, q1
|
||||
; CHECK-NEXT: vstrw.32 q3, [r0], #16
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r1]
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
||||
; CHECK-NEXT: vfmas.f32 q1, q0, r2
|
||||
; CHECK-NEXT: vstrw.32 q1, [r0], #16
|
||||
; CHECK-NEXT: letp lr, .LBB14_2
|
||||
; CHECK-NEXT: @ %bb.3: @ %while.end
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
|
|
Loading…
Reference in New Issue