forked from OSchip/llvm-project
[LSR] Drop potentially invalid nowrap flags when switching to post-inc IV (PR46943)
When LSR converts a branch on the pre-inc IV into a branch on the post-inc IV, the nowrap flags on the addition may no longer be valid. Previously, a poison result of the addition might have been ignored, in which case the program was well defined. After branching on the post-inc IV, we might be branching on poison, which is undefined behavior. Fix this by discarding nowrap flags which are not present on the SCEV expression. Nowrap flags on the SCEV expression are proven by SCEV to always hold, independently of how the expression will be used. This is essentially the same fix we applied to IndVars LFTR, which also performs this kind of pre-inc to post-inc conversion. I believe a similar problem can also exist for getelementptr inbounds, but I was not able to come up with a problematic test case. The inbounds case would have to be addressed differently anyway (as SCEV does not track this property). Fixes https://bugs.llvm.org/show_bug.cgi?id=46943. Differential Revision: https://reviews.llvm.org/D95286
This commit is contained in:
parent
15141cd115
commit
835104a114
|
@ -1440,6 +1440,17 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
|
|||
assert(LatchBlock && "PostInc mode requires a unique loop latch!");
|
||||
Result = PN->getIncomingValueForBlock(LatchBlock);
|
||||
|
||||
// We might be introducing a new use of the post-inc IV that is not poison
|
||||
// safe, in which case we should drop poison generating flags. Only keep
|
||||
// those flags for which SCEV has proven that they always hold.
|
||||
if (isa<OverflowingBinaryOperator>(Result)) {
|
||||
auto *I = cast<Instruction>(Result);
|
||||
if (!S->hasNoUnsignedWrap())
|
||||
I->setHasNoUnsignedWrap(false);
|
||||
if (!S->hasNoSignedWrap())
|
||||
I->setHasNoSignedWrap(false);
|
||||
}
|
||||
|
||||
// For an expansion to use the postinc form, the client must call
|
||||
// expandCodeFor with an InsertPoint that is either outside the PostIncLoop
|
||||
// or dominated by IVIncInsertPos.
|
||||
|
|
|
@ -27,11 +27,11 @@ define arm_aapcs_vfpcc void @fast_float_mul(float* nocapture %a, float* nocaptur
|
|||
; CHECK-NEXT: beq .LBB0_4
|
||||
; CHECK-NEXT: @ %bb.2: @ %for.body.preheader
|
||||
; CHECK-NEXT: subs r5, r3, #1
|
||||
; CHECK-NEXT: and r7, r3, #3
|
||||
; CHECK-NEXT: and lr, r3, #3
|
||||
; CHECK-NEXT: cmp r5, #3
|
||||
; CHECK-NEXT: bhs .LBB0_6
|
||||
; CHECK-NEXT: @ %bb.3:
|
||||
; CHECK-NEXT: mov.w r12, #0
|
||||
; CHECK-NEXT: movs r3, #0
|
||||
; CHECK-NEXT: b .LBB0_8
|
||||
; CHECK-NEXT: .LBB0_4: @ %vector.ph
|
||||
; CHECK-NEXT: mov.w r12, #0
|
||||
|
@ -46,44 +46,40 @@ define arm_aapcs_vfpcc void @fast_float_mul(float* nocapture %a, float* nocaptur
|
|||
; CHECK-NEXT: letp lr, .LBB0_5
|
||||
; CHECK-NEXT: b .LBB0_11
|
||||
; CHECK-NEXT: .LBB0_6: @ %for.body.preheader.new
|
||||
; CHECK-NEXT: bic r3, r3, #3
|
||||
; CHECK-NEXT: movs r5, #1
|
||||
; CHECK-NEXT: subs r3, #4
|
||||
; CHECK-NEXT: mov.w r12, #0
|
||||
; CHECK-NEXT: add.w lr, r5, r3, lsr #2
|
||||
; CHECK-NEXT: sub.w r12, r3, lr
|
||||
; CHECK-NEXT: movs r4, #0
|
||||
; CHECK-NEXT: movs r3, #0
|
||||
; CHECK-NEXT: dls lr, lr
|
||||
; CHECK-NEXT: .LBB0_7: @ %for.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: adds r4, r1, r3
|
||||
; CHECK-NEXT: adds r5, r2, r3
|
||||
; CHECK-NEXT: adds r6, r0, r3
|
||||
; CHECK-NEXT: adds r3, #16
|
||||
; CHECK-NEXT: vldr s0, [r4]
|
||||
; CHECK-NEXT: add.w r12, r12, #4
|
||||
; CHECK-NEXT: vldr s2, [r5]
|
||||
; CHECK-NEXT: adds r5, r1, r4
|
||||
; CHECK-NEXT: adds r6, r2, r4
|
||||
; CHECK-NEXT: adds r7, r0, r4
|
||||
; CHECK-NEXT: adds r3, #4
|
||||
; CHECK-NEXT: vldr s0, [r5]
|
||||
; CHECK-NEXT: adds r4, #16
|
||||
; CHECK-NEXT: vldr s2, [r6]
|
||||
; CHECK-NEXT: cmp r12, r3
|
||||
; CHECK-NEXT: vmul.f32 s0, s2, s0
|
||||
; CHECK-NEXT: vstr s0, [r6]
|
||||
; CHECK-NEXT: vldr s0, [r4, #4]
|
||||
; CHECK-NEXT: vldr s2, [r5, #4]
|
||||
; CHECK-NEXT: vstr s0, [r7]
|
||||
; CHECK-NEXT: vldr s0, [r5, #4]
|
||||
; CHECK-NEXT: vldr s2, [r6, #4]
|
||||
; CHECK-NEXT: vmul.f32 s0, s2, s0
|
||||
; CHECK-NEXT: vstr s0, [r6, #4]
|
||||
; CHECK-NEXT: vldr s0, [r4, #8]
|
||||
; CHECK-NEXT: vldr s2, [r5, #8]
|
||||
; CHECK-NEXT: vstr s0, [r7, #4]
|
||||
; CHECK-NEXT: vldr s0, [r5, #8]
|
||||
; CHECK-NEXT: vldr s2, [r6, #8]
|
||||
; CHECK-NEXT: vmul.f32 s0, s2, s0
|
||||
; CHECK-NEXT: vstr s0, [r6, #8]
|
||||
; CHECK-NEXT: vldr s0, [r4, #12]
|
||||
; CHECK-NEXT: vldr s2, [r5, #12]
|
||||
; CHECK-NEXT: vstr s0, [r7, #8]
|
||||
; CHECK-NEXT: vldr s0, [r5, #12]
|
||||
; CHECK-NEXT: vldr s2, [r6, #12]
|
||||
; CHECK-NEXT: vmul.f32 s0, s2, s0
|
||||
; CHECK-NEXT: vstr s0, [r6, #12]
|
||||
; CHECK-NEXT: le lr, .LBB0_7
|
||||
; CHECK-NEXT: vstr s0, [r7, #12]
|
||||
; CHECK-NEXT: bne .LBB0_7
|
||||
; CHECK-NEXT: .LBB0_8: @ %for.cond.cleanup.loopexit.unr-lcssa
|
||||
; CHECK-NEXT: wls lr, r7, .LBB0_11
|
||||
; CHECK-NEXT: wls lr, lr, .LBB0_11
|
||||
; CHECK-NEXT: @ %bb.9: @ %for.body.epil.preheader
|
||||
; CHECK-NEXT: add.w r1, r1, r12, lsl #2
|
||||
; CHECK-NEXT: add.w r2, r2, r12, lsl #2
|
||||
; CHECK-NEXT: add.w r0, r0, r12, lsl #2
|
||||
; CHECK-NEXT: mov lr, r7
|
||||
; CHECK-NEXT: add.w r1, r1, r3, lsl #2
|
||||
; CHECK-NEXT: add.w r2, r2, r3, lsl #2
|
||||
; CHECK-NEXT: add.w r0, r0, r3, lsl #2
|
||||
; CHECK-NEXT: .LBB0_10: @ %for.body.epil
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vldr s0, [r1]
|
||||
|
|
|
@ -1459,58 +1459,53 @@ define arm_aapcs_vfpcc float @half_half_mac(half* nocapture readonly %a, half* n
|
|||
; CHECK-NEXT: cbz r2, .LBB9_3
|
||||
; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
|
||||
; CHECK-NEXT: subs r3, r2, #1
|
||||
; CHECK-NEXT: and r5, r2, #3
|
||||
; CHECK-NEXT: and lr, r2, #3
|
||||
; CHECK-NEXT: vldr s0, .LCPI9_0
|
||||
; CHECK-NEXT: cmp r3, #3
|
||||
; CHECK-NEXT: bhs .LBB9_4
|
||||
; CHECK-NEXT: @ %bb.2:
|
||||
; CHECK-NEXT: vldr s0, .LCPI9_0
|
||||
; CHECK-NEXT: mov.w r12, #0
|
||||
; CHECK-NEXT: movs r2, #0
|
||||
; CHECK-NEXT: b .LBB9_6
|
||||
; CHECK-NEXT: .LBB9_3:
|
||||
; CHECK-NEXT: vldr s0, .LCPI9_0
|
||||
; CHECK-NEXT: b .LBB9_9
|
||||
; CHECK-NEXT: .LBB9_4: @ %for.body.preheader.new
|
||||
; CHECK-NEXT: bic r2, r2, #3
|
||||
; CHECK-NEXT: movs r3, #1
|
||||
; CHECK-NEXT: subs r2, #4
|
||||
; CHECK-NEXT: vldr s0, .LCPI9_0
|
||||
; CHECK-NEXT: mov.w r12, #0
|
||||
; CHECK-NEXT: add.w lr, r3, r2, lsr #2
|
||||
; CHECK-NEXT: sub.w r12, r2, lr
|
||||
; CHECK-NEXT: movs r3, #0
|
||||
; CHECK-NEXT: dls lr, lr
|
||||
; CHECK-NEXT: movs r2, #0
|
||||
; CHECK-NEXT: .LBB9_5: @ %for.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: adds r4, r0, r3
|
||||
; CHECK-NEXT: adds r2, r1, r3
|
||||
; CHECK-NEXT: vldr.16 s2, [r2, #6]
|
||||
; CHECK-NEXT: vldr.16 s4, [r4, #6]
|
||||
; CHECK-NEXT: vldr.16 s6, [r4, #4]
|
||||
; CHECK-NEXT: vldr.16 s8, [r4, #2]
|
||||
; CHECK-NEXT: adds r5, r0, r3
|
||||
; CHECK-NEXT: adds r4, r1, r3
|
||||
; CHECK-NEXT: vldr.16 s2, [r4, #6]
|
||||
; CHECK-NEXT: vldr.16 s4, [r5, #6]
|
||||
; CHECK-NEXT: vldr.16 s6, [r5, #4]
|
||||
; CHECK-NEXT: vldr.16 s8, [r5, #2]
|
||||
; CHECK-NEXT: vmul.f16 s2, s4, s2
|
||||
; CHECK-NEXT: vldr.16 s4, [r2, #4]
|
||||
; CHECK-NEXT: vldr.16 s10, [r4]
|
||||
; CHECK-NEXT: vldr.16 s4, [r4, #4]
|
||||
; CHECK-NEXT: vldr.16 s10, [r5]
|
||||
; CHECK-NEXT: vcvtb.f32.f16 s2, s2
|
||||
; CHECK-NEXT: vmul.f16 s4, s6, s4
|
||||
; CHECK-NEXT: vldr.16 s6, [r2, #2]
|
||||
; CHECK-NEXT: vldr.16 s6, [r4, #2]
|
||||
; CHECK-NEXT: vcvtb.f32.f16 s4, s4
|
||||
; CHECK-NEXT: adds r3, #8
|
||||
; CHECK-NEXT: adds r2, #4
|
||||
; CHECK-NEXT: vmul.f16 s6, s8, s6
|
||||
; CHECK-NEXT: vldr.16 s8, [r2]
|
||||
; CHECK-NEXT: vldr.16 s8, [r4]
|
||||
; CHECK-NEXT: vcvtb.f32.f16 s6, s6
|
||||
; CHECK-NEXT: add.w r12, r12, #4
|
||||
; CHECK-NEXT: adds r3, #8
|
||||
; CHECK-NEXT: vmul.f16 s8, s10, s8
|
||||
; CHECK-NEXT: cmp r12, r2
|
||||
; CHECK-NEXT: vcvtb.f32.f16 s8, s8
|
||||
; CHECK-NEXT: vadd.f32 s0, s0, s8
|
||||
; CHECK-NEXT: vadd.f32 s0, s0, s6
|
||||
; CHECK-NEXT: vadd.f32 s0, s0, s4
|
||||
; CHECK-NEXT: vadd.f32 s0, s0, s2
|
||||
; CHECK-NEXT: le lr, .LBB9_5
|
||||
; CHECK-NEXT: bne .LBB9_5
|
||||
; CHECK-NEXT: .LBB9_6: @ %for.cond.cleanup.loopexit.unr-lcssa
|
||||
; CHECK-NEXT: wls lr, r5, .LBB9_9
|
||||
; CHECK-NEXT: wls lr, lr, .LBB9_9
|
||||
; CHECK-NEXT: @ %bb.7: @ %for.body.epil.preheader
|
||||
; CHECK-NEXT: add.w r0, r0, r12, lsl #1
|
||||
; CHECK-NEXT: add.w r1, r1, r12, lsl #1
|
||||
; CHECK-NEXT: mov lr, r5
|
||||
; CHECK-NEXT: add.w r0, r0, r2, lsl #1
|
||||
; CHECK-NEXT: add.w r1, r1, r2, lsl #1
|
||||
; CHECK-NEXT: .LBB9_8: @ %for.body.epil
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vldr.16 s2, [r1]
|
||||
|
@ -1616,58 +1611,53 @@ define arm_aapcs_vfpcc float @half_half_acc(half* nocapture readonly %a, half* n
|
|||
; CHECK-NEXT: cbz r2, .LBB10_3
|
||||
; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
|
||||
; CHECK-NEXT: subs r3, r2, #1
|
||||
; CHECK-NEXT: and r5, r2, #3
|
||||
; CHECK-NEXT: and lr, r2, #3
|
||||
; CHECK-NEXT: vldr s0, .LCPI10_0
|
||||
; CHECK-NEXT: cmp r3, #3
|
||||
; CHECK-NEXT: bhs .LBB10_4
|
||||
; CHECK-NEXT: @ %bb.2:
|
||||
; CHECK-NEXT: vldr s0, .LCPI10_0
|
||||
; CHECK-NEXT: mov.w r12, #0
|
||||
; CHECK-NEXT: movs r2, #0
|
||||
; CHECK-NEXT: b .LBB10_6
|
||||
; CHECK-NEXT: .LBB10_3:
|
||||
; CHECK-NEXT: vldr s0, .LCPI10_0
|
||||
; CHECK-NEXT: b .LBB10_9
|
||||
; CHECK-NEXT: .LBB10_4: @ %for.body.preheader.new
|
||||
; CHECK-NEXT: bic r2, r2, #3
|
||||
; CHECK-NEXT: movs r3, #1
|
||||
; CHECK-NEXT: subs r2, #4
|
||||
; CHECK-NEXT: vldr s0, .LCPI10_0
|
||||
; CHECK-NEXT: mov.w r12, #0
|
||||
; CHECK-NEXT: add.w lr, r3, r2, lsr #2
|
||||
; CHECK-NEXT: sub.w r12, r2, lr
|
||||
; CHECK-NEXT: movs r3, #0
|
||||
; CHECK-NEXT: dls lr, lr
|
||||
; CHECK-NEXT: movs r2, #0
|
||||
; CHECK-NEXT: .LBB10_5: @ %for.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: adds r4, r0, r3
|
||||
; CHECK-NEXT: adds r2, r1, r3
|
||||
; CHECK-NEXT: vldr.16 s2, [r2, #6]
|
||||
; CHECK-NEXT: vldr.16 s4, [r4, #6]
|
||||
; CHECK-NEXT: vldr.16 s6, [r4, #4]
|
||||
; CHECK-NEXT: vldr.16 s8, [r4, #2]
|
||||
; CHECK-NEXT: adds r5, r0, r3
|
||||
; CHECK-NEXT: adds r4, r1, r3
|
||||
; CHECK-NEXT: vldr.16 s2, [r4, #6]
|
||||
; CHECK-NEXT: vldr.16 s4, [r5, #6]
|
||||
; CHECK-NEXT: vldr.16 s6, [r5, #4]
|
||||
; CHECK-NEXT: vldr.16 s8, [r5, #2]
|
||||
; CHECK-NEXT: vadd.f16 s2, s4, s2
|
||||
; CHECK-NEXT: vldr.16 s4, [r2, #4]
|
||||
; CHECK-NEXT: vldr.16 s10, [r4]
|
||||
; CHECK-NEXT: vldr.16 s4, [r4, #4]
|
||||
; CHECK-NEXT: vldr.16 s10, [r5]
|
||||
; CHECK-NEXT: vcvtb.f32.f16 s2, s2
|
||||
; CHECK-NEXT: vadd.f16 s4, s6, s4
|
||||
; CHECK-NEXT: vldr.16 s6, [r2, #2]
|
||||
; CHECK-NEXT: vldr.16 s6, [r4, #2]
|
||||
; CHECK-NEXT: vcvtb.f32.f16 s4, s4
|
||||
; CHECK-NEXT: adds r3, #8
|
||||
; CHECK-NEXT: adds r2, #4
|
||||
; CHECK-NEXT: vadd.f16 s6, s8, s6
|
||||
; CHECK-NEXT: vldr.16 s8, [r2]
|
||||
; CHECK-NEXT: vldr.16 s8, [r4]
|
||||
; CHECK-NEXT: vcvtb.f32.f16 s6, s6
|
||||
; CHECK-NEXT: add.w r12, r12, #4
|
||||
; CHECK-NEXT: adds r3, #8
|
||||
; CHECK-NEXT: vadd.f16 s8, s10, s8
|
||||
; CHECK-NEXT: cmp r12, r2
|
||||
; CHECK-NEXT: vcvtb.f32.f16 s8, s8
|
||||
; CHECK-NEXT: vadd.f32 s0, s0, s8
|
||||
; CHECK-NEXT: vadd.f32 s0, s0, s6
|
||||
; CHECK-NEXT: vadd.f32 s0, s0, s4
|
||||
; CHECK-NEXT: vadd.f32 s0, s0, s2
|
||||
; CHECK-NEXT: le lr, .LBB10_5
|
||||
; CHECK-NEXT: bne .LBB10_5
|
||||
; CHECK-NEXT: .LBB10_6: @ %for.cond.cleanup.loopexit.unr-lcssa
|
||||
; CHECK-NEXT: wls lr, r5, .LBB10_9
|
||||
; CHECK-NEXT: wls lr, lr, .LBB10_9
|
||||
; CHECK-NEXT: @ %bb.7: @ %for.body.epil.preheader
|
||||
; CHECK-NEXT: add.w r0, r0, r12, lsl #1
|
||||
; CHECK-NEXT: add.w r1, r1, r12, lsl #1
|
||||
; CHECK-NEXT: mov lr, r5
|
||||
; CHECK-NEXT: add.w r0, r0, r2, lsl #1
|
||||
; CHECK-NEXT: add.w r1, r1, r2, lsl #1
|
||||
; CHECK-NEXT: .LBB10_8: @ %for.body.epil
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vldr.16 s2, [r1]
|
||||
|
@ -1773,65 +1763,60 @@ define arm_aapcs_vfpcc float @half_short_mac(half* nocapture readonly %a, i16* n
|
|||
; CHECK-NEXT: cbz r2, .LBB11_3
|
||||
; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
|
||||
; CHECK-NEXT: subs r3, r2, #1
|
||||
; CHECK-NEXT: and r6, r2, #3
|
||||
; CHECK-NEXT: and lr, r2, #3
|
||||
; CHECK-NEXT: vldr s0, .LCPI11_0
|
||||
; CHECK-NEXT: cmp r3, #3
|
||||
; CHECK-NEXT: bhs .LBB11_4
|
||||
; CHECK-NEXT: @ %bb.2:
|
||||
; CHECK-NEXT: vldr s0, .LCPI11_0
|
||||
; CHECK-NEXT: mov.w r12, #0
|
||||
; CHECK-NEXT: movs r2, #0
|
||||
; CHECK-NEXT: b .LBB11_6
|
||||
; CHECK-NEXT: .LBB11_3:
|
||||
; CHECK-NEXT: vldr s0, .LCPI11_0
|
||||
; CHECK-NEXT: b .LBB11_9
|
||||
; CHECK-NEXT: .LBB11_4: @ %for.body.preheader.new
|
||||
; CHECK-NEXT: bic r2, r2, #3
|
||||
; CHECK-NEXT: movs r3, #1
|
||||
; CHECK-NEXT: subs r2, #4
|
||||
; CHECK-NEXT: vldr s0, .LCPI11_0
|
||||
; CHECK-NEXT: mov.w r12, #0
|
||||
; CHECK-NEXT: add.w lr, r3, r2, lsr #2
|
||||
; CHECK-NEXT: sub.w r12, r2, lr
|
||||
; CHECK-NEXT: adds r3, r1, #4
|
||||
; CHECK-NEXT: dls lr, lr
|
||||
; CHECK-NEXT: adds r2, r0, #4
|
||||
; CHECK-NEXT: adds r4, r0, #4
|
||||
; CHECK-NEXT: movs r2, #0
|
||||
; CHECK-NEXT: .LBB11_5: @ %for.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: ldrsh.w r4, [r3, #2]
|
||||
; CHECK-NEXT: vldr.16 s2, [r2, #2]
|
||||
; CHECK-NEXT: add.w r12, r12, #4
|
||||
; CHECK-NEXT: vmov s4, r4
|
||||
; CHECK-NEXT: ldrsh r4, [r3], #8
|
||||
; CHECK-NEXT: ldrsh.w r5, [r3, #2]
|
||||
; CHECK-NEXT: vldr.16 s2, [r4, #2]
|
||||
; CHECK-NEXT: adds r2, #4
|
||||
; CHECK-NEXT: cmp r12, r2
|
||||
; CHECK-NEXT: vmov s4, r5
|
||||
; CHECK-NEXT: ldrsh r5, [r3], #8
|
||||
; CHECK-NEXT: vcvt.f16.s32 s4, s4
|
||||
; CHECK-NEXT: ldrsh r5, [r3, #-10]
|
||||
; CHECK-NEXT: ldrsh r6, [r3, #-10]
|
||||
; CHECK-NEXT: vmul.f16 s2, s2, s4
|
||||
; CHECK-NEXT: vmov s6, r4
|
||||
; CHECK-NEXT: vldr.16 s4, [r2]
|
||||
; CHECK-NEXT: vmov s6, r5
|
||||
; CHECK-NEXT: vldr.16 s4, [r4]
|
||||
; CHECK-NEXT: vcvt.f16.s32 s6, s6
|
||||
; CHECK-NEXT: ldrsh r4, [r3, #-12]
|
||||
; CHECK-NEXT: ldrsh r5, [r3, #-12]
|
||||
; CHECK-NEXT: vmul.f16 s4, s4, s6
|
||||
; CHECK-NEXT: vmov s8, r5
|
||||
; CHECK-NEXT: vldr.16 s6, [r2, #-2]
|
||||
; CHECK-NEXT: vmov s8, r6
|
||||
; CHECK-NEXT: vldr.16 s6, [r4, #-2]
|
||||
; CHECK-NEXT: vcvt.f16.s32 s8, s8
|
||||
; CHECK-NEXT: vmov s10, r4
|
||||
; CHECK-NEXT: vmov s10, r5
|
||||
; CHECK-NEXT: vcvtb.f32.f16 s4, s4
|
||||
; CHECK-NEXT: vmul.f16 s6, s6, s8
|
||||
; CHECK-NEXT: vldr.16 s8, [r2, #-4]
|
||||
; CHECK-NEXT: vldr.16 s8, [r4, #-4]
|
||||
; CHECK-NEXT: vcvt.f16.s32 s10, s10
|
||||
; CHECK-NEXT: vcvtb.f32.f16 s6, s6
|
||||
; CHECK-NEXT: vmul.f16 s8, s8, s10
|
||||
; CHECK-NEXT: vcvtb.f32.f16 s2, s2
|
||||
; CHECK-NEXT: vcvtb.f32.f16 s8, s8
|
||||
; CHECK-NEXT: adds r2, #8
|
||||
; CHECK-NEXT: add.w r4, r4, #8
|
||||
; CHECK-NEXT: vadd.f32 s0, s0, s8
|
||||
; CHECK-NEXT: vadd.f32 s0, s0, s6
|
||||
; CHECK-NEXT: vadd.f32 s0, s0, s4
|
||||
; CHECK-NEXT: vadd.f32 s0, s0, s2
|
||||
; CHECK-NEXT: le lr, .LBB11_5
|
||||
; CHECK-NEXT: bne .LBB11_5
|
||||
; CHECK-NEXT: .LBB11_6: @ %for.cond.cleanup.loopexit.unr-lcssa
|
||||
; CHECK-NEXT: wls lr, r6, .LBB11_9
|
||||
; CHECK-NEXT: wls lr, lr, .LBB11_9
|
||||
; CHECK-NEXT: @ %bb.7: @ %for.body.epil.preheader
|
||||
; CHECK-NEXT: add.w r0, r0, r12, lsl #1
|
||||
; CHECK-NEXT: add.w r1, r1, r12, lsl #1
|
||||
; CHECK-NEXT: mov lr, r6
|
||||
; CHECK-NEXT: add.w r0, r0, r2, lsl #1
|
||||
; CHECK-NEXT: add.w r1, r1, r2, lsl #1
|
||||
; CHECK-NEXT: .LBB11_8: @ %for.body.epil
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: ldrsh r2, [r1], #2
|
||||
|
|
|
@ -387,37 +387,37 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_char(i8* nocapture readonly
|
|||
; CHECK-NEXT: cmp.w r12, #0
|
||||
; CHECK-NEXT: beq.w .LBB5_11
|
||||
; CHECK-NEXT: @ %bb.1: @ %for.body.lr.ph
|
||||
; CHECK-NEXT: add.w r4, r3, r12, lsl #2
|
||||
; CHECK-NEXT: add.w r5, r1, r12
|
||||
; CHECK-NEXT: cmp r4, r1
|
||||
; CHECK-NEXT: add.w r6, r0, r12
|
||||
; CHECK-NEXT: cset lr, hi
|
||||
; CHECK-NEXT: cmp r5, r3
|
||||
; CHECK-NEXT: cset r5, hi
|
||||
; CHECK-NEXT: cmp r4, r0
|
||||
; CHECK-NEXT: cset r4, hi
|
||||
; CHECK-NEXT: add.w r5, r3, r12, lsl #2
|
||||
; CHECK-NEXT: add.w r6, r1, r12
|
||||
; CHECK-NEXT: cmp r5, r1
|
||||
; CHECK-NEXT: add.w r4, r0, r12
|
||||
; CHECK-NEXT: cset r7, hi
|
||||
; CHECK-NEXT: cmp r6, r3
|
||||
; CHECK-NEXT: cset r6, hi
|
||||
; CHECK-NEXT: ands r4, r6
|
||||
; CHECK-NEXT: lsls r4, r4, #31
|
||||
; CHECK-NEXT: cmp r5, r0
|
||||
; CHECK-NEXT: cset r5, hi
|
||||
; CHECK-NEXT: cmp r4, r3
|
||||
; CHECK-NEXT: cset r4, hi
|
||||
; CHECK-NEXT: ands r5, r4
|
||||
; CHECK-NEXT: lsls r5, r5, #31
|
||||
; CHECK-NEXT: itt eq
|
||||
; CHECK-NEXT: andeq.w r6, r5, lr
|
||||
; CHECK-NEXT: lslseq.w r6, r6, #31
|
||||
; CHECK-NEXT: andeq r7, r6
|
||||
; CHECK-NEXT: lslseq.w r7, r7, #31
|
||||
; CHECK-NEXT: beq .LBB5_4
|
||||
; CHECK-NEXT: @ %bb.2: @ %for.body.preheader
|
||||
; CHECK-NEXT: sub.w r6, r12, #1
|
||||
; CHECK-NEXT: and r9, r12, #3
|
||||
; CHECK-NEXT: cmp r6, #3
|
||||
; CHECK-NEXT: sub.w r4, r12, #1
|
||||
; CHECK-NEXT: and lr, r12, #3
|
||||
; CHECK-NEXT: cmp r4, #3
|
||||
; CHECK-NEXT: bhs .LBB5_6
|
||||
; CHECK-NEXT: @ %bb.3:
|
||||
; CHECK-NEXT: mov.w r12, #0
|
||||
; CHECK-NEXT: b .LBB5_8
|
||||
; CHECK-NEXT: .LBB5_4: @ %vector.ph
|
||||
; CHECK-NEXT: movs r6, #0
|
||||
; CHECK-NEXT: movs r7, #0
|
||||
; CHECK-NEXT: dlstp.32 lr, r12
|
||||
; CHECK-NEXT: .LBB5_5: @ %vector.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: adds r6, #4
|
||||
; CHECK-NEXT: adds r7, #4
|
||||
; CHECK-NEXT: vldrb.u32 q0, [r0], #4
|
||||
; CHECK-NEXT: vldrb.u32 q1, [r1], #4
|
||||
; CHECK-NEXT: vmlas.u32 q1, q0, r2
|
||||
|
@ -425,49 +425,45 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_char(i8* nocapture readonly
|
|||
; CHECK-NEXT: letp lr, .LBB5_5
|
||||
; CHECK-NEXT: b .LBB5_11
|
||||
; CHECK-NEXT: .LBB5_6: @ %for.body.preheader.new
|
||||
; CHECK-NEXT: bic r6, r12, #3
|
||||
; CHECK-NEXT: movs r5, #1
|
||||
; CHECK-NEXT: subs r6, #4
|
||||
; CHECK-NEXT: add.w r4, r3, #8
|
||||
; CHECK-NEXT: sub.w r8, r12, lr
|
||||
; CHECK-NEXT: add.w r5, r3, #8
|
||||
; CHECK-NEXT: adds r6, r0, #3
|
||||
; CHECK-NEXT: adds r7, r1, #1
|
||||
; CHECK-NEXT: mov.w r12, #0
|
||||
; CHECK-NEXT: add.w lr, r5, r6, lsr #2
|
||||
; CHECK-NEXT: adds r5, r0, #3
|
||||
; CHECK-NEXT: dls lr, lr
|
||||
; CHECK-NEXT: adds r6, r1, #1
|
||||
; CHECK-NEXT: .LBB5_7: @ %for.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: ldrb r8, [r5, #-3]
|
||||
; CHECK-NEXT: ldrb r9, [r6, #-3]
|
||||
; CHECK-NEXT: add.w r12, r12, #4
|
||||
; CHECK-NEXT: ldrb r7, [r6, #-1]
|
||||
; CHECK-NEXT: smlabb r7, r7, r8, r2
|
||||
; CHECK-NEXT: str r7, [r4, #-8]
|
||||
; CHECK-NEXT: ldrb r8, [r5, #-2]
|
||||
; CHECK-NEXT: ldrb r7, [r6], #4
|
||||
; CHECK-NEXT: smlabb r7, r7, r8, r2
|
||||
; CHECK-NEXT: str r7, [r4, #-4]
|
||||
; CHECK-NEXT: ldrb r8, [r5, #-1]
|
||||
; CHECK-NEXT: ldrb r7, [r6, #-3]
|
||||
; CHECK-NEXT: smlabb r7, r7, r8, r2
|
||||
; CHECK-NEXT: str r7, [r4]
|
||||
; CHECK-NEXT: ldrb r8, [r5], #4
|
||||
; CHECK-NEXT: ldrb r7, [r6, #-2]
|
||||
; CHECK-NEXT: smlabb r7, r7, r8, r2
|
||||
; CHECK-NEXT: str r7, [r4, #4]
|
||||
; CHECK-NEXT: adds r4, #16
|
||||
; CHECK-NEXT: le lr, .LBB5_7
|
||||
; CHECK-NEXT: ldrb r4, [r7, #-1]
|
||||
; CHECK-NEXT: cmp r8, r12
|
||||
; CHECK-NEXT: smlabb r4, r4, r9, r2
|
||||
; CHECK-NEXT: str r4, [r5, #-8]
|
||||
; CHECK-NEXT: ldrb r9, [r6, #-2]
|
||||
; CHECK-NEXT: ldrb r4, [r7], #4
|
||||
; CHECK-NEXT: smlabb r4, r4, r9, r2
|
||||
; CHECK-NEXT: str r4, [r5, #-4]
|
||||
; CHECK-NEXT: ldrb r9, [r6, #-1]
|
||||
; CHECK-NEXT: ldrb r4, [r7, #-3]
|
||||
; CHECK-NEXT: smlabb r4, r4, r9, r2
|
||||
; CHECK-NEXT: str r4, [r5]
|
||||
; CHECK-NEXT: ldrb r9, [r6], #4
|
||||
; CHECK-NEXT: ldrb r4, [r7, #-2]
|
||||
; CHECK-NEXT: smlabb r4, r4, r9, r2
|
||||
; CHECK-NEXT: str r4, [r5, #4]
|
||||
; CHECK-NEXT: add.w r5, r5, #16
|
||||
; CHECK-NEXT: bne .LBB5_7
|
||||
; CHECK-NEXT: .LBB5_8: @ %for.cond.cleanup.loopexit.unr-lcssa
|
||||
; CHECK-NEXT: wls lr, r9, .LBB5_11
|
||||
; CHECK-NEXT: wls lr, lr, .LBB5_11
|
||||
; CHECK-NEXT: @ %bb.9: @ %for.body.epil.preheader
|
||||
; CHECK-NEXT: add r0, r12
|
||||
; CHECK-NEXT: add r1, r12
|
||||
; CHECK-NEXT: add.w r3, r3, r12, lsl #2
|
||||
; CHECK-NEXT: mov lr, r9
|
||||
; CHECK-NEXT: .LBB5_10: @ %for.body.epil
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: ldrb r6, [r0], #1
|
||||
; CHECK-NEXT: ldrb r5, [r1], #1
|
||||
; CHECK-NEXT: smlabb r6, r5, r6, r2
|
||||
; CHECK-NEXT: str r6, [r3], #4
|
||||
; CHECK-NEXT: ldrb r7, [r0], #1
|
||||
; CHECK-NEXT: ldrb r6, [r1], #1
|
||||
; CHECK-NEXT: smlabb r7, r6, r7, r2
|
||||
; CHECK-NEXT: str r7, [r3], #4
|
||||
; CHECK-NEXT: le lr, .LBB5_10
|
||||
; CHECK-NEXT: .LBB5_11: @ %for.cond.cleanup
|
||||
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc}
|
||||
|
@ -689,37 +685,37 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_uchar(i8* nocapture readonl
|
|||
; CHECK-NEXT: cmp.w r12, #0
|
||||
; CHECK-NEXT: beq.w .LBB7_11
|
||||
; CHECK-NEXT: @ %bb.1: @ %for.body.lr.ph
|
||||
; CHECK-NEXT: add.w r4, r3, r12, lsl #2
|
||||
; CHECK-NEXT: add.w r5, r1, r12
|
||||
; CHECK-NEXT: cmp r4, r1
|
||||
; CHECK-NEXT: add.w r6, r0, r12
|
||||
; CHECK-NEXT: cset lr, hi
|
||||
; CHECK-NEXT: cmp r5, r3
|
||||
; CHECK-NEXT: cset r5, hi
|
||||
; CHECK-NEXT: cmp r4, r0
|
||||
; CHECK-NEXT: cset r4, hi
|
||||
; CHECK-NEXT: add.w r5, r3, r12, lsl #2
|
||||
; CHECK-NEXT: add.w r6, r1, r12
|
||||
; CHECK-NEXT: cmp r5, r1
|
||||
; CHECK-NEXT: add.w r4, r0, r12
|
||||
; CHECK-NEXT: cset r7, hi
|
||||
; CHECK-NEXT: cmp r6, r3
|
||||
; CHECK-NEXT: cset r6, hi
|
||||
; CHECK-NEXT: ands r4, r6
|
||||
; CHECK-NEXT: lsls r4, r4, #31
|
||||
; CHECK-NEXT: cmp r5, r0
|
||||
; CHECK-NEXT: cset r5, hi
|
||||
; CHECK-NEXT: cmp r4, r3
|
||||
; CHECK-NEXT: cset r4, hi
|
||||
; CHECK-NEXT: ands r5, r4
|
||||
; CHECK-NEXT: lsls r5, r5, #31
|
||||
; CHECK-NEXT: itt eq
|
||||
; CHECK-NEXT: andeq.w r6, r5, lr
|
||||
; CHECK-NEXT: lslseq.w r6, r6, #31
|
||||
; CHECK-NEXT: andeq r7, r6
|
||||
; CHECK-NEXT: lslseq.w r7, r7, #31
|
||||
; CHECK-NEXT: beq .LBB7_4
|
||||
; CHECK-NEXT: @ %bb.2: @ %for.body.preheader
|
||||
; CHECK-NEXT: sub.w r6, r12, #1
|
||||
; CHECK-NEXT: and r9, r12, #3
|
||||
; CHECK-NEXT: cmp r6, #3
|
||||
; CHECK-NEXT: sub.w r4, r12, #1
|
||||
; CHECK-NEXT: and lr, r12, #3
|
||||
; CHECK-NEXT: cmp r4, #3
|
||||
; CHECK-NEXT: bhs .LBB7_6
|
||||
; CHECK-NEXT: @ %bb.3:
|
||||
; CHECK-NEXT: mov.w r12, #0
|
||||
; CHECK-NEXT: b .LBB7_8
|
||||
; CHECK-NEXT: .LBB7_4: @ %vector.ph
|
||||
; CHECK-NEXT: movs r6, #0
|
||||
; CHECK-NEXT: movs r7, #0
|
||||
; CHECK-NEXT: dlstp.32 lr, r12
|
||||
; CHECK-NEXT: .LBB7_5: @ %vector.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: adds r6, #4
|
||||
; CHECK-NEXT: adds r7, #4
|
||||
; CHECK-NEXT: vldrb.u32 q0, [r0], #4
|
||||
; CHECK-NEXT: vldrb.u32 q1, [r1], #4
|
||||
; CHECK-NEXT: vmlas.u32 q1, q0, r2
|
||||
|
@ -727,49 +723,45 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_uchar(i8* nocapture readonl
|
|||
; CHECK-NEXT: letp lr, .LBB7_5
|
||||
; CHECK-NEXT: b .LBB7_11
|
||||
; CHECK-NEXT: .LBB7_6: @ %for.body.preheader.new
|
||||
; CHECK-NEXT: bic r6, r12, #3
|
||||
; CHECK-NEXT: movs r5, #1
|
||||
; CHECK-NEXT: subs r6, #4
|
||||
; CHECK-NEXT: add.w r4, r3, #8
|
||||
; CHECK-NEXT: sub.w r8, r12, lr
|
||||
; CHECK-NEXT: add.w r5, r3, #8
|
||||
; CHECK-NEXT: adds r6, r0, #3
|
||||
; CHECK-NEXT: adds r7, r1, #1
|
||||
; CHECK-NEXT: mov.w r12, #0
|
||||
; CHECK-NEXT: add.w lr, r5, r6, lsr #2
|
||||
; CHECK-NEXT: adds r5, r0, #3
|
||||
; CHECK-NEXT: dls lr, lr
|
||||
; CHECK-NEXT: adds r6, r1, #1
|
||||
; CHECK-NEXT: .LBB7_7: @ %for.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: ldrb r8, [r5, #-3]
|
||||
; CHECK-NEXT: ldrb r9, [r6, #-3]
|
||||
; CHECK-NEXT: add.w r12, r12, #4
|
||||
; CHECK-NEXT: ldrb r7, [r6, #-1]
|
||||
; CHECK-NEXT: smlabb r7, r7, r8, r2
|
||||
; CHECK-NEXT: str r7, [r4, #-8]
|
||||
; CHECK-NEXT: ldrb r8, [r5, #-2]
|
||||
; CHECK-NEXT: ldrb r7, [r6], #4
|
||||
; CHECK-NEXT: smlabb r7, r7, r8, r2
|
||||
; CHECK-NEXT: str r7, [r4, #-4]
|
||||
; CHECK-NEXT: ldrb r8, [r5, #-1]
|
||||
; CHECK-NEXT: ldrb r7, [r6, #-3]
|
||||
; CHECK-NEXT: smlabb r7, r7, r8, r2
|
||||
; CHECK-NEXT: str r7, [r4]
|
||||
; CHECK-NEXT: ldrb r8, [r5], #4
|
||||
; CHECK-NEXT: ldrb r7, [r6, #-2]
|
||||
; CHECK-NEXT: smlabb r7, r7, r8, r2
|
||||
; CHECK-NEXT: str r7, [r4, #4]
|
||||
; CHECK-NEXT: adds r4, #16
|
||||
; CHECK-NEXT: le lr, .LBB7_7
|
||||
; CHECK-NEXT: ldrb r4, [r7, #-1]
|
||||
; CHECK-NEXT: cmp r8, r12
|
||||
; CHECK-NEXT: smlabb r4, r4, r9, r2
|
||||
; CHECK-NEXT: str r4, [r5, #-8]
|
||||
; CHECK-NEXT: ldrb r9, [r6, #-2]
|
||||
; CHECK-NEXT: ldrb r4, [r7], #4
|
||||
; CHECK-NEXT: smlabb r4, r4, r9, r2
|
||||
; CHECK-NEXT: str r4, [r5, #-4]
|
||||
; CHECK-NEXT: ldrb r9, [r6, #-1]
|
||||
; CHECK-NEXT: ldrb r4, [r7, #-3]
|
||||
; CHECK-NEXT: smlabb r4, r4, r9, r2
|
||||
; CHECK-NEXT: str r4, [r5]
|
||||
; CHECK-NEXT: ldrb r9, [r6], #4
|
||||
; CHECK-NEXT: ldrb r4, [r7, #-2]
|
||||
; CHECK-NEXT: smlabb r4, r4, r9, r2
|
||||
; CHECK-NEXT: str r4, [r5, #4]
|
||||
; CHECK-NEXT: add.w r5, r5, #16
|
||||
; CHECK-NEXT: bne .LBB7_7
|
||||
; CHECK-NEXT: .LBB7_8: @ %for.cond.cleanup.loopexit.unr-lcssa
|
||||
; CHECK-NEXT: wls lr, r9, .LBB7_11
|
||||
; CHECK-NEXT: wls lr, lr, .LBB7_11
|
||||
; CHECK-NEXT: @ %bb.9: @ %for.body.epil.preheader
|
||||
; CHECK-NEXT: add r0, r12
|
||||
; CHECK-NEXT: add r1, r12
|
||||
; CHECK-NEXT: add.w r3, r3, r12, lsl #2
|
||||
; CHECK-NEXT: mov lr, r9
|
||||
; CHECK-NEXT: .LBB7_10: @ %for.body.epil
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: ldrb r6, [r0], #1
|
||||
; CHECK-NEXT: ldrb r5, [r1], #1
|
||||
; CHECK-NEXT: smlabb r6, r5, r6, r2
|
||||
; CHECK-NEXT: str r6, [r3], #4
|
||||
; CHECK-NEXT: ldrb r7, [r0], #1
|
||||
; CHECK-NEXT: ldrb r6, [r1], #1
|
||||
; CHECK-NEXT: smlabb r7, r6, r7, r2
|
||||
; CHECK-NEXT: str r7, [r3], #4
|
||||
; CHECK-NEXT: le lr, .LBB7_10
|
||||
; CHECK-NEXT: .LBB7_11: @ %for.cond.cleanup
|
||||
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc}
|
||||
|
@ -991,37 +983,37 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_int(i32* nocapture readonly
|
|||
; CHECK-NEXT: cmp.w r12, #0
|
||||
; CHECK-NEXT: beq.w .LBB9_11
|
||||
; CHECK-NEXT: @ %bb.1: @ %vector.memcheck
|
||||
; CHECK-NEXT: add.w r4, r3, r12, lsl #2
|
||||
; CHECK-NEXT: add.w r5, r1, r12, lsl #2
|
||||
; CHECK-NEXT: cmp r4, r1
|
||||
; CHECK-NEXT: add.w r6, r0, r12, lsl #2
|
||||
; CHECK-NEXT: cset lr, hi
|
||||
; CHECK-NEXT: cmp r5, r3
|
||||
; CHECK-NEXT: cset r5, hi
|
||||
; CHECK-NEXT: cmp r4, r0
|
||||
; CHECK-NEXT: cset r4, hi
|
||||
; CHECK-NEXT: add.w r5, r3, r12, lsl #2
|
||||
; CHECK-NEXT: add.w r6, r1, r12, lsl #2
|
||||
; CHECK-NEXT: cmp r5, r1
|
||||
; CHECK-NEXT: add.w r4, r0, r12, lsl #2
|
||||
; CHECK-NEXT: cset r7, hi
|
||||
; CHECK-NEXT: cmp r6, r3
|
||||
; CHECK-NEXT: cset r6, hi
|
||||
; CHECK-NEXT: ands r4, r6
|
||||
; CHECK-NEXT: lsls r4, r4, #31
|
||||
; CHECK-NEXT: cmp r5, r0
|
||||
; CHECK-NEXT: cset r5, hi
|
||||
; CHECK-NEXT: cmp r4, r3
|
||||
; CHECK-NEXT: cset r4, hi
|
||||
; CHECK-NEXT: ands r5, r4
|
||||
; CHECK-NEXT: lsls r5, r5, #31
|
||||
; CHECK-NEXT: itt eq
|
||||
; CHECK-NEXT: andeq.w r6, r5, lr
|
||||
; CHECK-NEXT: lslseq.w r6, r6, #31
|
||||
; CHECK-NEXT: andeq r7, r6
|
||||
; CHECK-NEXT: lslseq.w r7, r7, #31
|
||||
; CHECK-NEXT: beq .LBB9_4
|
||||
; CHECK-NEXT: @ %bb.2: @ %for.body.preheader
|
||||
; CHECK-NEXT: sub.w r6, r12, #1
|
||||
; CHECK-NEXT: and r9, r12, #3
|
||||
; CHECK-NEXT: cmp r6, #3
|
||||
; CHECK-NEXT: sub.w r4, r12, #1
|
||||
; CHECK-NEXT: and lr, r12, #3
|
||||
; CHECK-NEXT: cmp r4, #3
|
||||
; CHECK-NEXT: bhs .LBB9_6
|
||||
; CHECK-NEXT: @ %bb.3:
|
||||
; CHECK-NEXT: mov.w r12, #0
|
||||
; CHECK-NEXT: b .LBB9_8
|
||||
; CHECK-NEXT: .LBB9_4: @ %vector.ph
|
||||
; CHECK-NEXT: movs r6, #0
|
||||
; CHECK-NEXT: movs r7, #0
|
||||
; CHECK-NEXT: dlstp.32 lr, r12
|
||||
; CHECK-NEXT: .LBB9_5: @ %vector.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: adds r6, #4
|
||||
; CHECK-NEXT: adds r7, #4
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r0], #16
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r1], #16
|
||||
; CHECK-NEXT: vmlas.u32 q1, q0, r2
|
||||
|
@ -1029,51 +1021,47 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_int(i32* nocapture readonly
|
|||
; CHECK-NEXT: letp lr, .LBB9_5
|
||||
; CHECK-NEXT: b .LBB9_11
|
||||
; CHECK-NEXT: .LBB9_6: @ %for.body.preheader.new
|
||||
; CHECK-NEXT: bic r6, r12, #3
|
||||
; CHECK-NEXT: movs r5, #1
|
||||
; CHECK-NEXT: subs r6, #4
|
||||
; CHECK-NEXT: add.w r4, r3, #8
|
||||
; CHECK-NEXT: sub.w r8, r12, lr
|
||||
; CHECK-NEXT: add.w r5, r3, #8
|
||||
; CHECK-NEXT: add.w r6, r0, #8
|
||||
; CHECK-NEXT: add.w r7, r1, #8
|
||||
; CHECK-NEXT: mov.w r12, #0
|
||||
; CHECK-NEXT: add.w lr, r5, r6, lsr #2
|
||||
; CHECK-NEXT: add.w r5, r0, #8
|
||||
; CHECK-NEXT: dls lr, lr
|
||||
; CHECK-NEXT: add.w r6, r1, #8
|
||||
; CHECK-NEXT: .LBB9_7: @ %for.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: ldr r8, [r5, #-8]
|
||||
; CHECK-NEXT: ldr r9, [r6, #-8]
|
||||
; CHECK-NEXT: add.w r12, r12, #4
|
||||
; CHECK-NEXT: ldr r7, [r6, #-8]
|
||||
; CHECK-NEXT: mla r7, r7, r8, r2
|
||||
; CHECK-NEXT: str r7, [r4, #-8]
|
||||
; CHECK-NEXT: ldr r8, [r5, #-4]
|
||||
; CHECK-NEXT: ldr r7, [r6, #-4]
|
||||
; CHECK-NEXT: mla r7, r7, r8, r2
|
||||
; CHECK-NEXT: str r7, [r4, #-4]
|
||||
; CHECK-NEXT: ldr.w r8, [r5]
|
||||
; CHECK-NEXT: ldr r7, [r6]
|
||||
; CHECK-NEXT: mla r7, r7, r8, r2
|
||||
; CHECK-NEXT: str r7, [r4]
|
||||
; CHECK-NEXT: ldr.w r8, [r5, #4]
|
||||
; CHECK-NEXT: adds r5, #16
|
||||
; CHECK-NEXT: ldr r7, [r6, #4]
|
||||
; CHECK-NEXT: adds r6, #16
|
||||
; CHECK-NEXT: mla r7, r7, r8, r2
|
||||
; CHECK-NEXT: str r7, [r4, #4]
|
||||
; CHECK-NEXT: adds r4, #16
|
||||
; CHECK-NEXT: le lr, .LBB9_7
|
||||
; CHECK-NEXT: ldr r4, [r7, #-8]
|
||||
; CHECK-NEXT: cmp r8, r12
|
||||
; CHECK-NEXT: mla r4, r4, r9, r2
|
||||
; CHECK-NEXT: str r4, [r5, #-8]
|
||||
; CHECK-NEXT: ldr r9, [r6, #-4]
|
||||
; CHECK-NEXT: ldr r4, [r7, #-4]
|
||||
; CHECK-NEXT: mla r4, r4, r9, r2
|
||||
; CHECK-NEXT: str r4, [r5, #-4]
|
||||
; CHECK-NEXT: ldr.w r9, [r6]
|
||||
; CHECK-NEXT: ldr r4, [r7]
|
||||
; CHECK-NEXT: mla r4, r4, r9, r2
|
||||
; CHECK-NEXT: str r4, [r5]
|
||||
; CHECK-NEXT: ldr.w r9, [r6, #4]
|
||||
; CHECK-NEXT: add.w r6, r6, #16
|
||||
; CHECK-NEXT: ldr r4, [r7, #4]
|
||||
; CHECK-NEXT: add.w r7, r7, #16
|
||||
; CHECK-NEXT: mla r4, r4, r9, r2
|
||||
; CHECK-NEXT: str r4, [r5, #4]
|
||||
; CHECK-NEXT: add.w r5, r5, #16
|
||||
; CHECK-NEXT: bne .LBB9_7
|
||||
; CHECK-NEXT: .LBB9_8: @ %for.cond.cleanup.loopexit.unr-lcssa
|
||||
; CHECK-NEXT: wls lr, r9, .LBB9_11
|
||||
; CHECK-NEXT: wls lr, lr, .LBB9_11
|
||||
; CHECK-NEXT: @ %bb.9: @ %for.body.epil.preheader
|
||||
; CHECK-NEXT: add.w r0, r0, r12, lsl #2
|
||||
; CHECK-NEXT: add.w r1, r1, r12, lsl #2
|
||||
; CHECK-NEXT: add.w r3, r3, r12, lsl #2
|
||||
; CHECK-NEXT: mov lr, r9
|
||||
; CHECK-NEXT: .LBB9_10: @ %for.body.epil
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: ldr r6, [r0], #4
|
||||
; CHECK-NEXT: ldr r5, [r1], #4
|
||||
; CHECK-NEXT: mla r6, r5, r6, r2
|
||||
; CHECK-NEXT: str r6, [r3], #4
|
||||
; CHECK-NEXT: ldr r7, [r0], #4
|
||||
; CHECK-NEXT: ldr r6, [r1], #4
|
||||
; CHECK-NEXT: mla r7, r6, r7, r2
|
||||
; CHECK-NEXT: str r7, [r3], #4
|
||||
; CHECK-NEXT: le lr, .LBB9_10
|
||||
; CHECK-NEXT: .LBB9_11: @ %for.cond.cleanup
|
||||
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc}
|
||||
|
|
|
@ -7,6 +7,7 @@ target triple = "x86_64-unknown-linux-gnu"
|
|||
declare void @use(i8 zeroext)
|
||||
declare void @use_p(i8*)
|
||||
|
||||
; nuw needs to be dropped when switching to post-inc comparison.
|
||||
define i8 @drop_nuw() {
|
||||
; CHECK-LABEL: @drop_nuw(
|
||||
; CHECK-NEXT: entry:
|
||||
|
@ -14,7 +15,7 @@ define i8 @drop_nuw() {
|
|||
; CHECK: loop:
|
||||
; CHECK-NEXT: [[IV:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
|
||||
; CHECK-NEXT: call void @use(i8 [[IV]])
|
||||
; CHECK-NEXT: [[IV_NEXT]] = add nuw i8 [[IV]], 1
|
||||
; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[IV_NEXT]], 0
|
||||
; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]]
|
||||
; CHECK: exit:
|
||||
|
@ -36,6 +37,7 @@ exit:
|
|||
ret i8 %iv
|
||||
}
|
||||
|
||||
; nsw needs to be dropped when switching to post-inc comparison.
|
||||
define i8 @drop_nsw() {
|
||||
; CHECK-LABEL: @drop_nsw(
|
||||
; CHECK-NEXT: entry:
|
||||
|
@ -43,7 +45,7 @@ define i8 @drop_nsw() {
|
|||
; CHECK: loop:
|
||||
; CHECK-NEXT: [[IV:%.*]] = phi i8 [ 127, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
|
||||
; CHECK-NEXT: call void @use(i8 [[IV]])
|
||||
; CHECK-NEXT: [[IV_NEXT]] = add nsw i8 [[IV]], -1
|
||||
; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], -1
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[IV_NEXT]], 127
|
||||
; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]]
|
||||
; CHECK: exit:
|
||||
|
@ -65,6 +67,7 @@ exit:
|
|||
ret i8 %iv
|
||||
}
|
||||
|
||||
; Comparison already in post-inc form, no need to drop nuw.
|
||||
define i8 @already_postinc() {
|
||||
; CHECK-LABEL: @already_postinc(
|
||||
; CHECK-NEXT: entry:
|
||||
|
|
|
@ -17,7 +17,7 @@ define void @foo(i64 %N) local_unnamed_addr {
|
|||
; CHECK: do.body:
|
||||
; CHECK-NEXT: [[I_0:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[DO_BODY]] ]
|
||||
; CHECK-NEXT: tail call void @goo(i64 [[I_0]], i64 [[I_0]])
|
||||
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_0]], 1
|
||||
; CHECK-NEXT: [[INC]] = add nuw i64 [[I_0]], 1
|
||||
; CHECK-NEXT: [[T0:%.*]] = load i64, i64* @cond, align 8
|
||||
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[T0]], 0
|
||||
; CHECK-NEXT: br i1 [[TOBOOL]], label [[DO_BODY2_PREHEADER:%.*]], label [[DO_BODY]]
|
||||
|
@ -27,7 +27,7 @@ define void @foo(i64 %N) local_unnamed_addr {
|
|||
; CHECK-NEXT: [[I_1:%.*]] = phi i64 [ [[INC3:%.*]], [[DO_BODY2]] ], [ 0, [[DO_BODY2_PREHEADER]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INC]], [[I_1]]
|
||||
; CHECK-NEXT: tail call void @goo(i64 [[I_1]], i64 [[TMP0]])
|
||||
; CHECK-NEXT: [[INC3]] = add nuw nsw i64 [[I_1]], 1
|
||||
; CHECK-NEXT: [[INC3]] = add nuw i64 [[I_1]], 1
|
||||
; CHECK-NEXT: [[T1:%.*]] = load i64, i64* @cond, align 8
|
||||
; CHECK-NEXT: [[TOBOOL6:%.*]] = icmp eq i64 [[T1]], 0
|
||||
; CHECK-NEXT: br i1 [[TOBOOL6]], label [[DO_BODY8_PREHEADER:%.*]], label [[DO_BODY2]]
|
||||
|
@ -39,7 +39,7 @@ define void @foo(i64 %N) local_unnamed_addr {
|
|||
; CHECK-NEXT: [[J_2:%.*]] = phi i64 [ [[INC10:%.*]], [[DO_BODY8]] ], [ [[TMP1]], [[DO_BODY8_PREHEADER]] ]
|
||||
; CHECK-NEXT: tail call void @goo(i64 [[I_2]], i64 [[J_2]])
|
||||
; CHECK-NEXT: [[INC9]] = add nuw nsw i64 [[I_2]], 1
|
||||
; CHECK-NEXT: [[INC10]] = add nsw i64 [[J_2]], 1
|
||||
; CHECK-NEXT: [[INC10]] = add i64 [[J_2]], 1
|
||||
; CHECK-NEXT: [[T2:%.*]] = load i64, i64* @cond, align 8
|
||||
; CHECK-NEXT: [[TOBOOL12:%.*]] = icmp eq i64 [[T2]], 0
|
||||
; CHECK-NEXT: br i1 [[TOBOOL12]], label [[DO_BODY14_PREHEADER:%.*]], label [[DO_BODY8]]
|
||||
|
@ -50,7 +50,7 @@ define void @foo(i64 %N) local_unnamed_addr {
|
|||
; CHECK-NEXT: [[J_3:%.*]] = phi i64 [ [[INC16:%.*]], [[DO_BODY14]] ], [ [[INC10]], [[DO_BODY14_PREHEADER]] ]
|
||||
; CHECK-NEXT: tail call void @goo(i64 [[I_3]], i64 [[J_3]])
|
||||
; CHECK-NEXT: [[INC15]] = add nuw nsw i64 [[I_3]], 1
|
||||
; CHECK-NEXT: [[INC16]] = add nsw i64 [[J_3]], 1
|
||||
; CHECK-NEXT: [[INC16]] = add i64 [[J_3]], 1
|
||||
; CHECK-NEXT: [[T3:%.*]] = load i64, i64* @cond, align 8
|
||||
; CHECK-NEXT: [[TOBOOL18:%.*]] = icmp eq i64 [[T3]], 0
|
||||
; CHECK-NEXT: br i1 [[TOBOOL18]], label [[DO_BODY20_PREHEADER:%.*]], label [[DO_BODY14]]
|
||||
|
@ -61,7 +61,7 @@ define void @foo(i64 %N) local_unnamed_addr {
|
|||
; CHECK-NEXT: [[J_4:%.*]] = phi i64 [ [[INC22:%.*]], [[DO_BODY20]] ], [ [[INC16]], [[DO_BODY20_PREHEADER]] ]
|
||||
; CHECK-NEXT: tail call void @goo(i64 [[I_4]], i64 [[J_4]])
|
||||
; CHECK-NEXT: [[INC21]] = add nuw nsw i64 [[I_4]], 1
|
||||
; CHECK-NEXT: [[INC22]] = add nsw i64 [[J_4]], 1
|
||||
; CHECK-NEXT: [[INC22]] = add i64 [[J_4]], 1
|
||||
; CHECK-NEXT: [[T4:%.*]] = load i64, i64* @cond, align 8
|
||||
; CHECK-NEXT: [[TOBOOL24:%.*]] = icmp eq i64 [[T4]], 0
|
||||
; CHECK-NEXT: br i1 [[TOBOOL24]], label [[DO_BODY26_PREHEADER:%.*]], label [[DO_BODY20]]
|
||||
|
|
Loading…
Reference in New Issue