forked from OSchip/llvm-project
[ARM] VMOVhr load -> vldr
Much like the similar combine recently added for VMOVrh load, this adds a fold for VMOVhr load, turning it into a single vldr.16 instead of an ldrh followed by a vmov.f16. Differential Revision: https://reviews.llvm.org/D78714
This commit is contained in:
parent
e38018b80d
commit
f5f83cf4df
|
@ -13053,6 +13053,18 @@ static SDValue PerformVMOVhrCombine(SDNode *N, TargetLowering::DAGCombinerInfo &
|
|||
}
|
||||
}
|
||||
|
||||
// fold (VMOVhr (load x)) -> (load (f16*)x)
|
||||
if (LoadSDNode *LN0 = dyn_cast<LoadSDNode>(Op0)) {
|
||||
if (LN0->hasOneUse() && LN0->isUnindexed() &&
|
||||
LN0->getMemoryVT() == MVT::i16) {
|
||||
SDValue Load = DCI.DAG.getLoad(MVT::f16, SDLoc(N), LN0->getChain(),
|
||||
LN0->getBasePtr(), LN0->getMemOperand());
|
||||
DCI.DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Load.getValue(0));
|
||||
DCI.DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1));
|
||||
return Load;
|
||||
}
|
||||
}
|
||||
|
||||
// Only the bottom 16 bits of the source register are used.
|
||||
APInt DemandedMask = APInt::getLowBitsSet(32, 16);
|
||||
const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
|
||||
|
|
|
@ -5176,105 +5176,104 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_oeq_v8f16_bc(<8 x half> %src, half* %src
|
|||
; CHECK-MVE: @ %bb.0: @ %entry
|
||||
; CHECK-MVE-NEXT: .vsave {d8, d9, d10}
|
||||
; CHECK-MVE-NEXT: vpush {d8, d9, d10}
|
||||
; CHECK-MVE-NEXT: ldrh r1, [r0]
|
||||
; CHECK-MVE-NEXT: vldr.16 s16, [r0]
|
||||
; CHECK-MVE-NEXT: vmovx.f16 s12, s0
|
||||
; CHECK-MVE-NEXT: vmovx.f16 s14, s8
|
||||
; CHECK-MVE-NEXT: movs r2, #0
|
||||
; CHECK-MVE-NEXT: vmovx.f16 s20, s9
|
||||
; CHECK-MVE-NEXT: movs r0, #0
|
||||
; CHECK-MVE-NEXT: vmov.f16 s16, r1
|
||||
; CHECK-MVE-NEXT: movs r1, #0
|
||||
; CHECK-MVE-NEXT: vmovx.f16 s14, s8
|
||||
; CHECK-MVE-NEXT: vcmp.f16 s12, s16
|
||||
; CHECK-MVE-NEXT: vmovx.f16 s12, s4
|
||||
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
|
||||
; CHECK-MVE-NEXT: it eq
|
||||
; CHECK-MVE-NEXT: moveq r1, #1
|
||||
; CHECK-MVE-NEXT: cmp r1, #0
|
||||
; CHECK-MVE-NEXT: moveq r0, #1
|
||||
; CHECK-MVE-NEXT: cmp r0, #0
|
||||
; CHECK-MVE-NEXT: vcmp.f16 s0, s16
|
||||
; CHECK-MVE-NEXT: cset r1, ne
|
||||
; CHECK-MVE-NEXT: vmovx.f16 s0, s3
|
||||
; CHECK-MVE-NEXT: lsls r1, r1, #31
|
||||
; CHECK-MVE-NEXT: cset r0, ne
|
||||
; CHECK-MVE-NEXT: movs r2, #0
|
||||
; CHECK-MVE-NEXT: lsls r0, r0, #31
|
||||
; CHECK-MVE-NEXT: vmovx.f16 s20, s9
|
||||
; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12
|
||||
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
|
||||
; CHECK-MVE-NEXT: it eq
|
||||
; CHECK-MVE-NEXT: moveq r2, #1
|
||||
; CHECK-MVE-NEXT: cmp r2, #0
|
||||
; CHECK-MVE-NEXT: cset r2, ne
|
||||
; CHECK-MVE-NEXT: vmov r1, s12
|
||||
; CHECK-MVE-NEXT: vmov r0, s12
|
||||
; CHECK-MVE-NEXT: lsls r2, r2, #31
|
||||
; CHECK-MVE-NEXT: vcmp.f16 s1, s16
|
||||
; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4
|
||||
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
|
||||
; CHECK-MVE-NEXT: vmov r2, s12
|
||||
; CHECK-MVE-NEXT: vmovx.f16 s0, s3
|
||||
; CHECK-MVE-NEXT: vmov.16 q3[0], r2
|
||||
; CHECK-MVE-NEXT: vmov.16 q3[1], r1
|
||||
; CHECK-MVE-NEXT: mov.w r1, #0
|
||||
; CHECK-MVE-NEXT: it eq
|
||||
; CHECK-MVE-NEXT: moveq r1, #1
|
||||
; CHECK-MVE-NEXT: cmp r1, #0
|
||||
; CHECK-MVE-NEXT: cset r1, ne
|
||||
; CHECK-MVE-NEXT: lsls r1, r1, #31
|
||||
; CHECK-MVE-NEXT: vseleq.f16 s18, s9, s5
|
||||
; CHECK-MVE-NEXT: vmov r1, s18
|
||||
; CHECK-MVE-NEXT: vmovx.f16 s18, s1
|
||||
; CHECK-MVE-NEXT: vcmp.f16 s18, s16
|
||||
; CHECK-MVE-NEXT: vmov.16 q3[2], r1
|
||||
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
|
||||
; CHECK-MVE-NEXT: mov.w r1, #0
|
||||
; CHECK-MVE-NEXT: it eq
|
||||
; CHECK-MVE-NEXT: moveq r1, #1
|
||||
; CHECK-MVE-NEXT: cmp r1, #0
|
||||
; CHECK-MVE-NEXT: cset r1, ne
|
||||
; CHECK-MVE-NEXT: vmovx.f16 s18, s5
|
||||
; CHECK-MVE-NEXT: lsls r1, r1, #31
|
||||
; CHECK-MVE-NEXT: vcmp.f16 s2, s16
|
||||
; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
|
||||
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
|
||||
; CHECK-MVE-NEXT: vmov r1, s18
|
||||
; CHECK-MVE-NEXT: vmovx.f16 s20, s10
|
||||
; CHECK-MVE-NEXT: vmov.16 q3[3], r1
|
||||
; CHECK-MVE-NEXT: mov.w r1, #0
|
||||
; CHECK-MVE-NEXT: it eq
|
||||
; CHECK-MVE-NEXT: moveq r1, #1
|
||||
; CHECK-MVE-NEXT: cmp r1, #0
|
||||
; CHECK-MVE-NEXT: cset r1, ne
|
||||
; CHECK-MVE-NEXT: lsls r1, r1, #31
|
||||
; CHECK-MVE-NEXT: vseleq.f16 s18, s10, s6
|
||||
; CHECK-MVE-NEXT: vmov r1, s18
|
||||
; CHECK-MVE-NEXT: vmovx.f16 s18, s2
|
||||
; CHECK-MVE-NEXT: vcmp.f16 s18, s16
|
||||
; CHECK-MVE-NEXT: vmov.16 q3[4], r1
|
||||
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
|
||||
; CHECK-MVE-NEXT: mov.w r1, #0
|
||||
; CHECK-MVE-NEXT: it eq
|
||||
; CHECK-MVE-NEXT: moveq r1, #1
|
||||
; CHECK-MVE-NEXT: cmp r1, #0
|
||||
; CHECK-MVE-NEXT: cset r1, ne
|
||||
; CHECK-MVE-NEXT: vmovx.f16 s18, s6
|
||||
; CHECK-MVE-NEXT: lsls r1, r1, #31
|
||||
; CHECK-MVE-NEXT: vcmp.f16 s3, s16
|
||||
; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
|
||||
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
|
||||
; CHECK-MVE-NEXT: vmov r1, s18
|
||||
; CHECK-MVE-NEXT: vcmp.f16 s0, s16
|
||||
; CHECK-MVE-NEXT: vmov.16 q3[5], r1
|
||||
; CHECK-MVE-NEXT: mov.w r1, #0
|
||||
; CHECK-MVE-NEXT: it eq
|
||||
; CHECK-MVE-NEXT: moveq r1, #1
|
||||
; CHECK-MVE-NEXT: cmp r1, #0
|
||||
; CHECK-MVE-NEXT: cset r1, ne
|
||||
; CHECK-MVE-NEXT: vmovx.f16 s0, s7
|
||||
; CHECK-MVE-NEXT: lsls r1, r1, #31
|
||||
; CHECK-MVE-NEXT: vmovx.f16 s2, s11
|
||||
; CHECK-MVE-NEXT: vseleq.f16 s18, s11, s7
|
||||
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
|
||||
; CHECK-MVE-NEXT: vmov.16 q3[1], r0
|
||||
; CHECK-MVE-NEXT: mov.w r0, #0
|
||||
; CHECK-MVE-NEXT: it eq
|
||||
; CHECK-MVE-NEXT: moveq r0, #1
|
||||
; CHECK-MVE-NEXT: cmp r0, #0
|
||||
; CHECK-MVE-NEXT: cset r0, ne
|
||||
; CHECK-MVE-NEXT: vmov r1, s18
|
||||
; CHECK-MVE-NEXT: lsls r0, r0, #31
|
||||
; CHECK-MVE-NEXT: vmov.16 q3[6], r1
|
||||
; CHECK-MVE-NEXT: vseleq.f16 s18, s9, s5
|
||||
; CHECK-MVE-NEXT: vmov r0, s18
|
||||
; CHECK-MVE-NEXT: vmovx.f16 s18, s1
|
||||
; CHECK-MVE-NEXT: vcmp.f16 s18, s16
|
||||
; CHECK-MVE-NEXT: vmov.16 q3[2], r0
|
||||
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
|
||||
; CHECK-MVE-NEXT: mov.w r0, #0
|
||||
; CHECK-MVE-NEXT: it eq
|
||||
; CHECK-MVE-NEXT: moveq r0, #1
|
||||
; CHECK-MVE-NEXT: cmp r0, #0
|
||||
; CHECK-MVE-NEXT: cset r0, ne
|
||||
; CHECK-MVE-NEXT: vmovx.f16 s18, s5
|
||||
; CHECK-MVE-NEXT: lsls r0, r0, #31
|
||||
; CHECK-MVE-NEXT: vcmp.f16 s2, s16
|
||||
; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
|
||||
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
|
||||
; CHECK-MVE-NEXT: vmov r0, s18
|
||||
; CHECK-MVE-NEXT: vmovx.f16 s20, s10
|
||||
; CHECK-MVE-NEXT: vmov.16 q3[3], r0
|
||||
; CHECK-MVE-NEXT: mov.w r0, #0
|
||||
; CHECK-MVE-NEXT: it eq
|
||||
; CHECK-MVE-NEXT: moveq r0, #1
|
||||
; CHECK-MVE-NEXT: cmp r0, #0
|
||||
; CHECK-MVE-NEXT: cset r0, ne
|
||||
; CHECK-MVE-NEXT: lsls r0, r0, #31
|
||||
; CHECK-MVE-NEXT: vseleq.f16 s18, s10, s6
|
||||
; CHECK-MVE-NEXT: vmov r0, s18
|
||||
; CHECK-MVE-NEXT: vmovx.f16 s18, s2
|
||||
; CHECK-MVE-NEXT: vcmp.f16 s18, s16
|
||||
; CHECK-MVE-NEXT: vmov.16 q3[4], r0
|
||||
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
|
||||
; CHECK-MVE-NEXT: mov.w r0, #0
|
||||
; CHECK-MVE-NEXT: it eq
|
||||
; CHECK-MVE-NEXT: moveq r0, #1
|
||||
; CHECK-MVE-NEXT: cmp r0, #0
|
||||
; CHECK-MVE-NEXT: cset r0, ne
|
||||
; CHECK-MVE-NEXT: vmovx.f16 s18, s6
|
||||
; CHECK-MVE-NEXT: lsls r0, r0, #31
|
||||
; CHECK-MVE-NEXT: vcmp.f16 s3, s16
|
||||
; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
|
||||
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
|
||||
; CHECK-MVE-NEXT: vmov r0, s18
|
||||
; CHECK-MVE-NEXT: vcmp.f16 s0, s16
|
||||
; CHECK-MVE-NEXT: vmov.16 q3[5], r0
|
||||
; CHECK-MVE-NEXT: mov.w r0, #0
|
||||
; CHECK-MVE-NEXT: it eq
|
||||
; CHECK-MVE-NEXT: moveq r0, #1
|
||||
; CHECK-MVE-NEXT: cmp r0, #0
|
||||
; CHECK-MVE-NEXT: cset r0, ne
|
||||
; CHECK-MVE-NEXT: vmovx.f16 s0, s7
|
||||
; CHECK-MVE-NEXT: lsls r0, r0, #31
|
||||
; CHECK-MVE-NEXT: vmovx.f16 s2, s11
|
||||
; CHECK-MVE-NEXT: vseleq.f16 s18, s11, s7
|
||||
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
|
||||
; CHECK-MVE-NEXT: it eq
|
||||
; CHECK-MVE-NEXT: moveq r1, #1
|
||||
; CHECK-MVE-NEXT: vmov r0, s18
|
||||
; CHECK-MVE-NEXT: cmp r1, #0
|
||||
; CHECK-MVE-NEXT: vmov.16 q3[6], r0
|
||||
; CHECK-MVE-NEXT: cset r0, ne
|
||||
; CHECK-MVE-NEXT: lsls r0, r0, #31
|
||||
; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0
|
||||
; CHECK-MVE-NEXT: vmov r0, s0
|
||||
; CHECK-MVE-NEXT: vmov.16 q3[7], r0
|
||||
|
|
Loading…
Reference in New Issue