[ARM] VMOVhr load -> vldr

Much like the similar combine added recently for VMOVrh load, this
adds a fold for VMOVhr of a load, turning it into a single vldr.16
rather than an ldrh followed by a vmov.f16.

Differential Revision: https://reviews.llvm.org/D78714
This commit is contained in:
David Green 2020-05-06 15:41:32 +01:00
parent e38018b80d
commit f5f83cf4df
2 changed files with 85 additions and 74 deletions

View File

@ -13053,6 +13053,18 @@ static SDValue PerformVMOVhrCombine(SDNode *N, TargetLowering::DAGCombinerInfo &
}
}
// fold (VMOVhr (load x)) -> (load (f16*)x)
if (LoadSDNode *LN0 = dyn_cast<LoadSDNode>(Op0)) {
if (LN0->hasOneUse() && LN0->isUnindexed() &&
LN0->getMemoryVT() == MVT::i16) {
SDValue Load = DCI.DAG.getLoad(MVT::f16, SDLoc(N), LN0->getChain(),
LN0->getBasePtr(), LN0->getMemOperand());
DCI.DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Load.getValue(0));
DCI.DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1));
return Load;
}
}
// Only the bottom 16 bits of the source register are used.
APInt DemandedMask = APInt::getLowBitsSet(32, 16);
const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();

View File

@ -5176,105 +5176,104 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_oeq_v8f16_bc(<8 x half> %src, half* %src
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: .vsave {d8, d9, d10}
; CHECK-MVE-NEXT: vpush {d8, d9, d10}
; CHECK-MVE-NEXT: ldrh r1, [r0]
; CHECK-MVE-NEXT: vldr.16 s16, [r0]
; CHECK-MVE-NEXT: vmovx.f16 s12, s0
; CHECK-MVE-NEXT: vmovx.f16 s14, s8
; CHECK-MVE-NEXT: movs r2, #0
; CHECK-MVE-NEXT: vmovx.f16 s20, s9
; CHECK-MVE-NEXT: movs r0, #0
; CHECK-MVE-NEXT: vmov.f16 s16, r1
; CHECK-MVE-NEXT: movs r1, #0
; CHECK-MVE-NEXT: vmovx.f16 s14, s8
; CHECK-MVE-NEXT: vcmp.f16 s12, s16
; CHECK-MVE-NEXT: vmovx.f16 s12, s4
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it eq
; CHECK-MVE-NEXT: moveq r1, #1
; CHECK-MVE-NEXT: cmp r1, #0
; CHECK-MVE-NEXT: moveq r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vcmp.f16 s0, s16
; CHECK-MVE-NEXT: cset r1, ne
; CHECK-MVE-NEXT: vmovx.f16 s0, s3
; CHECK-MVE-NEXT: lsls r1, r1, #31
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: movs r2, #0
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vmovx.f16 s20, s9
; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it eq
; CHECK-MVE-NEXT: moveq r2, #1
; CHECK-MVE-NEXT: cmp r2, #0
; CHECK-MVE-NEXT: cset r2, ne
; CHECK-MVE-NEXT: vmov r1, s12
; CHECK-MVE-NEXT: vmov r0, s12
; CHECK-MVE-NEXT: lsls r2, r2, #31
; CHECK-MVE-NEXT: vcmp.f16 s1, s16
; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov r2, s12
; CHECK-MVE-NEXT: vmovx.f16 s0, s3
; CHECK-MVE-NEXT: vmov.16 q3[0], r2
; CHECK-MVE-NEXT: vmov.16 q3[1], r1
; CHECK-MVE-NEXT: mov.w r1, #0
; CHECK-MVE-NEXT: it eq
; CHECK-MVE-NEXT: moveq r1, #1
; CHECK-MVE-NEXT: cmp r1, #0
; CHECK-MVE-NEXT: cset r1, ne
; CHECK-MVE-NEXT: lsls r1, r1, #31
; CHECK-MVE-NEXT: vseleq.f16 s18, s9, s5
; CHECK-MVE-NEXT: vmov r1, s18
; CHECK-MVE-NEXT: vmovx.f16 s18, s1
; CHECK-MVE-NEXT: vcmp.f16 s18, s16
; CHECK-MVE-NEXT: vmov.16 q3[2], r1
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: mov.w r1, #0
; CHECK-MVE-NEXT: it eq
; CHECK-MVE-NEXT: moveq r1, #1
; CHECK-MVE-NEXT: cmp r1, #0
; CHECK-MVE-NEXT: cset r1, ne
; CHECK-MVE-NEXT: vmovx.f16 s18, s5
; CHECK-MVE-NEXT: lsls r1, r1, #31
; CHECK-MVE-NEXT: vcmp.f16 s2, s16
; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov r1, s18
; CHECK-MVE-NEXT: vmovx.f16 s20, s10
; CHECK-MVE-NEXT: vmov.16 q3[3], r1
; CHECK-MVE-NEXT: mov.w r1, #0
; CHECK-MVE-NEXT: it eq
; CHECK-MVE-NEXT: moveq r1, #1
; CHECK-MVE-NEXT: cmp r1, #0
; CHECK-MVE-NEXT: cset r1, ne
; CHECK-MVE-NEXT: lsls r1, r1, #31
; CHECK-MVE-NEXT: vseleq.f16 s18, s10, s6
; CHECK-MVE-NEXT: vmov r1, s18
; CHECK-MVE-NEXT: vmovx.f16 s18, s2
; CHECK-MVE-NEXT: vcmp.f16 s18, s16
; CHECK-MVE-NEXT: vmov.16 q3[4], r1
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: mov.w r1, #0
; CHECK-MVE-NEXT: it eq
; CHECK-MVE-NEXT: moveq r1, #1
; CHECK-MVE-NEXT: cmp r1, #0
; CHECK-MVE-NEXT: cset r1, ne
; CHECK-MVE-NEXT: vmovx.f16 s18, s6
; CHECK-MVE-NEXT: lsls r1, r1, #31
; CHECK-MVE-NEXT: vcmp.f16 s3, s16
; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov r1, s18
; CHECK-MVE-NEXT: vcmp.f16 s0, s16
; CHECK-MVE-NEXT: vmov.16 q3[5], r1
; CHECK-MVE-NEXT: mov.w r1, #0
; CHECK-MVE-NEXT: it eq
; CHECK-MVE-NEXT: moveq r1, #1
; CHECK-MVE-NEXT: cmp r1, #0
; CHECK-MVE-NEXT: cset r1, ne
; CHECK-MVE-NEXT: vmovx.f16 s0, s7
; CHECK-MVE-NEXT: lsls r1, r1, #31
; CHECK-MVE-NEXT: vmovx.f16 s2, s11
; CHECK-MVE-NEXT: vseleq.f16 s18, s11, s7
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov.16 q3[1], r0
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it eq
; CHECK-MVE-NEXT: moveq r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vmov r1, s18
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vmov.16 q3[6], r1
; CHECK-MVE-NEXT: vseleq.f16 s18, s9, s5
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vmovx.f16 s18, s1
; CHECK-MVE-NEXT: vcmp.f16 s18, s16
; CHECK-MVE-NEXT: vmov.16 q3[2], r0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it eq
; CHECK-MVE-NEXT: moveq r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vmovx.f16 s18, s5
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vcmp.f16 s2, s16
; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vmovx.f16 s20, s10
; CHECK-MVE-NEXT: vmov.16 q3[3], r0
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it eq
; CHECK-MVE-NEXT: moveq r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s18, s10, s6
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vmovx.f16 s18, s2
; CHECK-MVE-NEXT: vcmp.f16 s18, s16
; CHECK-MVE-NEXT: vmov.16 q3[4], r0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it eq
; CHECK-MVE-NEXT: moveq r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vmovx.f16 s18, s6
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vcmp.f16 s3, s16
; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vcmp.f16 s0, s16
; CHECK-MVE-NEXT: vmov.16 q3[5], r0
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it eq
; CHECK-MVE-NEXT: moveq r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vmovx.f16 s0, s7
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vmovx.f16 s2, s11
; CHECK-MVE-NEXT: vseleq.f16 s18, s11, s7
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it eq
; CHECK-MVE-NEXT: moveq r1, #1
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: cmp r1, #0
; CHECK-MVE-NEXT: vmov.16 q3[6], r0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0
; CHECK-MVE-NEXT: vmov r0, s0
; CHECK-MVE-NEXT: vmov.16 q3[7], r0