[ARM] Select f32 constants with vmov.f16

This adds lowering for f32 constants using vmov.f16, which zeroes the
top 16 bits of the value whilst setting the lower 16 bits to the encoded
pattern. Values in this range do not come up often, except where an f16
constant has been converted to an f32.

Differential Revision: https://reviews.llvm.org/D87790
This commit is contained in:
David Green 2020-09-21 11:10:47 +01:00
parent 2a77441117
commit f4c5cadbcb
5 changed files with 103 additions and 53 deletions

View File

@ -18067,6 +18067,9 @@ bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
return false;
if (VT == MVT::f16 && Subtarget->hasFullFP16())
return ARM_AM::getFP16Imm(Imm) != -1;
if (VT == MVT::f32 && Subtarget->hasFullFP16() &&
ARM_AM::getFP32FP16Imm(Imm) != -1)
return true;
if (VT == MVT::f32)
return ARM_AM::getFP32Imm(Imm) != -1;
if (VT == MVT::f64 && Subtarget->hasFP64())

View File

@ -54,6 +54,16 @@ def vfp_f16imm : Operand<f16>,
let ParserMatchClass = FPImmOperand;
}
// Rewrites a floating-point immediate node as an i32 target constant that
// carries the encoding produced by ARM_AM::getFP32FP16Imm for its value.
def vfp_f32f16imm_xform : SDNodeXForm<fpimm, [{
  const APFloat &FPVal = N->getValueAPF();
  uint32_t Encoded = ARM_AM::getFP32FP16Imm(FPVal);
  SDLoc DL(N);
  return CurDAG->getTargetConstant(Encoded, DL, MVT::i32);
}]>;
// Matches an f32 floating-point immediate for which ARM_AM::getFP32FP16Imm
// produces an encoding (i.e. anything other than -1). The matched node is
// transformed with vfp_f32f16imm_xform.
def vfp_f32f16imm : PatLeaf<(f32 fpimm), [{
  int Encoded = ARM_AM::getFP32FP16Imm(N->getValueAPF());
  return Encoded != -1;
}], vfp_f32f16imm_xform>;
def vfp_f32imm_xform : SDNodeXForm<fpimm, [{
APFloat InVal = N->getValueAPF();
uint32_t enc = ARM_AM::getFP32Imm(InVal);
@ -2637,6 +2647,11 @@ def FCONSTH : VFPAI<(outs HPR:$Sd), (ins vfp_f16imm:$imm),
}
}
// Select an f32 constant accepted by vfp_f32f16imm as an FCONSTH of the
// encoded immediate, then copy the f16 result into the SPR register class.
// Only available when the HasFullFP16 predicate holds.
let Predicates = [HasFullFP16] in
def : Pat<(f32 (vfp_f32f16imm:$imm)),
          (f32 (COPY_TO_REGCLASS
                 (f16 (FCONSTH (vfp_f32f16imm_xform (f32 $imm)))),
                 SPR))>;
//===----------------------------------------------------------------------===//
// Assembler aliases.
//

View File

@ -687,6 +687,18 @@ namespace ARM_AM {
return getFP16Imm(FPImm.bitcastToAPInt());
}
/// Return the 8-bit encoding of an FP16 immediate whose bit pattern is held
/// in the low 16 bits of \p Imm (for example, the bits of an fp32 value).
/// Returns -1, like getFP16Imm, when any bit above the low 16 is set or when
/// getFP16Imm cannot encode the truncated value.
inline int getFP32FP16Imm(const APInt &Imm) {
  // Only values whose upper bits are all zero can be treated as a bare
  // 16-bit pattern; the truncation is evaluated only in that case.
  const bool FitsInLowHalf = Imm.getActiveBits() <= 16;
  return FitsInLowHalf ? ARM_AM::getFP16Imm(Imm.trunc(16)) : -1;
}
inline int getFP32FP16Imm(const APFloat &FPImm) {
return getFP32FP16Imm(FPImm.bitcastToAPInt());
}
/// getFP32Imm - Return an 8-bit floating-point version of the 32-bit
/// floating-point value. If the value cannot be represented as an 8-bit
/// floating-point value, then return -1.

View File

@ -874,7 +874,7 @@ define half @h1(half (half)* nocapture %hptr) "cmse_nonsecure_entry" nounwind {
; CHECK-MVE-NEXT: vstr fpcxtns, [sp, #-4]!
; CHECK-MVE-NEXT: push {r7, lr}
; CHECK-MVE-NEXT: sub sp, #4
; CHECK-MVE-NEXT: vldr s0, .LCPI11_0
; CHECK-MVE-NEXT: vmov.f16 s0, #1.000000e+01
; CHECK-MVE-NEXT: blx r0
; CHECK-MVE-NEXT: vmov.f16 r0, s0
; CHECK-MVE-NEXT: vmov s0, r0
@ -884,10 +884,6 @@ define half @h1(half (half)* nocapture %hptr) "cmse_nonsecure_entry" nounwind {
; CHECK-MVE-NEXT: vldr fpcxtns, [sp], #4
; CHECK-MVE-NEXT: clrm {r0, r1, r2, r3, r12, apsr}
; CHECK-MVE-NEXT: bxns lr
; CHECK-MVE-NEXT: .p2align 2
; CHECK-MVE-NEXT: @ %bb.1:
; CHECK-MVE-NEXT: .LCPI11_0:
; CHECK-MVE-NEXT: .long 0x00004900 @ float 2.61874657E-41
%call = call half %hptr(half 10.0) nounwind
ret half %call
}
@ -931,25 +927,41 @@ define half @h2(half (half)* nocapture %hptr) nounwind {
; CHECK-8M-NEXT: .LCPI12_0:
; CHECK-8M-NEXT: .long 0x00004900 @ float 2.61874657E-41
;
; CHECK-81M-LABEL: h2:
; CHECK-81M: @ %bb.0: @ %entry
; CHECK-81M-NEXT: push {r7, lr}
; CHECK-81M-NEXT: vldr s0, .LCPI12_0
; CHECK-81M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
; CHECK-81M-NEXT: bic r0, r0, #1
; CHECK-81M-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
; CHECK-81M-NEXT: vscclrm {s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr}
; CHECK-81M-NEXT: vstr fpcxts, [sp, #-8]!
; CHECK-81M-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
; CHECK-81M-NEXT: blxns r0
; CHECK-81M-NEXT: vldr fpcxts, [sp], #8
; CHECK-81M-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
; CHECK-81M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
; CHECK-81M-NEXT: pop {r7, pc}
; CHECK-81M-NEXT: .p2align 2
; CHECK-81M-NEXT: @ %bb.1:
; CHECK-81M-NEXT: .LCPI12_0:
; CHECK-81M-NEXT: .long 0x00004900 @ float 2.61874657E-41
; CHECK-NO-MVE-LABEL: h2:
; CHECK-NO-MVE: @ %bb.0: @ %entry
; CHECK-NO-MVE-NEXT: push {r7, lr}
; CHECK-NO-MVE-NEXT: vldr s0, .LCPI12_0
; CHECK-NO-MVE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
; CHECK-NO-MVE-NEXT: bic r0, r0, #1
; CHECK-NO-MVE-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
; CHECK-NO-MVE-NEXT: vscclrm {s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr}
; CHECK-NO-MVE-NEXT: vstr fpcxts, [sp, #-8]!
; CHECK-NO-MVE-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
; CHECK-NO-MVE-NEXT: blxns r0
; CHECK-NO-MVE-NEXT: vldr fpcxts, [sp], #8
; CHECK-NO-MVE-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
; CHECK-NO-MVE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
; CHECK-NO-MVE-NEXT: pop {r7, pc}
; CHECK-NO-MVE-NEXT: .p2align 2
; CHECK-NO-MVE-NEXT: @ %bb.1:
; CHECK-NO-MVE-NEXT: .LCPI12_0:
; CHECK-NO-MVE-NEXT: .long 0x00004900 @ float 2.61874657E-41
;
; CHECK-MVE-LABEL: h2:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: push {r7, lr}
; CHECK-MVE-NEXT: vmov.f16 s0, #1.000000e+01
; CHECK-MVE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
; CHECK-MVE-NEXT: bic r0, r0, #1
; CHECK-MVE-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
; CHECK-MVE-NEXT: vscclrm {s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr}
; CHECK-MVE-NEXT: vstr fpcxts, [sp, #-8]!
; CHECK-MVE-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
; CHECK-MVE-NEXT: blxns r0
; CHECK-MVE-NEXT: vldr fpcxts, [sp], #8
; CHECK-MVE-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
; CHECK-MVE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
; CHECK-MVE-NEXT: pop {r7, pc}
entry:
%call = call half %hptr(half 10.0) "cmse_nonsecure_call" nounwind
ret half %call
@ -994,25 +1006,41 @@ define half @h3(half (half)* nocapture %hptr) nounwind {
; CHECK-8M-NEXT: .LCPI13_0:
; CHECK-8M-NEXT: .long 0x00004900 @ float 2.61874657E-41
;
; CHECK-81M-LABEL: h3:
; CHECK-81M: @ %bb.0: @ %entry
; CHECK-81M-NEXT: push {r7, lr}
; CHECK-81M-NEXT: vldr s0, .LCPI13_0
; CHECK-81M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
; CHECK-81M-NEXT: bic r0, r0, #1
; CHECK-81M-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
; CHECK-81M-NEXT: vscclrm {s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr}
; CHECK-81M-NEXT: vstr fpcxts, [sp, #-8]!
; CHECK-81M-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
; CHECK-81M-NEXT: blxns r0
; CHECK-81M-NEXT: vldr fpcxts, [sp], #8
; CHECK-81M-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
; CHECK-81M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
; CHECK-81M-NEXT: pop {r7, pc}
; CHECK-81M-NEXT: .p2align 2
; CHECK-81M-NEXT: @ %bb.1:
; CHECK-81M-NEXT: .LCPI13_0:
; CHECK-81M-NEXT: .long 0x00004900 @ float 2.61874657E-41
; CHECK-NO-MVE-LABEL: h3:
; CHECK-NO-MVE: @ %bb.0: @ %entry
; CHECK-NO-MVE-NEXT: push {r7, lr}
; CHECK-NO-MVE-NEXT: vldr s0, .LCPI13_0
; CHECK-NO-MVE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
; CHECK-NO-MVE-NEXT: bic r0, r0, #1
; CHECK-NO-MVE-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
; CHECK-NO-MVE-NEXT: vscclrm {s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr}
; CHECK-NO-MVE-NEXT: vstr fpcxts, [sp, #-8]!
; CHECK-NO-MVE-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
; CHECK-NO-MVE-NEXT: blxns r0
; CHECK-NO-MVE-NEXT: vldr fpcxts, [sp], #8
; CHECK-NO-MVE-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
; CHECK-NO-MVE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
; CHECK-NO-MVE-NEXT: pop {r7, pc}
; CHECK-NO-MVE-NEXT: .p2align 2
; CHECK-NO-MVE-NEXT: @ %bb.1:
; CHECK-NO-MVE-NEXT: .LCPI13_0:
; CHECK-NO-MVE-NEXT: .long 0x00004900 @ float 2.61874657E-41
;
; CHECK-MVE-LABEL: h3:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: push {r7, lr}
; CHECK-MVE-NEXT: vmov.f16 s0, #1.000000e+01
; CHECK-MVE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
; CHECK-MVE-NEXT: bic r0, r0, #1
; CHECK-MVE-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
; CHECK-MVE-NEXT: vscclrm {s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr}
; CHECK-MVE-NEXT: vstr fpcxts, [sp, #-8]!
; CHECK-MVE-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
; CHECK-MVE-NEXT: blxns r0
; CHECK-MVE-NEXT: vldr fpcxts, [sp], #8
; CHECK-MVE-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
; CHECK-MVE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
; CHECK-MVE-NEXT: pop {r7, pc}
entry:
%call = tail call half %hptr(half 10.0) "cmse_nonsecure_call" nounwind
ret half %call
@ -1123,7 +1151,7 @@ define half @h1_minsize(half (half)* nocapture %hptr) "cmse_nonsecure_entry" min
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vstr fpcxtns, [sp, #-4]!
; CHECK-MVE-NEXT: push {r6, r7, lr}
; CHECK-MVE-NEXT: vldr s0, .LCPI15_0
; CHECK-MVE-NEXT: vmov.f16 s0, #1.000000e+01
; CHECK-MVE-NEXT: blx r0
; CHECK-MVE-NEXT: vmov.f16 r0, s0
; CHECK-MVE-NEXT: vmov s0, r0
@ -1132,10 +1160,6 @@ define half @h1_minsize(half (half)* nocapture %hptr) "cmse_nonsecure_entry" min
; CHECK-MVE-NEXT: vldr fpcxtns, [sp], #4
; CHECK-MVE-NEXT: clrm {r0, r1, r2, r3, r12, apsr}
; CHECK-MVE-NEXT: bxns lr
; CHECK-MVE-NEXT: .p2align 2
; CHECK-MVE-NEXT: @ %bb.1:
; CHECK-MVE-NEXT: .LCPI15_0:
; CHECK-MVE-NEXT: .long 0x00004900 @ float 2.61874657E-41
entry:
%call = call half %hptr(half 10.0) nounwind
ret half %call

View File

@ -152,12 +152,8 @@ define half @constcall() {
;
; CHECK-FP16-HARD-LABEL: constcall:
; CHECK-FP16-HARD: @ %bb.0: @ %entry
; CHECK-FP16-HARD-NEXT: vldr s0, .LCPI4_0
; CHECK-FP16-HARD-NEXT: vmov.f16 s0, #1.000000e+01
; CHECK-FP16-HARD-NEXT: b ccc
; CHECK-FP16-HARD-NEXT: .p2align 2
; CHECK-FP16-HARD-NEXT: @ %bb.1:
; CHECK-FP16-HARD-NEXT: .LCPI4_0:
; CHECK-FP16-HARD-NEXT: .long 0x00004900 @ float 2.61874657E-41
entry:
%call = tail call fast half @ccc(half 0xH4900)
ret half %call