forked from OSchip/llvm-project
[ARM] Select f32 constants with vmov.f16
This adds lowering for f32 values using vmov.f16, which zeroes the top bits whilst setting the lower bits to a pattern. This range of values does not often come up, except where an f16 constant value has been converted to an f32. Differential Revision: https://reviews.llvm.org/D87790
This commit is contained in:
parent
2a77441117
commit
f4c5cadbcb
|
@ -18067,6 +18067,9 @@ bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
|
|||
return false;
|
||||
if (VT == MVT::f16 && Subtarget->hasFullFP16())
|
||||
return ARM_AM::getFP16Imm(Imm) != -1;
|
||||
if (VT == MVT::f32 && Subtarget->hasFullFP16() &&
|
||||
ARM_AM::getFP32FP16Imm(Imm) != -1)
|
||||
return true;
|
||||
if (VT == MVT::f32)
|
||||
return ARM_AM::getFP32Imm(Imm) != -1;
|
||||
if (VT == MVT::f64 && Subtarget->hasFP64())
|
||||
|
|
|
@ -54,6 +54,16 @@ def vfp_f16imm : Operand<f16>,
|
|||
let ParserMatchClass = FPImmOperand;
|
||||
}
|
||||
|
||||
// SDNodeXForm over fpimm nodes: builds an i32 target constant holding the value
// that ARM_AM::getFP32FP16Imm returns for the node's floating-point value.
def vfp_f32f16imm_xform : SDNodeXForm<fpimm, [{
|
||||
APFloat InVal = N->getValueAPF();
|
||||
// NOTE(review): getFP32FP16Imm returns int and uses -1 as its failure value,
// which would wrap when stored in a uint32_t. Presumably this transform is
// only applied to immediates already accepted by the vfp_f32f16imm
// predicate -- confirm.
uint32_t enc = ARM_AM::getFP32FP16Imm(InVal);
|
||||
return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32);
|
||||
}]>;
|
||||
|
||||
// PatLeaf over f32 fpimm nodes: the predicate accepts a node when
// ARM_AM::getFP32FP16Imm does not return -1 for its value.
// vfp_f32f16imm_xform is attached as the leaf's transform.
def vfp_f32f16imm : PatLeaf<(f32 fpimm), [{
|
||||
return ARM_AM::getFP32FP16Imm(N->getValueAPF()) != -1;
|
||||
}], vfp_f32f16imm_xform>;
|
||||
|
||||
def vfp_f32imm_xform : SDNodeXForm<fpimm, [{
|
||||
APFloat InVal = N->getValueAPF();
|
||||
uint32_t enc = ARM_AM::getFP32Imm(InVal);
|
||||
|
@ -2637,6 +2647,11 @@ def FCONSTH : VFPAI<(outs HPR:$Sd), (ins vfp_f16imm:$imm),
|
|||
}
|
||||
}
|
||||
|
||||
// Selection pattern: an f32 immediate matched by vfp_f32f16imm is produced by
// FCONSTH on the vfp_f32f16imm_xform-transformed immediate, and the f16 result
// is then copied into the SPR register class to give the f32 value.
def : Pat<(f32 (vfp_f32f16imm:$imm)),
|
||||
(f32 (COPY_TO_REGCLASS (f16 (FCONSTH (vfp_f32f16imm_xform (f32 $imm)))), SPR))> {
|
||||
// The pattern is only enabled under the HasFullFP16 predicate.
let Predicates = [HasFullFP16];
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Assembler aliases.
|
||||
//
|
||||
|
|
|
@ -687,6 +687,18 @@ namespace ARM_AM {
|
|||
return getFP16Imm(FPImm.bitcastToAPInt());
|
||||
}
|
||||
|
||||
/// If this is a FP16Imm encoded as a fp32 value, return the 8-bit encoding
|
||||
/// for it. Otherwise return -1 like getFP16Imm.
|
||||
/// This overload takes the value as an APInt and looks only at its
/// active-bit count and its low 16 bits.
inline int getFP32FP16Imm(const APInt &Imm) {
|
||||
// More than 16 active bits: reject with the -1 sentinel.
if (Imm.getActiveBits() > 16)
|
||||
return -1;
|
||||
// Otherwise truncate to 16 bits and return whatever getFP16Imm returns for
// that 16-bit pattern.
return ARM_AM::getFP16Imm(Imm.trunc(16));
|
||||
}
|
||||
|
||||
/// APFloat overload: passes FPImm.bitcastToAPInt() to the APInt overload of
/// getFP32FP16Imm and returns that overload's result unchanged.
inline int getFP32FP16Imm(const APFloat &FPImm) {
|
||||
return getFP32FP16Imm(FPImm.bitcastToAPInt());
|
||||
}
|
||||
|
||||
/// getFP32Imm - Return an 8-bit floating-point version of the 32-bit
|
||||
/// floating-point value. If the value cannot be represented as an 8-bit
|
||||
/// floating-point value, then return -1.
|
||||
|
|
|
@ -874,7 +874,7 @@ define half @h1(half (half)* nocapture %hptr) "cmse_nonsecure_entry" nounwind {
|
|||
; CHECK-MVE-NEXT: vstr fpcxtns, [sp, #-4]!
|
||||
; CHECK-MVE-NEXT: push {r7, lr}
|
||||
; CHECK-MVE-NEXT: sub sp, #4
|
||||
; CHECK-MVE-NEXT: vldr s0, .LCPI11_0
|
||||
; CHECK-MVE-NEXT: vmov.f16 s0, #1.000000e+01
|
||||
; CHECK-MVE-NEXT: blx r0
|
||||
; CHECK-MVE-NEXT: vmov.f16 r0, s0
|
||||
; CHECK-MVE-NEXT: vmov s0, r0
|
||||
|
@ -884,10 +884,6 @@ define half @h1(half (half)* nocapture %hptr) "cmse_nonsecure_entry" nounwind {
|
|||
; CHECK-MVE-NEXT: vldr fpcxtns, [sp], #4
|
||||
; CHECK-MVE-NEXT: clrm {r0, r1, r2, r3, r12, apsr}
|
||||
; CHECK-MVE-NEXT: bxns lr
|
||||
; CHECK-MVE-NEXT: .p2align 2
|
||||
; CHECK-MVE-NEXT: @ %bb.1:
|
||||
; CHECK-MVE-NEXT: .LCPI11_0:
|
||||
; CHECK-MVE-NEXT: .long 0x00004900 @ float 2.61874657E-41
|
||||
%call = call half %hptr(half 10.0) nounwind
|
||||
ret half %call
|
||||
}
|
||||
|
@ -931,25 +927,41 @@ define half @h2(half (half)* nocapture %hptr) nounwind {
|
|||
; CHECK-8M-NEXT: .LCPI12_0:
|
||||
; CHECK-8M-NEXT: .long 0x00004900 @ float 2.61874657E-41
|
||||
;
|
||||
; CHECK-81M-LABEL: h2:
|
||||
; CHECK-81M: @ %bb.0: @ %entry
|
||||
; CHECK-81M-NEXT: push {r7, lr}
|
||||
; CHECK-81M-NEXT: vldr s0, .LCPI12_0
|
||||
; CHECK-81M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
|
||||
; CHECK-81M-NEXT: bic r0, r0, #1
|
||||
; CHECK-81M-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
|
||||
; CHECK-81M-NEXT: vscclrm {s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr}
|
||||
; CHECK-81M-NEXT: vstr fpcxts, [sp, #-8]!
|
||||
; CHECK-81M-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
|
||||
; CHECK-81M-NEXT: blxns r0
|
||||
; CHECK-81M-NEXT: vldr fpcxts, [sp], #8
|
||||
; CHECK-81M-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
|
||||
; CHECK-81M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
|
||||
; CHECK-81M-NEXT: pop {r7, pc}
|
||||
; CHECK-81M-NEXT: .p2align 2
|
||||
; CHECK-81M-NEXT: @ %bb.1:
|
||||
; CHECK-81M-NEXT: .LCPI12_0:
|
||||
; CHECK-81M-NEXT: .long 0x00004900 @ float 2.61874657E-41
|
||||
; CHECK-NO-MVE-LABEL: h2:
|
||||
; CHECK-NO-MVE: @ %bb.0: @ %entry
|
||||
; CHECK-NO-MVE-NEXT: push {r7, lr}
|
||||
; CHECK-NO-MVE-NEXT: vldr s0, .LCPI12_0
|
||||
; CHECK-NO-MVE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
|
||||
; CHECK-NO-MVE-NEXT: bic r0, r0, #1
|
||||
; CHECK-NO-MVE-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
|
||||
; CHECK-NO-MVE-NEXT: vscclrm {s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr}
|
||||
; CHECK-NO-MVE-NEXT: vstr fpcxts, [sp, #-8]!
|
||||
; CHECK-NO-MVE-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
|
||||
; CHECK-NO-MVE-NEXT: blxns r0
|
||||
; CHECK-NO-MVE-NEXT: vldr fpcxts, [sp], #8
|
||||
; CHECK-NO-MVE-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
|
||||
; CHECK-NO-MVE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
|
||||
; CHECK-NO-MVE-NEXT: pop {r7, pc}
|
||||
; CHECK-NO-MVE-NEXT: .p2align 2
|
||||
; CHECK-NO-MVE-NEXT: @ %bb.1:
|
||||
; CHECK-NO-MVE-NEXT: .LCPI12_0:
|
||||
; CHECK-NO-MVE-NEXT: .long 0x00004900 @ float 2.61874657E-41
|
||||
;
|
||||
; CHECK-MVE-LABEL: h2:
|
||||
; CHECK-MVE: @ %bb.0: @ %entry
|
||||
; CHECK-MVE-NEXT: push {r7, lr}
|
||||
; CHECK-MVE-NEXT: vmov.f16 s0, #1.000000e+01
|
||||
; CHECK-MVE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
|
||||
; CHECK-MVE-NEXT: bic r0, r0, #1
|
||||
; CHECK-MVE-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
|
||||
; CHECK-MVE-NEXT: vscclrm {s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr}
|
||||
; CHECK-MVE-NEXT: vstr fpcxts, [sp, #-8]!
|
||||
; CHECK-MVE-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
|
||||
; CHECK-MVE-NEXT: blxns r0
|
||||
; CHECK-MVE-NEXT: vldr fpcxts, [sp], #8
|
||||
; CHECK-MVE-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
|
||||
; CHECK-MVE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
|
||||
; CHECK-MVE-NEXT: pop {r7, pc}
|
||||
entry:
|
||||
%call = call half %hptr(half 10.0) "cmse_nonsecure_call" nounwind
|
||||
ret half %call
|
||||
|
@ -994,25 +1006,41 @@ define half @h3(half (half)* nocapture %hptr) nounwind {
|
|||
; CHECK-8M-NEXT: .LCPI13_0:
|
||||
; CHECK-8M-NEXT: .long 0x00004900 @ float 2.61874657E-41
|
||||
;
|
||||
; CHECK-81M-LABEL: h3:
|
||||
; CHECK-81M: @ %bb.0: @ %entry
|
||||
; CHECK-81M-NEXT: push {r7, lr}
|
||||
; CHECK-81M-NEXT: vldr s0, .LCPI13_0
|
||||
; CHECK-81M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
|
||||
; CHECK-81M-NEXT: bic r0, r0, #1
|
||||
; CHECK-81M-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
|
||||
; CHECK-81M-NEXT: vscclrm {s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr}
|
||||
; CHECK-81M-NEXT: vstr fpcxts, [sp, #-8]!
|
||||
; CHECK-81M-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
|
||||
; CHECK-81M-NEXT: blxns r0
|
||||
; CHECK-81M-NEXT: vldr fpcxts, [sp], #8
|
||||
; CHECK-81M-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
|
||||
; CHECK-81M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
|
||||
; CHECK-81M-NEXT: pop {r7, pc}
|
||||
; CHECK-81M-NEXT: .p2align 2
|
||||
; CHECK-81M-NEXT: @ %bb.1:
|
||||
; CHECK-81M-NEXT: .LCPI13_0:
|
||||
; CHECK-81M-NEXT: .long 0x00004900 @ float 2.61874657E-41
|
||||
; CHECK-NO-MVE-LABEL: h3:
|
||||
; CHECK-NO-MVE: @ %bb.0: @ %entry
|
||||
; CHECK-NO-MVE-NEXT: push {r7, lr}
|
||||
; CHECK-NO-MVE-NEXT: vldr s0, .LCPI13_0
|
||||
; CHECK-NO-MVE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
|
||||
; CHECK-NO-MVE-NEXT: bic r0, r0, #1
|
||||
; CHECK-NO-MVE-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
|
||||
; CHECK-NO-MVE-NEXT: vscclrm {s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr}
|
||||
; CHECK-NO-MVE-NEXT: vstr fpcxts, [sp, #-8]!
|
||||
; CHECK-NO-MVE-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
|
||||
; CHECK-NO-MVE-NEXT: blxns r0
|
||||
; CHECK-NO-MVE-NEXT: vldr fpcxts, [sp], #8
|
||||
; CHECK-NO-MVE-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
|
||||
; CHECK-NO-MVE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
|
||||
; CHECK-NO-MVE-NEXT: pop {r7, pc}
|
||||
; CHECK-NO-MVE-NEXT: .p2align 2
|
||||
; CHECK-NO-MVE-NEXT: @ %bb.1:
|
||||
; CHECK-NO-MVE-NEXT: .LCPI13_0:
|
||||
; CHECK-NO-MVE-NEXT: .long 0x00004900 @ float 2.61874657E-41
|
||||
;
|
||||
; CHECK-MVE-LABEL: h3:
|
||||
; CHECK-MVE: @ %bb.0: @ %entry
|
||||
; CHECK-MVE-NEXT: push {r7, lr}
|
||||
; CHECK-MVE-NEXT: vmov.f16 s0, #1.000000e+01
|
||||
; CHECK-MVE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
|
||||
; CHECK-MVE-NEXT: bic r0, r0, #1
|
||||
; CHECK-MVE-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
|
||||
; CHECK-MVE-NEXT: vscclrm {s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr}
|
||||
; CHECK-MVE-NEXT: vstr fpcxts, [sp, #-8]!
|
||||
; CHECK-MVE-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
|
||||
; CHECK-MVE-NEXT: blxns r0
|
||||
; CHECK-MVE-NEXT: vldr fpcxts, [sp], #8
|
||||
; CHECK-MVE-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
|
||||
; CHECK-MVE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
|
||||
; CHECK-MVE-NEXT: pop {r7, pc}
|
||||
entry:
|
||||
%call = tail call half %hptr(half 10.0) "cmse_nonsecure_call" nounwind
|
||||
ret half %call
|
||||
|
@ -1123,7 +1151,7 @@ define half @h1_minsize(half (half)* nocapture %hptr) "cmse_nonsecure_entry" min
|
|||
; CHECK-MVE: @ %bb.0: @ %entry
|
||||
; CHECK-MVE-NEXT: vstr fpcxtns, [sp, #-4]!
|
||||
; CHECK-MVE-NEXT: push {r6, r7, lr}
|
||||
; CHECK-MVE-NEXT: vldr s0, .LCPI15_0
|
||||
; CHECK-MVE-NEXT: vmov.f16 s0, #1.000000e+01
|
||||
; CHECK-MVE-NEXT: blx r0
|
||||
; CHECK-MVE-NEXT: vmov.f16 r0, s0
|
||||
; CHECK-MVE-NEXT: vmov s0, r0
|
||||
|
@ -1132,10 +1160,6 @@ define half @h1_minsize(half (half)* nocapture %hptr) "cmse_nonsecure_entry" min
|
|||
; CHECK-MVE-NEXT: vldr fpcxtns, [sp], #4
|
||||
; CHECK-MVE-NEXT: clrm {r0, r1, r2, r3, r12, apsr}
|
||||
; CHECK-MVE-NEXT: bxns lr
|
||||
; CHECK-MVE-NEXT: .p2align 2
|
||||
; CHECK-MVE-NEXT: @ %bb.1:
|
||||
; CHECK-MVE-NEXT: .LCPI15_0:
|
||||
; CHECK-MVE-NEXT: .long 0x00004900 @ float 2.61874657E-41
|
||||
entry:
|
||||
%call = call half %hptr(half 10.0) nounwind
|
||||
ret half %call
|
||||
|
|
|
@ -152,12 +152,8 @@ define half @constcall() {
|
|||
;
|
||||
; CHECK-FP16-HARD-LABEL: constcall:
|
||||
; CHECK-FP16-HARD: @ %bb.0: @ %entry
|
||||
; CHECK-FP16-HARD-NEXT: vldr s0, .LCPI4_0
|
||||
; CHECK-FP16-HARD-NEXT: vmov.f16 s0, #1.000000e+01
|
||||
; CHECK-FP16-HARD-NEXT: b ccc
|
||||
; CHECK-FP16-HARD-NEXT: .p2align 2
|
||||
; CHECK-FP16-HARD-NEXT: @ %bb.1:
|
||||
; CHECK-FP16-HARD-NEXT: .LCPI4_0:
|
||||
; CHECK-FP16-HARD-NEXT: .long 0x00004900 @ float 2.61874657E-41
|
||||
entry:
|
||||
%call = tail call fast half @ccc(half 0xH4900)
|
||||
ret half %call
|
||||
|
|
Loading…
Reference in New Issue