From f4c5cadbcbb41f13cff0905449cfff4aef6a083c Mon Sep 17 00:00:00 2001 From: David Green Date: Mon, 21 Sep 2020 11:10:47 +0100 Subject: [PATCH] [ARM] Select f32 constants with vmov.f16 This adds lowering for f32 values using the vmov.f16, which zeroes the top bits whilst setting the lower bits to a pattern. This range of values does not often come up, except where a f16 constant value has been converted to a f32. Differential Revision: https://reviews.llvm.org/D87790 --- llvm/lib/Target/ARM/ARMISelLowering.cpp | 3 + llvm/lib/Target/ARM/ARMInstrVFP.td | 15 +++ .../ARM/MCTargetDesc/ARMAddressingModes.h | 12 ++ .../test/CodeGen/ARM/cmse-clear-float-hard.ll | 120 +++++++++++------- llvm/test/CodeGen/ARM/fp16-bitcast.ll | 6 +- 5 files changed, 103 insertions(+), 53 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 0235d6aacfda..70e8a797f869 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -18067,6 +18067,9 @@ bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, return false; if (VT == MVT::f16 && Subtarget->hasFullFP16()) return ARM_AM::getFP16Imm(Imm) != -1; + if (VT == MVT::f32 && Subtarget->hasFullFP16() && + ARM_AM::getFP32FP16Imm(Imm) != -1) + return true; if (VT == MVT::f32) return ARM_AM::getFP32Imm(Imm) != -1; if (VT == MVT::f64 && Subtarget->hasFP64()) diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td index cf4bcc743d8f..2336bbfe547e 100644 --- a/llvm/lib/Target/ARM/ARMInstrVFP.td +++ b/llvm/lib/Target/ARM/ARMInstrVFP.td @@ -54,6 +54,16 @@ def vfp_f16imm : Operand<f16>, let ParserMatchClass = FPImmOperand; } +def vfp_f32f16imm_xform : SDNodeXForm<fpimm, [{ + const APFloat &InVal = N->getValueAPF(); + uint32_t enc = ARM_AM::getFP32FP16Imm(InVal); + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32); + }]>; + +def vfp_f32f16imm : PatLeaf<(f32 fpimm), [{ + return ARM_AM::getFP32FP16Imm(N->getValueAPF()) != -1; + }], vfp_f32f16imm_xform>; + 
def vfp_f32imm_xform : SDNodeXForm<fpimm, [{ const APFloat &InVal = N->getValueAPF(); uint32_t enc = ARM_AM::getFP32Imm(InVal); @@ -2637,6 +2647,11 @@ def FCONSTH : VFPAI<(outs HPR:$Sd), (ins vfp_f16imm:$imm), } } +def : Pat<(f32 (vfp_f32f16imm:$imm)), + (f32 (COPY_TO_REGCLASS (f16 (FCONSTH (vfp_f32f16imm_xform (f32 $imm)))), SPR))> { + let Predicates = [HasFullFP16]; +} + //===----------------------------------------------------------------------===// // Assembler aliases. // diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h index dca252a177f2..8459b4ff2a14 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h @@ -687,6 +687,18 @@ namespace ARM_AM { return getFP16Imm(FPImm.bitcastToAPInt()); } + /// If this is a FP16Imm encoded as a fp32 value, return the 8-bit encoding + /// for it. Otherwise return -1 like getFP16Imm. + inline int getFP32FP16Imm(const APInt &Imm) { + if (Imm.getActiveBits() > 16) + return -1; + return ARM_AM::getFP16Imm(Imm.trunc(16)); + } + + inline int getFP32FP16Imm(const APFloat &FPImm) { + return getFP32FP16Imm(FPImm.bitcastToAPInt()); + } + /// getFP32Imm - Return an 8-bit floating-point version of the 32-bit /// floating-point value. If the value cannot be represented as an 8-bit /// floating-point value, then return -1. diff --git a/llvm/test/CodeGen/ARM/cmse-clear-float-hard.ll b/llvm/test/CodeGen/ARM/cmse-clear-float-hard.ll index d1272f88ede1..553453296963 100644 --- a/llvm/test/CodeGen/ARM/cmse-clear-float-hard.ll +++ b/llvm/test/CodeGen/ARM/cmse-clear-float-hard.ll @@ -874,7 +874,7 @@ define half @h1(half (half)* nocapture %hptr) "cmse_nonsecure_entry" nounwind { ; CHECK-MVE-NEXT: vstr fpcxtns, [sp, #-4]! 
; CHECK-MVE-NEXT: push {r7, lr} ; CHECK-MVE-NEXT: sub sp, #4 -; CHECK-MVE-NEXT: vldr s0, .LCPI11_0 +; CHECK-MVE-NEXT: vmov.f16 s0, #1.000000e+01 ; CHECK-MVE-NEXT: blx r0 ; CHECK-MVE-NEXT: vmov.f16 r0, s0 ; CHECK-MVE-NEXT: vmov s0, r0 @@ -884,10 +884,6 @@ define half @h1(half (half)* nocapture %hptr) "cmse_nonsecure_entry" nounwind { ; CHECK-MVE-NEXT: vldr fpcxtns, [sp], #4 ; CHECK-MVE-NEXT: clrm {r0, r1, r2, r3, r12, apsr} ; CHECK-MVE-NEXT: bxns lr -; CHECK-MVE-NEXT: .p2align 2 -; CHECK-MVE-NEXT: @ %bb.1: -; CHECK-MVE-NEXT: .LCPI11_0: -; CHECK-MVE-NEXT: .long 0x00004900 @ float 2.61874657E-41 %call = call half %hptr(half 10.0) nounwind ret half %call } @@ -931,25 +927,41 @@ define half @h2(half (half)* nocapture %hptr) nounwind { ; CHECK-8M-NEXT: .LCPI12_0: ; CHECK-8M-NEXT: .long 0x00004900 @ float 2.61874657E-41 ; -; CHECK-81M-LABEL: h2: -; CHECK-81M: @ %bb.0: @ %entry -; CHECK-81M-NEXT: push {r7, lr} -; CHECK-81M-NEXT: vldr s0, .LCPI12_0 -; CHECK-81M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} -; CHECK-81M-NEXT: bic r0, r0, #1 -; CHECK-81M-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31} -; CHECK-81M-NEXT: vscclrm {s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr} -; CHECK-81M-NEXT: vstr fpcxts, [sp, #-8]! 
-; CHECK-81M-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr} -; CHECK-81M-NEXT: blxns r0 -; CHECK-81M-NEXT: vldr fpcxts, [sp], #8 -; CHECK-81M-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31} -; CHECK-81M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} -; CHECK-81M-NEXT: pop {r7, pc} -; CHECK-81M-NEXT: .p2align 2 -; CHECK-81M-NEXT: @ %bb.1: -; CHECK-81M-NEXT: .LCPI12_0: -; CHECK-81M-NEXT: .long 0x00004900 @ float 2.61874657E-41 +; CHECK-NO-MVE-LABEL: h2: +; CHECK-NO-MVE: @ %bb.0: @ %entry +; CHECK-NO-MVE-NEXT: push {r7, lr} +; CHECK-NO-MVE-NEXT: vldr s0, .LCPI12_0 +; CHECK-NO-MVE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-NO-MVE-NEXT: bic r0, r0, #1 +; CHECK-NO-MVE-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31} +; CHECK-NO-MVE-NEXT: vscclrm {s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr} +; CHECK-NO-MVE-NEXT: vstr fpcxts, [sp, #-8]! 
+; CHECK-NO-MVE-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr} +; CHECK-NO-MVE-NEXT: blxns r0 +; CHECK-NO-MVE-NEXT: vldr fpcxts, [sp], #8 +; CHECK-NO-MVE-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31} +; CHECK-NO-MVE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-NO-MVE-NEXT: pop {r7, pc} +; CHECK-NO-MVE-NEXT: .p2align 2 +; CHECK-NO-MVE-NEXT: @ %bb.1: +; CHECK-NO-MVE-NEXT: .LCPI12_0: +; CHECK-NO-MVE-NEXT: .long 0x00004900 @ float 2.61874657E-41 +; +; CHECK-MVE-LABEL: h2: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: push {r7, lr} +; CHECK-MVE-NEXT: vmov.f16 s0, #1.000000e+01 +; CHECK-MVE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-MVE-NEXT: bic r0, r0, #1 +; CHECK-MVE-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31} +; CHECK-MVE-NEXT: vscclrm {s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr} +; CHECK-MVE-NEXT: vstr fpcxts, [sp, #-8]! 
+; CHECK-MVE-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr} +; CHECK-MVE-NEXT: blxns r0 +; CHECK-MVE-NEXT: vldr fpcxts, [sp], #8 +; CHECK-MVE-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31} +; CHECK-MVE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-MVE-NEXT: pop {r7, pc} entry: %call = call half %hptr(half 10.0) "cmse_nonsecure_call" nounwind ret half %call @@ -994,25 +1006,41 @@ define half @h3(half (half)* nocapture %hptr) nounwind { ; CHECK-8M-NEXT: .LCPI13_0: ; CHECK-8M-NEXT: .long 0x00004900 @ float 2.61874657E-41 ; -; CHECK-81M-LABEL: h3: -; CHECK-81M: @ %bb.0: @ %entry -; CHECK-81M-NEXT: push {r7, lr} -; CHECK-81M-NEXT: vldr s0, .LCPI13_0 -; CHECK-81M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} -; CHECK-81M-NEXT: bic r0, r0, #1 -; CHECK-81M-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31} -; CHECK-81M-NEXT: vscclrm {s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr} -; CHECK-81M-NEXT: vstr fpcxts, [sp, #-8]! 
-; CHECK-81M-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr} -; CHECK-81M-NEXT: blxns r0 -; CHECK-81M-NEXT: vldr fpcxts, [sp], #8 -; CHECK-81M-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31} -; CHECK-81M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} -; CHECK-81M-NEXT: pop {r7, pc} -; CHECK-81M-NEXT: .p2align 2 -; CHECK-81M-NEXT: @ %bb.1: -; CHECK-81M-NEXT: .LCPI13_0: -; CHECK-81M-NEXT: .long 0x00004900 @ float 2.61874657E-41 +; CHECK-NO-MVE-LABEL: h3: +; CHECK-NO-MVE: @ %bb.0: @ %entry +; CHECK-NO-MVE-NEXT: push {r7, lr} +; CHECK-NO-MVE-NEXT: vldr s0, .LCPI13_0 +; CHECK-NO-MVE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-NO-MVE-NEXT: bic r0, r0, #1 +; CHECK-NO-MVE-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31} +; CHECK-NO-MVE-NEXT: vscclrm {s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr} +; CHECK-NO-MVE-NEXT: vstr fpcxts, [sp, #-8]! 
+; CHECK-NO-MVE-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr} +; CHECK-NO-MVE-NEXT: blxns r0 +; CHECK-NO-MVE-NEXT: vldr fpcxts, [sp], #8 +; CHECK-NO-MVE-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31} +; CHECK-NO-MVE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-NO-MVE-NEXT: pop {r7, pc} +; CHECK-NO-MVE-NEXT: .p2align 2 +; CHECK-NO-MVE-NEXT: @ %bb.1: +; CHECK-NO-MVE-NEXT: .LCPI13_0: +; CHECK-NO-MVE-NEXT: .long 0x00004900 @ float 2.61874657E-41 +; +; CHECK-MVE-LABEL: h3: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: push {r7, lr} +; CHECK-MVE-NEXT: vmov.f16 s0, #1.000000e+01 +; CHECK-MVE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-MVE-NEXT: bic r0, r0, #1 +; CHECK-MVE-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31} +; CHECK-MVE-NEXT: vscclrm {s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr} +; CHECK-MVE-NEXT: vstr fpcxts, [sp, #-8]! +; CHECK-MVE-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr} +; CHECK-MVE-NEXT: blxns r0 +; CHECK-MVE-NEXT: vldr fpcxts, [sp], #8 +; CHECK-MVE-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31} +; CHECK-MVE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-MVE-NEXT: pop {r7, pc} entry: %call = tail call half %hptr(half 10.0) "cmse_nonsecure_call" nounwind ret half %call @@ -1123,7 +1151,7 @@ define half @h1_minsize(half (half)* nocapture %hptr) "cmse_nonsecure_entry" min ; CHECK-MVE: @ %bb.0: @ %entry ; CHECK-MVE-NEXT: vstr fpcxtns, [sp, #-4]! 
; CHECK-MVE-NEXT: push {r6, r7, lr} -; CHECK-MVE-NEXT: vldr s0, .LCPI15_0 +; CHECK-MVE-NEXT: vmov.f16 s0, #1.000000e+01 ; CHECK-MVE-NEXT: blx r0 ; CHECK-MVE-NEXT: vmov.f16 r0, s0 ; CHECK-MVE-NEXT: vmov s0, r0 @@ -1132,10 +1160,6 @@ define half @h1_minsize(half (half)* nocapture %hptr) "cmse_nonsecure_entry" min ; CHECK-MVE-NEXT: vldr fpcxtns, [sp], #4 ; CHECK-MVE-NEXT: clrm {r0, r1, r2, r3, r12, apsr} ; CHECK-MVE-NEXT: bxns lr -; CHECK-MVE-NEXT: .p2align 2 -; CHECK-MVE-NEXT: @ %bb.1: -; CHECK-MVE-NEXT: .LCPI15_0: -; CHECK-MVE-NEXT: .long 0x00004900 @ float 2.61874657E-41 entry: %call = call half %hptr(half 10.0) nounwind ret half %call diff --git a/llvm/test/CodeGen/ARM/fp16-bitcast.ll b/llvm/test/CodeGen/ARM/fp16-bitcast.ll index ad3dc0a9efbf..997d3603437d 100644 --- a/llvm/test/CodeGen/ARM/fp16-bitcast.ll +++ b/llvm/test/CodeGen/ARM/fp16-bitcast.ll @@ -152,12 +152,8 @@ define half @constcall() { ; ; CHECK-FP16-HARD-LABEL: constcall: ; CHECK-FP16-HARD: @ %bb.0: @ %entry -; CHECK-FP16-HARD-NEXT: vldr s0, .LCPI4_0 +; CHECK-FP16-HARD-NEXT: vmov.f16 s0, #1.000000e+01 ; CHECK-FP16-HARD-NEXT: b ccc -; CHECK-FP16-HARD-NEXT: .p2align 2 -; CHECK-FP16-HARD-NEXT: @ %bb.1: -; CHECK-FP16-HARD-NEXT: .LCPI4_0: -; CHECK-FP16-HARD-NEXT: .long 0x00004900 @ float 2.61874657E-41 entry: %call = tail call fast half @ccc(half 0xH4900) ret half %call