forked from OSchip/llvm-project
[Arm] Do not lower vmax/vmin to Neon instructions
On some Arm cores there is a performance penalty when forwarding from an S register to a D register. Calculating VMAX in a D register creates false forwarding hazards, so don't do that unless we're on a core which specifically asks for it. Patch by James Greenhalgh. Differential Revision: https://reviews.llvm.org/D75248
This commit is contained in:
parent
18c19441d1
commit
f0de8d0940
|
@ -1420,12 +1420,16 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
|
|||
}
|
||||
|
||||
if (Subtarget->hasNEON()) {
|
||||
// vmin and vmax aren't available in a scalar form, so we use
|
||||
// a NEON instruction with an undef lane instead.
|
||||
setOperationAction(ISD::FMINIMUM, MVT::f16, Legal);
|
||||
setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal);
|
||||
setOperationAction(ISD::FMINIMUM, MVT::f32, Legal);
|
||||
setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal);
|
||||
// vmin and vmax aren't available in a scalar form, so we can use
|
||||
// a NEON instruction with an undef lane instead. This has a performance
|
||||
// penalty on some cores, so we don't do this unless we have been
|
||||
// asked to by the core tuning model.
|
||||
if (Subtarget->useNEONForSinglePrecisionFP()) {
|
||||
setOperationAction(ISD::FMINIMUM, MVT::f32, Legal);
|
||||
setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal);
|
||||
setOperationAction(ISD::FMINIMUM, MVT::f16, Legal);
|
||||
setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal);
|
||||
}
|
||||
setOperationAction(ISD::FMINIMUM, MVT::v2f32, Legal);
|
||||
setOperationAction(ISD::FMAXIMUM, MVT::v2f32, Legal);
|
||||
setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal);
|
||||
|
|
|
@ -446,7 +446,9 @@ define void @test_minimum(half* %p) {
|
|||
; CHECK-LABEL: test_minimum:
|
||||
; CHECK: vldr.16 s2, [r0]
|
||||
; CHECK-NEXT: vmov.f16 s0, #1.000000e+00
|
||||
; CHECK-NEXT: vmin.f16 d0, d1, d0
|
||||
; CHECK-NEXT: vcmp.f16 s2, s0
|
||||
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
|
||||
; CHECK-NEXT: vselge.f16 s0, s0, s2
|
||||
; CHECK-NEXT: vstr.16 s0, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
%a = load half, half* %p, align 2
|
||||
|
@ -460,7 +462,9 @@ define void @test_maximum(half* %p) {
|
|||
; CHECK-LABEL: test_maximum:
|
||||
; CHECK: vldr.16 s2, [r0]
|
||||
; CHECK-NEXT: vmov.f16 s0, #1.000000e+00
|
||||
; CHECK-NEXT: vmax.f16 d0, d1, d0
|
||||
; CHECK-NEXT: vcmp.f16 s0, s2
|
||||
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
|
||||
; CHECK-NEXT: vselge.f16 s0, s0, s2
|
||||
; CHECK-NEXT: vstr.16 s0, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
%a = load half, half* %p, align 2
|
||||
|
|
|
@ -665,7 +665,9 @@ define void @test_maxnum(half* %p, half* %q) #0 {
|
|||
; CHECK-LIBCALL: bl __aeabi_h2f
|
||||
; CHECK-LIBCALL-VFP: vmov.f32 s{{[0-9]+}}, #1.000000e+00
|
||||
; CHECK-NOVFP: mov r{{[0-9]+}}, #1065353216
|
||||
; CHECK-VFP: vmin.f32
|
||||
; CHECK-VFP: vcmp.f32
|
||||
; CHECK-VFP: vmrs
|
||||
; CHECK-VFP: vmovlt.f32
|
||||
; CHECK-NOVFP: bl __aeabi_fcmpge
|
||||
; CHECK-FP16: vcvtb.f16.f32
|
||||
; CHECK-LIBCALL: bl __aeabi_f2h
|
||||
|
@ -683,7 +685,9 @@ define void @test_minimum(half* %p) #0 {
|
|||
; CHECK-LIBCALL: bl __aeabi_h2f
|
||||
; CHECK-LIBCALL-VFP: vmov.f32 s0, #1.000000e+00
|
||||
; CHECK-NOVFP: mov r{{[0-9]+}}, #1065353216
|
||||
; CHECK-VFP: vmax.f32
|
||||
; CHECK-VFP: vcmp.f32
|
||||
; CHECK-VFP: vmrs
|
||||
; CHECK-VFP: vmovhi.f32
|
||||
; CHECK-NOVFP: bl __aeabi_fcmple
|
||||
; CHECK-FP16: vcvtb.f16.f32
|
||||
; CHECK-LIBCALL: bl __aeabi_f2h
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=armv8-eabi -mattr=+fullfp16 | FileCheck %s
|
||||
; RUN: llc < %s -mtriple thumbv7a -mattr=+fullfp16 | FileCheck %s
|
||||
|
||||
|
@ -9,7 +10,14 @@
|
|||
|
||||
define half @fp16_vminnm_o(i16 signext %a, i16 signext %b) {
|
||||
; CHECK-LABEL: fp16_vminnm_o:
|
||||
; CHECK-NOT: vminnm.f16
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov.f16 s0, r2
|
||||
; CHECK-NEXT: vmov.f16 s2, r1
|
||||
; CHECK-NEXT: vcmp.f16 s0, s2
|
||||
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
|
||||
; CHECK-NEXT: vselgt.f16 s0, s2, s0
|
||||
; CHECK-NEXT: vstr.16 s0, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = bitcast i16 %a to half
|
||||
%1 = bitcast i16 %b to half
|
||||
|
@ -20,7 +28,14 @@ entry:
|
|||
|
||||
define half @fp16_vminnm_o_rev(i16 signext %a, i16 signext %b) {
|
||||
; CHECK-LABEL: fp16_vminnm_o_rev:
|
||||
; CHECK-NOT: vminnm.f16
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov.f16 s0, r2
|
||||
; CHECK-NEXT: vmov.f16 s2, r1
|
||||
; CHECK-NEXT: vcmp.f16 s2, s0
|
||||
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
|
||||
; CHECK-NEXT: vselgt.f16 s0, s2, s0
|
||||
; CHECK-NEXT: vstr.16 s0, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = bitcast i16 %a to half
|
||||
%1 = bitcast i16 %b to half
|
||||
|
@ -31,7 +46,14 @@ entry:
|
|||
|
||||
define half @fp16_vminnm_u(i16 signext %a, i16 signext %b) {
|
||||
; CHECK-LABEL: fp16_vminnm_u:
|
||||
; CHECK-NOT: vminnm.f16
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov.f16 s0, r1
|
||||
; CHECK-NEXT: vmov.f16 s2, r2
|
||||
; CHECK-NEXT: vcmp.f16 s0, s2
|
||||
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
|
||||
; CHECK-NEXT: vselge.f16 s0, s2, s0
|
||||
; CHECK-NEXT: vstr.16 s0, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = bitcast i16 %a to half
|
||||
%1 = bitcast i16 %b to half
|
||||
|
@ -42,7 +64,14 @@ entry:
|
|||
|
||||
define half @fp16_vminnm_ule(i16 signext %a, i16 signext %b) {
|
||||
; CHECK-LABEL: fp16_vminnm_ule:
|
||||
; CHECK-NOT: vminnm.f16
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov.f16 s0, r1
|
||||
; CHECK-NEXT: vmov.f16 s2, r2
|
||||
; CHECK-NEXT: vcmp.f16 s0, s2
|
||||
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
|
||||
; CHECK-NEXT: vselgt.f16 s0, s2, s0
|
||||
; CHECK-NEXT: vstr.16 s0, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = bitcast i16 %a to half
|
||||
%1 = bitcast i16 %b to half
|
||||
|
@ -53,7 +82,14 @@ entry:
|
|||
|
||||
define half @fp16_vminnm_u_rev(i16 signext %a, i16 signext %b) {
|
||||
; CHECK-LABEL: fp16_vminnm_u_rev:
|
||||
; CHECK-NOT: vminnm.f16
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov.f16 s0, r2
|
||||
; CHECK-NEXT: vmov.f16 s2, r1
|
||||
; CHECK-NEXT: vcmp.f16 s0, s2
|
||||
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
|
||||
; CHECK-NEXT: vselge.f16 s0, s2, s0
|
||||
; CHECK-NEXT: vstr.16 s0, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = bitcast i16 %a to half
|
||||
%1 = bitcast i16 %b to half
|
||||
|
@ -64,7 +100,14 @@ entry:
|
|||
|
||||
define half @fp16_vmaxnm_o(i16 signext %a, i16 signext %b) {
|
||||
; CHECK-LABEL: fp16_vmaxnm_o:
|
||||
; CHECK-NOT: vmaxnm.f16
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov.f16 s0, r2
|
||||
; CHECK-NEXT: vmov.f16 s2, r1
|
||||
; CHECK-NEXT: vcmp.f16 s2, s0
|
||||
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
|
||||
; CHECK-NEXT: vselgt.f16 s0, s2, s0
|
||||
; CHECK-NEXT: vstr.16 s0, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = bitcast i16 %a to half
|
||||
%1 = bitcast i16 %b to half
|
||||
|
@ -75,7 +118,14 @@ entry:
|
|||
|
||||
define half @fp16_vmaxnm_oge(i16 signext %a, i16 signext %b) {
|
||||
; CHECK-LABEL: fp16_vmaxnm_oge:
|
||||
; CHECK-NOT: vmaxnm.f16
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov.f16 s0, r2
|
||||
; CHECK-NEXT: vmov.f16 s2, r1
|
||||
; CHECK-NEXT: vcmp.f16 s2, s0
|
||||
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
|
||||
; CHECK-NEXT: vselge.f16 s0, s2, s0
|
||||
; CHECK-NEXT: vstr.16 s0, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = bitcast i16 %a to half
|
||||
%1 = bitcast i16 %b to half
|
||||
|
@ -86,7 +136,14 @@ entry:
|
|||
|
||||
define half @fp16_vmaxnm_o_rev(i16 signext %a, i16 signext %b) {
|
||||
; CHECK-LABEL: fp16_vmaxnm_o_rev:
|
||||
; CHECK-NOT: vmaxnm.f16
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov.f16 s0, r1
|
||||
; CHECK-NEXT: vmov.f16 s2, r2
|
||||
; CHECK-NEXT: vcmp.f16 s2, s0
|
||||
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
|
||||
; CHECK-NEXT: vselgt.f16 s0, s2, s0
|
||||
; CHECK-NEXT: vstr.16 s0, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = bitcast i16 %a to half
|
||||
%1 = bitcast i16 %b to half
|
||||
|
@ -97,7 +154,14 @@ entry:
|
|||
|
||||
define half @fp16_vmaxnm_ole_rev(i16 signext %a, i16 signext %b) {
|
||||
; CHECK-LABEL: fp16_vmaxnm_ole_rev:
|
||||
; CHECK-NOT: vmaxnm.f16
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov.f16 s0, r1
|
||||
; CHECK-NEXT: vmov.f16 s2, r2
|
||||
; CHECK-NEXT: vcmp.f16 s2, s0
|
||||
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
|
||||
; CHECK-NEXT: vselge.f16 s0, s2, s0
|
||||
; CHECK-NEXT: vstr.16 s0, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = bitcast i16 %a to half
|
||||
%1 = bitcast i16 %b to half
|
||||
|
@ -108,7 +172,14 @@ entry:
|
|||
|
||||
define half @fp16_vmaxnm_u(i16 signext %a, i16 signext %b) {
|
||||
; CHECK-LABEL: fp16_vmaxnm_u:
|
||||
; CHECK-NOT: vmaxnm.f16
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov.f16 s0, r1
|
||||
; CHECK-NEXT: vmov.f16 s2, r2
|
||||
; CHECK-NEXT: vcmp.f16 s2, s0
|
||||
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
|
||||
; CHECK-NEXT: vselge.f16 s0, s2, s0
|
||||
; CHECK-NEXT: vstr.16 s0, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = bitcast i16 %a to half
|
||||
%1 = bitcast i16 %b to half
|
||||
|
@ -119,7 +190,14 @@ entry:
|
|||
|
||||
define half @fp16_vmaxnm_uge(i16 signext %a, i16 signext %b) {
|
||||
; CHECK-LABEL: fp16_vmaxnm_uge:
|
||||
; CHECK-NOT: vmaxnm.f16
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov.f16 s0, r1
|
||||
; CHECK-NEXT: vmov.f16 s2, r2
|
||||
; CHECK-NEXT: vcmp.f16 s2, s0
|
||||
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
|
||||
; CHECK-NEXT: vselgt.f16 s0, s2, s0
|
||||
; CHECK-NEXT: vstr.16 s0, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = bitcast i16 %a to half
|
||||
%1 = bitcast i16 %b to half
|
||||
|
@ -130,7 +208,14 @@ entry:
|
|||
|
||||
define half @fp16_vmaxnm_u_rev(i16 signext %a, i16 signext %b) {
|
||||
; CHECK-LABEL: fp16_vmaxnm_u_rev:
|
||||
; CHECK-NOT: vmaxnm.f16
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov.f16 s0, r2
|
||||
; CHECK-NEXT: vmov.f16 s2, r1
|
||||
; CHECK-NEXT: vcmp.f16 s2, s0
|
||||
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
|
||||
; CHECK-NEXT: vselge.f16 s0, s2, s0
|
||||
; CHECK-NEXT: vstr.16 s0, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = bitcast i16 %a to half
|
||||
%1 = bitcast i16 %b to half
|
||||
|
@ -142,12 +227,21 @@ entry:
|
|||
; known non-NaNs
|
||||
|
||||
define half @fp16_vminnm_NNNo(i16 signext %a) {
|
||||
; CHECK-LABEL: fp16_vminnm_NNNo:
|
||||
; CHECK: vldr.16 [[S0:s[0-9]]], .LCPI{{.*}}
|
||||
; CHECK: vmov.f16 [[S2:s[0-9]]], #1.200000e+01
|
||||
; CHECK: vmov.f16 [[S4:s[0-9]]], r{{.}}
|
||||
; CHECK: vminnm.f16 s2, [[S4]], [[S2]]
|
||||
; CHECK: vmin.f16 d0, d1, d0
|
||||
; CHECK-LABEL: fp16_vminnm_NNNo:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov.f16 s0, r1
|
||||
; CHECK-NEXT: vmov.f16 s2, #1.200000e+01
|
||||
; CHECK-NEXT: vminnm.f16 s0, s0, s2
|
||||
; CHECK-NEXT: vldr.16 s2, .LCPI12_0
|
||||
; CHECK-NEXT: vcmp.f16 s0, s2
|
||||
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
|
||||
; CHECK-NEXT: vselgt.f16 s0, s2, s0
|
||||
; CHECK-NEXT: vstr.16 s0, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: .p2align 1
|
||||
; CHECK-NEXT: @ %bb.1:
|
||||
; CHECK-NEXT: .LCPI12_0:
|
||||
; CHECK-NEXT: .short 0x5040 @ half 34
|
||||
entry:
|
||||
%0 = bitcast i16 %a to half
|
||||
%cmp1 = fcmp olt half %0, 12.
|
||||
|
@ -158,12 +252,23 @@ entry:
|
|||
}
|
||||
|
||||
define half @fp16_vminnm_NNNo_rev(i16 signext %a) {
|
||||
; CHECK-LABEL: fp16_vminnm_NNNo_rev:
|
||||
; CHECK: vldr.16 [[S0:s[0-9]]], .LCPI{{.*}}
|
||||
; CHECK: vmov.f16 [[S2:s[0-9]]], r{{.}}
|
||||
; CHECK: vmin.f16 d0, d1, d0
|
||||
; CHECK: vldr.16 [[S2:s[0-9]]], .LCPI{{.*}}
|
||||
; CHECK: vminnm.f16 s0, [[S0]], [[S2]]
|
||||
; CHECK-LABEL: fp16_vminnm_NNNo_rev:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vldr.16 s2, .LCPI13_0
|
||||
; CHECK-NEXT: vmov.f16 s0, r1
|
||||
; CHECK-NEXT: vcmp.f16 s0, s2
|
||||
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
|
||||
; CHECK-NEXT: vselgt.f16 s0, s2, s0
|
||||
; CHECK-NEXT: vldr.16 s2, .LCPI13_1
|
||||
; CHECK-NEXT: vminnm.f16 s0, s0, s2
|
||||
; CHECK-NEXT: vstr.16 s0, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: .p2align 1
|
||||
; CHECK-NEXT: @ %bb.1:
|
||||
; CHECK-NEXT: .LCPI13_0:
|
||||
; CHECK-NEXT: .short 0x5300 @ half 56
|
||||
; CHECK-NEXT: .LCPI13_1:
|
||||
; CHECK-NEXT: .short 0x54e0 @ half 78
|
||||
entry:
|
||||
%0 = bitcast i16 %a to half
|
||||
%cmp1 = fcmp ogt half %0, 56.
|
||||
|
@ -175,11 +280,20 @@ entry:
|
|||
|
||||
define half @fp16_vminnm_NNNu(i16 signext %b) {
|
||||
; CHECK-LABEL: fp16_vminnm_NNNu:
|
||||
; CHECK: vldr.16 [[S0:s[0-9]]], .LCPI{{.*}}
|
||||
; CHECK: vmov.f16 [[S2:s[0-9]]], #1.200000e+01
|
||||
; CHECK: vmov.f16 [[S4:s[0-9]]], r{{.}}
|
||||
; CHECK: vminnm.f16 s2, [[S4]], [[S2]]
|
||||
; CHECK: vmin.f16 d0, d1, d0
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov.f16 s0, r1
|
||||
; CHECK-NEXT: vmov.f16 s2, #1.200000e+01
|
||||
; CHECK-NEXT: vminnm.f16 s0, s0, s2
|
||||
; CHECK-NEXT: vldr.16 s2, .LCPI14_0
|
||||
; CHECK-NEXT: vcmp.f16 s0, s2
|
||||
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
|
||||
; CHECK-NEXT: vselge.f16 s0, s2, s0
|
||||
; CHECK-NEXT: vstr.16 s0, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: .p2align 1
|
||||
; CHECK-NEXT: @ %bb.1:
|
||||
; CHECK-NEXT: .LCPI14_0:
|
||||
; CHECK-NEXT: .short 0x5040 @ half 34
|
||||
entry:
|
||||
%0 = bitcast i16 %b to half
|
||||
%cmp1 = fcmp ult half 12., %0
|
||||
|
@ -190,12 +304,23 @@ entry:
|
|||
}
|
||||
|
||||
define half @fp16_vminnm_NNNule(i16 signext %b) {
|
||||
; CHECK-LABEL: fp16_vminnm_NNNule:
|
||||
; CHECK: vldr.16 [[S2:s[0-9]]], .LCPI{{.*}}
|
||||
; CHECK: vmov.f16 [[S4:s[0-9]]], r{{.}}
|
||||
; CHECK: vldr.16 [[S0:s[0-9]]], .LCPI{{.*}}
|
||||
; CHECK: vminnm.f16 s2, [[S4]], [[S2]]
|
||||
; CHECK: vmin.f16 d0, d1, d0
|
||||
; CHECK-LABEL: fp16_vminnm_NNNule:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vldr.16 s2, .LCPI15_0
|
||||
; CHECK-NEXT: vmov.f16 s0, r1
|
||||
; CHECK-NEXT: vminnm.f16 s0, s0, s2
|
||||
; CHECK-NEXT: vldr.16 s2, .LCPI15_1
|
||||
; CHECK-NEXT: vcmp.f16 s0, s2
|
||||
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
|
||||
; CHECK-NEXT: vselgt.f16 s0, s2, s0
|
||||
; CHECK-NEXT: vstr.16 s0, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: .p2align 1
|
||||
; CHECK-NEXT: @ %bb.1:
|
||||
; CHECK-NEXT: .LCPI15_0:
|
||||
; CHECK-NEXT: .short 0x5040 @ half 34
|
||||
; CHECK-NEXT: .LCPI15_1:
|
||||
; CHECK-NEXT: .short 0x5300 @ half 56
|
||||
|
||||
entry:
|
||||
%0 = bitcast i16 %b to half
|
||||
|
@ -207,13 +332,24 @@ entry:
|
|||
}
|
||||
|
||||
define half @fp16_vminnm_NNNu_rev(i16 signext %b) {
|
||||
; CHECK-LABEL: fp16_vminnm_NNNu_rev:
|
||||
; CHECK-LABEL: fp16_vminnm_NNNu_rev:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vldr.16 s2, .LCPI16_0
|
||||
; CHECK-NEXT: vmov.f16 s0, r1
|
||||
; CHECK-NEXT: vcmp.f16 s0, s2
|
||||
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
|
||||
; CHECK-NEXT: vselge.f16 s0, s2, s0
|
||||
; CHECK-NEXT: vldr.16 s2, .LCPI16_1
|
||||
; CHECK-NEXT: vminnm.f16 s0, s0, s2
|
||||
; CHECK-NEXT: vstr.16 s0, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: .p2align 1
|
||||
; CHECK-NEXT: @ %bb.1:
|
||||
; CHECK-NEXT: .LCPI16_0:
|
||||
; CHECK-NEXT: .short 0x5300 @ half 56
|
||||
; CHECK-NEXT: .LCPI16_1:
|
||||
; CHECK-NEXT: .short 0x54e0 @ half 78
|
||||
|
||||
; CHECK: vldr.16 [[S0:s[0-9]]], .LCPI{{.*}}
|
||||
; CHECK: vmov.f16 [[S2:s[0-9]]], r{{.}}
|
||||
; CHECK: vmin.f16 d0, d1, d0
|
||||
; CHECK: vldr.16 [[S2:s[0-9]]], .LCPI{{.*}}
|
||||
; CHECK: vminnm.f16 s0, [[S0]], [[S2]]
|
||||
|
||||
entry:
|
||||
%0 = bitcast i16 %b to half
|
||||
|
@ -225,12 +361,21 @@ entry:
|
|||
}
|
||||
|
||||
define half @fp16_vmaxnm_NNNo(i16 signext %a) {
|
||||
; CHECK-LABEL: fp16_vmaxnm_NNNo:
|
||||
; CHECK: vldr.16 [[S0:s[0-9]]], .LCPI{{.*}}
|
||||
; CHECK: vmov.f16 [[S2:s[0-9]]], #1.200000e+01
|
||||
; CHECK: vmov.f16 [[S4:s[0-9]]], r{{.}}
|
||||
; CHECK: vmaxnm.f16 s2, [[S4]], [[S2]]
|
||||
; CHECK: vmax.f16 d0, d1, d0
|
||||
; CHECK-LABEL: fp16_vmaxnm_NNNo:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov.f16 s0, r1
|
||||
; CHECK-NEXT: vmov.f16 s2, #1.200000e+01
|
||||
; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
|
||||
; CHECK-NEXT: vldr.16 s2, .LCPI17_0
|
||||
; CHECK-NEXT: vcmp.f16 s2, s0
|
||||
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
|
||||
; CHECK-NEXT: vselgt.f16 s0, s2, s0
|
||||
; CHECK-NEXT: vstr.16 s0, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: .p2align 1
|
||||
; CHECK-NEXT: @ %bb.1:
|
||||
; CHECK-NEXT: .LCPI17_0:
|
||||
; CHECK-NEXT: .short 0x5040 @ half 34
|
||||
entry:
|
||||
%0 = bitcast i16 %a to half
|
||||
%cmp1 = fcmp ogt half %0, 12.
|
||||
|
@ -241,12 +386,23 @@ entry:
|
|||
}
|
||||
|
||||
define half @fp16_vmaxnm_NNNoge(i16 signext %a) {
|
||||
; CHECK-LABEL: fp16_vmaxnm_NNNoge:
|
||||
; CHECK: vldr.16 [[S2:s[0-9]]], .LCPI{{.*}}
|
||||
; CHECK: vmov.f16 [[S4:s[0-9]]], r{{.}}
|
||||
; CHECK: vldr.16 [[S0:s[0-9]]], .LCPI{{.*}}
|
||||
; CHECK: vmaxnm.f16 s2, [[S4]], [[S2]]
|
||||
; CHECK: vmax.f16 d0, d1, d0
|
||||
; CHECK-LABEL: fp16_vmaxnm_NNNoge:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vldr.16 s2, .LCPI18_0
|
||||
; CHECK-NEXT: vmov.f16 s0, r1
|
||||
; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
|
||||
; CHECK-NEXT: vldr.16 s2, .LCPI18_1
|
||||
; CHECK-NEXT: vcmp.f16 s2, s0
|
||||
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
|
||||
; CHECK-NEXT: vselge.f16 s0, s2, s0
|
||||
; CHECK-NEXT: vstr.16 s0, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: .p2align 1
|
||||
; CHECK-NEXT: @ %bb.1:
|
||||
; CHECK-NEXT: .LCPI18_0:
|
||||
; CHECK-NEXT: .short 0x5040 @ half 34
|
||||
; CHECK-NEXT: .LCPI18_1:
|
||||
; CHECK-NEXT: .short 0x5300 @ half 56
|
||||
entry:
|
||||
%0 = bitcast i16 %a to half
|
||||
%cmp1 = fcmp oge half %0, 34.
|
||||
|
@ -257,12 +413,23 @@ entry:
|
|||
}
|
||||
|
||||
define half @fp16_vmaxnm_NNNo_rev(i16 signext %a) {
|
||||
; CHECK-LABEL: fp16_vmaxnm_NNNo_rev:
|
||||
; CHECK: vldr.16 [[S0:s[0-9]]], .LCPI{{.*}}
|
||||
; CHECK: vmov.f16 [[S2:s[0-9]]], r{{.}}
|
||||
; CHECK: vmax.f16 d0, d1, d0
|
||||
; CHECK: vldr.16 [[S2:s[0-9]]], .LCPI{{.*}}
|
||||
; CHECK: vmaxnm.f16 s0, [[S0]], [[S2]]
|
||||
; CHECK-LABEL: fp16_vmaxnm_NNNo_rev:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vldr.16 s2, .LCPI19_0
|
||||
; CHECK-NEXT: vmov.f16 s0, r1
|
||||
; CHECK-NEXT: vcmp.f16 s2, s0
|
||||
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
|
||||
; CHECK-NEXT: vselgt.f16 s0, s2, s0
|
||||
; CHECK-NEXT: vldr.16 s2, .LCPI19_1
|
||||
; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
|
||||
; CHECK-NEXT: vstr.16 s0, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: .p2align 1
|
||||
; CHECK-NEXT: @ %bb.1:
|
||||
; CHECK-NEXT: .LCPI19_0:
|
||||
; CHECK-NEXT: .short 0x5300 @ half 56
|
||||
; CHECK-NEXT: .LCPI19_1:
|
||||
; CHECK-NEXT: .short 0x54e0 @ half 78
|
||||
entry:
|
||||
%0 = bitcast i16 %a to half
|
||||
%cmp1 = fcmp olt half %0, 56.
|
||||
|
@ -273,12 +440,23 @@ entry:
|
|||
}
|
||||
|
||||
define half @fp16_vmaxnm_NNNole_rev(i16 signext %a) {
|
||||
; CHECK-LABEL: fp16_vmaxnm_NNNole_rev:
|
||||
; CHECK: vldr.16 [[S0:s[0-9]]], .LCPI{{.*}}
|
||||
; CHECK: vmov.f16 [[S2:s[0-9]]], r{{.}}
|
||||
; CHECK: vmax.f16 d0, d1, d0
|
||||
; CHECK: vldr.16 [[S2:s[0-9]]], .LCPI{{.*}}
|
||||
; CHECK: vmaxnm.f16 s0, [[S0]], [[S2]]
|
||||
; CHECK-LABEL: fp16_vmaxnm_NNNole_rev:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vldr.16 s2, .LCPI20_0
|
||||
; CHECK-NEXT: vmov.f16 s0, r1
|
||||
; CHECK-NEXT: vcmp.f16 s2, s0
|
||||
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
|
||||
; CHECK-NEXT: vselge.f16 s0, s2, s0
|
||||
; CHECK-NEXT: vldr.16 s2, .LCPI20_1
|
||||
; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
|
||||
; CHECK-NEXT: vstr.16 s0, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: .p2align 1
|
||||
; CHECK-NEXT: @ %bb.1:
|
||||
; CHECK-NEXT: .LCPI20_0:
|
||||
; CHECK-NEXT: .short 0x54e0 @ half 78
|
||||
; CHECK-NEXT: .LCPI20_1:
|
||||
; CHECK-NEXT: .short 0x55a0 @ half 90
|
||||
entry:
|
||||
%0 = bitcast i16 %a to half
|
||||
%cmp1 = fcmp ole half %0, 78.
|
||||
|
@ -289,12 +467,21 @@ entry:
|
|||
}
|
||||
|
||||
define half @fp16_vmaxnm_NNNu(i16 signext %b) {
|
||||
; CHECK-LABEL: fp16_vmaxnm_NNNu:
|
||||
; CHECK: vldr.16 [[S0:s[0-9]]], .LCPI{{.*}}
|
||||
; CHECK: vmov.f16 [[S2:s[0-9]]], #1.200000e+01
|
||||
; CHECK: vmov.f16 [[S4:s[0-9]]], r{{.}}
|
||||
; CHECK: vmaxnm.f16 s2, [[S4]], [[S2]]
|
||||
; CHECK: vmax.f16 d0, d1, d0
|
||||
; CHECK-LABEL: fp16_vmaxnm_NNNu:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov.f16 s0, r1
|
||||
; CHECK-NEXT: vmov.f16 s2, #1.200000e+01
|
||||
; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
|
||||
; CHECK-NEXT: vldr.16 s2, .LCPI21_0
|
||||
; CHECK-NEXT: vcmp.f16 s2, s0
|
||||
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
|
||||
; CHECK-NEXT: vselge.f16 s0, s2, s0
|
||||
; CHECK-NEXT: vstr.16 s0, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: .p2align 1
|
||||
; CHECK-NEXT: @ %bb.1:
|
||||
; CHECK-NEXT: .LCPI21_0:
|
||||
; CHECK-NEXT: .short 0x5040 @ half 34
|
||||
entry:
|
||||
%0 = bitcast i16 %b to half
|
||||
%cmp1 = fcmp ugt half 12., %0
|
||||
|
@ -305,12 +492,23 @@ entry:
|
|||
}
|
||||
|
||||
define half @fp16_vmaxnm_NNNuge(i16 signext %b) {
|
||||
; CHECK-LABEL: fp16_vmaxnm_NNNuge:
|
||||
; CHECK: vldr.16 [[S2:s[0-9]]], .LCPI{{.*}}
|
||||
; CHECK: vmov.f16 [[S4:s[0-9]]], r{{.}}
|
||||
; CHECK: vldr.16 [[S0:s[0-9]]], .LCPI{{.*}}
|
||||
; CHECK: vmaxnm.f16 s2, [[S4]], [[S2]]
|
||||
; CHECK: vmax.f16 d0, d1, d0
|
||||
; CHECK-LABEL: fp16_vmaxnm_NNNuge:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vldr.16 s2, .LCPI22_0
|
||||
; CHECK-NEXT: vmov.f16 s0, r1
|
||||
; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
|
||||
; CHECK-NEXT: vldr.16 s2, .LCPI22_1
|
||||
; CHECK-NEXT: vcmp.f16 s2, s0
|
||||
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
|
||||
; CHECK-NEXT: vselgt.f16 s0, s2, s0
|
||||
; CHECK-NEXT: vstr.16 s0, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: .p2align 1
|
||||
; CHECK-NEXT: @ %bb.1:
|
||||
; CHECK-NEXT: .LCPI22_0:
|
||||
; CHECK-NEXT: .short 0x5040 @ half 34
|
||||
; CHECK-NEXT: .LCPI22_1:
|
||||
; CHECK-NEXT: .short 0x5300 @ half 56
|
||||
entry:
|
||||
%0 = bitcast i16 %b to half
|
||||
%cmp1 = fcmp uge half 34., %0
|
||||
|
@ -321,11 +519,20 @@ entry:
|
|||
}
|
||||
|
||||
define half @fp16_vminmaxnm_neg0(i16 signext %a) {
|
||||
; CHECK-LABEL: fp16_vminmaxnm_neg0:
|
||||
; CHECK: vldr.16 [[S0:s[0-9]]], .LCPI{{.*}}
|
||||
; CHECK: vmov.f16 [[S2:s[0-9]]], r{{.}}
|
||||
; CHECK: vminnm.f16 s2, [[S2]], [[S0]]
|
||||
; CHECK: vmax.f16 d0, d1, d0
|
||||
; CHECK-LABEL: fp16_vminmaxnm_neg0:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vldr.16 s0, .LCPI23_0
|
||||
; CHECK-NEXT: vmov.f16 s2, r1
|
||||
; CHECK-NEXT: vminnm.f16 s2, s2, s0
|
||||
; CHECK-NEXT: vcmp.f16 s0, s2
|
||||
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
|
||||
; CHECK-NEXT: vselge.f16 s0, s0, s2
|
||||
; CHECK-NEXT: vstr.16 s0, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: .p2align 1
|
||||
; CHECK-NEXT: @ %bb.1:
|
||||
; CHECK-NEXT: .LCPI23_0:
|
||||
; CHECK-NEXT: .short 0x8000 @ half -0
|
||||
entry:
|
||||
%0 = bitcast i16 %a to half
|
||||
%cmp1 = fcmp olt half %0, -0.
|
||||
|
@ -336,11 +543,20 @@ entry:
|
|||
}
|
||||
|
||||
define half @fp16_vminmaxnm_e_0(i16 signext %a) {
|
||||
; CHECK-LABEL: fp16_vminmaxnm_e_0:
|
||||
; CHECK: vldr.16 [[S2:s[0-9]]], .LCPI{{.*}}
|
||||
; CHECK: vmov.f16 [[S0:s[0-9]]], r{{.}}
|
||||
; CHECK: vmin.f16 d0, d0, d1
|
||||
; CHECK: vmaxnm.f16 s0, [[S0]], [[S2]]
|
||||
; CHECK-LABEL: fp16_vminmaxnm_e_0:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov.f16 s0, r1
|
||||
; CHECK-NEXT: vldr.16 s2, .LCPI24_0
|
||||
; CHECK-NEXT: vcmp.f16 s0, #0
|
||||
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
|
||||
; CHECK-NEXT: vselge.f16 s0, s2, s0
|
||||
; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
|
||||
; CHECK-NEXT: vstr.16 s0, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: .p2align 1
|
||||
; CHECK-NEXT: @ %bb.1:
|
||||
; CHECK-NEXT: .LCPI24_0:
|
||||
; CHECK-NEXT: .short 0x0000 @ half 0
|
||||
entry:
|
||||
%0 = bitcast i16 %a to half
|
||||
%cmp1 = fcmp nsz ole half 0., %0
|
||||
|
@ -351,11 +567,20 @@ entry:
|
|||
}
|
||||
|
||||
define half @fp16_vminmaxnm_e_neg0(i16 signext %a) {
|
||||
; CHECK-LABEL: fp16_vminmaxnm_e_neg0:
|
||||
; CHECK: vldr.16 [[S0:s[0-9]]], .LCPI{{.*}}
|
||||
; CHECK: vmov.f16 [[S2:s[0-9]]], r{{.}}
|
||||
; CHECK: vminnm.f16 s2, [[S2]], [[S0]]
|
||||
; CHECK: vmax.f16 d0, d1, d0
|
||||
; CHECK-LABEL: fp16_vminmaxnm_e_neg0:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vldr.16 s0, .LCPI25_0
|
||||
; CHECK-NEXT: vmov.f16 s2, r1
|
||||
; CHECK-NEXT: vminnm.f16 s2, s2, s0
|
||||
; CHECK-NEXT: vcmp.f16 s0, s2
|
||||
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
|
||||
; CHECK-NEXT: vselge.f16 s0, s0, s2
|
||||
; CHECK-NEXT: vstr.16 s0, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: .p2align 1
|
||||
; CHECK-NEXT: @ %bb.1:
|
||||
; CHECK-NEXT: .LCPI25_0:
|
||||
; CHECK-NEXT: .short 0x8000 @ half -0
|
||||
entry:
|
||||
%0 = bitcast i16 %a to half
|
||||
%cmp1 = fcmp nsz ule half -0., %0
|
||||
|
|
|
@ -0,0 +1,25 @@
|
|||
; RUN: llc -mtriple=arm-eabihf -mattr=+neon < %s | FileCheck -check-prefixes=CHECK-NO_NEON %s
|
||||
; RUN: llc -mtriple=arm-eabihf -mattr=+neon,+neonfp < %s | FileCheck -check-prefixes=CHECK-NEON %s
|
||||
|
||||
define float @max_f32(float, float) {
|
||||
;CHECK-NEON: vmax.f32
|
||||
;CHECK-NO_NEON: vcmp.f32
|
||||
;CHECK-NO_NEON: vmrs
|
||||
;CHECK-NO_NEON: vmovgt.f32
|
||||
%3 = call nnan float @llvm.maxnum.f32(float %1, float %0)
|
||||
ret float %3
|
||||
}
|
||||
|
||||
declare float @llvm.maxnum.f32(float, float) #1
|
||||
|
||||
define float @min_f32(float, float) {
|
||||
;CHECK-NEON: vmin.f32
|
||||
;CHECK-NO_NEON: vcmp.f32
|
||||
;CHECK-NO_NEON: vmrs
|
||||
;CHECK-NO_NEON: vmovlt.f32
|
||||
%3 = call nnan float @llvm.minnum.f32(float %1, float %0)
|
||||
ret float %3
|
||||
}
|
||||
|
||||
declare float @llvm.minnum.f32(float, float) #1
|
||||
|
|
@ -297,14 +297,18 @@ declare float @llvm.minnum.f32(float %a, float %b)
|
|||
|
||||
define float @maxnum(float %a, float %b) {
|
||||
;CHECK-LABEL: maxnum:
|
||||
;CHECK: vmax.f32
|
||||
;CHECK: vcmp.f32
|
||||
;CHECK-NEXT: vmrs
|
||||
;CHECK-NEXT: vmovgt.f32
|
||||
%r = call nnan float @llvm.maxnum.f32(float %a, float %b)
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define float @minnum(float %a, float %b) {
|
||||
;CHECK-LABEL: minnum:
|
||||
;CHECK: vmin.f32
|
||||
;CHECK: vcmp.f32
|
||||
;CHECK-NEXT: vmrs
|
||||
;CHECK-NEXT: vmovlt.f32
|
||||
%r = call nnan float @llvm.minnum.f32(float %a, float %b)
|
||||
ret float %r
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue