[AArch64][SVE/NEON] Add support for FROUNDEVEN for both NEON and fixed length SVE

Previously NEON used a target specific intrinsic for frintn, given that
the FROUNDEVEN ISD node now exists, move over to that instead and add
codegen support for that node for both NEON and fixed length SVE.

Differential Revision: https://reviews.llvm.org/D98487
This commit is contained in:
Bradley Smith 2021-03-12 11:46:58 +00:00
parent dbf8f2b089
commit cf0da91ba5
19 changed files with 438 additions and 29 deletions

View File

@@ -10620,17 +10620,23 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
}
case NEON::BI__builtin_neon_vrndnh_f16: {
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Int = Intrinsic::aarch64_neon_frintn;
Int = Builder.getIsFPConstrained()
? Intrinsic::experimental_constrained_roundeven
: Intrinsic::roundeven;
return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
}
case NEON::BI__builtin_neon_vrndn_v:
case NEON::BI__builtin_neon_vrndnq_v: {
Int = Intrinsic::aarch64_neon_frintn;
Int = Builder.getIsFPConstrained()
? Intrinsic::experimental_constrained_roundeven
: Intrinsic::roundeven;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
}
case NEON::BI__builtin_neon_vrndns_f32: {
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Int = Intrinsic::aarch64_neon_frintn;
Int = Builder.getIsFPConstrained()
? Intrinsic::experimental_constrained_roundeven
: Intrinsic::roundeven;
return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn");
}
case NEON::BI__builtin_neon_vrndph_f16: {

View File

@@ -18155,7 +18155,7 @@ float64x1_t test_vcvt_n_f64_u64(uint64x1_t a) {
// CHECK-LABEL: @test_vrndn_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VRNDN1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frintn.v1f64(<1 x double> %a)
// CHECK: [[VRNDN1_I:%.*]] = call <1 x double> @llvm.roundeven.v1f64(<1 x double> %a)
// CHECK: ret <1 x double> [[VRNDN1_I]]
float64x1_t test_vrndn_f64(float64x1_t a) {
return vrndn_f64(a);

View File

@@ -2287,7 +2287,7 @@ float64x2_t test_vcvt_high_f64_f32(float32x4_t a) {
// CHECK-LABEL: @test_vrndnq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[VRNDN1_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double> %a)
// CHECK: [[VRNDN1_I:%.*]] = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %a)
// CHECK: ret <2 x double> [[VRNDN1_I]]
float64x2_t test_vrndnq_f64(float64x2_t a) {
return vrndnq_f64(a);

View File

@@ -366,7 +366,7 @@ float16_t test_vrndmh_f16(float16_t a) {
}
// CHECK-LABEL: test_vrndnh_f16
// CHECK: [[RND:%.*]] = call half @llvm.aarch64.neon.frintn.f16(half %a)
// CHECK: [[RND:%.*]] = call half @llvm.roundeven.f16(half %a)
// CHECK: ret half [[RND]]
float16_t test_vrndnh_f16(float16_t a) {
return vrndnh_f16(a);

View File

@@ -348,14 +348,14 @@ float16x8_t test_vrndmq_f16(float16x8_t a) {
}
// CHECK-LABEL: test_vrndn_f16
// CHECK: [[RND:%.*]] = call <4 x half> @llvm.aarch64.neon.frintn.v4f16(<4 x half> %a)
// CHECK: [[RND:%.*]] = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %a)
// CHECK: ret <4 x half> [[RND]]
float16x4_t test_vrndn_f16(float16x4_t a) {
return vrndn_f16(a);
}
// CHECK-LABEL: test_vrndnq_f16
// CHECK: [[RND:%.*]] = call <8 x half> @llvm.aarch64.neon.frintn.v8f16(<8 x half> %a)
// CHECK: [[RND:%.*]] = call <8 x half> @llvm.roundeven.v8f16(<8 x half> %a)
// CHECK: ret <8 x half> [[RND]]
float16x8_t test_vrndnq_f16(float16x8_t a) {
return vrndnq_f16(a);

View File

@@ -41,7 +41,7 @@ float32x4_t test_vrndmq_f32(float32x4_t a) {
// CHECK-LABEL: define{{.*}} <2 x float> @test_vrndn_f32(<2 x float> %a)
// CHECK-A32: [[VRNDN_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrintn.v2f32(<2 x float> %a)
// CHECK-A64: [[VRNDN_V1_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float> %a)
// CHECK-A64: [[VRNDN_V1_I:%.*]] = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %a)
// CHECK: ret <2 x float> [[VRNDN_V1_I]]
float32x2_t test_vrndn_f32(float32x2_t a) {
return vrndn_f32(a);
@@ -49,7 +49,7 @@ float32x2_t test_vrndn_f32(float32x2_t a) {
// CHECK-LABEL: define{{.*}} <4 x float> @test_vrndnq_f32(<4 x float> %a)
// CHECK-A32: [[VRNDNQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrintn.v4f32(<4 x float> %a)
// CHECK-A64: [[VRNDNQ_V1_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float> %a)
// CHECK-A64: [[VRNDNQ_V1_I:%.*]] = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %a)
// CHECK: ret <4 x float> [[VRNDNQ_V1_I]]
float32x4_t test_vrndnq_f32(float32x4_t a) {
return vrndnq_f32(a);
@@ -105,7 +105,7 @@ float32x4_t test_vrndq_f32(float32x4_t a) {
// CHECK-LABEL: define{{.*}} float @test_vrndns_f32(float %a)
// CHECK-A32: [[VRNDN_I:%.*]] = call float @llvm.arm.neon.vrintn.f32(float %a)
// CHECK-A64: [[VRNDN_I:%.*]] = call float @llvm.aarch64.neon.frintn.f32(float %a)
// CHECK-A64: [[VRNDN_I:%.*]] = call float @llvm.roundeven.f32(float %a)
// CHECK: ret float [[VRNDN_I]]
float32_t test_vrndns_f32(float32_t a) {
return vrndns_f32(a);

View File

@@ -6,7 +6,7 @@ float64x2_t rnd5(float64x2_t a) { return vrndq_f64(a); }
// CHECK: call <2 x double> @llvm.trunc.v2f64(<2 x double>
float64x2_t rnd9(float64x2_t a) { return vrndnq_f64(a); }
// CHECK: call <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double>
// CHECK: call <2 x double> @llvm.roundeven.v2f64(<2 x double>
float64x2_t rnd13(float64x2_t a) { return vrndmq_f64(a); }
// CHECK: call <2 x double> @llvm.floor.v2f64(<2 x double>

View File

@@ -471,10 +471,6 @@ let TargetPrefix = "aarch64", IntrProperties = [IntrNoMem] in {
def int_aarch64_neon_fcvtzs : AdvSIMD_FPToIntRounding_Intrinsic;
def int_aarch64_neon_fcvtzu : AdvSIMD_FPToIntRounding_Intrinsic;
// Vector FP Rounding: only ties to even is unrepresented by a normal
// intrinsic.
def int_aarch64_neon_frintn : AdvSIMD_1FloatArg_Intrinsic;
// v8.5-A Vector FP Rounding
def int_aarch64_neon_frint32x : AdvSIMD_1FloatArg_Intrinsic;
def int_aarch64_neon_frint32z : AdvSIMD_1FloatArg_Intrinsic;

View File

@@ -152,10 +152,10 @@ def SDTIntTruncOp : SDTypeProfile<1, 1, [ // trunc
def SDTFPUnaryOp : SDTypeProfile<1, 1, [ // fneg, fsqrt, etc
SDTCisSameAs<0, 1>, SDTCisFP<0>
]>;
def SDTFPRoundOp : SDTypeProfile<1, 1, [ // fround
def SDTFPRoundOp : SDTypeProfile<1, 1, [ // fpround
SDTCisFP<0>, SDTCisFP<1>, SDTCisOpSmallerThanOp<0, 1>, SDTCisSameNumEltsAs<0, 1>
]>;
def SDTFPExtendOp : SDTypeProfile<1, 1, [ // fextend
def SDTFPExtendOp : SDTypeProfile<1, 1, [ // fpextend
SDTCisFP<0>, SDTCisFP<1>, SDTCisOpSmallerThanOp<1, 0>, SDTCisSameNumEltsAs<0, 1>
]>;
def SDTIntToFPOp : SDTypeProfile<1, 1, [ // [su]int_to_fp
@@ -486,6 +486,7 @@ def fceil : SDNode<"ISD::FCEIL" , SDTFPUnaryOp>;
def ffloor : SDNode<"ISD::FFLOOR" , SDTFPUnaryOp>;
def fnearbyint : SDNode<"ISD::FNEARBYINT" , SDTFPUnaryOp>;
def fround : SDNode<"ISD::FROUND" , SDTFPUnaryOp>;
def froundeven : SDNode<"ISD::FROUNDEVEN" , SDTFPUnaryOp>;
def lround : SDNode<"ISD::LROUND" , SDTFPToIntOp>;
def llround : SDNode<"ISD::LLROUND" , SDTFPToIntOp>;
@@ -547,6 +548,8 @@ def strict_llround : SDNode<"ISD::STRICT_LLROUND",
SDTFPToIntOp, [SDNPHasChain]>;
def strict_fround : SDNode<"ISD::STRICT_FROUND",
SDTFPUnaryOp, [SDNPHasChain]>;
def strict_froundeven : SDNode<"ISD::STRICT_FROUNDEVEN",
SDTFPUnaryOp, [SDNPHasChain]>;
def strict_ftrunc : SDNode<"ISD::STRICT_FTRUNC",
SDTFPUnaryOp, [SDNPHasChain]>;
def strict_fminnum : SDNode<"ISD::STRICT_FMINNUM",
@@ -1414,6 +1417,9 @@ def any_llround : PatFrags<(ops node:$src),
def any_fround : PatFrags<(ops node:$src),
[(strict_fround node:$src),
(fround node:$src)]>;
def any_froundeven : PatFrags<(ops node:$src),
[(strict_froundeven node:$src),
(froundeven node:$src)]>;
def any_ftrunc : PatFrags<(ops node:$src),
[(strict_ftrunc node:$src),
(ftrunc node:$src)]>;

View File

@@ -548,6 +548,11 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
F->arg_begin()->getType());
return true;
}
if (Name.startswith("aarch64.neon.frintn")) {
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::roundeven,
F->arg_begin()->getType());
return true;
}
if (Name.startswith("arm.neon.vclz")) {
Type* args[2] = {
F->arg_begin()->getType(),

View File

@@ -605,6 +605,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
setOperationAction(ISD::FRINT, MVT::f16, Promote);
setOperationAction(ISD::FROUND, MVT::f16, Promote);
setOperationAction(ISD::FROUNDEVEN, MVT::f16, Promote);
setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
@@ -624,6 +625,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FABS, MVT::v4f16, Expand);
setOperationAction(ISD::FNEG, MVT::v4f16, Expand);
setOperationAction(ISD::FROUND, MVT::v4f16, Expand);
setOperationAction(ISD::FROUNDEVEN, MVT::v4f16, Expand);
setOperationAction(ISD::FMA, MVT::v4f16, Expand);
setOperationAction(ISD::SETCC, MVT::v4f16, Expand);
setOperationAction(ISD::BR_CC, MVT::v4f16, Expand);
@@ -648,6 +650,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FNEARBYINT, MVT::v8f16, Expand);
setOperationAction(ISD::FNEG, MVT::v8f16, Expand);
setOperationAction(ISD::FROUND, MVT::v8f16, Expand);
setOperationAction(ISD::FROUNDEVEN, MVT::v8f16, Expand);
setOperationAction(ISD::FRINT, MVT::v8f16, Expand);
setOperationAction(ISD::FSQRT, MVT::v8f16, Expand);
setOperationAction(ISD::FSUB, MVT::v8f16, Expand);
@@ -667,6 +670,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FRINT, Ty, Legal);
setOperationAction(ISD::FTRUNC, Ty, Legal);
setOperationAction(ISD::FROUND, Ty, Legal);
setOperationAction(ISD::FROUNDEVEN, Ty, Legal);
setOperationAction(ISD::FMINNUM, Ty, Legal);
setOperationAction(ISD::FMAXNUM, Ty, Legal);
setOperationAction(ISD::FMINIMUM, Ty, Legal);
@@ -684,6 +688,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FRINT, MVT::f16, Legal);
setOperationAction(ISD::FTRUNC, MVT::f16, Legal);
setOperationAction(ISD::FROUND, MVT::f16, Legal);
setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal);
setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
setOperationAction(ISD::FMINIMUM, MVT::f16, Legal);
@@ -943,6 +948,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FPOW, MVT::v1f64, Expand);
setOperationAction(ISD::FREM, MVT::v1f64, Expand);
setOperationAction(ISD::FROUND, MVT::v1f64, Expand);
setOperationAction(ISD::FROUNDEVEN, MVT::v1f64, Expand);
setOperationAction(ISD::FRINT, MVT::v1f64, Expand);
setOperationAction(ISD::FSIN, MVT::v1f64, Expand);
setOperationAction(ISD::FSINCOS, MVT::v1f64, Expand);
@@ -1069,6 +1075,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FRINT, Ty, Legal);
setOperationAction(ISD::FTRUNC, Ty, Legal);
setOperationAction(ISD::FROUND, Ty, Legal);
setOperationAction(ISD::FROUNDEVEN, Ty, Legal);
}
if (Subtarget->hasFullFP16()) {
@@ -1079,6 +1086,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FRINT, Ty, Legal);
setOperationAction(ISD::FTRUNC, Ty, Legal);
setOperationAction(ISD::FROUND, Ty, Legal);
setOperationAction(ISD::FROUNDEVEN, Ty, Legal);
}
}
@@ -1403,6 +1411,7 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
setOperationAction(ISD::FNEG, VT, Custom);
setOperationAction(ISD::FRINT, VT, Custom);
setOperationAction(ISD::FROUND, VT, Custom);
setOperationAction(ISD::FROUNDEVEN, VT, Custom);
setOperationAction(ISD::FSQRT, VT, Custom);
setOperationAction(ISD::FSUB, VT, Custom);
setOperationAction(ISD::FTRUNC, VT, Custom);

View File

@@ -3796,12 +3796,9 @@ defm FNEG : SingleOperandFPData<0b0010, "fneg", fneg>;
defm FRINTA : SingleOperandFPData<0b1100, "frinta", fround>;
defm FRINTI : SingleOperandFPData<0b1111, "frinti", fnearbyint>;
defm FRINTM : SingleOperandFPData<0b1010, "frintm", ffloor>;
defm FRINTN : SingleOperandFPData<0b1000, "frintn", int_aarch64_neon_frintn>;
defm FRINTN : SingleOperandFPData<0b1000, "frintn", froundeven>;
defm FRINTP : SingleOperandFPData<0b1001, "frintp", fceil>;
def : Pat<(v1f64 (int_aarch64_neon_frintn (v1f64 FPR64:$Rn))),
(FRINTNDr FPR64:$Rn)>;
defm FRINTX : SingleOperandFPData<0b1110, "frintx", frint>;
defm FRINTZ : SingleOperandFPData<0b1011, "frintz", ftrunc>;
@@ -4090,7 +4087,7 @@ defm FRECPE : SIMDTwoVectorFP<0, 1, 0b11101, "frecpe", int_aarch64_neon_frecpe>;
defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", fround>;
defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", fnearbyint>;
defm FRINTM : SIMDTwoVectorFP<0, 0, 0b11001, "frintm", ffloor>;
defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", int_aarch64_neon_frintn>;
defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", froundeven>;
defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", fceil>;
defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", frint>;
defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", ftrunc>;

View File

@@ -590,7 +590,7 @@ define <2 x float> @frintn_2s(<2 x float> %A) nounwind {
;CHECK-NOT: ld1
;CHECK: frintn.2s v0, v0
;CHECK-NEXT: ret
%tmp3 = call <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float> %A)
%tmp3 = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %A)
ret <2 x float> %tmp3
}
@@ -599,7 +599,7 @@ define <4 x float> @frintn_4s(<4 x float> %A) nounwind {
;CHECK-NOT: ld1
;CHECK: frintn.4s v0, v0
;CHECK-NEXT: ret
%tmp3 = call <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float> %A)
%tmp3 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %A)
ret <4 x float> %tmp3
}
@@ -608,13 +608,13 @@ define <2 x double> @frintn_2d(<2 x double> %A) nounwind {
;CHECK-NOT: ld1
;CHECK: frintn.2d v0, v0
;CHECK-NEXT: ret
%tmp3 = call <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double> %A)
%tmp3 = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %A)
ret <2 x double> %tmp3
}
declare <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float>) nounwind readnone
declare <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float>) nounwind readnone
declare <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double>) nounwind readnone
declare <2 x float> @llvm.roundeven.v2f32(<2 x float>) nounwind readnone
declare <4 x float> @llvm.roundeven.v4f32(<4 x float>) nounwind readnone
declare <2 x double> @llvm.roundeven.v2f64(<2 x double>) nounwind readnone
; FALLBACK-NOT: remark{{.*}}frintp_2s
define <2 x float> @frintp_2s(<2 x float> %A) nounwind {

View File

@@ -245,6 +245,20 @@ define %v4f16 @test_v4f16.round(%v4f16 %a) {
%1 = call %v4f16 @llvm.round.v4f16(%v4f16 %a)
ret %v4f16 %1
}
define %v4f16 @test_v4f16.roundeven(%v4f16 %a) {
; CHECK-LABEL: test_v4f16.roundeven:
; CHECK-NOFP16-COUNT-4: frintn s{{[0-9]+}}, s{{[0-9]+}}
; CHECK-FP16-NOT: fcvt
; CHECK-FP16: frintn.4h
; CHECK-FP16-NEXT: ret
; GISEL-LABEL: test_v4f16.roundeven:
; GISEL-NOFP16-COUNT-4: frintn s{{[0-9]+}}, s{{[0-9]+}}
; GISEL-FP16-NOT: fcvt
; GISEL-FP16: frintn.4h
; GISEL-FP16-NEXT: ret
%1 = call %v4f16 @llvm.roundeven.v4f16(%v4f16 %a)
ret %v4f16 %1
}
declare %v4f16 @llvm.sqrt.v4f16(%v4f16) #0
declare %v4f16 @llvm.powi.v4f16(%v4f16, i32) #0
@@ -264,6 +278,7 @@ declare %v4f16 @llvm.trunc.v4f16(%v4f16) #0
declare %v4f16 @llvm.rint.v4f16(%v4f16) #0
declare %v4f16 @llvm.nearbyint.v4f16(%v4f16) #0
declare %v4f16 @llvm.round.v4f16(%v4f16) #0
declare %v4f16 @llvm.roundeven.v4f16(%v4f16) #0
;;;
@@ -502,6 +517,20 @@ define %v8f16 @test_v8f16.round(%v8f16 %a) {
%1 = call %v8f16 @llvm.round.v8f16(%v8f16 %a)
ret %v8f16 %1
}
define %v8f16 @test_v8f16.roundeven(%v8f16 %a) {
; CHECK-LABEL: test_v8f16.roundeven:
; CHECK-NOFP16-COUNT-8: frintn s{{[0-9]+}}, s{{[0-9]+}}
; CHECK-FP16-NOT: fcvt
; CHECK-FP16: frintn.8h
; CHECK-FP16-NEXT: ret
; GISEL-LABEL: test_v8f16.roundeven:
; GISEL-NOFP16-COUNT-8: frintn s{{[0-9]+}}, s{{[0-9]+}}
; GISEL-FP16-NOT: fcvt
; GISEL-FP16: frintn.8h
; GISEL-FP16-NEXT: ret
%1 = call %v8f16 @llvm.roundeven.v8f16(%v8f16 %a)
ret %v8f16 %1
}
declare %v8f16 @llvm.sqrt.v8f16(%v8f16) #0
declare %v8f16 @llvm.powi.v8f16(%v8f16, i32) #0
@@ -521,6 +550,7 @@ declare %v8f16 @llvm.trunc.v8f16(%v8f16) #0
declare %v8f16 @llvm.rint.v8f16(%v8f16) #0
declare %v8f16 @llvm.nearbyint.v8f16(%v8f16) #0
declare %v8f16 @llvm.round.v8f16(%v8f16) #0
declare %v8f16 @llvm.roundeven.v8f16(%v8f16) #0
;;; Float vectors

View File

@@ -796,6 +796,7 @@ declare half @llvm.trunc.f16(half %a) #0
declare half @llvm.rint.f16(half %a) #0
declare half @llvm.nearbyint.f16(half %a) #0
declare half @llvm.round.f16(half %a) #0
declare half @llvm.roundeven.f16(half %a) #0
declare half @llvm.fmuladd.f16(half %a, half %b, half %c) #0
declare half @llvm.aarch64.neon.frecpe.f16(half %a) #0
declare half @llvm.aarch64.neon.frecpx.f16(half %a) #0
@@ -1313,6 +1314,32 @@ define half @test_round(half %a) #0 {
ret half %r
}
; CHECK-CVT-LABEL: test_roundeven:
; CHECK-CVT-NEXT: fcvt [[FLOAT32:s[0-9]+]], h0
; CHECK-CVT-NEXT: frintn [[INT32:s[0-9]+]], [[FLOAT32]]
; CHECK-CVT-NEXT: fcvt h0, [[INT32]]
; CHECK-CVT-NEXT: ret
; GISEL-CVT-LABEL: test_roundeven:
; GISEL-CVT-NEXT: fcvt [[FLOAT32:s[0-9]+]], h0
; GISEL-CVT-NEXT: frintn [[INT32:s[0-9]+]], [[FLOAT32]]
; GISEL-CVT-NEXT: fcvt h0, [[INT32]]
; GISEL-CVT-NEXT: ret
; CHECK-FP16-LABEL: test_roundeven:
; CHECK-FP16-NEXT: frintn h0, h0
; CHECK-FP16-NEXT: ret
; GISEL-FP16-LABEL: test_roundeven:
; GISEL-FP16-NEXT: frintn h0, h0
; GISEL-FP16-NEXT: ret
define half @test_roundeven(half %a) #0 {
%r = call half @llvm.roundeven.f16(half %a)
ret half %r
}
; CHECK-CVT-LABEL: test_fmuladd:
; CHECK-CVT-NEXT: fcvt s1, h1
; CHECK-CVT-NEXT: fcvt s0, h0

View File

@@ -266,6 +266,13 @@ define float @round_f32(float %x) #0 {
ret float %val
}
; CHECK-LABEL: roundeven_f32:
; CHECK: frintn s0, s0
define float @roundeven_f32(float %x) #0 {
%val = call float @llvm.experimental.constrained.roundeven.f32(float %x, metadata !"fpexcept.strict") #0
ret float %val
}
; CHECK-LABEL: trunc_f32:
; CHECK: frintz s0, s0
define float @trunc_f32(float %x) #0 {
@@ -729,6 +736,13 @@ define double @round_f64(double %x) #0 {
ret double %val
}
; CHECK-LABEL: roundeven_f64:
; CHECK: frintn d0, d0
define double @roundeven_f64(double %x) #0 {
%val = call double @llvm.experimental.constrained.roundeven.f64(double %x, metadata !"fpexcept.strict") #0
ret double %val
}
; CHECK-LABEL: trunc_f64:
; CHECK: frintz d0, d0
define double @trunc_f64(double %x) #0 {
@@ -1474,6 +1488,7 @@ declare float @llvm.experimental.constrained.floor.f32(float, metadata)
declare i32 @llvm.experimental.constrained.lround.f32(float, metadata)
declare i64 @llvm.experimental.constrained.llround.f32(float, metadata)
declare float @llvm.experimental.constrained.round.f32(float, metadata)
declare float @llvm.experimental.constrained.roundeven.f32(float, metadata)
declare float @llvm.experimental.constrained.trunc.f32(float, metadata)
declare i1 @llvm.experimental.constrained.fcmps.f32(float, float, metadata, metadata)
declare i1 @llvm.experimental.constrained.fcmp.f32(float, float, metadata, metadata)
@@ -1515,6 +1530,7 @@ declare double @llvm.experimental.constrained.floor.f64(double, metadata)
declare i32 @llvm.experimental.constrained.lround.f64(double, metadata)
declare i64 @llvm.experimental.constrained.llround.f64(double, metadata)
declare double @llvm.experimental.constrained.round.f64(double, metadata)
declare double @llvm.experimental.constrained.roundeven.f64(double, metadata)
declare double @llvm.experimental.constrained.trunc.f64(double, metadata)
declare i1 @llvm.experimental.constrained.fcmps.f64(double, double, metadata, metadata)
declare i1 @llvm.experimental.constrained.fcmp.f64(double, double, metadata, metadata)

View File

@@ -0,0 +1,41 @@
; RUN: llc -mtriple=aarch64-eabi -mattr=+fullfp16 %s -o - | FileCheck %s
; The llvm.aarch64.neon.frintn intrinsic should be auto-upgraded to the
; target-independent roundeven intrinsic.
define <4 x half> @frintn_4h(<4 x half> %A) nounwind {
;CHECK-LABEL: frintn_4h:
;CHECK: frintn v0.4h, v0.4h
;CHECK-NEXT: ret
%tmp3 = call <4 x half> @llvm.aarch64.neon.frintn.v4f16(<4 x half> %A)
ret <4 x half> %tmp3
}
define <2 x float> @frintn_2s(<2 x float> %A) nounwind {
;CHECK-LABEL: frintn_2s:
;CHECK: frintn v0.2s, v0.2s
;CHECK-NEXT: ret
%tmp3 = call <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float> %A)
ret <2 x float> %tmp3
}
define <4 x float> @frintn_4s(<4 x float> %A) nounwind {
;CHECK-LABEL: frintn_4s:
;CHECK: frintn v0.4s, v0.4s
;CHECK-NEXT: ret
%tmp3 = call <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float> %A)
ret <4 x float> %tmp3
}
define <2 x double> @frintn_2d(<2 x double> %A) nounwind {
;CHECK-LABEL: frintn_2d:
;CHECK: frintn v0.2d, v0.2d
;CHECK-NEXT: ret
%tmp3 = call <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double> %A)
ret <2 x double> %tmp3
}
declare <4 x half> @llvm.aarch64.neon.frintn.v4f16(<4 x half>) nounwind readnone
declare <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float>) nounwind readnone
declare <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float>) nounwind readnone
declare <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double>) nounwind readnone

View File

@@ -1255,6 +1255,253 @@ define void @frinta_v32f64(<32 x double>* %a) #0 {
ret void
}
;
; ROUNDEVEN -> FRINTN
;
; Don't use SVE for 64-bit vectors.
define <4 x half> @frintn_v4f16(<4 x half> %op) #0 {
; CHECK-LABEL: frintn_v4f16:
; CHECK: frintn v0.4h, v0.4h
; CHECK-NEXT: ret
%res = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %op)
ret <4 x half> %res
}
; Don't use SVE for 128-bit vectors.
define <8 x half> @frintn_v8f16(<8 x half> %op) #0 {
; CHECK-LABEL: frintn_v8f16:
; CHECK: frintn v0.8h, v0.8h
; CHECK-NEXT: ret
%res = call <8 x half> @llvm.roundeven.v8f16(<8 x half> %op)
ret <8 x half> %res
}
define void @frintn_v16f16(<16 x half>* %a) #0 {
; CHECK-LABEL: frintn_v16f16:
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
; CHECK-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; CHECK-NEXT: frintn [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; CHECK-NEXT: ret
%op = load <16 x half>, <16 x half>* %a
%res = call <16 x half> @llvm.roundeven.v16f16(<16 x half> %op)
store <16 x half> %res, <16 x half>* %a
ret void
}
define void @frintn_v32f16(<32 x half>* %a) #0 {
; CHECK-LABEL: frintn_v32f16:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
; VBITS_GE_512-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: frintn [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret
; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
; VBITS_EQ_256-DAG: frintn [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
; VBITS_EQ_256-DAG: frintn [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]]
; VBITS_EQ_256-NEXT: ret
%op = load <32 x half>, <32 x half>* %a
%res = call <32 x half> @llvm.roundeven.v32f16(<32 x half> %op)
store <32 x half> %res, <32 x half>* %a
ret void
}
define void @frintn_v64f16(<64 x half>* %a) #0 {
; CHECK-LABEL: frintn_v64f16:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
; VBITS_GE_1024-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: frintn [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
%op = load <64 x half>, <64 x half>* %a
%res = call <64 x half> @llvm.roundeven.v64f16(<64 x half> %op)
store <64 x half> %res, <64 x half>* %a
ret void
}
define void @frintn_v128f16(<128 x half>* %a) #0 {
; CHECK-LABEL: frintn_v128f16:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
; VBITS_GE_2048-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: frintn [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
%op = load <128 x half>, <128 x half>* %a
%res = call <128 x half> @llvm.roundeven.v128f16(<128 x half> %op)
store <128 x half> %res, <128 x half>* %a
ret void
}
; Don't use SVE for 64-bit vectors.
define <2 x float> @frintn_v2f32(<2 x float> %op) #0 {
; CHECK-LABEL: frintn_v2f32:
; CHECK: frintn v0.2s, v0.2s
; CHECK-NEXT: ret
%res = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %op)
ret <2 x float> %res
}
; Don't use SVE for 128-bit vectors.
define <4 x float> @frintn_v4f32(<4 x float> %op) #0 {
; CHECK-LABEL: frintn_v4f32:
; CHECK: frintn v0.4s, v0.4s
; CHECK-NEXT: ret
%res = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %op)
ret <4 x float> %res
}
define void @frintn_v8f32(<8 x float>* %a) #0 {
; CHECK-LABEL: frintn_v8f32:
; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
; CHECK-DAG: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; CHECK-NEXT: frintn [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; CHECK-NEXT: ret
%op = load <8 x float>, <8 x float>* %a
%res = call <8 x float> @llvm.roundeven.v8f32(<8 x float> %op)
store <8 x float> %res, <8 x float>* %a
ret void
}
define void @frintn_v16f32(<16 x float>* %a) #0 {
; CHECK-LABEL: frintn_v16f32:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
; VBITS_GE_512-DAG: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: frintn [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret
; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
; VBITS_EQ_256-DAG: frintn [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s
; VBITS_EQ_256-DAG: frintn [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s
; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]]
; VBITS_EQ_256-NEXT: ret
%op = load <16 x float>, <16 x float>* %a
%res = call <16 x float> @llvm.roundeven.v16f32(<16 x float> %op)
store <16 x float> %res, <16 x float>* %a
ret void
}
define void @frintn_v32f32(<32 x float>* %a) #0 {
; CHECK-LABEL: frintn_v32f32:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
; VBITS_GE_1024-DAG: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: frintn [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
%op = load <32 x float>, <32 x float>* %a
%res = call <32 x float> @llvm.roundeven.v32f32(<32 x float> %op)
store <32 x float> %res, <32 x float>* %a
ret void
}
define void @frintn_v64f32(<64 x float>* %a) #0 {
; CHECK-LABEL: frintn_v64f32:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
; VBITS_GE_2048-DAG: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: frintn [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
%op = load <64 x float>, <64 x float>* %a
%res = call <64 x float> @llvm.roundeven.v64f32(<64 x float> %op)
store <64 x float> %res, <64 x float>* %a
ret void
}
; Don't use SVE for 64-bit vectors.
define <1 x double> @frintn_v1f64(<1 x double> %op) #0 {
; CHECK-LABEL: frintn_v1f64:
; CHECK: frintn d0, d0
; CHECK-NEXT: ret
%res = call <1 x double> @llvm.roundeven.v1f64(<1 x double> %op)
ret <1 x double> %res
}
; Don't use SVE for 128-bit vectors.
define <2 x double> @frintn_v2f64(<2 x double> %op) #0 {
; CHECK-LABEL: frintn_v2f64:
; CHECK: frintn v0.2d, v0.2d
; CHECK-NEXT: ret
%res = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %op)
ret <2 x double> %res
}
define void @frintn_v4f64(<4 x double>* %a) #0 {
; CHECK-LABEL: frintn_v4f64:
; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
; CHECK-DAG: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; CHECK-NEXT: frintn [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; CHECK-NEXT: ret
%op = load <4 x double>, <4 x double>* %a
%res = call <4 x double> @llvm.roundeven.v4f64(<4 x double> %op)
store <4 x double> %res, <4 x double>* %a
ret void
}
define void @frintn_v8f64(<8 x double>* %a) #0 {
; CHECK-LABEL: frintn_v8f64:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
; VBITS_GE_512-DAG: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: frintn [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret
; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
; VBITS_EQ_256-DAG: frintn [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d
; VBITS_EQ_256-DAG: frintn [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d
; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]]
; VBITS_EQ_256-NEXT: ret
%op = load <8 x double>, <8 x double>* %a
%res = call <8 x double> @llvm.roundeven.v8f64(<8 x double> %op)
store <8 x double> %res, <8 x double>* %a
ret void
}
define void @frintn_v16f64(<16 x double>* %a) #0 {
; CHECK-LABEL: frintn_v16f64:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
; VBITS_GE_1024-DAG: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: frintn [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
%op = load <16 x double>, <16 x double>* %a
%res = call <16 x double> @llvm.roundeven.v16f64(<16 x double> %op)
store <16 x double> %res, <16 x double>* %a
ret void
}
define void @frintn_v32f64(<32 x double>* %a) #0 {
; CHECK-LABEL: frintn_v32f64:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
; VBITS_GE_2048-DAG: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: frintn [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
%op = load <32 x double>, <32 x double>* %a
%res = call <32 x double> @llvm.roundeven.v32f64(<32 x double> %op)
store <32 x double> %res, <32 x double>* %a
ret void
}
;
; TRUNC -> FRINTZ
;
@@ -1599,6 +1846,25 @@ declare <8 x double> @llvm.round.v8f64(<8 x double>)
declare <16 x double> @llvm.round.v16f64(<16 x double>)
declare <32 x double> @llvm.round.v32f64(<32 x double>)
declare <4 x half> @llvm.roundeven.v4f16(<4 x half>)
declare <8 x half> @llvm.roundeven.v8f16(<8 x half>)
declare <16 x half> @llvm.roundeven.v16f16(<16 x half>)
declare <32 x half> @llvm.roundeven.v32f16(<32 x half>)
declare <64 x half> @llvm.roundeven.v64f16(<64 x half>)
declare <128 x half> @llvm.roundeven.v128f16(<128 x half>)
declare <2 x float> @llvm.roundeven.v2f32(<2 x float>)
declare <4 x float> @llvm.roundeven.v4f32(<4 x float>)
declare <8 x float> @llvm.roundeven.v8f32(<8 x float>)
declare <16 x float> @llvm.roundeven.v16f32(<16 x float>)
declare <32 x float> @llvm.roundeven.v32f32(<32 x float>)
declare <64 x float> @llvm.roundeven.v64f32(<64 x float>)
declare <1 x double> @llvm.roundeven.v1f64(<1 x double>)
declare <2 x double> @llvm.roundeven.v2f64(<2 x double>)
declare <4 x double> @llvm.roundeven.v4f64(<4 x double>)
declare <8 x double> @llvm.roundeven.v8f64(<8 x double>)
declare <16 x double> @llvm.roundeven.v16f64(<16 x double>)
declare <32 x double> @llvm.roundeven.v32f64(<32 x double>)
declare <4 x half> @llvm.trunc.v4f16(<4 x half>)
declare <8 x half> @llvm.trunc.v8f16(<8 x half>)
declare <16 x half> @llvm.trunc.v16f16(<16 x half>)

View File

@@ -29,6 +29,7 @@ declare <3 x float> @llvm.log2.v3f32(<3 x float>)
declare <3 x float> @llvm.nearbyint.v3f32(<3 x float>)
declare <3 x float> @llvm.rint.v3f32(<3 x float>)
declare <3 x float> @llvm.round.v3f32(<3 x float>)
declare <3 x float> @llvm.roundeven.v3f32(<3 x float>)
declare <3 x float> @llvm.sqrt.v3f32(<3 x float>)
declare <3 x float> @llvm.trunc.v3f32(<3 x float>)
@@ -478,6 +479,15 @@ define <3 x float> @round_v3f32(<3 x float> %x) nounwind {
ret <3 x float> %r
}
define <3 x float> @roundeven_v3f32(<3 x float> %x) nounwind {
; CHECK-LABEL: roundeven_v3f32:
; CHECK: // %bb.0:
; CHECK-NEXT: frintn v0.4s, v0.4s
; CHECK-NEXT: ret
%r = call <3 x float> @llvm.roundeven.v3f32(<3 x float> %x)
ret <3 x float> %r
}
define <3 x float> @sqrt_v3f32(<3 x float> %x) nounwind {
; CHECK-LABEL: sqrt_v3f32:
; CHECK: // %bb.0: