[AArch64][SVE/NEON] Add support for FROUNDEVEN for both NEON and fixed length SVE

Previously NEON used a target specific intrinsic for frintn, given that
the FROUNDEVEN ISD node now exists, move over to that instead and add
codegen support for that node for both NEON and fixed length SVE.

Differential Revision: https://reviews.llvm.org/D98487
This commit is contained in:
Bradley Smith 2021-03-12 11:46:58 +00:00
parent dbf8f2b089
commit cf0da91ba5
19 changed files with 438 additions and 29 deletions

View File

@@ -10620,17 +10620,23 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
}
case NEON::BI__builtin_neon_vrndnh_f16: {
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Int = Intrinsic::aarch64_neon_frintn;
Int = Builder.getIsFPConstrained()
? Intrinsic::experimental_constrained_roundeven
: Intrinsic::roundeven;
return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
}
case NEON::BI__builtin_neon_vrndn_v:
case NEON::BI__builtin_neon_vrndnq_v: {
Int = Intrinsic::aarch64_neon_frintn;
Int = Builder.getIsFPConstrained()
? Intrinsic::experimental_constrained_roundeven
: Intrinsic::roundeven;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
}
case NEON::BI__builtin_neon_vrndns_f32: {
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Int = Intrinsic::aarch64_neon_frintn;
Int = Builder.getIsFPConstrained()
? Intrinsic::experimental_constrained_roundeven
: Intrinsic::roundeven;
return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn");
}
case NEON::BI__builtin_neon_vrndph_f16: {

View File

@@ -18155,7 +18155,7 @@ float64x1_t test_vcvt_n_f64_u64(uint64x1_t a) {
// CHECK-LABEL: @test_vrndn_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VRNDN1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frintn.v1f64(<1 x double> %a)
// CHECK: [[VRNDN1_I:%.*]] = call <1 x double> @llvm.roundeven.v1f64(<1 x double> %a)
// CHECK: ret <1 x double> [[VRNDN1_I]]
float64x1_t test_vrndn_f64(float64x1_t a) {
return vrndn_f64(a);

View File

@@ -2287,7 +2287,7 @@ float64x2_t test_vcvt_high_f64_f32(float32x4_t a) {
// CHECK-LABEL: @test_vrndnq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[VRNDN1_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double> %a)
// CHECK: [[VRNDN1_I:%.*]] = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %a)
// CHECK: ret <2 x double> [[VRNDN1_I]]
float64x2_t test_vrndnq_f64(float64x2_t a) {
return vrndnq_f64(a);

View File

@@ -366,7 +366,7 @@ float16_t test_vrndmh_f16(float16_t a) {
}
// CHECK-LABEL: test_vrndnh_f16
// CHECK: [[RND:%.*]] = call half @llvm.aarch64.neon.frintn.f16(half %a)
// CHECK: [[RND:%.*]] = call half @llvm.roundeven.f16(half %a)
// CHECK: ret half [[RND]]
float16_t test_vrndnh_f16(float16_t a) {
return vrndnh_f16(a);

View File

@@ -348,14 +348,14 @@ float16x8_t test_vrndmq_f16(float16x8_t a) {
}
// CHECK-LABEL: test_vrndn_f16
// CHECK: [[RND:%.*]] = call <4 x half> @llvm.aarch64.neon.frintn.v4f16(<4 x half> %a)
// CHECK: [[RND:%.*]] = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %a)
// CHECK: ret <4 x half> [[RND]]
float16x4_t test_vrndn_f16(float16x4_t a) {
return vrndn_f16(a);
}
// CHECK-LABEL: test_vrndnq_f16
// CHECK: [[RND:%.*]] = call <8 x half> @llvm.aarch64.neon.frintn.v8f16(<8 x half> %a)
// CHECK: [[RND:%.*]] = call <8 x half> @llvm.roundeven.v8f16(<8 x half> %a)
// CHECK: ret <8 x half> [[RND]]
float16x8_t test_vrndnq_f16(float16x8_t a) {
return vrndnq_f16(a);

View File

@@ -41,7 +41,7 @@ float32x4_t test_vrndmq_f32(float32x4_t a) {
// CHECK-LABEL: define{{.*}} <2 x float> @test_vrndn_f32(<2 x float> %a)
// CHECK-A32: [[VRNDN_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrintn.v2f32(<2 x float> %a)
// CHECK-A64: [[VRNDN_V1_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float> %a)
// CHECK-A64: [[VRNDN_V1_I:%.*]] = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %a)
// CHECK: ret <2 x float> [[VRNDN_V1_I]]
float32x2_t test_vrndn_f32(float32x2_t a) {
return vrndn_f32(a);
@@ -49,7 +49,7 @@ float32x2_t test_vrndn_f32(float32x2_t a) {
// CHECK-LABEL: define{{.*}} <4 x float> @test_vrndnq_f32(<4 x float> %a)
// CHECK-A32: [[VRNDNQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrintn.v4f32(<4 x float> %a)
// CHECK-A64: [[VRNDNQ_V1_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float> %a)
// CHECK-A64: [[VRNDNQ_V1_I:%.*]] = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %a)
// CHECK: ret <4 x float> [[VRNDNQ_V1_I]]
float32x4_t test_vrndnq_f32(float32x4_t a) {
return vrndnq_f32(a);
@@ -105,7 +105,7 @@ float32x4_t test_vrndq_f32(float32x4_t a) {
// CHECK-LABEL: define{{.*}} float @test_vrndns_f32(float %a)
// CHECK-A32: [[VRNDN_I:%.*]] = call float @llvm.arm.neon.vrintn.f32(float %a)
// CHECK-A64: [[VRNDN_I:%.*]] = call float @llvm.aarch64.neon.frintn.f32(float %a)
// CHECK-A64: [[VRNDN_I:%.*]] = call float @llvm.roundeven.f32(float %a)
// CHECK: ret float [[VRNDN_I]]
float32_t test_vrndns_f32(float32_t a) {
return vrndns_f32(a);

View File

@@ -6,7 +6,7 @@ float64x2_t rnd5(float64x2_t a) { return vrndq_f64(a); }
// CHECK: call <2 x double> @llvm.trunc.v2f64(<2 x double>
float64x2_t rnd9(float64x2_t a) { return vrndnq_f64(a); }
// CHECK: call <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double>
// CHECK: call <2 x double> @llvm.roundeven.v2f64(<2 x double>
float64x2_t rnd13(float64x2_t a) { return vrndmq_f64(a); }
// CHECK: call <2 x double> @llvm.floor.v2f64(<2 x double>

View File

@@ -471,10 +471,6 @@ let TargetPrefix = "aarch64", IntrProperties = [IntrNoMem] in {
def int_aarch64_neon_fcvtzs : AdvSIMD_FPToIntRounding_Intrinsic;
def int_aarch64_neon_fcvtzu : AdvSIMD_FPToIntRounding_Intrinsic;
// Vector FP Rounding: only ties to even is unrepresented by a normal
// intrinsic.
def int_aarch64_neon_frintn : AdvSIMD_1FloatArg_Intrinsic;
// v8.5-A Vector FP Rounding
def int_aarch64_neon_frint32x : AdvSIMD_1FloatArg_Intrinsic;
def int_aarch64_neon_frint32z : AdvSIMD_1FloatArg_Intrinsic;

View File

@@ -152,10 +152,10 @@ def SDTIntTruncOp : SDTypeProfile<1, 1, [ // trunc
def SDTFPUnaryOp : SDTypeProfile<1, 1, [ // fneg, fsqrt, etc
SDTCisSameAs<0, 1>, SDTCisFP<0>
]>;
def SDTFPRoundOp : SDTypeProfile<1, 1, [ // fround
def SDTFPRoundOp : SDTypeProfile<1, 1, [ // fpround
SDTCisFP<0>, SDTCisFP<1>, SDTCisOpSmallerThanOp<0, 1>, SDTCisSameNumEltsAs<0, 1>
]>;
def SDTFPExtendOp : SDTypeProfile<1, 1, [ // fextend
def SDTFPExtendOp : SDTypeProfile<1, 1, [ // fpextend
SDTCisFP<0>, SDTCisFP<1>, SDTCisOpSmallerThanOp<1, 0>, SDTCisSameNumEltsAs<0, 1>
]>;
def SDTIntToFPOp : SDTypeProfile<1, 1, [ // [su]int_to_fp
@@ -486,6 +486,7 @@ def fceil : SDNode<"ISD::FCEIL" , SDTFPUnaryOp>;
def ffloor : SDNode<"ISD::FFLOOR" , SDTFPUnaryOp>;
def fnearbyint : SDNode<"ISD::FNEARBYINT" , SDTFPUnaryOp>;
def fround : SDNode<"ISD::FROUND" , SDTFPUnaryOp>;
def froundeven : SDNode<"ISD::FROUNDEVEN" , SDTFPUnaryOp>;
def lround : SDNode<"ISD::LROUND" , SDTFPToIntOp>;
def llround : SDNode<"ISD::LLROUND" , SDTFPToIntOp>;
@@ -547,6 +548,8 @@ def strict_llround : SDNode<"ISD::STRICT_LLROUND",
SDTFPToIntOp, [SDNPHasChain]>;
def strict_fround : SDNode<"ISD::STRICT_FROUND",
SDTFPUnaryOp, [SDNPHasChain]>;
def strict_froundeven : SDNode<"ISD::STRICT_FROUNDEVEN",
SDTFPUnaryOp, [SDNPHasChain]>;
def strict_ftrunc : SDNode<"ISD::STRICT_FTRUNC",
SDTFPUnaryOp, [SDNPHasChain]>;
def strict_fminnum : SDNode<"ISD::STRICT_FMINNUM",
@@ -1414,6 +1417,9 @@ def any_llround : PatFrags<(ops node:$src),
def any_fround : PatFrags<(ops node:$src),
[(strict_fround node:$src),
(fround node:$src)]>;
def any_froundeven : PatFrags<(ops node:$src),
[(strict_froundeven node:$src),
(froundeven node:$src)]>;
def any_ftrunc : PatFrags<(ops node:$src),
[(strict_ftrunc node:$src),
(ftrunc node:$src)]>;

View File

@@ -548,6 +548,11 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
F->arg_begin()->getType());
return true;
}
if (Name.startswith("aarch64.neon.frintn")) {
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::roundeven,
F->arg_begin()->getType());
return true;
}
if (Name.startswith("arm.neon.vclz")) {
Type* args[2] = {
F->arg_begin()->getType(),

View File

@@ -605,6 +605,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
setOperationAction(ISD::FRINT, MVT::f16, Promote);
setOperationAction(ISD::FROUND, MVT::f16, Promote);
setOperationAction(ISD::FROUNDEVEN, MVT::f16, Promote);
setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
@@ -624,6 +625,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FABS, MVT::v4f16, Expand);
setOperationAction(ISD::FNEG, MVT::v4f16, Expand);
setOperationAction(ISD::FROUND, MVT::v4f16, Expand);
setOperationAction(ISD::FROUNDEVEN, MVT::v4f16, Expand);
setOperationAction(ISD::FMA, MVT::v4f16, Expand);
setOperationAction(ISD::SETCC, MVT::v4f16, Expand);
setOperationAction(ISD::BR_CC, MVT::v4f16, Expand);
@@ -648,6 +650,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FNEARBYINT, MVT::v8f16, Expand);
setOperationAction(ISD::FNEG, MVT::v8f16, Expand);
setOperationAction(ISD::FROUND, MVT::v8f16, Expand);
setOperationAction(ISD::FROUNDEVEN, MVT::v8f16, Expand);
setOperationAction(ISD::FRINT, MVT::v8f16, Expand);
setOperationAction(ISD::FSQRT, MVT::v8f16, Expand);
setOperationAction(ISD::FSUB, MVT::v8f16, Expand);
@@ -667,6 +670,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FRINT, Ty, Legal);
setOperationAction(ISD::FTRUNC, Ty, Legal);
setOperationAction(ISD::FROUND, Ty, Legal);
setOperationAction(ISD::FROUNDEVEN, Ty, Legal);
setOperationAction(ISD::FMINNUM, Ty, Legal);
setOperationAction(ISD::FMAXNUM, Ty, Legal);
setOperationAction(ISD::FMINIMUM, Ty, Legal);
@@ -684,6 +688,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FRINT, MVT::f16, Legal);
setOperationAction(ISD::FTRUNC, MVT::f16, Legal);
setOperationAction(ISD::FROUND, MVT::f16, Legal);
setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal);
setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
setOperationAction(ISD::FMINIMUM, MVT::f16, Legal);
@@ -943,6 +948,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FPOW, MVT::v1f64, Expand);
setOperationAction(ISD::FREM, MVT::v1f64, Expand);
setOperationAction(ISD::FROUND, MVT::v1f64, Expand);
setOperationAction(ISD::FROUNDEVEN, MVT::v1f64, Expand);
setOperationAction(ISD::FRINT, MVT::v1f64, Expand);
setOperationAction(ISD::FSIN, MVT::v1f64, Expand);
setOperationAction(ISD::FSINCOS, MVT::v1f64, Expand);
@@ -1069,6 +1075,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FRINT, Ty, Legal);
setOperationAction(ISD::FTRUNC, Ty, Legal);
setOperationAction(ISD::FROUND, Ty, Legal);
setOperationAction(ISD::FROUNDEVEN, Ty, Legal);
}
if (Subtarget->hasFullFP16()) {
@@ -1079,6 +1086,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FRINT, Ty, Legal);
setOperationAction(ISD::FTRUNC, Ty, Legal);
setOperationAction(ISD::FROUND, Ty, Legal);
setOperationAction(ISD::FROUNDEVEN, Ty, Legal);
}
}
@@ -1403,6 +1411,7 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
setOperationAction(ISD::FNEG, VT, Custom);
setOperationAction(ISD::FRINT, VT, Custom);
setOperationAction(ISD::FROUND, VT, Custom);
setOperationAction(ISD::FROUNDEVEN, VT, Custom);
setOperationAction(ISD::FSQRT, VT, Custom);
setOperationAction(ISD::FSUB, VT, Custom);
setOperationAction(ISD::FTRUNC, VT, Custom);

View File

@@ -3796,12 +3796,9 @@ defm FNEG : SingleOperandFPData<0b0010, "fneg", fneg>;
defm FRINTA : SingleOperandFPData<0b1100, "frinta", fround>;
defm FRINTI : SingleOperandFPData<0b1111, "frinti", fnearbyint>;
defm FRINTM : SingleOperandFPData<0b1010, "frintm", ffloor>;
defm FRINTN : SingleOperandFPData<0b1000, "frintn", int_aarch64_neon_frintn>;
defm FRINTN : SingleOperandFPData<0b1000, "frintn", froundeven>;
defm FRINTP : SingleOperandFPData<0b1001, "frintp", fceil>;
def : Pat<(v1f64 (int_aarch64_neon_frintn (v1f64 FPR64:$Rn))),
(FRINTNDr FPR64:$Rn)>;
defm FRINTX : SingleOperandFPData<0b1110, "frintx", frint>;
defm FRINTZ : SingleOperandFPData<0b1011, "frintz", ftrunc>;
@@ -4090,7 +4087,7 @@ defm FRECPE : SIMDTwoVectorFP<0, 1, 0b11101, "frecpe", int_aarch64_neon_frecpe>;
defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", fround>;
defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", fnearbyint>;
defm FRINTM : SIMDTwoVectorFP<0, 0, 0b11001, "frintm", ffloor>;
defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", int_aarch64_neon_frintn>;
defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", froundeven>;
defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", fceil>;
defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", frint>;
defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", ftrunc>;

View File

@@ -590,7 +590,7 @@ define <2 x float> @frintn_2s(<2 x float> %A) nounwind {
;CHECK-NOT: ld1
;CHECK: frintn.2s v0, v0
;CHECK-NEXT: ret
%tmp3 = call <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float> %A)
%tmp3 = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %A)
ret <2 x float> %tmp3
}
@@ -599,7 +599,7 @@ define <4 x float> @frintn_4s(<4 x float> %A) nounwind {
;CHECK-NOT: ld1
;CHECK: frintn.4s v0, v0
;CHECK-NEXT: ret
%tmp3 = call <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float> %A)
%tmp3 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %A)
ret <4 x float> %tmp3
}
@@ -608,13 +608,13 @@ define <2 x double> @frintn_2d(<2 x double> %A) nounwind {
;CHECK-NOT: ld1
;CHECK: frintn.2d v0, v0
;CHECK-NEXT: ret
%tmp3 = call <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double> %A)
%tmp3 = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %A)
ret <2 x double> %tmp3
}
declare <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float>) nounwind readnone
declare <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float>) nounwind readnone
declare <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double>) nounwind readnone
declare <2 x float> @llvm.roundeven.v2f32(<2 x float>) nounwind readnone
declare <4 x float> @llvm.roundeven.v4f32(<4 x float>) nounwind readnone
declare <2 x double> @llvm.roundeven.v2f64(<2 x double>) nounwind readnone
; FALLBACK-NOT: remark{{.*}}frintp_2s
define <2 x float> @frintp_2s(<2 x float> %A) nounwind {

View File

@@ -245,6 +245,20 @@ define %v4f16 @test_v4f16.round(%v4f16 %a) {
%1 = call %v4f16 @llvm.round.v4f16(%v4f16 %a)
ret %v4f16 %1
}
define %v4f16 @test_v4f16.roundeven(%v4f16 %a) {
; CHECK-LABEL: test_v4f16.roundeven:
; CHECK-NOFP16-COUNT-4: frintn s{{[0-9]+}}, s{{[0-9]+}}
; CHECK-FP16-NOT: fcvt
; CHECK-FP16: frintn.4h
; CHECK-FP16-NEXT: ret
; GISEL-LABEL: test_v4f16.roundeven:
; GISEL-NOFP16-COUNT-4: frintn s{{[0-9]+}}, s{{[0-9]+}}
; GISEL-FP16-NOT: fcvt
; GISEL-FP16: frintn.4h
; GISEL-FP16-NEXT: ret
%1 = call %v4f16 @llvm.roundeven.v4f16(%v4f16 %a)
ret %v4f16 %1
}
declare %v4f16 @llvm.sqrt.v4f16(%v4f16) #0
declare %v4f16 @llvm.powi.v4f16(%v4f16, i32) #0
@@ -264,6 +278,7 @@ declare %v4f16 @llvm.trunc.v4f16(%v4f16) #0
declare %v4f16 @llvm.rint.v4f16(%v4f16) #0
declare %v4f16 @llvm.nearbyint.v4f16(%v4f16) #0
declare %v4f16 @llvm.round.v4f16(%v4f16) #0
declare %v4f16 @llvm.roundeven.v4f16(%v4f16) #0
;;;
@@ -502,6 +517,20 @@ define %v8f16 @test_v8f16.round(%v8f16 %a) {
%1 = call %v8f16 @llvm.round.v8f16(%v8f16 %a)
ret %v8f16 %1
}
define %v8f16 @test_v8f16.roundeven(%v8f16 %a) {
; CHECK-LABEL: test_v8f16.roundeven:
; CHECK-NOFP16-COUNT-8: frintn s{{[0-9]+}}, s{{[0-9]+}}
; CHECK-FP16-NOT: fcvt
; CHECK-FP16: frintn.8h
; CHECK-FP16-NEXT: ret
; GISEL-LABEL: test_v8f16.roundeven:
; GISEL-NOFP16-COUNT-8: frintn s{{[0-9]+}}, s{{[0-9]+}}
; GISEL-FP16-NOT: fcvt
; GISEL-FP16: frintn.8h
; GISEL-FP16-NEXT: ret
%1 = call %v8f16 @llvm.roundeven.v8f16(%v8f16 %a)
ret %v8f16 %1
}
declare %v8f16 @llvm.sqrt.v8f16(%v8f16) #0
declare %v8f16 @llvm.powi.v8f16(%v8f16, i32) #0
@@ -521,6 +550,7 @@ declare %v8f16 @llvm.trunc.v8f16(%v8f16) #0
declare %v8f16 @llvm.rint.v8f16(%v8f16) #0
declare %v8f16 @llvm.nearbyint.v8f16(%v8f16) #0
declare %v8f16 @llvm.round.v8f16(%v8f16) #0
declare %v8f16 @llvm.roundeven.v8f16(%v8f16) #0
;;; Float vectors

View File

@@ -796,6 +796,7 @@ declare half @llvm.trunc.f16(half %a) #0
declare half @llvm.rint.f16(half %a) #0
declare half @llvm.nearbyint.f16(half %a) #0
declare half @llvm.round.f16(half %a) #0
declare half @llvm.roundeven.f16(half %a) #0
declare half @llvm.fmuladd.f16(half %a, half %b, half %c) #0
declare half @llvm.aarch64.neon.frecpe.f16(half %a) #0
declare half @llvm.aarch64.neon.frecpx.f16(half %a) #0
@@ -1313,6 +1314,32 @@ define half @test_round(half %a) #0 {
ret half %r
}
; CHECK-CVT-LABEL: test_roundeven:
; CHECK-CVT-NEXT: fcvt [[FLOAT32:s[0-9]+]], h0
; CHECK-CVT-NEXT: frintn [[INT32:s[0-9]+]], [[FLOAT32]]
; CHECK-CVT-NEXT: fcvt h0, [[INT32]]
; CHECK-CVT-NEXT: ret
; GISEL-CVT-LABEL: test_roundeven:
; GISEL-CVT-NEXT: fcvt [[FLOAT32:s[0-9]+]], h0
; GISEL-CVT-NEXT: frintn [[INT32:s[0-9]+]], [[FLOAT32]]
; GISEL-CVT-NEXT: fcvt h0, [[INT32]]
; GISEL-CVT-NEXT: ret
; CHECK-FP16-LABEL: test_roundeven:
; CHECK-FP16-NEXT: frintn h0, h0
; CHECK-FP16-NEXT: ret
; GISEL-FP16-LABEL: test_roundeven:
; GISEL-FP16-NEXT: frintn h0, h0
; GISEL-FP16-NEXT: ret
define half @test_roundeven(half %a) #0 {
%r = call half @llvm.roundeven.f16(half %a)
ret half %r
}
; CHECK-CVT-LABEL: test_fmuladd:
; CHECK-CVT-NEXT: fcvt s1, h1
; CHECK-CVT-NEXT: fcvt s0, h0

View File

@@ -266,6 +266,13 @@ define float @round_f32(float %x) #0 {
ret float %val
}
; CHECK-LABEL: roundeven_f32:
; CHECK: frintn s0, s0
define float @roundeven_f32(float %x) #0 {
%val = call float @llvm.experimental.constrained.roundeven.f32(float %x, metadata !"fpexcept.strict") #0
ret float %val
}
; CHECK-LABEL: trunc_f32:
; CHECK: frintz s0, s0
define float @trunc_f32(float %x) #0 {
@@ -729,6 +736,13 @@ define double @round_f64(double %x) #0 {
ret double %val
}
; CHECK-LABEL: roundeven_f64:
; CHECK: frintn d0, d0
define double @roundeven_f64(double %x) #0 {
%val = call double @llvm.experimental.constrained.roundeven.f64(double %x, metadata !"fpexcept.strict") #0
ret double %val
}
; CHECK-LABEL: trunc_f64:
; CHECK: frintz d0, d0
define double @trunc_f64(double %x) #0 {
@@ -1474,6 +1488,7 @@ declare float @llvm.experimental.constrained.floor.f32(float, metadata)
declare i32 @llvm.experimental.constrained.lround.f32(float, metadata)
declare i64 @llvm.experimental.constrained.llround.f32(float, metadata)
declare float @llvm.experimental.constrained.round.f32(float, metadata)
declare float @llvm.experimental.constrained.roundeven.f32(float, metadata)
declare float @llvm.experimental.constrained.trunc.f32(float, metadata)
declare i1 @llvm.experimental.constrained.fcmps.f32(float, float, metadata, metadata)
declare i1 @llvm.experimental.constrained.fcmp.f32(float, float, metadata, metadata)
@@ -1515,6 +1530,7 @@ declare double @llvm.experimental.constrained.floor.f64(double, metadata)
declare i32 @llvm.experimental.constrained.lround.f64(double, metadata)
declare i64 @llvm.experimental.constrained.llround.f64(double, metadata)
declare double @llvm.experimental.constrained.round.f64(double, metadata)
declare double @llvm.experimental.constrained.roundeven.f64(double, metadata)
declare double @llvm.experimental.constrained.trunc.f64(double, metadata)
declare i1 @llvm.experimental.constrained.fcmps.f64(double, double, metadata, metadata)
declare i1 @llvm.experimental.constrained.fcmp.f64(double, double, metadata, metadata)

View File

@@ -0,0 +1,41 @@
; RUN: llc -mtriple=aarch64-eabi -mattr=+fullfp16 %s -o - | FileCheck %s
; The llvm.aarch64.neon.frintn intrinsic should be auto-upgraded to the
; target-independent roundeven intrinsic.
define <4 x half> @frintn_4h(<4 x half> %A) nounwind {
;CHECK-LABEL: frintn_4h:
;CHECK: frintn v0.4h, v0.4h
;CHECK-NEXT: ret
%tmp3 = call <4 x half> @llvm.aarch64.neon.frintn.v4f16(<4 x half> %A)
ret <4 x half> %tmp3
}
define <2 x float> @frintn_2s(<2 x float> %A) nounwind {
;CHECK-LABEL: frintn_2s:
;CHECK: frintn v0.2s, v0.2s
;CHECK-NEXT: ret
%tmp3 = call <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float> %A)
ret <2 x float> %tmp3
}
define <4 x float> @frintn_4s(<4 x float> %A) nounwind {
;CHECK-LABEL: frintn_4s:
;CHECK: frintn v0.4s, v0.4s
;CHECK-NEXT: ret
%tmp3 = call <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float> %A)
ret <4 x float> %tmp3
}
define <2 x double> @frintn_2d(<2 x double> %A) nounwind {
;CHECK-LABEL: frintn_2d:
;CHECK: frintn v0.2d, v0.2d
;CHECK-NEXT: ret
%tmp3 = call <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double> %A)
ret <2 x double> %tmp3
}
declare <4 x half> @llvm.aarch64.neon.frintn.v4f16(<4 x half>) nounwind readnone
declare <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float>) nounwind readnone
declare <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float>) nounwind readnone
declare <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double>) nounwind readnone

View File

@@ -1255,6 +1255,253 @@ define void @frinta_v32f64(<32 x double>* %a) #0 {
ret void
}
;
; ROUNDEVEN -> FRINTN
;
; Don't use SVE for 64-bit vectors.
define <4 x half> @frintn_v4f16(<4 x half> %op) #0 {
; CHECK-LABEL: frintn_v4f16:
; CHECK: frintn v0.4h, v0.4h
; CHECK-NEXT: ret
%res = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %op)
ret <4 x half> %res
}
; Don't use SVE for 128-bit vectors.
define <8 x half> @frintn_v8f16(<8 x half> %op) #0 {
; CHECK-LABEL: frintn_v8f16:
; CHECK: frintn v0.8h, v0.8h
; CHECK-NEXT: ret
%res = call <8 x half> @llvm.roundeven.v8f16(<8 x half> %op)
ret <8 x half> %res
}
define void @frintn_v16f16(<16 x half>* %a) #0 {
; CHECK-LABEL: frintn_v16f16:
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
; CHECK-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; CHECK-NEXT: frintn [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; CHECK-NEXT: ret
%op = load <16 x half>, <16 x half>* %a
%res = call <16 x half> @llvm.roundeven.v16f16(<16 x half> %op)
store <16 x half> %res, <16 x half>* %a
ret void
}
define void @frintn_v32f16(<32 x half>* %a) #0 {
; CHECK-LABEL: frintn_v32f16:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
; VBITS_GE_512-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: frintn [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret
; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
; VBITS_EQ_256-DAG: frintn [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
; VBITS_EQ_256-DAG: frintn [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]]
; VBITS_EQ_256-NEXT: ret
%op = load <32 x half>, <32 x half>* %a
%res = call <32 x half> @llvm.roundeven.v32f16(<32 x half> %op)
store <32 x half> %res, <32 x half>* %a
ret void
}
define void @frintn_v64f16(<64 x half>* %a) #0 {
; CHECK-LABEL: frintn_v64f16:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
; VBITS_GE_1024-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: frintn [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
%op = load <64 x half>, <64 x half>* %a
%res = call <64 x half> @llvm.roundeven.v64f16(<64 x half> %op)
store <64 x half> %res, <64 x half>* %a
ret void
}
define void @frintn_v128f16(<128 x half>* %a) #0 {
; CHECK-LABEL: frintn_v128f16:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
; VBITS_GE_2048-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: frintn [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
%op = load <128 x half>, <128 x half>* %a
%res = call <128 x half> @llvm.roundeven.v128f16(<128 x half> %op)
store <128 x half> %res, <128 x half>* %a
ret void
}
; Don't use SVE for 64-bit vectors.
define <2 x float> @frintn_v2f32(<2 x float> %op) #0 {
; CHECK-LABEL: frintn_v2f32:
; CHECK: frintn v0.2s, v0.2s
; CHECK-NEXT: ret
%res = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %op)
ret <2 x float> %res
}
; Don't use SVE for 128-bit vectors.
define <4 x float> @frintn_v4f32(<4 x float> %op) #0 {
; CHECK-LABEL: frintn_v4f32:
; CHECK: frintn v0.4s, v0.4s
; CHECK-NEXT: ret
%res = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %op)
ret <4 x float> %res
}
define void @frintn_v8f32(<8 x float>* %a) #0 {
; CHECK-LABEL: frintn_v8f32:
; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
; CHECK-DAG: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; CHECK-NEXT: frintn [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; CHECK-NEXT: ret
%op = load <8 x float>, <8 x float>* %a
%res = call <8 x float> @llvm.roundeven.v8f32(<8 x float> %op)
store <8 x float> %res, <8 x float>* %a
ret void
}
define void @frintn_v16f32(<16 x float>* %a) #0 {
; CHECK-LABEL: frintn_v16f32:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
; VBITS_GE_512-DAG: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: frintn [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret
; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
; VBITS_EQ_256-DAG: frintn [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s
; VBITS_EQ_256-DAG: frintn [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s
; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]]
; VBITS_EQ_256-NEXT: ret
%op = load <16 x float>, <16 x float>* %a
%res = call <16 x float> @llvm.roundeven.v16f32(<16 x float> %op)
store <16 x float> %res, <16 x float>* %a
ret void
}
define void @frintn_v32f32(<32 x float>* %a) #0 {
; CHECK-LABEL: frintn_v32f32:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
; VBITS_GE_1024-DAG: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: frintn [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
%op = load <32 x float>, <32 x float>* %a
%res = call <32 x float> @llvm.roundeven.v32f32(<32 x float> %op)
store <32 x float> %res, <32 x float>* %a
ret void
}
define void @frintn_v64f32(<64 x float>* %a) #0 {
; CHECK-LABEL: frintn_v64f32:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
; VBITS_GE_2048-DAG: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: frintn [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
%op = load <64 x float>, <64 x float>* %a
%res = call <64 x float> @llvm.roundeven.v64f32(<64 x float> %op)
store <64 x float> %res, <64 x float>* %a
ret void
}
; Don't use SVE for 64-bit vectors.
define <1 x double> @frintn_v1f64(<1 x double> %op) #0 {
; CHECK-LABEL: frintn_v1f64:
; CHECK: frintn d0, d0
; CHECK-NEXT: ret
%res = call <1 x double> @llvm.roundeven.v1f64(<1 x double> %op)
ret <1 x double> %res
}
; Don't use SVE for 128-bit vectors.
define <2 x double> @frintn_v2f64(<2 x double> %op) #0 {
; CHECK-LABEL: frintn_v2f64:
; CHECK: frintn v0.2d, v0.2d
; CHECK-NEXT: ret
%res = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %op)
ret <2 x double> %res
}
define void @frintn_v4f64(<4 x double>* %a) #0 {
; CHECK-LABEL: frintn_v4f64:
; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
; CHECK-DAG: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; CHECK-NEXT: frintn [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; CHECK-NEXT: ret
%op = load <4 x double>, <4 x double>* %a
%res = call <4 x double> @llvm.roundeven.v4f64(<4 x double> %op)
store <4 x double> %res, <4 x double>* %a
ret void
}
define void @frintn_v8f64(<8 x double>* %a) #0 {
; CHECK-LABEL: frintn_v8f64:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
; VBITS_GE_512-DAG: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: frintn [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret
; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
; VBITS_EQ_256-DAG: frintn [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d
; VBITS_EQ_256-DAG: frintn [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d
; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]]
; VBITS_EQ_256-NEXT: ret
%op = load <8 x double>, <8 x double>* %a
%res = call <8 x double> @llvm.roundeven.v8f64(<8 x double> %op)
store <8 x double> %res, <8 x double>* %a
ret void
}
define void @frintn_v16f64(<16 x double>* %a) #0 {
; CHECK-LABEL: frintn_v16f64:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
; VBITS_GE_1024-DAG: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: frintn [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
%op = load <16 x double>, <16 x double>* %a
%res = call <16 x double> @llvm.roundeven.v16f64(<16 x double> %op)
store <16 x double> %res, <16 x double>* %a
ret void
}
define void @frintn_v32f64(<32 x double>* %a) #0 {
; CHECK-LABEL: frintn_v32f64:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
; VBITS_GE_2048-DAG: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: frintn [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
%op = load <32 x double>, <32 x double>* %a
%res = call <32 x double> @llvm.roundeven.v32f64(<32 x double> %op)
store <32 x double> %res, <32 x double>* %a
ret void
}
;
; TRUNC -> FRINTZ
;
@@ -1599,6 +1846,25 @@ declare <8 x double> @llvm.round.v8f64(<8 x double>)
declare <16 x double> @llvm.round.v16f64(<16 x double>)
declare <32 x double> @llvm.round.v32f64(<32 x double>)
declare <4 x half> @llvm.roundeven.v4f16(<4 x half>)
declare <8 x half> @llvm.roundeven.v8f16(<8 x half>)
declare <16 x half> @llvm.roundeven.v16f16(<16 x half>)
declare <32 x half> @llvm.roundeven.v32f16(<32 x half>)
declare <64 x half> @llvm.roundeven.v64f16(<64 x half>)
declare <128 x half> @llvm.roundeven.v128f16(<128 x half>)
declare <2 x float> @llvm.roundeven.v2f32(<2 x float>)
declare <4 x float> @llvm.roundeven.v4f32(<4 x float>)
declare <8 x float> @llvm.roundeven.v8f32(<8 x float>)
declare <16 x float> @llvm.roundeven.v16f32(<16 x float>)
declare <32 x float> @llvm.roundeven.v32f32(<32 x float>)
declare <64 x float> @llvm.roundeven.v64f32(<64 x float>)
declare <1 x double> @llvm.roundeven.v1f64(<1 x double>)
declare <2 x double> @llvm.roundeven.v2f64(<2 x double>)
declare <4 x double> @llvm.roundeven.v4f64(<4 x double>)
declare <8 x double> @llvm.roundeven.v8f64(<8 x double>)
declare <16 x double> @llvm.roundeven.v16f64(<16 x double>)
declare <32 x double> @llvm.roundeven.v32f64(<32 x double>)
declare <4 x half> @llvm.trunc.v4f16(<4 x half>)
declare <8 x half> @llvm.trunc.v8f16(<8 x half>)
declare <16 x half> @llvm.trunc.v16f16(<16 x half>)

View File

@@ -29,6 +29,7 @@ declare <3 x float> @llvm.log2.v3f32(<3 x float>)
declare <3 x float> @llvm.nearbyint.v3f32(<3 x float>)
declare <3 x float> @llvm.rint.v3f32(<3 x float>)
declare <3 x float> @llvm.round.v3f32(<3 x float>)
declare <3 x float> @llvm.roundeven.v3f32(<3 x float>)
declare <3 x float> @llvm.sqrt.v3f32(<3 x float>)
declare <3 x float> @llvm.trunc.v3f32(<3 x float>)
@@ -478,6 +479,15 @@ define <3 x float> @round_v3f32(<3 x float> %x) nounwind {
ret <3 x float> %r
}
define <3 x float> @roundeven_v3f32(<3 x float> %x) nounwind {
; CHECK-LABEL: roundeven_v3f32:
; CHECK: // %bb.0:
; CHECK-NEXT: frintn v0.4s, v0.4s
; CHECK-NEXT: ret
%r = call <3 x float> @llvm.roundeven.v3f32(<3 x float> %x)
ret <3 x float> %r
}
define <3 x float> @sqrt_v3f32(<3 x float> %x) nounwind {
; CHECK-LABEL: sqrt_v3f32:
; CHECK: // %bb.0: