From 3e59dfc301240ffb5e82bc43e05225d0d51d5cfa Mon Sep 17 00:00:00 2001 From: Francesco Petrogalli Date: Mon, 15 Jun 2020 20:48:13 +0000 Subject: [PATCH] [llvm][SveEmitter] Emit the bfloat version of `svld1ro`. Summary: The new SVE builtin type __SVBFloat16_t` is used to represent scalable vectors of bfloat elements. Reviewers: sdesmalen, efriedma, stuij, ctetreau, shafik, rengolin Subscribers: tschuett, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D81304 --- .../clang/Basic/AArch64SVEACLETypes.def | 94 ++++++++++--------- clang/include/clang/Basic/arm_sve.td | 5 + clang/lib/AST/ASTContext.cpp | 14 ++- clang/lib/AST/ItaniumMangle.cpp | 2 +- clang/lib/CodeGen/CGBuiltin.cpp | 5 + clang/lib/CodeGen/CodeGenTypes.cpp | 2 + clang/test/AST/ast-dump-aarch64-sve-types.c | 3 + .../acle_sve_ld1ro-bfloat.c | 19 ++++ clang/unittests/AST/ASTImporterTest.cpp | 1 + clang/unittests/AST/SizelessTypesTest.cpp | 4 + clang/utils/TableGen/SveEmitter.cpp | 47 +++++++--- 11 files changed, 134 insertions(+), 62 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1ro-bfloat.c diff --git a/clang/include/clang/Basic/AArch64SVEACLETypes.def b/clang/include/clang/Basic/AArch64SVEACLETypes.def index b0950d1058bf..0640da83ebb3 100644 --- a/clang/include/clang/Basic/AArch64SVEACLETypes.def +++ b/clang/include/clang/Basic/AArch64SVEACLETypes.def @@ -35,11 +35,12 @@ // // - IsFP is true for vectors of floating-point elements. // +// - IsBF true for vector of brain float elements. //===----------------------------------------------------------------------===// #ifndef SVE_VECTOR_TYPE #define SVE_VECTOR_TYPE(Name, MangledName, Id, SingletonId, NumEls, ElBits, \ - IsSigned, IsFP) \ + IsSigned, IsFP, IsBF) \ SVE_TYPE(Name, Id, SingletonId) #endif @@ -50,70 +51,73 @@ //===- Vector point types -----------------------------------------------===// -SVE_VECTOR_TYPE("__SVInt8_t", "__SVInt8_t", SveInt8, SveInt8Ty, 16, 8, true, false) -SVE_VECTOR_TYPE("__SVInt16_t", "__SVInt16_t", SveInt16, SveInt16Ty, 8, 16, true, false) -SVE_VECTOR_TYPE("__SVInt32_t", "__SVInt32_t", SveInt32, SveInt32Ty, 4, 32, true, false) -SVE_VECTOR_TYPE("__SVInt64_t", "__SVInt64_t", SveInt64, SveInt64Ty, 2, 64, true, false) -SVE_VECTOR_TYPE("__SVUint8_t", "__SVUint8_t", SveUint8, SveUint8Ty, 16, 8, false, false) -SVE_VECTOR_TYPE("__SVUint16_t", "__SVUint16_t", SveUint16, SveUint16Ty, 8, 16, false, false) -SVE_VECTOR_TYPE("__SVUint32_t", "__SVUint32_t", SveUint32, SveUint32Ty, 4, 32, false, false) -SVE_VECTOR_TYPE("__SVUint64_t", "__SVUint64_t", SveUint64, SveUint64Ty, 2, 64, false, false) +SVE_VECTOR_TYPE("__SVInt8_t", "__SVInt8_t", SveInt8, SveInt8Ty, 16, 8, true, false, false) +SVE_VECTOR_TYPE("__SVInt16_t", "__SVInt16_t", SveInt16, SveInt16Ty, 8, 16, true, false, false) +SVE_VECTOR_TYPE("__SVInt32_t", "__SVInt32_t", SveInt32, SveInt32Ty, 4, 32, true, false, false) +SVE_VECTOR_TYPE("__SVInt64_t", "__SVInt64_t", SveInt64, SveInt64Ty, 2, 64, true, false, false) -SVE_VECTOR_TYPE("__SVFloat16_t", "__SVFloat16_t", SveFloat16, SveFloat16Ty, 8, 16, true, true) -SVE_VECTOR_TYPE("__SVFloat32_t", "__SVFloat32_t", SveFloat32, SveFloat32Ty, 4, 32, true, true) -SVE_VECTOR_TYPE("__SVFloat64_t", "__SVFloat64_t", SveFloat64, SveFloat64Ty, 2, 64, true, true) +SVE_VECTOR_TYPE("__SVUint8_t", "__SVUint8_t", SveUint8, SveUint8Ty, 16, 8, false, false, false) +SVE_VECTOR_TYPE("__SVUint16_t", "__SVUint16_t", SveUint16, SveUint16Ty, 8, 16, false, false, false) +SVE_VECTOR_TYPE("__SVUint32_t", "__SVUint32_t", SveUint32, SveUint32Ty, 4, 32, false, false, false) +SVE_VECTOR_TYPE("__SVUint64_t", "__SVUint64_t", SveUint64, SveUint64Ty, 2, 64, false, false, false) + +SVE_VECTOR_TYPE("__SVFloat16_t", "__SVFloat16_t", SveFloat16, SveFloat16Ty, 8, 16, true, true, false) +SVE_VECTOR_TYPE("__SVFloat32_t", "__SVFloat32_t", SveFloat32, SveFloat32Ty, 4, 32, true, true, false) +SVE_VECTOR_TYPE("__SVFloat64_t", "__SVFloat64_t", SveFloat64, SveFloat64Ty, 2, 64, true, true, false) + +SVE_VECTOR_TYPE("__SVBFloat16_t", "__SVBFloat16_t", SveBFloat16, SveBFloat16Ty, 8, 16, false, false, true) // // x2 // -SVE_VECTOR_TYPE("__clang_svint8x2_t", "svint8x2_t", SveInt8x2, SveInt8x2Ty, 32, 8, true, false) -SVE_VECTOR_TYPE("__clang_svint16x2_t", "svint16x2_t", SveInt16x2, SveInt16x2Ty, 16, 16, true, false) -SVE_VECTOR_TYPE("__clang_svint32x2_t", "svint32x2_t", SveInt32x2, SveInt32x2Ty, 8, 32, true, false) -SVE_VECTOR_TYPE("__clang_svint64x2_t", "svint64x2_t", SveInt64x2, SveInt64x2Ty, 4, 64, true, false) +SVE_VECTOR_TYPE("__clang_svint8x2_t", "svint8x2_t", SveInt8x2, SveInt8x2Ty, 32, 8, true, false, false) +SVE_VECTOR_TYPE("__clang_svint16x2_t", "svint16x2_t", SveInt16x2, SveInt16x2Ty, 16, 16, true, false, false) +SVE_VECTOR_TYPE("__clang_svint32x2_t", "svint32x2_t", SveInt32x2, SveInt32x2Ty, 8, 32, true, false, false) +SVE_VECTOR_TYPE("__clang_svint64x2_t", "svint64x2_t", SveInt64x2, SveInt64x2Ty, 4, 64, true, false, false) -SVE_VECTOR_TYPE("__clang_svuint8x2_t", "svuint8x2_t", SveUint8x2, SveUint8x2Ty, 32, 8, false, false) -SVE_VECTOR_TYPE("__clang_svuint16x2_t", "svuint16x2_t", SveUint16x2, SveUint16x2Ty, 16, 16, false, false) -SVE_VECTOR_TYPE("__clang_svuint32x2_t", "svuint32x2_t", SveUint32x2, SveUint32x2Ty, 8, 32, false, false) -SVE_VECTOR_TYPE("__clang_svuint64x2_t", "svuint64x2_t", SveUint64x2, SveUint64x2Ty, 4, 64, false, false) +SVE_VECTOR_TYPE("__clang_svuint8x2_t", "svuint8x2_t", SveUint8x2, SveUint8x2Ty, 32, 8, false, false, false) +SVE_VECTOR_TYPE("__clang_svuint16x2_t", "svuint16x2_t", SveUint16x2, SveUint16x2Ty, 16, 16, false, false, false) +SVE_VECTOR_TYPE("__clang_svuint32x2_t", "svuint32x2_t", SveUint32x2, SveUint32x2Ty, 8, 32, false, false, false) +SVE_VECTOR_TYPE("__clang_svuint64x2_t", "svuint64x2_t", SveUint64x2, SveUint64x2Ty, 4, 64, false, false, false) -SVE_VECTOR_TYPE("__clang_svfloat16x2_t", "svfloat16x2_t", SveFloat16x2, SveFloat16x2Ty, 16, 16, true, true) -SVE_VECTOR_TYPE("__clang_svfloat32x2_t", "svfloat32x2_t", SveFloat32x2, SveFloat32x2Ty, 8, 32, true, true) -SVE_VECTOR_TYPE("__clang_svfloat64x2_t", "svfloat64x2_t", SveFloat64x2, SveFloat64x2Ty, 4, 64, true, true) +SVE_VECTOR_TYPE("__clang_svfloat16x2_t", "svfloat16x2_t", SveFloat16x2, SveFloat16x2Ty, 16, 16, true, true, false) +SVE_VECTOR_TYPE("__clang_svfloat32x2_t", "svfloat32x2_t", SveFloat32x2, SveFloat32x2Ty, 8, 32, true, true, false) +SVE_VECTOR_TYPE("__clang_svfloat64x2_t", "svfloat64x2_t", SveFloat64x2, SveFloat64x2Ty, 4, 64, true, true, false) // // x3 // -SVE_VECTOR_TYPE("__clang_svint8x3_t", "svint8x3_t", SveInt8x3, SveInt8x3Ty, 48, 8, true, false) -SVE_VECTOR_TYPE("__clang_svint16x3_t", "svint16x3_t", SveInt16x3, SveInt16x3Ty, 24, 16, true, false) -SVE_VECTOR_TYPE("__clang_svint32x3_t", "svint32x3_t", SveInt32x3, SveInt32x3Ty, 12, 32, true, false) -SVE_VECTOR_TYPE("__clang_svint64x3_t", "svint64x3_t", SveInt64x3, SveInt64x3Ty, 6, 64, true, false) +SVE_VECTOR_TYPE("__clang_svint8x3_t", "svint8x3_t", SveInt8x3, SveInt8x3Ty, 48, 8, true, false, false) +SVE_VECTOR_TYPE("__clang_svint16x3_t", "svint16x3_t", SveInt16x3, SveInt16x3Ty, 24, 16, true, false, false) +SVE_VECTOR_TYPE("__clang_svint32x3_t", "svint32x3_t", SveInt32x3, SveInt32x3Ty, 12, 32, true, false, false) +SVE_VECTOR_TYPE("__clang_svint64x3_t", "svint64x3_t", SveInt64x3, SveInt64x3Ty, 6, 64, true, false, false) -SVE_VECTOR_TYPE("__clang_svuint8x3_t", "svuint8x3_t", SveUint8x3, SveUint8x3Ty, 48, 8, false, false) -SVE_VECTOR_TYPE("__clang_svuint16x3_t", "svuint16x3_t", SveUint16x3, SveUint16x3Ty, 24, 16, false, false) -SVE_VECTOR_TYPE("__clang_svuint32x3_t", "svuint32x3_t", SveUint32x3, SveUint32x3Ty, 12, 32, false, false) -SVE_VECTOR_TYPE("__clang_svuint64x3_t", "svuint64x3_t", SveUint64x3, SveUint64x3Ty, 6, 64, false, false) +SVE_VECTOR_TYPE("__clang_svuint8x3_t", "svuint8x3_t", SveUint8x3, SveUint8x3Ty, 48, 8, false, false, false) +SVE_VECTOR_TYPE("__clang_svuint16x3_t", "svuint16x3_t", SveUint16x3, SveUint16x3Ty, 24, 16, false, false, false) +SVE_VECTOR_TYPE("__clang_svuint32x3_t", "svuint32x3_t", SveUint32x3, SveUint32x3Ty, 12, 32, false, false, false) +SVE_VECTOR_TYPE("__clang_svuint64x3_t", "svuint64x3_t", SveUint64x3, SveUint64x3Ty, 6, 64, false, false, false) -SVE_VECTOR_TYPE("__clang_svfloat16x3_t", "svfloat16x3_t", SveFloat16x3, SveFloat16x3Ty, 24, 16, true, true) -SVE_VECTOR_TYPE("__clang_svfloat32x3_t", "svfloat32x3_t", SveFloat32x3, SveFloat32x3Ty, 12, 32, true, true) -SVE_VECTOR_TYPE("__clang_svfloat64x3_t", "svfloat64x3_t", SveFloat64x3, SveFloat64x3Ty, 6, 64, true, true) +SVE_VECTOR_TYPE("__clang_svfloat16x3_t", "svfloat16x3_t", SveFloat16x3, SveFloat16x3Ty, 24, 16, true, true, false) +SVE_VECTOR_TYPE("__clang_svfloat32x3_t", "svfloat32x3_t", SveFloat32x3, SveFloat32x3Ty, 12, 32, true, true, false) +SVE_VECTOR_TYPE("__clang_svfloat64x3_t", "svfloat64x3_t", SveFloat64x3, SveFloat64x3Ty, 6, 64, true, true, false) // // x4 // -SVE_VECTOR_TYPE("__clang_svint8x4_t", "svint8x4_t", SveInt8x4, SveInt8x4Ty, 64, 8, true, false) -SVE_VECTOR_TYPE("__clang_svint16x4_t", "svint16x4_t", SveInt16x4, SveInt16x4Ty, 32, 16, true, false) -SVE_VECTOR_TYPE("__clang_svint32x4_t", "svint32x4_t", SveInt32x4, SveInt32x4Ty, 16, 32, true, false) -SVE_VECTOR_TYPE("__clang_svint64x4_t", "svint64x4_t", SveInt64x4, SveInt64x4Ty, 8, 64, true, false) +SVE_VECTOR_TYPE("__clang_svint8x4_t", "svint8x4_t", SveInt8x4, SveInt8x4Ty, 64, 8, true, false, false) +SVE_VECTOR_TYPE("__clang_svint16x4_t", "svint16x4_t", SveInt16x4, SveInt16x4Ty, 32, 16, true, false, false) +SVE_VECTOR_TYPE("__clang_svint32x4_t", "svint32x4_t", SveInt32x4, SveInt32x4Ty, 16, 32, true, false, false) +SVE_VECTOR_TYPE("__clang_svint64x4_t", "svint64x4_t", SveInt64x4, SveInt64x4Ty, 8, 64, true, false, false) -SVE_VECTOR_TYPE("__clang_svuint8x4_t", "svuint8x4_t", SveUint8x4, SveUint8x4Ty, 64, 8, false, false) -SVE_VECTOR_TYPE("__clang_svuint16x4_t", "svuint16x4_t", SveUint16x4, SveUint16x4Ty, 32, 16, false, false) -SVE_VECTOR_TYPE("__clang_svuint32x4_t", "svuint32x4_t", SveUint32x4, SveUint32x4Ty, 16, 32, false, false) -SVE_VECTOR_TYPE("__clang_svuint64x4_t", "svuint64x4_t", SveUint64x4, SveUint64x4Ty, 8, 64, false, false) +SVE_VECTOR_TYPE("__clang_svuint8x4_t", "svuint8x4_t", SveUint8x4, SveUint8x4Ty, 64, 8, false, false, false) +SVE_VECTOR_TYPE("__clang_svuint16x4_t", "svuint16x4_t", SveUint16x4, SveUint16x4Ty, 32, 16, false, false, false) +SVE_VECTOR_TYPE("__clang_svuint32x4_t", "svuint32x4_t", SveUint32x4, SveUint32x4Ty, 16, 32, false, false, false) +SVE_VECTOR_TYPE("__clang_svuint64x4_t", "svuint64x4_t", SveUint64x4, SveUint64x4Ty, 8, 64, false, false, false) -SVE_VECTOR_TYPE("__clang_svfloat16x4_t", "svfloat16x4_t", SveFloat16x4, SveFloat16x4Ty, 32, 16, true, true) -SVE_VECTOR_TYPE("__clang_svfloat32x4_t", "svfloat32x4_t", SveFloat32x4, SveFloat32x4Ty, 16, 32, true, true) -SVE_VECTOR_TYPE("__clang_svfloat64x4_t", "svfloat64x4_t", SveFloat64x4, SveFloat64x4Ty, 8, 64, true, true) +SVE_VECTOR_TYPE("__clang_svfloat16x4_t", "svfloat16x4_t", SveFloat16x4, SveFloat16x4Ty, 32, 16, true, true, false) +SVE_VECTOR_TYPE("__clang_svfloat32x4_t", "svfloat32x4_t", SveFloat32x4, SveFloat32x4Ty, 16, 32, true, true, false) +SVE_VECTOR_TYPE("__clang_svfloat64x4_t", "svfloat64x4_t", SveFloat64x4, SveFloat64x4Ty, 8, 64, true, true, false) SVE_PREDICATE_TYPE("__SVBool_t", "__SVBool_t", SveBool, SveBoolTy, 16) diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 7c8eb8d38f75..4eb7ff7857cb 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -48,6 +48,7 @@ // f: float // h: half-float // d: double +// b: bfloat // Typespec modifiers // ------------------ @@ -145,6 +146,7 @@ def EltTyBool8 : EltType<8>; def EltTyBool16 : EltType<9>; def EltTyBool32 : EltType<10>; def EltTyBool64 : EltType<11>; +def EltTyBFloat16 : EltType<12>; class MemEltType { int Value = val; @@ -482,6 +484,9 @@ def SVLD1RQ : SInst<"svld1rq[_{2}]", "dPc", "csilUcUsUiUlhfd", MergeNone, "aarch let ArchGuard = "defined(__ARM_FEATURE_SVE_MATMUL_FP64)" in { def SVLD1RO : SInst<"svld1ro[_{2}]", "dPc", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ld1ro">; } +let ArchGuard = "defined(__ARM_FEATURE_SVE_MATMUL_FP64) && defined(__ARM_FEATURE_BF16_SCALAR_ARITHMETIC)" in { + def SVLD1RO_BF : SInst<"svld1ro[_{2}]", "dPc", "b", MergeNone, "aarch64_sve_ld1ro">; +} //////////////////////////////////////////////////////////////////////////////// // Stores diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 91efa19d158b..01a737e307d0 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -2132,7 +2132,7 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const { // of 0 for the static length. The alignment values are those defined // by the Procedure Call Standard for the Arm Architecture. #define SVE_VECTOR_TYPE(Name, MangledName, Id, SingletonId, NumEls, ElBits, \ - IsSigned, IsFP) \ + IsSigned, IsFP, IsBF) \ case BuiltinType::Id: \ Width = 0; \ Align = 128; \ @@ -3642,13 +3642,17 @@ QualType ASTContext::getScalableVectorType(QualType EltTy, if (Target->hasAArch64SVETypes()) { uint64_t EltTySize = getTypeSize(EltTy); #define SVE_VECTOR_TYPE(Name, MangledName, Id, SingletonId, NumEls, ElBits, \ - IsSigned, IsFP) \ + IsSigned, IsFP, IsBF) \ if (!EltTy->isBooleanType() && \ ((EltTy->hasIntegerRepresentation() && \ EltTy->hasSignedIntegerRepresentation() == IsSigned) || \ - (EltTy->hasFloatingRepresentation() && IsFP)) && \ - EltTySize == ElBits && NumElts == NumEls) \ - return SingletonId; + (EltTy->hasFloatingRepresentation() && !EltTy->isBFloat16Type() && \ + IsFP && !IsBF) || \ + (EltTy->hasFloatingRepresentation() && EltTy->isBFloat16Type() && \ + IsBF && !IsFP)) && \ + EltTySize == ElBits && NumElts == NumEls) { \ + return SingletonId; \ + } #define SVE_PREDICATE_TYPE(Name, MangledName, Id, SingletonId, NumEls) \ if (EltTy->isBooleanType() && NumElts == NumEls) \ return SingletonId; diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index 247f5667bef9..ad4ecad151f5 100644 --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -2821,7 +2821,7 @@ void CXXNameMangler::mangleType(const BuiltinType *T) { // is defined in the appendices to the Procedure Call Standard for the // Arm Architecture. #define SVE_VECTOR_TYPE(InternalName, MangledName, Id, SingletonId, NumEls, \ - ElBits, IsSigned, IsFP) \ + ElBits, IsSigned, IsFP, IsBF) \ case BuiltinType::Id: \ type_name = MangledName; \ Out << (type_name == InternalName ? "u" : "") << type_name.size() \ diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 2339446609f6..27e917062259 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -7639,6 +7639,9 @@ llvm::Type *CodeGenFunction::getEltType(SVETypeFlags TypeFlags) { case SVETypeFlags::EltTyFloat64: return Builder.getDoubleTy(); + case SVETypeFlags::EltTyBFloat16: + return Builder.getBFloatTy(); + case SVETypeFlags::EltTyBool8: case SVETypeFlags::EltTyBool16: case SVETypeFlags::EltTyBool32: @@ -7699,6 +7702,8 @@ CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) { case SVETypeFlags::EltTyFloat16: return llvm::ScalableVectorType::get(Builder.getHalfTy(), 8); + case SVETypeFlags::EltTyBFloat16: + return llvm::ScalableVectorType::get(Builder.getBFloatTy(), 8); case SVETypeFlags::EltTyFloat32: return llvm::ScalableVectorType::get(Builder.getFloatTy(), 4); case SVETypeFlags::EltTyFloat64: diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp index b57ce4e8d4b5..4811e309abbe 100644 --- a/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/clang/lib/CodeGen/CodeGenTypes.cpp @@ -617,6 +617,8 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { return GET_SVE_FP_VEC(DoubleTy, false, 6); case BuiltinType::SveFloat64x4: return GET_SVE_FP_VEC(DoubleTy, false, 8); + case BuiltinType::SveBFloat16: + return GET_SVE_FP_VEC(BFloat16Ty, false, 8); #undef GET_SVE_FP_VEC case BuiltinType::Dependent: #define BUILTIN_TYPE(Id, SingletonId) diff --git a/clang/test/AST/ast-dump-aarch64-sve-types.c b/clang/test/AST/ast-dump-aarch64-sve-types.c index fdad9b4efc4c..a522164124a3 100644 --- a/clang/test/AST/ast-dump-aarch64-sve-types.c +++ b/clang/test/AST/ast-dump-aarch64-sve-types.c @@ -34,5 +34,8 @@ // CHECK: TypedefDecl {{.*}} implicit __SVFloat64_t '__SVFloat64_t' // CHECK-NEXT: -BuiltinType {{.*}} '__SVFloat64_t' +// CHECK: TypedefDecl {{.*}} implicit __SVBFloat16_t '__SVBFloat16_t' +// CHECK-NEXT: -BuiltinType {{.*}} '__SVBFloat16_t' + // CHECK: TypedefDecl {{.*}} implicit __SVBool_t '__SVBool_t' // CHECK-NEXT: -BuiltinType {{.*}} '__SVBool_t' diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1ro-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1ro-bfloat.c new file mode 100644 index 000000000000..56137bd6c8a8 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1ro-bfloat.c @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE_MATMUL_FP64 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE_MATMUL_FP64 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4 +#endif + +svbfloat16_t test_svld1ro_bf16(svbool_t pg, const bfloat16_t *base) { + // CHECK-LABEL: test_svld1ro_bf16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.ld1ro.nxv8bf16( %[[PG]], bfloat* %base) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svld1ro, _bf16, , )(pg, base); +} diff --git a/clang/unittests/AST/ASTImporterTest.cpp b/clang/unittests/AST/ASTImporterTest.cpp index 57825560ceb7..172fbb461a7e 100644 --- a/clang/unittests/AST/ASTImporterTest.cpp +++ b/clang/unittests/AST/ASTImporterTest.cpp @@ -5393,6 +5393,7 @@ TEST_P(SVEBuiltins, ImportTypes) { "__SVUint32_t", "__SVUint64_t", "__SVFloat16_t", + "__SVBFloat16_t", "__SVFloat32_t", "__SVFloat64_t", "__SVBool_t" diff --git a/clang/unittests/AST/SizelessTypesTest.cpp b/clang/unittests/AST/SizelessTypesTest.cpp index 8daf30e6bbe3..9ff3a5b09d89 100644 --- a/clang/unittests/AST/SizelessTypesTest.cpp +++ b/clang/unittests/AST/SizelessTypesTest.cpp @@ -42,6 +42,8 @@ TEST_F(SizelessTypeTester, TestSizelessBuiltin) { ASSERT_TRUE(Ctx.SveFloat32Ty->isSizelessBuiltinType()); ASSERT_TRUE(Ctx.SveFloat64Ty->isSizelessBuiltinType()); + ASSERT_TRUE(Ctx.SveBFloat16Ty->isSizelessBuiltinType()); + ASSERT_TRUE(Ctx.SveBoolTy->isSizelessBuiltinType()); ASSERT_FALSE(Ctx.VoidTy->isSizelessBuiltinType()); @@ -70,6 +72,8 @@ TEST_F(SizelessTypeTester, TestSizeless) { ASSERT_TRUE(Ctx.SveFloat32Ty->isSizelessType()); ASSERT_TRUE(Ctx.SveFloat64Ty->isSizelessType()); + ASSERT_TRUE(Ctx.SveBFloat16Ty->isSizelessType()); + ASSERT_TRUE(Ctx.SveBoolTy->isSizelessType()); ASSERT_FALSE(Ctx.VoidTy->isSizelessType()); diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp index 54b50482bb8f..c8e16a9283ff 100644 --- a/clang/utils/TableGen/SveEmitter.cpp +++ b/clang/utils/TableGen/SveEmitter.cpp @@ -65,7 +65,7 @@ public: class SVEType { TypeSpec TS; - bool Float, Signed, Immediate, Void, Constant, Pointer; + bool Float, Signed, Immediate, Void, Constant, Pointer, BFloat; bool DefaultType, IsScalable, Predicate, PredicatePattern, PrefetchOp; unsigned Bitwidth, ElementBitwidth, NumVectors; @@ -74,9 +74,9 @@ public: SVEType(TypeSpec TS, char CharMod) : TS(TS), Float(false), Signed(true), Immediate(false), Void(false), - Constant(false), Pointer(false), DefaultType(false), IsScalable(true), - Predicate(false), PredicatePattern(false), PrefetchOp(false), - Bitwidth(128), ElementBitwidth(~0U), NumVectors(1) { + Constant(false), Pointer(false), BFloat(false), DefaultType(false), + IsScalable(true), Predicate(false), PredicatePattern(false), + PrefetchOp(false), Bitwidth(128), ElementBitwidth(~0U), NumVectors(1) { if (!TS.empty()) applyTypespec(); applyModifier(CharMod); @@ -93,9 +93,11 @@ public: bool isVoid() const { return Void & !Pointer; } bool isDefault() const { return DefaultType; } bool isFloat() const { return Float; } - bool isInteger() const { return !Float && !Predicate; } + bool isBFloat() const { return BFloat; } + bool isFloatingPoint() const { return Float || BFloat; } + bool isInteger() const { return !isFloatingPoint() && !Predicate; } bool isScalarPredicate() const { - return !Float && Predicate && NumVectors == 0; + return !isFloatingPoint() && Predicate && NumVectors == 0; } bool isPredicateVector() const { return Predicate; } bool isPredicatePattern() const { return PredicatePattern; } @@ -362,7 +364,7 @@ std::string SVEType::builtin_str() const { if (isVoidPointer()) S += "v"; - else if (!Float) + else if (!isFloatingPoint()) switch (ElementBitwidth) { case 1: S += "b"; break; case 8: S += "c"; break; @@ -372,15 +374,19 @@ std::string SVEType::builtin_str() const { case 128: S += "LLLi"; break; default: llvm_unreachable("Unhandled case!"); } - else + else if (isFloat()) switch (ElementBitwidth) { case 16: S += "h"; break; case 32: S += "f"; break; case 64: S += "d"; break; default: llvm_unreachable("Unhandled case!"); } + else if (isBFloat()) { + assert(ElementBitwidth == 16 && "Not a valid BFloat."); + S += "y"; + } - if (!isFloat()) { + if (!isFloatingPoint()) { if ((isChar() || isPointer()) && !isVoidPointer()) { // Make chars and typed pointers explicitly signed. if (Signed) @@ -421,13 +427,15 @@ std::string SVEType::str() const { else { if (isScalableVector()) S += "sv"; - if (!Signed && !Float) + if (!Signed && !isFloatingPoint()) S += "u"; if (Float) S += "float"; else if (isScalarPredicate() || isPredicateVector()) S += "bool"; + else if (isBFloat()) + S += "bfloat"; else S += "int"; @@ -481,6 +489,10 @@ void SVEType::applyTypespec() { Float = true; ElementBitwidth = 64; break; + case 'b': + BFloat = true; + ElementBitwidth = 16; + break; default: llvm_unreachable("Unhandled type code!"); } @@ -534,6 +546,7 @@ void SVEType::applyModifier(char Mod) { case 'P': Signed = true; Float = false; + BFloat = false; Predicate = true; Bitwidth = 16; ElementBitwidth = 1; @@ -784,7 +797,6 @@ Intrinsic::Intrinsic(StringRef Name, StringRef Proto, uint64_t MergeTy, BaseTypeSpec(BT), Class(Class), Guard(Guard.str()), MergeSuffix(MergeSuffix.str()), BaseType(BT, 'd'), Flags(Flags), ImmChecks(Checks.begin(), Checks.end()) { - // Types[0] is the return value. for (unsigned I = 0; I < Proto.size(); ++I) { SVEType T(BaseTypeSpec, Proto[I]); @@ -848,6 +860,8 @@ std::string Intrinsic::replaceTemplatedArgs(std::string Name, TypeSpec TS, TypeCode = T.isSigned() ? 's' : 'u'; else if (T.isPredicateVector()) TypeCode = 'b'; + else if (T.isBFloat()) + TypeCode = "bf"; else TypeCode = 'f'; Ret.replace(Pos, NumChars, TypeCode + utostr(T.getElementSizeInBits())); @@ -923,6 +937,11 @@ uint64_t SVEEmitter::encodeTypeFlags(const SVEType &T) { } } + if (T.isBFloat()) { + assert(T.getElementSizeInBits() == 16 && "Not a valid BFloat."); + return encodeEltType("EltTyBFloat16"); + } + if (T.isPredicateVector()) { switch (T.getElementSizeInBits()) { case 8: @@ -1067,6 +1086,12 @@ void SVEEmitter::createHeader(raw_ostream &OS) { OS << "typedef __SVUint32_t svuint32_t;\n"; OS << "typedef __SVUint64_t svuint64_t;\n"; OS << "typedef __SVFloat16_t svfloat16_t;\n"; + OS << "typedef __SVBFloat16_t svbfloat16_t;\n\n"; + + OS << "#ifdef __ARM_FEATURE_BF16_SCALAR_ARITHMETIC\n"; + OS << "typedef __bf16 bfloat16_t;\n"; + OS << "#endif\n\n"; + OS << "typedef __SVFloat32_t svfloat32_t;\n"; OS << "typedef __SVFloat64_t svfloat64_t;\n"; OS << "typedef __clang_svint8x2_t svint8x2_t;\n";