[ARM] Allow passing/returning of __fp16 arguments

The ACLE (ARM C Language Extensions) 2.0 allows the __fp16 type to be used as a functon argument or return type (ACLE 1.1 did not). The current public release of the AAPCS (2.09) states that __fp16 values should be converted to single-precision before being passed or returned, but AAPCS 2.10 (to be released shortly) changes this, so that they are passed in the least-significant 16 bits of either a GPR (for base AAPCS) or a single-precision register (for AAPCS-VFP). This does not change how arguments are passed if they get passed on the stack. This patch brings clang up to compliance with the latest versions of both of these specs. We can now set the __ARM_FP16_ARGS ACLE predefine, and we have always been able to set the __ARM_FP16_FORMAT_IEEE predefine (we do not support the alternative format). llvm-svn: 246764
2015-09-03 12:40:58 +00:00 · 2015-09-03 12:40:58 +00:00 · dc2854c2f1
parent 3ebcaf6685
commit dc2854c2f1
6 changed files with 60 additions and 3 deletions
--- a/clang/lib/Basic/Targets.cpp
+++ b/clang/lib/Basic/Targets.cpp
@ -4568,6 +4568,10 @@ public:
    // ACLE predefines.
    Builder.defineMacro("__ARM_ACLE", "200");

+    // FP16 support (we currently only support IEEE format).
+    Builder.defineMacro("__ARM_FP16_FORMAT_IEEE", "1");
+    Builder.defineMacro("__ARM_FP16_ARGS", "1");
+
    // Subtarget options.

    // FIXME: It's more complicated than this and we don't really support
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@ -4714,6 +4714,16 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty,
    return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
  }

+  // __fp16 gets passed as if it were an int or float, but with the top 16 bits
+  // unspecified. This is not done for OpenCL as it handles the half type
+  // natively, and does not need to interwork with AAPCS code.
+  if (Ty->isHalfType() && !getContext().getLangOpts().OpenCL) {
+    llvm::Type *ResType = IsEffectivelyAAPCS_VFP ?
+      llvm::Type::getFloatTy(getVMContext()) :
+      llvm::Type::getInt32Ty(getVMContext());
+    return ABIArgInfo::getDirect(ResType);
+  }
+
  if (!isAggregateTypeForABI(Ty)) {
    // Treat an enum type as its underlying type.
    if (const EnumType *EnumTy = Ty->getAs<EnumType>()) {
@ -4872,6 +4882,16 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy,
    return ABIArgInfo::getIndirect(0);
  }

+  // __fp16 gets returned as if it were an int or float, but with the top 16
+  // bits unspecified. This is not done for OpenCL as it handles the half type
+  // natively, and does not need to interwork with AAPCS code.
+  if (RetTy->isHalfType() && !getContext().getLangOpts().OpenCL) {
+    llvm::Type *ResType = IsEffectivelyAAPCS_VFP ?
+      llvm::Type::getFloatTy(getVMContext()) :
+      llvm::Type::getInt32Ty(getVMContext());
+    return ABIArgInfo::getDirect(ResType);
+  }
+
  if (!isAggregateTypeForABI(RetTy)) {
    // Treat an enum type as its underlying type.
    if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
--- a/clang/lib/Driver/Tools.cpp
+++ b/clang/lib/Driver/Tools.cpp
@ -4293,9 +4293,19 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
      CmdArgs.push_back("-mstack-probe-size=0");
  }

-  if (getToolChain().getArch() == llvm::Triple::aarch64 ||
-      getToolChain().getArch() == llvm::Triple::aarch64_be)
+  switch (getToolChain().getArch()) {
+  case llvm::Triple::aarch64:
+  case llvm::Triple::aarch64_be:
+  case llvm::Triple::arm:
+  case llvm::Triple::armeb:
+  case llvm::Triple::thumb:
+  case llvm::Triple::thumbeb:
    CmdArgs.push_back("-fallow-half-arguments-and-returns");
+    break;
+
+  default:
+    break;
+  }

  if (Arg *A = Args.getLastArg(options::OPT_mrestrict_it,
                               options::OPT_mno_restrict_it)) {
--- a/clang/test/CodeGen/arm-fp16-arguments.c
+++ b/clang/test/CodeGen/arm-fp16-arguments.c
@ -0,0 +1,21 @@
+// RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs -mfloat-abi soft -fallow-half-arguments-and-returns -emit-llvm -o - -O1 %s | FileCheck %s --check-prefix=CHECK --check-prefix=SOFT
+// RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs -mfloat-abi hard -fallow-half-arguments-and-returns -emit-llvm -o - -O1 %s | FileCheck %s --check-prefix=CHECK --check-prefix=HARD
+
+__fp16 g;
+
+void t1(__fp16 a) { g = a; }
+// SOFT: define void @t1(i32 [[PARAM:%.*]])
+// SOFT: [[TRUNC:%.*]] = trunc i32 [[PARAM]] to i16
+// HARD: define arm_aapcs_vfpcc void @t1(float [[PARAM:%.*]])
+// HARD: [[BITCAST:%.*]] = bitcast float [[PARAM]] to i32
+// HARD: [[TRUNC:%.*]] = trunc i32 [[BITCAST]] to i16
+// CHECK: store i16 [[TRUNC]], i16* bitcast (half* @g to i16*)
+
+__fp16 t2() { return g; }
+// SOFT: define i32 @t2()
+// HARD: define arm_aapcs_vfpcc float @t2()
+// CHECK: [[LOAD:%.*]] = load i16, i16* bitcast (half* @g to i16*)
+// CHECK: [[ZEXT:%.*]] = zext i16 [[LOAD]] to i32
+// SOFT: ret i32 [[ZEXT]]
+// HARD: [[BITCAST:%.*]] = bitcast i32 [[ZEXT]] to float
+// HARD: ret float [[BITCAST]]
--- a/clang/test/Preprocessor/arm-acle-6.5.c
+++ b/clang/test/Preprocessor/arm-acle-6.5.c
@ -1,6 +1,6 @@
 // RUN: %clang -target arm-eabi -x c -E -dM %s -o - | FileCheck %s -check-prefix CHECK-DEFAULT

-// CHECK-DEFAULT-NOT: __ARM_FP
+// CHECK-DEFAULT-NOT: __ARM_FP 0x

 // RUN: %clang -target arm-eabi -mfpu=vfp -x c -E -dM %s -o - | FileCheck %s -check-prefix CHECK-SP-DP
 // RUN: %clang -target arm-eabi -mfpu=vfp3 -x c -E -dM %s -o - | FileCheck %s -check-prefix CHECK-SP-DP
--- a/clang/test/Preprocessor/arm-target-features.c
+++ b/clang/test/Preprocessor/arm-target-features.c
@ -5,6 +5,8 @@
 // CHECK: __ARM_FEATURE_CRC32 1
 // CHECK: __ARM_FEATURE_DIRECTED_ROUNDING 1
 // CHECK: __ARM_FEATURE_NUMERIC_MAXMIN 1
+// CHECK: __ARM_FP16_ARGS 1
+// CHECK: __ARM_FP16_FORMAT_IEEE 1

 // RUN: %clang -target armv7a-none-linux-gnu -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-V7 %s
 // CHECK-V7: __ARMEL__ 1