ARM: teach backend about watchOS and tvOS libcalls.

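The most substantial changes are again for watchOS: libcalls use the hard-float (AAPCS-VFP) calling convention where needed, and the sincos libcall (__sincos_stret) has a different calling convention, returning its sin/cos pair directly instead of through an sret stack slot. llvm-svn: 251571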
2015-10-28 22:51:16 +00:00 · 2015-10-28 22:51:16 +00:00 · 8b40366b54
parent e0ccdc6de9
commit 8b40366b54
5 changed files with 226 additions and 26 deletions
--- a/llvm/lib/Analysis/TargetLibraryInfo.cpp
+++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp
@ -74,13 +74,14 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
  }

  // memset_pattern16 is only available on iOS 3.0 and Mac OS X 10.5 and later.
+  // All versions of watchOS support it.
  if (T.isMacOSX()) {
    if (T.isMacOSXVersionLT(10, 5))
      TLI.setUnavailable(LibFunc::memset_pattern16);
  } else if (T.isiOS()) {
    if (T.isOSVersionLT(3, 0))
      TLI.setUnavailable(LibFunc::memset_pattern16);
-  } else {
+  } else if (!T.isWatchOS()) {
    TLI.setUnavailable(LibFunc::memset_pattern16);
  }

@ -288,8 +289,12 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
    }
    break;
  case Triple::IOS:
+  case Triple::WatchOS:
    TLI.setUnavailable(LibFunc::exp10l);
-    if (T.isOSVersionLT(7, 0)) {
+    if (!T.isWatchOS() && (T.isOSVersionLT(7, 0) ||
+                           (T.isOSVersionLT(9, 0) &&
+                            (T.getArch() == Triple::x86 ||
+                             T.getArch() == Triple::x86_64)))) {
      TLI.setUnavailable(LibFunc::exp10);
      TLI.setUnavailable(LibFunc::exp10f);
    } else {
@ -319,6 +324,7 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
  case Triple::Darwin:
  case Triple::MacOSX:
  case Triple::IOS:
+  case Triple::WatchOS:
  case Triple::FreeBSD:
  case Triple::Linux:
    break;
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@ -242,6 +242,13 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
          setCmpLibcallCC(LC.Op, LC.Cond);
      }
    }
+
+    // Set the correct calling convention for ARMv7k WatchOS. It's just
+    // AAPCS_VFP for functions as simple as libcalls.
+    if (Subtarget->isTargetWatchOS()) {
+      for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i)
+        setLibcallCallingConv((RTLIB::Libcall)i, CallingConv::ARM_AAPCS_VFP);
+    }
  }

  // These libcalls are not available in 32-bit.
@ -377,8 +384,9 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
  }

  // Use divmod compiler-rt calls for iOS 5.0 and later.
-  if (Subtarget->getTargetTriple().isiOS() &&
-      !Subtarget->getTargetTriple().isOSVersionLT(5, 0)) {
+  if (Subtarget->isTargetWatchOS() ||
+      (Subtarget->isTargetIOS() &&
+       !Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
    setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
    setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
  }
@ -941,7 +949,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
  if (Subtarget->hasSinCos()) {
    setLibcallName(RTLIB::SINCOS_F32, "sincosf");
    setLibcallName(RTLIB::SINCOS_F64, "sincos");
-    if (Subtarget->getTargetTriple().isiOS()) {
+    if (Subtarget->isTargetWatchOS()) {
+      setLibcallCallingConv(RTLIB::SINCOS_F32, CallingConv::ARM_AAPCS_VFP);
+      setLibcallCallingConv(RTLIB::SINCOS_F64, CallingConv::ARM_AAPCS_VFP);
+    }
+    if (Subtarget->isTargetIOS() || Subtarget->isTargetWatchOS()) {
      // For iOS, we don't want to the normal expansion of a libcall to
      // sincos. We want to issue a libcall to __sincos_stret.
      setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
@ -6576,27 +6588,33 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
  auto PtrVT = getPointerTy(DAG.getDataLayout());

  MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Pair of floats / doubles used to pass the result.
-  StructType *RetTy = StructType::get(ArgTy, ArgTy, nullptr);
-
-  // Create stack object for sret.
+  Type *RetTy = StructType::get(ArgTy, ArgTy, nullptr);
  auto &DL = DAG.getDataLayout();
-  const uint64_t ByteSize = DL.getTypeAllocSize(RetTy);
-  const unsigned StackAlign = DL.getPrefTypeAlignment(RetTy);
-  int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false);
-  SDValue SRet = DAG.getFrameIndex(FrameIdx, getPointerTy(DL));

  ArgListTy Args;
+  bool ShouldUseSRet = Subtarget->isAPCS_ABI();
+  SDValue SRet;
+  if (ShouldUseSRet) {
+    // Create stack object for sret.
+    const uint64_t ByteSize = DL.getTypeAllocSize(RetTy);
+    const unsigned StackAlign = DL.getPrefTypeAlignment(RetTy);
+    int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false);
+    SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy(DL));
+
+    ArgListEntry Entry;
+    Entry.Node = SRet;
+    Entry.Ty = RetTy->getPointerTo();
+    Entry.isSExt = false;
+    Entry.isZExt = false;
+    Entry.isSRet = true;
+    Args.push_back(Entry);
+    RetTy = Type::getVoidTy(*DAG.getContext());
+  }
+
  ArgListEntry Entry;
-
-  Entry.Node = SRet;
-  Entry.Ty = RetTy->getPointerTo();
-  Entry.isSExt = false;
-  Entry.isZExt = false;
-  Entry.isSRet = true;
-  Args.push_back(Entry);
-
  Entry.Node = Arg;
  Entry.Ty = ArgTy;
  Entry.isSExt = false;
@ -6605,16 +6623,21 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {

  const char *LibcallName =
      (ArgVT == MVT::f64) ? "__sincos_stret" : "__sincosf_stret";
+  RTLIB::Libcall LC =
+      (ArgVT == MVT::f64) ? RTLIB::SINCOS_F64 : RTLIB::SINCOS_F32;
+  CallingConv::ID CC = getLibcallCallingConv(LC);
  SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL));

  TargetLowering::CallLoweringInfo CLI(DAG);
-  CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
-    .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), Callee,
-               std::move(Args), 0)
-    .setDiscardResult();
-
+  CLI.setDebugLoc(dl)
+      .setChain(DAG.getEntryNode())
+      .setCallee(CC, RetTy, Callee, std::move(Args), 0)
+      .setDiscardResult(ShouldUseSRet);
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);

+  if (!ShouldUseSRet)
+    return CallResult.first;
+
  SDValue LoadSin = DAG.getLoad(ArgVT, dl, CallResult.second, SRet,
                                MachinePointerInfo(), false, false, false, 0);

--- a/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp
@ -299,7 +299,8 @@ unsigned ARMSubtarget::getMispredictionPenalty() const {
 }

 bool ARMSubtarget::hasSinCos() const {
-  return getTargetTriple().isiOS() && !getTargetTriple().isOSVersionLT(7, 0);
+  return isTargetWatchOS() ||
+    (isTargetIOS() && !getTargetTriple().isOSVersionLT(7, 0));
 }

 bool ARMSubtarget::enableMachineScheduler() const {
--- a/llvm/test/CodeGen/ARM/v7k-libcalls.ll
+++ b/llvm/test/CodeGen/ARM/v7k-libcalls.ll
@ -0,0 +1,154 @@
+; RUN: llc -mtriple=armv7k-apple-watchos2.0 -mcpu=cortex-a7 < %s | FileCheck %s
+
+define arm_aapcs_vfpcc float @t1(float %a, float %b) {
+entry:
+; CHECK: t1
+; CHECK-NOT: vmov
+; CHECK: vadd.f32
+  %a.addr = alloca float, align 4
+  %b.addr = alloca float, align 4
+  store float %a, float* %a.addr, align 4
+  store float %b, float* %b.addr, align 4
+  %0 = load float, float* %a.addr, align 4
+  %1 = load float, float* %b.addr, align 4
+  %add = fadd float %0, %1
+  ret float %add
+}
+
+define arm_aapcs_vfpcc double @t2(double %a, double %b) {
+entry:
+; CHECK: t2
+; CHECK-NOT: vmov
+; CHECK: vadd.f64
+  %a.addr = alloca double, align 8
+  %b.addr = alloca double, align 8
+  store double %a, double* %a.addr, align 8
+  store double %b, double* %b.addr, align 8
+  %0 = load double, double* %a.addr, align 8
+  %1 = load double, double* %b.addr, align 8
+  %add = fadd double %0, %1
+  ret double %add
+}
+
+define arm_aapcs_vfpcc i64 @t3(double %ti) {
+entry:
+; CHECK-LABEL: t3:
+; CHECK-NOT: vmov
+; CHECK: bl ___fixunsdfdi
+  %conv = fptoui double %ti to i64
+  ret i64 %conv
+}
+
+define arm_aapcs_vfpcc i64 @t4(double %ti) {
+entry:
+; CHECK-LABEL: t4:
+; CHECK-NOT: vmov
+; CHECK: bl ___fixdfdi
+  %conv = fptosi double %ti to i64
+  ret i64 %conv
+}
+
+define arm_aapcs_vfpcc double @t5(i64 %ti) {
+entry:
+; CHECK-LABEL: t5:
+; CHECK: bl ___floatundidf
+; CHECK-NOT: vmov
+; CHECK: pop
+  %conv = uitofp i64 %ti to double
+  ret double %conv
+}
+
+define arm_aapcs_vfpcc double @t6(i64 %ti) {
+entry:
+; CHECK-LABEL: t6:
+; CHECK: bl ___floatdidf
+; CHECK-NOT: vmov
+; CHECK: pop
+  %conv = sitofp i64 %ti to double
+  ret double %conv
+}
+
+define arm_aapcs_vfpcc float @t7(i64 %ti) {
+entry:
+; CHECK-LABEL: t7:
+; CHECK: bl ___floatundisf
+; CHECK-NOT: vmov
+; CHECK: pop
+  %conv = uitofp i64 %ti to float
+  ret float %conv
+}
+
+define arm_aapcs_vfpcc float @t8(i64 %ti) {
+entry:
+; CHECK-LABEL: t8:
+; CHECK: bl ___floatdisf
+; CHECK-NOT: vmov
+; CHECK: pop
+  %conv = sitofp i64 %ti to float
+  ret float %conv
+}
+
+define arm_aapcs_vfpcc double @t9(double %d0, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, float %a, float %b) {
+entry:
+; CHECK-LABEL: t9:
+; CHECK-NOT: vmov
+; CHECK: vldr
+  %add = fadd float %a, %b
+  %conv = fpext float %add to double
+  ret double %conv
+}
+
+define arm_aapcs_vfpcc double @t10(double %d0, double %d1, double %d2, double %d3, double %d4, double %d5, double %a, float %b, double %c) {
+entry:
+; CHECK-LABEL: t10:
+; CHECK-NOT: vmov
+; CHECK: vldr
+  %add = fadd double %a, %c
+  ret double %add
+}
+
+define arm_aapcs_vfpcc float @t11(double %d0, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, float %a, double %b, float %c) {
+entry:
+; CHECK-LABEL: t11:
+; CHECK: vldr
+  %add = fadd float %a, %c
+  ret float %add
+}
+
+; rdar://16039676
+define arm_aapcs_vfpcc double @t12(double %a, double %b) {
+entry:
+; CHECK-LABEL: t12:
+; CHECK: vstr
+  %add = fadd double %a, %b
+  %sub = fsub double %a, %b
+  %call = tail call arm_aapcs_vfpcc double @x(double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double %add, float 0.000000e+00, double %sub)
+  ret double %call
+}
+
+define arm_aapcs_vfpcc double @t13(double %x) {
+entry:
+; CHECK-LABEL: t13:
+; CHECK-NOT: vmov
+; CHECK: bl ___sincos_stret
+  %call = tail call arm_aapcs_vfpcc double @cos(double %x)
+  %call1 = tail call arm_aapcs_vfpcc double @sin(double %x)
+  %mul = fmul double %call, %call1
+  ret double %mul
+}
+
+define arm_aapcs_vfpcc double @t14(double %x) {
+; CHECK-LABEL: t14:
+; CHECK-NOT: vmov
+; CHECK: b ___exp10
+  %__exp10 = tail call double @__exp10(double %x) #1
+  ret double %__exp10
+}
+
+declare arm_aapcs_vfpcc double @x(double, double, double, double, double, double, double, float, double)
+declare arm_aapcs_vfpcc double @cos(double) #0
+declare arm_aapcs_vfpcc double @sin(double) #0
+declare double @__exp10(double)
+
+attributes #0 = { readnone }
+attributes #1 = { readonly }
--- a/llvm/test/CodeGen/ARM/v7k-sincos.ll
+++ b/llvm/test/CodeGen/ARM/v7k-sincos.ll
@ -0,0 +1,16 @@
+; RUN: llc -mtriple=thumbv7k-apple-watchos2.0 -o - %s | FileCheck %s
+
+declare double @sin(double) nounwind readnone
+declare double @cos(double) nounwind readnone
+
+define double @test_stret(double %in) {
+; CHECK-LABEL: test_stret:
+; CHECK: blx ___sincos_stret
+; CHECK-NOT: ldr
+; CHECK: vadd.f64 d0, d0, d1
+
+  %sin = call double @sin(double %in)
+  %cos = call double @cos(double %in)
+  %sum = fadd double %sin, %cos
+  ret double %sum
+}