[PowerPC] Initial VSX intrinsic support, with min/max for vector double

Now that we have initial support for VSX, we can begin adding intrinsics for programmer access to VSX instructions. This patch adds basic support for VSX intrinsics in general, and tests it by implementing intrinsics for minimum and maximum for the vector double data type. The LLVM portion of this is quite straightforward. There is a companion patch for Clang. llvm-svn: 220988
2014-10-31 19:19:07 +00:00 · 2014-10-31 19:19:07 +00:00 · 1ca69fa64d
parent 7bb413e3ba
commit 1ca69fa64d
3 changed files with 167 additions and 6 deletions
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@ -47,6 +47,13 @@ let TargetPrefix = "ppc" in {  // All PPC intrinsics start with "llvm.ppc.".
                              list<IntrinsicProperty> properties>
    : GCCBuiltin<!strconcat("__builtin_altivec_", GCCIntSuffix)>,
      Intrinsic<ret_types, param_types, properties>;
+
+  /// PowerPC_VSX_Intrinsic - Base class for all VSX intrinsics.
+  class PowerPC_VSX_Intrinsic<string GCCIntSuffix, list<LLVMType> ret_types,
+                              list<LLVMType> param_types,
+                              list<IntrinsicProperty> properties>
+    : GCCBuiltin<!strconcat("__builtin_vsx_", GCCIntSuffix)>,
+      Intrinsic<ret_types, param_types, properties>;
 }

 //===----------------------------------------------------------------------===//
@ -88,6 +95,32 @@ class PowerPC_Vec_WWW_Intrinsic<string GCCIntSuffix>
                          [IntrNoMem]>;


+//===----------------------------------------------------------------------===//
+// PowerPC VSX Intrinsic Class Definitions.
+//
+
+/// PowerPC_VSX_Vec_DDD_Intrinsic - A PowerPC intrinsic that takes two v2f64
+/// vectors and returns one.  These intrinsics have no side effects.
+class PowerPC_VSX_Vec_DDD_Intrinsic<string GCCIntSuffix>
+  : PowerPC_VSX_Intrinsic<GCCIntSuffix,
+                          [llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty],
+                          [IntrNoMem]>;
+
+/// PowerPC_VSX_Vec_FFF_Intrinsic - A PowerPC intrinsic that takes two v4f32
+/// vectors and returns one.  These intrinsics have no side effects.
+class PowerPC_VSX_Vec_FFF_Intrinsic<string GCCIntSuffix>
+  : PowerPC_VSX_Intrinsic<GCCIntSuffix,
+                          [llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
+                          [IntrNoMem]>;
+
+/// PowerPC_VSX_Sca_DDD_Intrinsic - A PowerPC intrinsic that takes two f64
+/// scalars and returns one.  These intrinsics have no side effects.
+class PowerPC_VSX_Sca_DDD_Intrinsic<string GCCIntSuffix>
+  : PowerPC_VSX_Intrinsic<GCCIntSuffix,
+                          [llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+                          [IntrNoMem]>;
+
+
 //===----------------------------------------------------------------------===//
 // PowerPC Altivec Intrinsic Definitions.

@ -476,3 +509,21 @@ def int_ppc_altivec_vexptefp  : PowerPC_Vec_FF_Intrinsic<"vexptefp">;
 def int_ppc_altivec_vlogefp   : PowerPC_Vec_FF_Intrinsic<"vlogefp">;
 def int_ppc_altivec_vrefp     : PowerPC_Vec_FF_Intrinsic<"vrefp">;
 def int_ppc_altivec_vrsqrtefp : PowerPC_Vec_FF_Intrinsic<"vrsqrtefp">;
+
+
+//===----------------------------------------------------------------------===//
+// PowerPC VSX Intrinsic Definitions.
+
+let TargetPrefix = "ppc" in {  // All intrinsics start with "llvm.ppc.".
+
+// Vector maximum.
+def int_ppc_vsx_xvmaxdp : PowerPC_VSX_Vec_DDD_Intrinsic<"xvmaxdp">;
+def int_ppc_vsx_xvmaxsp : PowerPC_VSX_Vec_FFF_Intrinsic<"xvmaxsp">;
+def int_ppc_vsx_xsmaxdp : PowerPC_VSX_Sca_DDD_Intrinsic<"xsmaxdp">;
+
+// Vector minimum.
+def int_ppc_vsx_xvmindp : PowerPC_VSX_Vec_DDD_Intrinsic<"xvmindp">;
+def int_ppc_vsx_xvminsp : PowerPC_VSX_Vec_FFF_Intrinsic<"xvminsp">;
+def int_ppc_vsx_xsmindp : PowerPC_VSX_Sca_DDD_Intrinsic<"xsmindp">;
+
+}
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@ -643,24 +643,36 @@ let Uses = [RM] in {
  let isCommutable = 1 in {
  def XSMAXDP : XX3Form<60, 160,
                        (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
-                        "xsmaxdp $XT, $XA, $XB", IIC_VecFP, []>;
+                        "xsmaxdp $XT, $XA, $XB", IIC_VecFP,
+                        [(set vsfrc:$XT,
+                              (int_ppc_vsx_xsmaxdp vsfrc:$XA, vsfrc:$XB))]>;
  def XSMINDP : XX3Form<60, 168,
                        (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
-                        "xsmindp $XT, $XA, $XB", IIC_VecFP, []>;
+                        "xsmindp $XT, $XA, $XB", IIC_VecFP,
+                        [(set vsfrc:$XT,
+                              (int_ppc_vsx_xsmindp vsfrc:$XA, vsfrc:$XB))]>;

  def XVMAXDP : XX3Form<60, 224,
                        (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
-                        "xvmaxdp $XT, $XA, $XB", IIC_VecFP, []>;
+                        "xvmaxdp $XT, $XA, $XB", IIC_VecFP,
+                        [(set vsrc:$XT,
+                              (int_ppc_vsx_xvmaxdp vsrc:$XA, vsrc:$XB))]>;
  def XVMINDP : XX3Form<60, 232,
                        (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
-                        "xvmindp $XT, $XA, $XB", IIC_VecFP, []>;
+                        "xvmindp $XT, $XA, $XB", IIC_VecFP,
+                        [(set vsrc:$XT,
+                              (int_ppc_vsx_xvmindp vsrc:$XA, vsrc:$XB))]>;

  def XVMAXSP : XX3Form<60, 192,
                        (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
-                        "xvmaxsp $XT, $XA, $XB", IIC_VecFP, []>;
+                        "xvmaxsp $XT, $XA, $XB", IIC_VecFP,
+                        [(set vsrc:$XT,
+                              (int_ppc_vsx_xvmaxsp vsrc:$XA, vsrc:$XB))]>;
  def XVMINSP : XX3Form<60, 200,
                        (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
-                        "xvminsp $XT, $XA, $XB", IIC_VecFP, []>;
+                        "xvminsp $XT, $XA, $XB", IIC_VecFP,
+                        [(set vsrc:$XT,
+                              (int_ppc_vsx_xvminsp vsrc:$XA, vsrc:$XB))]>;
  } // isCommutable
 } // Uses = [RM]

--- a/llvm/test/CodeGen/PowerPC/vsx-minmax.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx-minmax.ll
@ -0,0 +1,98 @@
+; RUN: llc -mcpu=pwr7 -mattr=+vsx -O0 -fast-isel=0 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@vf = global <4 x float> <float -1.500000e+00, float 2.500000e+00, float -3.500000e+00, float 4.500000e+00>, align 16
+@vd = global <2 x double> <double 3.500000e+00, double -7.500000e+00>, align 16
+@d = global double 2.340000e+01, align 8
+@vf1 = common global <4 x float> zeroinitializer, align 16
+@vd1 = common global <2 x double> zeroinitializer, align 16
+@vf2 = common global <4 x float> zeroinitializer, align 16
+@vf3 = common global <4 x float> zeroinitializer, align 16
+@vd2 = common global <2 x double> zeroinitializer, align 16
+@vf4 = common global <4 x float> zeroinitializer, align 16
+@d1 = common global double 0.000000e+00, align 8
+@d2 = common global double 0.000000e+00, align 8
+
+; Function Attrs: nounwind
+define void @test1() #0 {
+; CHECK-LABEL: @test1
+entry:
+  %0 = load volatile <4 x float>* @vf, align 16
+  %1 = load volatile <4 x float>* @vf, align 16
+  %2 = tail call <4 x float> @llvm.ppc.vsx.xvmaxsp(<4 x float> %0, <4 x float> %1)
+; CHECK: xvmaxsp
+  store <4 x float> %2, <4 x float>* @vf1, align 16
+  %3 = load <2 x double>* @vd, align 16
+  %4 = tail call <2 x double> @llvm.ppc.vsx.xvmaxdp(<2 x double> %3, <2 x double> %3)
+; CHECK: xvmaxdp
+  store <2 x double> %4, <2 x double>* @vd1, align 16
+  %5 = load volatile <4 x float>* @vf, align 16
+  %6 = load volatile <4 x float>* @vf, align 16
+  %7 = tail call <4 x float> @llvm.ppc.vsx.xvmaxsp(<4 x float> %5, <4 x float> %6)
+; CHECK: xvmaxsp
+  store <4 x float> %7, <4 x float>* @vf2, align 16
+  %8 = load volatile <4 x float>* @vf, align 16
+  %9 = load volatile <4 x float>* @vf, align 16
+  %10 = tail call <4 x float> @llvm.ppc.vsx.xvminsp(<4 x float> %8, <4 x float> %9)
+; CHECK: xvminsp
+  store <4 x float> %10, <4 x float>* @vf3, align 16
+  %11 = load <2 x double>* @vd, align 16
+  %12 = tail call <2 x double> @llvm.ppc.vsx.xvmindp(<2 x double> %11, <2 x double> %11)
+; CHECK: xvmindp
+  store <2 x double> %12, <2 x double>* @vd2, align 16
+  %13 = load volatile <4 x float>* @vf, align 16
+  %14 = load volatile <4 x float>* @vf, align 16
+  %15 = tail call <4 x float> @llvm.ppc.vsx.xvminsp(<4 x float> %13, <4 x float> %14)
+; CHECK: xvminsp
+  store <4 x float> %15, <4 x float>* @vf4, align 16
+  %16 = load double* @d, align 8
+  %17 = tail call double @llvm.ppc.vsx.xsmaxdp(double %16, double %16)
+; CHECK: xsmaxdp
+  store double %17, double* @d1, align 8
+  %18 = tail call double @llvm.ppc.vsx.xsmindp(double %16, double %16)
+; CHECK: xsmindp
+  store double %18, double* @d2, align 8
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare double @llvm.ppc.vsx.xsmaxdp(double, double)
+
+; Function Attrs: nounwind readnone
+declare double @llvm.ppc.vsx.xsmindp(double, double)
+
+; Function Attrs: nounwind readnone
+declare <4 x float> @llvm.ppc.vsx.xvminsp(<4 x float>, <4 x float>)
+
+; Function Attrs: nounwind readnone
+declare <2 x double> @llvm.ppc.vsx.xvmindp(<2 x double>, <2 x double>)
+
+; Function Attrs: nounwind readnone
+declare <4 x float> @llvm.ppc.vsx.xvmaxsp(<4 x float>, <4 x float>)
+
+; Function Attrs: nounwind readnone
+declare <2 x double> @llvm.ppc.vsx.xvmaxdp(<2 x double>, <2 x double>)
+
+; Generated from C source:
+
+; % clang -O1 -maltivec -mvsx -S -emit-llvm vsx-minmax.c
+;
+;volatile vector float vf = { -1.5, 2.5, -3.5, 4.5 };
+;vector double vd = { 3.5, -7.5 };
+;double d = 23.4;
+;
+;vector float vf1, vf2, vf3, vf4;
+;vector double vd1, vd2;
+;double d1, d2;
+;
+;void test1() {
+;  vf1 = vec_max(vf, vf);
+;  vd1 = vec_max(vd, vd);
+;  vf2 = vec_vmaxfp(vf, vf);
+;  vf3 = vec_min(vf, vf);
+;  vd2 = vec_min(vd, vd);
+;  vf4 = vec_vminfp(vf, vf);
+;  d1 = __builtin_vsx_xsmaxdp(d, d);
+;  d2 = __builtin_vsx_xsmindp(d, d);
+;}