[PowerPC] Implement vec_xxsldwi builtin.

The vec_xxsldwi builtin is missing from altivec.h. This has been requested by developers working on libvpx for VP9 support for Google. The patch fixes PR: https://bugs.llvm.org/show_bug.cgi?id=32653 Differential Revision: https://reviews.llvm.org/D33236 llvm-svn: 303766
2017-05-24 15:54:13 +00:00 · 2017-05-24 15:54:13 +00:00 · 9aa2c0383d
parent dc62b80c82
commit 9aa2c0383d
6 changed files with 107 additions and 0 deletions
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@ -421,6 +421,7 @@ BUILTIN(__builtin_vsx_insertword, "V16UcV4UiV16UcIi", "")
 BUILTIN(__builtin_vsx_extractuword, "V2ULLiV16UcIi", "")

 BUILTIN(__builtin_vsx_xxpermdi, "v.", "t")
+BUILTIN(__builtin_vsx_xxsldwi, "v.", "t")

 // HTM builtins
 BUILTIN(__builtin_tbegin, "UiUIi", "")
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@ -8475,6 +8475,47 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
    auto RetTy = ConvertType(BIRetType);
    return Builder.CreateBitCast(ShuffleCall, RetTy);
  }
+
+  case PPC::BI__builtin_vsx_xxsldwi: {
+    ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
+    assert(ArgCI && "Third argument must be a compile time constant");
+    unsigned Index = ArgCI->getZExtValue() & 0x3;
+    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
+    Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int32Ty, 4));
+
+    // Create a shuffle mask
+    unsigned ElemIdx0;
+    unsigned ElemIdx1;
+    unsigned ElemIdx2;
+    unsigned ElemIdx3;
+    if (getTarget().isLittleEndian()) {
+      // Little endian element N comes from element 8+N-Index of the
+      // concatenated wide vector (of course, using modulo arithmetic on
+      // the total number of elements).
+      ElemIdx0 = (8 - Index) % 8;
+      ElemIdx1 = (9 - Index) % 8;
+      ElemIdx2 = (10 - Index) % 8;
+      ElemIdx3 = (11 - Index) % 8;
+    } else {
+      // Big endian ElemIdx<N> = Index + N
+      ElemIdx0 = Index;
+      ElemIdx1 = Index + 1;
+      ElemIdx2 = Index + 2;
+      ElemIdx3 = Index + 3;
+    }
+
+    Constant *ShuffleElts[4] = {ConstantInt::get(Int32Ty, ElemIdx0),
+                                ConstantInt::get(Int32Ty, ElemIdx1),
+                                ConstantInt::get(Int32Ty, ElemIdx2),
+                                ConstantInt::get(Int32Ty, ElemIdx3)};
+
+    Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
+    Value *ShuffleCall =
+        Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask);
+    QualType BIRetType = E->getType();
+    auto RetTy = ConvertType(BIRetType);
+    return Builder.CreateBitCast(ShuffleCall, RetTy);
+  }
  }
 }

--- a/clang/lib/Headers/altivec.h
+++ b/clang/lib/Headers/altivec.h
@ -12158,6 +12158,7 @@ static __inline__ void __ATTRS_o_ai vec_vsx_st(vector unsigned char __a,

 #ifdef __VSX__
 #define vec_xxpermdi __builtin_vsx_xxpermdi
+#define vec_xxsldwi __builtin_vsx_xxsldwi
 #endif

 /* vec_xor */
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@ -1697,6 +1697,7 @@ bool Sema::CheckPPCBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
    return SemaBuiltinConstantArgRange(TheCall, 0, 0, 31) ||
           SemaBuiltinConstantArgRange(TheCall, 2, 0, 31);
  case PPC::BI__builtin_vsx_xxpermdi:
+  case PPC::BI__builtin_vsx_xxsldwi:
    return SemaBuiltinVSX(TheCall);
  }
  return SemaBuiltinConstantArgRange(TheCall, i, l, u);
--- a/clang/test/CodeGen/builtins-ppc-error.c
+++ b/clang/test/CodeGen/builtins-ppc-error.c
@ -26,3 +26,11 @@ void testXXPERMDI(int index) {
  vec_xxpermdi(1, 2, 3); //expected-error {{first two arguments to '__builtin_vsx_xxpermdi' must be vectors}}
  vec_xxpermdi(vsi, vuc, 2); //expected-error {{first two arguments to '__builtin_vsx_xxpermdi' must have the same type}}
 }
+
+void testXXSLDWI(int index) {
+  vec_xxsldwi(vsi); //expected-error {{too few arguments to function call, expected at least 3, have 1}}
+  vec_xxsldwi(vsi, vsi, 2, 4); //expected-error {{too many arguments to function call, expected at most 3, have 4}}
+  vec_xxsldwi(vsi, vsi, index); //expected-error {{argument 3 to '__builtin_vsx_xxsldwi' must be a 2-bit unsigned literal (i.e. 0, 1, 2 or 3)}}
+  vec_xxsldwi(1, 2, 3); //expected-error {{first two arguments to '__builtin_vsx_xxsldwi' must be vectors}}
+  vec_xxsldwi(vsi, vuc, 2); //expected-error {{first two arguments to '__builtin_vsx_xxsldwi' must have the same type}}
+}
--- a/clang/test/CodeGen/builtins-ppc-vsx.c
+++ b/clang/test/CodeGen/builtins-ppc-vsx.c
@ -1731,6 +1731,47 @@ res_vsc = vec_xxpermdi(vsc, vsc, 0);
 res_vuc = vec_xxpermdi(vuc, vuc, 1);
 // CHECK: shufflevector <2 x i64> %{{[0-9]+}}, <2 x i64> %{{[0-9]+}}, <2 x i32> <i32 0, i32 3>
 // CHECK-LE: shufflevector <2 x i64> %{{[0-9]+}}, <2 x i64> %{{[0-9]+}}, <2 x i32> <i32 2, i32 1>
+
+res_vd = vec_xxsldwi(vd, vd, 0);
+// CHECK: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-LE: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+
+res_vf = vec_xxsldwi(vf, vf, 1);
+// CHECK: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}}, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+// CHECK-LE: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}}, <4 x i32> <i32 7, i32 0, i32 1, i32 2>
+
+res_vsll = vec_xxsldwi(vsll, vsll, 2);
+// CHECK: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}}, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+// CHECK-LE: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}}, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
+
+res_vull = vec_xxsldwi(vull, vull, 3);
+// CHECK: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}}, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+// CHECK-LE: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}}, <4 x i32> <i32 5, i32 6, i32 7, i32 0>
+
+res_vsi = vec_xxsldwi(vsi, vsi, 0);
+// CHECK: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-LE: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+
+res_vui = vec_xxsldwi(vui, vui, 1);
+// CHECK: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}}, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+// CHECK-LE: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}}, <4 x i32> <i32 7, i32 0, i32 1, i32 2>
+
+res_vss = vec_xxsldwi(vss, vss, 2);
+// CHECK: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}}, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+// CHECK-LE: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}}, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
+
+
+res_vus = vec_xxsldwi(vus, vus, 3);
+// CHECK: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}}, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+// CHECK-LE: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}}, <4 x i32> <i32 5, i32 6, i32 7, i32 0>
+
+res_vsc = vec_xxsldwi(vsc, vsc, 0);
+// CHECK: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-LE: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+
+res_vuc = vec_xxsldwi(vuc, vuc, 1);
+// CHECK: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}}, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+// CHECK-LE: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}}, <4 x i32> <i32 7, i32 0, i32 1, i32 2>
 }

 // The return type of the call expression may be different from the return type of the shufflevector.
@ -1748,3 +1789,17 @@ vector int xxpermdi_should_not_assert(vector int a, vector int b) {
 // CHECK-LE-NEXT:  shufflevector <2 x i64> %{{[0-9]+}}, <2 x i64> %{{[0-9]+}}, <2 x i32> <i32 3, i32 1>
 // CHECK-LE-NEXT:  bitcast <2 x i64> %{{[0-9]+}} to <4 x i32>
 }
+
+vector double xxsldwi_should_not_assert(vector double a, vector double b) {
+  return vec_xxsldwi(a, b, 0);
+// CHECK-LABEL: xxsldwi_should_not_assert
+// CHECK:  bitcast <2 x double> %0 to <4 x i32>
+// CHECK-NEXT:  bitcast <2 x double> %1 to <4 x i32>
+// CHECK-NEXT:  shufflevector <4 x i32> %2, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT:  bitcast <4 x i32> %4 to <2 x double>
+
+// CHECK-LE:  bitcast <2 x double> %0 to <4 x i32>
+// CHECK-NEXT-LE:  bitcast <2 x double> %1 to <4 x i32>
+// CHECK-NEXT-LE:  shufflevector <4 x i32> %2, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT-LE:  bitcast <4 x i32> %4 to <2 x double>
+}