[x86] Teach the new vector shuffle lowering code to handle what is

essentially a DAG combine that never gets a chance to run. We might typically expect DAG combining to remove shuffles-of-splats and other similar patterns, but we don't get a chance to run the DAG combiner when we recursively form sub-shuffles during the lowering of a shuffle. So instead hand-roll a really important combine directly into the lowering code to detect shuffles-of-splats, especially shuffles of an all-zero splat which needn't even have the same element width, etc. This lets the new vector shuffle lowering handle shuffles which implement things like zero-extension really nicely. This will become even more important when I wire the legalization of zero-extension to vector shuffles with the new widening legalization strategy. llvm-svn: 212444
2014-07-07 09:06:58 +00:00 · 2014-07-07 09:06:58 +00:00 · 0dcb366268
parent 55beb64bd0
commit 0dcb366268
2 changed files with 58 additions and 0 deletions
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@ -7914,6 +7914,47 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget *Subtarget,
        return DAG.getVectorShuffle(VT, dl, V1, V2, NewMask);
      }

+  // Check for a shuffle of a splat, and return just the splat. While DAG
+  // combining will do a similar transformation, this shows up with the
+  // internally created shuffles and so we handle it specially here as we won't
+  // have another chance to DAG-combine the generic shuffle instructions.
+  if (V2IsUndef) {
+    SDValue V = V1;
+
+    // Look through any bitcasts. These can't change the size, just the number
+    // of elements which we check later.
+    while (V.getOpcode() == ISD::BITCAST)
+      V = V->getOperand(0);
+
+    // A splat should always show up as a build vector node.
+    if (V.getOpcode() == ISD::BUILD_VECTOR) {
+      SDValue Base;
+      bool AllSame = true;
+      for (unsigned i = 0; i != V->getNumOperands(); ++i)
+        if (V->getOperand(i).getOpcode() != ISD::UNDEF) {
+          Base = V->getOperand(i);
+          break;
+        }
+      // Splat of <u, u, ..., u>, return <u, u, ..., u>
+      if (!Base)
+        return V1;
+      for (unsigned i = 0; i != V->getNumOperands(); ++i)
+        if (V->getOperand(i) != Base) {
+          AllSame = false;
+          break;
+        }
+      // Splat of <x, x, ..., x>, return <x, x, ..., x>, provided that the
+      // number of elements match or the value splatted is a zero constant.
+      if (AllSame) {
+        if (V.getValueType().getVectorNumElements() == (unsigned)NumElements)
+          return V1;
+        if (auto *C = dyn_cast<ConstantSDNode>(Base))
+          if (C->isNullValue())
+            return V1;
+      }
+    }
+  }
+
  // For integer vector shuffles, try to collapse them into a shuffle of fewer
  // lanes but wider integers. We cap this to not form integers larger than i64
  // but it might be interesting to form i128 integers to handle flipping the
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
@ -172,3 +172,20 @@ define <16 x i8> @shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20(
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 31, i32 30, i32 29, i32 28, i32 11, i32 10, i32 9, i32 8, i32 23, i32 22, i32 21, i32 20>
  ret <16 x i8> %shuffle
 }
+
+define <16 x i8> @zext_to_v8i16_shuffle(<16 x i8> %a) {
+; CHECK-SSE2-LABEL: @zext_to_v8i16_shuffle
+; CHECK-SSE2:         pxor %xmm1, %xmm1
+; CHECK-SSE2-NEXT:    punpcklbw %xmm1, %xmm0
+  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 1, i32 19, i32 2, i32 21, i32 3, i32 23, i32 4, i32 25, i32 5, i32 27, i32 6, i32 29, i32 7, i32 31>
+  ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @zext_to_v4i32_shuffle(<16 x i8> %a) {
+; CHECK-SSE2-LABEL: @zext_to_v4i32_shuffle
+; CHECK-SSE2:         pxor %xmm1, %xmm1
+; CHECK-SSE2-NEXT:    punpcklbw %xmm1, %xmm0
+; CHECK-SSE2-NEXT:    punpcklbw %xmm1, %xmm0
+  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 18, i32 19, i32 1, i32 21, i32 22, i32 23, i32 2, i32 25, i32 26, i32 27, i32 3, i32 29, i32 30, i32 31>
+  ret <16 x i8> %shuffle
+}