[X86][SSE] Reuse zeroable element mask in lowerVectorShuffleAsBlend. NFCI

Don't regenerate the zeroable element mask with computeZeroableShuffleElements when it's already available.

llvm-svn: 286045
parent 725174694a
commit 4a9f210412
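The change threads a precomputed zeroable-element mask through the shuffle-lowering helpers instead of having lowerVectorShuffleAsBlend rebuild it with computeZeroableShuffleElements on every call. As a rough illustration of the pattern, here is a minimal standalone sketch, not LLVM's actual code: computeZeroable, lowerAsBlend, kNumElts, and the bitset types below are hypothetical stand-ins for computeZeroableShuffleElements, lowerVectorShuffleAsBlend, and SmallBitVector.

#include <bitset>
#include <cstdio>
#include <vector>

// Shuffle-mask convention, as in LLVM: -1 means "undef"; 0..N-1 selects a
// lane of V1; N..2N-1 selects a lane of V2.
constexpr int kNumElts = 4;                 // hypothetical fixed width
using ShuffleMask = std::vector<int>;
using ZeroableMask = std::bitset<kNumElts>;

// Sketch of a zeroable computation: a result lane is "zeroable" when it is
// undef or reads an input lane already known to be zero.
ZeroableMask computeZeroable(const ShuffleMask &M, const ZeroableMask &V1Zero,
                             const ZeroableMask &V2Zero) {
  ZeroableMask Z;
  for (int i = 0; i < kNumElts; ++i) {
    int Idx = M[i];
    if (Idx < 0)
      Z.set(i);                              // undef lane: free to be zero
    else if (Idx < kNumElts)
      Z[i] = V1Zero[Idx];                    // reads a known-zero V1 lane
    else
      Z[i] = V2Zero[Idx - kNumElts];         // reads a known-zero V2 lane
  }
  return Z;
}

// After the refactor, the helper takes the zeroable mask as a parameter
// instead of recomputing it: a lane must come from the matching lane of V1
// or V2, or be zeroable (blendable against a zero vector).
bool lowerAsBlend(const ShuffleMask &M, const ZeroableMask &Zeroable) {
  for (int i = 0; i < kNumElts; ++i)
    if (M[i] >= 0 && M[i] != i && M[i] != i + kNumElts && !Zeroable[i])
      return false;
  return true;
}

int main() {
  ShuffleMask M = {0, 5, -1, 3};             // V1[0], V2[1], undef, V1[3]
  ZeroableMask V1Zero, V2Zero;               // no input lanes known zero
  ZeroableMask Z = computeZeroable(M, V1Zero, V2Zero);      // computed once...
  std::printf("blendable: %d\n", (int)lowerAsBlend(M, Z)); // ...reused here
  return 0;
}

The refactor is purely structural, hence the NFCI (no functional change intended) tag: the caller computes the mask once and every candidate lowering reuses it, as the diff below shows.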
@@ -7558,12 +7558,12 @@ static SDValue lowerVectorShuffleAsBitBlend(const SDLoc &DL, MVT VT, SDValue V1,
 /// that the shuffle mask is a blend, or convertible into a blend with zero.
 static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
                                          SDValue V2, ArrayRef<int> Original,
+                                         const SmallBitVector &Zeroable,
                                          const X86Subtarget &Subtarget,
                                          SelectionDAG &DAG) {
   bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
   bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
   SmallVector<int, 8> Mask(Original.begin(), Original.end());
-  SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
   bool ForceV1Zero = false, ForceV2Zero = false;
 
   // Attempt to generate the binary blend mask. If an input is zero then
@@ -9047,6 +9047,7 @@ static SDValue lowerVectorShuffleAsPermuteAndUnpack(const SDLoc &DL, MVT VT,
 /// it is better to avoid lowering through this for integer vectors where
 /// possible.
 static SDValue lowerV2F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
+                                       const SmallBitVector &Zeroable,
                                        SDValue V1, SDValue V2,
                                        const X86Subtarget &Subtarget,
                                        SelectionDAG &DAG) {
@@ -9108,7 +9109,7 @@ static SDValue lowerV2F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
 
   if (Subtarget.hasSSE41())
     if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v2f64, V1, V2, Mask,
-                                                  Subtarget, DAG))
+                                                  Zeroable, Subtarget, DAG))
       return Blend;
 
   // Use dedicated unpack instructions for masks that match their pattern.
@@ -9201,7 +9202,7 @@ static SDValue lowerV2I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   bool IsBlendSupported = Subtarget.hasSSE41();
   if (IsBlendSupported)
     if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v2i64, V1, V2, Mask,
-                                                  Subtarget, DAG))
+                                                  Zeroable, Subtarget, DAG))
       return Blend;
 
   // Use dedicated unpack instructions for masks that match their pattern.
@@ -9396,7 +9397,7 @@ static SDValue lowerV4F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
 
   if (Subtarget.hasSSE41()) {
     if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4f32, V1, V2, Mask,
-                                                  Subtarget, DAG))
+                                                  Zeroable, Subtarget, DAG))
       return Blend;
 
     // Use INSERTPS if we can complete the shuffle efficiently.
@@ -9485,7 +9486,7 @@ static SDValue lowerV4I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   bool IsBlendSupported = Subtarget.hasSSE41();
   if (IsBlendSupported)
     if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4i32, V1, V2, Mask,
-                                                  Subtarget, DAG))
+                                                  Zeroable, Subtarget, DAG))
       return Blend;
 
   if (SDValue Masked = lowerVectorShuffleAsBitMask(DL, MVT::v4i32, V1, V2, Mask,
@@ -10125,7 +10126,7 @@ static SDValue lowerV8I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   bool IsBlendSupported = Subtarget.hasSSE41();
   if (IsBlendSupported)
     if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i16, V1, V2, Mask,
-                                                  Subtarget, DAG))
+                                                  Zeroable, Subtarget, DAG))
       return Blend;
 
   if (SDValue Masked = lowerVectorShuffleAsBitMask(DL, MVT::v8i16, V1, V2, Mask,
@@ -10402,8 +10403,8 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   // important as a single pshufb is significantly faster for that.
   if (V1InUse && V2InUse) {
     if (Subtarget.hasSSE41())
-      if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v16i8, V1, V2,
-                                                    Mask, Subtarget, DAG))
+      if (SDValue Blend = lowerVectorShuffleAsBlend(
+              DL, MVT::v16i8, V1, V2, Mask, Zeroable, Subtarget, DAG))
        return Blend;
 
     // We can use an unpack to do the blending rather than an or in some
@@ -10536,7 +10537,7 @@ static SDValue lower128BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   case MVT::v2i64:
     return lowerV2I64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
   case MVT::v2f64:
-    return lowerV2F64VectorShuffle(DL, Mask, V1, V2, Subtarget, DAG);
+    return lowerV2F64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
   case MVT::v4i32:
     return lowerV4I32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
   case MVT::v4f32:
@@ -10841,6 +10842,7 @@ static SDValue lowerVectorShuffleAsLanePermuteAndBlend(const SDLoc &DL, MVT VT,
 /// \brief Handle lowering 2-lane 128-bit shuffles.
 static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
                                         SDValue V2, ArrayRef<int> Mask,
+                                        const SmallBitVector &Zeroable,
                                         const X86Subtarget &Subtarget,
                                         SelectionDAG &DAG) {
   // TODO: If minimizing size and one of the inputs is a zero vector and the
@@ -10849,7 +10851,7 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
 
   // Blends are faster and handle all the non-lane-crossing cases.
   if (SDValue Blend = lowerVectorShuffleAsBlend(DL, VT, V1, V2, Mask,
-                                                Subtarget, DAG))
+                                                Zeroable, Subtarget, DAG))
     return Blend;
 
   bool IsV1Zero = ISD::isBuildVectorAllZeros(V1.getNode());
@@ -11359,6 +11361,7 @@ static SDValue lowerVectorShuffleWithPERMV(const SDLoc &DL, MVT VT,
 /// Also ends up handling lowering of 4-lane 64-bit integer shuffles when AVX2
 /// isn't available.
 static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
+                                       const SmallBitVector &Zeroable,
                                        SDValue V1, SDValue V2,
                                        const X86Subtarget &Subtarget,
                                        SelectionDAG &DAG) {
@@ -11369,7 +11372,7 @@ static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   SmallVector<int, 4> WidenedMask;
   if (canWidenShuffleElements(Mask, WidenedMask))
     if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4f64, V1, V2, Mask,
-                                             Subtarget, DAG))
+                                             Zeroable, Subtarget, DAG))
       return V;
 
   if (V2.isUndef()) {
@@ -11413,7 +11416,7 @@ static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
     return V;
 
   if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4f64, V1, V2, Mask,
-                                                Subtarget, DAG))
+                                                Zeroable, Subtarget, DAG))
     return Blend;
 
   // Check if the blend happens to exactly fit that of SHUFPD.
@@ -11464,11 +11467,11 @@ static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   SmallVector<int, 4> WidenedMask;
   if (canWidenShuffleElements(Mask, WidenedMask))
     if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4i64, V1, V2, Mask,
-                                             Subtarget, DAG))
+                                             Zeroable, Subtarget, DAG))
       return V;
 
   if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4i64, V1, V2, Mask,
-                                                Subtarget, DAG))
+                                                Zeroable, Subtarget, DAG))
     return Blend;
 
   // Check for being able to broadcast a single element.
@@ -11530,6 +11533,7 @@ static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
 /// Also ends up handling lowering of 8-lane 32-bit integer shuffles when AVX2
 /// isn't available.
 static SDValue lowerV8F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
+                                       const SmallBitVector &Zeroable,
                                        SDValue V1, SDValue V2,
                                        const X86Subtarget &Subtarget,
                                        SelectionDAG &DAG) {
@@ -11538,7 +11542,7 @@ static SDValue lowerV8F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
 
   if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8f32, V1, V2, Mask,
-                                                Subtarget, DAG))
+                                                Zeroable, Subtarget, DAG))
     return Blend;
 
   // Check for being able to broadcast a single element.
@@ -11632,7 +11636,7 @@ static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
     return ZExt;
 
   if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i32, V1, V2, Mask,
-                                                Subtarget, DAG))
+                                                Zeroable, Subtarget, DAG))
     return Blend;
 
   // Check for being able to broadcast a single element.
@@ -11717,7 +11721,7 @@ static SDValue lowerV16I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
     return Broadcast;
 
   if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v16i16, V1, V2, Mask,
-                                                Subtarget, DAG))
+                                                Zeroable, Subtarget, DAG))
     return Blend;
 
   // Use dedicated unpack instructions for masks that match their pattern.
@@ -11803,7 +11807,7 @@ static SDValue lowerV32I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
     return Broadcast;
 
   if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v32i8, V1, V2, Mask,
-                                                Subtarget, DAG))
+                                                Zeroable, Subtarget, DAG))
     return Blend;
 
   // Use dedicated unpack instructions for masks that match their pattern.
@@ -11900,11 +11904,11 @@ static SDValue lower256BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
 
   switch (VT.SimpleTy) {
   case MVT::v4f64:
-    return lowerV4F64VectorShuffle(DL, Mask, V1, V2, Subtarget, DAG);
+    return lowerV4F64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
   case MVT::v4i64:
     return lowerV4I64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
   case MVT::v8f32:
-    return lowerV8F32VectorShuffle(DL, Mask, V1, V2, Subtarget, DAG);
+    return lowerV8F32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
   case MVT::v8i32:
     return lowerV8I32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
   case MVT::v16i16: