forked from OSchip/llvm-project
[X86] Add Subtarget to more lowerVectorShuffle functions. NFCI.
This will be necessary for an update to D54267.

llvm-svn: 346490
This commit is contained in:
parent
eee2e06e2a
commit
ea51f98b9b
|
@ -10240,11 +10240,9 @@ static SDValue lowerVectorShuffleAsUNPCKAndPermute(const SDLoc &DL, MVT VT,
|
|||
/// shuffle+blend operations on newer X86 ISAs where we have very fast blend
|
||||
/// operations. It will try to pick the best arrangement of shuffles and
|
||||
/// blends.
|
||||
static SDValue lowerVectorShuffleAsDecomposedShuffleBlend(const SDLoc &DL,
|
||||
MVT VT, SDValue V1,
|
||||
SDValue V2,
|
||||
ArrayRef<int> Mask,
|
||||
SelectionDAG &DAG) {
|
||||
static SDValue lowerVectorShuffleAsDecomposedShuffleBlend(
|
||||
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
|
||||
const X86Subtarget &Subtarget, SelectionDAG &DAG) {
|
||||
// Shuffle the input elements into the desired positions in V1 and V2 and
|
||||
// blend them together.
|
||||
SmallVector<int, 32> V1Mask(Mask.size(), -1);
|
||||
|
@ -11805,7 +11803,7 @@ static SDValue lowerV2I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
|||
// a permute. That will be faster than the domain cross.
|
||||
if (IsBlendSupported)
|
||||
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v2i64, V1, V2,
|
||||
Mask, DAG);
|
||||
Mask, Subtarget, DAG);
|
||||
|
||||
// We implement this with SHUFPD which is pretty lame because it will likely
|
||||
// incur 2 cycles of stall for integer vectors on Nehalem and older chips.
|
||||
|
@ -12115,7 +12113,7 @@ static SDValue lowerV4I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
|||
// a permute. That will be faster than the domain cross.
|
||||
if (IsBlendSupported)
|
||||
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4i32, V1, V2,
|
||||
Mask, DAG);
|
||||
Mask, Subtarget, DAG);
|
||||
|
||||
// Try to lower by permuting the inputs into an unpack instruction.
|
||||
if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack(
|
||||
|
@ -12830,7 +12828,7 @@ static SDValue lowerV8I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
|||
// We can always bit-blend if we have to so the fallback strategy is to
|
||||
// decompose into single-input permutes and blends.
|
||||
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v8i16, V1, V2,
|
||||
Mask, DAG);
|
||||
Mask, Subtarget, DAG);
|
||||
}
|
||||
|
||||
/// Check whether a compaction lowering can be done by dropping even
|
||||
|
@ -13161,7 +13159,7 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
|||
// Handle multi-input cases by blending single-input shuffles.
|
||||
if (NumV2Elements > 0)
|
||||
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v16i8, V1, V2,
|
||||
Mask, DAG);
|
||||
Mask, Subtarget, DAG);
|
||||
|
||||
// The fallback path for single-input shuffles widens this into two v8i16
|
||||
// vectors with unpacks, shuffles those, and then pulls them back together
|
||||
|
@ -13374,6 +13372,7 @@ static SDValue splitAndLowerVectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
|
|||
static SDValue lowerVectorShuffleAsSplitOrBlend(const SDLoc &DL, MVT VT,
|
||||
SDValue V1, SDValue V2,
|
||||
ArrayRef<int> Mask,
|
||||
const X86Subtarget &Subtarget,
|
||||
SelectionDAG &DAG) {
|
||||
assert(!V2.isUndef() && "This routine must not be used to lower single-input "
|
||||
"shuffles as it could then recurse on itself.");
|
||||
|
@ -13400,7 +13399,7 @@ static SDValue lowerVectorShuffleAsSplitOrBlend(const SDLoc &DL, MVT VT,
|
|||
};
|
||||
if (DoBothBroadcast())
|
||||
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, VT, V1, V2, Mask,
|
||||
DAG);
|
||||
Subtarget, DAG);
|
||||
|
||||
// If the inputs all stem from a single 128-bit lane of each input, then we
|
||||
// split them rather than blending because the split will decompose to
|
||||
|
@ -13418,7 +13417,8 @@ static SDValue lowerVectorShuffleAsSplitOrBlend(const SDLoc &DL, MVT VT,
|
|||
|
||||
// Otherwise, just fall back to decomposed shuffles and a blend. This requires
|
||||
// that the decomposed single-input shuffles don't end up here.
|
||||
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, VT, V1, V2, Mask, DAG);
|
||||
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, VT, V1, V2, Mask,
|
||||
Subtarget, DAG);
|
||||
}
|
||||
|
||||
/// Lower a vector shuffle crossing multiple 128-bit lanes as
|
||||
|
@ -14261,10 +14261,11 @@ static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
|||
// can fully permute the elements.
|
||||
if (Subtarget.hasAVX2())
|
||||
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4f64, V1, V2,
|
||||
Mask, DAG);
|
||||
Mask, Subtarget, DAG);
|
||||
|
||||
// Otherwise fall back on generic lowering.
|
||||
return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v4f64, V1, V2, Mask, DAG);
|
||||
return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v4f64, V1, V2, Mask,
|
||||
Subtarget, DAG);
|
||||
}
|
||||
|
||||
/// Handle lowering of 4-lane 64-bit integer shuffles.
|
||||
|
@ -14358,7 +14359,7 @@ static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
|||
|
||||
// Otherwise fall back on generic blend lowering.
|
||||
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4i64, V1, V2,
|
||||
Mask, DAG);
|
||||
Mask, Subtarget, DAG);
|
||||
}
|
||||
|
||||
/// Handle lowering of 8-lane 32-bit floating point shuffles.
|
||||
|
@ -14447,17 +14448,18 @@ static SDValue lowerV8F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
|||
// vpunpckhwd instrs than vblend.
|
||||
if (!Subtarget.hasAVX512() && isUnpackWdShuffleMask(Mask, MVT::v8f32))
|
||||
if (SDValue V = lowerVectorShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2,
|
||||
Mask, DAG))
|
||||
Mask, Subtarget, DAG))
|
||||
return V;
|
||||
|
||||
// If we have AVX2 then we always want to lower with a blend because at v8 we
|
||||
// can fully permute the elements.
|
||||
if (Subtarget.hasAVX2())
|
||||
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v8f32, V1, V2,
|
||||
Mask, DAG);
|
||||
Mask, Subtarget, DAG);
|
||||
|
||||
// Otherwise fall back on generic lowering.
|
||||
return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2, Mask, DAG);
|
||||
return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2, Mask,
|
||||
Subtarget, DAG);
|
||||
}
|
||||
|
||||
/// Handle lowering of 8-lane 32-bit integer shuffles.
|
||||
|
@ -14486,8 +14488,8 @@ static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
|||
// vpunpcklwd and vpunpckhwd instrs.
|
||||
if (isUnpackWdShuffleMask(Mask, MVT::v8i32) && !V2.isUndef() &&
|
||||
!Subtarget.hasAVX512())
|
||||
if (SDValue V =
|
||||
lowerVectorShuffleAsSplitOrBlend(DL, MVT::v8i32, V1, V2, Mask, DAG))
|
||||
if (SDValue V = lowerVectorShuffleAsSplitOrBlend(DL, MVT::v8i32, V1, V2,
|
||||
Mask, Subtarget, DAG))
|
||||
return V;
|
||||
|
||||
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i32, V1, V2, Mask,
|
||||
|
@ -14570,7 +14572,7 @@ static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
|||
|
||||
// Otherwise fall back on generic blend lowering.
|
||||
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v8i32, V1, V2,
|
||||
Mask, DAG);
|
||||
Mask, Subtarget, DAG);
|
||||
}
|
||||
|
||||
/// Handle lowering of 16-lane 16-bit integer shuffles.
|
||||
|
@ -14671,7 +14673,8 @@ static SDValue lowerV16I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
|||
return V;
|
||||
|
||||
// Otherwise fall back on generic lowering.
|
||||
return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v16i16, V1, V2, Mask, DAG);
|
||||
return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v16i16, V1, V2, Mask,
|
||||
Subtarget, DAG);
|
||||
}
|
||||
|
||||
/// Handle lowering of 32-lane 8-bit integer shuffles.
|
||||
|
@ -14761,7 +14764,8 @@ static SDValue lowerV32I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
|||
return V;
|
||||
|
||||
// Otherwise fall back on generic lowering.
|
||||
return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v32i8, V1, V2, Mask, DAG);
|
||||
return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v32i8, V1, V2, Mask,
|
||||
Subtarget, DAG);
|
||||
}
|
||||
|
||||
/// High-level routine to lower various 256-bit x86 vector shuffles.
|
||||
|
|
Loading…
Reference in New Issue