From ea51f98b9b592d757dc9141674a09b91c38b8b7d Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 9 Nov 2018 13:19:03 +0000 Subject: [PATCH] [X86] Add Subtarget to more lowerVectorShuffle functions. NFCI. This will be necessary for an update to D54267 llvm-svn: 346490 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 48 +++++++++++++------------ 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index ddc418579e0a..44fd25bf08e9 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -10240,11 +10240,9 @@ static SDValue lowerVectorShuffleAsUNPCKAndPermute(const SDLoc &DL, MVT VT, /// shuffle+blend operations on newer X86 ISAs where we have very fast blend /// operations. It will try to pick the best arrangement of shuffles and /// blends. -static SDValue lowerVectorShuffleAsDecomposedShuffleBlend(const SDLoc &DL, - MVT VT, SDValue V1, - SDValue V2, - ArrayRef<int> Mask, - SelectionDAG &DAG) { +static SDValue lowerVectorShuffleAsDecomposedShuffleBlend( + const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask, + const X86Subtarget &Subtarget, SelectionDAG &DAG) { // Shuffle the input elements into the desired positions in V1 and V2 and // blend them together. SmallVector<int, 32> V1Mask(Mask.size(), -1); @@ -11805,7 +11803,7 @@ static SDValue lowerV2I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask, // a permute. That will be faster than the domain cross. if (IsBlendSupported) return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v2i64, V1, V2, - Mask, DAG); + Mask, Subtarget, DAG); // We implement this with SHUFPD which is pretty lame because it will likely // incur 2 cycles of stall for integer vectors on Nehalem and older chips. @@ -12115,7 +12113,7 @@ static SDValue lowerV4I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask, // a permute. That will be faster than the domain cross. 
if (IsBlendSupported) return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4i32, V1, V2, - Mask, DAG); + Mask, Subtarget, DAG); // Try to lower by permuting the inputs into an unpack instruction. if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack( @@ -12830,7 +12828,7 @@ static SDValue lowerV8I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask, // We can always bit-blend if we have to so the fallback strategy is to // decompose into single-input permutes and blends. return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v8i16, V1, V2, - Mask, DAG); + Mask, Subtarget, DAG); } /// Check whether a compaction lowering can be done by dropping even @@ -13161,7 +13159,7 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask, // Handle multi-input cases by blending single-input shuffles. if (NumV2Elements > 0) return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v16i8, V1, V2, - Mask, DAG); + Mask, Subtarget, DAG); // The fallback path for single-input shuffles widens this into two v8i16 // vectors with unpacks, shuffles those, and then pulls them back together @@ -13374,6 +13372,7 @@ static SDValue splitAndLowerVectorShuffle(const SDLoc &DL, MVT VT, SDValue V1, static SDValue lowerVectorShuffleAsSplitOrBlend(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask, + const X86Subtarget &Subtarget, SelectionDAG &DAG) { assert(!V2.isUndef() && "This routine must not be used to lower single-input " "shuffles as it could then recurse on itself."); @@ -13400,7 +13399,7 @@ static SDValue lowerVectorShuffleAsSplitOrBlend(const SDLoc &DL, MVT VT, }; if (DoBothBroadcast()) return lowerVectorShuffleAsDecomposedShuffleBlend(DL, VT, V1, V2, Mask, - DAG); + Subtarget, DAG); // If the inputs all stem from a single 128-bit lane of each input, then we // split them rather than blending because the split will decompose to @@ -13418,7 +13417,8 @@ static SDValue lowerVectorShuffleAsSplitOrBlend(const SDLoc &DL, MVT VT, // Otherwise, just fall back 
to decomposed shuffles and a blend. This requires // that the decomposed single-input shuffles don't end up here. - return lowerVectorShuffleAsDecomposedShuffleBlend(DL, VT, V1, V2, Mask, DAG); + return lowerVectorShuffleAsDecomposedShuffleBlend(DL, VT, V1, V2, Mask, + Subtarget, DAG); } /// Lower a vector shuffle crossing multiple 128-bit lanes as @@ -14261,10 +14261,11 @@ static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask, // can fully permute the elements. if (Subtarget.hasAVX2()) return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4f64, V1, V2, - Mask, DAG); + Mask, Subtarget, DAG); // Otherwise fall back on generic lowering. - return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v4f64, V1, V2, Mask, DAG); + return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v4f64, V1, V2, Mask, + Subtarget, DAG); } /// Handle lowering of 4-lane 64-bit integer shuffles. @@ -14358,7 +14359,7 @@ static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask, // Otherwise fall back on generic blend lowering. return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4i64, V1, V2, - Mask, DAG); + Mask, Subtarget, DAG); } /// Handle lowering of 8-lane 32-bit floating point shuffles. @@ -14447,17 +14448,18 @@ static SDValue lowerV8F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask, // vpunpckhwd instrs than vblend. if (!Subtarget.hasAVX512() && isUnpackWdShuffleMask(Mask, MVT::v8f32)) if (SDValue V = lowerVectorShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2, - Mask, DAG)) + Mask, Subtarget, DAG)) return V; // If we have AVX2 then we always want to lower with a blend because at v8 we // can fully permute the elements. if (Subtarget.hasAVX2()) return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v8f32, V1, V2, - Mask, DAG); + Mask, Subtarget, DAG); // Otherwise fall back on generic lowering. 
- return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2, Mask, DAG); + return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2, Mask, + Subtarget, DAG); } /// Handle lowering of 8-lane 32-bit integer shuffles. @@ -14486,8 +14488,8 @@ static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask, // vpunpcklwd and vpunpckhwd instrs. if (isUnpackWdShuffleMask(Mask, MVT::v8i32) && !V2.isUndef() && !Subtarget.hasAVX512()) - if (SDValue V = - lowerVectorShuffleAsSplitOrBlend(DL, MVT::v8i32, V1, V2, Mask, DAG)) + if (SDValue V = lowerVectorShuffleAsSplitOrBlend(DL, MVT::v8i32, V1, V2, + Mask, Subtarget, DAG)) return V; if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i32, V1, V2, Mask, @@ -14570,7 +14572,7 @@ static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask, // Otherwise fall back on generic blend lowering. return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v8i32, V1, V2, - Mask, DAG); + Mask, Subtarget, DAG); } /// Handle lowering of 16-lane 16-bit integer shuffles. @@ -14671,7 +14673,8 @@ static SDValue lowerV16I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask, return V; // Otherwise fall back on generic lowering. - return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v16i16, V1, V2, Mask, DAG); + return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v16i16, V1, V2, Mask, + Subtarget, DAG); } /// Handle lowering of 32-lane 8-bit integer shuffles. @@ -14761,7 +14764,8 @@ static SDValue lowerV32I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask, return V; // Otherwise fall back on generic lowering. - return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v32i8, V1, V2, Mask, DAG); + return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v32i8, V1, V2, Mask, + Subtarget, DAG); } /// High-level routine to lower various 256-bit x86 vector shuffles.