[X86][SSE] Reuse zeroable element mask in lowerVectorShuffleAsBlend. NFCI

Don't regenerate the zeroable element mask with computeZeroableShuffleElements when it's already available.

llvm-svn: 286045
parent 725174694a
commit 4a9f210412
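The change threads a precomputed zeroable-element mask through the shuffle-lowering helpers instead of having lowerVectorShuffleAsBlend rebuild it with computeZeroableShuffleElements on every call. As a rough illustration of the pattern, here is a minimal standalone sketch, not LLVM's actual code: computeZeroable, lowerAsBlend, kNumElts, and the bitset types below are hypothetical stand-ins for computeZeroableShuffleElements, lowerVectorShuffleAsBlend, and SmallBitVector.

#include <bitset>
#include <cstdio>
#include <vector>

// Shuffle-mask convention, as in LLVM: -1 means "undef"; 0..N-1 selects a
// lane of V1; N..2N-1 selects a lane of V2.
constexpr int kNumElts = 4;                 // hypothetical fixed width
using ShuffleMask = std::vector<int>;
using ZeroableMask = std::bitset<kNumElts>;

// Sketch of a zeroable computation: a result lane is "zeroable" when it is
// undef or reads an input lane already known to be zero.
ZeroableMask computeZeroable(const ShuffleMask &M, const ZeroableMask &V1Zero,
                             const ZeroableMask &V2Zero) {
  ZeroableMask Z;
  for (int i = 0; i < kNumElts; ++i) {
    int Idx = M[i];
    if (Idx < 0)
      Z.set(i);                              // undef lane: free to be zero
    else if (Idx < kNumElts)
      Z[i] = V1Zero[Idx];                    // reads a known-zero V1 lane
    else
      Z[i] = V2Zero[Idx - kNumElts];         // reads a known-zero V2 lane
  }
  return Z;
}

// After the refactor, the helper takes the zeroable mask as a parameter
// instead of recomputing it: a lane must come from the matching lane of V1
// or V2, or be zeroable (blendable against a zero vector).
bool lowerAsBlend(const ShuffleMask &M, const ZeroableMask &Zeroable) {
  for (int i = 0; i < kNumElts; ++i)
    if (M[i] >= 0 && M[i] != i && M[i] != i + kNumElts && !Zeroable[i])
      return false;
  return true;
}

int main() {
  ShuffleMask M = {0, 5, -1, 3};             // V1[0], V2[1], undef, V1[3]
  ZeroableMask V1Zero, V2Zero;               // no input lanes known zero
  ZeroableMask Z = computeZeroable(M, V1Zero, V2Zero);      // computed once...
  std::printf("blendable: %d\n", (int)lowerAsBlend(M, Z)); // ...reused here
  return 0;
}

The refactor is purely structural, hence the NFCI (no functional change intended) tag: the caller computes the mask once and every candidate lowering reuses it, as the diff below shows.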
@@ -7558,12 +7558,12 @@ static SDValue lowerVectorShuffleAsBitBlend(const SDLoc &DL, MVT VT, SDValue V1,
 /// that the shuffle mask is a blend, or convertible into a blend with zero.
 static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
                                          SDValue V2, ArrayRef<int> Original,
+                                         const SmallBitVector &Zeroable,
                                          const X86Subtarget &Subtarget,
                                          SelectionDAG &DAG) {
   bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
   bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
   SmallVector<int, 8> Mask(Original.begin(), Original.end());
-  SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
   bool ForceV1Zero = false, ForceV2Zero = false;
 
   // Attempt to generate the binary blend mask. If an input is zero then
@@ -9047,6 +9047,7 @@ static SDValue lowerVectorShuffleAsPermuteAndUnpack(const SDLoc &DL, MVT VT,
 /// it is better to avoid lowering through this for integer vectors where
 /// possible.
 static SDValue lowerV2F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
+                                       const SmallBitVector &Zeroable,
                                        SDValue V1, SDValue V2,
                                        const X86Subtarget &Subtarget,
                                        SelectionDAG &DAG) {
@@ -9108,7 +9109,7 @@ static SDValue lowerV2F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
 
   if (Subtarget.hasSSE41())
     if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v2f64, V1, V2, Mask,
-                                                  Subtarget, DAG))
+                                                  Zeroable, Subtarget, DAG))
       return Blend;
 
   // Use dedicated unpack instructions for masks that match their pattern.
@@ -9201,7 +9202,7 @@ static SDValue lowerV2I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   bool IsBlendSupported = Subtarget.hasSSE41();
   if (IsBlendSupported)
     if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v2i64, V1, V2, Mask,
-                                                  Subtarget, DAG))
+                                                  Zeroable, Subtarget, DAG))
       return Blend;
 
   // Use dedicated unpack instructions for masks that match their pattern.
@@ -9396,7 +9397,7 @@ static SDValue lowerV4F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
 
   if (Subtarget.hasSSE41()) {
     if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4f32, V1, V2, Mask,
-                                                  Subtarget, DAG))
+                                                  Zeroable, Subtarget, DAG))
       return Blend;
 
     // Use INSERTPS if we can complete the shuffle efficiently.
@@ -9485,7 +9486,7 @@ static SDValue lowerV4I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   bool IsBlendSupported = Subtarget.hasSSE41();
   if (IsBlendSupported)
     if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4i32, V1, V2, Mask,
-                                                  Subtarget, DAG))
+                                                  Zeroable, Subtarget, DAG))
       return Blend;
 
   if (SDValue Masked = lowerVectorShuffleAsBitMask(DL, MVT::v4i32, V1, V2, Mask,
@@ -10125,7 +10126,7 @@ static SDValue lowerV8I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   bool IsBlendSupported = Subtarget.hasSSE41();
   if (IsBlendSupported)
     if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i16, V1, V2, Mask,
-                                                  Subtarget, DAG))
+                                                  Zeroable, Subtarget, DAG))
       return Blend;
 
   if (SDValue Masked = lowerVectorShuffleAsBitMask(DL, MVT::v8i16, V1, V2, Mask,
@@ -10402,8 +10403,8 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   // important as a single pshufb is significantly faster for that.
   if (V1InUse && V2InUse) {
     if (Subtarget.hasSSE41())
-      if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v16i8, V1, V2,
-                                                    Mask, Subtarget, DAG))
+      if (SDValue Blend = lowerVectorShuffleAsBlend(
+              DL, MVT::v16i8, V1, V2, Mask, Zeroable, Subtarget, DAG))
        return Blend;
 
     // We can use an unpack to do the blending rather than an or in some
@@ -10536,7 +10537,7 @@ static SDValue lower128BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   case MVT::v2i64:
     return lowerV2I64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
   case MVT::v2f64:
-    return lowerV2F64VectorShuffle(DL, Mask, V1, V2, Subtarget, DAG);
+    return lowerV2F64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
   case MVT::v4i32:
     return lowerV4I32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
   case MVT::v4f32:
@@ -10841,6 +10842,7 @@ static SDValue lowerVectorShuffleAsLanePermuteAndBlend(const SDLoc &DL, MVT VT,
 /// \brief Handle lowering 2-lane 128-bit shuffles.
 static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
                                         SDValue V2, ArrayRef<int> Mask,
+                                        const SmallBitVector &Zeroable,
                                         const X86Subtarget &Subtarget,
                                         SelectionDAG &DAG) {
   // TODO: If minimizing size and one of the inputs is a zero vector and the
@@ -10849,7 +10851,7 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
 
   // Blends are faster and handle all the non-lane-crossing cases.
   if (SDValue Blend = lowerVectorShuffleAsBlend(DL, VT, V1, V2, Mask,
-                                                Subtarget, DAG))
+                                                Zeroable, Subtarget, DAG))
     return Blend;
 
   bool IsV1Zero = ISD::isBuildVectorAllZeros(V1.getNode());
@@ -11359,6 +11361,7 @@ static SDValue lowerVectorShuffleWithPERMV(const SDLoc &DL, MVT VT,
 /// Also ends up handling lowering of 4-lane 64-bit integer shuffles when AVX2
 /// isn't available.
 static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
+                                       const SmallBitVector &Zeroable,
                                        SDValue V1, SDValue V2,
                                        const X86Subtarget &Subtarget,
                                        SelectionDAG &DAG) {
@@ -11369,7 +11372,7 @@ static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   SmallVector<int, 4> WidenedMask;
   if (canWidenShuffleElements(Mask, WidenedMask))
     if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4f64, V1, V2, Mask,
-                                             Subtarget, DAG))
+                                             Zeroable, Subtarget, DAG))
       return V;
 
   if (V2.isUndef()) {
@@ -11413,7 +11416,7 @@ static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
     return V;
 
   if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4f64, V1, V2, Mask,
-                                                Subtarget, DAG))
+                                                Zeroable, Subtarget, DAG))
     return Blend;
 
   // Check if the blend happens to exactly fit that of SHUFPD.
@@ -11464,11 +11467,11 @@ static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   SmallVector<int, 4> WidenedMask;
   if (canWidenShuffleElements(Mask, WidenedMask))
     if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4i64, V1, V2, Mask,
-                                             Subtarget, DAG))
+                                             Zeroable, Subtarget, DAG))
       return V;
 
   if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4i64, V1, V2, Mask,
-                                                Subtarget, DAG))
+                                                Zeroable, Subtarget, DAG))
     return Blend;
 
   // Check for being able to broadcast a single element.
@@ -11530,6 +11533,7 @@ static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
 /// Also ends up handling lowering of 8-lane 32-bit integer shuffles when AVX2
 /// isn't available.
 static SDValue lowerV8F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
+                                       const SmallBitVector &Zeroable,
                                        SDValue V1, SDValue V2,
                                        const X86Subtarget &Subtarget,
                                        SelectionDAG &DAG) {
@@ -11538,7 +11542,7 @@ static SDValue lowerV8F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
 
   if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8f32, V1, V2, Mask,
-                                                Subtarget, DAG))
+                                                Zeroable, Subtarget, DAG))
     return Blend;
 
   // Check for being able to broadcast a single element.
@@ -11632,7 +11636,7 @@ static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
     return ZExt;
 
   if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i32, V1, V2, Mask,
-                                                Subtarget, DAG))
+                                                Zeroable, Subtarget, DAG))
     return Blend;
 
   // Check for being able to broadcast a single element.
@@ -11717,7 +11721,7 @@ static SDValue lowerV16I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
     return Broadcast;
 
   if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v16i16, V1, V2, Mask,
-                                                Subtarget, DAG))
+                                                Zeroable, Subtarget, DAG))
     return Blend;
 
   // Use dedicated unpack instructions for masks that match their pattern.
@@ -11803,7 +11807,7 @@ static SDValue lowerV32I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
     return Broadcast;
 
   if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v32i8, V1, V2, Mask,
-                                                Subtarget, DAG))
+                                                Zeroable, Subtarget, DAG))
     return Blend;
 
   // Use dedicated unpack instructions for masks that match their pattern.
@@ -11900,11 +11904,11 @@ static SDValue lower256BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
 
   switch (VT.SimpleTy) {
   case MVT::v4f64:
-    return lowerV4F64VectorShuffle(DL, Mask, V1, V2, Subtarget, DAG);
+    return lowerV4F64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
   case MVT::v4i64:
     return lowerV4I64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
   case MVT::v8f32:
-    return lowerV8F32VectorShuffle(DL, Mask, V1, V2, Subtarget, DAG);
+    return lowerV8F32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
   case MVT::v8i32:
     return lowerV8I32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
   case MVT::v16i16: