forked from OSchip/llvm-project
[X86] Remove transformVSELECTtoBlendVECTOR_SHUFFLE
The new X86 shuffle lowering can do just fine without transforming vselects into vector_shuffles. It looks like the only thing this code does right now is cause trouble - in particular, it can lead to combine/legalization infinite loops. Note that it's not completely NFC, since some of the shuffle masks get inverted, which may cause slight differences further down the line. We may want to find a way to invert those masks, but that's orthogonal to this commit. This fixes the hang in PR27689. llvm-svn: 269676
This commit is contained in:
parent
379a1952b3
commit
ac2088d122
|
@ -11973,53 +11973,6 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget,
|
|||
llvm_unreachable("Unimplemented!");
|
||||
}
|
||||
|
||||
// This function assumes its argument is a BUILD_VECTOR of constants or
|
||||
// undef SDNodes. i.e: ISD::isBuildVectorOfConstantSDNodes(BuildVector) is
|
||||
// true.
|
||||
static bool BUILD_VECTORtoBlendMask(BuildVectorSDNode *BuildVector,
|
||||
unsigned &MaskValue) {
|
||||
MaskValue = 0;
|
||||
unsigned NumElems = BuildVector->getNumOperands();
|
||||
|
||||
// There are 2 lanes if (NumElems > 8), and 1 lane otherwise.
|
||||
// We don't handle the >2 lanes case right now.
|
||||
unsigned NumLanes = (NumElems - 1) / 8 + 1;
|
||||
if (NumLanes > 2)
|
||||
return false;
|
||||
|
||||
unsigned NumElemsInLane = NumElems / NumLanes;
|
||||
|
||||
// Blend for v16i16 should be symmetric for the both lanes.
|
||||
for (unsigned i = 0; i < NumElemsInLane; ++i) {
|
||||
SDValue EltCond = BuildVector->getOperand(i);
|
||||
SDValue SndLaneEltCond =
|
||||
(NumLanes == 2) ? BuildVector->getOperand(i + NumElemsInLane) : EltCond;
|
||||
|
||||
int Lane1Cond = -1, Lane2Cond = -1;
|
||||
if (isa<ConstantSDNode>(EltCond))
|
||||
Lane1Cond = !isNullConstant(EltCond);
|
||||
if (isa<ConstantSDNode>(SndLaneEltCond))
|
||||
Lane2Cond = !isNullConstant(SndLaneEltCond);
|
||||
|
||||
unsigned LaneMask = 0;
|
||||
if (Lane1Cond == Lane2Cond || Lane2Cond < 0)
|
||||
// Lane1Cond != 0, means we want the first argument.
|
||||
// Lane1Cond == 0, means we want the second argument.
|
||||
// The encoding of this argument is 0 for the first argument, 1
|
||||
// for the second. Therefore, invert the condition.
|
||||
LaneMask = !Lane1Cond << i;
|
||||
else if (Lane1Cond < 0)
|
||||
LaneMask = !Lane2Cond << i;
|
||||
else
|
||||
return false;
|
||||
|
||||
MaskValue |= LaneMask;
|
||||
if (NumLanes == 2)
|
||||
MaskValue |= LaneMask << NumElemsInLane;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/// \brief Try to lower a VSELECT instruction to a vector shuffle.
|
||||
static SDValue lowerVSELECTtoVectorShuffle(SDValue Op,
|
||||
const X86Subtarget &Subtarget,
|
||||
|
@ -25539,50 +25492,6 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
|
|||
return SDValue();
|
||||
}
|
||||
|
||||
/// Try to rewrite a VSELECT whose condition is a constant BUILD_VECTOR into
/// an equivalent VECTOR_SHUFFLE, so that shuffle lowering can select a blend
/// instruction for it. Returns an empty SDValue when the pattern does not
/// apply or the resulting shuffle mask is not legal for the target.
/// NOTE(review): the surrounding commit removes this combine — per its
/// message it can fight legalization and cause infinite combine loops
/// (PR27689), and the new shuffle lowering handles vselects without it.
static SDValue
transformVSELECTtoBlendVECTOR_SHUFFLE(SDNode *N, SelectionDAG &DAG,
                                      const X86Subtarget &Subtarget) {
  SDLoc dl(N);
  SDValue Cond = N->getOperand(0);
  SDValue LHS = N->getOperand(1);
  SDValue RHS = N->getOperand(2);

  // Look through (sign_extend (sign_extend_inreg X)) to reach the raw
  // selector BUILD_VECTOR; this shape appears when a 512-bit select is split
  // during legalization on targets without AVX512 (see the caller's comment).
  if (Cond.getOpcode() == ISD::SIGN_EXTEND) {
    SDValue CondSrc = Cond->getOperand(0);
    if (CondSrc->getOpcode() == ISD::SIGN_EXTEND_INREG)
      Cond = CondSrc->getOperand(0);
  }

  // Only constant selectors can be folded to a fixed shuffle mask.
  if (!ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()))
    return SDValue();

  // A vselect where all conditions and data are constants can be optimized into
  // a single vector load by SelectionDAGLegalize::ExpandBUILD_VECTOR();
  // leave that case alone.
  if (ISD::isBuildVectorOfConstantSDNodes(LHS.getNode()) &&
      ISD::isBuildVectorOfConstantSDNodes(RHS.getNode()))
    return SDValue();

  // Fold the constant selector into a per-element blend immediate.
  unsigned MaskValue = 0;
  if (!BUILD_VECTORtoBlendMask(cast<BuildVectorSDNode>(Cond), MaskValue))
    return SDValue();

  // Translate the blend immediate into a shuffle mask: bit 0 selects element
  // i of LHS, bit 1 selects element i of RHS (index i + NumElems).
  MVT VT = N->getSimpleValueType(0);
  unsigned NumElems = VT.getVectorNumElements();
  SmallVector<int, 8> ShuffleMask(NumElems, -1);
  for (unsigned i = 0; i < NumElems; ++i) {
    // Be sure we emit undef where we can.
    if (Cond.getOperand(i)->isUndef())
      ShuffleMask[i] = -1;
    else
      ShuffleMask[i] = i + NumElems * ((MaskValue >> i) & 1);
  }

  // Only perform the rewrite when the target can actually match the mask.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (!TLI.isShuffleMaskLegal(ShuffleMask, VT))
    return SDValue();
  return DAG.getVectorShuffle(VT, dl, LHS, RHS, &ShuffleMask[0]);
}
|
||||
|
||||
/// Do target-specific dag combines on SELECT and VSELECT nodes.
|
||||
static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
|
||||
TargetLowering::DAGCombinerInfo &DCI,
|
||||
|
@ -25996,25 +25905,6 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
|
|||
}
|
||||
}
|
||||
|
||||
// We should generate an X86ISD::BLENDI from a vselect if its argument
|
||||
// is a sign_extend_inreg of an any_extend of a BUILD_VECTOR of
|
||||
// constants. This specific pattern gets generated when we split a
|
||||
// selector for a 512 bit vector in a machine without AVX512 (but with
|
||||
// 256-bit vectors), during legalization:
|
||||
//
|
||||
// (vselect (sign_extend (any_extend (BUILD_VECTOR)) i1) LHS RHS)
|
||||
//
|
||||
// Iff we find this pattern and the build_vectors are built from
|
||||
// constants, we translate the vselect into a shuffle_vector that we
|
||||
// know will be matched by LowerVECTOR_SHUFFLEtoBlend.
|
||||
if ((N->getOpcode() == ISD::VSELECT ||
|
||||
N->getOpcode() == X86ISD::SHRUNKBLEND) &&
|
||||
!DCI.isBeforeLegalize() && !VT.is512BitVector()) {
|
||||
if (SDValue Shuffle =
|
||||
transformVSELECTtoBlendVECTOR_SHUFFLE(N, DAG, Subtarget))
|
||||
return Shuffle;
|
||||
}
|
||||
|
||||
// If this is a *dynamic* select (non-constant condition) and we can match
|
||||
// this node with one of the variable blend instructions, restructure the
|
||||
// condition so that the blends can use the high bit of each element and use
|
||||
|
|
|
@ -273,15 +273,15 @@ define <16 x i8> @vsel_i8(<16 x i8> %v1, <16 x i8> %v2) {
|
|||
; SSE41-LABEL: vsel_i8:
|
||||
; SSE41: # BB#0: # %entry
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm2
|
||||
; SSE41-NEXT: movaps {{.*#+}} xmm0 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
|
||||
; SSE41-NEXT: pblendvb %xmm1, %xmm2
|
||||
; SSE41-NEXT: movdqa %xmm2, %xmm0
|
||||
; SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
|
||||
; SSE41-NEXT: pblendvb %xmm2, %xmm1
|
||||
; SSE41-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: vsel_i8:
|
||||
; AVX: # BB#0: # %entry
|
||||
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
|
||||
; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
|
||||
; AVX-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
entry:
|
||||
%vsel = select <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <16 x i8> %v1, <16 x i8> %v2
|
||||
|
@ -652,10 +652,11 @@ define <32 x i8> @constant_pblendvb_avx2(<32 x i8> %xyzw, <32 x i8> %abcd) {
|
|||
; SSE41-LABEL: constant_pblendvb_avx2:
|
||||
; SSE41: # BB#0: # %entry
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm4
|
||||
; SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
|
||||
; SSE41-NEXT: pblendvb %xmm2, %xmm4
|
||||
; SSE41-NEXT: pblendvb %xmm3, %xmm1
|
||||
; SSE41-NEXT: movdqa %xmm4, %xmm0
|
||||
; SSE41-NEXT: movaps {{.*#+}} xmm0 = [0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0]
|
||||
; SSE41-NEXT: pblendvb %xmm4, %xmm2
|
||||
; SSE41-NEXT: pblendvb %xmm1, %xmm3
|
||||
; SSE41-NEXT: movdqa %xmm2, %xmm0
|
||||
; SSE41-NEXT: movdqa %xmm3, %xmm1
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: constant_pblendvb_avx2:
|
||||
|
|
Loading…
Reference in New Issue