[X86][SSE] Generalise target shuffle combine of shuffles using variable masks

At present the only shuffle with a variable mask that we recognise is PSHUFB, which influences whether it's worth the cost of creating/loading the mask for a combined target shuffle with a variable mask. This change sets up the infrastructure to support other shuffles in the future but has no effect yet.

llvm-svn: 275059
This commit is contained in:
Simon Pilgrim 2016-07-11 12:49:35 +00:00
parent 53a72f4d3c
commit 832463eada
1 changed files with 21 additions and 13 deletions

View File

@ -3825,6 +3825,14 @@ static bool isTargetShuffle(unsigned Opcode) {
} }
} }
/// Returns true if \p Opcode is a target shuffle that uses a variable mask.
/// PSHUFB is the only such opcode recognised here; add further cases to the
/// switch to extend the set.
static bool isTargetShuffleVariableMask(unsigned Opcode) {
  switch (Opcode) {
  case X86ISD::PSHUFB:
    return true;
  default:
    return false;
  }
}
static SDValue getTargetShuffleNode(unsigned Opc, const SDLoc &dl, MVT VT, static SDValue getTargetShuffleNode(unsigned Opc, const SDLoc &dl, MVT VT,
SDValue V1, unsigned TargetMask, SDValue V1, unsigned TargetMask,
SelectionDAG &DAG) { SelectionDAG &DAG) {
@ -25009,7 +25017,7 @@ static bool matchBinaryVectorShuffle(MVT SrcVT, ArrayRef<int> Mask,
/// instruction but should only be used to replace chains over a certain depth. /// instruction but should only be used to replace chains over a certain depth.
static bool combineX86ShuffleChain(SDValue Input, SDValue Root, static bool combineX86ShuffleChain(SDValue Input, SDValue Root,
ArrayRef<int> Mask, int Depth, ArrayRef<int> Mask, int Depth,
bool HasPSHUFB, SelectionDAG &DAG, bool HasVariableMask, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI, TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) { const X86Subtarget &Subtarget) {
assert(!Mask.empty() && "Cannot combine an empty shuffle mask!"); assert(!Mask.empty() && "Cannot combine an empty shuffle mask!");
@ -25175,11 +25183,12 @@ static bool combineX86ShuffleChain(SDValue Input, SDValue Root,
if (Depth < 2) if (Depth < 2)
return false; return false;
// If we have 3 or more shuffle instructions or a chain involving PSHUFB, we // If we have 3 or more shuffle instructions or a chain involving a variable
// can replace them with a single PSHUFB instruction profitably. Intel's // mask, we can replace them with a single PSHUFB instruction profitably.
// manuals suggest only using PSHUFB if doing so replacing 5 instructions, but // Intel's manuals suggest only using PSHUFB if doing so replacing 5
// in practice PSHUFB tends to be *very* fast so we're more aggressive. // instructions, but in practice PSHUFB tends to be *very* fast so we're
if ((Depth >= 3 || HasPSHUFB) && // more aggressive.
if ((Depth >= 3 || HasVariableMask) &&
((VT.is128BitVector() && Subtarget.hasSSSE3()) || ((VT.is128BitVector() && Subtarget.hasSSSE3()) ||
(VT.is256BitVector() && Subtarget.hasAVX2()) || (VT.is256BitVector() && Subtarget.hasAVX2()) ||
(VT.is512BitVector() && Subtarget.hasBWI()))) { (VT.is512BitVector() && Subtarget.hasBWI()))) {
@ -25249,7 +25258,7 @@ static bool combineX86ShuffleChain(SDValue Input, SDValue Root,
/// combining in this recursive walk. /// combining in this recursive walk.
static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root, static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root,
ArrayRef<int> RootMask, ArrayRef<int> RootMask,
int Depth, bool HasPSHUFB, int Depth, bool HasVariableMask,
SelectionDAG &DAG, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI, TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) { const X86Subtarget &Subtarget) {
@ -25351,13 +25360,12 @@ static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root,
assert(Input0 && "Shuffle with no inputs detected"); assert(Input0 && "Shuffle with no inputs detected");
// TODO - generalize this to support any variable mask shuffle. HasVariableMask |= isTargetShuffleVariableMask(Op.getOpcode());
HasPSHUFB |= (Op.getOpcode() == X86ISD::PSHUFB);
// See if we can recurse into Input0 (if it's a target shuffle). // See if we can recurse into Input0 (if it's a target shuffle).
if (Op->isOnlyUserOf(Input0.getNode()) && if (Op->isOnlyUserOf(Input0.getNode()) &&
combineX86ShufflesRecursively(Input0, Root, Mask, Depth + 1, HasPSHUFB, combineX86ShufflesRecursively(Input0, Root, Mask, Depth + 1,
DAG, DCI, Subtarget)) HasVariableMask, DAG, DCI, Subtarget))
return true; return true;
// Minor canonicalization of the accumulated shuffle mask to make it easier // Minor canonicalization of the accumulated shuffle mask to make it easier
@ -25370,8 +25378,8 @@ static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root,
Mask = std::move(WidenedMask); Mask = std::move(WidenedMask);
} }
return combineX86ShuffleChain(Input0, Root, Mask, Depth, HasPSHUFB, DAG, DCI, return combineX86ShuffleChain(Input0, Root, Mask, Depth, HasVariableMask, DAG,
Subtarget); DCI, Subtarget);
} }
/// \brief Get the PSHUF-style mask from PSHUF node. /// \brief Get the PSHUF-style mask from PSHUF node.