[X86][SSE] Pull out shuffle(hop,hop) combine into combineShuffleWithHorizOp helper. NFC.

This commit is contained in:
Simon Pilgrim 2020-08-10 15:08:44 +01:00
parent 81883ca074
commit 07e673a02b
1 changed files with 62 additions and 45 deletions

View File

@ -35847,6 +35847,66 @@ combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask,
return V;
}
// TODO: Merge with foldShuffleOfHorizOp.
static SDValue combineShuffleWithHorizOp(SDValue N, MVT VT, const SDLoc &DL,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
bool IsUnary;
SmallVector<int, 64> TargetMask;
SmallVector<SDValue, 2> TargetOps;
if (!isTargetShuffle(N.getOpcode()) ||
!getTargetShuffleMask(N.getNode(), VT, true, TargetOps, TargetMask,
IsUnary))
return SDValue();
// Combine binary shuffle of 2 similar 'Horizontal' instructions into a
// single instruction. Attempt to match a v2X64 repeating shuffle pattern that
// represents the LHS/RHS inputs for the lower/upper halves.
if (TargetMask.empty() || TargetOps.empty() || 2 < TargetOps.size())
return SDValue();
SDValue BC0 = peekThroughBitcasts(TargetOps.front());
SDValue BC1 = peekThroughBitcasts(TargetOps.back());
EVT VT0 = BC0.getValueType();
EVT VT1 = BC1.getValueType();
unsigned Opcode0 = BC0.getOpcode();
unsigned Opcode1 = BC1.getOpcode();
if (Opcode0 != Opcode1 || VT0 != VT1)
return SDValue();
bool isHoriz = (Opcode0 == X86ISD::FHADD || Opcode0 == X86ISD::HADD ||
Opcode0 == X86ISD::FHSUB || Opcode0 == X86ISD::HSUB);
bool isPack = (Opcode0 == X86ISD::PACKSS || Opcode0 == X86ISD::PACKUS);
if (!isHoriz && !isPack)
return SDValue();
SmallVector<int, 16> TargetMask128, WideMask128;
if (isRepeatedTargetShuffleMask(128, VT, TargetMask, TargetMask128) &&
scaleShuffleElements(TargetMask128, 2, WideMask128)) {
assert(isUndefOrZeroOrInRange(WideMask128, 0, 4) && "Illegal shuffle");
bool SingleOp = (TargetOps.size() == 1);
if (!isHoriz || shouldUseHorizontalOp(SingleOp, DAG, Subtarget)) {
SDValue Lo = isInRange(WideMask128[0], 0, 2) ? BC0 : BC1;
SDValue Hi = isInRange(WideMask128[1], 0, 2) ? BC0 : BC1;
Lo = Lo.getOperand(WideMask128[0] & 1);
Hi = Hi.getOperand(WideMask128[1] & 1);
if (SingleOp) {
MVT SrcVT = BC0.getOperand(0).getSimpleValueType();
SDValue Undef = DAG.getUNDEF(SrcVT);
SDValue Zero = getZeroVector(SrcVT, Subtarget, DAG, DL);
Lo = (WideMask128[0] == SM_SentinelZero ? Zero : Lo);
Hi = (WideMask128[1] == SM_SentinelZero ? Zero : Hi);
Lo = (WideMask128[0] == SM_SentinelUndef ? Undef : Lo);
Hi = (WideMask128[1] == SM_SentinelUndef ? Undef : Hi);
}
SDValue Horiz = DAG.getNode(Opcode0, DL, VT0, Lo, Hi);
return DAG.getBitcast(VT, Horiz);
}
}
return SDValue();
}
// Attempt to commute shufps LHS loads:
// permilps(shufps(load(),x)) --> permilps(shufps(x,load()))
static SDValue combineCommutableSHUFP(SDValue N, MVT VT, const SDLoc &DL,
@ -35909,51 +35969,8 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
SmallVector<int, 4> Mask;
unsigned Opcode = N.getOpcode();
bool IsUnary;
SmallVector<int, 64> TargetMask;
SmallVector<SDValue, 2> TargetOps;
if (isTargetShuffle(Opcode))
getTargetShuffleMask(N.getNode(), VT, true, TargetOps, TargetMask, IsUnary);
// Combine binary shuffle of 2 similar 'Horizontal' instructions into a
// single instruction. Attempt to match a v2X64 repeating shuffle pattern that
// represents the LHS/RHS inputs for the lower/upper halves.
if (!TargetMask.empty() && 0 < TargetOps.size() && TargetOps.size() <= 2) {
SDValue BC0 = peekThroughBitcasts(TargetOps.front());
SDValue BC1 = peekThroughBitcasts(TargetOps.back());
EVT VT0 = BC0.getValueType();
EVT VT1 = BC1.getValueType();
unsigned Opcode0 = BC0.getOpcode();
unsigned Opcode1 = BC1.getOpcode();
bool isHoriz = (Opcode0 == X86ISD::FHADD || Opcode0 == X86ISD::HADD ||
Opcode0 == X86ISD::FHSUB || Opcode0 == X86ISD::HSUB);
bool isPack = (Opcode0 == X86ISD::PACKSS || Opcode0 == X86ISD::PACKUS);
if (Opcode0 == Opcode1 && VT0 == VT1 && (isHoriz || isPack)) {
SmallVector<int, 16> TargetMask128, WideMask128;
if (isRepeatedTargetShuffleMask(128, VT, TargetMask, TargetMask128) &&
scaleShuffleElements(TargetMask128, 2, WideMask128)) {
assert(isUndefOrZeroOrInRange(WideMask128, 0, 4) && "Illegal shuffle");
bool SingleOp = (TargetOps.size() == 1);
if (!isHoriz || shouldUseHorizontalOp(SingleOp, DAG, Subtarget)) {
SDValue Lo = isInRange(WideMask128[0], 0, 2) ? BC0 : BC1;
SDValue Hi = isInRange(WideMask128[1], 0, 2) ? BC0 : BC1;
Lo = Lo.getOperand(WideMask128[0] & 1);
Hi = Hi.getOperand(WideMask128[1] & 1);
if (SingleOp) {
MVT SrcVT = BC0.getOperand(0).getSimpleValueType();
SDValue Undef = DAG.getUNDEF(SrcVT);
SDValue Zero = getZeroVector(SrcVT, Subtarget, DAG, DL);
Lo = (WideMask128[0] == SM_SentinelZero ? Zero : Lo);
Hi = (WideMask128[1] == SM_SentinelZero ? Zero : Hi);
Lo = (WideMask128[0] == SM_SentinelUndef ? Undef : Lo);
Hi = (WideMask128[1] == SM_SentinelUndef ? Undef : Hi);
}
SDValue Horiz = DAG.getNode(Opcode0, DL, VT0, Lo, Hi);
return DAG.getBitcast(VT, Horiz);
}
}
}
}
if (SDValue R = combineShuffleWithHorizOp(N, VT, DL, DAG, Subtarget))
return R;
if (SDValue R = combineCommutableSHUFP(N, VT, DL, DAG))
return R;