forked from OSchip/llvm-project
[X86][SSE] Pull out shuffle(hop,hop) combine into combineShuffleWithHorizOp helper. NFC.
This commit is contained in:
parent
81883ca074
commit
07e673a02b
|
@ -35847,6 +35847,66 @@ combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask,
|
|||
return V;
|
||||
}
|
||||
|
||||
// TODO: Merge with foldShuffleOfHorizOp.
|
||||
/// Try to fold a target shuffle whose inputs are two matching horizontal-op
/// (FHADD/HADD/FHSUB/HSUB) or pack (PACKSS/PACKUS) nodes into a single node
/// of that same opcode.
///
/// The fold applies when the decoded shuffle mask repeats per 128-bit lane and
/// can be rescaled to two elements, i.e. each half of a lane selects exactly
/// one operand of one of the source nodes.
///
/// \param N         Candidate node; rejected unless it is a target shuffle
///                  whose mask can be decoded.
/// \param VT        Type used to decode the mask and to bitcast the result.
/// \param DL        Debug location for any newly created nodes.
/// \param DAG       DAG in which new nodes are created.
/// \param Subtarget Passed to shouldUseHorizontalOp and getZeroVector.
/// \returns The replacement value bitcast to \p VT, or an empty SDValue when
///          the pattern does not match.
static SDValue combineShuffleWithHorizOp(SDValue N, MVT VT, const SDLoc &DL,
                                         SelectionDAG &DAG,
                                         const X86Subtarget &Subtarget) {
  // Only target shuffles with a decodable mask are candidates.
  if (!isTargetShuffle(N.getOpcode()))
    return SDValue();

  bool IsUnary;
  SmallVector<int, 64> TargetMask;
  SmallVector<SDValue, 2> TargetOps;
  if (!getTargetShuffleMask(N.getNode(), VT, true, TargetOps, TargetMask,
                            IsUnary))
    return SDValue();

  // Combine binary shuffle of 2 similar 'Horizontal' instructions into a
  // single instruction. We need a non-empty mask and either one or two
  // shuffle operands.
  if (TargetMask.empty() || TargetOps.empty() || TargetOps.size() > 2)
    return SDValue();

  // With a single operand front() and back() are the same value, so LHS and
  // RHS alias each other.
  SDValue LHS = peekThroughBitcasts(TargetOps.front());
  SDValue RHS = peekThroughBitcasts(TargetOps.back());
  unsigned SrcOpc = LHS.getOpcode();
  EVT SrcNodeVT = LHS.getValueType();
  if (SrcOpc != RHS.getOpcode() || SrcNodeVT != RHS.getValueType())
    return SDValue();

  // Classify the common source opcode; anything other than a horizontal op
  // or a pack is not handled here.
  bool IsHorizOp = false;
  switch (SrcOpc) {
  case X86ISD::FHADD:
  case X86ISD::HADD:
  case X86ISD::FHSUB:
  case X86ISD::HSUB:
    IsHorizOp = true;
    break;
  case X86ISD::PACKSS:
  case X86ISD::PACKUS:
    break;
  default:
    return SDValue();
  }

  // Attempt to match a v2X64 repeating shuffle pattern that represents the
  // LHS/RHS inputs for the lower/upper halves.
  SmallVector<int, 16> RepeatedMask, HalfMask;
  if (!isRepeatedTargetShuffleMask(128, VT, TargetMask, RepeatedMask) ||
      !scaleShuffleElements(RepeatedMask, 2, HalfMask))
    return SDValue();
  assert(isUndefOrZeroOrInRange(HalfMask, 0, 4) && "Illegal shuffle");

  // Pack nodes are always folded; horizontal ops only when the helper
  // approves.
  const bool SingleOp = TargetOps.size() == 1;
  if (IsHorizOp && !shouldUseHorizontalOp(SingleOp, DAG, Subtarget))
    return SDValue();

  // Indices 0-1 refer to LHS and everything else to RHS; the low bit of the
  // index selects which operand of that node is taken.
  // NOTE(review): sentinel (undef/zero) indices are only remapped below in
  // the single-operand case; with two operands a sentinel index simply reads
  // RHS.getOperand(Idx & 1) - confirm that is intended.
  auto SelectInput = [&](int Idx) {
    SDValue Src = isInRange(Idx, 0, 2) ? LHS : RHS;
    return Src.getOperand(Idx & 1);
  };
  const int LoIdx = HalfMask[0];
  const int HiIdx = HalfMask[1];
  SDValue Lo = SelectInput(LoIdx);
  SDValue Hi = SelectInput(HiIdx);

  if (SingleOp) {
    // Replace halves the mask marks as undef/zero with explicit undef/zero
    // vectors of the source operand type.
    MVT SrcVT = LHS.getOperand(0).getSimpleValueType();
    SDValue Undef = DAG.getUNDEF(SrcVT);
    SDValue Zero = getZeroVector(SrcVT, Subtarget, DAG, DL);
    auto ApplySentinel = [&](int Idx, SDValue V) {
      if (Idx == SM_SentinelUndef)
        return Undef;
      if (Idx == SM_SentinelZero)
        return Zero;
      return V;
    };
    Lo = ApplySentinel(LoIdx, Lo);
    Hi = ApplySentinel(HiIdx, Hi);
  }

  SDValue Merged = DAG.getNode(SrcOpc, DL, SrcNodeVT, Lo, Hi);
  return DAG.getBitcast(VT, Merged);
}
|
||||
|
||||
// Attempt to commute shufps LHS loads:
|
||||
// permilps(shufps(load(),x)) --> permilps(shufps(x,load()))
|
||||
static SDValue combineCommutableSHUFP(SDValue N, MVT VT, const SDLoc &DL,
|
||||
|
@ -35909,51 +35969,8 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
|
|||
SmallVector<int, 4> Mask;
|
||||
unsigned Opcode = N.getOpcode();
|
||||
|
||||
bool IsUnary;
|
||||
SmallVector<int, 64> TargetMask;
|
||||
SmallVector<SDValue, 2> TargetOps;
|
||||
if (isTargetShuffle(Opcode))
|
||||
getTargetShuffleMask(N.getNode(), VT, true, TargetOps, TargetMask, IsUnary);
|
||||
|
||||
// Combine binary shuffle of 2 similar 'Horizontal' instructions into a
|
||||
// single instruction. Attempt to match a v2X64 repeating shuffle pattern that
|
||||
// represents the LHS/RHS inputs for the lower/upper halves.
|
||||
if (!TargetMask.empty() && 0 < TargetOps.size() && TargetOps.size() <= 2) {
|
||||
SDValue BC0 = peekThroughBitcasts(TargetOps.front());
|
||||
SDValue BC1 = peekThroughBitcasts(TargetOps.back());
|
||||
EVT VT0 = BC0.getValueType();
|
||||
EVT VT1 = BC1.getValueType();
|
||||
unsigned Opcode0 = BC0.getOpcode();
|
||||
unsigned Opcode1 = BC1.getOpcode();
|
||||
bool isHoriz = (Opcode0 == X86ISD::FHADD || Opcode0 == X86ISD::HADD ||
|
||||
Opcode0 == X86ISD::FHSUB || Opcode0 == X86ISD::HSUB);
|
||||
bool isPack = (Opcode0 == X86ISD::PACKSS || Opcode0 == X86ISD::PACKUS);
|
||||
if (Opcode0 == Opcode1 && VT0 == VT1 && (isHoriz || isPack)) {
|
||||
SmallVector<int, 16> TargetMask128, WideMask128;
|
||||
if (isRepeatedTargetShuffleMask(128, VT, TargetMask, TargetMask128) &&
|
||||
scaleShuffleElements(TargetMask128, 2, WideMask128)) {
|
||||
assert(isUndefOrZeroOrInRange(WideMask128, 0, 4) && "Illegal shuffle");
|
||||
bool SingleOp = (TargetOps.size() == 1);
|
||||
if (!isHoriz || shouldUseHorizontalOp(SingleOp, DAG, Subtarget)) {
|
||||
SDValue Lo = isInRange(WideMask128[0], 0, 2) ? BC0 : BC1;
|
||||
SDValue Hi = isInRange(WideMask128[1], 0, 2) ? BC0 : BC1;
|
||||
Lo = Lo.getOperand(WideMask128[0] & 1);
|
||||
Hi = Hi.getOperand(WideMask128[1] & 1);
|
||||
if (SingleOp) {
|
||||
MVT SrcVT = BC0.getOperand(0).getSimpleValueType();
|
||||
SDValue Undef = DAG.getUNDEF(SrcVT);
|
||||
SDValue Zero = getZeroVector(SrcVT, Subtarget, DAG, DL);
|
||||
Lo = (WideMask128[0] == SM_SentinelZero ? Zero : Lo);
|
||||
Hi = (WideMask128[1] == SM_SentinelZero ? Zero : Hi);
|
||||
Lo = (WideMask128[0] == SM_SentinelUndef ? Undef : Lo);
|
||||
Hi = (WideMask128[1] == SM_SentinelUndef ? Undef : Hi);
|
||||
}
|
||||
SDValue Horiz = DAG.getNode(Opcode0, DL, VT0, Lo, Hi);
|
||||
return DAG.getBitcast(VT, Horiz);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (SDValue R = combineShuffleWithHorizOp(N, VT, DL, DAG, Subtarget))
|
||||
return R;
|
||||
|
||||
if (SDValue R = combineCommutableSHUFP(N, VT, DL, DAG))
|
||||
return R;
|
||||
|
|
Loading…
Reference in New Issue