From 07e673a02bb163a70b2b5eff8231578646a20120 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Mon, 10 Aug 2020 15:08:44 +0100
Subject: [PATCH] [X86][SSE] Pull out shuffle(hop,hop) combine into combineShuffleWithHorizOp helper. NFC.

---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 107 ++++++++++++++----------
 1 file changed, 62 insertions(+), 45 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 89160d228f15..0e665404ffbe 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -35847,6 +35847,66 @@ combineRedundantDWordShuffle(SDValue N, MutableArrayRef Mask,
   return V;
 }

+// TODO: Merge with foldShuffleOfHorizOp.
+static SDValue combineShuffleWithHorizOp(SDValue N, MVT VT, const SDLoc &DL,
+                                         SelectionDAG &DAG,
+                                         const X86Subtarget &Subtarget) {
+  bool IsUnary;
+  SmallVector TargetMask;
+  SmallVector TargetOps;
+  if (!isTargetShuffle(N.getOpcode()) ||
+      !getTargetShuffleMask(N.getNode(), VT, true, TargetOps, TargetMask,
+                            IsUnary))
+    return SDValue();
+
+  // Combine binary shuffle of 2 similar 'Horizontal' instructions into a
+  // single instruction. Attempt to match a v2X64 repeating shuffle pattern that
+  // represents the LHS/RHS inputs for the lower/upper halves.
+  if (TargetMask.empty() || TargetOps.empty() || 2 < TargetOps.size())
+    return SDValue();
+
+  SDValue BC0 = peekThroughBitcasts(TargetOps.front());
+  SDValue BC1 = peekThroughBitcasts(TargetOps.back());
+  EVT VT0 = BC0.getValueType();
+  EVT VT1 = BC1.getValueType();
+  unsigned Opcode0 = BC0.getOpcode();
+  unsigned Opcode1 = BC1.getOpcode();
+  if (Opcode0 != Opcode1 || VT0 != VT1)
+    return SDValue();
+
+  bool isHoriz = (Opcode0 == X86ISD::FHADD || Opcode0 == X86ISD::HADD ||
+                  Opcode0 == X86ISD::FHSUB || Opcode0 == X86ISD::HSUB);
+  bool isPack = (Opcode0 == X86ISD::PACKSS || Opcode0 == X86ISD::PACKUS);
+  if (!isHoriz && !isPack)
+    return SDValue();
+
+  SmallVector TargetMask128, WideMask128;
+  if (isRepeatedTargetShuffleMask(128, VT, TargetMask, TargetMask128) &&
+      scaleShuffleElements(TargetMask128, 2, WideMask128)) {
+    assert(isUndefOrZeroOrInRange(WideMask128, 0, 4) && "Illegal shuffle");
+    bool SingleOp = (TargetOps.size() == 1);
+    if (!isHoriz || shouldUseHorizontalOp(SingleOp, DAG, Subtarget)) {
+      SDValue Lo = isInRange(WideMask128[0], 0, 2) ? BC0 : BC1;
+      SDValue Hi = isInRange(WideMask128[1], 0, 2) ? BC0 : BC1;
+      Lo = Lo.getOperand(WideMask128[0] & 1);
+      Hi = Hi.getOperand(WideMask128[1] & 1);
+      if (SingleOp) {
+        MVT SrcVT = BC0.getOperand(0).getSimpleValueType();
+        SDValue Undef = DAG.getUNDEF(SrcVT);
+        SDValue Zero = getZeroVector(SrcVT, Subtarget, DAG, DL);
+        Lo = (WideMask128[0] == SM_SentinelZero ? Zero : Lo);
+        Hi = (WideMask128[1] == SM_SentinelZero ? Zero : Hi);
+        Lo = (WideMask128[0] == SM_SentinelUndef ? Undef : Lo);
+        Hi = (WideMask128[1] == SM_SentinelUndef ? Undef : Hi);
+      }
+      SDValue Horiz = DAG.getNode(Opcode0, DL, VT0, Lo, Hi);
+      return DAG.getBitcast(VT, Horiz);
+    }
+  }
+
+  return SDValue();
+}
+
 // Attempt to commute shufps LHS loads:
 // permilps(shufps(load(),x)) --> permilps(shufps(x,load()))
 static SDValue combineCommutableSHUFP(SDValue N, MVT VT, const SDLoc &DL,
@@ -35909,51 +35969,8 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
   SmallVector Mask;
   unsigned Opcode = N.getOpcode();

-  bool IsUnary;
-  SmallVector TargetMask;
-  SmallVector TargetOps;
-  if (isTargetShuffle(Opcode))
-    getTargetShuffleMask(N.getNode(), VT, true, TargetOps, TargetMask, IsUnary);
-
-  // Combine binary shuffle of 2 similar 'Horizontal' instructions into a
-  // single instruction. Attempt to match a v2X64 repeating shuffle pattern that
-  // represents the LHS/RHS inputs for the lower/upper halves.
-  if (!TargetMask.empty() && 0 < TargetOps.size() && TargetOps.size() <= 2) {
-    SDValue BC0 = peekThroughBitcasts(TargetOps.front());
-    SDValue BC1 = peekThroughBitcasts(TargetOps.back());
-    EVT VT0 = BC0.getValueType();
-    EVT VT1 = BC1.getValueType();
-    unsigned Opcode0 = BC0.getOpcode();
-    unsigned Opcode1 = BC1.getOpcode();
-    bool isHoriz = (Opcode0 == X86ISD::FHADD || Opcode0 == X86ISD::HADD ||
-                    Opcode0 == X86ISD::FHSUB || Opcode0 == X86ISD::HSUB);
-    bool isPack = (Opcode0 == X86ISD::PACKSS || Opcode0 == X86ISD::PACKUS);
-    if (Opcode0 == Opcode1 && VT0 == VT1 && (isHoriz || isPack)) {
-      SmallVector TargetMask128, WideMask128;
-      if (isRepeatedTargetShuffleMask(128, VT, TargetMask, TargetMask128) &&
-          scaleShuffleElements(TargetMask128, 2, WideMask128)) {
-        assert(isUndefOrZeroOrInRange(WideMask128, 0, 4) && "Illegal shuffle");
-        bool SingleOp = (TargetOps.size() == 1);
-        if (!isHoriz || shouldUseHorizontalOp(SingleOp, DAG, Subtarget)) {
-          SDValue Lo = isInRange(WideMask128[0], 0, 2) ? BC0 : BC1;
-          SDValue Hi = isInRange(WideMask128[1], 0, 2) ? BC0 : BC1;
-          Lo = Lo.getOperand(WideMask128[0] & 1);
-          Hi = Hi.getOperand(WideMask128[1] & 1);
-          if (SingleOp) {
-            MVT SrcVT = BC0.getOperand(0).getSimpleValueType();
-            SDValue Undef = DAG.getUNDEF(SrcVT);
-            SDValue Zero = getZeroVector(SrcVT, Subtarget, DAG, DL);
-            Lo = (WideMask128[0] == SM_SentinelZero ? Zero : Lo);
-            Hi = (WideMask128[1] == SM_SentinelZero ? Zero : Hi);
-            Lo = (WideMask128[0] == SM_SentinelUndef ? Undef : Lo);
-            Hi = (WideMask128[1] == SM_SentinelUndef ? Undef : Hi);
-          }
-          SDValue Horiz = DAG.getNode(Opcode0, DL, VT0, Lo, Hi);
-          return DAG.getBitcast(VT, Horiz);
-        }
-      }
-    }
-  }
+  if (SDValue R = combineShuffleWithHorizOp(N, VT, DL, DAG, Subtarget))
+    return R;

   if (SDValue R = combineCommutableSHUFP(N, VT, DL, DAG))
     return R;