[X86][SSE] Pull out repeated shift getOpcode() calls. NFCI.
llvm-svn: 339425
parent abb71f90d5
commit 130b00bc43
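The change is a pure cleanup in LowerShift (in llvm/lib/Target/X86/X86ISelLowering.cpp): the shift opcode is read once into a local `Opc` at the top of the function and the repeated `Op.getOpcode()` calls are replaced with it, while the pre-existing inner `Opc` local that held an X86ISD shift node is renamed to `ShOpc` to avoid shadowing. A minimal stand-alone sketch of the pattern follows; `Node`, `classifyBefore`, and `classifyAfter` are made-up illustration names, not LLVM types:

// Hypothetical stand-alone illustration of the cleanup (not the LLVM sources):
// a getter that is queried repeatedly is hoisted into one local variable.
#include <cstdio>

enum Opcode { SHL, SRL, SRA };

struct Node {
  Opcode Op;
  Opcode getOpcode() const { return Op; }
};

// Before: getOpcode() is re-queried at every comparison.
static const char *classifyBefore(const Node &N) {
  if (N.getOpcode() == SHL)
    return "left";
  if (N.getOpcode() == SRL || N.getOpcode() == SRA)
    return "right";
  return "unknown";
}

// After: query once, reuse the cached value. No functional change (NFC).
static const char *classifyAfter(const Node &N) {
  Opcode Opc = N.getOpcode();
  if (Opc == SHL)
    return "left";
  if (Opc == SRL || Opc == SRA)
    return "right";
  return "unknown";
}

int main() {
  Node N{SRA};
  std::printf("%s == %s\n", classifyBefore(N), classifyAfter(N));
  return 0;
}

The motivation is readability and consistency rather than performance (getOpcode() is a trivial accessor); caching it keeps the later comparisons and the switch over the opcode uniform.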
@@ -23432,6 +23432,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
   SDLoc dl(Op);
   SDValue R = Op.getOperand(0);
   SDValue Amt = Op.getOperand(1);
+  unsigned Opc = Op.getOpcode();
   bool ConstantAmt = ISD::isBuildVectorOfConstantSDNodes(Amt.getNode());
 
   assert(VT.isVector() && "Custom lowering only for vector shifts!");
@@ -23443,26 +23444,26 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
   if (SDValue V = LowerScalarVariableShift(Op, DAG, Subtarget))
     return V;
 
-  if (SupportedVectorVarShift(VT, Subtarget, Op.getOpcode()))
+  if (SupportedVectorVarShift(VT, Subtarget, Opc))
     return Op;
 
   // XOP has 128-bit variable logical/arithmetic shifts.
   // +ve/-ve Amt = shift left/right.
   if (Subtarget.hasXOP() && (VT == MVT::v2i64 || VT == MVT::v4i32 ||
                              VT == MVT::v8i16 || VT == MVT::v16i8)) {
-    if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) {
+    if (Opc == ISD::SRL || Opc == ISD::SRA) {
       SDValue Zero = DAG.getConstant(0, dl, VT);
       Amt = DAG.getNode(ISD::SUB, dl, VT, Zero, Amt);
     }
-    if (Op.getOpcode() == ISD::SHL || Op.getOpcode() == ISD::SRL)
+    if (Opc == ISD::SHL || Opc == ISD::SRL)
       return DAG.getNode(X86ISD::VPSHL, dl, VT, R, Amt);
-    if (Op.getOpcode() == ISD::SRA)
+    if (Opc == ISD::SRA)
       return DAG.getNode(X86ISD::VPSHA, dl, VT, R, Amt);
   }
 
   // 2i64 vector logical shifts can efficiently avoid scalarization - do the
   // shifts per-lane and then shuffle the partial results back together.
-  if (VT == MVT::v2i64 && Op.getOpcode() != ISD::SRA) {
+  if (VT == MVT::v2i64 && Opc != ISD::SRA) {
     // Splat the shift amounts so the scalar shifts above will catch it.
     SDValue Amt0 = DAG.getVectorShuffle(VT, dl, Amt, Amt, {0, 0});
     SDValue Amt1 = DAG.getVectorShuffle(VT, dl, Amt, Amt, {1, 1});
@@ -23475,7 +23476,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
   // M = lshr(SIGN_MASK, Amt)
   // ashr(R, Amt) === sub(xor(lshr(R, Amt), M), M)
   if ((VT == MVT::v2i64 || (VT == MVT::v4i64 && Subtarget.hasInt256())) &&
-      Op.getOpcode() == ISD::SRA) {
+      Opc == ISD::SRA) {
     SDValue S = DAG.getConstant(APInt::getSignMask(64), dl, VT);
     SDValue M = DAG.getNode(ISD::SRL, dl, VT, S, Amt);
     R = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
@@ -23523,8 +23524,8 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
         isa<ConstantSDNode>(Amt1) && isa<ConstantSDNode>(Amt2) &&
         (VT != MVT::v16i16 ||
          is128BitLaneRepeatedShuffleMask(VT, ShuffleMask)) &&
-        (VT == MVT::v4i32 || Subtarget.hasSSE41() ||
-         Op.getOpcode() != ISD::SHL || canWidenShuffleElements(ShuffleMask))) {
+        (VT == MVT::v4i32 || Subtarget.hasSSE41() || Opc != ISD::SHL ||
+         canWidenShuffleElements(ShuffleMask))) {
       SDValue Splat1 =
           DAG.getConstant(cast<ConstantSDNode>(Amt1)->getAPIntValue(), dl, VT);
       SDValue Shift1 = DAG.getNode(Op->getOpcode(), dl, VT, R, Splat1);
@@ -23537,14 +23538,14 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
 
   // If possible, lower this packed shift into a vector multiply instead of
   // expanding it into a sequence of scalar shifts.
-  if (Op.getOpcode() == ISD::SHL)
+  if (Opc == ISD::SHL)
     if (SDValue Scale = convertShiftLeftToScale(Amt, dl, Subtarget, DAG))
       return DAG.getNode(ISD::MUL, dl, VT, R, Scale);
 
   // Constant ISD::SRL can be performed efficiently on vXi8/vXi16 vectors as we
   // can replace with ISD::MULHU, creating scale factor from (NumEltBits - Amt).
   // TODO: Improve support for the shift by zero special case.
-  if (Op.getOpcode() == ISD::SRL && ConstantAmt &&
+  if (Opc == ISD::SRL && ConstantAmt &&
       ((Subtarget.hasSSE41() && VT == MVT::v8i16) ||
        DAG.isKnownNeverZero(Amt)) &&
       (VT == MVT::v16i8 || VT == MVT::v8i16 ||
@@ -23565,7 +23566,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
   // and shift using the SSE2 variable shifts.
   // The separate results can then be blended together.
   if (VT == MVT::v4i32) {
-    unsigned Opc = Op.getOpcode();
+    unsigned ShOpc = Opc;
     SDValue Amt0, Amt1, Amt2, Amt3;
     if (ConstantAmt) {
       Amt0 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {0, 0, 0, 0});
@@ -23578,13 +23579,13 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
       default:
        llvm_unreachable("Unknown target vector shift node");
      case ISD::SHL:
-        Opc = X86ISD::VSHL;
+        ShOpc = X86ISD::VSHL;
        break;
      case ISD::SRL:
-        Opc = X86ISD::VSRL;
+        ShOpc = X86ISD::VSRL;
        break;
      case ISD::SRA:
-        Opc = X86ISD::VSRA;
+        ShOpc = X86ISD::VSRA;
        break;
      }
      // The SSE2 shifts use the lower i64 as the same shift amount for
@@ -23612,10 +23613,10 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
       }
     }
 
-    SDValue R0 = DAG.getNode(Opc, dl, VT, R, DAG.getBitcast(VT, Amt0));
-    SDValue R1 = DAG.getNode(Opc, dl, VT, R, DAG.getBitcast(VT, Amt1));
-    SDValue R2 = DAG.getNode(Opc, dl, VT, R, DAG.getBitcast(VT, Amt2));
-    SDValue R3 = DAG.getNode(Opc, dl, VT, R, DAG.getBitcast(VT, Amt3));
+    SDValue R0 = DAG.getNode(ShOpc, dl, VT, R, DAG.getBitcast(VT, Amt0));
+    SDValue R1 = DAG.getNode(ShOpc, dl, VT, R, DAG.getBitcast(VT, Amt1));
+    SDValue R2 = DAG.getNode(ShOpc, dl, VT, R, DAG.getBitcast(VT, Amt2));
+    SDValue R3 = DAG.getNode(ShOpc, dl, VT, R, DAG.getBitcast(VT, Amt3));
 
     // Merge the shifted lane results optimally with/without PBLENDW.
     // TODO - ideally shuffle combining would handle this.
@@ -23642,12 +23643,11 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
            "Unexpected vector type");
     MVT EvtSVT = Subtarget.hasBWI() ? MVT::i16 : MVT::i32;
     MVT ExtVT = MVT::getVectorVT(EvtSVT, VT.getVectorNumElements());
-    unsigned ExtOpc =
-        Op.getOpcode() == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+    unsigned ExtOpc = Opc == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
     R = DAG.getNode(ExtOpc, dl, ExtVT, R);
     Amt = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVT, Amt);
     return DAG.getNode(ISD::TRUNCATE, dl, VT,
-                       DAG.getNode(Op.getOpcode(), dl, ExtVT, R, Amt));
+                       DAG.getNode(Opc, dl, ExtVT, R, Amt));
   }
 
   if (VT == MVT::v16i8 ||
@@ -23780,8 +23780,8 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
     AHi = DAG.getBitcast(ExtVT, AHi);
     RLo = DAG.getBitcast(ExtVT, RLo);
     RHi = DAG.getBitcast(ExtVT, RHi);
-    SDValue Lo = DAG.getNode(Op.getOpcode(), dl, ExtVT, RLo, ALo);
-    SDValue Hi = DAG.getNode(Op.getOpcode(), dl, ExtVT, RHi, AHi);
+    SDValue Lo = DAG.getNode(Opc, dl, ExtVT, RLo, ALo);
+    SDValue Hi = DAG.getNode(Opc, dl, ExtVT, RHi, AHi);
     Lo = DAG.getNode(ISD::SRL, dl, ExtVT, Lo, DAG.getConstant(16, dl, ExtVT));
     Hi = DAG.getNode(ISD::SRL, dl, ExtVT, Hi, DAG.getConstant(16, dl, ExtVT));
     return DAG.getNode(X86ISD::PACKUS, dl, VT, Lo, Hi);