[X86][SSE] Begin adding modulo rotate support to LowerRotate

Prep work for PR38243 - mainly adding comments that mark where we still need to add modulo support (adding it right now causes massive codegen regressions).

I've also added modulo folding for uniform (splat) constant rotate amounts in both of the rotate-by-immediate paths (although at the moment we have no way to trigger this) and removed the old "Rotation out of range" assertions.

llvm-svn: 348366
This commit is contained in:
Simon Pilgrim 2018-12-05 14:46:37 +00:00
parent 44a40046c8
commit 32483668d7
1 changed file with 12 additions and 4 deletions

View File

@ -24765,6 +24765,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
unsigned Opcode = Op.getOpcode(); unsigned Opcode = Op.getOpcode();
unsigned EltSizeInBits = VT.getScalarSizeInBits(); unsigned EltSizeInBits = VT.getScalarSizeInBits();
// AVX512 implicitly uses modulo rotation amounts.
if (Subtarget.hasAVX512() && 32 <= EltSizeInBits) { if (Subtarget.hasAVX512() && 32 <= EltSizeInBits) {
// Attempt to rotate by immediate. // Attempt to rotate by immediate.
APInt UndefElts; APInt UndefElts;
@ -24788,6 +24789,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
// XOP has 128-bit vector variable + immediate rotates. // XOP has 128-bit vector variable + immediate rotates.
// +ve/-ve Amt = rotate left/right - just need to handle ISD::ROTL. // +ve/-ve Amt = rotate left/right - just need to handle ISD::ROTL.
// XOP implicitly uses modulo rotation amounts.
if (Subtarget.hasXOP()) { if (Subtarget.hasXOP()) {
if (VT.is256BitVector()) if (VT.is256BitVector())
return split256IntArith(Op, DAG); return split256IntArith(Op, DAG);
@ -24796,8 +24798,10 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
// Attempt to rotate by immediate. // Attempt to rotate by immediate.
if (auto *BVAmt = dyn_cast<BuildVectorSDNode>(Amt)) { if (auto *BVAmt = dyn_cast<BuildVectorSDNode>(Amt)) {
if (auto *RotateConst = BVAmt->getConstantSplatNode()) { if (auto *RotateConst = BVAmt->getConstantSplatNode()) {
uint64_t RotateAmt = RotateConst->getAPIntValue().getZExtValue(); uint64_t RotateAmt = RotateConst->getAPIntValue().urem(EltSizeInBits);
assert(RotateAmt < EltSizeInBits && "Rotation out of range"); if (RotateAmt == 0)
return R;
return DAG.getNode(X86ISD::VROTLI, DL, VT, R, return DAG.getNode(X86ISD::VROTLI, DL, VT, R,
DAG.getConstant(RotateAmt, DL, MVT::i8)); DAG.getConstant(RotateAmt, DL, MVT::i8));
} }
@ -24820,8 +24824,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
// TODO - legalizers should be able to handle this. // TODO - legalizers should be able to handle this.
if (auto *BVAmt = dyn_cast<BuildVectorSDNode>(Amt)) { if (auto *BVAmt = dyn_cast<BuildVectorSDNode>(Amt)) {
if (auto *RotateConst = BVAmt->getConstantSplatNode()) { if (auto *RotateConst = BVAmt->getConstantSplatNode()) {
uint64_t RotateAmt = RotateConst->getAPIntValue().getZExtValue(); uint64_t RotateAmt = RotateConst->getAPIntValue().urem(EltSizeInBits);
assert(RotateAmt < EltSizeInBits && "Rotation out of range");
if (RotateAmt == 0) if (RotateAmt == 0)
return R; return R;
@ -24832,6 +24835,8 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
} }
} }
// TODO: ISD::ROT* uses modulo rotate amounts, we need to handle this.
// Rotate by splat - expand back to shifts. // Rotate by splat - expand back to shifts.
// TODO - legalizers should be able to handle this. // TODO - legalizers should be able to handle this.
if (EltSizeInBits >= 16 || Subtarget.hasBWI()) { if (EltSizeInBits >= 16 || Subtarget.hasBWI()) {
@ -24856,6 +24861,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
return DAG.getNode(ISD::OR, DL, VT, SHL, SRL); return DAG.getNode(ISD::OR, DL, VT, SHL, SRL);
} }
// We don't need ModuloAmt here as we just peek at individual bits.
MVT ExtVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2); MVT ExtVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2);
auto SignBitSelect = [&](MVT SelVT, SDValue Sel, SDValue V0, SDValue V1) { auto SignBitSelect = [&](MVT SelVT, SDValue Sel, SDValue V0, SDValue V1) {
@ -24911,6 +24917,8 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
return SignBitSelect(VT, Amt, M, R); return SignBitSelect(VT, Amt, M, R);
} }
// TODO: We need explicit modulo rotation amounts for everything from here on.
bool ConstantAmt = ISD::isBuildVectorOfConstantSDNodes(Amt.getNode()); bool ConstantAmt = ISD::isBuildVectorOfConstantSDNodes(Amt.getNode());
bool LegalVarShifts = SupportedVectorVarShift(VT, Subtarget, ISD::SHL) && bool LegalVarShifts = SupportedVectorVarShift(VT, Subtarget, ISD::SHL) &&
SupportedVectorVarShift(VT, Subtarget, ISD::SRL); SupportedVectorVarShift(VT, Subtarget, ISD::SRL);