Revert "[AArch64] Alter mull shuffle(ext(..)) combine to work on buildvectors"

This reverts commit 9fc1a0dcb7.

We have bisected a compiler crash to this revision and will provide a
test case soon.
This commit is contained in:
Reid Kleckner 2022-02-22 10:31:09 -08:00
parent 63eb963e58
commit ecb27004ec
3 changed files with 77 additions and 52 deletions

View File

@ -13448,17 +13448,33 @@ static EVT calculatePreExtendType(SDValue Extend) {
}
}
/// Combines a buildvector(sext/zext) node pattern into sext/zext(buildvector)
/// Combines a dup(sext/zext) node pattern into sext/zext(dup)
/// making use of the vector SExt/ZExt rather than the scalar SExt/ZExt
static SDValue performBuildVectorExtendCombine(SDValue BV, SelectionDAG &DAG) {
EVT VT = BV.getValueType();
if (BV.getOpcode() != ISD::BUILD_VECTOR)
static SDValue performCommonVectorExtendCombine(SDValue VectorShuffle,
SelectionDAG &DAG) {
ShuffleVectorSDNode *ShuffleNode =
dyn_cast<ShuffleVectorSDNode>(VectorShuffle.getNode());
if (!ShuffleNode)
return SDValue();
// Use the first item in the buildvector to get the size of the extend, and
// make sure it looks valid.
SDValue Extend = BV->getOperand(0);
// Ensuring the mask is zero before continuing
if (!ShuffleNode->isSplat() || ShuffleNode->getSplatIndex() != 0)
return SDValue();
SDValue InsertVectorElt = VectorShuffle.getOperand(0);
if (InsertVectorElt.getOpcode() != ISD::INSERT_VECTOR_ELT)
return SDValue();
SDValue InsertLane = InsertVectorElt.getOperand(2);
ConstantSDNode *Constant = dyn_cast<ConstantSDNode>(InsertLane.getNode());
// Ensures the insert is inserting into lane 0
if (!Constant || Constant->getZExtValue() != 0)
return SDValue();
SDValue Extend = InsertVectorElt.getOperand(1);
unsigned ExtendOpcode = Extend.getOpcode();
bool IsSExt = ExtendOpcode == ISD::SIGN_EXTEND ||
ExtendOpcode == ISD::SIGN_EXTEND_INREG ||
ExtendOpcode == ISD::AssertSext;
@ -13468,28 +13484,30 @@ static SDValue performBuildVectorExtendCombine(SDValue BV, SelectionDAG &DAG) {
// Restrict valid pre-extend data type
EVT PreExtendType = calculatePreExtendType(Extend);
if (PreExtendType.getSizeInBits() != VT.getScalarSizeInBits() / 2)
if (PreExtendType != MVT::i8 && PreExtendType != MVT::i16 &&
PreExtendType != MVT::i32)
return SDValue();
// Make sure all other operands are equally extended
for (SDValue Op : drop_begin(BV->ops())) {
unsigned Opc = Op.getOpcode();
bool OpcIsSExt = Opc == ISD::SIGN_EXTEND || Opc == ISD::SIGN_EXTEND_INREG ||
Opc == ISD::AssertSext;
if (OpcIsSExt != IsSExt || calculatePreExtendType(Op) != PreExtendType)
return SDValue();
}
EVT TargetType = VectorShuffle.getValueType();
EVT PreExtendVT = TargetType.changeVectorElementType(PreExtendType);
if (TargetType.getScalarSizeInBits() != PreExtendVT.getScalarSizeInBits() * 2)
return SDValue();
EVT PreExtendVT = VT.changeVectorElementType(PreExtendType);
EVT PreExtendLegalType =
PreExtendType.getScalarSizeInBits() < 32 ? MVT::i32 : PreExtendType;
SDLoc DL(BV);
SmallVector<SDValue, 8> NewOps;
for (SDValue Op : BV->ops())
NewOps.push_back(
DAG.getAnyExtOrTrunc(Op.getOperand(0), DL, PreExtendLegalType));
SDValue NBV = DAG.getNode(ISD::BUILD_VECTOR, DL, PreExtendVT, NewOps);
return DAG.getNode(IsSExt ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL, VT, NBV);
SDLoc DL(VectorShuffle);
SDValue InsertVectorNode = DAG.getNode(
InsertVectorElt.getOpcode(), DL, PreExtendVT, DAG.getUNDEF(PreExtendVT),
DAG.getAnyExtOrTrunc(Extend.getOperand(0), DL, PreExtendType),
DAG.getConstant(0, DL, MVT::i64));
std::vector<int> ShuffleMask(TargetType.getVectorNumElements());
SDValue VectorShuffleNode =
DAG.getVectorShuffle(PreExtendVT, DL, InsertVectorNode,
DAG.getUNDEF(PreExtendVT), ShuffleMask);
return DAG.getNode(IsSExt ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL,
TargetType, VectorShuffleNode);
}
/// Combines a mul(dup(sext/zext)) node pattern into mul(sext/zext(dup))
@ -13500,8 +13518,8 @@ static SDValue performMulVectorExtendCombine(SDNode *Mul, SelectionDAG &DAG) {
if (VT != MVT::v8i16 && VT != MVT::v4i32 && VT != MVT::v2i64)
return SDValue();
SDValue Op0 = performBuildVectorExtendCombine(Mul->getOperand(0), DAG);
SDValue Op1 = performBuildVectorExtendCombine(Mul->getOperand(1), DAG);
SDValue Op0 = performCommonVectorExtendCombine(Mul->getOperand(0), DAG);
SDValue Op1 = performCommonVectorExtendCombine(Mul->getOperand(1), DAG);
// Neither operands have been changed, don't make any further changes
if (!Op0 && !Op1)

View File

@ -156,8 +156,10 @@ entry:
define <8 x i16> @nonsplat_shuffleinsert(i8 %src, <8 x i8> %b) {
; CHECK-LABEL: nonsplat_shuffleinsert:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: dup v1.8b, w0
; CHECK-NEXT: smull v0.8h, v1.8b, v0.8b
; CHECK-NEXT: sxtb w8, w0
; CHECK-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-NEXT: dup v1.8h, w8
; CHECK-NEXT: mul v0.8h, v1.8h, v0.8h
; CHECK-NEXT: ret
entry:
%in = sext i8 %src to i16

View File

@ -201,22 +201,25 @@ define void @larger_smull(i16* nocapture noundef readonly %x, i16 noundef %y, i3
; CHECK-NEXT: b .LBB3_6
; CHECK-NEXT: .LBB3_3: // %vector.ph
; CHECK-NEXT: and x10, x9, #0xfffffff0
; CHECK-NEXT: dup v0.4h, w8
; CHECK-NEXT: add x11, x2, #32
; CHECK-NEXT: add x12, x0, #16
; CHECK-NEXT: mov x13, x10
; CHECK-NEXT: dup v1.8h, w8
; CHECK-NEXT: dup v0.4s, w8
; CHECK-NEXT: .LBB3_4: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldp q2, q3, [x12, #-16]
; CHECK-NEXT: ldp q1, q2, [x12, #-16]
; CHECK-NEXT: subs x13, x13, #16
; CHECK-NEXT: add x12, x12, #32
; CHECK-NEXT: smull2 v4.4s, v1.8h, v2.8h
; CHECK-NEXT: smull v2.4s, v0.4h, v2.4h
; CHECK-NEXT: smull2 v5.4s, v1.8h, v3.8h
; CHECK-NEXT: smull v3.4s, v0.4h, v3.4h
; CHECK-NEXT: stp q2, q4, [x11, #-32]
; CHECK-NEXT: stp q3, q5, [x11], #64
; CHECK-NEXT: sshll2 v3.4s, v1.8h, #0
; CHECK-NEXT: sshll v1.4s, v1.4h, #0
; CHECK-NEXT: sshll2 v4.4s, v2.8h, #0
; CHECK-NEXT: sshll v2.4s, v2.4h, #0
; CHECK-NEXT: mul v3.4s, v0.4s, v3.4s
; CHECK-NEXT: mul v1.4s, v0.4s, v1.4s
; CHECK-NEXT: mul v4.4s, v0.4s, v4.4s
; CHECK-NEXT: mul v2.4s, v0.4s, v2.4s
; CHECK-NEXT: stp q1, q3, [x11, #-32]
; CHECK-NEXT: stp q2, q4, [x11], #64
; CHECK-NEXT: b.ne .LBB3_4
; CHECK-NEXT: // %bb.5: // %middle.block
; CHECK-NEXT: cmp x10, x9
@ -314,22 +317,25 @@ define void @larger_umull(i16* nocapture noundef readonly %x, i16 noundef %y, i3
; CHECK-NEXT: b .LBB4_6
; CHECK-NEXT: .LBB4_3: // %vector.ph
; CHECK-NEXT: and x10, x9, #0xfffffff0
; CHECK-NEXT: dup v0.4h, w8
; CHECK-NEXT: add x11, x2, #32
; CHECK-NEXT: add x12, x0, #16
; CHECK-NEXT: mov x13, x10
; CHECK-NEXT: dup v1.8h, w8
; CHECK-NEXT: dup v0.4s, w8
; CHECK-NEXT: .LBB4_4: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldp q2, q3, [x12, #-16]
; CHECK-NEXT: ldp q1, q2, [x12, #-16]
; CHECK-NEXT: subs x13, x13, #16
; CHECK-NEXT: add x12, x12, #32
; CHECK-NEXT: umull2 v4.4s, v1.8h, v2.8h
; CHECK-NEXT: umull v2.4s, v0.4h, v2.4h
; CHECK-NEXT: umull2 v5.4s, v1.8h, v3.8h
; CHECK-NEXT: umull v3.4s, v0.4h, v3.4h
; CHECK-NEXT: stp q2, q4, [x11, #-32]
; CHECK-NEXT: stp q3, q5, [x11], #64
; CHECK-NEXT: ushll2 v3.4s, v1.8h, #0
; CHECK-NEXT: ushll v1.4s, v1.4h, #0
; CHECK-NEXT: ushll2 v4.4s, v2.8h, #0
; CHECK-NEXT: ushll v2.4s, v2.4h, #0
; CHECK-NEXT: mul v3.4s, v0.4s, v3.4s
; CHECK-NEXT: mul v1.4s, v0.4s, v1.4s
; CHECK-NEXT: mul v4.4s, v0.4s, v4.4s
; CHECK-NEXT: mul v2.4s, v0.4s, v2.4s
; CHECK-NEXT: stp q1, q3, [x11, #-32]
; CHECK-NEXT: stp q2, q4, [x11], #64
; CHECK-NEXT: b.ne .LBB4_4
; CHECK-NEXT: // %bb.5: // %middle.block
; CHECK-NEXT: cmp x10, x9
@ -429,13 +435,12 @@ define i16 @red_mla_dup_ext_u8_s8_s16(i8* noalias nocapture noundef readonly %A,
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB5_4: // %vector.ph
; CHECK-NEXT: dup v2.8b, w9
; CHECK-NEXT: and x11, x10, #0xfffffff0
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: add x8, x0, #8
; CHECK-NEXT: movi v1.2d, #0000000000000000
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: mov x12, x11
; CHECK-NEXT: sshll v2.8h, v2.8b, #0
; CHECK-NEXT: movi v1.2d, #0000000000000000
; CHECK-NEXT: dup v2.8h, w9
; CHECK-NEXT: .LBB5_5: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldp d3, d4, [x8, #-8]