[SelectionDAG] Merge FoldConstantVectorArithmetic into FoldConstantArithmetic (PR36544)

This patch merges FoldConstantVectorArithmetic back into FoldConstantArithmetic.

Like FoldConstantVectorArithmetic, we now handle vector ops with any operand count, but we currently still only handle binops for scalar types. This can be improved in future patches; in particular, some common unary/ternary ops still have poor constant folding.
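
As a rough illustration of that contract (a stand-alone C++ sketch with invented names like foldScalar/foldVector, not the LLVM code): scalar folding still insists on exactly two operands, while the vector path accepts any operand count and folds lane by lane.

#include <cstddef>
#include <optional>
#include <vector>

using Lane = long long; // stand-in for one constant operand/lane

// Scalar types: only two-operand (binop) folds are attempted for now.
std::optional<Lane> foldScalar(char Op, const std::vector<Lane> &Ops) {
  if (Ops.size() != 2)
    return std::nullopt;
  switch (Op) {
  case '+': return Ops[0] + Ops[1];
  case '*': return Ops[0] * Ops[1];
  default:  return std::nullopt;
  }
}

// Vector types: any operand count is accepted; lane L of the result is
// produced by folding lane L of every operand (here just for '+' and '*').
std::optional<std::vector<Lane>>
foldVector(char Op, const std::vector<std::vector<Lane>> &Ops,
           std::size_t NumLanes) {
  if (Ops.empty() || (Op != '+' && Op != '*'))
    return std::nullopt;
  std::vector<Lane> Result;
  for (std::size_t L = 0; L != NumLanes; ++L) {
    Lane Acc = Ops[0][L];
    for (std::size_t I = 1; I != Ops.size(); ++I)
      Acc = (Op == '+') ? Acc + Ops[I][L] : Acc * Ops[I][L];
    Result.push_back(Acc);
  }
  return Result;
}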

There's one change in functionality that causes test changes: FoldConstantVectorArithmetic bails early if the build/splat vector isn't composed entirely of constant (and undef) elements, but FoldConstantArithmetic doesn't. It instead attempts to fold the scalar nodes and bails only if they fail to regenerate a constant/undef result, which allows some additional identity/undef patterns to be handled.
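
A minimal stand-alone sketch of that behavioural difference (invented names foldLane/foldVectorLanes, simplified types; not the LLVM implementation): each lane is folded through a routine that also knows identity rules, and the vector fold gives up only when a lane fails to produce a constant, so a per-lane identity such as x * 0 -> 0 can fold even though x is unknown.

#include <cstddef>
#include <iostream>
#include <optional>
#include <vector>

// One lane of a build vector: either a known constant or an opaque value.
struct Lane {
  bool IsConst = false;
  long long Val = 0;
};

// Per-lane folder: ordinary arithmetic on two constants, plus an identity
// rule that succeeds even when one operand is unknown (x * 0 -> 0).
std::optional<long long> foldLane(char Op, Lane A, Lane B) {
  if (A.IsConst && B.IsConst)
    return Op == '*' ? A.Val * B.Val : A.Val + B.Val;
  if (Op == '*' && ((A.IsConst && A.Val == 0) || (B.IsConst && B.Val == 0)))
    return 0;
  return std::nullopt; // lane did not fold to a constant
}

// Fold lane by lane; bail only if some lane fails to regenerate a constant,
// rather than requiring all input lanes to be constants up front.
std::optional<std::vector<long long>>
foldVectorLanes(char Op, const std::vector<Lane> &V1,
                const std::vector<Lane> &V2) {
  std::vector<long long> Out;
  for (std::size_t I = 0; I != V1.size(); ++I) {
    std::optional<long long> R = foldLane(Op, V1[I], V2[I]);
    if (!R)
      return std::nullopt;
    Out.push_back(*R);
  }
  return Out;
}

int main() {
  std::vector<Lane> X = {{false, 0}, {false, 0}};  // non-constant lanes
  std::vector<Lane> Zero = {{true, 0}, {true, 0}}; // constant splat of 0
  if (auto R = foldVectorLanes('*', X, Zero))
    std::cout << "folded to <" << (*R)[0] << ", " << (*R)[1] << ">\n";
}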

Differential Revision: https://reviews.llvm.org/D113300
Author: Simon Pilgrim
Date: 2021-11-09 10:18:00 +00:00
commit 58c01ef270
parent 3bdf738d1b

4 changed files with 80 additions and 215 deletions


@@ -1732,9 +1732,6 @@ public:
   SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT,
                                  ArrayRef<SDValue> Ops);
-  SDValue FoldConstantVectorArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT,
-                                       ArrayRef<SDValue> Ops);
-
   /// Fold floating-point operations with 2 operands when both operands are
   /// constants and/or undefined.
   SDValue foldConstantFPMath(unsigned Opcode, const SDLoc &DL, EVT VT,


@@ -4888,7 +4888,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
   case ISD::CTTZ_ZERO_UNDEF:
   case ISD::CTPOP: {
     SDValue Ops = {Operand};
-    if (SDValue Fold = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops))
+    if (SDValue Fold = FoldConstantArithmetic(Opcode, DL, VT, Ops))
       return Fold;
   }
   }
@@ -5268,162 +5268,56 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
   if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::CONCAT_VECTORS)
     return SDValue();
 
-  // TODO: For now, the array Ops should only contain two values.
-  // This enforcement will be removed once this function is merged with
-  // FoldConstantVectorArithmetic
-  if (Ops.size() != 2)
+  unsigned NumOps = Ops.size();
+  if (NumOps == 0)
     return SDValue();
 
   if (isUndef(Opcode, Ops))
     return getUNDEF(VT);
 
-  SDNode *N1 = Ops[0].getNode();
-  SDNode *N2 = Ops[1].getNode();
-
   // Handle the case of two scalars.
-  if (auto *C1 = dyn_cast<ConstantSDNode>(N1)) {
-    if (auto *C2 = dyn_cast<ConstantSDNode>(N2)) {
-      if (C1->isOpaque() || C2->isOpaque())
-        return SDValue();
-
-      Optional<APInt> FoldAttempt =
-          FoldValue(Opcode, C1->getAPIntValue(), C2->getAPIntValue());
-      if (!FoldAttempt)
-        return SDValue();
-
-      SDValue Folded = getConstant(FoldAttempt.getValue(), DL, VT);
-      assert((!Folded || !VT.isVector()) &&
-             "Can't fold vectors ops with scalar operands");
-      return Folded;
-    }
-  }
-
-  // fold (add Sym, c) -> Sym+c
-  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N1))
-    return FoldSymbolOffset(Opcode, VT, GA, N2);
-  if (TLI->isCommutativeBinOp(Opcode))
-    if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N2))
-      return FoldSymbolOffset(Opcode, VT, GA, N1);
-
-  // If this is a bitwise logic opcode see if we can fold bitcasted ops.
-  // TODO: Can we generalize this and fold any bitcasted constant data?
-  if (ISD::isBitwiseLogicOp(Opcode) && N1->getOpcode() == ISD::BITCAST &&
-      N2->getOpcode() == ISD::BITCAST) {
-    SDValue InnerN1 = peekThroughBitcasts(N1->getOperand(0));
-    SDValue InnerN2 = peekThroughBitcasts(N2->getOperand(0));
-    EVT InnerVT = InnerN1.getValueType();
-    if (InnerVT == InnerN2.getValueType() && InnerVT.isInteger())
-      if (SDValue C =
-              FoldConstantArithmetic(Opcode, DL, InnerVT, {InnerN1, InnerN2}))
-        return getBitcast(VT, C);
-  }
-
-  // For fixed width vectors, extract each constant element and fold them
-  // individually. Either input may be an undef value.
-  bool IsBVOrSV1 = N1->getOpcode() == ISD::BUILD_VECTOR ||
-                   N1->getOpcode() == ISD::SPLAT_VECTOR;
-  if (!IsBVOrSV1 && !N1->isUndef())
-    return SDValue();
-  bool IsBVOrSV2 = N2->getOpcode() == ISD::BUILD_VECTOR ||
-                   N2->getOpcode() == ISD::SPLAT_VECTOR;
-  if (!IsBVOrSV2 && !N2->isUndef())
-    return SDValue();
-
-  // If both operands are undef, that's handled the same way as scalars.
-  if (!IsBVOrSV1 && !IsBVOrSV2)
-    return SDValue();
-
-  EVT SVT = VT.getScalarType();
-  EVT LegalSVT = SVT;
-  if (NewNodesMustHaveLegalTypes && LegalSVT.isInteger()) {
-    LegalSVT = TLI->getTypeToTransformTo(*getContext(), LegalSVT);
-    if (LegalSVT.bitsLT(SVT))
-      return SDValue();
-  }
-
-  SmallVector<SDValue, 4> Outputs;
-  unsigned NumElts = 0;
-  if (IsBVOrSV1)
-    NumElts = std::max(NumElts, N1->getNumOperands());
-  if (IsBVOrSV2)
-    NumElts = std::max(NumElts, N2->getNumOperands());
-  assert(NumElts != 0 && "Expected non-zero operands");
-  // Scalable vectors should only be SPLAT_VECTOR or UNDEF here. We only need
-  // one iteration for that.
-  assert((!VT.isScalableVector() || NumElts == 1) &&
-         "Scalable vector should only have one scalar");
-
-  for (unsigned I = 0; I != NumElts; ++I) {
-    // We can have a fixed length SPLAT_VECTOR and a BUILD_VECTOR so we need
-    // to use operand 0 of the SPLAT_VECTOR for each fixed element.
-    SDValue V1;
-    if (N1->getOpcode() == ISD::BUILD_VECTOR)
-      V1 = N1->getOperand(I);
-    else if (N1->getOpcode() == ISD::SPLAT_VECTOR)
-      V1 = N1->getOperand(0);
-    else
-      V1 = getUNDEF(SVT);
-
-    SDValue V2;
-    if (N2->getOpcode() == ISD::BUILD_VECTOR)
-      V2 = N2->getOperand(I);
-    else if (N2->getOpcode() == ISD::SPLAT_VECTOR)
-      V2 = N2->getOperand(0);
-    else
-      V2 = getUNDEF(SVT);
-
-    if (SVT.isInteger()) {
-      if (V1.getValueType().bitsGT(SVT))
-        V1 = getNode(ISD::TRUNCATE, DL, SVT, V1);
-      if (V2.getValueType().bitsGT(SVT))
-        V2 = getNode(ISD::TRUNCATE, DL, SVT, V2);
-    }
-
-    if (V1.getValueType() != SVT || V2.getValueType() != SVT)
-      return SDValue();
-
-    // Fold one vector element.
-    SDValue ScalarResult = getNode(Opcode, DL, SVT, V1, V2);
-    if (LegalSVT != SVT)
-      ScalarResult = getNode(ISD::SIGN_EXTEND, DL, LegalSVT, ScalarResult);
-
-    // Scalar folding only succeeded if the result is a constant or UNDEF.
-    if (!ScalarResult.isUndef() && ScalarResult.getOpcode() != ISD::Constant &&
-        ScalarResult.getOpcode() != ISD::ConstantFP)
-      return SDValue();
-    Outputs.push_back(ScalarResult);
-  }
-
-  if (N1->getOpcode() == ISD::BUILD_VECTOR ||
-      N2->getOpcode() == ISD::BUILD_VECTOR) {
-    assert(VT.getVectorNumElements() == Outputs.size() &&
-           "Vector size mismatch!");
-
-    // Build a big vector out of the scalar elements we generated.
-    return getBuildVector(VT, SDLoc(), Outputs);
-  }
-
-  assert((N1->getOpcode() == ISD::SPLAT_VECTOR ||
-          N2->getOpcode() == ISD::SPLAT_VECTOR) &&
-         "One operand should be a splat vector");
-
-  assert(Outputs.size() == 1 && "Vector size mismatch!");
-  return getSplatVector(VT, SDLoc(), Outputs[0]);
-}
-
-// TODO: Merge with FoldConstantArithmetic
-SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
-                                                   const SDLoc &DL, EVT VT,
-                                                   ArrayRef<SDValue> Ops) {
-  // If the opcode is a target-specific ISD node, there's nothing we can
-  // do here and the operand rules may not line up with the below, so
-  // bail early.
-  if (Opcode >= ISD::BUILTIN_OP_END)
-    return SDValue();
-
-  if (isUndef(Opcode, Ops))
-    return getUNDEF(VT);
+  if (NumOps == 2) {
+    // TODO: Move foldConstantFPMath here?
+
+    if (auto *C1 = dyn_cast<ConstantSDNode>(Ops[0])) {
+      if (auto *C2 = dyn_cast<ConstantSDNode>(Ops[1])) {
+        if (C1->isOpaque() || C2->isOpaque())
+          return SDValue();
+
+        Optional<APInt> FoldAttempt =
+            FoldValue(Opcode, C1->getAPIntValue(), C2->getAPIntValue());
+        if (!FoldAttempt)
+          return SDValue();
+
+        SDValue Folded = getConstant(FoldAttempt.getValue(), DL, VT);
+        assert((!Folded || !VT.isVector()) &&
+               "Can't fold vectors ops with scalar operands");
+        return Folded;
+      }
+    }
+
+    // fold (add Sym, c) -> Sym+c
+    if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ops[0]))
+      return FoldSymbolOffset(Opcode, VT, GA, Ops[1].getNode());
+    if (TLI->isCommutativeBinOp(Opcode))
+      if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ops[1]))
+        return FoldSymbolOffset(Opcode, VT, GA, Ops[0].getNode());
+
+    // If this is a bitwise logic opcode see if we can fold bitcasted ops.
+    // TODO: Can we generalize this and fold any bitcasted constant data?
+    if (ISD::isBitwiseLogicOp(Opcode) && Ops[0].getOpcode() == ISD::BITCAST &&
+        Ops[1].getOpcode() == ISD::BITCAST) {
+      SDValue InnerN1 = peekThroughBitcasts(Ops[0].getOperand(0));
+      SDValue InnerN2 = peekThroughBitcasts(Ops[1].getOperand(0));
+      EVT InnerVT = InnerN1.getValueType();
+      if (InnerVT == InnerN2.getValueType() && InnerVT.isInteger())
+        if (SDValue C =
+                FoldConstantArithmetic(Opcode, DL, InnerVT, {InnerN1, InnerN2}))
+          return getBitcast(VT, C);
+    }
+  }
 
-  // We can only fold vectors - maybe merge with FoldConstantArithmetic someday?
+  // This is for vector folding only from here on.
   if (!VT.isVector())
     return SDValue();
@@ -5434,19 +5328,16 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
            Op.getValueType().getVectorElementCount() == NumElts;
   };
 
-  auto IsConstantBuildVectorSplatVectorOrUndef = [](const SDValue &Op) {
-    APInt SplatVal;
-    BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Op);
+  auto IsBuildVectorSplatVectorOrUndef = [](const SDValue &Op) {
     return Op.isUndef() || Op.getOpcode() == ISD::CONDCODE ||
-           (BV && BV->isConstant()) ||
-           (Op.getOpcode() == ISD::SPLAT_VECTOR &&
-            ISD::isConstantSplatVector(Op.getNode(), SplatVal));
+           Op.getOpcode() == ISD::BUILD_VECTOR ||
+           Op.getOpcode() == ISD::SPLAT_VECTOR;
   };
 
   // All operands must be vector types with the same number of elements as
-  // the result type and must be either UNDEF or a build vector of constant
-  // or UNDEF scalars.
-  if (!llvm::all_of(Ops, IsConstantBuildVectorSplatVectorOrUndef) ||
+  // the result type and must be either UNDEF or a build/splat vector
+  // or UNDEF scalars.
+  if (!llvm::all_of(Ops, IsBuildVectorSplatVectorOrUndef) ||
       !llvm::all_of(Ops, IsScalarOrSameVectorSize))
     return SDValue();
@@ -5466,17 +5357,16 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
   // For scalable vector types we know we're dealing with SPLAT_VECTORs. We
   // only have one operand to check. For fixed-length vector types we may have
   // a combination of BUILD_VECTOR and SPLAT_VECTOR.
-  unsigned NumOperands = NumElts.isScalable() ? 1 : NumElts.getFixedValue();
+  unsigned NumVectorElts = NumElts.isScalable() ? 1 : NumElts.getFixedValue();
 
   // Constant fold each scalar lane separately.
   SmallVector<SDValue, 4> ScalarResults;
-  for (unsigned I = 0; I != NumOperands; I++) {
+  for (unsigned I = 0; I != NumVectorElts; I++) {
     SmallVector<SDValue, 4> ScalarOps;
     for (SDValue Op : Ops) {
       EVT InSVT = Op.getValueType().getScalarType();
       if (Op.getOpcode() != ISD::BUILD_VECTOR &&
           Op.getOpcode() != ISD::SPLAT_VECTOR) {
-        // We've checked that this is UNDEF or a constant of some kind.
         if (Op.isUndef())
           ScalarOps.push_back(getUNDEF(InSVT));
         else
@@ -6160,7 +6050,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
       return V;
 
     // Vector constant folding.
     SDValue Ops[] = {N1, N2, N3};
-    if (SDValue V = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops)) {
+    if (SDValue V = FoldConstantArithmetic(Opcode, DL, VT, Ops)) {
      NewSDValueDbgMsg(V, "New node vector constant folding: ", this);
      return V;
    }


@@ -569,27 +569,27 @@ define void @test_urem_vec(<3 x i11>* %X) nounwind {
 ; RV32MV-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
 ; RV32MV-NEXT:    addi a1, sp, 8
 ; RV32MV-NEXT:    vle16.v v8, (a1)
+; RV32MV-NEXT:    vmv.v.i v9, 10
+; RV32MV-NEXT:    addi a1, zero, 9
+; RV32MV-NEXT:    vsetvli zero, zero, e16, mf2, tu, mu
+; RV32MV-NEXT:    vmv.s.x v9, a1
+; RV32MV-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
 ; RV32MV-NEXT:    lui a1, %hi(.LCPI4_0)
 ; RV32MV-NEXT:    addi a1, a1, %lo(.LCPI4_0)
-; RV32MV-NEXT:    vle16.v v9, (a1)
-; RV32MV-NEXT:    vid.v v10
-; RV32MV-NEXT:    vsub.vv v8, v8, v10
-; RV32MV-NEXT:    vmul.vv v8, v8, v9
-; RV32MV-NEXT:    vadd.vv v9, v8, v8
-; RV32MV-NEXT:    addi a1, zero, 9
-; RV32MV-NEXT:    vmv.v.i v10, 10
-; RV32MV-NEXT:    vsetvli zero, zero, e16, mf2, tu, mu
-; RV32MV-NEXT:    vmv.s.x v10, a1
-; RV32MV-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; RV32MV-NEXT:    vsll.vv v9, v9, v10
-; RV32MV-NEXT:    addi a1, zero, 2047
-; RV32MV-NEXT:    vand.vx v8, v8, a1
+; RV32MV-NEXT:    vle16.v v10, (a1)
+; RV32MV-NEXT:    vid.v v11
+; RV32MV-NEXT:    vsub.vv v8, v8, v11
+; RV32MV-NEXT:    vmul.vv v8, v8, v10
+; RV32MV-NEXT:    vadd.vv v10, v8, v8
+; RV32MV-NEXT:    vsll.vv v9, v10, v9
 ; RV32MV-NEXT:    vmv.v.i v10, 0
-; RV32MV-NEXT:    addi a2, zero, 1
+; RV32MV-NEXT:    addi a1, zero, 1
 ; RV32MV-NEXT:    vsetvli zero, zero, e16, mf2, tu, mu
 ; RV32MV-NEXT:    vmv1r.v v11, v10
-; RV32MV-NEXT:    vmv.s.x v11, a2
+; RV32MV-NEXT:    vmv.s.x v11, a1
+; RV32MV-NEXT:    addi a1, zero, 2047
 ; RV32MV-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
+; RV32MV-NEXT:    vand.vx v8, v8, a1
 ; RV32MV-NEXT:    lui a2, %hi(.LCPI4_1)
 ; RV32MV-NEXT:    addi a2, a2, %lo(.LCPI4_1)
 ; RV32MV-NEXT:    vle16.v v12, (a2)
@@ -634,27 +634,27 @@ define void @test_urem_vec(<3 x i11>* %X) nounwind {
 ; RV64MV-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
 ; RV64MV-NEXT:    addi a1, sp, 8
 ; RV64MV-NEXT:    vle16.v v8, (a1)
+; RV64MV-NEXT:    vmv.v.i v9, 10
+; RV64MV-NEXT:    addi a1, zero, 9
+; RV64MV-NEXT:    vsetvli zero, zero, e16, mf2, tu, mu
+; RV64MV-NEXT:    vmv.s.x v9, a1
+; RV64MV-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
 ; RV64MV-NEXT:    lui a1, %hi(.LCPI4_0)
 ; RV64MV-NEXT:    addi a1, a1, %lo(.LCPI4_0)
-; RV64MV-NEXT:    vle16.v v9, (a1)
-; RV64MV-NEXT:    vid.v v10
-; RV64MV-NEXT:    vsub.vv v8, v8, v10
-; RV64MV-NEXT:    vmul.vv v8, v8, v9
-; RV64MV-NEXT:    vadd.vv v9, v8, v8
-; RV64MV-NEXT:    addi a1, zero, 9
-; RV64MV-NEXT:    vmv.v.i v10, 10
-; RV64MV-NEXT:    vsetvli zero, zero, e16, mf2, tu, mu
-; RV64MV-NEXT:    vmv.s.x v10, a1
-; RV64MV-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; RV64MV-NEXT:    vsll.vv v9, v9, v10
-; RV64MV-NEXT:    addi a1, zero, 2047
-; RV64MV-NEXT:    vand.vx v8, v8, a1
+; RV64MV-NEXT:    vle16.v v10, (a1)
+; RV64MV-NEXT:    vid.v v11
+; RV64MV-NEXT:    vsub.vv v8, v8, v11
+; RV64MV-NEXT:    vmul.vv v8, v8, v10
+; RV64MV-NEXT:    vadd.vv v10, v8, v8
+; RV64MV-NEXT:    vsll.vv v9, v10, v9
 ; RV64MV-NEXT:    vmv.v.i v10, 0
-; RV64MV-NEXT:    addi a2, zero, 1
+; RV64MV-NEXT:    addi a1, zero, 1
 ; RV64MV-NEXT:    vsetvli zero, zero, e16, mf2, tu, mu
 ; RV64MV-NEXT:    vmv1r.v v11, v10
-; RV64MV-NEXT:    vmv.s.x v11, a2
+; RV64MV-NEXT:    vmv.s.x v11, a1
+; RV64MV-NEXT:    addi a1, zero, 2047
 ; RV64MV-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
+; RV64MV-NEXT:    vand.vx v8, v8, a1
 ; RV64MV-NEXT:    lui a2, %hi(.LCPI4_1)
 ; RV64MV-NEXT:    addi a2, a2, %lo(.LCPI4_1)
 ; RV64MV-NEXT:    vle16.v v12, (a2)


@@ -260,34 +260,12 @@ define <4 x i32> @cmp_sel_0_or_1_vec(<4 x i32> %x, <4 x i32> %y) {
 define <2 x i37> @ossfuzz21167(<2 x i37> %x, <2 x i37> %y) {
 ; SSE-LABEL: ossfuzz21167:
 ; SSE:       # %bb.0: # %BB
-; SSE-NEXT:    psllq $27, %xmm1
-; SSE-NEXT:    movdqa %xmm1, %xmm0
-; SSE-NEXT:    psrad $27, %xmm0
-; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
-; SSE-NEXT:    psrlq $27, %xmm1
-; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE-NEXT:    movdqa {{.*#+}} xmm0 = [2147483648,2147483648]
-; SSE-NEXT:    pxor %xmm0, %xmm1
-; SSE-NEXT:    movdqa %xmm1, %xmm2
-; SSE-NEXT:    pcmpgtd %xmm0, %xmm2
-; SSE-NEXT:    pcmpeqd %xmm0, %xmm1
-; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; SSE-NEXT:    pand %xmm2, %xmm1
-; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
-; SSE-NEXT:    por %xmm1, %xmm0
-; SSE-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE-NEXT:    xorps %xmm0, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: ossfuzz21167:
 ; AVX:       # %bb.0: # %BB
-; AVX-NEXT:    vpsllq $27, %xmm1, %xmm0
-; AVX-NEXT:    vpsrad $27, %xmm0, %xmm1
-; AVX-NEXT:    vpsrlq $27, %xmm0, %xmm0
-; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
-; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpsrlq $63, %xmm0, %xmm0
+; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 BB:
   %c0 = icmp sgt <2 x i37> %y, zeroinitializer