[PowerPC] Implement BuildSDIVPow2, lower i64 pow2 sdiv using sradi

PPCISelDAGToDAG contained existing code to lower i32 sdiv by a power-of-2 using srawi/addze, but did not implement the i64 case. DAGCombine now contains a callback specifically designed for this purpose (BuildSDIVPow2), and part of the logic has been moved to an implementation of that callback. Doing this lowering using BuildSDIVPow2 likely does not matter, compared to handling everything in PPCISelDAGToDAG, for the positive divisor case, but the negative divisor case, which generates an additional negation, can potentially benefit from additional folding from DAGCombine. Now, both the i32 and the i64 cases have been implemented. Fixes PR20732. llvm-svn: 224033
2014-12-11 18:37:52 +00:00 · 2014-12-11 18:37:52 +00:00 · 13d104bf78
parent 256ecc3c2a
commit 13d104bf78
4 changed files with 116 additions and 30 deletions
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@ -1041,37 +1041,28 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
                                  MVT::Other, N->getOperand(0));
  }

-  case ISD::SDIV: {
-    // FIXME: since this depends on the setting of the carry flag from the srawi
-    //        we should really be making notes about that for the scheduler.
-    // FIXME: It sure would be nice if we could cheaply recognize the
-    //        srl/add/sra pattern the dag combiner will generate for this as
-    //        sra/addze rather than having to handle sdiv ourselves.  oh well.
-    unsigned Imm;
-    if (isInt32Immediate(N->getOperand(1), Imm)) {
+  case PPCISD::SRA_ADDZE: {
    SDValue N0 = N->getOperand(0);
-      if ((signed)Imm > 0 && isPowerOf2_32(Imm)) {
+    SDValue ShiftAmt =
+      CurDAG->getTargetConstant(*cast<ConstantSDNode>(N->getOperand(1))->
+                                  getConstantIntValue(), N->getValueType(0));
+    if (N->getValueType(0) == MVT::i64) {
+      SDNode *Op =
+        CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, MVT::Glue,
+                               N0, ShiftAmt);
+      return CurDAG->SelectNodeTo(N, PPC::ADDZE8, MVT::i64,
+                                  SDValue(Op, 0), SDValue(Op, 1));
+    } else {
+      assert(N->getValueType(0) == MVT::i32 &&
+             "Expecting i64 or i32 in PPCISD::SRA_ADDZE");
      SDNode *Op =
        CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
-                                 N0, getI32Imm(Log2_32(Imm)));
+                               N0, ShiftAmt);
      return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
                                  SDValue(Op, 0), SDValue(Op, 1));
-      } else if ((signed)Imm < 0 && isPowerOf2_32(-Imm)) {
-        SDNode *Op =
-          CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
-                                 N0, getI32Imm(Log2_32(-Imm)));
-        SDValue PT =
-          SDValue(CurDAG->getMachineNode(PPC::ADDZE, dl, MVT::i32,
-                                         SDValue(Op, 0), SDValue(Op, 1)),
-                    0);
-        return CurDAG->SelectNodeTo(N, PPC::NEG, MVT::i32, PT);
    }
  }

-    // Other cases are autogenerated.
-    break;
-  }
-
  case ISD::LOAD: {
    // Handle preincrement loads.
    LoadSDNode *LD = cast<LoadSDNode>(N);
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@ -58,8 +58,6 @@ extern cl::opt<bool> ANDIGlueBug;
 PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM)
    : TargetLowering(TM),
      Subtarget(*TM.getSubtargetImpl()) {
-  setPow2SDivIsCheap();
-
  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);
@ -8931,6 +8929,36 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
  return SDValue();
 }

+SDValue
+PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
+                                  SelectionDAG &DAG,
+                                  std::vector<SDNode *> *Created) const {
+  // fold (sdiv X, pow2)
+  EVT VT = N->getValueType(0);
+  if ((VT != MVT::i32 && VT != MVT::i64) ||
+      !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
+    return SDValue();
+
+  SDLoc DL(N);
+  SDValue N0 = N->getOperand(0);
+
+  bool IsNegPow2 = (-Divisor).isPowerOf2();
+  unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
+  SDValue ShiftAmt = DAG.getConstant(Lg2, VT);
+
+  SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
+  if (Created)
+    Created->push_back(Op.getNode());
+
+  if (IsNegPow2) {
+    Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), Op);
+    if (Created)
+      Created->push_back(Op.getNode());
+  }
+
+  return Op;
+}
+
 //===----------------------------------------------------------------------===//
 // Inline Assembly Support
 //===----------------------------------------------------------------------===//
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@ -94,6 +94,12 @@ namespace llvm {
      /// code.
      SRL, SRA, SHL,

+      /// The combination of sra[wd]i and addze used to implemented signed
+      /// integer division by a power of 2. The first operand is the dividend,
+      /// and the second is the constant shift amount (representing the
+      /// divisor).
+      SRA_ADDZE,
+
      /// CALL - A direct function call.
      /// CALL_NOP is a call with the special NOP which follows 64-bit
      /// SVR4 calls.
@ -425,6 +431,9 @@ namespace llvm {

    SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

+    SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
+                          std::vector<SDNode *> *Created) const override;
+
    unsigned getRegisterByName(const char* RegName, EVT VT) const override;

    void computeKnownBitsForTargetNode(const SDValue Op,
--- a/llvm/test/CodeGen/PowerPC/sdiv-pow2.ll
+++ b/llvm/test/CodeGen/PowerPC/sdiv-pow2.ll
@ -0,0 +1,58 @@
+; RUN: llc -mcpu=ppc64 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind readnone
+define signext i32 @foo4(i32 signext %a) #0 {
+entry:
+  %div = sdiv i32 %a, 8
+  ret i32 %div
+
+; CHECK-LABEL @foo4
+; CHECK: srawi [[REG1:[0-9]+]], 3, 3
+; CHECK: addze [[REG2:[0-9]+]], [[REG1]]
+; CHECK: extsw 3, [[REG2]]
+; CHECK: blr
+}
+
+; Function Attrs: nounwind readnone
+define i64 @foo8(i64 %a) #0 {
+entry:
+  %div = sdiv i64 %a, 8
+  ret i64 %div
+
+; CHECK-LABEL @foo8
+; CHECK: sradi [[REG1:[0-9]+]], 3, 3
+; CHECK: addze 3, [[REG1]]
+; CHECK: blr
+}
+
+; Function Attrs: nounwind readnone
+define signext i32 @foo4n(i32 signext %a) #0 {
+entry:
+  %div = sdiv i32 %a, -8
+  ret i32 %div
+
+; CHECK-LABEL: @foo4n
+; CHECK: srawi [[REG1:[0-9]+]], 3, 3
+; CHECK: addze [[REG2:[0-9]+]], [[REG1]]
+; CHECK: neg [[REG3:[0-9]+]], [[REG2]]
+; CHECK: extsw 3, [[REG3]]
+; CHECK: blr
+}
+
+; Function Attrs: nounwind readnone
+define i64 @foo8n(i64 %a) #0 {
+entry:
+  %div = sdiv i64 %a, -8
+  ret i64 %div
+
+; CHECK-LABEL: @foo8n
+; CHECK: sradi [[REG1:[0-9]+]], 3, 3
+; CHECK: addze [[REG2:[0-9]+]], [[REG1]]
+; CHECK: neg 3, [[REG2]]
+; CHECK: blr
+}
+
+attributes #0 = { nounwind readnone }
+