forked from OSchip/llvm-project
[PowerPC] Implement BuildSDIVPow2, lower i64 pow2 sdiv using sradi
PPCISelDAGToDAG contained existing code to lower i32 sdiv by a power-of-2 using srawi/addze, but did not implement the i64 case. DAGCombine now contains a callback specifically designed for this purpose (BuildSDIVPow2), and part of the logic has been moved to an implementation of that callback. Doing this lowering using BuildSDIVPow2 likely does not matter, compared to handling everything in PPCISelDAGToDAG, for the positive divisor case, but the negative divisor case, which generates an additional negation, can potentially benefit from additional folding from DAGCombine. Now, both the i32 and the i64 cases have been implemented. Fixes PR20732. llvm-svn: 224033
This commit is contained in:
parent
256ecc3c2a
commit
13d104bf78
|
@ -1041,35 +1041,26 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
|
|||
MVT::Other, N->getOperand(0));
|
||||
}
|
||||
|
||||
case ISD::SDIV: {
|
||||
// FIXME: since this depends on the setting of the carry flag from the srawi
|
||||
// we should really be making notes about that for the scheduler.
|
||||
// FIXME: It sure would be nice if we could cheaply recognize the
|
||||
// srl/add/sra pattern the dag combiner will generate for this as
|
||||
// sra/addze rather than having to handle sdiv ourselves. oh well.
|
||||
unsigned Imm;
|
||||
if (isInt32Immediate(N->getOperand(1), Imm)) {
|
||||
SDValue N0 = N->getOperand(0);
|
||||
if ((signed)Imm > 0 && isPowerOf2_32(Imm)) {
|
||||
SDNode *Op =
|
||||
CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
|
||||
N0, getI32Imm(Log2_32(Imm)));
|
||||
return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
|
||||
SDValue(Op, 0), SDValue(Op, 1));
|
||||
} else if ((signed)Imm < 0 && isPowerOf2_32(-Imm)) {
|
||||
SDNode *Op =
|
||||
CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
|
||||
N0, getI32Imm(Log2_32(-Imm)));
|
||||
SDValue PT =
|
||||
SDValue(CurDAG->getMachineNode(PPC::ADDZE, dl, MVT::i32,
|
||||
SDValue(Op, 0), SDValue(Op, 1)),
|
||||
0);
|
||||
return CurDAG->SelectNodeTo(N, PPC::NEG, MVT::i32, PT);
|
||||
}
|
||||
case PPCISD::SRA_ADDZE: {
|
||||
SDValue N0 = N->getOperand(0);
|
||||
SDValue ShiftAmt =
|
||||
CurDAG->getTargetConstant(*cast<ConstantSDNode>(N->getOperand(1))->
|
||||
getConstantIntValue(), N->getValueType(0));
|
||||
if (N->getValueType(0) == MVT::i64) {
|
||||
SDNode *Op =
|
||||
CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, MVT::Glue,
|
||||
N0, ShiftAmt);
|
||||
return CurDAG->SelectNodeTo(N, PPC::ADDZE8, MVT::i64,
|
||||
SDValue(Op, 0), SDValue(Op, 1));
|
||||
} else {
|
||||
assert(N->getValueType(0) == MVT::i32 &&
|
||||
"Expecting i64 or i32 in PPCISD::SRA_ADDZE");
|
||||
SDNode *Op =
|
||||
CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
|
||||
N0, ShiftAmt);
|
||||
return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
|
||||
SDValue(Op, 0), SDValue(Op, 1));
|
||||
}
|
||||
|
||||
// Other cases are autogenerated.
|
||||
break;
|
||||
}
|
||||
|
||||
case ISD::LOAD: {
|
||||
|
|
|
@ -58,8 +58,6 @@ extern cl::opt<bool> ANDIGlueBug;
|
|||
PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM)
|
||||
: TargetLowering(TM),
|
||||
Subtarget(*TM.getSubtargetImpl()) {
|
||||
setPow2SDivIsCheap();
|
||||
|
||||
// Use _setjmp/_longjmp instead of setjmp/longjmp.
|
||||
setUseUnderscoreSetJmp(true);
|
||||
setUseUnderscoreLongJmp(true);
|
||||
|
@ -8931,6 +8929,36 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
|
|||
return SDValue();
|
||||
}
|
||||
|
||||
/// Target hook used by the DAG combiner to lower a signed division by a
/// constant power of two (or the negation of one).
///
/// \param N        The sdiv node being combined; operand 0 is the dividend.
/// \param Divisor  The constant divisor.
/// \param DAG      The selection DAG in which new nodes are created.
/// \param Created  If non-null, every node created here is appended to it.
/// \returns A PPCISD::SRA_ADDZE node (wrapped in a 0 - x negation when the
///          divisor is a negated power of two), or an empty SDValue when the
///          lowering does not apply.
SDValue
PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
                                 SelectionDAG &DAG,
                                 std::vector<SDNode *> *Created) const {
  // fold (sdiv X, pow2)
  EVT VT = N->getValueType(0);

  // The i64 form of SRA_ADDZE is selected to sradi/addze8, which are only
  // available on 64-bit subtargets. Decline so that the generic expansion is
  // used for i64 division when targeting 32-bit PowerPC.
  if (VT == MVT::i64 && !Subtarget.isPPC64())
    return SDValue();

  if ((VT != MVT::i32 && VT != MVT::i64) ||
      !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
    return SDValue();

  SDLoc DL(N);
  SDValue N0 = N->getOperand(0);

  // For a negated power of two, shift by log2(-Divisor) and negate the
  // result afterwards.
  bool IsNegPow2 = (-Divisor).isPowerOf2();
  unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
  SDValue ShiftAmt = DAG.getConstant(Lg2, VT);

  SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
  if (Created)
    Created->push_back(Op.getNode());

  if (IsNegPow2) {
    // Expressed as a generic (sub 0, x) rather than a target node.
    Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), Op);
    if (Created)
      Created->push_back(Op.getNode());
  }

  return Op;
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Inline Assembly Support
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@ -94,6 +94,12 @@ namespace llvm {
|
|||
/// code.
|
||||
SRL, SRA, SHL,
|
||||
|
||||
/// The combination of sra[wd]i and addze used to implement signed
|
||||
/// integer division by a power of 2. The first operand is the dividend,
|
||||
/// and the second is the constant shift amount (representing the
|
||||
/// divisor).
|
||||
SRA_ADDZE,
|
||||
|
||||
/// CALL - A direct function call.
|
||||
/// CALL_NOP is a call with the special NOP which follows 64-bit
|
||||
/// SVR4 calls.
|
||||
|
@ -425,6 +431,9 @@ namespace llvm {
|
|||
|
||||
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
|
||||
|
||||
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
|
||||
std::vector<SDNode *> *Created) const override;
|
||||
|
||||
unsigned getRegisterByName(const char* RegName, EVT VT) const override;
|
||||
|
||||
void computeKnownBitsForTargetNode(const SDValue Op,
|
||||
|
|
|
@ -0,0 +1,58 @@
|
|||
; RUN: llc -mcpu=ppc64 < %s | FileCheck %s
|
||||
target datalayout = "E-m:e-i64:64-n32:64"
|
||||
target triple = "powerpc64-unknown-linux-gnu"
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
define signext i32 @foo4(i32 signext %a) #0 {
|
||||
entry:
|
||||
%div = sdiv i32 %a, 8
|
||||
ret i32 %div
|
||||
|
||||
; CHECK-LABEL: @foo4
|
||||
; CHECK: srawi [[REG1:[0-9]+]], 3, 3
|
||||
; CHECK: addze [[REG2:[0-9]+]], [[REG1]]
|
||||
; CHECK: extsw 3, [[REG2]]
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
define i64 @foo8(i64 %a) #0 {
|
||||
entry:
|
||||
%div = sdiv i64 %a, 8
|
||||
ret i64 %div
|
||||
|
||||
; CHECK-LABEL: @foo8
|
||||
; CHECK: sradi [[REG1:[0-9]+]], 3, 3
|
||||
; CHECK: addze 3, [[REG1]]
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
define signext i32 @foo4n(i32 signext %a) #0 {
|
||||
entry:
|
||||
%div = sdiv i32 %a, -8
|
||||
ret i32 %div
|
||||
|
||||
; CHECK-LABEL: @foo4n
|
||||
; CHECK: srawi [[REG1:[0-9]+]], 3, 3
|
||||
; CHECK: addze [[REG2:[0-9]+]], [[REG1]]
|
||||
; CHECK: neg [[REG3:[0-9]+]], [[REG2]]
|
||||
; CHECK: extsw 3, [[REG3]]
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
define i64 @foo8n(i64 %a) #0 {
|
||||
entry:
|
||||
%div = sdiv i64 %a, -8
|
||||
ret i64 %div
|
||||
|
||||
; CHECK-LABEL: @foo8n
|
||||
; CHECK: sradi [[REG1:[0-9]+]], 3, 3
|
||||
; CHECK: addze [[REG2:[0-9]+]], [[REG1]]
|
||||
; CHECK: neg 3, [[REG2]]
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
||||
|
Loading…
Reference in New Issue