[PowerPC] Implement BuildSDIVPow2, lower i64 pow2 sdiv using sradi

PPCISelDAGToDAG contained existing code to lower i32 sdiv by a power-of-2 using
srawi/addze, but did not implement the i64 case. DAGCombine now contains a
callback specifically designed for this purpose (BuildSDIVPow2), and part of
the logic has been moved to an implementation of that callback. For the
positive-divisor case, doing this lowering in BuildSDIVPow2 rather than
entirely in PPCISelDAGToDAG likely makes no difference; the negative-divisor
case, however, generates an additional negation, which can potentially
benefit from further folding by DAGCombine. Both the i32 and the i64 cases
are now implemented.

Fixes PR20732.

llvm-svn: 224033
This commit is contained in:
Hal Finkel 2014-12-11 18:37:52 +00:00
parent 256ecc3c2a
commit 13d104bf78
4 changed files with 116 additions and 30 deletions

View File

@ -1041,37 +1041,28 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
MVT::Other, N->getOperand(0));
}
case ISD::SDIV: {
// FIXME: since this depends on the setting of the carry flag from the srawi
// we should really be making notes about that for the scheduler.
// FIXME: It sure would be nice if we could cheaply recognize the
// srl/add/sra pattern the dag combiner will generate for this as
// sra/addze rather than having to handle sdiv ourselves. oh well.
unsigned Imm;
if (isInt32Immediate(N->getOperand(1), Imm)) {
case PPCISD::SRA_ADDZE: {
SDValue N0 = N->getOperand(0);
if ((signed)Imm > 0 && isPowerOf2_32(Imm)) {
SDValue ShiftAmt =
CurDAG->getTargetConstant(*cast<ConstantSDNode>(N->getOperand(1))->
getConstantIntValue(), N->getValueType(0));
if (N->getValueType(0) == MVT::i64) {
SDNode *Op =
CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, MVT::Glue,
N0, ShiftAmt);
return CurDAG->SelectNodeTo(N, PPC::ADDZE8, MVT::i64,
SDValue(Op, 0), SDValue(Op, 1));
} else {
assert(N->getValueType(0) == MVT::i32 &&
"Expecting i64 or i32 in PPCISD::SRA_ADDZE");
SDNode *Op =
CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
N0, getI32Imm(Log2_32(Imm)));
N0, ShiftAmt);
return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
SDValue(Op, 0), SDValue(Op, 1));
} else if ((signed)Imm < 0 && isPowerOf2_32(-Imm)) {
SDNode *Op =
CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
N0, getI32Imm(Log2_32(-Imm)));
SDValue PT =
SDValue(CurDAG->getMachineNode(PPC::ADDZE, dl, MVT::i32,
SDValue(Op, 0), SDValue(Op, 1)),
0);
return CurDAG->SelectNodeTo(N, PPC::NEG, MVT::i32, PT);
}
}
// Other cases are autogenerated.
break;
}
case ISD::LOAD: {
// Handle preincrement loads.
LoadSDNode *LD = cast<LoadSDNode>(N);

View File

@ -58,8 +58,6 @@ extern cl::opt<bool> ANDIGlueBug;
PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM)
: TargetLowering(TM),
Subtarget(*TM.getSubtargetImpl()) {
setPow2SDivIsCheap();
// Use _setjmp/_longjmp instead of setjmp/longjmp.
setUseUnderscoreSetJmp(true);
setUseUnderscoreLongJmp(true);
@ -8931,6 +8929,36 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
return SDValue();
}
/// Lower a signed division of \p N's first operand by the constant
/// \p Divisor when that divisor is a power of two or the negation of one.
///
/// Only i32 and i64 result types are handled; for any other type, or any
/// other divisor, an empty SDValue is returned. Otherwise the result is a
/// PPCISD::SRA_ADDZE node whose operands are the dividend and the number of
/// trailing zero bits of the divisor's magnitude (its base-2 logarithm). When
/// the negated divisor is a power of two, that node is additionally wrapped
/// in a (0 - x) subtraction. The SRA_ADDZE node and, if built, the
/// subtraction are appended to \p Created when that pointer is non-null.
SDValue
PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
                                 SelectionDAG &DAG,
                                 std::vector<SDNode *> *Created) const {
  // fold (sdiv X, pow2)
  const EVT ResultVT = N->getValueType(0);
  if (ResultVT != MVT::i32 && ResultVT != MVT::i64)
    return SDValue();

  const bool NegatedIsPow2 = (-Divisor).isPowerOf2();
  if (!NegatedIsPow2 && !Divisor.isPowerOf2())
    return SDValue();

  // Hands a freshly built node back to the caller, if it asked for them.
  auto Record = [Created](const SDValue &Built) {
    if (Created)
      Created->push_back(Built.getNode());
  };

  SDLoc Loc(N);
  const APInt Magnitude = NegatedIsPow2 ? -Divisor : Divisor;
  SDValue Log2Amt = DAG.getConstant(Magnitude.countTrailingZeros(), ResultVT);
  SDValue Result = DAG.getNode(PPCISD::SRA_ADDZE, Loc, ResultVT,
                               N->getOperand(0), Log2Amt);
  Record(Result);

  if (!NegatedIsPow2)
    return Result;

  // The shift/addze pair divides by the magnitude; negate to restore the sign.
  SDValue Zero = DAG.getConstant(0, ResultVT);
  Result = DAG.getNode(ISD::SUB, Loc, ResultVT, Zero, Result);
  Record(Result);
  return Result;
}
//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//

View File

@ -94,6 +94,12 @@ namespace llvm {
/// code.
SRL, SRA, SHL,
/// The combination of sra[wd]i and addze used to implement signed
/// integer division by a power of 2. The first operand is the dividend,
/// and the second is the constant shift amount (representing the
/// divisor).
SRA_ADDZE,
/// CALL - A direct function call.
/// CALL_NOP is a call with the special NOP which follows 64-bit
/// SVR4 calls.
@ -425,6 +431,9 @@ namespace llvm {
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
std::vector<SDNode *> *Created) const override;
unsigned getRegisterByName(const char* RegName, EVT VT) const override;
void computeKnownBitsForTargetNode(const SDValue Op,

View File

@ -0,0 +1,58 @@
; RUN: llc -mcpu=ppc64 < %s | FileCheck %s
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
; Function Attrs: nounwind readnone
; Signed i32 division by +8: expected to be selected as srawi followed by
; addze, with the result then sign-extended for the signext return.
define signext i32 @foo4(i32 signext %a) #0 {
entry:
  %div = sdiv i32 %a, 8
  ret i32 %div

; The label directive below needs its trailing colon; without it FileCheck
; does not recognise the line and the checks are not anchored to this function.
; CHECK-LABEL: @foo4
; CHECK: srawi [[REG1:[0-9]+]], 3, 3
; CHECK: addze [[REG2:[0-9]+]], [[REG1]]
; CHECK: extsw 3, [[REG2]]
; CHECK: blr
}
; Function Attrs: nounwind readnone
; Signed i64 division by +8: expected to be selected as sradi followed by
; addze, with the quotient produced directly in r3.
define i64 @foo8(i64 %a) #0 {
entry:
  %div = sdiv i64 %a, 8
  ret i64 %div

; The label directive below needs its trailing colon; without it FileCheck
; does not recognise the line and the checks are not anchored to this function.
; CHECK-LABEL: @foo8
; CHECK: sradi [[REG1:[0-9]+]], 3, 3
; CHECK: addze 3, [[REG1]]
; CHECK: blr
}
; Function Attrs: nounwind readnone
; Signed i32 division by -8: the srawi/addze pair is expected to be followed
; by a neg, and the result sign-extended for the signext return.
define signext i32 @foo4n(i32 signext %a) #0 {
entry:
  %quot = sdiv i32 %a, -8
  ret i32 %quot

; CHECK-LABEL: @foo4n
; CHECK: srawi [[REG1:[0-9]+]], 3, 3
; CHECK: addze [[REG2:[0-9]+]], [[REG1]]
; CHECK: neg [[REG3:[0-9]+]], [[REG2]]
; CHECK: extsw 3, [[REG3]]
; CHECK: blr
}
; Function Attrs: nounwind readnone
; Signed i64 division by -8: the sradi/addze pair is expected to be followed
; by a neg that leaves the quotient in r3.
define i64 @foo8n(i64 %a) #0 {
entry:
  %quot = sdiv i64 %a, -8
  ret i64 %quot

; CHECK-LABEL: @foo8n
; CHECK: sradi [[REG1:[0-9]+]], 3, 3
; CHECK: addze [[REG2:[0-9]+]], [[REG1]]
; CHECK: neg 3, [[REG2]]
; CHECK: blr
}
attributes #0 = { nounwind readnone }