[x86] Refactor a PowerPC specific ctlz/srl transformation (NFC).

Following the discussion on D22038, this refactors a PowerPC-specific setcc -> srl(ctlz) transformation so that it can be used by other targets.

Differential Revision: https://reviews.llvm.org/D23445

llvm-svn: 278799
This commit is contained in:
Pierre Gousseau 2016-08-16 13:53:53 +00:00
parent 25d2506029
commit 051db7d838
4 changed files with 43 additions and 13 deletions

View File

@ -322,6 +322,11 @@ public:
return false;
}
/// Return true if the target's count-leading-zeros (ctlz) instruction is fast.
/// The default implementation answers false; targets may override it.
virtual bool isCtlzFast() const { return false; }
/// Return true if it is safe to transform an integer-domain bitwise operation
/// into the equivalent floating-point operation. This should be set to true
/// if the target has IEEE-754-compliant fabs/fneg operations for the input
@ -3053,6 +3058,12 @@ public:
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
SelectionDAG &DAG) const;
/// Lower an integer equality-with-zero comparison to a ctlz/srl pair:
///   seteq(x, 0) -> truncate(srl(ctlz(zext(x)), log2(#bits)))
/// If we're comparing for equality to zero and isCtlzFast is true, expose the
/// fact that this can be implemented as a ctlz/srl pair, so that the dag
/// combiner can fold the new nodes.
/// \p Op must be a SETCC node. Returns an empty SDValue when the
/// transformation does not apply.
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const;
private:
SDValue simplifySetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
ISD::CondCode Cond, DAGCombinerInfo &DCI,

View File

@ -3562,3 +3562,28 @@ SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
"Emulated TLS must have zero offset in GlobalAddressSDNode");
return CallResult.first;
}
/// Try to rewrite the comparison `seteq(x, 0)` as
/// `truncate(srl(ctlz(zext(x)), log2(#bits)))`.
///
/// \p Op must be an ISD::SETCC node (asserted). An empty SDValue is returned
/// when isCtlzFast() is false, when the right-hand operand is not a constant
/// zero, or when the condition code is not SETEQ. Otherwise the replacement
/// value is returned, truncated to i32.
SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
                                                SelectionDAG &DAG) const {
  assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");

  // Bail out unless the target reports a fast ctlz.
  if (!isCtlzFast())
    return SDValue();

  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
  SDLoc dl(Op);

  // Only an equality test against the constant zero qualifies.
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
  if (!RHS || !RHS->isNullValue() || CC != ISD::SETEQ)
    return SDValue();

  // Operands narrower than 32 bits are zero-extended to i32 first.
  SDValue Src = Op.getOperand(0);
  EVT SrcVT = Src.getValueType();
  if (SrcVT.bitsLT(MVT::i32)) {
    SrcVT = MVT::i32;
    Src = DAG.getNode(ISD::ZERO_EXTEND, dl, SrcVT, Src);
  }

  // Shift the leading-zero count right by log2(width). For a power-of-two
  // width only a full-width count (i.e. a zero input) leaves a set bit.
  SDValue LeadingZeros = DAG.getNode(ISD::CTLZ, dl, SrcVT, Src);
  unsigned ShiftAmt = Log2_32(SrcVT.getSizeInBits());
  SDValue IsZero = DAG.getNode(ISD::SRL, dl, SrcVT, LeadingZeros,
                               DAG.getConstant(ShiftAmt, dl, MVT::i32));
  return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, IsZero);
}

View File

@ -2362,20 +2362,10 @@ SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
// If we're comparing for equality to zero, expose the fact that this is
// implemented as a ctlz/srl pair on ppc, so that the dag combiner can
// fold the new nodes.
if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
return V;
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
if (C->isNullValue() && CC == ISD::SETEQ) {
EVT VT = Op.getOperand(0).getValueType();
SDValue Zext = Op.getOperand(0);
if (VT.bitsLT(MVT::i32)) {
VT = MVT::i32;
Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
}
unsigned Log2b = Log2_32(VT.getSizeInBits());
SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
DAG.getConstant(Log2b, dl, MVT::i32));
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
}
// Leave comparisons against 0 and -1 alone for now, since they're usually
// optimized. FIXME: revisit this when we can custom lower all setcc
// optimizations.

View File

@ -492,6 +492,10 @@ namespace llvm {
return true;
}
/// Report ctlz as fast on this target.
bool isCtlzFast() const override { return true; }
bool supportSplitCSR(MachineFunction *MF) const override {
return
MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&