forked from OSchip/llvm-project
[x86] Refactor a PowerPC specific ctlz/srl transformation (NFC).
Following the discussion on D22038, this refactors a PowerPC specific setcc -> srl(ctlz) transformation so it can be used by other targets. Differential Revision: https://reviews.llvm.org/D23445 llvm-svn: 278799
This commit is contained in:
parent
25d2506029
commit
051db7d838
|
@ -322,6 +322,11 @@ public:
|
|||
return false;
|
||||
}
|
||||
|
||||
/// \brief Return true if ctlz instruction is fast.
|
||||
virtual bool isCtlzFast() const {
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Return true if it is safe to transform an integer-domain bitwise operation
|
||||
/// into the equivalent floating-point operation. This should be set to true
|
||||
/// if the target has IEEE-754-compliant fabs/fneg operations for the input
|
||||
|
@ -3053,6 +3058,12 @@ public:
|
|||
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
|
||||
SelectionDAG &DAG) const;
|
||||
|
||||
/// seteq(x, 0) -> truncate(srl(ctlz(zext(x)), log2(#bits)))
|
||||
/// If we're comparing for equality to zero and isCtlzFast is true, expose the
|
||||
/// fact that this can be implemented as a ctlz/srl pair, so that the dag
|
||||
/// combiner can fold the new nodes.
///
/// \p Op must be an ISD::SETCC node (the implementation asserts this). An
/// empty SDValue is returned when the rewrite does not apply: isCtlzFast()
/// is false, the right-hand operand is not a constant zero, or the condition
/// code is not SETEQ.
|
||||
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
private:
|
||||
SDValue simplifySetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
|
||||
ISD::CondCode Cond, DAGCombinerInfo &DCI,
|
||||
|
|
|
@ -3562,3 +3562,28 @@ SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
|
|||
"Emulated TLS must have zero offset in GlobalAddressSDNode");
|
||||
return CallResult.first;
|
||||
}
|
||||
|
||||
/// Rewrite an equality-with-zero SETCC as a count-leading-zeros/shift pair:
///   seteq(x, 0) -> truncate(srl(ctlz(zext(x)), log2(#bits)))
///
/// \p Op must be an ISD::SETCC node (asserted). Returns an empty SDValue when
/// isCtlzFast() is false, when operand 1 is not a constant zero, or when the
/// condition code is not SETEQ; otherwise returns the i32 TRUNCATE node that
/// replaces the comparison.
SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
                                                SelectionDAG &DAG) const {
  assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");

  // The rewrite is gated on the target reporting a fast ctlz.
  if (!isCtlzFast())
    return SDValue();

  ISD::CondCode Cond = cast<CondCodeSDNode>(Op.getOperand(2))->get();
  SDLoc DL(Op);

  // Only "x == 0" with a constant zero on the right-hand side qualifies.
  auto *RHS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
  if (!RHS || !RHS->isNullValue() || Cond != ISD::SETEQ)
    return SDValue();

  // Operands narrower than 32 bits are zero-extended to i32 first.
  SDValue Src = Op.getOperand(0);
  EVT SrcVT = Src.getValueType();
  if (SrcVT.bitsLT(MVT::i32)) {
    SrcVT = MVT::i32;
    Src = DAG.getNode(ISD::ZERO_EXTEND, DL, SrcVT, Src);
  }

  // Shift the leading-zero count right by log2(bit width) and truncate the
  // shifted value to i32.
  SDValue LeadingZeros = DAG.getNode(ISD::CTLZ, DL, SrcVT, Src);
  SDValue ShiftAmt =
      DAG.getConstant(Log2_32(SrcVT.getSizeInBits()), DL, MVT::i32);
  SDValue Shifted = DAG.getNode(ISD::SRL, DL, SrcVT, LeadingZeros, ShiftAmt);
  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shifted);
}
|
||||
|
|
|
@ -2362,20 +2362,10 @@ SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
|
|||
// If we're comparing for equality to zero, expose the fact that this is
|
||||
// implemented as a ctlz/srl pair on ppc, so that the dag combiner can
|
||||
// fold the new nodes.
|
||||
if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
|
||||
return V;
|
||||
|
||||
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
|
||||
if (C->isNullValue() && CC == ISD::SETEQ) {
|
||||
EVT VT = Op.getOperand(0).getValueType();
|
||||
SDValue Zext = Op.getOperand(0);
|
||||
if (VT.bitsLT(MVT::i32)) {
|
||||
VT = MVT::i32;
|
||||
Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
|
||||
}
|
||||
unsigned Log2b = Log2_32(VT.getSizeInBits());
|
||||
SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
|
||||
SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
|
||||
DAG.getConstant(Log2b, dl, MVT::i32));
|
||||
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
|
||||
}
|
||||
// Leave comparisons against 0 and -1 alone for now, since they're usually
|
||||
// optimized. FIXME: revisit this when we can custom lower all setcc
|
||||
// optimizations.
|
||||
|
|
|
@ -492,6 +492,10 @@ namespace llvm {
|
|||
return true;
|
||||
}
|
||||
|
||||
/// Report ctlz as fast for this target. TargetLowering::lowerCmpEqZeroToCtlzSrl
/// checks this hook before rewriting seteq(x, 0) as a ctlz/srl pair.
bool isCtlzFast() const override { return true; }
|
||||
|
||||
bool supportSplitCSR(MachineFunction *MF) const override {
|
||||
return
|
||||
MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&
|
||||
|
|
Loading…
Reference in New Issue